# FileServerDedupe/ArchivedCode/FindNonDupFiles.ps1
#
# 149 lines
# 6.6 KiB
# PowerShell
# (web-view header residue: Raw / Permalink / Normal View / History)
#
# 2023-09-14 10:53:02 -06:00
<#
.SYNOPSIS
    Lists project folders on X:\ that are not referenced by the duplicate-file CSVs.

.DESCRIPTION
    Loads every "* Duplicates.csv" found in $CSV_ImportFile, then walks each
    top-level (office) folder of X:\ in parallel. For every folder whose full
    path matches $ProjectRegex, a project number is built from the numeric path
    segments and one row is appended to "<date> NonDuplicates.csv" when either:
      * the folder's parent path is not in the OriginalParent column
        (row is written with Parent = "Yes"), or
      * the project number is not in the OriginalProject column and the folder
        is not empty (row is written with Parent = "No").

.NOTES
    Requires PowerShell 7+ (ForEach-Object -Parallel).
    Rows are emitted to the pipeline and written by a single Export-Csv call,
    so the parallel runspaces never compete for the output file.
#>

$CSV_ImportFile = 'M:\IT\Egnyte\DuplicateFiles\WorkingRun\Output'
$CSV_ClientList = "M:\IT\Egnyte\DuplicateFiles\Client List.csv"
$date = Get-Date -Format FileDate
$exportPath_main = "M:\IT\Egnyte\DuplicateFiles\$date NonDuplicates.csv"

# Load every duplicates CSV into one flat array of rows.
$AllDuplicates_files = Get-ChildItem -Path $CSV_ImportFile -Filter "* Duplicates.csv" -File
$AllDuplicates = @($AllDuplicates_files | ForEach-Object { Import-Csv -LiteralPath $_.FullName })

# Not used in the section below; kept because later parts of the script may read it.
$ClientList = Import-Csv -LiteralPath $CSV_ClientList

$BaseDrive = 'X:\'
$ProjectRegex = '^X:\\[A-Z]{2}\W\d{2}\\\d{2}'
$OfficeList = Get-ChildItem -Path $BaseDrive -Directory -Depth 0

# The lookup lists are identical for every office, so build them once here
# instead of once per parallel runspace.
$All_DupParent_Main = $AllDuplicates.OriginalParent | Sort-Object -Unique
$All_DupProject_Main = $AllDuplicates.OriginalProject | Sort-Object -Unique

$OfficeList | ForEach-Object -ThrottleLimit 3 -Parallel {
    # Runs once per office folder in its own runspace. Outside variables must be
    # pulled in with $using:, and the nested -Parallel block below can only
    # $using: variables that are local to THIS scriptblock, hence the copies.
    Write-Host "Processing " $_.FullName -ForegroundColor Green
    $projectPattern = $using:ProjectRegex
    $dupParents = $using:All_DupParent_Main
    $dupProjects = $using:All_DupProject_Main

    # Up to three levels below the office folder, filtered to project-shaped paths.
    $ProjectFolders = Get-ChildItem -LiteralPath $_.FullName -Directory -Depth 2 |
        Where-Object FullName -Match $projectPattern

    $ProjectFolders | ForEach-Object -ThrottleLimit 15 -Parallel {
        Write-Host "Processing " $_.FullName -ForegroundColor Cyan
        $folderPath = $_.FullName

        # Build the project number by concatenating every numeric path segment
        # ("\123" or "\123-456"). A dedicated variable is used because $matches
        # is a PowerShell automatic variable and should not be overwritten.
        # NOTE(review): group 2 holds the last "-NNN" part that group 1 already
        # contains, so that part is appended twice. Left unchanged because the
        # value is compared against OriginalProject from the duplicates CSVs -
        # confirm the producer of those CSVs builds it the same way.
        $segmentMatches = [regex]::Matches($folderPath, "\\(\d+(-\d+)*)")
        $result = ""
        foreach ($segment in $segmentMatches) {
            $result += $segment.Groups[1].Value
            if ($segment.Groups[2].Success) {
                $result += $segment.Groups[2].Value
            }
        }
        # Leading apostrophe is part of the stored value (presumably so that
        # spreadsheet tools keep it as text - confirm).
        $projectNumber = "'" + $result

        # Everything up to and including the last slash, i.e. the parent folder.
        $ParentPath = [regex]::Matches($folderPath, "^(.*[\\\/])").Value

        # Fewer than 6 characters (apostrophe included) is not treated as a project.
        if ($projectNumber.Length -lt 6) {
            return
        }

        $OfficeRegex = "(X:\\[A-Z]+)"
        # Characters 1-4 of "X:\AB\..." are ":\AB"; stripping ':' and '\' leaves the office code.
        $OfficeServer = $folderPath.Substring(1, 4) -replace (':|\\', '')

        # Decide which kind of row (if any) this folder produces.
        if ($ParentPath -notin $using:dupParents) {
            $reportPath = $ParentPath   # row describes the parent folder
            $scanDepth = 1
            $parentFlag = "Yes"
        }
        elseif ($projectNumber -notin $using:dupProjects) {
            $reportPath = $folderPath   # row describes the project folder itself
            $scanDepth = 50
            $parentFlag = "No"
        }
        else {
            return
        }

        # -LiteralPath so folder names containing [ or ] are not treated as wildcards.
        # NOTE(review): directories are enumerated too, so FileCount includes them.
        $fileList = Get-ChildItem -LiteralPath $folderPath -Recurse -Depth $scanDepth
        $FileSize = $fileList | Measure-Object -Sum Length
        $totalFiles = $fileList | Measure-Object
        $LastWriteDate = $fileList | Measure-Object LastWriteTime -Maximum
        $LastAccessDate = $fileList | Measure-Object LastAccessTime -Maximum

        # Only the "No" rows are suppressed when the folder is empty.
        if (($parentFlag -eq "No") -and -not (($FileSize.Sum -gt 0) -or ($totalFiles.Count -gt 0))) {
            return
        }

        $relativePath = $reportPath -replace ($OfficeRegex, "")
        $EgnytePath1 = ("/Shared/N-Data" + $relativePath) -replace ("\\", "/")

        # Emit the row; the single Export-Csv at the end of the pipeline writes it.
        [PSCustomObject]@{
            OfficeServer  = $OfficeServer
            ProjectNumber = $projectNumber
            ParentPath    = $ParentPath
            UNCPath       = $reportPath -replace ('X:', "\\mpe.ca\datadrive")
            EgnytePath1   = $EgnytePath1
            FileCount     = $totalFiles.Count
            FolderSize    = ($FileSize.Sum / 1024 / 1024 / 1024) # size in GB
            LastWrite     = $LastWriteDate.Maximum
            LastAccess    = $LastAccessDate.Maximum
            Parent        = $parentFlag
        }
    }
} | Export-Csv -Path $exportPath_main -Append