<#
.SYNOPSIS
    Reports project folders that exist on more than one office server.

.DESCRIPTION
    For each server / office row loaded from the CSV exports:
      1. Take the row's project number.
      2. Compare it against the rows of every other office.
      3. If the same project is listed for another office server, count the
         files under both paths and record the newest write / access times.
      4. Append one record per original/duplicate pair to
         "<import folder>\Output\<OfficeServer> Duplicates.csv".

    The input CSVs are read for the columns Project, OfficeServer and FullPath.
    Uses ForEach-Object -Parallel and $PSStyle, so it needs PowerShell 7.2 or later.
#>

# Migrating to use a SQLite DB.
# ($PathToDB and $PathToOutput are not referenced by the code below.)
$PathToDB = "C:\Users\eeckert\Downloads\EgnyteTesting\Dedupe.SQLite"
$PathToOutput = "C:\Users\eeckert\Downloads\EgnyteTesting\Output"
$CSV_ImportFolder = "M:\IT\Egnyte\DuplicateFiles\WorkingRun"

# Rows to check: every office CSV, loaded into one sorted collection.
# Collecting the loop output (instead of "+=" onto $null) also works when a CSV
# holds a single row, and avoids re-copying the array for every file.
$CSV_List = Get-ChildItem -Path $CSV_ImportFolder -Filter '*.csv'
$Office_to_compare_data = @(
    @(
        foreach ($CSV in $CSV_List) {
            Import-Csv $CSV.FullName
        }
    ) | Sort-Object Project, OfficeServer
)

# Rows to compare against: everything in the import folder except "Output",
# newest name first, each file's rows sorted by Project / OfficeServer.
$temp_CSV_List = Get-ChildItem -Path $CSV_ImportFolder -Exclude "Output" | Sort-Object Name -Descending
$temp_csv_data = @(
    foreach ($temp_csv in $temp_CSV_List) {
        Import-Csv -Path $temp_csv.FullName | Sort-Object Project, OfficeServer
    }
)

$ServerProgress = @{
    ID       = 1
    # NOTE(review): $CSV is the last Get-ChildItem result from the load loop, not a
    # CSV row, so OfficeServer is probably empty here - confirm what should be shown.
    Activity = "Processing: " + $CSV.OfficeServer
}
Write-Progress @ServerProgress

# Use more threads on weekends and on weekdays once the hour is greater than 17.
$CurrentDate = Get-Date
$isWeekend = $CurrentDate.DayOfWeek -in ("Saturday", "Sunday")
$isWeekdayEvening = (-not $isWeekend) -and ($CurrentDate.Hour -gt 17)
$ThreadLimit = if ($isWeekend -or $isWeekdayEvening) { 30 } else { 15 }

# The parallel stage only *emits* duplicate records; the serial stage after the
# pipe is the single writer of the output CSVs, so parallel runspaces never
# compete for the same file.
$Office_to_compare_data | ForEach-Object -ThrottleLimit $ThreadLimit -Parallel {
    # Runs in a separate runspace per row: outside variables must be brought in
    # with $using:, and the current row arrives as $_ / $PSItem.
    $PSStyle.Progress.View = "Minimal"

    $current = $_                          # keep the row; $_ is rebound inside Where-Object / catch
    $comparisonRows = $using:temp_csv_data
    $NonDuplicateObj = $null
    $JobID = Get-Random                    # progress-bar id for this row
    $rowLabel = $current.OfficeServer + ':' + $current.Project

    # Returns the file count and the newest LastWriteTime / LastAccessTime found under a path.
    function Get-FolderFileStats {
        param($Path)
        $files = @(Get-ChildItem -Path $Path -Recurse -Depth 50 -File)
        [PSCustomObject]@{
            Count      = $files.Count
            LastWrite  = if ($files.Count -gt 0) { ($files | Measure-Object LastWriteTime -Maximum).Maximum }
            LastAccess = if ($files.Count -gt 0) { ($files | Measure-Object LastAccessTime -Maximum).Maximum }
        }
    }

    Write-Progress -Id $JobID -Activity ("Starting: " + $rowLabel) -PercentComplete 0

    # Same project number listed for a different office server.
    $duplicates = @(
        $comparisonRows | Where-Object {
            $_.Project -eq $current.Project -and $_.OfficeServer -ne $current.OfficeServer
        }
    )

    if ($duplicates.Count -gt 0) {
        Write-Progress -Id $JobID -Activity ("Get Original Data: " + $rowLabel) -PercentComplete 15 `
            -CurrentOperation ("Getting Original data from " + $current.OfficeServer)
        Write-Debug ("Getting file counts from current office...this will take a second...") -Debug

        # TODO - identify NON-Duplicated files in these file structures...
        $originalStats = Get-FolderFileStats $current.FullPath

        # Everything up to the last slash, i.e. the "parent" of the project folder.
        $parentMatch = [Regex]::Matches($current.FullPath, "^(.*[\\\/])")
        $ParentPath = $parentMatch.Value

        foreach ($duplicate in $duplicates) {
            Write-Progress -Id $JobID -Activity ("Get Duplicated data: " + $duplicate.OfficeServer + ':' + $current.Project) `
                -PercentComplete 33 `
                -CurrentOperation ("Now getting duplicate path data from " + $duplicate.OfficeServer)

            $duplicateStats = Get-FolderFileStats $duplicate.FullPath

            Write-Progress -Id $JobID -Activity ("Processing: " + $duplicate.OfficeServer + ':' + $current.Project) -PercentComplete 66

            if ($originalStats.Count -ge 1 -and $duplicateStats.Count -ge 1) {
                # Files on both sides: emit one record for this original/duplicate pair.
                # Emitting inside the loop reports every duplicate server, not only the last one.
                [PSCustomObject]@{
                    OriginalServer         = $current.OfficeServer
                    OriginalProject        = "'" + $current.Project
                    OriginalParent         = $ParentPath
                    OriginalPath           = $current.FullPath
                    OriginalFileCount      = $originalStats.Count
                    OriginalFileLastWrite  = $originalStats.LastWrite
                    OriginalFileAccess     = $originalStats.LastAccess
                    DuplicateServer        = $duplicate.OfficeServer
                    DuplicatePath          = $duplicate.FullPath
                    DuplicateFileCount     = $duplicateStats.Count
                    DuplicateFileLastWrite = $duplicateStats.LastWrite
                    DuplicateFileAccess    = $duplicateStats.LastAccess
                }
            }
            elseif ($originalStats.Count -eq 0 -and $duplicateStats.Count -gt 0) {
                # Nothing on the current side: the remote server is the source.
                $NonDuplicateObj = [PSCustomObject]@{
                    OriginalServer        = $duplicate.OfficeServer
                    OriginalProject       = "'" + $duplicate.Project
                    OriginalPath          = $duplicate.FullPath
                    OriginalFileCount     = $duplicateStats.Count
                    OriginalFileLastWrite = $duplicateStats.LastWrite
                    OriginalFileAccess    = $duplicateStats.LastAccess
                }
            }
            elseif ($originalStats.Count -gt 0 -and $duplicateStats.Count -eq 0) {
                # Nothing on the remote side: not a duplicate.
                $NonDuplicateObj = [PSCustomObject]@{
                    OriginalServer        = $current.OfficeServer
                    OriginalProject       = "'" + $current.Project
                    OriginalPath          = $current.FullPath
                    OriginalFileCount     = $originalStats.Count
                    OriginalFileLastWrite = $originalStats.LastWrite
                    OriginalFileAccess    = $originalStats.LastAccess
                }
            }
            # NOTE(review): $Office_to_compare_data is referenced without $using:, so it is
            # not the caller's collection inside this parallel block - confirm the intent.
            elseif ($current.Project -inotin $Office_to_compare_data.Project) {
                $NonDuplicateObj = [PSCustomObject]@{
                    OriginalServer        = $current.OfficeServer
                    OriginalProject       = "'" + $current.Project
                    OriginalPath          = $current.FullPath
                    OriginalFileCount     = $originalStats.Count
                    OriginalFileLastWrite = $originalStats.LastWrite
                    OriginalFileAccess    = $originalStats.LastAccess
                }
            }

            Write-Progress -Id $JobID -Activity ("Writing: " + $rowLabel) -PercentComplete 100 -Completed
        }
    }

    # NOTE(review): $NonDuplicateObj is built above but not written anywhere; the
    # "<name>NonDuplicates.csv" export was already commented out. To re-enable it, emit
    # the object from this block and route it to its own file in the serial stage below.

    Write-Progress -Id $JobID -Activity ("Done: " + $rowLabel) -PercentComplete 100 -Completed
} | ForEach-Object {
    # Serial stage: runs in the calling runspace, one record at a time.
    $record = $_
    $outFile = $CSV_ImportFolder + '\Output\' + $record.OriginalServer + ' Duplicates.csv'
    try {
        $record | Export-Csv -Path $outFile -Append -ErrorAction Stop
    }
    catch {
        # Report the failure instead of silently dropping the record.
        Write-Warning ("Could not append to '" + $outFile + "': " + $_.Exception.Message)
    }
}

Write-Progress @ServerProgress -Completed