FileServerDedupe/ArchivedCode/Find-DuplicatePaths.ps1
2023-09-14 10:53:02 -06:00

247 lines
10 KiB
PowerShell

<#
for each server / Office
sanitize project number
Compare against each other office
if matched
get-file counts for each server
report duplicate site / count
#>
<#
Migrating to use a SQLite DB.
#>
# Working locations. $PathToDB and $PathToOutput are defined for the SQLite
# migration noted above; nothing in the CSV loading below reads them.
$PathToDB = "C:\Users\eeckert\Downloads\EgnyteTesting\Dedupe.SQLite"
$PathToOutput = "C:\Users\eeckert\Downloads\EgnyteTesting\Output"
$CSV_ImportFolder = "M:\IT\Egnyte\DuplicateFiles\WorkingRun"

# Data structure 1: the rows of every office CSV in the import folder, sorted by
# Project then OfficeServer. These are the rows that get checked for duplicates.
# The rows are collected inside @( ... ) rather than with "$null" + "+=":
# Import-Csv returns a bare object (not an array) for a one-row file, and "+="
# onto a bare object fails instead of appending.
$CSV_List = Get-ChildItem -Path $CSV_ImportFolder -Filter '*.csv' -File
$Office_to_compare_data = @(
    foreach ($CSV in $CSV_List) {
        Import-Csv -Path $CSV.FullName
    }
)
$Office_to_compare_data = @($Office_to_compare_data | Sort-Object Project, OfficeServer)

# Data structure 2: the comparison copy of the same CSVs. Files are read in
# descending name order and each file's rows are sorted by Project, OfficeServer.
# Only *.csv files are listed (same filter as above) so the Output folder and any
# stray non-CSV items in the import folder are never handed to Import-Csv.
$temp_CSV_List = Get-ChildItem -Path $CSV_ImportFolder -Filter '*.csv' -File |
    Sort-Object Name -Descending
$temp_csv_data = @(
    foreach ($temp_csv in $temp_CSV_List) {
        Import-Csv -Path $temp_csv.FullName | Sort-Object Project, OfficeServer
    }
)
# Top-level progress record (ID 1). It is splatted here to open the bar and
# again at the end of the script with -Completed to close it.
# The label lists the distinct OfficeServer values found in the loaded rows.
# (It used to read $CSV.OfficeServer, but $CSV is the last file object from the
# load loop and has no such property, so the label was always blank.)
$ServerProgressOffices = @(
    $Office_to_compare_data.OfficeServer |
        Where-Object { $_ } |
        Sort-Object -Unique
)
$ServerProgress = @{
    ID       = 1
    Activity = "Processing: " + ($ServerProgressOffices -join ', ')
}
Write-Progress @ServerProgress
# Choose the -ThrottleLimit for the parallel comparison from the current time:
# 30 threads on Saturday/Sunday or whenever the hour is greater than 17,
# otherwise 15.
$CurrentDate = Get-Date
$IsWeekend = $CurrentDate.DayOfWeek -in @([System.DayOfWeek]::Saturday, [System.DayOfWeek]::Sunday)
$IsAfterHours = $CurrentDate.Hour -gt 17
$ThreadLimit = ($IsWeekend -or $IsAfterHours) ? 30 : 15
# Compare every loaded row against the rows of the other offices, in parallel.
# For each row whose Project also appears under a different OfficeServer, the
# files under both paths are measured and, when both sides contain files, one
# line per (row, duplicate) pair is appended to
#   <import folder>\Output\<OfficeServer> Duplicates.csv
$Office_to_compare_data | ForEach-Object -ThrottleLimit $ThreadLimit -Parallel {
    # Runs in parallel, once per row: $_ is the current row, and variables from
    # the calling script are only reachable through $using:.
    $PSStyle.Progress.View = "Minimal"

    # Measures the files below $Path (recursing at most 50 levels) and returns
    # their count plus the newest LastWriteTime / LastAccessTime.
    function Get-FileTreeSummary {
        param($Path)
        $files = Get-ChildItem -Path $Path -Recurse -Depth 50 -File
        [PSCustomObject]@{
            Count      = ($files | Measure-Object).Count
            LastWrite  = ($files | Measure-Object -Property LastWriteTime -Maximum).Maximum
            LastAccess = ($files | Measure-Object -Property LastAccessTime -Maximum).Maximum
        }
    }

    # Keep the row in a named variable: $_ is rebound inside catch blocks.
    $row = $_
    $JobID = Get-Random   # random progress-bar ID for this row
    $label = $row.OfficeServer + ':' + $row.Project
    Write-Progress -Id $JobID -Activity ("Starting: " + $label) -PercentComplete 0

    # Rows with the same Project that live on a different OfficeServer.
    $duplicates = @(
        $using:temp_csv_data |
            Where-Object -Property Project -EQ $row.Project |
            Where-Object -Property OfficeServer -NE $row.OfficeServer
    )

    if ($duplicates.Count -gt 0) {
        Write-Progress -Id $JobID -Activity ("Get Original Data: " + $label) -PercentComplete 15 `
            -CurrentOperation ("Getting Original data from " + $row.OfficeServer)
        Write-Debug ("Getting file counts from current office...this will take a second...") -Debug
        # TODO - identify NON-Duplicated files in these file structures...
        $rowStats = Get-FileTreeSummary -Path $row.FullPath

        # Everything up to and including the last slash, i.e. the "parent".
        $ParentPath = [Regex]::Matches($row.FullPath, "^(.*[\\\/])").Value
        $outFile = $using:CSV_ImportFolder + '\Output\' + $row.OfficeServer + ' Duplicates.csv'

        foreach ($duplicate in $duplicates) {
            # Reset per pair so a result from an earlier duplicate is never
            # written again for this one.
            $DuplicateObj = $null
            $NonDuplicateObj = $null

            Write-Progress -Id $JobID -Activity ("Get Duplicated data: " + $duplicate.OfficeServer + ':' + $row.Project) `
                -PercentComplete 33 `
                -CurrentOperation ("Now getting duplicate path data from " + $duplicate.OfficeServer)
            $dupStats = Get-FileTreeSummary -Path $duplicate.FullPath

            Write-Progress -Id $JobID -Activity ("Processing: " + $duplicate.OfficeServer + ':' + $row.Project) -PercentComplete 66

            if ($rowStats.Count -ge 1 -and $dupStats.Count -ge 1) {
                # Files exist on both sides: report the pair as a duplicate.
                # Project gets a leading apostrophe in the output column.
                $DuplicateObj = [PSCustomObject]@{
                    OriginalServer         = $row.OfficeServer
                    OriginalProject        = "'" + $row.Project
                    OriginalParent         = $ParentPath
                    OriginalPath           = $row.FullPath
                    OriginalFileCount      = $rowStats.Count
                    OriginalFileLastWrite  = $rowStats.LastWrite
                    OriginalFileAccess     = $rowStats.LastAccess
                    DuplicateServer        = $duplicate.OfficeServer
                    DuplicatePath          = $duplicate.FullPath
                    DuplicateFileCount     = $dupStats.Count
                    DuplicateFileLastWrite = $dupStats.LastWrite
                    DuplicateFileAccess    = $dupStats.LastAccess
                }
            }
            elseif ($rowStats.Count -eq 0 -and $dupStats.Count -gt 0) {
                # Nothing on the current side: the other server holds the only copy.
                $NonDuplicateObj = [PSCustomObject]@{
                    OriginalServer        = $duplicate.OfficeServer
                    OriginalProject       = "'" + $duplicate.Project
                    OriginalPath          = $duplicate.FullPath
                    OriginalFileCount     = $dupStats.Count
                    OriginalFileLastWrite = $dupStats.LastWrite
                    OriginalFileAccess    = $dupStats.LastAccess
                }
            }
            elseif ($rowStats.Count -gt 0 -and $dupStats.Count -eq 0) {
                # Nothing on the other side: the current server holds the only copy.
                $NonDuplicateObj = [PSCustomObject]@{
                    OriginalServer        = $row.OfficeServer
                    OriginalProject       = "'" + $row.Project
                    OriginalPath          = $row.FullPath
                    OriginalFileCount     = $rowStats.Count
                    OriginalFileLastWrite = $rowStats.LastWrite
                    OriginalFileAccess    = $rowStats.LastAccess
                }
            }
            elseif ($row.Project -inotin $using:Office_to_compare_data.Project) {
                # NOTE(review): the outer list is now read through $using: (it was
                # always $null inside the runspace before). Every row comes from
                # that list, so this branch looks unreachable - confirm and remove.
                $NonDuplicateObj = [PSCustomObject]@{
                    OriginalServer        = $row.OfficeServer
                    OriginalProject       = "'" + $row.Project
                    OriginalPath          = $row.FullPath
                    OriginalFileCount     = $rowStats.Count
                    OriginalFileLastWrite = $rowStats.LastWrite
                    OriginalFileAccess    = $rowStats.LastAccess
                }
            }
            # $NonDuplicateObj is built but not written anywhere; the
            # "NonDuplicates.csv" export is currently disabled.

            Write-Progress -Id $JobID -Activity ("Writing: " + $label) -PercentComplete 100

            if ($DuplicateObj) {
                # Several runspaces may append to the same per-office file, so a
                # failed write is retried after a short random pause. The retry
                # count is bounded so a permanent failure (for example a missing
                # Output folder) is reported instead of looping forever.
                $maxAttempts = 20
                for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) {
                    try {
                        $DuplicateObj | Export-Csv -Path $outFile -Append -ErrorAction Stop
                        break
                    }
                    catch {
                        if ($attempt -eq $maxAttempts) {
                            Write-Error ("Could not write to '" + $outFile + "' after " + $maxAttempts + " attempts: " + $_)
                        }
                        else {
                            Start-Sleep -Milliseconds (Get-Random -Minimum 50 -Maximum 250)
                        }
                    }
                }
            }
        }
    }

    Write-Progress -Id $JobID -Activity ("Done: " + $label) -PercentComplete 100 -Completed
}
# Close the top-level progress bar.
Write-Progress @ServerProgress -Completed