235 lines
7.9 KiB
PowerShell
235 lines
7.9 KiB
PowerShell
<#
Script should:
1) Get the list of all Project Folders
2) Determine if there's content in these folders (exclude shortcuts, .lnk files, single files)
3) Flag folders with content
4) Output results to a file for each server.

Each output file should be read by the analysis script to determine locations with duplicate data.

Output file should show:
X:\CG | ProjectNumber | FullPath | filecount
#>

[CmdletBinding()]
param (
    # -Refresh: start from scratch. The FolderData table is emptied before the
    # scan, so every project folder is analyzed again instead of being skipped
    # because it is already recorded.
    [Parameter(Mandatory = $false)][switch]$Refresh
)

# ---- Run configuration ------------------------------------------------------

# SQLite database file that stores one FolderData row per analyzed project folder.
$PathToDB = 'M:\IT\Egnyte\DuplicateFiles\WorkingRun\Dedupe.SQLite'

# Folder that receives the generated .xlsx reports.
$OutputFolder = 'M:\IT\Egnyte\DuplicateFiles\WorkingRun\Output'

# Root of the drive that is scanned.
$BaseDrive = 'X:\'

# A directory is only analyzed when its full path matches this pattern:
# X:\ + two capital letters + one non-word character + two digits + '\' + two digits.
$ProjectRegex = '^X:\\[A-Z]{2}\W\d{2}\\\d{2}'

# Immediate sub-directories of the base drive; the rest of the script handles
# each of them as one office / server.
$OfficeList = Get-ChildItem -Directory -Depth 0 -Path $BaseDrive

if ($Refresh) {
    # Reset the DB, start fresh: remove every previously recorded folder so the
    # scan re-analyzes all of them.
    #
    # Invoke-SqliteQuery -DataSource opens and closes its own connection, so no
    # separate connection object is created here. The previous version opened
    # one that the query never used and that stayed open if the query failed.
    Invoke-SqliteQuery -DataSource $PathToDB -Query 'DELETE FROM FolderData;'
}

# Scan all office folders, five at a time. Inside each office the candidate
# project folders are again processed five at a time. For every project folder
# that is not yet in FolderData (same Project + Server), file count, total size
# (GiB), newest LastWriteTime and newest LastAccessTime are collected and
# written to the FolderData table.
$OfficeList | ForEach-Object -ThrottleLimit 5 -Parallel {
    # Local copy so the nested parallel block can import it with $using:.
    $PathToDB_Copy_Main = $using:PathToDB

    $ServerProgress = @{
        ID       = 1
        Activity = "Processing: " + $_.FullName
    }
    Write-Progress @ServerProgress

    # Directories up to three levels below the office folder whose full path
    # matches the project pattern, e.g. the "001" in 1234-567-001. A file count
    # is needed for each of these "last" folders.
    $ProjectFolders = Get-ChildItem $_.FullName -Directory -Depth 2 |
        Where-Object FullName -Match $using:ProjectRegex

    $ProjectFolders | ForEach-Object -ThrottleLimit 5 -Parallel {
        $PathToDB_Copy = $using:PathToDB_Copy_Main

        # Random progress-bar id so concurrent folders do not share one bar.
        $JobID = Get-Random
        $CurentLoopProgress = @{
            ID              = $JobID
            Activity        = "Analyzing " + $_.FullName
            PercentComplete = 10
        }

        # Build the project number by concatenating every numeric path segment
        # ("\123" or "\123-456-...") found in the folder's full path.
        # The result is kept in $numberMatches rather than $matches, because
        # $matches is a PowerShell automatic variable and must not be assigned.
        # NOTE(review): for a hyphenated segment Groups[2] is the last "-NNN"
        # capture, which Groups[1] already contains, so that part is appended
        # twice. Behavior kept as-is; confirm whether this is intended.
        $numberMatches = [regex]::Matches($_.FullName, '\\(\d+(-\d+)*)')
        $result = ''
        foreach ($match in $numberMatches) {
            $result += $match.Groups[1].Value
            if ($match.Groups[2].Success) {
                $result += $match.Groups[2].Value
            }
        }

        # Anything shorter than 5 characters is not treated as a project and is skipped.
        if ($result.Length -ge 5) {
            # Characters 2-5 of the path without ':' and '\', e.g. "X:\CG\..." -> "CG".
            $Server = $_.FullName.Substring(1, 4) -replace ':|\\', ''
            $ProjectNumber = $result

            # Skip folders whose Project/Server pair is already recorded.
            # No explicit connection is opened: the -DataSource calls manage
            # their own, and the one opened here before was never used.
            $Query = "Select Project FROM FolderData where Project = '$ProjectNumber' AND Server = '$Server';"
            $sql_result = Invoke-SqliteQuery -DataSource $PathToDB_Copy -Query $Query

            # $null on the left so the comparison stays a scalar test even if
            # the query returns several rows.
            if ($null -eq $sql_result.project) {
                $CurentLoopProgress = @{
                    ID              = $JobID
                    Activity        = "Loading Telemetry for " + $_.FullName
                    PercentComplete = 75
                }
                Write-Progress @CurentLoopProgress

                # Everything up to and including the last slash, i.e. the parent path.
                $folder_Parent = [regex]::Matches($_.FullName, "^(.*[\\\/])").Value

                # All files below the project folder (at most 50 levels deep).
                $FolderPath_Data = Get-ChildItem -Path $_.FullName -File -Depth 50 -Recurse

                $folder_FileSize = ($FolderPath_Data | Measure-Object -Property Length -Sum).Sum / 1024 / 1024 / 1024
                $folder_FileCount = ($FolderPath_Data | Measure-Object).Count
                $folder_LastWrite = ($FolderPath_Data | Measure-Object -Property LastWriteTime -Maximum).Maximum
                $folder_LastAccess = ($FolderPath_Data | Measure-Object -Property LastAccessTime -Maximum).Maximum

                # Folders without any file content are ignored.
                if ($folder_FileSize -gt 0 -and $null -ne $folder_FileCount) {
                    $out = [PSCustomObject]@{
                        Server         = $Server
                        Project        = $ProjectNumber
                        Parent         = $folder_Parent
                        Path           = $_.FullName
                        FileCount      = $folder_FileCount
                        FileSize       = $folder_FileSize
                        FileLastWrite  = $folder_LastWrite
                        FileLastAccess = $folder_LastAccess
                    } | Out-DataTable

                    Invoke-SQLiteBulkCopy -DataSource $PathToDB_Copy -Table "FolderData" -DataTable $out -Force
                }
            }
        }

        Write-Progress @CurentLoopProgress -Completed
    }

    Write-Progress @ServerProgress -Completed
}

# Progress bar for the database / reporting phase. It is completed at the very
# end of the script.
$ServerProgress = @{ ID = 1; Activity = "Database Work"; PercentComplete = 80 }
Write-Progress @ServerProgress

## Kick off GenTables, then the duplicate pairing. Both SQL files are read from
## the folder this script lives in and are run in this order.
foreach ($sqlScriptName in 'GenTables.sql', 'DuplicatePairing.sql') {
    $SQLFile_to_Run = $PSScriptRoot + '\' + $sqlScriptName
    Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
}

# Connection that is closed at the end of the script.
# NOTE(review): the queries below all use -DataSource and therefore do not go
# through this connection object; confirm whether it is still needed.
$DBConnect = New-SqliteConnection -DataSource $PathToDB

# One "<Server> Duplicates.xlsx" workbook per office that has rows in OfficeDuplicates.
foreach ($office in $OfficeList) {
    # $office is a DirectoryInfo object, which has no Substring() method, so the
    # server code is cut from its FullName with the same rule the folder scan
    # uses for the Server column: characters 2-5 of the path without ':' and
    # '\' (e.g. "X:\CG" -> "CG"). The length is clamped so a very short path
    # cannot throw.
    $officePath = $office.FullName
    $OfficeServer = $officePath.Substring(1, [Math]::Min(4, $officePath.Length - 1)) -replace ':|\\', ''

    $Query = "select * from OfficeDuplicates where Server = '$OfficeServer';"
    $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query

    # Offices without duplicate rows get no workbook.
    if ($sql_result) {
        $OutFile = $OutputFolder + "\$OfficeServer Duplicates.xlsx"
        $sql_result | Export-Excel -Path $OutFile -AutoFilter
    }
}

# Fixed summary workbooks, produced in this order:
#   1. MPE All Xlsx                - every row of OfficeDuplicates
#   2. Parent Non-Duplicates       - every row of NonDupParent
#   3. Project Non-Duplicates      - NonDupProject rows whose Path is not in NonDupParent
# A workbook is only written when its query returns at least one row.
$summaryReports = @(
    @{
        Sql      = "select * from OfficeDuplicates;"
        Workbook = "\MPE All Duplicates.xlsx"
    }
    @{
        Sql      = "select * from NonDupParent;"
        Workbook = "\MPE Non-Duplicates - Parent Folders.xlsx"
    }
    @{
        Sql      = "select * from NonDupProject where Path NOT IN (select Path from NonDupParent);"
        Workbook = "\MPE Non-Duplicates - Project Folders.xlsx"
    }
)

foreach ($report in $summaryReports) {
    $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $report.Sql

    if ($sql_result) {
        $OutFile = $OutputFolder + $report.Workbook
        $sql_result | Export-Excel -Path $OutFile -AutoFilter
    }
}

## Generate Egnyte Data Inventory Sheet

# Timestamped workbook name. Get-Date -Format already returns the formatted
# string; reading a .DateTime property from that string yields $null, which
# silently dropped the timestamp from the file name.
$OutFile = $OutputFolder + "\MPE Data Inventory " + (Get-Date -Format FileDateTime) + ".xlsx"
$SQLFile_to_Run = $PSScriptRoot + '\Generate Egnyte Export.sql'
$EgnyteExport = Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run

# Only write the workbook when the export query returned rows. The rows have to
# be piped into Export-Excel and written to $OutFile; the previous call passed
# no data and pointed at the undefined variable $OutputFile.
if ($EgnyteExport) {
    $EgnyteExport | Export-Excel -Path $OutFile -AutoFilter
}

# Final cleanup: close the connection opened before the report section and
# finish the "Database Work" progress bar.
$DBConnect.Close()
Write-Progress @ServerProgress -Completed