FileServerDedupe/Get-ProjectFolders.ps1

241 lines
8.1 KiB
PowerShell
Raw Normal View History

2023-09-14 10:53:02 -06:00
<#
Script should:
1) get list of all Project Folders
2) Determine if there's content in these folders (exclude shortcuts, .lnk files, single files)
3) Flag folders with content
4) Output results to file for each server.
Each Output file should be read by the analysis script to determine locations with duplicate data.
Output File should show:
X:\CG | ProjectNumber | FullPath | filecount
#>
2023-09-14 12:06:31 -06:00
2023-09-14 13:37:32 -06:00
[CmdletBinding()]
param (
    # Optional switch. When present, the FolderData table is emptied before the
    # scan, so every project folder is analyzed again instead of being skipped
    # because a row for it already exists.
    [Parameter()]
    [switch]$Refresh
)
#refresh all data - causes the DB to be wiped clean and all file folders to be re-analyzed.
2023-09-14 12:06:31 -06:00
2023-09-14 10:53:02 -06:00
# SQLite database that accumulates one row per analyzed project folder.
$PathToDB = 'M:\IT\Egnyte\DuplicateFiles\WorkingRun\Dedupe.SQLite'

# Directory that receives the generated Excel workbooks.
$OutputFolder = 'M:\IT\Egnyte\DuplicateFiles\WorkingRun\Output'

# Root of the drive to scan; each directory directly beneath it is processed
# as one office/server.
$BaseDrive = 'X:\'

# A folder is treated as a project folder when its full path starts with
# X:\, two letters, one non-word character, two digits, a backslash and two
# more digits (the -Match operator applied later is case-insensitive).
$ProjectRegex = '^X:\\[A-Z]{2}\W\d{2}\\\d{2}'

# -Depth 0 restricts the listing to the immediate children of $BaseDrive.
$OfficeList = Get-ChildItem -Directory -Depth 0 -Path $BaseDrive
2023-09-14 12:08:37 -06:00
if ($Refresh) {
    # Reset the DB, start fresh: with FolderData empty, no folder is found by
    # the "already recorded" lookup during the scan, so all are re-analyzed.
    # Invoke-SqliteQuery is given -DataSource, so no separate connection object
    # needs to be opened and closed around this call.
    Invoke-SqliteQuery -DataSource $PathToDB -Query 'DELETE FROM FolderData;'
}
2023-09-14 10:53:02 -06:00
# Scan every office folder in parallel (at most 5 at a time). For each office,
# find its project folders and, for every project not yet present in
# FolderData, record file count, total size and newest write/access times.
$OfficeList | ForEach-Object -Parallel {
    # Re-expose the DB path as a local so the nested parallel block can pull
    # it in with $using:.
    # NOTE(review): presumably required because the nested block resolves
    # $using: against this runspace rather than the script scope - confirm.
    $PathToDB_Copy_Main = $using:PathToDB

    $ServerProgress = @{
        ID       = 1
        Activity = "Processing: " + $_.FullName
    }
    Write-Progress @ServerProgress

    # Directories up to three levels below the office folder (-Depth 2) whose
    # full path matches the project pattern.
    $ProjectFolders = Get-ChildItem $_.FullName -Directory -Depth 2 |
        Where-Object FullName -Match $using:ProjectRegex

    # Each candidate folder is analyzed in its own runspace, 5 at a time per office.
    $ProjectFolders | ForEach-Object -ThrottleLimit 5 -Parallel {
        $PathToDB_Copy = $using:PathToDB_Copy_Main
        $JobID = Get-Random   # random progress-bar ID for this folder
        $CurrentLoopProgress = @{
            ID              = $JobID
            Activity        = "Analyzing " + $_.FullName
            PercentComplete = 10
        }

        # Build the project number by concatenating every numeric path segment
        # that directly follows a backslash. The result is stored in a local
        # name rather than the automatic variable $matches.
        # NOTE(review): Groups[1] already contains the text captured by
        # Groups[2], so a hyphenated segment such as "1234-567" contributes
        # "1234-567-567". Left unchanged because stored Project values depend
        # on it - confirm whether the repetition is intended.
        $numberMatches = [regex]::Matches($_.FullName, "\\(\d+(-\d+)*)")
        $result = ""
        foreach ($match in $numberMatches) {
            $result += $match.Groups[1].Value
            if ($match.Groups[2].Success) {
                $result += $match.Groups[2].Value
            }
        }

        # Anything shorter than 5 characters is not treated as a project.
        if ($result.Length -ge 5) {
            # Characters 1-4 of the path (e.g. ":\CG") with ':' and '\' removed.
            $Server = $_.FullName.Substring(1, 4) -replace (':|\\', '')
            $ProjectNumber = $result.ToString()

            # Skip folders whose Project/Server pair is already recorded.
            $Query = "Select Project FROM FolderData where Project = '$ProjectNumber' AND Server = '$Server';"
            $sql_result = Invoke-SqliteQuery -DataSource $PathToDB_Copy -Query $Query

            if ($null -eq $sql_result.project) {
                $CurrentLoopProgress = @{
                    ID              = $JobID
                    Activity        = "Loading Telemetry for " + $_.FullName
                    PercentComplete = 75
                }
                Write-Progress @CurrentLoopProgress

                # Everything up to and including the last slash, i.e. the parent path.
                $result3 = [Regex]::Matches($_.FullName, "^(.*[\\\/])")
                $folder_Parent = $result3.value

                # All files beneath the project folder, at most 50 levels deep.
                $FolderPath_Data = Get-ChildItem -Path $_.FullName -File -Depth 50 -Recurse

                # Total size in GiB (bytes / 1024^3).
                $folder_FileSize = ($FolderPath_Data | Measure-Object -Property Length -Sum).Sum / 1024 / 1024 / 1024
                $folder_FileCount = ($FolderPath_Data | Measure-Object).Count
                $folder_LastWrite = ($FolderPath_Data | Measure-Object -Property LastWriteTime -Maximum).Maximum
                $folder_LastAccess = ($FolderPath_Data | Measure-Object -Property LastAccessTime -Maximum).Maximum

                # Folders with no measurable content are not recorded.
                $hasContent = -not ($folder_FileSize -le 0 -or $null -eq $folder_FileCount)
                if ($hasContent) {
                    $out = [PSCustomObject]@{
                        Server         = $Server
                        Project        = $ProjectNumber.ToString()
                        Parent         = $folder_Parent
                        Path           = $_.FullName
                        FileCount      = $folder_FileCount
                        FileSize       = $folder_FileSize
                        FileLastWrite  = $folder_LastWrite
                        FileLastAccess = $folder_LastAccess
                    } | Out-DataTable
                    Invoke-SQLiteBulkCopy -DataSource $PathToDB_Copy -Table "FolderData" -DataTable $out -Force
                }
            }
        }

        Write-Progress @CurrentLoopProgress -Completed
    }

    Write-Progress @ServerProgress -Completed
} -ThrottleLimit 5
# Move the top-level progress bar (ID 1) to the database phase.
$ServerProgress = @{
    ID              = 1
    Activity        = "Database Work"
    PercentComplete = 80
}
Write-Progress @ServerProgress

## Kick off GenTables, then DuplicatePairing.
# Both SQL files are read from the directory containing this script and are
# executed against the working database in this order.
foreach ($sqlScriptName in 'GenTables.sql', 'DuplicatePairing.sql') {
    $SQLFile_to_Run = $PSScriptRoot + '\' + $sqlScriptName
    Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
}
# Connection handle that is closed at the very end of the script. The queries
# below are issued with -DataSource and therefore do not go through it.
$DBConnect = New-SqliteConnection -DataSource $PathToDB

# Write one "<office> Duplicates.xlsx" workbook for every office folder that
# has rows in OfficeDuplicates. The office name is bound as a SQL parameter
# so that a folder name containing an apostrophe cannot break the statement.
$Query = 'select * from OfficeDuplicates where Server = @Server;'
foreach ($office in $OfficeList) {
    $OfficeServer = $office.Name
    $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query -SqlParameters @{ Server = $OfficeServer }

    if ($sql_result) {
        $OutFile = $OutputFolder + "\$OfficeServer Duplicates.xlsx"

        # Replace the workbook from a previous run; on a first run there is
        # nothing to delete, so only remove the file when it exists.
        if (Test-Path -LiteralPath $OutFile) {
            Remove-Item -LiteralPath $OutFile -Force
        }
        $sql_result | Export-Excel -Path $OutFile -AutoFilter
    }
}
# Generate the whole-database workbooks, in this order:
#   1. MPE All Xlsx                - every row of OfficeDuplicates
#   2. Parent Non-Duplicates       - every row of NonDupParent
#   3. Project Non-Duplicates      - NonDupProject rows whose Path is not in NonDupParent
# A workbook is only (re)written when its query returns rows.
$reportDefinitions = @(
    @{
        Query    = "select * from OfficeDuplicates;"
        FileName = "MPE All Duplicates.xlsx"
    }
    @{
        Query    = "select * from NonDupParent;"
        FileName = "MPE Non-Duplicates - Parent Folders.xlsx"
    }
    @{
        Query    = "select * from NonDupProject where Path NOT IN (select Path from NonDupParent);"
        FileName = "MPE Non-Duplicates - Project Folders.xlsx"
    }
)

foreach ($report in $reportDefinitions) {
    $Query = $report.Query
    $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query

    if ($sql_result) {
        $OutFile = $OutputFolder + '\' + $report.FileName

        # Replace the workbook from a previous run; skip the delete when the
        # file does not exist so a first run does not raise an error.
        if (Test-Path -LiteralPath $OutFile) {
            Remove-Item -LiteralPath $OutFile -Force
        }
        $sql_result | Export-Excel -Path $OutFile -AutoFilter
    }
}
2023-09-14 15:29:46 -06:00
## Generate Egnyte Data Inventory Sheet
# The workbook name carries a timestamp. Get-Date -Format already returns a
# string, so it is concatenated directly (a string has no .DateTime property).
$OutFile = $OutputFolder + "\MPE Data Inventory " + (Get-Date -Format FileDateTime) + ".xlsx"

# The inventory rows come from the SQL file stored next to this script.
$SQLFile_to_Run = $PSScriptRoot + '\Generate Egnyte Export.sql'
$EgnyteExport = Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run

if ($EgnyteExport) {
    # Only delete when a file of the same name is actually present.
    if (Test-Path -LiteralPath $OutFile) {
        Remove-Item -LiteralPath $OutFile -Force
    }
    # Pipe the query result into the workbook at $OutFile.
    $EgnyteExport | Export-Excel -Path $OutFile -AutoFilter
}
2023-09-14 13:26:42 -06:00
2023-09-14 12:06:31 -06:00
# All reports are done: close the SQLite connection opened before the export
# phase, then mark the top-level progress bar as completed.
$DBConnect.Close()
Write-Progress @ServerProgress -Completed
2023-09-14 12:06:31 -06:00