FileServerDedupe/Get-ProjectFolders.ps1

312 lines
11 KiB
PowerShell
Raw Permalink Normal View History

2023-09-14 10:53:02 -06:00
<#
Script should:
1) get list of all Project Folders
2) Determine if there's content in these folders (exlude shortcuts, lnks, single files)
3) Flag folders with content
4) Output results to file for each server.
Each Output file should be read by the analysis script to determine locations with duplicate data.
Output File should show:
X:\CG | ProjectNumber | FullPath | filecount
#>
2023-09-14 12:06:31 -06:00
2023-09-14 13:37:32 -06:00
[CmdletBinding()]
param (
#refresh all data - causes the DB to be wiped clean and all file folders to be re-analyzed.
[Parameter(Mandatory = $false)]
[switch]
$Refresh
)
#refresh all data - causes the DB to be wiped clean and all file folders to be re-analyzed.
2023-09-14 12:06:31 -06:00
2023-09-27 14:20:31 -06:00
$Path_To_Working_copy = "M:\00 - Egnyte PS Data Inventory\Output\MPE Data Inventory - Working.xlsx"
$PAth_To_Migration_Status = "M:\00 - Egnyte PS Data Inventory\Migration_status\Migration_Health_stat_MPE.xlsx"
2023-09-14 10:53:02 -06:00
$PathToDB = "M:\IT\Egnyte\DuplicateFiles\WorkingRun\Dedupe.SQLite"
2023-09-14 12:06:31 -06:00
$OutputFolder = "M:\IT\Egnyte\DuplicateFiles\WorkingRun\Output"
2023-09-14 10:53:02 -06:00
$BaseDrive = 'X:\'
2023-10-04 14:56:27 -06:00
$ProjectRegex = '^X:\\[A-Z]{2}\W[a-zA-Z0-9 ]*\\'
2023-09-14 10:53:02 -06:00
$OfficeList = Get-ChildItem -Path $BaseDrive -Directory -Depth 0
2023-09-14 12:08:37 -06:00
if ($Refresh) {
# Reset the DB, start fresh
$DBConnect = New-SqliteConnection -DataSource $PathToDB
$ClearQuery = 'DELETE FROM FolderData;'
Invoke-SqliteQuery -DataSource $PathToDB -Query $ClearQuery
$DBConnect.close()
}
2023-09-14 10:53:02 -06:00
$OfficeList | ForEach-Object -parallel {
$PathToDB_Copy_Main = $USING:PathToDB
$ServerProgress = @{
ID = 1
Activity = "Processing: " + $_.FullName
# PercentComplete = 100
# CurrentOperation = $_.Project
}
Write-Progress @ServerProgress
#Action that will run in Parallel. Reference the current object via $PSItem and bring in outside variables with $USING:varname
$XDrivePath = $_.FullName
# Write-Host "Processing " $_.FullName -ForegroundColor Green
$ProjectFolders = get-childitem $_.FullName -Directory -Depth 1 | Where-Object FullName -Match $USING:ProjectRegex
2023-09-14 10:53:02 -06:00
# $ProjectFolders now contains all folders down to the 3rd level of project number - for Example, 1234-567-001. We can now generate a project number for the output file.
# We need to get a file count for each "last" folder - IE, 001 in the example 1234-567-001.
2023-09-29 14:01:36 -06:00
$ProjectFolders | ForEach-Object -ThrottleLimit 8 -Parallel {
2023-09-14 10:53:02 -06:00
$PathToDB_Copy = $USING:PathToDB_Copy_Main
$JobID = Get-Random
$CurentLoopProgress = @{
ID = $JobID
Activity = "Analyzing " + $_.FullName
PercentComplete = 10
}
2023-09-14 11:31:31 -06:00
# Write-Progress @CurentLoopProgress
2023-09-14 10:53:02 -06:00
2023-09-14 12:06:31 -06:00
2023-10-02 16:17:35 -06:00
2023-09-14 10:53:02 -06:00
# thank you ChatGPT
$string = $_.FullName
2023-10-02 16:17:35 -06:00
$RefreshDays = -15 - (Get-Random -Maximum 5)
$Server = $_.FullName.Substring(1, 4) -replace (':|\\', '')
$Path = $_.FullName.Replace("'", "`'") #SQL Formatted Path
$Path = $Path.Replace("X:\", "\\mpe.ca\datadrive\")
$DBConnect = New-SqliteConnection -DataSource $PathToDB_Copy
$Query = "Select fld_LastRefresh FROM FolderData where Server = '$Server' AND Path = `"$Path`";"
$sql_result = Invoke-SqliteQuery -DataSource $PathToDB_Copy -Query $Query
if (($sql_result.fld_LastRefresh -le (Get-Date).AddDays($RefreshDays).ToShortDateString())) {
$CurentLoopProgress = @{
ID = $JobID
Activity = "Loading Telemetry for " + $_.FullName
PercentComplete = 75
2023-09-14 10:53:02 -06:00
}
2023-10-02 16:17:35 -06:00
Write-Progress @CurentLoopProgress
2023-10-02 16:08:58 -06:00
2023-10-02 16:17:35 -06:00
$result3 = [Regex]::Matches($_.FullName, "^(.*[\\\/])") # This gets everything up the last slash, thus the "parent"
$FolderPath_Data = Get-ChildItem -Path $_.FullName -File -Depth 50 -Recurse
$folder_Parent = $result3.value.replace("'", "`'") #SQL Formatting
$folder_Parent = $folder_Parent.Replace("X:\", "\\mpe.ca\datadrive\")
$folder_FileSize = ($FolderPath_Data | Measure -sum Length).sum / 1024 / 1024 / 1024
$folder_FileCount = ($FolderPath_Data | measure).Count
$folder_LastWrite = ($FolderPath_Data | measure LastWriteTime -Maximum).Maximum
$folder_LastAccess = ($FolderPath_Data | measure LastAccessTime -Maximum).Maximum
2023-09-14 10:53:02 -06:00
2023-10-02 16:17:35 -06:00
if ($folder_FileSize -le 0 -or $folder_FileCount -eq 0) {
2023-09-14 10:53:02 -06:00
2023-10-02 16:17:35 -06:00
#Let's ignore Null Values
}
else {
2023-09-14 10:53:02 -06:00
2023-10-02 16:17:35 -06:00
$out = [PSCustomObject]@{
Server = $Server
Parent = $folder_Parent
Path = $Path
FileCount = $folder_FileCount
FileSize = $folder_FileSize
FileLastWrite = $folder_LastWrite
FileLastAccess = $folder_LastAccess
Status = $null # added to support Migration Status
fld_MigrationID = $null
fld_LastRefresh = (Get-Date).ToShortDateString()
} | Out-DataTable
2023-09-14 10:53:02 -06:00
2023-10-02 16:17:35 -06:00
Invoke-SQLiteBulkCopy -DataSource $PathToDB_Copy -Table "FolderData" -DataTable $out -Force
2023-09-14 10:53:02 -06:00
}
}
2023-10-02 16:17:35 -06:00
$DBConnect.Close()
2023-09-14 10:53:02 -06:00
Write-Progress @CurentLoopProgress -Completed
}
Write-Progress @ServerProgress -Completed
2023-09-29 14:01:36 -06:00
} -ThrottleLimit 3
2023-09-14 12:06:31 -06:00
$ServerProgress = @{
ID = 1
Activity = "Database Work"
PercentComplete = 80
# CurrentOperation = $_.Project
}
Write-Progress @ServerProgress
2023-09-27 14:20:31 -06:00
<#
TODO Grab data from Egnyte Working Copy to re-incorporate into our DB. This will allow us to mark data as "in progress", and thus filter it out from future data inventory updates.
TODO Need to update Working Copy sheet to contain Project Number and Server for unique keys, and add antoher column for Egnyte Friendly Server Names to match the Migration Server names.
IE,
server,path,path2,etc,status,OriginalServer=RD,OriginalProject=ProjectNumber
#>
2023-10-04 14:56:27 -06:00
$DBConnect = New-SqliteConnection -DataSource $PathToDB
2023-09-27 16:14:42 -06:00
<# Get Migration Status
CREATE TABLE "MigrationStatus" (
"MigrationID" TEXT NOT NULL UNIQUE,
"MigrationName" TEXT,
"MigrationFolderSource" TEXT,
"MigrationFolderDestination" TEXT,
"MigrationStatus" INTEGER,
PRIMARY KEY("MigrationID")
);
#>
$MigrationStatus_import = Import-Excel -Path $PAth_To_Migration_Status -WorksheetName "MPE Migration Tracker" -StartRow 2
foreach ($status in $MigrationStatus_import) {
$SQLSourcePath = $status.'Source Path'.replace("'", "`'") #SQL Formatting
2023-09-27 16:59:49 -06:00
if ($SQLSourcePath -notmatch '\\$') {
$SQLSourcePath += '\'
}
2023-09-27 16:14:42 -06:00
$SQLDestPath = $status.'Destination Path'.replace("'", "`'") #SQL Formatting
if ($SQLDestPath -notmatch '\/$') {
$SQLDestPath += '/'
2023-09-27 16:59:49 -06:00
}
2023-09-27 16:14:42 -06:00
$SQLMigrationID = $status.'Migration ID'
$SQLMigrationName = $status.'Migration Name'
$SQLMigrationStatus = $status.'Current State'
$Query = "Select * from MigrationStatus WHERE MigrationID = `'$SQLMigrationID`'"
$Test = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ($test) {
$Query = "UPDATE MigrationStatus SET MigrationStatus = '$SQLMigrationStatus' WHERE MigrationID = '$SQLMigrationID'"
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
}
else {
$Query = "INSERT INTO MigrationStatus VALUES ('$SQLMigrationID','$SQLMigrationName',`'$SQLSourcePath`',`'$SQLDestPath`','$SQLMigrationStatus','')"
2023-09-27 16:14:42 -06:00
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
}
}
# Update DB with new Data from MigrationStatus
$Query = 'update FolderData SET fld_MigrationID = (SELECT MigrationID from MigrationStatus where Parent = MigrationFolderSource);'
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-27 16:14:42 -06:00
## DB Now contains MigrationID in fld_MigrationID Column. We can now reliably track the individual folder status'
$query = "update FolderData set Status = (Select MigrationStatus from MigrationStatus where fld_MigrationID = MigrationID);"
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-27 14:20:31 -06:00
2023-09-14 12:06:31 -06:00
## Kick off GenTables
2023-10-02 16:20:36 -06:00
# $SQLFile_to_Run = $PSScriptRoot + '\GenTables.sql'
# Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
# $SQLFile_to_Run = $PSScriptRoot + '\DuplicatePairing.sql'
# Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
2023-09-14 12:06:31 -06:00
2023-10-04 14:56:27 -06:00
2023-09-14 12:06:31 -06:00
2023-10-02 16:20:36 -06:00
# foreach ($office in $OfficeList) {
# $OfficeServer = $office.Name
2023-09-14 12:06:31 -06:00
2023-10-02 16:20:36 -06:00
# $Query = "select * from OfficeDuplicates where Server = '$OfficeServer';"
# $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-14 12:06:31 -06:00
2023-10-02 16:20:36 -06:00
# if ($sql_result) {
# $OutFile = $OutputFolder + "\$OfficeServer Duplicates.xlsx"
# Remove-Item -Path $OutFile -Force
# $sql_result | Export-Excel -Path $OutFile -AutoFilter
2023-09-14 12:06:31 -06:00
2023-10-02 16:20:36 -06:00
# }
# }
2023-09-14 12:06:31 -06:00
# Generate MPE All Xlsx
2023-10-02 16:20:36 -06:00
# $Query = "select * from OfficeDuplicates;"
# $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-14 12:06:31 -06:00
2023-10-02 16:20:36 -06:00
# if ($sql_result) {
# $OutFile = $OutputFolder + "\MPE All Duplicates.xlsx"
# Remove-Item -Path $OutFile -Force
# $sql_result | Export-Excel -Path $OutFile -AutoFilter
2023-09-14 12:06:31 -06:00
2023-10-02 16:20:36 -06:00
# }
2023-09-14 12:06:31 -06:00
2023-09-14 13:26:42 -06:00
# Generate Parent Non-Duplicates
2023-10-02 16:20:36 -06:00
# $Query = "select * from NonDupParent;"
# $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-14 13:26:42 -06:00
2023-10-02 16:20:36 -06:00
# if ($sql_result) {
# $OutFile = $OutputFolder + "\MPE Non-Duplicates - Parent Folders.xlsx"
# Remove-Item -Path $OutFile -Force
# $sql_result | Export-Excel -Path $OutFile -AutoFilter
2023-09-14 13:26:42 -06:00
2023-10-02 16:20:36 -06:00
# }
2023-09-14 13:26:42 -06:00
2023-10-02 16:20:36 -06:00
# # Generate Project Non-Duplicates
2023-09-14 13:26:42 -06:00
2023-10-02 16:20:36 -06:00
# $Query = "select * from NonDupProject where Path NOT IN (select Path from NonDupParent);"
# $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-14 13:26:42 -06:00
2023-10-02 16:20:36 -06:00
# if ($sql_result) {
# $OutFile = $OutputFolder + "\MPE Non-Duplicates - Project Folders.xlsx"
# Remove-Item -Path $OutFile -Force
# $sql_result | Export-Excel -Path $OutFile -AutoFilter
2023-09-14 13:26:42 -06:00
2023-10-02 16:20:36 -06:00
# }
2023-09-14 13:26:42 -06:00
2023-09-14 15:29:46 -06:00
## Generate Egnyte Data Inventory Sheet
2023-09-15 13:04:44 -06:00
$OutFile = $OutputFolder + "\MPE Data Inventory " + (Get-Date -Format FileDate) + ".xlsx"
2023-09-15 11:27:45 -06:00
try {
Remove-Item -Path $OutFile -Force -ErrorAction SilentlyContinue
}
catch {
<#Do this if a terminating exception happens#>
}
2023-09-14 15:29:46 -06:00
$SQLFile_to_Run = $PSScriptRoot + '\Generate Egnyte Export.sql'
2023-09-15 11:27:45 -06:00
Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
2023-10-03 13:15:44 -06:00
$EgnyteExport = Invoke-SqliteQuery -DataSource $PathToDB -Query "Select Server,UNCPath,EgnytePath,FileCount,FileSize,Status from DataInventory"
2023-09-14 15:29:46 -06:00
if ($EgnyteExport) {
2023-09-15 11:03:39 -06:00
2023-09-15 11:27:45 -06:00
$EgnyteExport | Export-Excel -Path $OutFile -AutoFilter
2023-09-14 15:29:46 -06:00
}
2023-10-04 14:56:27 -06:00
$OutFile = $OutputFolder + "\FULL MPE Data Inventory " + (Get-Date -Format FileDate) + ".xlsx"
try {
Remove-Item -Path $OutFile -Force -ErrorAction SilentlyContinue
}
catch {
<#Do this if a terminating exception happens#>
}
$Query = 'Select * from (
SELECT SERVER,
REPLACE(
Parent,
"X:\","\\mpe.ca\datadrive\") AS UNCPath,
REPLACE(REPLACE(REPLACE(Parent,("\\mpe.ca\datadrive\" || Server),REPLACE("/Shared/N-Data/" || Server || "_/","_/","_")),"\","/"),"_/","_") AS EgnytePath,
sum(FileCount) as FileCount, Sum(FileSize) as FileSize, Status, fld_MigrationID,MigrationName from FolderData
LEFT JOIN MigrationStatus on FolderData.fld_MigrationID = MigrationID
-- WHERE FolderData.fld_MigrationID IS NULL
GROUP by UNCPath
ORDER by UNCPath
);'
$FullEgnyteExport = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ($FullEgnyteExport) {
$FullEgnyteExport | Export-Excel -Path $OutFile -AutoFilter
}
2023-09-14 15:29:46 -06:00
2023-09-14 12:06:31 -06:00
$DBConnect.Close()
2023-09-15 09:08:24 -06:00
Write-Progress @ServerProgress -completed
2023-09-14 12:06:31 -06:00