2023-09-14 10:53:02 -06:00
<#
Script should:
1) Get the list of all project folders.
2) Determine whether there's content in these folders (exclude shortcuts, lnks, single files).
3) Flag folders with content.
4) Output results to a file for each server.
Each output file should be read by the analysis script to determine locations with duplicate data.
Output file should show:
X:\CG | ProjectNumber | FullPath | FileCount
#>
2023-09-14 12:06:31 -06:00
2023-09-14 13:37:32 -06:00
[CmdletBinding()]
param (
    # Optional switch. When present, the stored folder data is wiped first so that
    # every project folder gets analyzed again instead of being skipped as already known.
    [Parameter()]
    [switch] $Refresh
)
#refresh all data - causes the DB to be wiped clean and all file folders to be re-analyzed.
2023-09-14 12:06:31 -06:00
2023-09-14 10:53:02 -06:00
# SQLite working database and the folder that receives the generated Excel reports.
$PathToDB = "M:\IT\Egnyte\DuplicateFiles\WorkingRun\Dedupe.SQLite"
$OutputFolder = "M:\IT\Egnyte\DuplicateFiles\WorkingRun\Output"

# Root of the share to scan; every first-level directory below it is processed as one office.
$BaseDrive = 'X:\'
# Filter applied to candidate folder paths:
# X:\<two letters><one non-word character><two digits>\<two digits>...
$ProjectRegex = '^X:\\[A-Z]{2}\W\d{2}\\\d{2}'
$OfficeList = Get-ChildItem -Path $BaseDrive -Directory -Depth 0

if ($Refresh) {
    # Reset the DB, start fresh: with FolderData empty, no folder is skipped as
    # "already recorded" during the scan.
    # Invoke-SqliteQuery opens and closes its own connection when given -DataSource,
    # so no separate connection object is created here.
    Invoke-SqliteQuery -DataSource $PathToDB -Query 'DELETE FROM FolderData;'
}
2023-09-14 10:53:02 -06:00
# Scan every office folder in parallel. For each project folder that has no row yet in
# FolderData (matched on Project + Server), collect file statistics and insert one row.
$OfficeList | ForEach-Object -Parallel {
    # Action that runs in parallel. The current object is $_ and outside variables are
    # brought in with $USING:varname.
    # Local copy that the nested parallel block below imports via $USING:.
    $PathToDB_Copy_Main = $USING:PathToDB

    $ServerProgress = @{
        ID       = 1
        Activity = "Processing: " + $_.FullName
    }
    Write-Progress @ServerProgress

    # Candidate folders up to three levels below the office folder, filtered by the project
    # path pattern. -LiteralPath keeps folder names containing [ or ] from being treated
    # as wildcards.
    $ProjectFolders = Get-ChildItem -LiteralPath $_.FullName -Directory -Depth 2 |
        Where-Object FullName -Match $USING:ProjectRegex

    $ProjectFolders | ForEach-Object -ThrottleLimit 5 -Parallel {
        $PathToDB_Copy = $USING:PathToDB_Copy_Main
        $FolderPath = $_.FullName

        # Random ID so progress records from concurrently running folders do not collide.
        $JobID = Get-Random
        $CurentLoopProgress = @{
            ID              = $JobID
            Activity        = "Analyzing " + $FolderPath
            PercentComplete = 10
        }

        # Build the project number by concatenating every path segment that starts with
        # digits (optionally followed by -digits groups).
        # A dedicated variable is used instead of assigning to the automatic $matches.
        $ProjectNumber = ''
        foreach ($Segment in [regex]::Matches($FolderPath, '\\(\d+(-\d+)*)')) {
            $ProjectNumber += $Segment.Groups[1].Value
            if ($Segment.Groups[2].Success) {
                # NOTE(review): group 1 already contains the text of group 2, so the last
                # "-NNN" part of a hyphenated segment is appended twice
                # (e.g. "\1234-567" contributes "1234-567-567"). Kept unchanged because the
                # Project values already stored in FolderData depend on it - confirm intent.
                $ProjectNumber += $Segment.Groups[2].Value
            }
        }

        # Anything shorter than 5 characters is not treated as a project folder.
        if ($ProjectNumber.Length -ge 5) {
            # Characters 1-4 of "X:\CG\..." are ":\CG"; stripping ':' and '\' leaves the office code.
            $Server = $FolderPath.Substring(1, 4) -replace ':|\\', ''

            # Parameterized lookup (no string-built SQL). Invoke-SqliteQuery manages its own
            # connection from -DataSource, so no extra connection is opened per folder.
            $Query = 'SELECT Project FROM FolderData WHERE Project = @Project AND Server = @Server;'
            $sql_result = Invoke-SqliteQuery -DataSource $PathToDB_Copy -Query $Query -SqlParameters @{
                Project = $ProjectNumber
                Server  = $Server
            }

            # Only folders without an existing row are analyzed.
            if ($null -eq $sql_result.Project) {
                $CurentLoopProgress = @{
                    ID              = $JobID
                    Activity        = "Loading Telemetry for " + $FolderPath
                    PercentComplete = 75
                }
                Write-Progress @CurentLoopProgress

                # Everything up to and including the last slash, i.e. the parent folder path.
                $folder_Parent = [regex]::Matches($FolderPath, '^(.*[\\\/])').Value

                $FolderPath_Data = Get-ChildItem -LiteralPath $FolderPath -File -Depth 50 -Recurse
                # Total size in GiB (bytes / 1024 / 1024 / 1024).
                $folder_FileSize = ($FolderPath_Data | Measure-Object -Sum Length).Sum / 1GB
                $folder_FileCount = ($FolderPath_Data | Measure-Object).Count
                $folder_LastWrite = ($FolderPath_Data | Measure-Object LastWriteTime -Maximum).Maximum
                $folder_LastAccess = ($FolderPath_Data | Measure-Object LastAccessTime -Maximum).Maximum

                # Folders with no measurable content (no files, or only zero-byte files) are
                # ignored. Measure-Object always sets Count, so only the size needs checking.
                if ($folder_FileSize -gt 0) {
                    $out = [PSCustomObject]@{
                        Server         = $Server
                        Project        = $ProjectNumber
                        Parent         = $folder_Parent
                        Path           = $FolderPath
                        FileCount      = $folder_FileCount
                        FileSize       = $folder_FileSize
                        FileLastWrite  = $folder_LastWrite
                        FileLastAccess = $folder_LastAccess
                    } | Out-DataTable
                    Invoke-SQLiteBulkCopy -DataSource $PathToDB_Copy -Table "FolderData" -DataTable $out -Force
                }
            }
        }

        Write-Progress @CurentLoopProgress -Completed
    }

    Write-Progress @ServerProgress -Completed
} -ThrottleLimit 5
# Folder scan is finished; show the database stage on progress bar ID 1.
# $ServerProgress is reused at the end of the script to mark this bar as completed.
$ServerProgress = @{
    ID              = 1
    Activity        = "Database Work"
    PercentComplete = 80
}
Write-Progress @ServerProgress

# Run the SQL files that live next to this script against the working database,
# in this order: GenTables.sql first, then DuplicatePairing.sql.
foreach ($SqlScriptName in 'GenTables.sql', 'DuplicatePairing.sql') {
    $SQLFile_to_Run = Join-Path -Path $PSScriptRoot -ChildPath $SqlScriptName
    Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
}
# Export the query results to Excel workbooks in $OutputFolder. A workbook is only written
# when its query returns rows. Every Invoke-SqliteQuery call uses -DataSource and therefore
# manages its own connection; no shared connection object is needed.

# One "<office> Duplicates.xlsx" workbook per office folder.
foreach ($office in $OfficeList) {
    $OfficeServer = $office.Name
    $Query = 'select * from OfficeDuplicates where Server = @Server;'
    $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query -SqlParameters @{ Server = $OfficeServer }
    if ($sql_result) {
        $OutFile = Join-Path -Path $OutputFolder -ChildPath "$OfficeServer Duplicates.xlsx"
        $sql_result | Export-Excel -Path $OutFile -AutoFilter
    }
}

# Fixed-name workbooks: all duplicates, parent-folder non-duplicates, and project-folder
# non-duplicates (excluding paths already listed in NonDupParent).
$Reports = @(
    @{
        File  = 'MPE All Duplicates.xlsx'
        Query = 'select * from OfficeDuplicates;'
    }
    @{
        File  = 'MPE Non-Duplicates - Parent Folders.xlsx'
        Query = 'select * from NonDupParent;'
    }
    @{
        File  = 'MPE Non-Duplicates - Project Folders.xlsx'
        Query = 'select * from NonDupProject where Path NOT IN (select Path from NonDupParent);'
    }
)
foreach ($Report in $Reports) {
    $sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Report.Query
    if ($sql_result) {
        $OutFile = Join-Path -Path $OutputFolder -ChildPath $Report.File
        $sql_result | Export-Excel -Path $OutFile -AutoFilter
    }
}

## Generate Egnyte Data Inventory Sheet
$SQLFile_to_Run = Join-Path -Path $PSScriptRoot -ChildPath 'Generate Egnyte Export.sql'
$EgnyteExport = Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
if ($EgnyteExport) {
    # Get-Date -Format already returns a string, so it is used directly as the file-name stamp.
    $Stamp = Get-Date -Format FileDateTime
    $OutFile = Join-Path -Path $OutputFolder -ChildPath ('MPE Data Inventory {0}.xlsx' -f $Stamp)
    # Pipe the rows into Export-Excel and write to $OutFile so the workbook is actually created.
    $EgnyteExport | Export-Excel -Path $OutFile -AutoFilter
}

# Close the progress bar that was opened for the database/report stage.
Write-Progress @ServerProgress -Completed
2023-09-14 12:06:31 -06:00