2023-09-14 10:53:02 -06:00
<#
Script should :
1 ) get list of all Project Folders
2 ) Determine if there ' s content in these folders ( exlude shortcuts , lnks , single files )
3 ) Flag folders with content
4 ) Output results to file for each server .
Each Output file should be read by the analysis script to determine locations with duplicate data .
Output File should show :
X: \ CG | ProjectNumber | FullPath | filecount
#>
2023-09-14 12:06:31 -06:00
2023-09-14 13:37:32 -06:00
[ CmdletBinding ( ) ]
param (
#refresh all data - causes the DB to be wiped clean and all file folders to be re-analyzed.
[ Parameter ( Mandatory = $false ) ]
[ switch ]
$Refresh
)
#refresh all data - causes the DB to be wiped clean and all file folders to be re-analyzed.
2023-09-14 12:06:31 -06:00
2023-09-27 14:20:31 -06:00
$Path_To_Working_copy = " M:\00 - Egnyte PS Data Inventory\Output\MPE Data Inventory - Working.xlsx "
$PAth_To_Migration_Status = " M:\00 - Egnyte PS Data Inventory\Migration_status\Migration_Health_stat_MPE.xlsx "
2023-09-14 10:53:02 -06:00
$PathToDB = " M:\IT\Egnyte\DuplicateFiles\WorkingRun\Dedupe.SQLite "
2023-09-14 12:06:31 -06:00
$OutputFolder = " M:\IT\Egnyte\DuplicateFiles\WorkingRun\Output "
2023-09-14 10:53:02 -06:00
$BaseDrive = 'X:\'
$ProjectRegex = '^X:\\[A-Z]{2}\W\d{2}\\\d{2}'
$OfficeList = Get-ChildItem -Path $BaseDrive -Directory -Depth 0
2023-09-14 12:08:37 -06:00
if ( $Refresh ) {
# Reset the DB, start fresh
$DBConnect = New-SqliteConnection -DataSource $PathToDB
$ClearQuery = 'DELETE FROM FolderData;'
Invoke-SqliteQuery -DataSource $PathToDB -Query $ClearQuery
$DBConnect . close ( )
}
2023-09-14 10:53:02 -06:00
$OfficeList | ForEach-Object -parallel {
$PathToDB_Copy_Main = $USING : PathToDB
$ServerProgress = @ {
ID = 1
Activity = " Processing: " + $_ . FullName
# PercentComplete = 100
# CurrentOperation = $_.Project
}
Write-Progress @ServerProgress
#Action that will run in Parallel. Reference the current object via $PSItem and bring in outside variables with $USING:varname
$XDrivePath = $_ . FullName
# Write-Host "Processing " $_.FullName -ForegroundColor Green
$ProjectFolders = get-childitem $_ . FullName -Directory -Depth 2 | Where-Object FullName -Match $USING : ProjectRegex
# $ProjectFolders now contains all folders down to the 3rd level of project number - for Example, 1234-567-001. We can now generate a project number for the output file.
# We need to get a file count for each "last" folder - IE, 001 in the example 1234-567-001.
2023-09-14 13:17:16 -06:00
$ProjectFolders | ForEach-Object -ThrottleLimit 5 -Parallel {
2023-09-14 10:53:02 -06:00
$PathToDB_Copy = $USING : PathToDB_Copy_Main
$JobID = Get-Random
$CurentLoopProgress = @ {
ID = $JobID
Activity = " Analyzing " + $_ . FullName
PercentComplete = 10
}
2023-09-14 11:31:31 -06:00
# Write-Progress @CurentLoopProgress
2023-09-14 10:53:02 -06:00
2023-09-14 12:06:31 -06:00
2023-09-14 10:53:02 -06:00
$projectNumber = ''
# thank you ChatGPT
$string = $_ . FullName
$regex = " \\(\d+(-\d+)*) "
$matches = [ regex ] :: Matches ( $string , $regex )
$result = " "
foreach ( $match in $matches ) {
$result + = $match . Groups [ 1 ] . Value
if ( $match . Groups [ 2 ] . Success ) {
$result + = $match . Groups [ 2 ] . Value
}
# if ($match.Groups[3].Success) {
# $result += $match.Groups[3].Value
# }
}
# / Thank you Chat GPT
# $result = $result -replace ('-','')
if ( $result . Length -lt 5 ) {
# $CurentLoopProgress = @{
# ID = $JobID
# Activity = $_.FullName + " Not a Project"
# }
# Write-Progress @CurentLoopProgress
}
elseif ( $result . Length -ge 5 ) {
2023-09-29 10:49:25 -06:00
$RefreshDays = -15 - ( Get-Random -Maximum 5 )
2023-09-28 15:58:07 -06:00
2023-09-15 09:08:24 -06:00
$Server = $_ . FullName . Substring ( 1 , 4 ) -replace ( ':|\\' , '' )
2023-09-27 14:20:31 -06:00
$Path = $_ . FullName . Replace ( " ' " , " `' " ) #SQL Formatted Path
2023-09-27 16:14:42 -06:00
$Path = $Path . Replace ( " X:\ " , " \\mpe.ca\datadrive\ " )
2023-09-15 09:08:24 -06:00
$projectnumber = $result . tostring ( )
2023-09-14 10:53:02 -06:00
$DBConnect = New-SqliteConnection -DataSource $PathToDB_Copy
2023-09-28 15:58:07 -06:00
$Query = " Select Project,fld_LastRefresh FROM FolderData where Project = ' $ProjectNumber ' AND Server = ' $Server ' AND Path = `" $Path `" ; "
2023-09-14 10:53:02 -06:00
$sql_result = Invoke-SqliteQuery -DataSource $PathToDB_Copy -Query $Query
2023-09-28 15:58:07 -06:00
if ( ( $sql_result . project -eq $null ) -or ( $sql_result . fld_LastRefresh -le ( Get-Date ) . AddDays ( $RefreshDays ) . ToShortDateString ( ) ) ) {
2023-09-14 10:53:02 -06:00
$CurentLoopProgress = @ {
ID = $JobID
2023-09-28 15:58:07 -06:00
Activity = " Loading Telemetry for " + $_ . FullName
2023-09-14 10:53:02 -06:00
PercentComplete = 75
}
Write-Progress @CurentLoopProgress
2023-09-28 15:58:07 -06:00
if ( $sql_result . project -ne $null ) {
# found a record, but needs updating - delete old record from DB
$Query = " Delete from FolderData WHERE ' $ProjectNumber ' AND Server = ' $Server ' AND Path = `" $Path `" "
Invoke-SqliteQuery -DataSource $PathToDB_Copy -Query $Query
}
2023-09-14 10:53:02 -06:00
$result3 = [ Regex ] :: Matches ( $_ . FullName , " ^(.*[\\\/]) " ) # This gets everything up the last slash, thus the "parent"
$FolderPath_Data = Get-ChildItem -Path $_ . FullName -File -Depth 50 -Recurse
2023-09-27 14:20:31 -06:00
$folder_Parent = $result3 . value . replace ( " ' " , " `' " ) #SQL Formatting
2023-09-27 16:14:42 -06:00
$folder_Parent = $folder_Parent . Replace ( " X:\ " , " \\mpe.ca\datadrive\ " )
2023-09-14 10:53:02 -06:00
$folder_FileSize = ( $FolderPath_Data | Measure -sum Length ) . sum / 1024 / 1024 / 1024
$folder_FileCount = ( $FolderPath_Data | measure ) . Count
$folder_LastWrite = ( $FolderPath_Data | measure LastWriteTime -Maximum ) . Maximum
$folder_LastAccess = ( $FolderPath_Data | measure LastAccessTime -Maximum ) . Maximum
2023-09-15 13:12:12 -06:00
if ( $folder_FileSize -le 0 -or $folder_FileCount -eq 0 ) {
2023-09-14 10:53:02 -06:00
#Let's ignore Null Values
}
else {
$out = [ PSCustomObject ] @ {
2023-09-28 15:14:17 -06:00
Server = $Server
Project = $ProjectNumber . ToString ( )
Parent = $folder_Parent
Path = $Path
FileCount = $folder_FileCount
FileSize = $folder_FileSize
FileLastWrite = $folder_LastWrite
FileLastAccess = $folder_LastAccess
Status = $null # added to support Migration Status
fld_MigrationID = $null
2023-09-28 15:58:07 -06:00
fld_LastRefresh = ( Get-Date ) . ToShortDateString ( )
2023-09-14 10:53:02 -06:00
} | Out-DataTable
Invoke-SQLiteBulkCopy -DataSource $PathToDB_Copy -Table " FolderData " -DataTable $out -Force
}
}
$DBConnect . Close ( )
}
Write-Progress @CurentLoopProgress -Completed
}
Write-Progress @ServerProgress -Completed
2023-09-14 12:06:31 -06:00
} -ThrottleLimit 5
$ServerProgress = @ {
ID = 1
Activity = " Database Work "
PercentComplete = 80
# CurrentOperation = $_.Project
}
Write-Progress @ServerProgress
2023-09-27 14:20:31 -06:00
<#
TODO Grab data from Egnyte Working Copy to re-incorporate into our DB . This will allow us to mark data as " in progress " , and thus filter it out from future data inventory updates .
TODO Need to update Working Copy sheet to contain Project Number and Server for unique keys , and add antoher column for Egnyte Friendly Server Names to match the Migration Server names .
IE ,
server , path , path2 , etc , status , OriginalServer = RD , OriginalProject = ProjectNumber
#>
2023-09-27 16:14:42 -06:00
<# Get Migration Status
CREATE TABLE " MigrationStatus " (
" MigrationID " TEXT NOT NULL UNIQUE ,
" MigrationName " TEXT ,
" MigrationFolderSource " TEXT ,
" MigrationFolderDestination " TEXT ,
" MigrationStatus " INTEGER ,
PRIMARY KEY ( " MigrationID " )
) ;
#>
$MigrationStatus_import = Import-Excel -Path $PAth_To_Migration_Status -WorksheetName " MPE Migration Tracker " -StartRow 2
foreach ( $status in $MigrationStatus_import ) {
$SQLSourcePath = $status . 'Source Path' . replace ( " ' " , " `' " ) #SQL Formatting
2023-09-27 16:59:49 -06:00
if ( $SQLSourcePath -notmatch '\\$' ) {
$SQLSourcePath + = '\'
}
2023-09-27 16:14:42 -06:00
$SQLDestPath = $status . 'Destination Path' . replace ( " ' " , " `' " ) #SQL Formatting
2023-09-28 15:14:17 -06:00
if ( $SQLDestPath -notmatch '\/$' ) {
$SQLDestPath + = '/'
2023-09-27 16:59:49 -06:00
}
2023-09-27 16:14:42 -06:00
$SQLMigrationID = $status . 'Migration ID'
$SQLMigrationName = $status . 'Migration Name'
$SQLMigrationStatus = $status . 'Current State'
$Query = " Select * from MigrationStatus WHERE MigrationID = `' $SQLMigrationID `' "
$Test = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ( $test ) {
$Query = " UPDATE MigrationStatus SET MigrationStatus = ' $SQLMigrationStatus ' WHERE MigrationID = ' $SQLMigrationID ' "
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
}
else {
2023-09-28 15:14:17 -06:00
$Query = " INSERT INTO MigrationStatus VALUES (' $SQLMigrationID ',' $SQLMigrationName ', `' $SQLSourcePath `' , `' $SQLDestPath `' ,' $SQLMigrationStatus ','') "
2023-09-27 16:14:42 -06:00
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
}
}
2023-09-28 15:14:17 -06:00
# Update DB with new Data from MigrationStatus
$Query = 'update FolderData SET fld_MigrationID = (SELECT MigrationID from MigrationStatus where Parent = MigrationFolderSource);'
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-27 16:14:42 -06:00
2023-09-28 15:14:17 -06:00
## DB Now contains MigrationID in fld_MigrationID Column. We can now reliably track the individual folder status'
$query = " update FolderData set Status = (Select MigrationStatus from MigrationStatus where fld_MigrationID = MigrationID); "
Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
2023-09-27 14:20:31 -06:00
2023-09-14 12:06:31 -06:00
## Kick off GenTables
$SQLFile_to_Run = $PSScriptRoot + '\GenTables.sql'
Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
$SQLFile_to_Run = $PSScriptRoot + '\DuplicatePairing.sql'
Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
$DBConnect = New-SqliteConnection -DataSource $PathToDB
foreach ( $office in $OfficeList ) {
2023-09-15 10:49:03 -06:00
$OfficeServer = $office . Name
2023-09-14 12:06:31 -06:00
2023-09-14 13:26:42 -06:00
$Query = " select * from OfficeDuplicates where Server = ' $OfficeServer '; "
2023-09-14 12:06:31 -06:00
$sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ( $sql_result ) {
$OutFile = $OutputFolder + " \ $OfficeServer Duplicates.xlsx "
2023-09-15 11:03:39 -06:00
Remove-Item -Path $OutFile -Force
2023-09-14 12:06:31 -06:00
$sql_result | Export-Excel -Path $OutFile -AutoFilter
}
}
# Generate MPE All Xlsx
2023-09-14 13:26:42 -06:00
$Query = " select * from OfficeDuplicates; "
2023-09-14 12:06:31 -06:00
$sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ( $sql_result ) {
$OutFile = $OutputFolder + " \MPE All Duplicates.xlsx "
2023-09-15 11:03:39 -06:00
Remove-Item -Path $OutFile -Force
2023-09-14 12:06:31 -06:00
$sql_result | Export-Excel -Path $OutFile -AutoFilter
}
2023-09-14 13:26:42 -06:00
# Generate Parent Non-Duplicates
$Query = " select * from NonDupParent; "
$sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ( $sql_result ) {
$OutFile = $OutputFolder + " \MPE Non-Duplicates - Parent Folders.xlsx "
2023-09-15 11:03:39 -06:00
Remove-Item -Path $OutFile -Force
2023-09-14 13:26:42 -06:00
$sql_result | Export-Excel -Path $OutFile -AutoFilter
}
# Generate Project Non-Duplicates
$Query = " select * from NonDupProject where Path NOT IN (select Path from NonDupParent); "
$sql_result = Invoke-SqliteQuery -DataSource $PathToDB -Query $Query
if ( $sql_result ) {
$OutFile = $OutputFolder + " \MPE Non-Duplicates - Project Folders.xlsx "
2023-09-15 11:03:39 -06:00
Remove-Item -Path $OutFile -Force
2023-09-14 13:26:42 -06:00
$sql_result | Export-Excel -Path $OutFile -AutoFilter
}
2023-09-14 15:29:46 -06:00
## Generate Egnyte Data Inventory Sheet
2023-09-15 13:04:44 -06:00
$OutFile = $OutputFolder + " \MPE Data Inventory " + ( Get-Date -Format FileDate ) + " .xlsx "
2023-09-15 11:27:45 -06:00
try {
Remove-Item -Path $OutFile -Force -ErrorAction SilentlyContinue
}
catch {
<# Do this if a terminating exception happens #>
}
2023-09-14 15:29:46 -06:00
$SQLFile_to_Run = $PSScriptRoot + '\Generate Egnyte Export.sql'
2023-09-15 11:27:45 -06:00
Invoke-SqliteQuery -DataSource $PathToDB -InputFile $SQLFile_to_Run
2023-09-15 14:07:21 -06:00
$EgnyteExport = Invoke-SqliteQuery -DataSource $PathToDB -Query " Select Server,UNCPath,EgnytePath,FileCount,FileSize,Source from DataInventory "
2023-09-14 15:29:46 -06:00
if ( $EgnyteExport ) {
2023-09-15 11:03:39 -06:00
2023-09-15 11:27:45 -06:00
$EgnyteExport | Export-Excel -Path $OutFile -AutoFilter
2023-09-14 15:29:46 -06:00
}
2023-09-14 12:06:31 -06:00
$DBConnect . Close ( )
2023-09-15 09:08:24 -06:00
Write-Progress @ServerProgress -completed
2023-09-14 12:06:31 -06:00