Advertisement
jargon

7Scan.ps1

Mar 12th, 2025 (edited)
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # "7Scan.ps1" - Copyright Tim Keal alias jargon 2025 03/12
  2. # This script scans for 7-Zip (.7z) archives and lists the file names inside them that match a search term within a similarity threshold.
  3. # This script targets PowerShell 7 (run it with "pwsh.exe", not "powershell.exe").
  4.  
  # Prerequisite check: the 7-Zip command-line executable must exist at the
  # expected install location, otherwise the script stops with exit code 1.
  $EarlyExit = 0   # early-exit flag starts cleared
  $SevenZipPath = "C:\Program Files\7-Zip\7z.exe"

  if (!(Test-Path -Path $SevenZipPath -PathType Leaf)) {
      Write-Host -ForegroundColor Red "Error: 7-Zip not found."
      # Hold the console open so the message can be read before exiting.
      Read-Host "Press Enter to exit"
      exit 1
  }
  14.  
  function Convert-UpperCaseSeries {
      <#
      .SYNOPSIS
          Keeps the first character of a string as-is and lower-cases the remainder.
      .PARAMETER inputString
          The text to convert. Strings of zero or one character are returned unchanged.
      .OUTPUTS
          System.String
      #>
      param (
          [string]$inputString
      )

      # Nothing to lower-case when there is no "rest of the string".
      if ($inputString.Length -le 1) {
          return $inputString
      }

      $head = $inputString.Substring(0, 1)
      $tail = $inputString.Substring(1).ToLower()
      return $head + $tail
  }
  26.  
  # Computes how similar two strings are, as a percentage (0-100, rounded to six
  # decimals) derived from their case-insensitive Levenshtein edit distance.
  function Get-LevenshteinSimilarity {
      param ([string]$source, [string]$target)

      # Comparison is done on lower-cased copies, so case differences cost nothing.
      $left = $source.ToLower()
      $right = $target.ToLower()

      $rowCount = $left.Length
      $colCount = $right.Length

      # Two empty strings are defined as a perfect match (also avoids dividing by zero).
      if (($rowCount -eq 0) -and ($colCount -eq 0)) { return 100.000000 }

      # grid[r, c] holds the edit distance between the first r characters of
      # $left and the first c characters of $right.
      $height = $rowCount + 1
      $width = $colCount + 1
      $grid = [int[,]]::new($height, $width)

      for ($r = 0; $r -le $rowCount; $r++) { $grid[$r, 0] = $r }
      for ($c = 0; $c -le $colCount; $c++) { $grid[0, $c] = $c }

      for ($r = 1; $r -le $rowCount; $r++) {
          $prevRow = $r - 1
          $leftChar = $left[$prevRow]

          for ($c = 1; $c -le $colCount; $c++) {
              $prevCol = $c - 1

              $step = 1
              if ($leftChar -eq $right[$prevCol]) { $step = 0 }

              $viaDelete = $grid[$prevRow, $c] + 1
              $viaInsert = $grid[$r, $prevCol] + 1
              $viaSubstitute = $grid[$prevRow, $prevCol] + $step

              $cheapest = [math]::Min($viaDelete, $viaInsert)
              $grid[$r, $c] = [math]::Min($cheapest, $viaSubstitute)
          }
      }

      # Normalise the distance by the longer string so the result is a percentage.
      $distance = $grid[$rowCount, $colCount]
      $maxLength = [math]::Max($rowCount, $colCount)
      $similarity = ((1 - ($distance / $maxLength)) * 100)
      return [math]::Round($similarity, 6)
  }
  64.  
  # Reports whether an Escape key press is waiting in the console input buffer.
  # Reads (and discards) at most one pending key per call, without echoing it.
  function Test-EscapeKeyPressed {
      # KeyAvailable throws when console input is redirected; treat that as "no key".
      if ([System.Console]::IsInputRedirected) { return $false }

      if ([System.Console]::KeyAvailable) {
          $key = [System.Console]::ReadKey($true)
          return ($key.Key -eq [System.ConsoleKey]::Escape)
      }

      return $false
  }

  # Function to scan inside .7z files that meet archive similarity criteria
  function Scan-7zArchives {
      <#
      .SYNOPSIS
          Finds .7z archives under a path, filters them by how closely their cleaned-up
          name matches a test string, then lists the files inside the remaining archives
          whose base name is similar enough to a search term.
      .DESCRIPTION
          Hits are sorted by similarity (descending) and written to "Results\<outName>.txt".
          Pressing Escape at any point abandons the scan without writing a results file.

          The function reads two variables from the calling scope:
            $SevenZipPath     - path of the 7z executable to invoke.
            $searchExtensions - regex alternation ("txt|cs|...") of accepted file extensions.
      .PARAMETER searchTerm
          File name (without extension) that archive members are compared against.
      .PARAMETER path
          Root folder that is searched recursively for *.7z files.
      .PARAMETER threshold
          Minimum similarity, in percent, for an archive member to count as a hit.
      .PARAMETER archiveTestString
          Text that the cleaned archive name is compared against.
      .PARAMETER archiveThreshold
          Minimum similarity, in percent, for an archive to be opened at all.
      .PARAMETER outName
          Base name (without extension) of the results file inside the Results folder.
      #>
      param (
          [string]$searchTerm,
          [string]$path = "./",
          [double]$threshold = 70,
          [string]$archiveTestString = "",
          [double]$archiveThreshold = 70,
          [string]$outName = ""
      )

      $basePath = Resolve-Path -Path $path -ErrorAction SilentlyContinue
      if (-not $basePath) {
          Write-Host "Error: search path '$path' not found." -ForegroundColor Red
          return
      }
      Write-Host "Scanning: $basePath" -ForegroundColor Yellow

      $escapePressed = $false

      # ForEach-Object is not a loop, so a bare `break` inside it would search up the
      # call stack and, finding no loop, quietly end the whole script. The single-pass
      # do/while gives `break` a loop to stop at, while still cutting the crawl short.
      $archives = @(
          do {
              Get-ChildItem -Path $basePath -Filter "*.7z" -Recurse -File | ForEach-Object {
                  if (Test-EscapeKeyPressed) {
                      Write-Host "Escape key pressed. Exiting file crawl." -ForegroundColor Red
                      $escapePressed = $true
                      break
                  }

                  # Skip anything whose path contains ".git" or ".vs".
                  if ($_.FullName -notmatch '.*\.(git|vs).*') {
                      Write-Host "Crawling: $($_.FullName)" -ForegroundColor Yellow
                      $_
                  }
              }
          } while ($false)
      )

      if ($escapePressed) { return }

      $outputFile = "Results\$outName.txt"
      $hits = [System.Collections.Generic.List[string]]::new()

      foreach ($archive in $archives) {

          if (Test-EscapeKeyPressed) {
              $escapePressed = $true
              break
          }

          $archiveNameOnly = $archive.Name

          # Timestamp token in the archive name: three or four dash-separated groups of four digits.
          $pattern = '\b\d{4}-\d{4}-\d{4}(?:-\d{4})?\b'
          if ($archiveNameOnly -match $pattern) {
              $timestamp = $matches[0]
          } else {
              $timestamp = "NO_TIMESTAMP"
          }

          # Everything before " <timestamp>" is taken as the project name.
          $pattern = "^(.*) " + [regex]::Escape($timestamp)
          if ($archiveNameOnly -cmatch $pattern) {
              $project = $matches[1]
          } else {
              $project = "UNKNOWN_PROJECT"
          }

          # Drop the project/timestamp prefix, collapse runs of whitespace, trim.
          $cleanedName = $archiveNameOnly -replace $pattern, "" -replace "\s{2,}", " " -replace "^\s+|\s+$", ""

          # When the remainder contains "(...)", keep only the text inside the parentheses.
          $pattern = '.*\((.*?)\).*'
          if ($cleanedName -cmatch $pattern) {
              $cleanedName = $matches[1]
          }

          # Noise removed from the name before comparing.
          # NOTE(review): '\b[A-Z]{3,}|OK\b' parses as (\b[A-Z]{3,}) | (OK\b); if
          # '\b(?:[A-Z]{3,}|OK)\b' was intended, this needs changing - left as-is.
          $patterns = @(
              '(?-i)\bAUTOMATIC BACKUP\b',
              '(?-i)\b[A-Z]{3,}|OK\b',
              ' \+ ',
              '^\+',
              '\+$',
              ' \+ ',
              '\+'
          )

          foreach ($p in $patterns) {
              $cleanedName = $cleanedName -replace $p, " " -replace "\s{2,}", " " -replace "^\s+|\s+$", ""
          }

          # -replace is case-insensitive, so this visits every run of letters and
          # lower-cases all but the first character of each run.
          $pattern = '([A-Z]+)'
          $cleanedName = $cleanedName -replace $pattern, { Convert-UpperCaseSeries $_.Value }

          $cleanedName = $cleanedName.Trim()

          $archiveSimilarity = Get-LevenshteinSimilarity -source $archiveTestString -target $cleanedName
          $formattedArchiveSimilarity = $archiveSimilarity.ToString("000.00000000")

          $dumpName = "$formattedArchiveSimilarity% > $project > $timestamp > $cleanedName"

          if ($archiveSimilarity -lt $archiveThreshold) {
              Write-Host "Skipping: $dumpName" -ForegroundColor Red
              continue
          }

          Write-Host "Scanning: $dumpName" -ForegroundColor Yellow

          # "l -ba" lists the archive contents without the header/footer banner lines.
          $output = & $SevenZipPath l -ba "$($archive.FullName)" 2>$null

          foreach ($line in $output) {

              if (Test-EscapeKeyPressed) {
                  $escapePressed = $true
                  break
              }

              # Strip the leading "date time attributes size" columns from the listing line.
              # NOTE(review): if the listing also carries a compressed-size column, that
              # number stays in front of the path; it is only dropped for entries inside
              # a folder (by the last-path-segment match below) - verify against real output.
              $fileNameOnly = $line -replace '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\s+\S+\s+\d+\s+', ''

              # Ignore blank lines and directory entries (attribute column "D....").
              if ($fileNameOnly -match '^\s*$' -or $line -match 'D\.\.\.\.') { continue }

              # Ignore anything that is, or lives under, a .git or .vs folder.
              if ($fileNameOnly -match '^(.*(\\|\/)){0,}\.(git|vs)((\\|\/).*){0,}$') { continue }

              # Keep only the last path segment.
              if ($fileNameOnly -match '([^\\\/]+)$') {
                  $fileNameOnly = $matches[1]
              }

              # Split "name.ext". The extension is reset for every entry so a file
              # without one cannot inherit the extension of the previous entry.
              $fileExtOnly = ""
              if ($fileNameOnly -match '^(.*)\.([^\.]+)$') {
                  $fileNameOnly = $matches[1]
                  $fileExtOnly = $matches[2]
              }

              if ($fileExtOnly -notmatch "^(?:$searchExtensions)$") { continue }

              Write-Host "Checking: $fileExtOnly > $fileNameOnly" -ForegroundColor Yellow
              $fileNameOnly = [System.IO.Path]::GetFileName($fileNameOnly)

              $similarity = Get-LevenshteinSimilarity -source $searchTerm -target $fileNameOnly
              $formattedSimilarity = $similarity.ToString("000.00000000")

              if ($similarity -ge $threshold) {
                  $matchString = "$formattedSimilarity% > $archiveNameOnly > $fileExtOnly > $fileNameOnly"
                  $hits.Add($matchString)
                  # $(...) is required: "$hits.Count" would expand the whole list followed by ".Count".
                  Write-Host "Found $($hits.Count): $matchString" -ForegroundColor Green
              }
          }

          if ($escapePressed) { break }
      }

      # An interrupted scan never writes a (partial) results file.
      if ($escapePressed) {
          Write-Host "Escape key pressed. Exiting..." -ForegroundColor Red
          return
      }

      # Order hits by the similarity percentage at the start of each hit line.
      $sortedHits = $hits | Sort-Object {
          $similarity = 0
          if ($_ -match '^(\d+\.\d+)%') {
              $similarity = [double]$matches[1]
          }
          $similarity
      } -Descending

      $hitCount = $hits.Count

      # Make sure the output folder exists before writing into it.
      $outputDir = Split-Path -Path $outputFile -Parent
      if ($outputDir -and -not (Test-Path -LiteralPath $outputDir -PathType Container)) {
          New-Item -ItemType Directory -Path $outputDir -Force | Out-Null
      }

      # Write to output file
      Set-Content -LiteralPath $outputFile -Value "7z Archive Scan Results`r`nHits: $hitCount`r`n====================================`r`n"
      Add-Content -LiteralPath $outputFile -Value ($sortedHits -join "`r`n")

      Write-Host "Scan complete. $hitCount results saved to: $outputFile" -ForegroundColor Green
  }
  # Prompts with "<prompt> (default is <fallback>)" and returns the fallback when
  # the user just presses Enter.
  function Read-ValueOrDefault {
      param ([string]$prompt, $fallback)

      $answer = Read-Host "$prompt (default is $fallback)"
      if (-not $answer) { return $fallback }
      return $answer
  }

  # Converts a threshold entry to [double]; warns and uses the fallback when the
  # entry is not a number.
  function ConvertTo-ThresholdOrDefault {
      param ($value, $fallback)

      try {
          return [double]$value
      }
      catch {
          Write-Host "Warning: '$value' is not a number. Using $fallback instead."
          return [double]$fallback
      }
  }

  # Load the base defaults from "Config\Default Config.json".
  $defaultsFile = "Default"

  if ($defaultsFile -notmatch ' Config$')
  { $defaultsFile += " Config" }

  $defaultsPath = "Config\$($defaultsFile).json"

  if (Test-Path -LiteralPath $defaultsPath -PathType Leaf) {
      $Defaults = Get-Content -LiteralPath $defaultsPath -Raw | ConvertFrom-Json
  }
  else {
      # Keep going with empty defaults so every value can still be typed in by hand.
      Write-Host "Warning: Defaults file '$defaultsPath' not found. No defaults are available."
      $Defaults = [pscustomobject]@{}
  }

  # User input defaults

  $defaultsFile = Read-ValueOrDefault -prompt "Enter Defaults Path" -fallback ($Defaults.defaultsFile)

  if ($defaultsFile -notmatch ' Config$') {
      $defaultsFile += " Config"
  }

  # Reload only when the chosen defaults set differs from the base one already loaded.
  # (The choice itself is tested here, so a name typed at the prompt is honoured
  # even when the base config names itself as the default.)
  if ($defaultsFile -ne "Default Config") {
      $defaultsPath = "Config\$($defaultsFile).json"

      if (Test-Path $defaultsPath) {
          $Defaults = Get-Content -Path $defaultsPath -Raw | ConvertFrom-Json
      }
      else {
          Write-Host "Warning: Defaults file '$defaultsPath' not found. Using existing defaults."
      }
  }

  # User input handling

  $searchTerm = Read-ValueOrDefault -prompt "Enter filename to search" -fallback ($Defaults.searchTerm)

  $searchPath = Read-ValueOrDefault -prompt "Enter path to search in" -fallback ($Defaults.searchPath)

  $searchThreshold = Read-ValueOrDefault -prompt "Threshold in percent for file match" -fallback ($Defaults.searchThreshold)
  $searchThreshold = ConvertTo-ThresholdOrDefault -value $searchThreshold -fallback ($Defaults.searchThreshold)

  $archiveTestString = Read-ValueOrDefault -prompt "Enter test string for archive filtering" -fallback ($Defaults.archiveTestString)

  $archiveThreshold = Read-ValueOrDefault -prompt "Threshold in percent for archive match" -fallback ($Defaults.archiveThreshold)
  $archiveThreshold = ConvertTo-ThresholdOrDefault -value $archiveThreshold -fallback ($Defaults.archiveThreshold)

  # The comma-separated extension list becomes a regex alternation ("txt|cs|...");
  # Scan-7zArchives reads $searchExtensions from this scope.
  $searchExtensions = Read-ValueOrDefault -prompt "Valid extensions separated by commas" -fallback ($Defaults.searchExtensions)
  $searchExtensions = $searchExtensions -replace '\s*,\s*', '|'

  $outName = Read-ValueOrDefault -prompt "Output name" -fallback ($Defaults.outName)

  if ($outName -notmatch ' Results$') {
      $outName += " Results"
  }

  Scan-7zArchives -searchTerm $searchTerm -path $searchPath -threshold $searchThreshold -archiveTestString $archiveTestString -archiveThreshold $archiveThreshold -outName $outName

  Read-Host "Press Enter to exit"
  314.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement