Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # "7Scan.ps1" - Copyright Tim Keal alias jargon 2025 03/12
- # This script scans for 7zip archives and lists file names matched by similarity threshold.
- # This script is for Powershell 7 ("pwsh.exe" not "Powershell.exe")
- # Requires 7-Zip installed
# Early-exit flag read by the scan loops; 0 means "keep going".
$EarlyExit = 0

# Location of the 7-Zip command-line executable invoked to list archive contents.
$SevenZipPath = "C:\Program Files\7-Zip\7z.exe"

# Stop right away when the executable is missing: nothing below can work without it.
$sevenZipFound = Test-Path $SevenZipPath -PathType Leaf
if (!$sevenZipFound) {
    Write-Host "Error: 7-Zip not found." -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
}
# Keeps the first character of a string as-is and lower-cases everything after it.
# Strings of length 0 or 1 are returned unchanged.
function Convert-UpperCaseSeries {
    param (
        [string]$inputString
    )
    # Guard clause: nothing to lower-case when there is no tail.
    if ($inputString.Length -le 1) {
        return $inputString
    }
    $leading = $inputString.Substring(0, 1)
    $trailing = $inputString.Substring(1).ToLower()
    return "$leading$trailing"
}
# Computes the case-insensitive Levenshtein similarity of two strings as a percentage.
#
# Parameters:
#   source - first string to compare.
#   target - second string to compare.
# Returns:
#   (1 - distance / max(length)) * 100, rounded to 6 decimals.
#   Two empty strings are defined to be 100% similar.
#
# Only two rows of the distance table are kept at a time; each cell depends solely on
# the row above and the cell to its left, so the full matrix is never needed.
function Get-LevenshteinSimilarity {
    param ([string]$source, [string]$target)

    # Invariant-culture folding keeps scores independent of the current locale.
    $source = $source.ToLowerInvariant()
    $target = $target.ToLowerInvariant()
    $sourceLength = $source.Length
    $targetLength = $target.Length

    if ($sourceLength -eq 0 -and $targetLength -eq 0) { return 100.000000 }

    # previousRow is row (i - 1) of the classic matrix, currentRow is row i.
    $previousRow = [int[]]::new($targetLength + 1)
    $currentRow = [int[]]::new($targetLength + 1)

    # Row 0: turning an empty prefix into the first j characters costs j insertions.
    for ($j = 0; $j -le $targetLength; $j++) { $previousRow[$j] = $j }

    for ($i = 1; $i -le $sourceLength; $i++) {
        # Column 0: turning the first i characters into an empty prefix costs i deletions.
        $currentRow[0] = $i
        $sourceChar = $source[$i - 1]
        for ($j = 1; $j -le $targetLength; $j++) {
            $cost = if ($sourceChar -eq $target[$j - 1]) { 0 } else { 1 }
            $delete = $previousRow[$j] + 1
            $insert = $currentRow[$j - 1] + 1
            $substitute = $previousRow[$j - 1] + $cost
            $currentRow[$j] = [math]::Min([math]::Min($delete, $insert), $substitute)
        }
        # Swap buffers: the row just filled becomes "previous" for the next pass.
        $previousRow, $currentRow = $currentRow, $previousRow
    }

    # After the final swap the last computed row lives in previousRow.
    $distance = $previousRow[$targetLength]
    $maxLength = [math]::Max($sourceLength, $targetLength)
    $similarity = ((1 - ($distance / $maxLength)) * 100)
    return [math]::Round($similarity, 6)
}
# Returns $true when the next pending console key is Escape.
# At most one pending key is consumed per call, without echoing it.
function Test-ScanEscapePressed {
    if ([System.Console]::KeyAvailable) {
        $key = [System.Console]::ReadKey($true)
        return ($key.Key -eq "Escape")
    }
    return $false
}

# Scans inside .7z files whose cleaned-up archive name is similar enough to a test string,
# and records every contained file whose base name is similar enough to the search term.
#
# Parameters:
#   searchTerm        - file base name to look for inside the archives.
#   path              - directory that is searched recursively for *.7z files.
#   threshold         - minimum similarity (percent) for a file to count as a hit.
#   archiveTestString - string the cleaned archive name is compared against.
#   archiveThreshold  - minimum similarity (percent) for an archive to be opened.
#   outName           - base name of the report, written to "Results\<outName>.txt".
#
# Reads from the calling scope: $SevenZipPath (7z executable) and $searchExtensions
# (a regex alternation of accepted file extensions).
# Pressing Escape cancels the scan; a cancelled scan writes no report.
function Scan-7zArchives {
    param (
        [string]$searchTerm,
        [string]$path = "./",
        [double]$threshold = 70,
        [string]$archiveTestString = "",
        [double]$archiveThreshold = 70,
        [string]$outName = ""
    )

    # Fail clearly instead of letting Resolve-Path / Get-ChildItem error out on a bad path.
    if ([string]::IsNullOrWhiteSpace($path) -or -not (Test-Path -Path $path)) {
        Write-Host "Error: search path not found: $path" -ForegroundColor Red
        return
    }
    $basePath = Resolve-Path -Path $path
    Write-Host "Scanning: $basePath" -ForegroundColor Yellow

    $cancelled = $false
    $archives = $null
    # 'break' inside a ForEach-Object script block unwinds to the nearest enclosing loop;
    # with no loop around it, it silently ends the whole script. The single-pass do/while
    # gives it a loop to land on, so Escape stops only the crawl.
    do {
        $archives = Get-ChildItem -Path $basePath -Filter "*.7z" -Recurse -File | ForEach-Object {
            if (Test-ScanEscapePressed) {
                Write-Host "Escape key pressed. Exiting file crawl." -ForegroundColor Red
                $cancelled = $true
                break
            }
            # Skip anything whose path contains ".git" or ".vs".
            if ($_.FullName -notmatch '.*\.(git|vs).*') {
                Write-Host "Crawling: $($_.FullName)" -ForegroundColor Yellow
                $_
            }
        }
    } while ($false)
    if ($cancelled) {
        return
    }

    $outputFile = "Results\$outName.txt"
    # A List avoids re-copying the whole array on every hit, as '+=' on an array does.
    $hits = [System.Collections.Generic.List[string]]::new()

    foreach ($archive in $archives) {
        if (Test-ScanEscapePressed) {
            Write-Host "Escape key pressed. Exiting..." -ForegroundColor Red
            return
        }

        $archiveNameOnly = $archive.Name

        # Timestamp: three or four dash-separated groups of four digits.
        $pattern = '\b\d{4}-\d{4}-\d{4}(?:-\d{4})?\b'
        if ($archiveNameOnly -match $pattern) {
            $timestamp = $matches[0]
        } else {
            $timestamp = "NO_TIMESTAMP"
        }

        # Project: everything before " <timestamp>".
        $pattern = "^(.*) " + [regex]::Escape($timestamp)
        if ($archiveNameOnly -cmatch $pattern) {
            $project = $matches[1]
        } else {
            $project = "UNKNOWN_PROJECT"
        }

        # Remainder of the name, with whitespace collapsed and trimmed.
        $cleanedName = $archiveNameOnly -replace $pattern, "" -replace "\s{2,}", " " -replace "^\s+|\s+$", ""

        # When the remainder contains a parenthesised part, keep only that part.
        $pattern = '.*\((.*?)\).*'
        if ($cleanedName -cmatch $pattern) {
            $cleanedName = $matches[1]
        }

        # Noise that is replaced by a single space before comparing.
        # NOTE(review): '\b[A-Z]{3,}|OK\b' parses as (\b[A-Z]{3,}) | (OK\b); if the intent
        # was '\b(?:[A-Z]{3,}|OK)\b', the grouping is missing - confirm before changing.
        $patterns = @(
            '(?-i)\bAUTOMATIC BACKUP\b',
            '(?-i)\b[A-Z]{3,}|OK\b',
            ' \+ ',
            '^\+',
            '\+$',
            ' \+ ',
            '\+'
        )
        foreach ($p in $patterns) {
            $cleanedName = $cleanedName -replace $p, " " -replace "\s{2,}", " " -replace "^\s+|\s+$", ""
        }

        # -replace is case-insensitive, so this visits every run of letters and
        # lower-cases all but its first character.
        $pattern = '([A-Z]+)'
        $cleanedName = $cleanedName -replace $pattern, { Convert-UpperCaseSeries $_.Value }
        $cleanedName = $cleanedName.Trim()

        $archiveSimilarity = Get-LevenshteinSimilarity -source $archiveTestString -target $cleanedName
        $formattedArchiveSimilarity = $archiveSimilarity.ToString("000.00000000")
        $dumpName = "$formattedArchiveSimilarity% > $project > $timestamp > $cleanedName"

        if ($archiveSimilarity -lt $archiveThreshold) {
            Write-Host "Skipping: $dumpName" -ForegroundColor Red
            continue
        }
        Write-Host "Scanning: $dumpName" -ForegroundColor Yellow

        # "l -ba" lists the archive contents without the header and footer lines.
        $output = & $SevenZipPath l -ba "$($archive.FullName)" 2>$null
        foreach ($line in $output) {
            if (Test-ScanEscapePressed) {
                $cancelled = $true
                break
            }

            # Drop the leading "date time attributes size" columns of the listing line.
            $fileNameOnly = $line -replace '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\s+\S+\s+\d+\s+', ''
            # Ignore blank entries and directory entries (attribute column "D....").
            if ($fileNameOnly -match '^\s*$' -or $line -match 'D\.\.\.\.') { continue }
            # Ignore anything located under a .git or .vs path segment.
            if ($fileNameOnly -match '^(.*(\\|\/)){0,}\.(git|vs)((\\|\/).*){0,}$') { continue }
            # Keep only the last path segment.
            if ($fileNameOnly -match '([^\\\/]+)$') {
                $fileNameOnly = $matches[1]
            }

            # Reset per entry so a file without an extension cannot inherit the
            # extension of the previously listed file.
            $fileExtOnly = ''
            if ($fileNameOnly -match '^.*\.([^\.]+)$') {
                $fileExtOnly = $matches[1]
            }
            if ($fileNameOnly -match '^(.*)\.[^\.]+$') {
                $fileNameOnly = $matches[1]
            }
            if ($fileExtOnly -notmatch "^(?:$searchExtensions)$") { continue }

            Write-Host "Checking: $fileExtOnly > $fileNameOnly" -ForegroundColor Yellow
            $fileNameOnly = [System.IO.Path]::GetFileName($fileNameOnly)
            $similarity = Get-LevenshteinSimilarity -source $searchTerm -target $fileNameOnly
            $formattedSimilarity = $similarity.ToString("000.00000000")
            if ($similarity -ge $threshold) {
                $matchString = "$formattedSimilarity% > $archiveNameOnly > $fileExtOnly > $fileNameOnly"
                $hits.Add($matchString)
                # $(...) is required: "$hits.Count" would expand the whole list
                # followed by the literal text ".Count".
                Write-Host "Found $($hits.Count): $matchString" -ForegroundColor Green
            }
        }

        if ($cancelled) {
            Write-Host "Escape key pressed. Exiting..." -ForegroundColor Red
            return
        }
    }

    # Highest similarity first; the score is the number in front of the first '%'.
    $sortedHits = $hits | Sort-Object {
        $similarity = 0
        if ($_ -match '^(\d+\.\d+)%') {
            $similarity = [double]$matches[1]
        }
        $similarity
    } -Descending
    $hitCount = $hits.Count

    # Make sure the report directory exists; Set-Content does not create it.
    $resultsDirectory = Split-Path -Path $outputFile -Parent
    if ($resultsDirectory -and -not (Test-Path -Path $resultsDirectory -PathType Container)) {
        New-Item -ItemType Directory -Path $resultsDirectory -Force | Out-Null
    }

    # Write to output file
    Set-Content -Path $outputFile -Value "7z Archive Scan Results`r`nHits: $hitCount`r`n====================================`r`n"
    Add-Content -Path $outputFile -Value ($sortedHits -join "`r`n")
    Write-Host "Scan complete. $hitCount results saved to: $outputFile" -ForegroundColor Green
}
# Load the baseline defaults from "Config\Default Config.json".
$defaultsFile = "Default"
if ($defaultsFile -notmatch ' Config$') { $defaultsFile += " Config" }
$defaultsPath = "Config\$($defaultsFile).json"
if (-not (Test-Path $defaultsPath -PathType Leaf)) {
    # Without the baseline file every prompt below would show and use empty defaults.
    Write-Host "Error: Defaults file '$defaultsPath' not found." -ForegroundColor Red
    Read-Host "Press Enter to exit"
    exit 1
}
$Defaults = Get-Content -Path $defaultsPath | ConvertFrom-Json

# User input defaults: optionally switch to another "<name> Config.json" file.
$defaultsFile = Read-Host "Enter Defaults Path (default is $($Defaults.defaultsFile))"
if (-not $defaultsFile) { $defaultsFile = $Defaults.defaultsFile }
if (-not $defaultsFile) { $defaultsFile = "Default" }
if ($defaultsFile -notmatch ' Config$') {
    $defaultsFile += " Config"
}
# Compare the file the user actually selected; the baseline is already loaded.
if ($defaultsFile -ne "Default Config") {
    $defaultsPath = "Config\$($defaultsFile).json"
    if (Test-Path $defaultsPath) {
        $Defaults = Get-Content -Path $defaultsPath | ConvertFrom-Json
    }
    else {
        Write-Host "Warning: Defaults file '$defaultsPath' not found. Using existing defaults."
    }
}

# User input handling: an empty answer falls back to the value from the defaults file.
$searchTerm = Read-Host "Enter filename to search (default is $($Defaults.searchTerm))"
if (-not $searchTerm) { $searchTerm = $Defaults.searchTerm }

$searchPath = Read-Host "Enter path to search in (default is $($Defaults.searchPath))"
if (-not $searchPath) { $searchPath = $Defaults.searchPath }

$searchThreshold = Read-Host "Threshold in percent for file match (default is $($Defaults.searchThreshold))"
if (-not $searchThreshold) { $searchThreshold = $Defaults.searchThreshold }
$searchThreshold = [double]$searchThreshold

$archiveTestString = Read-Host "Enter test string for archive filtering (default is $($Defaults.archiveTestString))"
if (-not $archiveTestString) { $archiveTestString = $Defaults.archiveTestString }

$archiveThreshold = Read-Host "Threshold in percent for archive match (default is $($Defaults.archiveThreshold))"
if (-not $archiveThreshold) { $archiveThreshold = $Defaults.archiveThreshold }
$archiveThreshold = [double]$archiveThreshold

$searchExtensions = Read-Host "Valid extensions separated by commas (default is $($Defaults.searchExtensions))"
if (-not $searchExtensions) { $searchExtensions = $Defaults.searchExtensions }
# Turn "c, .h, c++" into the regex alternation "c|h|c\+\+": trim each entry, drop a
# leading dot and empty entries, and escape regex metacharacters so that every
# extension is matched literally by Scan-7zArchives.
$searchExtensions = (
    $searchExtensions -split ',' |
        ForEach-Object { $_.Trim().TrimStart('.') } |
        Where-Object { $_ } |
        ForEach-Object { [regex]::Escape($_) }
) -join '|'

$outName = Read-Host "Output name (default is $($Defaults.outName))"
if (-not $outName) { $outName = $Defaults.outName }
if ($outName -notmatch ' Results$') {
    $outName += " Results"
}

Scan-7zArchives -searchTerm $searchTerm -path $searchPath -threshold $searchThreshold -archiveTestString $archiveTestString -archiveThreshold $archiveThreshold -outName $outName
Read-Host "Press Enter to exit"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement