Advertisement
Paul_Pedant

Sample Awk pre-processor for string matching.

Jun 8th, 2020
378
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 2.42 KB | None | 0 0
  1. #! /bin/bash
  2.  
  #.. Source locations. The music volume must be mounted before running this.
  MUSIC=/media/paul/Windows/40_Music
  #.. Sandbox directory. NOTE(review): not referenced by the visible script,
  #.. which uses paths relative to the current directory -- confirm intent.
  HERE=~/SandBox/Similar

  #.. Work files, all created in the current directory.
  ALL=./Music.all         #.. Raw file list produced by find.
  TRACK=./Track.all       #.. Cleaned track titles produced by Tracks.
  RANK=./Track.rank       #.. Output of the Sim comparison.
  UNIQ=./Track.uniq       #.. Output of uniq -c over the rank file.
  11.  
  #.. Tracks: reduce a list of file paths to bare track titles.
  #..   stdin:  one file path per line; only the last /-separated part is used.
  #..   stdout: one cleaned title per surviving audio file, in input order.
  #..   stderr: a report of how often each rule fired, sorted by rule label.
  #.. Files that are not .wma/.wav/.mp3 (any letter case), album art, and
  #.. unlabelled "Track N" files are dropped. A leading track number, the
  #.. audio suffix and any [bracketed] annotations are removed from the rest.
  #.. Uses only POSIX awk features plus sort(1), so any awk will run it.
  function Tracks {

  #.. The awk text must not contain a single quote: it would end the string.
  AWK='
  BEGIN {
      FS = "/";
      #.. Bracketed annotation, like: [Alternate Take]
      reBox = "[[][^]]*[]]";
      #.. Audio suffixes in any mix of case, matching the sub() calls in Prune.
      reSfx = "[.]([Ww][Mm][Aa]|[Ww][Aa][Vv]|[Mm][Pp]3)$";
      #.. Report lines are "%8d  label": sort on the label, which starts at
      #.. column 11. A TAB separator keeps the whole line as a single field.
      report = "LC_ALL=C sort -t \"\t\" -k 1.11 1>&2";
  }
  #.. Count the rule that applies to one filename, and print the cleaned
  #.. title if the file is kept. CC[] maps a rule label to its hit count.
  function Prune (tx) {
      #.. Early return for unhelpful filenames.
      #.. Like: AlbumArt_{6FE0559F-2C09-4AF7-8774-2912DC3CEDD7}_Large.jpg
      if (tx ~ /^AlbumArt.*Large[.]jpg$/) { ++CC["Delete Art-Large.jpg"]; return; }
      if (tx ~ /^AlbumArt.*Small[.]jpg$/) { ++CC["Delete Art-Small.jpg"]; return; }
      #.. Like: desktop.ini. Each distinct name gets a counter of its own.
      if (tx !~ reSfx) { ++CC["Delete " tx]; return; }
      #.. Like: 08 Track 8.mp3
      if (tx ~ "^[0-9]*[ ]?Track [1-9][0-9]?") {
          ++CC["Delete unlabelled track"]; return;
      }
      #.. Remove generic texts before printing.
      #.. Like: 01 Love Minus Zero-No Limit.mp3
      if (sub (/^[0-9]+[ ]*/, "", tx)) ++CC["Omit track prefix"];
      #.. Like: Poetry in Motion>>.mp3
      if (sub (/[.][Mm][Pp]3$/, "", tx))    ++CC["Omit .mp3 suffix"];
      if (sub (/[.][Ww][Mm][Aa]$/, "", tx)) ++CC["Omit .wma suffix"];
      if (sub (/[.][Ww][Aa][Vv]$/, "", tx)) ++CC["Omit .wav suffix"];
      #.. Like: [Alternate Take]. Cut out every bracketed part, counting each
      #.. distinct text separately.
      while (match (tx, reBox)) {
          ++CC["Omit " substr (tx, RSTART, RLENGTH)];
          tx = substr (tx, 1, RSTART-1) substr (tx, RSTART+RLENGTH);
      }
      print tx;
  }
  #.. Only consider filename -- last field.
  { Prune( $(NF)); }

  #.. Write the counters to stderr, ordered by label. Names after Local are
  #.. local variables, not arguments.
  function End (Local, key) {
      for (key in CC)
          printf ("%8d  %s\n", CC[key], key) | report;
      #.. Wait for sort to finish so the report is complete on return.
      close (report);
  }
  END { End( ); }
  '
      awk "${AWK}"
  }
  57.  
  #### Script Body Starts Here.

  #.. Write one progress message to stderr, keeping it out of the data files.
  Say () { echo 1>&2 "$1"; }

  #.. List every regular file below ${MUSIC}/<subdir>, as paths relative to
  #.. that directory. The body is a subshell, so the cd does not leak out.
  ListTree () ( cd "${MUSIC}/$1" && find * -type f )

  #.. Compare the .mp3 tracks against each other.
  #.. The test "[ x ]" is always true; it appears to serve as a manual on/off
  #.. switch for this stage.
  if [ x ]; then
      Say ".... Extracting MP3_Tree"
      ListTree "MP3_Tree" > "${ALL}"
      Tracks < "${ALL}" > "${TRACK}"
      Say ".... One-file comparison"
      #.. NOTE(review): Sim is an external tool not shown here; its options
      #.. are passed through unchanged.
      time ./Sim -f -b -t 0.75 -p -w < "${TRACK}" > "${RANK}"
      uniq -c < "${RANK}" > "${UNIQ}"
  fi

  #.. Compare all the .wma tracks against the .mp3 ones.
  #.. Same always-true switch as the stage above.
  if [ x ]; then
      Say ".... Extracting CD_RIPS"
      ListTree "CD_RIPS" | Tracks > "./WMA.track"
      Say ".... Extracting MP3_Tree"
      ListTree "MP3_Tree" | Tracks > "./MP3.track"
      Say ".... Two-file comparison"
      time ./Sim -p -f -b -t 0.75 "./WMA.track" "./MP3.track" > "./Pair.rank"
      uniq -c < "./Pair.rank" > "./Pair.uniq"
  fi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement