Advertisement
FlyFar

passat.py

Jun 6th, 2023
695
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.48 KB | Cybersecurity | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import binascii
  4. import sys
  5. import re
  6. import argparse
  7. import json
  8. from fuzzywuzzy import process
  9. from collections import Counter
  10.  
  11. VERSION = "1.7"
  12.  
  13. SYMBOLS = "~`!@#$%^&*()_\-+=}\]{[|\\\"':;?/>.<, "
  14.  
  15. stats_regex = {
  16.     "Contains: 123": f"123",
  17.     "Contains: 1234": f"1234",
  18.     "Contains: space": " ",
  19.     "Has: All lowercase": "^[a-z]+$",
  20.     "Has: All num": "^[\d]+$",
  21.     "Has: All uppercase": "^[A-Z]+$",
  22.     "Has: First capital, last number": "^[A-Z].*\d$",
  23.     "Has: First capital, last symbol": f"^[A-Z].*[{SYMBOLS}]$",
  24.     "Has: Four digits at the end": "[^\d]\d\d\d\d$",
  25.     "Has: Single digit at the end": "[^\d]\d$",
  26.     "Has: Three digits at the end": "[^\d]\d\d\d$",
  27.     "Has: Two digits at the end": "[^\d]\d\d$",
  28.     "Has: Upper + lower + num + symbol": f"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[{SYMBOLS}]).*$",
  29.     "Has: Lower + num + symbol": f"^(?=.*[a-z])(?=.*\d)(?=.*[{SYMBOLS}])[a-z\d{SYMBOLS}]*$",
  30.     "Has: Upper + num + symbol": f"^(?=.*[A-Z])(?=.*\d)(?=.*[{SYMBOLS}])[A-Z\d{SYMBOLS}]*$",
  31.     "Has: Upper + lower + num": "^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)[A-Za-z\d]*$",
  32.     "Has: Alpha + num":    "^(?=.*[a-zA-Z])(?=.*\d)[A-Za-z\d]*$",
  33.     "Has: Alpha + symbol": f"^(?=.*[a-zA-Z])(?=.*[{SYMBOLS}])[A-Za-z{SYMBOLS}]*$",
  34.     "Has: Upper + lower + symbol": f"^(?=.*[a-z])(?=.*[A-Z])(?=.*[{SYMBOLS}])[A-Za-z{SYMBOLS}]*$",
  35.     "Has: Upper + lower": "^(?=.*[a-z])(?=.*[A-Z])[A-Za-z]*$",
  36.     "Last digit is '0'": "0$",
  37.     "Last digits are '020'": "020$",
  38.     "Last digits are '19xx'": "19\d\d$",
  39.     "Last digits are '20'": "20$",
  40.     "Last digits are '2020'": "2020$",
  41.     "Last digits are '20xx'": "20\d\d$",
  42.     "Seq: 1 upper > lower > num or symbol": f"^[A-Z][a-z]+[\d{SYMBOLS}]+$",
  43.     "Seq: 1 upper > lower > num": f"^[A-Z][a-z]+[\d]+$",
  44.     "Seq: aplha > num > alpha": f"^[A-Za-z]+\d+[A-Za-z]+$",
  45.     "Seq: aplha > num > symbol": f"^[A-Za-z]+\d+[{SYMBOLS}]+$",
  46.     "Seq: aplha > num": "^[A-Za-z]+\d+$",
  47.     "Seq: aplha > symbol > num": f"^[A-Za-z]+[{SYMBOLS}]+\d+$",
  48. }
  49.  
  50. stats = {k: re.compile(v, re.UNICODE) for (k, v) in stats_regex.items()}
  51.  
  52. #pat_regex = {
  53. #    "[a-z]": "a",
  54. #    "[A-Z]": "A",
  55. #    "[\d]": "1",
  56. #    f"[{SYMBOLS}]": "@",
  57. #}
  58. #
  59. #pat_subs = {v: re.compile(k, re.UNICODE) for (k, v) in pat_regex.items()}
  60.  
  61. tr_from = f'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789{SYMBOLS}'
  62. tr_to   =  'aaaaaaaaaaaaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAA1111111111'.ljust(len(tr_from), '@')
  63. trans = str.maketrans(tr_from, tr_to)
  64.  
  65. #hex_re = re.compile("^\$HEX\[([0-9a-fA-F]*)\]$", re.UNICODE)
  66.  
  67. line_re = re.compile("(?:.*?:)?(?:.*?:)?(.*)$", re.UNICODE)
  68.  
  69.  
  70. def print_counter(title, cnt, grand_total, limit=15):
  71.     print(f"{title}")
  72.     print("=" * len(title))
  73.     items = cnt.most_common(limit)
  74.     if not items:
  75.         print("---- no data ----")
  76.         print("")
  77.         return
  78.     max_width = max([len(str(i[0])) for i in items])
  79.     for i in cnt.most_common(limit):
  80.         value = i[1]
  81.         percentage = 1.0 * value / grand_total
  82.         print(f"{i[0]:<{max_width}}  {i[1]:>6}  {percentage:>6.1%}")
  83.     print("")
  84.  
  85.  
  86. def progbar(curr, total, full_progbar=40):
  87.     frac = curr * 100 // total
  88.     if frac == progbar.last_frac:
  89.         return
  90.     progbar.last_frac = frac
  91.  
  92.     filled_progbar = ('#' * (frac * full_progbar // 100)).ljust(full_progbar)
  93.     msg = 'Completed: [' + filled_progbar + '] ' + '[{:>3d}%]'.format(frac)
  94.     print(msg, end='\r')
  95.     #sys.stdout.flush()
  96.  
  97. progbar.last_frac = -1
  98.  
  99.  
  100. def main():
  101.     parser = argparse.ArgumentParser(
  102.         description=f"Audit password quality v{VERSION}")
  103.     parser.add_argument("input_file", type=str,
  104.                         default=['-'], nargs="*",
  105.                         help="input file names, one password per line. If ommited, read from stdin")
  106.     parser.add_argument("-v", "--verbose", help="increase output verbosity",
  107.                         action="store_true")
  108.     parser.add_argument("-f", "--freq", help="run frequency analysis for characters used",
  109.                         action="store_true")
  110.     parser.add_argument("--no-categories", help="don't perform fuzzy categorization, improves performance",
  111.                         action="store_true")
  112.     parser.add_argument("-c", "--categories", help="json file with password categories for fuzzy matching, defaults to categories.json",
  113.                         default="categories.json")
  114.     args = parser.parse_args()
  115.  
  116.     if not args.no_categories:
  117.         word2category = {}
  118.         with open(args.categories, "r") as read_file:
  119.             categories = json.load(read_file)
  120.         words = set([x for y in categories.values() for x in y])
  121.         for w in words:
  122.             cats = []
  123.             for c, v in categories.items():
  124.                 if w in v:
  125.                     cats.append(c)
  126.             word2category[w] = cats
  127.  
  128.     verbose = args.verbose
  129.     cnt = Counter()
  130.     cnt_length = Counter()
  131.     cnt_pwd = Counter()
  132.     cnt_root = Counter()
  133.     cnt_regex = Counter()
  134.     cnt_symbol = Counter()
  135.     cnt_alpha = Counter()
  136.     cnt_num = Counter()
  137.     cnt_totals = Counter()
  138.     cnt_pattern = Counter()
  139.  
  140.     sys.stdin.reconfigure(errors='replace')
  141.  
  142.     grand_total = 0
  143.     total_valid_passwords = 0
  144.     for f in args.input_file:
  145.         print(f"Reading: {f}")
  146.         if f == '-':
  147.             f = sys.stdin.fileno()
  148.         with open(f, 'r', errors='replace') as f:
  149.             # to avoid newlines
  150.             lines = f.read().splitlines()
  151.  
  152.         total = len(lines)
  153.         print(f"Processing: {total} passwords")
  154.         progress = 0
  155.         valid_passwords = 0
  156.         for l in lines:
  157.             progress += 1
  158.  
  159.             # process line formats:
  160.             # password
  161.             # user:password
  162.             # user:hash:password
  163.             # ... and extract password only
  164.             p = line_re.match(l).group(1)
  165.  
  166.             # skip empty passwords
  167.             if not p:
  168.                 continue
  169.  
  170.             valid_passwords += 1
  171.  
  172.             # convert $HEX[abcd1234] passwords
  173.             # m = hex_re.match(p)
  174.             if p.startswith("$HEX[") and p[-1] == "]":
  175.                 p = binascii.unhexlify(p[5:-1]).decode("latin1")
  176.  
  177.             # length stats
  178.             cnt_length[len(p)] += 1
  179.  
  180.             # same password counting
  181.             cnt_pwd[p] += 1
  182.             if verbose:
  183.                 print(p)
  184.  
  185.             # letter frequency analysis
  186.             if args.freq:
  187.                 cnt_totals["chars"] += len(p)
  188.                 for letter in p:
  189.                     if letter.isnumeric():
  190.                         cnt_num[letter] += 1
  191.                         cnt_totals["num"] += 1
  192.                     elif letter.isalpha():
  193.                         cnt_alpha[letter] += 1
  194.                         cnt_totals["alpha"] += 1
  195.                     else:
  196.                         cnt_symbol[letter] += 1
  197.                         cnt_totals["symbol"] += 1
  198.  
  199.             # pattern counting
  200.             #pwd_pat = p
  201.             #for subst, pat in pat_subs.items():
  202.             #    pwd_pat = pat.sub(subst, pwd_pat)
  203.             pwd_pat = p.translate(trans)
  204.             cnt_pattern[pwd_pat] += 1
  205.  
  206.             # Matching various regex categories
  207.             for cat, pat in stats.items():
  208.                 if pat.search(p):
  209.                     cnt_regex[cat] += 1
  210.                     if verbose:
  211.                         print(cat)
  212.  
  213.             # Fuzzy matching to categories
  214.             if len(p) > 3 and not args.no_categories and words:
  215.                 #highest = process.extractOne(p, words)
  216.                 mall = process.extract(p, words)
  217.                 if verbose:
  218.                     print(mall)
  219.                 pw_categories = set()
  220.                 for m in mall:
  221.                     if verbose:
  222.                         print(f"{p} > {m[0]} : {m[1]}")
  223.                     if m[1] > 80:
  224.                         cnt_root[m[0]] += 1
  225.                         pw_categories.update(word2category[m[0]])
  226.  
  227.                 if not pw_categories:
  228.                     pw_categories = ['no_category']
  229.  
  230.                 #print(f">>>> {pw_match} {score} {pw_categories}")
  231.                 for pw_category in pw_categories:
  232.                     cnt[pw_category] += 1
  233.                 if verbose:
  234.                     print(f"{p} > {pw_categories}")
  235.                     #print(f"'{p}'", highest, pw_category)
  236.  
  237.             if verbose:
  238.                 print()
  239.             else:
  240.                 progbar(progress, total)
  241.  
  242.         grand_total += total
  243.         total_valid_passwords += valid_passwords
  244.         print()
  245.  
  246.     print()
  247.     print(f"Total lines processed: {grand_total}")
  248.     print(f"Valid passwords found: {total_valid_passwords}")
  249.     print()
  250.     if not args.no_categories:
  251.         print_counter("Categories", cnt, grand_total)
  252.         print_counter("Password base words:", cnt_root, grand_total)
  253.     print_counter("Password length frequency:", cnt_length, grand_total)
  254.     print_counter("Password values:", cnt_pwd, grand_total)
  255.     print_counter("Charsets and sequences:", cnt_regex,
  256.                   grand_total, len(stats_regex))
  257.     print_counter("Password patterns:", cnt_pattern, grand_total, 15)
  258.     if args.freq:
  259.         print_counter("Most frequent alpha chars:",
  260.                       cnt_alpha, cnt_totals["alpha"])
  261.         print_counter("Most frequent num chars:", cnt_num, cnt_totals["num"])
  262.         print_counter("Most frequent symbol chars:",
  263.                       cnt_symbol, cnt_totals["symbol"])
  264.  
  265.  
  266. if __name__ == '__main__':
  267.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement