FocusedWolf

Python: Duplicate File Finder

Apr 16th, 2022 (edited)
#!/usr/bin/env python3

# POSTED ONLINE: https://pastebin.com/VimRpgv1

# dupFinder.py
# SOURCE: https://www.pythoncentral.io/finding-duplicate-files-with-python/
# SOURCE: https://www.pythoncentral.io/hashing-files-with-python/

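# USAGE (illustrative examples only; the folder paths are hypothetical and the
# flags are the ones defined in main() below):
#
#   python3 dupFinder.py ~/Downloads ~/Documents
#       List every group of identical files found under both folders.
#
#   python3 dupFinder.py --unique ~/Downloads ~/Documents
#       Hide one path per group so the remaining listed copies can be deleted.
#
#   python3 dupFinder.py --unique --target Downloads ~/Downloads ~/Documents
#       As above, but prefer to hide a non-"Downloads" path, so the copies
#       under Downloads are the ones listed for deletion.
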
import os
import sys
import hashlib

def findDups(folders):
    dups = {} # This will be a dictionary of lists with this layout: {hash:[names]}

    if args.redact and args.target is None:
        print('WARNING: Search cancelled. Using --redact without --target would redact all output.')
        return dups

    for path in folders:
        if os.path.exists(path):
            print('Searching "%s"' % path)

            # Find the duplicated files and merge them into dups.
            mergeDups(dups, findDup(path))

        else:
            print('%s does not exist.' % path)

    # Delete every dictionary key that maps to a single file (i.e. no duplicates).
    for key in [key for key, value in dups.items() if len(value) == 1]:
        del dups[key]

    return dups

def findDup(parentFolder):
    dups = {} # This will be a dictionary of lists with this layout: {hash:[names]}

    for dirName, subdirs, fileList in os.walk(parentFolder):
        #  print('Searching %s' % dirName)

        for filename in fileList:
            # Get the path to the file.
            path = os.path.join(dirName, filename)
            # Calculate the hash.
            file_hash = hashfile(path)

            # Add or append the file path.
            if file_hash in dups:
                dups[file_hash].append(path)
            else:
                dups[file_hash] = [path]

    return dups

def mergeDups(a, b):
    for key in b.keys():
        if key in a:
            # Merge lists.
            a[key] += b[key]
        else:
            # Add list as a new entry.
            a[key] = b[key]

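# Worked example of the merge (the hashes and paths are hypothetical): given
#   a = {'h1': ['/old/a.txt']}
#   b = {'h1': ['/new/a.txt'], 'h2': ['/new/b.txt']}
# mergeDups(a, b) leaves a as
#   {'h1': ['/old/a.txt', '/new/a.txt'], 'h2': ['/new/b.txt']}
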
def hashfile(path, blocksize=65536):
    # Hash the file in fixed-size chunks so large files never have to fit in memory.
    hasher = hashlib.sha256()
    with open(path, 'rb') as f:
        buf = f.read(blocksize)
        while len(buf) > 0:
            hasher.update(buf)
            buf = f.read(blocksize)
    return hasher.hexdigest()

def filterUnique(dups):
    if len(dups) == 0:
        return

    if not args.unique:
        return

    # If no targets were given, hide the first path in every group.
    if args.target is None:
        for key, value in dups.items():
            value[:] = value[1:]
        return

    for key, value in dups.items():
        # Skip groups that are already too small.
        if len(value) <= 1:
            continue

        # If every path in the group contains a target word, hide the first path.
        if all(any(target in path for target in args.target) for path in value):
            value[:] = value[1:]
            continue

        # Otherwise hide the first non-target path: the walrus expression counts
        # non-target paths, and only the first one (total == 1) is dropped.
        total = 0
        value[:] = [path for path in value if any(target in path for target in args.target) or (total := total + 1) > 1]

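# Worked example of the non-target filter above (the paths are hypothetical):
# with args.target = ['keep'] and value = ['/tmp/x', '/keep/x', '/srv/x'],
# '/tmp/x' is the first non-target path (total becomes 1, and 1 > 1 is False),
# so it is hidden; '/keep/x' matches a target and '/srv/x' is the second
# non-target (total becomes 2), so both are kept.
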
def filterRedact(dups):
    if len(dups) == 0:
        return

    if not args.redact:
        return

    if args.target is None:
        # Remove every result.
        dups.clear()

    else:
        # Remove non-targets from every result.
        for key, value in dups.items():
            value[:] = [path for path in value if any(target in path for target in args.target)]

def filterClean(dups):
    if len(dups) == 0:
        return

    # Delete every dictionary key whose list was emptied by the other filters.
    for key in [key for key, value in dups.items() if len(value) == 0]:
        del dups[key]

def printResults(dups):
    print()

    if len(dups) == 0:
        print('No duplicates found.')
        return

    print('The following files are identical:')

    for key, value in dups.items():
        print()
        for path in sorted(value):
            output = ''

            if args.showhash:
                output += key
                output += ' '

            if args.prepend != '':
                output += args.prepend

            output += '"%s"' % path

            if args.append != '':
                output += args.append

            print(output)

def wait_for_any_keypress():
    import sys
    if sys.platform == 'win32':
        import os
        os.system('pause')
    elif sys.platform.startswith('linux') or sys.platform == 'darwin':
        print('Press any key to continue . . .')
        import termios
        import tty
        stdin_file_desc = sys.stdin.fileno()
        old_stdin_tty_attr = termios.tcgetattr(stdin_file_desc)
        try:
            # Switch the terminal to raw mode so a single keypress is read
            # without waiting for Enter, then restore the old settings.
            tty.setraw(stdin_file_desc)
            sys.stdin.read(1)
        finally:
            termios.tcsetattr(stdin_file_desc, termios.TCSADRAIN, old_stdin_tty_attr)

def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--redact', action='store_true', help='only display paths which contain a "target" word', required=False)
    parser.add_argument('-a', '--append', help='append this text after every path', type=str, default='', required=False)
    parser.add_argument('-p', '--prepend', help='prepend this text before every path', type=str, default='', required=False)
    parser.add_argument('-t', '--target', action='append', help='only display duplicate groups if one of the paths contains a "target" word', required=False)
    parser.add_argument('-u', '--unique', action='store_true', help='do not display one of the paths in a duplicate group so you can delete the duplicates', required=False)
    parser.add_argument('-s', '--showhash', action='store_true', help='print the SHA-256 hash for each duplicate file', required=False)
    parser.add_argument('folders', help='the directory paths to compare', type=str, nargs='+')

    try:
        # args is shared as a module-level global with the filter functions above.
        global args
        args = parser.parse_args()
    except SystemExit:
        wait_for_any_keypress()
        return

    # Print args.
    import sys
    print(sys.argv)

    dups = findDups(args.folders)
    filterUnique(dups)
    filterRedact(dups)
    filterClean(dups)
    printResults(dups)

if __name__ == '__main__':
    main()