Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Filename: get_duplicate_md5.py
# Version: 1.0.0
# Author: Jeoi Reqi
"""
Description:
    This script finds duplicate files within the current working directory
    (including all of its subdirectories) based on their MD5 hashes.

Expected Output:
    -------------------------------------------------------------------------------
    Duplicate files with MD5 hash: 098f6bcd4621d373cade4e832627b4f6

    C:\\Users\\pytho\\OneDrive\\Desktop\\MyScripts\\New PY Scripts\\test.txt
    C:\\Users\\pytho\\OneDrive\\Desktop\\MyScripts\\New PY Scripts\\test_new_name.txt

        You can safely delete the duplicate file(s).

    Program has completed without errors.    GoodBye!
    -------------------------------------------------------------------------------

Usage:
    - The script prints the paths of duplicate files along with their MD5 hash.
    - To use this script, simply run it. It will search for duplicate files
      within the current working directory.

Additional Notes:
    - This script only searches for duplicate files within the current working
      directory and the directories below it.
    - It calculates the MD5 hash of each file to identify duplicates.
"""
import hashlib
import os
import sys
from collections import defaultdict
def get_md5_hex(file_path, chunk_size=65536):
    """Return the MD5 digest of a file as a lowercase hexadecimal string.

    The file is opened in binary mode and read in pieces of ``chunk_size``
    bytes, so the whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes requested per read. Must be at least 1;
            the value only affects how the file is read, not the digest.

    Returns:
        The 32-character hexadecimal MD5 digest of the file's contents.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately and read(-1) slurps the entire file;
    # neither is the chunked read the caller asked for, so reject both.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            chunk = stream.read(chunk_size)
            if not chunk:  # b"" signals end of file
                break
            digest.update(chunk)
    return digest.hexdigest()
def find_duplicate_files(directory=None):
    """Print every group of files below a directory that share an MD5 hash.

    The directory tree is walked with ``os.walk``, each file is hashed with
    ``get_md5_hex``, and the paths are grouped by digest. Every digest that
    maps to more than one path is reported on standard output, followed by a
    single closing message.

    Args:
        directory: Root of the tree to scan. Defaults to the current working
            directory when omitted or ``None``.

    Returns:
        None. Results are printed.
    """
    if directory is None:
        directory = os.getcwd()

    paths_by_hash = defaultdict(list)
    for root, _, file_names in os.walk(directory):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)
            try:
                md5_hash = get_md5_hex(file_path)
            except OSError as error:
                # One locked, vanished or permission-denied file must not
                # abort the whole scan; report it and keep going.
                print(
                    "Skipping unreadable file: %s (%s)" % (file_path, error),
                    file=sys.stderr,
                )
                continue
            paths_by_hash[md5_hash].append(file_path)

    found_duplicates = False
    for md5_hash, file_paths in paths_by_hash.items():
        if len(file_paths) < 2:
            continue
        found_duplicates = True
        print("\nDuplicate files with MD5 hash:", md5_hash, "\n")
        for file_path in file_paths:
            print(file_path)

    # Only suggest deleting files when something was actually reported.
    if found_duplicates:
        print(
            "\n\tYou can safely delete the duplicate file(s).\n\nProgram has completed without errors.\tGoodBye!\n"
        )
    else:
        print(
            "\n\tNo duplicate files found.\n\nProgram has completed without errors.\tGoodBye!\n"
        )
def main():
    """Script entry point: run the duplicate-file scan with its defaults."""
    find_duplicate_files()
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement