import os
import sys
import hashlib
# Maps each sub-directory path found by recursive_folder_search to the
# running index number assigned to it during the walk.
folder_dict = {}
# Maps a file checksum to [folder, file] for the first file seen with that
# checksum; `folder` is the folder_dict entry of the containing directory
# (None when that directory has no entry) and `file` is the bare file name.
hash_dict = {}
def calculate_checksum(file_path, algorithm="md5"):
    """Return the hex digest of the file at *file_path*.

    Args:
        file_path: Path of the file to hash.
        algorithm: One of ``"md5"``, ``"sha1"`` or ``"sha256"``.

    Returns:
        The hexadecimal digest string on success, ``None`` when the file
        exists but reading it failed (the error is printed), or the literal
        string ``"not exists"`` when *file_path* does not exist.

    Raises:
        ValueError: If *algorithm* is not one of the supported names. This
            is checked before the file system is touched.
    """
    if algorithm not in ("md5", "sha1", "sha256"):
        raise ValueError("Unsupported algorithm."
                         " Please choose md5, sha1, or sha256.")
    # The supported names are exactly the hashlib constructor names.
    hasher = getattr(hashlib, algorithm)()

    if not os.path.exists(file_path):
        return "not exists"

    try:
        with open(file_path, 'rb') as stream:
            # Read in fixed-size blocks so large files are never loaded
            # into memory in one piece.
            while True:
                block = stream.read(4096)
                if not block:
                    break
                hasher.update(block)
        return hasher.hexdigest()
    except Exception as error:
        # Best effort: report the problem and signal failure with None.
        print(error)
        return None
def recursive_file_search(path):
    """Walk *path* and report files whose content duplicates an earlier one.

    Every file below ``path`` is hashed with MD5 via ``calculate_checksum``.
    The first file seen for a given digest is recorded in the module-level
    ``hash_dict`` as ``[folder, file]``, where ``folder`` is the
    ``folder_dict`` entry for the containing directory (``None`` when that
    directory has no entry) and ``file`` is the bare file name. Each later
    file with an already-recorded digest is printed as a duplicate.

    Files that could not be hashed are skipped: they are neither recorded
    nor reported, because their content is unknown.

    Args:
        path: Directory to scan; it is made absolute before walking.
    """
    base_path = os.path.abspath(path)
    for root, _, files in os.walk(base_path):
        # The folder entry is the same for every file in this directory.
        folder = folder_dict.get(root)
        for file in files:
            file_path = os.path.join(root, file)
            hash_value = calculate_checksum(file_path, "md5")
            # calculate_checksum signals failure with None (read error) or
            # the string "not exists" (e.g. a broken symlink). Using those
            # as keys would flag unrelated unreadable files as duplicates
            # of each other, so they are left out entirely.
            if hash_value is None or hash_value == "not exists":
                continue
            if hash_value in hash_dict:
                print("found duplicated file", file_path)
            else:
                hash_dict[hash_value] = [folder, file]
def recursive_folder_search(path):
    """Number every sub-directory below *path* and print the total.

    Each sub-directory found by ``os.walk`` is stored in the module-level
    ``folder_dict`` under its joined path, with a running index starting at
    1. The top-level directory itself is not given an entry. When the walk
    finishes, the number of sub-directories is printed.

    Args:
        path: Directory to scan; it is made absolute before walking.
    """
    base_path = os.path.abspath(path)
    counter = 0
    for parent, subdirs, _ in os.walk(base_path):
        # Continue numbering from the last index used; `counter` keeps its
        # previous value when a directory has no sub-directories.
        for counter, name in enumerate(subdirs, start=counter + 1):
            folder_dict[os.path.join(parent, name)] = counter
    print(f"folder count {counter}")
if __name__ == '__main__':
    # Exactly one argument is expected: the directory to scan.
    if len(sys.argv) != 2:
        # sys.exit is used instead of the bare exit() helper, which is only
        # injected by the site module and may be missing (e.g. python -S).
        # A string argument is printed to stderr and gives exit status 1.
        sys.exit(f"usage: {sys.argv[0]} <directory>")
    current_directory = sys.argv[1]
    print(current_directory)
    # Index the folders first so the file scan can look up folder entries.
    recursive_folder_search(current_directory)
    recursive_file_search(current_directory)