FuzzuDuplicateFinder — Python Desktop App to Find & Remove Duplicate Files (CustomTkinter GUI)
Demo :
Click the video below to watch the demo.
Features :
- Modern customtkinter UI with responsive threading
- Chunked MD5 hashing for accurate duplicate detection
- Size pre-filter to skip unique files (faster)
- Treeview grouped duplicate preview with file sizes
- Open containing folder, preview full path, delete selected with confirmation
- Progress bar + status updates for long scans
- Safe-by-default: confirm before delete — learning template for Python devs
Code :
"""
Python Tkinter - Duplicate File Finder
File: main.py
Features:
- Modern stylish GUI using customtkinter
- Select folder to scan for duplicate files
- Uses MD5 hashing (chunked) to detect duplicates
- Displays duplicate groups in a treeview with file sizes
- Ability to preview file path, open containing folder, and delete selected duplicates
- Threaded scanning to keep UI responsive
- Progress bar and status updates
Requirements:
- Python 3.8+
- customtkinter (pip install customtkinter)
Folder structure (suggested):
duplicate_finder/
├── main.py
├── requirements.txt
└── assets/
    └── icon.png  # optional app icon
Usage:
1. pip install -r requirements.txt
2. python main.py
Notes:
- This is meant as a learning/full project template. Use carefully when deleting files.
- Always double-check files before deleting.
"""
import hashlib
import math
import os
import queue
import subprocess
import sys
import threading
import tkinter as tk
import webbrowser
from pathlib import Path
from tkinter import ttk, filedialog, messagebox

try:
    import customtkinter as ctk
except Exception:
    raise SystemExit("customtkinter is required. Install with: pip install customtkinter")
# Application name; used as the prefix of the main window title.
APP_NAME = "FuzzuDuplicateFinder"
# Default number of bytes read per iteration when hashing a file (see file_md5).
CHUNK_SIZE = 8192
# Global customtkinter look-and-feel, applied once at import time.
ctk.set_appearance_mode("dark") # "dark" or "light"
ctk.set_default_color_theme("blue") # use built-in color themes
def human_readable_size(size, decimal_places=2):
    """Format a byte count as a short human-readable string.

    The value is scaled by powers of 1024 and rounded to ``decimal_places``
    digits, e.g. ``1536`` becomes ``"1.5 KB"``.

    Args:
        size: Number of bytes. Negative values are formatted with a leading
            minus sign.
        decimal_places: Number of digits to round the scaled value to.

    Returns:
        A string of the form ``"<number> <unit>"``; exactly ``"0 B"`` for 0.
    """
    if size == 0:
        return "0 B"
    units = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    sign = "-" if size < 0 else ""
    value = float(abs(size))
    unit_index = 0
    # Repeated division instead of a float logarithm: no rounding surprises at
    # exact powers of 1024, and the index can never run past the largest unit.
    while value >= 1024 and unit_index < len(units) - 1:
        value /= 1024
        unit_index += 1
    return f"{sign}{round(value, decimal_places)} {units[unit_index]}"
def file_md5(path, chunk_size=CHUNK_SIZE):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is opened in binary mode and fed to the hash ``chunk_size`` bytes
    at a time, so it is not read into memory in one piece.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Must not be 0.

    Returns:
        The digest as a hex string, or ``None`` if the file could not be
        opened or read (``OSError``).

    Raises:
        ValueError: If ``chunk_size`` is 0.
    """
    if chunk_size == 0:
        # read(0) always returns b"", so every file would silently get the
        # digest of empty input and unrelated files would look identical.
        raise ValueError("chunk_size must not be 0")
    digest = hashlib.md5()
    try:
        with open(path, "rb") as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
    except OSError:
        # Best effort: unreadable files are skipped by the caller.
        return None
    return digest.hexdigest()
class DuplicateFinderApp(ctk.CTk):
    """Main window: choose a folder, scan it for duplicate files, review and delete.

    Scanning runs in a background thread (``scan_folder``). Widgets and Tk
    variables are only touched from the main thread: the worker hands UI work
    over through ``self._scan_ui_queue``, which ``_drain_scan_ui_queue``
    empties on a timer.
    """

    # Interval (ms) at which the main thread polls for work posted by the worker.
    _POLL_INTERVAL_MS = 50
    # Upper bound on the number of failed paths listed in the error dialog.
    _MAX_ERRORS_SHOWN = 10

    def __init__(self):
        """Build the window, initialise scan state and start the UI-queue poller."""
        super().__init__()
        self.title(f"{APP_NAME} — Find & Remove Duplicate Files")
        self.geometry("930x640")
        self.minsize(880, 560)
        # Main layout: left control panel, right result panel
        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(1, weight=1)
        self._build_control_panel()
        self._build_result_panel()
        # Data holders
        self._stop_event = threading.Event()
        self.scan_thread = None
        self.hash_map = {}  # { md5: [(path, size), ...] } from the last scan
        # (callable, args) pairs posted by the scan thread for the main thread.
        self._scan_ui_queue = queue.Queue()
        # Style tweaks for ttk treeview to blend with customtkinter
        style = ttk.Style(self)
        style.theme_use('default')
        style.configure(
            "Treeview",
            rowheight=26,
            fieldbackground="#222222",
            background="#222222",
            foreground="#ffffff",
        )
        # Bind double click
        self.tree.bind("<Double-1>", self.on_tree_double_click)
        self.after(self._POLL_INTERVAL_MS, self._drain_scan_ui_queue)

    def _build_control_panel(self):
        """Create the left panel: folder entry, scan controls, progress, options."""
        self.control_frame = ctk.CTkFrame(self, width=320, corner_radius=12)
        self.control_frame.grid(row=0, column=0, sticky="nswe", padx=(20, 10), pady=20)
        self.control_frame.grid_rowconfigure(6, weight=1)
        self.lbl_title = ctk.CTkLabel(
            self.control_frame,
            text="Duplicate File Finder",
            font=ctk.CTkFont(size=20, weight="bold"),
        )
        self.lbl_title.grid(row=0, column=0, padx=16, pady=(16, 8), sticky="w")
        self.lbl_desc = ctk.CTkLabel(
            self.control_frame,
            text="Select a folder to scan. The app will detect files with identical content using MD5 hashes.",
            wraplength=260,
            justify="left",
        )
        self.lbl_desc.grid(row=1, column=0, padx=16, pady=(0, 12), sticky="w")
        self.path_var = tk.StringVar()
        self.entry_path = ctk.CTkEntry(
            self.control_frame,
            textvariable=self.path_var,
            placeholder_text="Select folder to scan...",
            width=260,
        )
        self.entry_path.grid(row=2, column=0, padx=16, pady=(0, 8), sticky="w")
        self.btn_browse = ctk.CTkButton(
            self.control_frame, text="Browse Folder", command=self.browse_folder
        )
        self.btn_browse.grid(row=3, column=0, padx=16, pady=(0, 12), sticky="w")
        self.btn_scan = ctk.CTkButton(
            self.control_frame,
            text="Start Scan",
            fg_color="#1f6aa5",
            hover_color="#165d9a",
            command=self.start_scan,
        )
        self.btn_scan.grid(row=4, column=0, padx=16, pady=(0, 8), sticky="w")
        self.btn_stop = ctk.CTkButton(
            self.control_frame,
            text="Stop Scan",
            fg_color="#a51f3b",
            hover_color="#8e1630",
            command=self.stop_scan,
            state="disabled",
        )
        self.btn_stop.grid(row=5, column=0, padx=16, pady=(0, 8), sticky="w")
        self.progress = ttk.Progressbar(
            self.control_frame, orient="horizontal", mode="determinate"
        )
        self.progress.grid(row=6, column=0, padx=16, pady=(8, 8), sticky="we")
        self.status_var = tk.StringVar(value="Idle")
        self.lbl_status = ctk.CTkLabel(
            self.control_frame, textvariable=self.status_var, anchor="w"
        )
        self.lbl_status.grid(row=7, column=0, padx=16, pady=(8, 16), sticky="w")
        # Options
        self.chk_var_preview = tk.BooleanVar(value=True)
        self.chk_preview = ctk.CTkCheckBox(
            self.control_frame, text="Show preview & paths", variable=self.chk_var_preview
        )
        self.chk_preview.grid(row=8, column=0, padx=16, pady=(0, 8), sticky="w")
        self.chk_var_delete_confirm = tk.BooleanVar(value=True)
        self.chk_confirm = ctk.CTkCheckBox(
            self.control_frame,
            text="Confirm before delete",
            variable=self.chk_var_delete_confirm,
        )
        self.chk_confirm.grid(row=9, column=0, padx=16, pady=(0, 8), sticky="w")

    def _build_result_panel(self):
        """Create the right panel: action buttons and the duplicate-group tree."""
        self.result_frame = ctk.CTkFrame(self, corner_radius=12)
        self.result_frame.grid(row=0, column=1, sticky="nswe", padx=(10, 20), pady=20)
        self.result_frame.grid_rowconfigure(2, weight=1)
        self.result_frame.grid_columnconfigure(0, weight=1)
        # Top controls (all in one grid cell, offset horizontally via padx)
        self.btn_expand_all = ctk.CTkButton(
            self.result_frame, text="Expand All", command=self.expand_all
        )
        self.btn_expand_all.grid(row=0, column=0, padx=12, pady=(12, 8), sticky="w")
        self.btn_select_all_dups = ctk.CTkButton(
            self.result_frame,
            text="Select All Duplicates",
            command=self.select_all_duplicates,
        )
        self.btn_select_all_dups.grid(row=0, column=0, padx=(130, 12), pady=(12, 8), sticky="w")
        self.btn_delete_selected = ctk.CTkButton(
            self.result_frame,
            text="Delete Selected",
            fg_color="#d9534f",
            command=self.delete_selected,
        )
        self.btn_delete_selected.grid(row=0, column=0, padx=(320, 12), pady=(12, 8), sticky="w")
        # Treeview for results
        columns = ("size", "path")
        self.tree = ttk.Treeview(self.result_frame, columns=columns, show="tree headings")
        self.tree.heading('#0', text='Hash / Group')
        self.tree.heading('size', text='Size')
        self.tree.heading('path', text='Path')
        self.tree.column('size', width=100, anchor='center')
        self.tree.column('path', width=520, anchor='w')
        vsb = ttk.Scrollbar(self.result_frame, orient="vertical", command=self.tree.yview)
        hsb = ttk.Scrollbar(self.result_frame, orient="horizontal", command=self.tree.xview)
        self.tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)
        self.tree.grid(row=2, column=0, sticky="nswe", padx=(12, 0), pady=(8, 12))
        vsb.grid(row=2, column=1, sticky="ns", pady=(8, 12))
        hsb.grid(row=3, column=0, sticky="we", padx=(12, 0))
        # Bottom action buttons
        self.btn_open_folder = ctk.CTkButton(
            self.result_frame, text="Open Containing Folder", command=self.open_containing
        )
        self.btn_open_folder.grid(row=4, column=0, padx=12, pady=(8, 12), sticky="w")
        self.btn_clear = ctk.CTkButton(
            self.result_frame, text="Clear Results", command=self.clear_results
        )
        self.btn_clear.grid(row=4, column=0, padx=(220, 12), pady=(8, 12), sticky="w")

    # ---------------------- Cross-thread plumbing ----------------------
    def _post_to_ui(self, func, *args):
        """Queue ``func(*args)`` for execution on the main thread."""
        self._scan_ui_queue.put((func, args))

    def _drain_scan_ui_queue(self):
        """Run all queued UI callbacks, then re-arm the poll timer (main thread)."""
        try:
            while True:
                func, args = self._scan_ui_queue.get_nowait()
                func(*args)
        except queue.Empty:
            pass
        finally:
            # Re-arm even if a callback raised, so later messages still arrive.
            self.after(self._POLL_INTERVAL_MS, self._drain_scan_ui_queue)

    @staticmethod
    def _open_with_default_app(path):
        """Open *path* (file or folder) with the platform's default handler.

        Returns:
            True if a launcher was started, False otherwise.
        """
        try:
            if sys.platform.startswith("win"):
                os.startfile(path)
            else:
                launcher = "open" if sys.platform == "darwin" else "xdg-open"
                # Popen rather than run(): do not block the UI on the launcher.
                subprocess.Popen(
                    [launcher, path],
                    stdin=subprocess.DEVNULL,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            return True
        except OSError:
            # Launcher missing or it rejected the path: last-resort fallback.
            return bool(webbrowser.open(path))

    # ---------------------- UI Actions ----------------------
    def browse_folder(self):
        """Ask for a directory and put the choice into the path entry."""
        folder = filedialog.askdirectory()
        if folder:
            self.path_var.set(folder)

    def start_scan(self):
        """Validate the chosen folder and launch the background scan thread."""
        folder = self.path_var.get().strip()
        if not folder or not os.path.isdir(folder):
            messagebox.showwarning("Select Folder", "Please select a valid folder to scan.")
            return
        if self.scan_thread and self.scan_thread.is_alive():
            messagebox.showinfo("Scanning", "A scan is already running.")
            return
        self._stop_event.clear()
        self.hash_map.clear()
        self.clear_results()
        self.progress['value'] = 0
        self.status_var.set("Starting scan...")
        self.btn_scan.configure(state="disabled")
        self.btn_stop.configure(state="normal")
        self.scan_thread = threading.Thread(target=self.scan_folder, args=(folder,), daemon=True)
        self.scan_thread.start()

    def stop_scan(self):
        """Ask the running scan to stop; the worker checks the flag between files."""
        self._stop_event.set()
        self.status_var.set("Stopping scan...")
        self.btn_stop.configure(state="disabled")

    def expand_all(self):
        """Open every duplicate-group row."""
        for item in self.tree.get_children(''):
            self.tree.item(item, open=True)

    def select_all_duplicates(self):
        """Select every file except the first one of each group.

        One copy per group is deliberately left unselected so that a following
        "Delete Selected" cannot remove all copies of a file. Groups are opened
        so the selection is visible.
        """
        redundant = []
        for group in self.tree.get_children(''):
            children = self.tree.get_children(group)
            if len(children) > 1:
                self.tree.item(group, open=True)
                redundant.extend(children[1:])
        # Replaces the current selection (an empty list clears it).
        self.tree.selection_set(redundant)

    def delete_selected(self):
        """Delete the selected file rows from disk and remove them from the tree.

        Group rows in the selection are ignored. Asks for confirmation when the
        "Confirm before delete" option is ticked, then reports how many files
        were deleted and which ones failed.
        """
        selection = self.tree.selection()
        if not selection:
            messagebox.showinfo("No selection", "Please select files to delete.")
            return
        targets = []
        for item in selection:
            # only delete file nodes (not group nodes)
            if not self.tree.parent(item):
                continue
            path = self.tree.set(item, 'path')
            if path:
                targets.append((item, path))
        if not targets:
            messagebox.showinfo("No files", "No deletable files selected.")
            return
        if self.chk_var_delete_confirm.get() and not messagebox.askyesno(
            "Confirm Delete",
            f"Are you sure you want to permanently delete {len(targets)} files?",
        ):
            return
        deleted = 0
        failed = []
        touched_groups = set()
        for item, path in targets:
            try:
                os.remove(path)
            except OSError as e:
                failed.append((path, str(e)))
                continue
            touched_groups.add(self.tree.parent(item))
            self.tree.delete(item)
            deleted += 1
        # Drop group rows that no longer list any file.
        for group in touched_groups:
            if not self.tree.get_children(group):
                self.tree.delete(group)
        if failed:
            shown = failed[:self._MAX_ERRORS_SHOWN]
            details = "\n".join(f"{path}: {error}" for path, error in shown)
            hidden = len(failed) - len(shown)
            if hidden:
                details += f"\n... and {hidden} more"
            messagebox.showwarning(
                "Delete Errors",
                f"Deleted {deleted} files. {len(failed)} files could not be deleted:\n{details}",
            )
        else:
            messagebox.showinfo("Deleted", f"Deleted {deleted} files.")

    def open_containing(self):
        """Open the folder containing the selected file (or a group's first file)."""
        selection = self.tree.selection()
        if not selection:
            messagebox.showinfo("No selection", "Please select a file to open its folder.")
            return
        item = selection[0]
        path = None
        if self.tree.parent(item):
            path = self.tree.set(item, 'path')
        else:
            # if group node selected, open first child
            children = self.tree.get_children(item)
            if children:
                path = self.tree.set(children[0], 'path')
        if not path:
            return
        folder = os.path.dirname(path)
        if os.path.isdir(folder):
            self._open_with_default_app(folder)
        else:
            messagebox.showwarning("Folder not found", "Containing folder not found.")

    def clear_results(self):
        """Remove every row from the result tree."""
        for item in self.tree.get_children(''):
            self.tree.delete(item)

    def on_tree_double_click(self, event):
        """Open the double-clicked file row with its default program.

        Group rows are ignored. If no launcher could be started, the path is
        shown in a dialog instead.
        """
        selection = self.tree.selection()
        if not selection:
            return
        item = selection[0]
        if not self.tree.parent(item):
            return
        path = self.tree.set(item, 'path')
        if path and os.path.isfile(path):
            # open file with default program
            if not self._open_with_default_app(path):
                messagebox.showinfo("Open File", f"File path: {path}")

    # ---------------------- Scanning Logic ----------------------
    def scan_folder(self, folder):
        """Scan *folder* for duplicate files; entry point of the scan thread.

        Does not touch widgets directly: progress and the final result are
        posted to the main thread, where ``_finish_scan`` fills the tree and
        re-enables the buttons.

        Args:
            folder: Directory to walk recursively.
        """
        try:
            summary, hash_map = self._find_duplicates(folder)
        except Exception as e:
            # Top-level boundary of the worker thread: report the error in the
            # status line instead of letting the thread die silently.
            summary, hash_map = f"Error: {e}", {}
        self._post_to_ui(self._finish_scan, summary, hash_map)

    def _find_duplicates(self, folder):
        """Return ``(status_text, hash_map)`` for *folder* (worker thread)."""
        files = self._collect_files(folder)
        if self._stop_event.is_set():
            return "Scan stopped.", {}
        if not files:
            return "No files found.", {}
        self._post_to_ui(self.status_var.set, f"Scanning {len(files)} files...")
        # First group by size to avoid hashing files with unique sizes
        by_size = {}
        for path, size in files:
            by_size.setdefault(size, []).append(path)
        candidates = [
            (path, size)
            for size, paths in by_size.items()
            if len(paths) > 1
            for path in paths
        ]
        if not candidates:
            return "No duplicates found.", {}
        hash_map = self._hash_candidates(candidates)
        group_count = sum(1 for entries in hash_map.values() if len(entries) > 1)
        if group_count:
            result = f"Found {group_count} duplicate groups"
        else:
            result = "No duplicates found after hashing."
        outcome = "Scan stopped." if self._stop_event.is_set() else "Scan complete."
        return f"{outcome} {result}", hash_map

    def _collect_files(self, folder):
        """Walk *folder* and return ``[(path, size), ...]`` (worker thread).

        Files whose size cannot be read are skipped. The stop flag is checked
        once per directory.
        """
        files = []
        for root, _dirs, filenames in os.walk(folder):
            if self._stop_event.is_set():
                break
            for name in filenames:
                full = os.path.join(root, name)
                try:
                    size = os.path.getsize(full)
                except OSError:
                    continue
                files.append((full, size))
        return files

    def _hash_candidates(self, candidates):
        """Hash each candidate and return ``{md5: [(path, size), ...]}`` (worker thread).

        Files that ``file_md5`` cannot read are left out. Stops early when the
        stop flag is set.
        """
        hash_map = {}
        total = len(candidates)
        # Report roughly 200 times per scan rather than once per file, so the
        # main thread is not flooded with progress messages.
        step = max(1, total // 200)
        for processed, (path, size) in enumerate(candidates, start=1):
            if self._stop_event.is_set():
                break
            digest = file_md5(path)
            if digest:
                hash_map.setdefault(digest, []).append((path, size))
            if processed % step == 0 or processed == total:
                self._post_to_ui(self._show_progress, processed, total)
        return hash_map

    def _show_progress(self, processed, total):
        """Update the progress bar and status line (main thread)."""
        self.progress['maximum'] = total
        self.progress['value'] = processed
        self.status_var.set(f"Hashed {processed}/{total} candidates")

    def _finish_scan(self, summary, hash_map):
        """Show the scan result and restore the buttons (main thread).

        Args:
            summary: Final text for the status line.
            hash_map: ``{md5: [(path, size), ...]}``; only entries with more
                than one file become groups in the tree.
        """
        self.hash_map = hash_map
        try:
            for digest, entries in hash_map.items():
                if len(entries) < 2:
                    continue
                # parent node shows hash and count
                group_bytes = sum(size for _, size in entries)
                group_id = self.tree.insert(
                    '',
                    'end',
                    text=f"{digest} ({len(entries)} files)",
                    values=(human_readable_size(group_bytes), ''),
                )
                for file_path, size in entries:
                    self.tree.insert(
                        group_id,
                        'end',
                        text=os.path.basename(file_path),
                        values=(human_readable_size(size), file_path),
                    )
        finally:
            # Always leave the controls usable, even if filling the tree failed.
            self.btn_scan.configure(state="normal")
            self.btn_stop.configure(state="disabled")
            self.status_var.set(summary)
if __name__ == '__main__':
app = DuplicateFinderApp()
app.mainloop()
Comments
Post a Comment