FuzzuDuplicateFinder — Python Desktop App to Find & Remove Duplicate Files (CustomTkinter GUI)

 Demo :


Click Video 👇👇👇





























Features :

  • Modern customtkinter UI with responsive threading

  • Chunked MD5 hashing for accurate duplicate detection

  • Size pre-filter to skip unique files (faster)

  • Treeview grouped duplicate preview with file sizes

  • Open containing folder, preview full path, delete selected with confirmation

  • Progress bar + status updates for long scans

  • Safe-by-default: confirm before delete — learning template for Python devs


Code :


"""

Python Tkinter - Duplicate File Finder

File: main.py


Features:

- Modern stylish GUI using customtkinter

- Select folder to scan for duplicate files

- Uses MD5 hashing (chunked) to detect duplicates

- Displays duplicate groups in a treeview with file sizes

- Ability to preview file path, open containing folder, and delete selected duplicates

- Threaded scanning to keep UI responsive

- Progress bar and status updates


Requirements:

- Python 3.8+

- customtkinter (pip install customtkinter)


Folder structure (suggested):

duplicate_finder/

├── main.py

├── requirements.txt

└── assets/

    ├── icon.png   # optional app icon


Usage:

1. pip install -r requirements.txt

2. python main.py


Notes:

- This is meant as a learning/full project template. Use carefully when deleting files.

- Always double-check files before deleting.


"""


import os

import hashlib

import threading

import math

import webbrowser

from pathlib import Path

import tkinter as tk

from tkinter import ttk, filedialog, messagebox


try:
    import customtkinter as ctk
except ImportError:
    # Only a missing/unimportable package should produce the install hint;
    # any other failure raised while importing is left to surface with its
    # own traceback instead of being reported as "not installed".
    raise SystemExit("customtkinter is required. Install with: pip install customtkinter")


# Application name; used in the main window title.
APP_NAME = "FuzzuDuplicateFinder"

# Default number of bytes read per chunk when hashing a file (see file_md5).
CHUNK_SIZE = 8192


# Global customtkinter look-and-feel, applied once at import time.
ctk.set_appearance_mode("dark")  # "dark" or "light"

ctk.set_default_color_theme("blue")  # use built-in color themes



def human_readable_size(size, decimal_places=2):
    """Format a byte count as a short human-readable string.

    Args:
        size: Number of bytes.
        decimal_places: Digits kept after the decimal point when rounding.

    Returns:
        A string such as ``"0 B"``, ``"1.5 KB"`` or ``"1.0 GB"``. Values of
        1024 TB and above stay in TB (e.g. ``"1024.0 TB"``) because TB is the
        largest unit in the table.
    """
    if size == 0:
        return "0 B"
    units = ["B", "KB", "MB", "GB", "TB"]
    value = float(size)
    unit_index = 0
    # Repeated division avoids floating-point log() landing just below an
    # exact power of 1024, and the bound keeps the index inside `units`.
    while abs(value) >= 1024 and unit_index < len(units) - 1:
        value /= 1024
        unit_index += 1
    return f"{round(value, decimal_places)} {units[unit_index]}"



def file_md5(path, chunk_size=CHUNK_SIZE):
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so large
    files are never loaded into memory at once.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration.

    Returns:
        The digest as a hex string, or ``None`` if the file could not be
        opened or read (missing file, permission denied, invalid path, ...).
    """
    digest = hashlib.md5()
    try:
        with open(path, "rb") as f:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
    except (OSError, ValueError):
        # Unreadable files are reported as "no hash" so a scan can skip them;
        # unrelated programming errors are no longer swallowed here.
        return None
    return digest.hexdigest()



class DuplicateFinderApp(ctk.CTk):
    """Main window: choose a folder, scan it for duplicate files, review and delete them.

    The scan runs in a background thread started by ``start_scan``. The worker
    never touches Tk widgets or variables: it appends messages to
    ``_ui_messages`` (guarded by ``_ui_lock``) and the main thread applies
    them from ``_drain_ui_messages``, which reschedules itself with ``after``
    until the worker reports that it has finished.
    """

    # How often (in milliseconds) the main thread polls for worker messages.
    _POLL_INTERVAL_MS = 100

    def __init__(self):
        """Build the window, both panels and the scan bookkeeping state."""
        super().__init__()
        self.title(f"{APP_NAME} — Find & Remove Duplicate Files")
        self.geometry("930x640")
        self.minsize(880, 560)

        # Main layout: left control panel, right result panel
        self.grid_rowconfigure(0, weight=1)
        self.grid_columnconfigure(1, weight=1)

        self._build_control_panel()
        self._build_result_panel()

        # Data holders
        self._stop_event = threading.Event()
        self.scan_thread = None
        self.hash_map = {}  # { md5: [(path, size), ...] }
        self._ui_lock = threading.Lock()
        self._ui_messages = []  # [(kind, payload_tuple), ...] posted by the worker

        # Style tweaks for ttk treeview to blend with customtkinter
        style = ttk.Style(self)
        style.theme_use('default')
        style.configure(
            "Treeview",
            rowheight=26,
            fieldbackground="#222222",
            background="#222222",
            foreground="#ffffff",
        )

        # Bind double click
        self.tree.bind("<Double-1>", self.on_tree_double_click)

    # ---------------------- UI Construction ----------------------
    def _build_control_panel(self):
        """Create the left panel: folder picker, scan buttons, progress and options."""
        self.control_frame = ctk.CTkFrame(self, width=320, corner_radius=12)
        self.control_frame.grid(row=0, column=0, sticky="nswe", padx=(20, 10), pady=20)
        self.control_frame.grid_rowconfigure(6, weight=1)

        self.lbl_title = ctk.CTkLabel(
            self.control_frame,
            text="Duplicate File Finder",
            font=ctk.CTkFont(size=20, weight="bold"),
        )
        self.lbl_title.grid(row=0, column=0, padx=16, pady=(16, 8), sticky="w")

        self.lbl_desc = ctk.CTkLabel(
            self.control_frame,
            text="Select a folder to scan. The app will detect files with identical content using MD5 hashes.",
            wraplength=260,
            justify="left",
        )
        self.lbl_desc.grid(row=1, column=0, padx=16, pady=(0, 12), sticky="w")

        self.path_var = tk.StringVar()
        self.entry_path = ctk.CTkEntry(
            self.control_frame,
            textvariable=self.path_var,
            placeholder_text="Select folder to scan...",
            width=260,
        )
        self.entry_path.grid(row=2, column=0, padx=16, pady=(0, 8), sticky="w")

        self.btn_browse = ctk.CTkButton(self.control_frame, text="Browse Folder", command=self.browse_folder)
        self.btn_browse.grid(row=3, column=0, padx=16, pady=(0, 12), sticky="w")

        self.btn_scan = ctk.CTkButton(
            self.control_frame,
            text="Start Scan",
            fg_color="#1f6aa5",
            hover_color="#165d9a",
            command=self.start_scan,
        )
        self.btn_scan.grid(row=4, column=0, padx=16, pady=(0, 8), sticky="w")

        self.btn_stop = ctk.CTkButton(
            self.control_frame,
            text="Stop Scan",
            fg_color="#a51f3b",
            hover_color="#8e1630",
            command=self.stop_scan,
            state="disabled",
        )
        self.btn_stop.grid(row=5, column=0, padx=16, pady=(0, 8), sticky="w")

        self.progress = ttk.Progressbar(self.control_frame, orient="horizontal", mode="determinate")
        self.progress.grid(row=6, column=0, padx=16, pady=(8, 8), sticky="we")

        self.status_var = tk.StringVar(value="Idle")
        self.lbl_status = ctk.CTkLabel(self.control_frame, textvariable=self.status_var, anchor="w")
        self.lbl_status.grid(row=7, column=0, padx=16, pady=(8, 16), sticky="w")

        # Options
        self.chk_var_preview = tk.BooleanVar(value=True)
        self.chk_preview = ctk.CTkCheckBox(
            self.control_frame, text="Show preview & paths", variable=self.chk_var_preview
        )
        self.chk_preview.grid(row=8, column=0, padx=16, pady=(0, 8), sticky="w")

        self.chk_var_delete_confirm = tk.BooleanVar(value=True)
        self.chk_confirm = ctk.CTkCheckBox(
            self.control_frame, text="Confirm before delete", variable=self.chk_var_delete_confirm
        )
        self.chk_confirm.grid(row=9, column=0, padx=16, pady=(0, 8), sticky="w")

    def _build_result_panel(self):
        """Create the right panel: action buttons and the duplicate-group tree."""
        self.result_frame = ctk.CTkFrame(self, corner_radius=12)
        self.result_frame.grid(row=0, column=1, sticky="nswe", padx=(10, 20), pady=20)
        self.result_frame.grid_rowconfigure(2, weight=1)
        self.result_frame.grid_columnconfigure(0, weight=1)

        # Top controls (all share grid cell 0,0 and are offset with left padding)
        self.btn_expand_all = ctk.CTkButton(self.result_frame, text="Expand All", command=self.expand_all)
        self.btn_expand_all.grid(row=0, column=0, padx=12, pady=(12, 8), sticky="w")

        self.btn_select_all_dups = ctk.CTkButton(
            self.result_frame, text="Select All Duplicates", command=self.select_all_duplicates
        )
        self.btn_select_all_dups.grid(row=0, column=0, padx=(130, 12), pady=(12, 8), sticky="w")

        self.btn_delete_selected = ctk.CTkButton(
            self.result_frame, text="Delete Selected", fg_color="#d9534f", command=self.delete_selected
        )
        self.btn_delete_selected.grid(row=0, column=0, padx=(320, 12), pady=(12, 8), sticky="w")

        # Treeview for results
        columns = ("size", "path")
        self.tree = ttk.Treeview(self.result_frame, columns=columns, show="tree headings")
        self.tree.heading('#0', text='Hash / Group')
        self.tree.heading('size', text='Size')
        self.tree.heading('path', text='Path')
        self.tree.column('size', width=100, anchor='center')
        self.tree.column('path', width=520, anchor='w')

        vsb = ttk.Scrollbar(self.result_frame, orient="vertical", command=self.tree.yview)
        hsb = ttk.Scrollbar(self.result_frame, orient="horizontal", command=self.tree.xview)
        self.tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)

        self.tree.grid(row=2, column=0, sticky="nswe", padx=(12, 0), pady=(8, 12))
        vsb.grid(row=2, column=1, sticky="ns", pady=(8, 12))
        hsb.grid(row=3, column=0, sticky="we", padx=(12, 0))

        # Bottom action buttons
        self.btn_open_folder = ctk.CTkButton(
            self.result_frame, text="Open Containing Folder", command=self.open_containing
        )
        self.btn_open_folder.grid(row=4, column=0, padx=12, pady=(8, 12), sticky="w")

        self.btn_clear = ctk.CTkButton(self.result_frame, text="Clear Results", command=self.clear_results)
        self.btn_clear.grid(row=4, column=0, padx=(220, 12), pady=(8, 12), sticky="w")

    # ---------------------- UI Actions ----------------------
    def browse_folder(self):
        """Ask the user for a directory and put it into the path entry."""
        folder = filedialog.askdirectory()
        if folder:
            self.path_var.set(folder)

    def start_scan(self):
        """Validate the chosen folder, reset the UI and start the scan thread."""
        folder = self.path_var.get().strip()
        if not folder or not os.path.isdir(folder):
            messagebox.showwarning("Select Folder", "Please select a valid folder to scan.")
            return
        if self.scan_thread and self.scan_thread.is_alive():
            messagebox.showinfo("Scanning", "A scan is already running.")
            return

        self._stop_event.clear()
        self.hash_map.clear()
        with self._ui_lock:
            # Drop anything left over from a previous scan.
            self._ui_messages = []
        self.clear_results()
        self.progress['value'] = 0
        self.status_var.set("Starting scan...")
        self.btn_scan.configure(state="disabled")
        self.btn_stop.configure(state="normal")

        self.scan_thread = threading.Thread(target=self.scan_folder, args=(folder,), daemon=True)
        self.scan_thread.start()
        # Begin applying the worker's messages on the main thread.
        self.after(self._POLL_INTERVAL_MS, self._drain_ui_messages)

    def stop_scan(self):
        """Ask the running scan to stop at its next check of the stop event."""
        self._stop_event.set()
        self.status_var.set("Stopping scan...")
        self.btn_stop.configure(state="disabled")

    def expand_all(self):
        """Open every top-level group node in the tree."""
        for item in self.tree.get_children(''):
            self.tree.item(item, open=True)

    def select_all_duplicates(self):
        """Select every file except the first one in each duplicate group.

        The first file of each group is deliberately left unselected so that
        "Select All Duplicates" followed by "Delete Selected" always keeps one
        copy of every file instead of deleting all copies.
        """
        self.tree.selection_remove(self.tree.selection())
        for group in self.tree.get_children(''):
            for child in self.tree.get_children(group)[1:]:
                self.tree.selection_add(child)

    def delete_selected(self):
        """Delete the selected file rows from disk and from the tree.

        Group rows in the selection are ignored. When the "Confirm before
        delete" option is checked the user is asked once for the whole batch.
        Files that cannot be removed stay in the tree and are listed in a
        warning dialog; groups left without any file row are removed.
        """
        sel = self.tree.selection()
        if not sel:
            messagebox.showinfo("No selection", "Please select files to delete.")
            return

        # only delete file nodes (not group nodes)
        targets = []
        for item in sel:
            if not self.tree.parent(item):
                continue
            path = self.tree.set(item, 'path')
            if path:
                targets.append((item, path))
        if not targets:
            messagebox.showinfo("No files", "No deletable files selected.")
            return

        if self.chk_var_delete_confirm.get():
            if not messagebox.askyesno(
                "Confirm Delete",
                f"Are you sure you want to permanently delete {len(targets)} files?",
            ):
                return

        failed = []
        touched_groups = set()
        for item, path in targets:
            group = self.tree.parent(item)
            try:
                os.remove(path)
            except OSError as e:
                failed.append((path, str(e)))
            else:
                # The row id is already known, so no tree search is needed.
                self.tree.delete(item)
                touched_groups.add(group)

        # Remove group rows that no longer contain any file.
        for group in touched_groups:
            if not self.tree.get_children(group):
                self.tree.delete(group)

        if failed:
            details = "\n".join(f"{path}: {reason}" for path, reason in failed)
            messagebox.showwarning("Delete Errors", f"Some files could not be deleted.\n{details}")
        else:
            messagebox.showinfo("Deleted", f"Deleted {len(targets)} files.")

    def open_containing(self):
        """Open the folder of the selected file (or of a selected group's first file)."""
        sel = self.tree.selection()
        if not sel:
            messagebox.showinfo("No selection", "Please select a file to open its folder.")
            return
        item = sel[0]
        path = None
        if self.tree.parent(item):
            path = self.tree.set(item, 'path')
        else:
            # if group node selected, open first child
            children = self.tree.get_children(item)
            if children:
                path = self.tree.set(children[0], 'path')
        if not path:
            return
        folder = os.path.dirname(path)
        if os.path.isdir(folder):
            webbrowser.open(folder)
        else:
            messagebox.showwarning("Folder not found", "Containing folder not found.")

    def clear_results(self):
        """Remove every row from the results tree."""
        for it in self.tree.get_children(''):
            self.tree.delete(it)

    def on_tree_double_click(self, event):
        """Open the double-clicked file row with the system's default program."""
        selection = self.tree.selection()
        if not selection:
            return
        item = selection[0]
        if not self.tree.parent(item):
            return  # group rows have no file to open
        path = self.tree.set(item, 'path')
        if not (path and os.path.isfile(path)):
            return
        try:
            # os.startfile only exists on Windows; elsewhere this raises
            # AttributeError and the webbrowser fallback below is used.
            os.startfile(path)
            return
        except (AttributeError, OSError):
            pass
        opened = False
        try:
            # webbrowser.open reports failure through its return value.
            opened = webbrowser.open(path)
        except Exception:
            opened = False
        if not opened:
            messagebox.showinfo("Open File", f"File path: {path}")

    # ---------------------- Worker -> UI messaging ----------------------
    def _post(self, kind, *payload):
        """Queue a message for the main thread; safe to call from the worker."""
        with self._ui_lock:
            self._ui_messages.append((kind, payload))

    def _drain_ui_messages(self):
        """Apply queued worker messages to the widgets (runs on the main thread).

        Only the newest message of each kind is applied. Polling continues via
        ``after`` until a "finished" message has been handled.
        """
        with self._ui_lock:
            pending, self._ui_messages = self._ui_messages, []
        latest = {}
        for kind, payload in pending:
            latest[kind] = payload

        if "progress" in latest:
            done, total = latest["progress"]
            self.progress.configure(maximum=total, value=done)
        if "status" in latest:
            self.status_var.set(latest["status"][0])
        if "finished" in latest:
            groups, final_status = latest["finished"]
            self._show_groups(groups)
            self.status_var.set(final_status)
            self.btn_scan.configure(state="normal")
            self.btn_stop.configure(state="disabled")
            return
        self.after(self._POLL_INTERVAL_MS, self._drain_ui_messages)

    def _show_groups(self, groups):
        """Insert one parent row per hash and one child row per file."""
        for digest, entries in groups.items():
            # parent node shows hash and count
            total_group_size = sum(size for _, size in entries)
            parent_id = self.tree.insert(
                '',
                'end',
                text=f"{digest}  ({len(entries)} files)",
                values=(human_readable_size(total_group_size), ''),
            )
            for file_path, size in entries:
                self.tree.insert(
                    parent_id,
                    'end',
                    text=os.path.basename(file_path),
                    values=(human_readable_size(size), file_path),
                )

    # ---------------------- Scanning Logic ----------------------
    def scan_folder(self, folder):
        """Thread target: scan ``folder`` and report the outcome to the UI.

        Results are delivered as queued messages, so they only become visible
        while ``_drain_ui_messages`` is being polled (``start_scan`` arranges
        that). A "finished" message is always posted, even on error, so the
        buttons are re-enabled in every case.
        """
        groups = {}
        final_status = "Scan complete."
        try:
            groups, final_status = self._find_duplicates(folder)
        except Exception as e:
            # Top-level boundary of the worker thread: report instead of dying silently.
            groups = {}
            final_status = f"Error: {e}"
        finally:
            self._post("finished", groups, final_status)

    def _find_duplicates(self, folder):
        """Walk ``folder``, hash same-sized files and group them by MD5.

        Fills ``self.hash_map`` and returns ``(groups, final_status)`` where
        ``groups`` maps each hash shared by more than one file to its list of
        ``(path, size)`` tuples. Does not touch any widget.
        """
        stop = self._stop_event

        # Group by size first to avoid hashing files with unique sizes.
        size_map = {}
        file_count = 0
        for root, _dirs, filenames in os.walk(folder):
            if stop.is_set():
                break
            for name in filenames:
                full = os.path.join(root, name)
                try:
                    size = os.path.getsize(full)
                except OSError:
                    continue  # unreadable or vanished entry: skip it
                size_map.setdefault(size, []).append(full)
                file_count += 1

        if stop.is_set():
            return {}, "Scan stopped."
        if file_count == 0:
            return {}, "No files found."
        self._post("status", f"Scanning {file_count} files...")

        # Candidate files are those sharing their size with at least one other file.
        candidates = [
            (path, size)
            for size, paths in size_map.items()
            if len(paths) > 1
            for path in paths
        ]
        total_candidates = len(candidates)
        if total_candidates == 0:
            return {}, "No duplicates found."

        self._post("progress", 0, total_candidates)
        for processed, (path, size) in enumerate(candidates, start=1):
            if stop.is_set():
                break
            md5 = file_md5(path)
            if md5:
                self.hash_map.setdefault(md5, []).append((path, size))
            self._post("progress", processed, total_candidates)
            self._post("status", f"Hashed {processed}/{total_candidates} candidates")

        # Keep only hashes shared by more than one file.
        groups = {h: arr for h, arr in self.hash_map.items() if len(arr) > 1}
        if stop.is_set():
            return groups, f"Scan stopped. Found {len(groups)} duplicate groups so far"
        if not groups:
            return groups, "No duplicates found after hashing."
        return groups, f"Scan complete. Found {len(groups)} duplicate groups"



if __name__ == "__main__":
    # Script entry point: build the main window and run the Tk event loop
    # until the window is closed.
    DuplicateFinderApp().mainloop()

Comments

Popular posts from this blog

🚀 Simple Login & Registration System in Python Tkinter 📱

🚀 Create a Python Screen Recorder with Audio (Complete Code)

📡 Fuzzu Packet Sniffer – Python GUI for Real-Time IP Monitoring | Tkinter + Scapy