From 4da03d9e37a5b53257756d587176c84254e01363 Mon Sep 17 00:00:00 2001
From: glitchySid
Date: Tue, 13 Jan 2026 19:13:26 +0530
Subject: [PATCH] Find Duplicate Files and prompts to delete them.

---
Review note: line structure and the generic parameter lists lost in extraction
were restored. delete_duplicates now reads each file's size before removing it
and counts failed deletions, and the wrappers in mod.rs no longer print the
summary a second time. Hunk sizes and the diffstat reflect those edits; the
blob ids on the `index` lines were not recomputed.

 src/cli/args.rs                           |   3 +
 src/files/duplicate/confirmation.rs       |  36 ++++++
 src/files/duplicate/display.rs            |  34 +++++
 src/files/duplicate/duplicate_detector.rs | 149 ++++++++++++++++++++++
 src/files/duplicate/mod.rs                |  43 +++++++
 src/files/duplicate/types.rs              |  77 +++++++++++
 src/files/mod.rs                          |   1 +
 src/main.rs                               |  13 +-
 tests/test_online_handler.rs              |   2 +
 tests/test_undo_handler.rs                |   1 +
 10 files changed, 353 insertions(+), 6 deletions(-)
 create mode 100644 src/files/duplicate/confirmation.rs
 create mode 100644 src/files/duplicate/display.rs
 create mode 100644 src/files/duplicate/duplicate_detector.rs
 create mode 100644 src/files/duplicate/mod.rs
 create mode 100644 src/files/duplicate/types.rs

diff --git a/src/cli/args.rs b/src/cli/args.rs
index fd16a2d..e30bfe1 100644
--- a/src/cli/args.rs
+++ b/src/cli/args.rs
@@ -26,6 +26,9 @@ pub struct Args {
     #[arg(long, help = "Use offline mode (extension-based categorization)")]
     pub offline: bool,
 
+    #[arg(long, help = "Detect duplicate files")]
+    pub duplicate: bool,
+
     /// Optional path to organize instead of the configured download folder
     ///
     /// If provided, this path will be used instead of the download folder
diff --git a/src/files/duplicate/confirmation.rs b/src/files/duplicate/confirmation.rs
new file mode 100644
index 0000000..1863227
--- /dev/null
+++ b/src/files/duplicate/confirmation.rs
@@ -0,0 +1,36 @@
+use super::types::DuplicateError;
+use std::io;
+
+pub trait ConfirmationStrategy {
+    fn confirm(&self) -> Result<bool, DuplicateError>;
+}
+
+pub struct StdinConfirmation;
+
+impl ConfirmationStrategy for StdinConfirmation {
+    fn confirm(&self) -> Result<bool, DuplicateError> {
+        eprint!("\nDo you want to apply these changes? [y/N]: ");
+
+        let mut input = String::new();
+        if io::stdin().read_line(&mut input).is_err() {
+            return Err(DuplicateError::InputReadFailed(
+                "Failed to read input. Operation cancelled.".to_string(),
+            ));
+        }
+
+        let input = input.trim().to_lowercase();
+        if input != "y" && input != "yes" {
+            return Err(DuplicateError::UserCancelled);
+        }
+
+        Ok(true)
+    }
+}
+
+pub struct AutoConfirm;
+
+impl ConfirmationStrategy for AutoConfirm {
+    fn confirm(&self) -> Result<bool, DuplicateError> {
+        Ok(true)
+    }
+}
diff --git a/src/files/duplicate/display.rs b/src/files/duplicate/display.rs
new file mode 100644
index 0000000..89fdac5
--- /dev/null
+++ b/src/files/duplicate/display.rs
@@ -0,0 +1,34 @@
+use super::types::DuplicateSummary;
+use colored::*;
+
+pub(super) fn print_duplicate_summary(summary: &DuplicateSummary) {
+    println!("\n{}", "Duplicate Removal Complete!".bold().green());
+
+    if summary.duplicate_count() > 0 || summary.error_count() > 0 {
+        println!(
+            "Files deleted: {}, Space saved: {}, Errors: {}",
+            summary.duplicate_count().to_string().green(),
+            format_size(summary.total_size_saved()).blue(),
+            summary.error_count().to_string().red()
+        );
+    } else {
+        println!("{}", "No duplicate files were deleted.".yellow());
+    }
+}
+
+fn format_size(bytes: u64) -> String {
+    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
+    let mut size = bytes as f64;
+    let mut unit_index = 0;
+
+    while size >= 1024.0 && unit_index < UNITS.len() - 1 {
+        size /= 1024.0;
+        unit_index += 1;
+    }
+
+    if unit_index == 0 {
+        format!("{} {}", bytes, UNITS[unit_index])
+    } else {
+        format!("{:.2} {}", size, UNITS[unit_index])
+    }
+}
diff --git a/src/files/duplicate/duplicate_detector.rs b/src/files/duplicate/duplicate_detector.rs
new file mode 100644
index 0000000..d31422c
--- /dev/null
+++ b/src/files/duplicate/duplicate_detector.rs
@@ -0,0 +1,149 @@
+use std::{collections::HashMap, fs::File, path::Path};
+
+use blake3::Hasher;
+use colored::Colorize;
+use walkdir::WalkDir;
+
+use crate::files::duplicate::{
+    confirmation::ConfirmationStrategy,
+    display::print_duplicate_summary,
+    types::{DuplicateError, DuplicateSummary},
+};
+use crate::settings::get_or_prompt_download_folder;
+
+pub fn compute_file_hash(path: &Path) -> Result<blake3::Hash, std::io::Error> {
+    let mut file = File::open(path)?;
+    let mut hasher = Hasher::new();
+    std::io::copy(&mut file, &mut hasher)?;
+    Ok(hasher.finalize())
+}
+
+pub fn find_duplicates<'a>(paths: &[&'a Path]) -> Vec<Vec<&'a Path>> {
+    let mut hash_map: HashMap<blake3::Hash, Vec<&'a Path>> = HashMap::new();
+
+    for &path in paths {
+        if let Ok(hash) = compute_file_hash(path) {
+            hash_map.entry(hash).or_default().push(path);
+        }
+    }
+
+    hash_map
+        .into_values()
+        .filter(|files| files.len() > 1)
+        .collect()
+}
+
+pub fn print_duplicates(path: &Path, recursive: bool) -> Result<(), DuplicateError> {
+    let mut file_paths = Vec::new();
+
+    let walker = if recursive {
+        WalkDir::new(path).follow_links(false)
+    } else {
+        WalkDir::new(path).max_depth(1).follow_links(false)
+    };
+
+    for entry in walker.into_iter() {
+        let entry = entry?;
+        if entry.file_type().is_file() {
+            file_paths.push(entry.path().to_path_buf());
+        }
+    }
+
+    let refs: Vec<&Path> = file_paths.iter().map(|p| p.as_path()).collect();
+    let duplicates = find_duplicates(&refs);
+
+    if duplicates.is_empty() {
+        return Err(DuplicateError::NoDuplicate);
+    } else {
+        println!("Duplicate files:");
+        for group in duplicates {
+            for file in group {
+                println!("{}", format!("{}", file.display()).green());
+            }
+            println!();
+        }
+    }
+
+    Ok(())
+}
+
+pub fn execute_delete_duplicates<C: ConfirmationStrategy>(
+    confirmation: &C,
+    recursive: bool,
+) -> Result<DuplicateSummary, DuplicateError> {
+    let download_path = get_or_prompt_download_folder()?;
+    match print_duplicates(&download_path, recursive) {
+        Ok(_) => {
+            confirmation.confirm()?;
+
+            let summary = delete_duplicates(&download_path, recursive)?;
+            print_duplicate_summary(&summary);
+            Ok(summary)
+        }
+        Err(e) => Err(e),
+    }
+}
+
+/// Deletes the duplicates found under `path`, keeping the first file of each group.
+///
+/// The returned summary counts deleted files, bytes freed and failed deletions.
+pub fn delete_duplicates(path: &Path, recursive: bool) -> Result<DuplicateSummary, DuplicateError> {
+    let mut file_paths = Vec::new();
+    let mut summary = DuplicateSummary::new();
+
+    let walker = if recursive {
+        WalkDir::new(path).follow_links(false)
+    } else {
+        WalkDir::new(path).max_depth(1).follow_links(false)
+    };
+
+    for entry in walker.into_iter() {
+        let entry = entry?;
+        if entry.file_type().is_file() {
+            file_paths.push(entry.path().to_path_buf());
+        }
+    }
+
+    let refs: Vec<&Path> = file_paths.iter().map(|p| p.as_path()).collect();
+    let duplicates = find_duplicates(&refs);
+
+    if duplicates.is_empty() {
+        println!("No duplicate files found to delete.");
+        return Ok(summary);
+    }
+
+    let mut total_deleted = 0;
+
+    for group in duplicates {
+        if group.len() < 2 {
+            continue;
+        }
+
+        // Keep the first file, delete the rest
+        let to_keep = &group[0];
+        let to_delete = &group[1..];
+
+        println!("Keeping: {}", to_keep.display());
+
+        for file in to_delete {
+            // Read the size first: metadata is gone once the file is removed.
+            let size = std::fs::metadata(file).map(|m| m.len()).unwrap_or(0);
+            match std::fs::remove_file(file) {
+                Ok(_) => {
+                    println!("Deleted: {}", file.display());
+                    total_deleted += 1;
+                    summary.duplicated();
+                    summary.size_saved(size);
+                }
+                Err(e) => {
+                    eprintln!("Error deleting file {}: {}", file.display(), e);
+                    summary.errored();
+                }
+            }
+        }
+        println!();
+    }
+
+    println!("Total files deleted: {}", total_deleted);
+    Ok(summary)
+}
diff --git a/src/files/duplicate/mod.rs b/src/files/duplicate/mod.rs
new file mode 100644
index 0000000..6ff9703
--- /dev/null
+++ b/src/files/duplicate/mod.rs
@@ -0,0 +1,43 @@
+pub mod confirmation;
+pub mod display;
+pub mod duplicate_detector;
+pub mod types;
+
+use crate::settings::get_or_prompt_download_folder;
+pub use confirmation::{AutoConfirm, ConfirmationStrategy, StdinConfirmation};
+use duplicate_detector::{execute_delete_duplicates, print_duplicates};
+pub use types::{DuplicateError, DuplicateSummary};
+
+/// Lists duplicates in the download folder and deletes them after a y/N prompt.
+pub fn execute_delete(recursive: bool) {
+    let confirmation = StdinConfirmation;
+    // The summary is already printed by `execute_delete_duplicates`; printing it
+    // here as well would show it twice.
+    if let Err(err) = execute_delete_duplicates(&confirmation, recursive) {
+        eprintln!("Error deleting duplicates: {}", err);
+    }
+}
+
+pub fn show_duplicates(recursive: bool) {
+    let download_path = match get_or_prompt_download_folder() {
+        Ok(path) => path,
+        Err(err) => {
+            eprintln!("Error getting download folder: {}", err);
+            return;
+        }
+    };
+
+    match print_duplicates(&download_path, recursive) {
+        Ok(_) => {}
+        Err(err) => eprintln!("Error finding duplicates: {}", err),
+    }
+}
+
+/// Same as [`execute_delete`] but non-recursive and without asking for confirmation.
+pub fn execute_delete_auto() {
+    let confirmation = AutoConfirm;
+    // Summary printing is left to `execute_delete_duplicates` (see `execute_delete`).
+    if let Err(err) = execute_delete_duplicates(&confirmation, false) {
+        eprintln!("Error deleting duplicates: {}", err);
+    }
+}
diff --git a/src/files/duplicate/types.rs b/src/files/duplicate/types.rs
new file mode 100644
index 0000000..5a7f13e
--- /dev/null
+++ b/src/files/duplicate/types.rs
@@ -0,0 +1,77 @@
+#[derive(Debug, Clone, Default)]
+pub struct DuplicateSummary {
+    pub total_duplicates: u64,
+    pub total_size_saved: u64,
+    pub error_count: u64,
+}
+
+impl DuplicateSummary {
+    pub fn new() -> Self {
+        Self::default()
+    }
+    pub fn duplicated(&mut self) {
+        self.total_duplicates += 1;
+    }
+    pub fn size_saved(&mut self, size: u64) {
+        self.total_size_saved += size;
+    }
+    pub fn errored(&mut self) {
+        self.error_count += 1;
+    }
+    pub fn duplicate_count(&self) -> u64 {
+        self.total_duplicates
+    }
+    pub fn total_size_saved(&self) -> u64 {
+        self.total_size_saved
+    }
+    pub fn error_count(&self) -> u64 {
+        self.error_count
+    }
+    pub fn has_errors(&self) -> bool {
+        self.error_count > 0
+    }
+    pub fn total_processed(&self) -> u64 {
+        self.total_duplicates + self.error_count
+    }
+}
+
+#[derive(Debug)]
+pub enum DuplicateError {
+    InputReadFailed(String),
+    UserCancelled,
+    IoError(std::io::Error),
+    WalkdirError(String),
+    NoDuplicate,
+}
+
+impl From<std::io::Error> for DuplicateError {
+    fn from(err: std::io::Error) -> Self {
+        DuplicateError::IoError(err)
+    }
+}
+
+impl std::fmt::Display for DuplicateError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            DuplicateError::InputReadFailed(message) => write!(f, "InputReadFailed: {}", message),
+            DuplicateError::UserCancelled => write!(f, "UserCancelled"),
+            DuplicateError::IoError(err) => write!(f, "IoError: {}", err),
+            DuplicateError::WalkdirError(err) => write!(f, "WalkdirError: {}", err),
+            DuplicateError::NoDuplicate => write!(f, "No Duplicate Found"),
+        }
+    }
+}
+
+impl std::error::Error for DuplicateError {}
+
+impl From<walkdir::Error> for DuplicateError {
+    fn from(err: walkdir::Error) -> Self {
+        DuplicateError::WalkdirError(err.to_string())
+    }
+}
+
+impl From<Box<dyn std::error::Error>> for DuplicateError {
+    fn from(err: Box<dyn std::error::Error>) -> Self {
+        DuplicateError::InputReadFailed(err.to_string())
+    }
+}
diff --git a/src/files/mod.rs b/src/files/mod.rs
index b303759..207f999 100644
--- a/src/files/mod.rs
+++ b/src/files/mod.rs
@@ -1,6 +1,7 @@
 pub mod batch;
 pub mod categorizer;
 pub mod detector;
+pub mod duplicate;
 mod file_ops;
 pub mod mover;
 pub mod undo;
diff --git a/src/main.rs b/src/main.rs
index beb9ced..7986c63 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
 use clap::Parser;
 use noentropy::cli::{Args, handle_organization, handle_undo};
+use noentropy::files::duplicate::execute_delete;
 use noentropy::settings::config::change_and_prompt_api_key;
 use noentropy::settings::{get_or_prompt_config, get_or_prompt_download_folder};
 
@@ -11,8 +12,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         let download_path = get_or_prompt_download_folder()?;
         handle_undo(args, download_path).await?;
         return Ok(());
-    }
-    if args.change_key {
+    } else if args.change_key {
         let api_key = change_and_prompt_api_key();
         match api_key {
             Ok(_key) => println!("Key saved"),
@@ -20,11 +20,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
                 eprintln!("{e}")
             }
         }
+    } else if args.duplicate {
+        execute_delete(args.recursive);
+    } else {
+        let config = get_or_prompt_config()?;
+        handle_organization(args, config).await?;
     }
 
-    let config = get_or_prompt_config()?;
-
-    handle_organization(args, config).await?;
-
     Ok(())
 }
diff --git a/tests/test_online_handler.rs b/tests/test_online_handler.rs
index f8a4c0c..36a6de1 100644
--- a/tests/test_online_handler.rs
+++ b/tests/test_online_handler.rs
@@ -30,6 +30,7 @@ fn create_test_args(dry_run: bool, max_concurrent: usize) -> Args {
         undo: false,
         change_key: false,
         offline: false,
+        duplicate: false,
         path: None,
     }
 }
@@ -101,6 +102,7 @@ fn test_args_all_flags() {
         change_key: true,
         offline: true,
         path: Some(PathBuf::from("/test/path")),
+        duplicate: true,
     };
 
     assert!(args.dry_run);
diff --git a/tests/test_undo_handler.rs b/tests/test_undo_handler.rs
index 03e8fe3..f57bb4e 100644
--- a/tests/test_undo_handler.rs
+++ b/tests/test_undo_handler.rs
@@ -28,6 +28,7 @@ fn create_test_args(dry_run: bool, path: Option<PathBuf>) -> Args {
         undo: true,
         change_key: false,
         offline: false,
+        duplicate: false,
         path,
     }
 }