Merge pull request #20 from glitchySid/feature/duplicate
Some checks failed
Rust / build (push) Has been cancelled
Rust / create-release (push) Has been cancelled
Rust / upload-assets (macos-latest, x86_64-apple-darwin) (push) Has been cancelled
Rust / upload-assets (ubuntu-latest, x86_64-unknown-linux-gnu) (push) Has been cancelled
Rust / upload-assets (windows-latest, x86_64-pc-windows-msvc) (push) Has been cancelled

Finds duplicate files and prompts to delete them.
This commit is contained in:
Siddhesh Mhatre
2026-01-13 19:18:18 +05:30
committed by GitHub
10 changed files with 348 additions and 6 deletions

View File

@@ -26,6 +26,9 @@ pub struct Args {
#[arg(long, help = "Use offline mode (extension-based categorization)")]
pub offline: bool,
#[arg(long, help = "Detect duplicate files")]
pub duplicate: bool,
/// Optional path to organize instead of the configured download folder
///
/// If provided, this path will be used instead of the download folder

View File

@@ -0,0 +1,36 @@
use super::types::DuplicateError;
use std::io;
/// Strategy for deciding whether the pending changes should be applied.
pub trait ConfirmationStrategy {
/// Returns `Ok(true)` when the changes are approved, or a
/// [`DuplicateError`] when approval could not be obtained.
fn confirm(&self) -> Result<bool, DuplicateError>;
}
/// Confirmation strategy that asks the user interactively.
pub struct StdinConfirmation;

impl ConfirmationStrategy for StdinConfirmation {
    /// Writes a `[y/N]` prompt to stderr and reads one line from stdin.
    ///
    /// Returns `Ok(true)` only when the trimmed, lower-cased answer is `y` or
    /// `yes`.
    ///
    /// # Errors
    ///
    /// * [`DuplicateError::InputReadFailed`] when reading from stdin fails.
    /// * [`DuplicateError::UserCancelled`] for any other answer, including an
    ///   empty line.
    fn confirm(&self) -> Result<bool, DuplicateError> {
        eprint!("\nDo you want to apply these changes? [y/N]: ");

        let mut answer = String::new();
        io::stdin().read_line(&mut answer).map_err(|_| {
            DuplicateError::InputReadFailed(
                "Failed to read input. Operation cancelled.".to_string(),
            )
        })?;

        match answer.trim().to_lowercase().as_str() {
            "y" | "yes" => Ok(true),
            _ => Err(DuplicateError::UserCancelled),
        }
    }
}
/// Confirmation strategy that approves without asking anyone.
pub struct AutoConfirm;
impl ConfirmationStrategy for AutoConfirm {
/// Always returns `Ok(true)`.
fn confirm(&self) -> Result<bool, DuplicateError> {
Ok(true)
}
}

View File

@@ -0,0 +1,34 @@
use super::types::DuplicateSummary;
use colored::*;
/// Prints the outcome of a duplicate-removal run to stdout.
///
/// A completion banner is always printed first. It is followed either by the
/// deleted/saved/error totals, or by a notice that nothing was deleted when
/// both the duplicate count and the error count are zero.
pub(super) fn print_duplicate_summary(summary: &DuplicateSummary) {
    println!("\n{}", "Duplicate Removal Complete!".bold().green());

    let deleted = summary.duplicate_count();
    let errors = summary.error_count();

    // Nothing happened at all: say so and stop.
    if deleted == 0 && errors == 0 {
        println!("{}", "No duplicate files were deleted.".yellow());
        return;
    }

    println!(
        "Files deleted: {}, Space saved: {}, Errors: {}",
        deleted.to_string().green(),
        format_size(summary.total_size_saved()).blue(),
        errors.to_string().red()
    );
}
/// Renders a byte count as a human-readable string using 1024-based units.
///
/// Values below 1024 are printed as an exact integer number of bytes
/// (`"512 B"`); larger values are scaled and printed with two decimals
/// (`"1.50 KB"`). `TB` is the largest unit, so bigger values stay in `TB`.
fn format_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    let largest = UNITS.len() - 1;
    let mut scaled = bytes as f64;
    let mut idx = 0;
    loop {
        if scaled < STEP || idx == largest {
            break;
        }
        scaled /= STEP;
        idx += 1;
    }

    match idx {
        // Plain bytes keep the exact integer value, no decimals.
        0 => format!("{} {}", bytes, UNITS[0]),
        _ => format!("{:.2} {}", scaled, UNITS[idx]),
    }
}

View File

@@ -0,0 +1,146 @@
use std::{collections::HashMap, fs::File, path::Path};
use blake3::Hasher;
use colored::Colorize;
use walkdir::WalkDir;
use crate::files::duplicate::{
confirmation::ConfirmationStrategy,
display::print_duplicate_summary,
types::{DuplicateError, DuplicateSummary},
};
use crate::settings::get_or_prompt_download_folder;
/// Computes the BLAKE3 hash of the file at `path`.
///
/// The file contents are copied into the hasher with [`std::io::copy`].
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] when the file cannot be opened
/// or read.
pub fn compute_file_hash(path: &Path) -> Result<blake3::Hash, std::io::Error> {
    let mut hasher = Hasher::new();
    File::open(path).and_then(|mut source| std::io::copy(&mut source, &mut hasher))?;
    Ok(hasher.finalize())
}
/// Groups `paths` into sets of files with identical content.
///
/// Files are first bucketed by size, and only files that share a size with at
/// least one other file are read and hashed. Files with a unique size cannot
/// have a duplicate, so skipping them avoids reading their contents.
///
/// Each returned group holds two or more paths with the same BLAKE3 hash, in
/// the order they appeared in `paths`. Groups are sorted by their first path,
/// so repeated calls over the same input list the groups in the same order.
///
/// Files whose metadata or contents cannot be read are skipped (best effort).
pub fn find_duplicates<'a>(paths: &[&'a Path]) -> Vec<Vec<&'a Path>> {
    // Pass 1: cheap metadata lookup, bucket candidates by file size.
    let mut by_size: HashMap<u64, Vec<&'a Path>> = HashMap::new();
    for &path in paths {
        if let Ok(metadata) = std::fs::metadata(path) {
            by_size.entry(metadata.len()).or_default().push(path);
        }
    }

    // Pass 2: hash only the files that share a size with another file.
    let mut by_hash: HashMap<blake3::Hash, Vec<&'a Path>> = HashMap::new();
    for candidates in by_size.into_values().filter(|bucket| bucket.len() > 1) {
        for path in candidates {
            if let Ok(hash) = compute_file_hash(path) {
                by_hash.entry(hash).or_default().push(path);
            }
        }
    }

    let mut groups: Vec<Vec<&'a Path>> = by_hash
        .into_values()
        .filter(|files| files.len() > 1)
        .collect();
    // HashMap iteration order is arbitrary; sort for a stable presentation.
    // Indexing is safe: every group kept above has at least two entries.
    groups.sort_unstable_by_key(|group| group[0]);
    groups
}
/// Scans `path` for duplicate files and prints each group to stdout.
///
/// When `recursive` is `false` the walk is limited to depth 1. Symbolic links
/// are not followed, and only entries whose file type is a regular file are
/// considered.
///
/// # Errors
///
/// * [`DuplicateError::NoDuplicate`] when no duplicate group was found.
/// * A converted `walkdir` error when a directory entry cannot be read.
pub fn print_duplicates(path: &Path, recursive: bool) -> Result<(), DuplicateError> {
    let mut walker = WalkDir::new(path).follow_links(false);
    if !recursive {
        walker = walker.max_depth(1);
    }

    let mut file_paths = Vec::new();
    for entry in walker {
        let entry = entry?;
        if entry.file_type().is_file() {
            file_paths.push(entry.path().to_path_buf());
        }
    }

    let candidates: Vec<&Path> = file_paths.iter().map(|owned| owned.as_path()).collect();
    let groups = find_duplicates(&candidates);
    if groups.is_empty() {
        return Err(DuplicateError::NoDuplicate);
    }

    println!("Duplicate files:");
    for group in groups {
        for file in group {
            println!("{}", file.display().to_string().green());
        }
        // Blank line between groups.
        println!();
    }
    Ok(())
}
pub fn execute_delete_duplicates<C: ConfirmationStrategy>(
confirmation: &C,
recursive: bool,
) -> Result<DuplicateSummary, DuplicateError> {
let download_path = get_or_prompt_download_folder()?;
match print_duplicates(&download_path, recursive) {
Ok(_) => {
confirmation.confirm()?;
let summary = delete_duplicates(&download_path, recursive)?;
print_duplicate_summary(&summary);
Ok(summary)
}
Err(e) => Err(e),
}
}
/// Deletes every duplicate file under `path`, keeping the first file of each
/// duplicate group.
///
/// When `recursive` is `false` the walk is limited to depth 1. Symbolic links
/// are not followed, and only entries whose file type is a regular file are
/// considered. Progress is printed to stdout, and deletion failures to stderr.
///
/// The returned [`DuplicateSummary`] counts the files deleted, the bytes they
/// occupied, and the number of files that could not be deleted. A failure to
/// delete one file does not abort the run.
///
/// # Errors
///
/// Returns a converted `walkdir` error when a directory entry cannot be read.
pub fn delete_duplicates(path: &Path, recursive: bool) -> Result<DuplicateSummary, DuplicateError> {
    let mut file_paths = Vec::new();
    let mut summary = DuplicateSummary::new();
    let walker = if recursive {
        WalkDir::new(path).follow_links(false)
    } else {
        WalkDir::new(path).max_depth(1).follow_links(false)
    };
    for entry in walker.into_iter() {
        let entry = entry?;
        if entry.file_type().is_file() {
            file_paths.push(entry.path().to_path_buf());
        }
    }
    let refs: Vec<&Path> = file_paths.iter().map(|p| p.as_path()).collect();
    let duplicates = find_duplicates(&refs);
    if duplicates.is_empty() {
        println!("No duplicate files found to delete.");
        return Ok(summary);
    }
    for group in duplicates {
        if group.len() < 2 {
            continue;
        }
        // Keep the first file, delete the rest
        let to_keep = &group[0];
        let to_delete = &group[1..];
        println!("Keeping: {}", to_keep.display());
        for file in to_delete {
            // The size has to be read BEFORE the file is removed; afterwards
            // the metadata lookup can only fail and nothing would be counted.
            let size = std::fs::metadata(file).map(|metadata| metadata.len()).ok();
            match std::fs::remove_file(file) {
                Ok(()) => {
                    println!("Deleted: {}", file.display());
                    summary.duplicated();
                    if let Some(size) = size {
                        summary.size_saved(size);
                    }
                }
                Err(e) => {
                    eprintln!("Error deleting file {}: {}", file.display(), e);
                    // Record the failure so the summary's error count is accurate.
                    summary.errored();
                }
            }
        }
        println!();
    }
    println!("Total files deleted: {}", summary.duplicate_count());
    Ok(summary)
}

View File

@@ -0,0 +1,41 @@
pub mod confirmation;
pub mod display;
pub mod duplicate_detector;
pub mod types;
use crate::settings::get_or_prompt_download_folder;
pub use confirmation::{AutoConfirm, ConfirmationStrategy, StdinConfirmation};
use display::print_duplicate_summary;
use duplicate_detector::{execute_delete_duplicates, print_duplicates};
pub use types::{DuplicateError, DuplicateSummary};
/// Finds duplicates in the download folder, asks the user for confirmation on
/// the terminal, and deletes the confirmed duplicates.
///
/// `recursive` controls whether sub-directories are scanned. Failures are
/// reported on stderr; nothing is returned.
pub fn execute_delete(recursive: bool) {
    let confirmation = StdinConfirmation;
    // `execute_delete_duplicates` already prints the summary on success, so
    // printing it here again would show it twice. Only errors are reported.
    if let Err(err) = execute_delete_duplicates(&confirmation, recursive) {
        eprintln!("Error deleting duplicates: {}", err);
    }
}
/// Lists duplicate files in the download folder without deleting anything.
///
/// `recursive` controls whether sub-directories are scanned. Any failure,
/// including the "no duplicates" outcome reported by `print_duplicates`, is
/// written to stderr.
pub fn show_duplicates(recursive: bool) {
    let folder = get_or_prompt_download_folder();
    let download_path = match folder {
        Err(err) => {
            eprintln!("Error getting download folder: {}", err);
            return;
        }
        Ok(path) => path,
    };

    if let Err(err) = print_duplicates(&download_path, recursive) {
        eprintln!("Error finding duplicates: {}", err);
    }
}
/// Finds and deletes duplicates in the download folder without asking for
/// confirmation.
///
/// The scan is non-recursive. Failures are reported on stderr; nothing is
/// returned.
pub fn execute_delete_auto() {
    let confirmation = AutoConfirm;
    // `execute_delete_duplicates` already prints the summary on success, so
    // printing it here again would show it twice. Only errors are reported.
    if let Err(err) = execute_delete_duplicates(&confirmation, false) {
        eprintln!("Error deleting duplicates: {}", err);
    }
}

View File

@@ -0,0 +1,77 @@
/// Running totals for a duplicate-removal run.
#[derive(Debug, Clone, Default)]
pub struct DuplicateSummary {
    /// Incremented by one on every call to [`DuplicateSummary::duplicated`].
    pub total_duplicates: u64,
    /// Sum of the sizes passed to [`DuplicateSummary::size_saved`].
    pub total_size_saved: u64,
    /// Incremented by one on every call to [`DuplicateSummary::errored`].
    pub error_count: u64,
}

impl DuplicateSummary {
    /// Creates a summary with every counter at zero.
    pub fn new() -> Self {
        Self {
            total_duplicates: 0,
            total_size_saved: 0,
            error_count: 0,
        }
    }

    /// Records one more duplicate.
    pub fn duplicated(&mut self) {
        self.total_duplicates += 1;
    }

    /// Adds `size` to the saved-size total.
    pub fn size_saved(&mut self, size: u64) {
        self.total_size_saved += size;
    }

    /// Records one more error.
    pub fn errored(&mut self) {
        self.error_count += 1;
    }

    /// Number of duplicates recorded so far.
    pub fn duplicate_count(&self) -> u64 {
        self.total_duplicates
    }

    /// Accumulated saved size.
    pub fn total_size_saved(&self) -> u64 {
        self.total_size_saved
    }

    /// Number of errors recorded so far.
    pub fn error_count(&self) -> u64 {
        self.error_count
    }

    /// `true` when at least one error has been recorded.
    pub fn has_errors(&self) -> bool {
        self.error_count != 0
    }

    /// Duplicates plus errors.
    pub fn total_processed(&self) -> u64 {
        self.duplicate_count() + self.error_count()
    }
}
/// Errors produced by the duplicate detection and removal code.
#[derive(Debug)]
pub enum DuplicateError {
/// Reading the user's answer failed; carries a human-readable message.
/// Also the target of the `From<Box<dyn std::error::Error>>` conversion.
InputReadFailed(String),
/// The user did not confirm the operation.
UserCancelled,
/// Wraps a [`std::io::Error`].
IoError(std::io::Error),
/// Holds the string form of a `walkdir::Error`.
WalkdirError(String),
/// No duplicate files were found.
NoDuplicate,
}
impl From<std::io::Error> for DuplicateError {
fn from(err: std::io::Error) -> Self {
DuplicateError::IoError(err)
}
}
impl std::fmt::Display for DuplicateError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DuplicateError::InputReadFailed(message) => write!(f, "InputReadFailed: {}", message),
DuplicateError::UserCancelled => write!(f, "UserCancelled"),
DuplicateError::IoError(err) => write!(f, "IoError: {}", err),
DuplicateError::WalkdirError(err) => write!(f, "WalkdirError: {}", err),
DuplicateError::NoDuplicate => write!(f, "No Duplicate Found"),
}
}
}
// Marker impl so `DuplicateError` can be used wherever `std::error::Error` is required.
impl std::error::Error for DuplicateError {}
/// Lets `?` turn a `walkdir::Error` into [`DuplicateError::WalkdirError`],
/// keeping only the error's string form.
impl From<walkdir::Error> for DuplicateError {
    fn from(err: walkdir::Error) -> Self {
        let message = err.to_string();
        Self::WalkdirError(message)
    }
}
impl From<Box<dyn std::error::Error>> for DuplicateError {
fn from(err: Box<dyn std::error::Error>) -> Self {
DuplicateError::InputReadFailed(err.to_string())
}
}

View File

@@ -1,6 +1,7 @@
pub mod batch;
pub mod categorizer;
pub mod detector;
pub mod duplicate;
mod file_ops;
pub mod mover;
pub mod undo;

View File

@@ -1,5 +1,6 @@
use clap::Parser;
use noentropy::cli::{Args, handle_organization, handle_undo};
use noentropy::files::duplicate::execute_delete;
use noentropy::settings::config::change_and_prompt_api_key;
use noentropy::settings::{get_or_prompt_config, get_or_prompt_download_folder};
@@ -11,8 +12,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let download_path = get_or_prompt_download_folder()?;
handle_undo(args, download_path).await?;
return Ok(());
}
if args.change_key {
} else if args.change_key {
let api_key = change_and_prompt_api_key();
match api_key {
Ok(_key) => println!("Key saved"),
@@ -20,11 +20,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
eprintln!("{e}")
}
}
} else if args.duplicate {
execute_delete(args.recursive);
} else {
let config = get_or_prompt_config()?;
handle_organization(args, config).await?;
}
let config = get_or_prompt_config()?;
handle_organization(args, config).await?;
Ok(())
}

View File

@@ -30,6 +30,7 @@ fn create_test_args(dry_run: bool, max_concurrent: usize) -> Args {
undo: false,
change_key: false,
offline: false,
duplicate: false,
path: None,
}
}
@@ -101,6 +102,7 @@ fn test_args_all_flags() {
change_key: true,
offline: true,
path: Some(PathBuf::from("/test/path")),
duplicate: true,
};
assert!(args.dry_run);

View File

@@ -28,6 +28,7 @@ fn create_test_args(dry_run: bool, path: Option<PathBuf>) -> Args {
undo: true,
change_key: false,
offline: false,
duplicate: false,
path,
}
}