Merge pull request #20 from glitchySid/feature/duplicate
Some checks failed
Rust / build (push) Has been cancelled
Rust / create-release (push) Has been cancelled
Rust / upload-assets (macos-latest, x86_64-apple-darwin) (push) Has been cancelled
Rust / upload-assets (ubuntu-latest, x86_64-unknown-linux-gnu) (push) Has been cancelled
Rust / upload-assets (windows-latest, x86_64-pc-windows-msvc) (push) Has been cancelled
Some checks failed
Rust / build (push) Has been cancelled
Rust / create-release (push) Has been cancelled
Rust / upload-assets (macos-latest, x86_64-apple-darwin) (push) Has been cancelled
Rust / upload-assets (ubuntu-latest, x86_64-unknown-linux-gnu) (push) Has been cancelled
Rust / upload-assets (windows-latest, x86_64-pc-windows-msvc) (push) Has been cancelled
Finds duplicate files and prompts the user to delete them.
This commit is contained in:
@@ -26,6 +26,9 @@ pub struct Args {
|
||||
#[arg(long, help = "Use offline mode (extension-based categorization)")]
|
||||
pub offline: bool,
|
||||
|
||||
#[arg(long, help = "Detect duplicate files")]
|
||||
pub duplicate: bool,
|
||||
|
||||
/// Optional path to organize instead of the configured download folder
|
||||
///
|
||||
/// If provided, this path will be used instead of the download folder
|
||||
|
||||
36
src/files/duplicate/confirmation.rs
Normal file
36
src/files/duplicate/confirmation.rs
Normal file
@@ -0,0 +1,36 @@
|
||||
use super::types::DuplicateError;
|
||||
use std::io;
|
||||
|
||||
pub trait ConfirmationStrategy {
|
||||
fn confirm(&self) -> Result<bool, DuplicateError>;
|
||||
}
|
||||
|
||||
pub struct StdinConfirmation;
|
||||
|
||||
impl ConfirmationStrategy for StdinConfirmation {
|
||||
fn confirm(&self) -> Result<bool, DuplicateError> {
|
||||
eprint!("\nDo you want to apply these changes? [y/N]: ");
|
||||
|
||||
let mut input = String::new();
|
||||
if io::stdin().read_line(&mut input).is_err() {
|
||||
return Err(DuplicateError::InputReadFailed(
|
||||
"Failed to read input. Operation cancelled.".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let input = input.trim().to_lowercase();
|
||||
if input != "y" && input != "yes" {
|
||||
return Err(DuplicateError::UserCancelled);
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AutoConfirm;
|
||||
|
||||
impl ConfirmationStrategy for AutoConfirm {
|
||||
fn confirm(&self) -> Result<bool, DuplicateError> {
|
||||
Ok(true)
|
||||
}
|
||||
}
|
||||
34
src/files/duplicate/display.rs
Normal file
34
src/files/duplicate/display.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use super::types::DuplicateSummary;
|
||||
use colored::*;
|
||||
|
||||
pub(super) fn print_duplicate_summary(summary: &DuplicateSummary) {
|
||||
println!("\n{}", "Duplicate Removal Complete!".bold().green());
|
||||
|
||||
if summary.duplicate_count() > 0 || summary.error_count() > 0 {
|
||||
println!(
|
||||
"Files deleted: {}, Space saved: {}, Errors: {}",
|
||||
summary.duplicate_count().to_string().green(),
|
||||
format_size(summary.total_size_saved()).blue(),
|
||||
summary.error_count().to_string().red()
|
||||
);
|
||||
} else {
|
||||
println!("{}", "No duplicate files were deleted.".yellow());
|
||||
}
|
||||
}
|
||||
|
||||
/// Renders a byte count using binary (1024-based) units from `B` up to `TB`.
///
/// Values below 1024 are printed as an exact integer number of bytes; larger
/// values are scaled and printed with two decimals. Anything beyond the `TB`
/// range stays expressed in `TB`.
fn format_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    let mut scaled = bytes as f64;
    let mut steps = 0usize;

    // At most one division per unit above "B"; stop once the value fits.
    for _ in 1..UNITS.len() {
        if scaled < STEP {
            break;
        }
        scaled /= STEP;
        steps += 1;
    }

    match steps {
        0 => format!("{} {}", bytes, UNITS[0]),
        _ => format!("{:.2} {}", scaled, UNITS[steps]),
    }
}
|
||||
146
src/files/duplicate/duplicate_detector.rs
Normal file
146
src/files/duplicate/duplicate_detector.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
use std::{collections::HashMap, fs::File, path::Path};
|
||||
|
||||
use blake3::Hasher;
|
||||
use colored::Colorize;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::files::duplicate::{
|
||||
confirmation::ConfirmationStrategy,
|
||||
display::print_duplicate_summary,
|
||||
types::{DuplicateError, DuplicateSummary},
|
||||
};
|
||||
use crate::settings::get_or_prompt_download_folder;
|
||||
|
||||
pub fn compute_file_hash(path: &Path) -> Result<blake3::Hash, std::io::Error> {
|
||||
let mut file = File::open(path)?;
|
||||
let mut hasher = Hasher::new();
|
||||
std::io::copy(&mut file, &mut hasher)?;
|
||||
Ok(hasher.finalize())
|
||||
}
|
||||
|
||||
pub fn find_duplicates<'a>(paths: &[&'a Path]) -> Vec<Vec<&'a Path>> {
|
||||
let mut hash_map: HashMap<blake3::Hash, Vec<&Path>> = HashMap::new();
|
||||
|
||||
for &path in paths {
|
||||
if let Ok(hash) = compute_file_hash(path) {
|
||||
hash_map.entry(hash).or_default().push(path);
|
||||
}
|
||||
}
|
||||
|
||||
hash_map
|
||||
.into_values()
|
||||
.filter(|files| files.len() > 1)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn print_duplicates(path: &Path, recursive: bool) -> Result<(), DuplicateError> {
|
||||
let mut file_paths = Vec::new();
|
||||
|
||||
let walker = if recursive {
|
||||
WalkDir::new(path).follow_links(false)
|
||||
} else {
|
||||
WalkDir::new(path).max_depth(1).follow_links(false)
|
||||
};
|
||||
|
||||
for entry in walker.into_iter() {
|
||||
let entry = entry?;
|
||||
if entry.file_type().is_file() {
|
||||
file_paths.push(entry.path().to_path_buf());
|
||||
}
|
||||
}
|
||||
|
||||
let refs: Vec<&Path> = file_paths.iter().map(|p| p.as_path()).collect();
|
||||
let duplicates = find_duplicates(&refs);
|
||||
|
||||
if duplicates.is_empty() {
|
||||
return Err(DuplicateError::NoDuplicate);
|
||||
} else {
|
||||
println!("Duplicate files:");
|
||||
for group in duplicates {
|
||||
for file in group {
|
||||
println!("{}", format!("{}", file.display()).green());
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn execute_delete_duplicates<C: ConfirmationStrategy>(
|
||||
confirmation: &C,
|
||||
recursive: bool,
|
||||
) -> Result<DuplicateSummary, DuplicateError> {
|
||||
let download_path = get_or_prompt_download_folder()?;
|
||||
match print_duplicates(&download_path, recursive) {
|
||||
Ok(_) => {
|
||||
confirmation.confirm()?;
|
||||
|
||||
let summary = delete_duplicates(&download_path, recursive)?;
|
||||
print_duplicate_summary(&summary);
|
||||
Ok(summary)
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn delete_duplicates(path: &Path, recursive: bool) -> Result<DuplicateSummary, DuplicateError> {
|
||||
let mut file_paths = Vec::new();
|
||||
let mut summary = DuplicateSummary::new();
|
||||
|
||||
let walker = if recursive {
|
||||
WalkDir::new(path).follow_links(false)
|
||||
} else {
|
||||
WalkDir::new(path).max_depth(1).follow_links(false)
|
||||
};
|
||||
|
||||
for entry in walker.into_iter() {
|
||||
let entry = entry?;
|
||||
if entry.file_type().is_file() {
|
||||
file_paths.push(entry.path().to_path_buf());
|
||||
}
|
||||
}
|
||||
|
||||
let refs: Vec<&Path> = file_paths.iter().map(|p| p.as_path()).collect();
|
||||
let duplicates = find_duplicates(&refs);
|
||||
|
||||
if duplicates.is_empty() {
|
||||
println!("No duplicate files found to delete.");
|
||||
return Ok(summary);
|
||||
}
|
||||
|
||||
let mut total_deleted = 0;
|
||||
|
||||
for group in duplicates {
|
||||
if group.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Keep the first file, delete the rest
|
||||
let to_keep = &group[0];
|
||||
let to_delete = &group[1..];
|
||||
|
||||
println!("Keeping: {}", to_keep.display());
|
||||
|
||||
for file in to_delete {
|
||||
match std::fs::remove_file(file) {
|
||||
Ok(_) => {
|
||||
println!("Deleted: {}", file.display());
|
||||
total_deleted += 1;
|
||||
summary.duplicated();
|
||||
|
||||
if let Ok(metadata) = std::fs::metadata(file) {
|
||||
summary.size_saved(metadata.len());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Error deleting file {}: {}", file.display(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
println!("Total files deleted: {}", total_deleted);
|
||||
Ok(summary)
|
||||
}
|
||||
41
src/files/duplicate/mod.rs
Normal file
41
src/files/duplicate/mod.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
pub mod confirmation;
|
||||
pub mod display;
|
||||
pub mod duplicate_detector;
|
||||
pub mod types;
|
||||
|
||||
use crate::settings::get_or_prompt_download_folder;
|
||||
pub use confirmation::{AutoConfirm, ConfirmationStrategy, StdinConfirmation};
|
||||
use display::print_duplicate_summary;
|
||||
use duplicate_detector::{execute_delete_duplicates, print_duplicates};
|
||||
pub use types::{DuplicateError, DuplicateSummary};
|
||||
|
||||
pub fn execute_delete(recursive: bool) {
|
||||
let confirmation = StdinConfirmation;
|
||||
match execute_delete_duplicates(&confirmation, recursive) {
|
||||
Ok(summary) => print_duplicate_summary(&summary),
|
||||
Err(err) => eprintln!("Error deleting duplicates: {}", err),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn show_duplicates(recursive: bool) {
|
||||
let download_path = match get_or_prompt_download_folder() {
|
||||
Ok(path) => path,
|
||||
Err(err) => {
|
||||
eprintln!("Error getting download folder: {}", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
match print_duplicates(&download_path, recursive) {
|
||||
Ok(_) => {}
|
||||
Err(err) => eprintln!("Error finding duplicates: {}", err),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute_delete_auto() {
|
||||
let confirmation = AutoConfirm;
|
||||
match execute_delete_duplicates(&confirmation, false) {
|
||||
Ok(summary) => print_duplicate_summary(&summary),
|
||||
Err(err) => eprintln!("Error deleting duplicates: {}", err),
|
||||
}
|
||||
}
|
||||
77
src/files/duplicate/types.rs
Normal file
77
src/files/duplicate/types.rs
Normal file
@@ -0,0 +1,77 @@
|
||||
/// Running totals collected during a duplicate-removal run.
#[derive(Debug, Clone, Default)]
pub struct DuplicateSummary {
    /// Number of duplicates recorded through [`DuplicateSummary::duplicated`].
    pub total_duplicates: u64,
    /// Sum of the byte counts passed to [`DuplicateSummary::size_saved`].
    pub total_size_saved: u64,
    /// Number of failures recorded through [`DuplicateSummary::errored`].
    pub error_count: u64,
}

impl DuplicateSummary {
    /// Creates a summary with every counter at zero.
    pub fn new() -> Self {
        Self {
            total_duplicates: 0,
            total_size_saved: 0,
            error_count: 0,
        }
    }

    /// Records one handled duplicate.
    pub fn duplicated(&mut self) {
        self.total_duplicates += 1;
    }

    /// Adds `size` bytes to the saved-space total.
    pub fn size_saved(&mut self, size: u64) {
        self.total_size_saved += size;
    }

    /// Records one failure.
    pub fn errored(&mut self) {
        self.error_count += 1;
    }

    /// Number of duplicates recorded so far.
    pub fn duplicate_count(&self) -> u64 {
        self.total_duplicates
    }

    /// Total number of bytes recorded as saved.
    pub fn total_size_saved(&self) -> u64 {
        self.total_size_saved
    }

    /// Number of failures recorded so far.
    pub fn error_count(&self) -> u64 {
        self.error_count
    }

    /// `true` once at least one failure has been recorded.
    pub fn has_errors(&self) -> bool {
        self.error_count() != 0
    }

    /// Recorded duplicates plus recorded failures.
    pub fn total_processed(&self) -> u64 {
        self.duplicate_count() + self.error_count()
    }
}
|
||||
|
||||
/// Errors and early-exit outcomes of the duplicate detection/removal flow.
#[derive(Debug)]
pub enum DuplicateError {
    /// Input could not be obtained; carries a human-readable message.
    InputReadFailed(String),
    /// The operation was declined.
    UserCancelled,
    /// An underlying I/O operation failed.
    IoError(std::io::Error),
    /// Walking the directory tree failed; carries the rendered error message.
    WalkdirError(String),
    /// No duplicate files were found.
    NoDuplicate,
}

impl From<std::io::Error> for DuplicateError {
    /// Wraps an I/O error so it can be propagated with `?`.
    fn from(err: std::io::Error) -> Self {
        DuplicateError::IoError(err)
    }
}

impl std::fmt::Display for DuplicateError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DuplicateError::InputReadFailed(message) => write!(f, "InputReadFailed: {}", message),
            DuplicateError::UserCancelled => write!(f, "UserCancelled"),
            DuplicateError::IoError(err) => write!(f, "IoError: {}", err),
            DuplicateError::WalkdirError(err) => write!(f, "WalkdirError: {}", err),
            DuplicateError::NoDuplicate => write!(f, "No Duplicate Found"),
        }
    }
}

impl std::error::Error for DuplicateError {
    /// Exposes the wrapped I/O error so error-chain reporters can reach it.
    /// The other variants carry no underlying error value.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            DuplicateError::IoError(err) => Some(err),
            DuplicateError::InputReadFailed(_)
            | DuplicateError::UserCancelled
            | DuplicateError::WalkdirError(_)
            | DuplicateError::NoDuplicate => None,
        }
    }
}
|
||||
|
||||
impl From<walkdir::Error> for DuplicateError {
|
||||
fn from(err: walkdir::Error) -> Self {
|
||||
DuplicateError::WalkdirError(err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Box<dyn std::error::Error>> for DuplicateError {
|
||||
fn from(err: Box<dyn std::error::Error>) -> Self {
|
||||
DuplicateError::InputReadFailed(err.to_string())
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
pub mod batch;
|
||||
pub mod categorizer;
|
||||
pub mod detector;
|
||||
pub mod duplicate;
|
||||
mod file_ops;
|
||||
pub mod mover;
|
||||
pub mod undo;
|
||||
|
||||
13
src/main.rs
13
src/main.rs
@@ -1,5 +1,6 @@
|
||||
use clap::Parser;
|
||||
use noentropy::cli::{Args, handle_organization, handle_undo};
|
||||
use noentropy::files::duplicate::execute_delete;
|
||||
use noentropy::settings::config::change_and_prompt_api_key;
|
||||
use noentropy::settings::{get_or_prompt_config, get_or_prompt_download_folder};
|
||||
|
||||
@@ -11,8 +12,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let download_path = get_or_prompt_download_folder()?;
|
||||
handle_undo(args, download_path).await?;
|
||||
return Ok(());
|
||||
}
|
||||
if args.change_key {
|
||||
} else if args.change_key {
|
||||
let api_key = change_and_prompt_api_key();
|
||||
match api_key {
|
||||
Ok(_key) => println!("Key saved"),
|
||||
@@ -20,11 +20,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
eprintln!("{e}")
|
||||
}
|
||||
}
|
||||
} else if args.duplicate {
|
||||
execute_delete(args.recursive);
|
||||
} else {
|
||||
let config = get_or_prompt_config()?;
|
||||
handle_organization(args, config).await?;
|
||||
}
|
||||
|
||||
let config = get_or_prompt_config()?;
|
||||
|
||||
handle_organization(args, config).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -30,6 +30,7 @@ fn create_test_args(dry_run: bool, max_concurrent: usize) -> Args {
|
||||
undo: false,
|
||||
change_key: false,
|
||||
offline: false,
|
||||
duplicate: false,
|
||||
path: None,
|
||||
}
|
||||
}
|
||||
@@ -101,6 +102,7 @@ fn test_args_all_flags() {
|
||||
change_key: true,
|
||||
offline: true,
|
||||
path: Some(PathBuf::from("/test/path")),
|
||||
duplicate: true,
|
||||
};
|
||||
|
||||
assert!(args.dry_run);
|
||||
|
||||
@@ -28,6 +28,7 @@ fn create_test_args(dry_run: bool, path: Option<PathBuf>) -> Args {
|
||||
undo: true,
|
||||
change_key: false,
|
||||
offline: false,
|
||||
duplicate: false,
|
||||
path,
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user