use std::collections::HashMap; use std::path::Path; use std::sync::LazyLock; use crate::models::{FileCategory, OrganizationPlan}; static EXTENSION_MAP: LazyLock> = LazyLock::new(|| { HashMap::from([ // Images ("jpg", "Images"), ("jpeg", "Images"), ("png", "Images"), ("gif", "Images"), ("bmp", "Images"), ("svg", "Images"), ("webp", "Images"), ("ico", "Images"), ("tiff", "Images"), ("tif", "Images"), ("raw", "Images"), ("heic", "Images"), ("heif", "Images"), // Documents ("pdf", "Documents"), ("doc", "Documents"), ("docx", "Documents"), ("txt", "Documents"), ("rtf", "Documents"), ("odt", "Documents"), ("xls", "Documents"), ("xlsx", "Documents"), ("ppt", "Documents"), ("pptx", "Documents"), ("csv", "Documents"), ("md", "Documents"), ("epub", "Documents"), // Installers ("exe", "Installers"), ("msi", "Installers"), ("dmg", "Installers"), ("deb", "Installers"), ("rpm", "Installers"), ("app", "Installers"), ("appimage", "Installers"), ("pkg", "Installers"), ("snap", "Installers"), // Music ("mp3", "Music"), ("wav", "Music"), ("flac", "Music"), ("aac", "Music"), ("ogg", "Music"), ("wma", "Music"), ("m4a", "Music"), ("opus", "Music"), ("aiff", "Music"), // Video ("mp4", "Video"), ("mkv", "Video"), ("avi", "Video"), ("mov", "Video"), ("wmv", "Video"), ("flv", "Video"), ("webm", "Video"), ("m4v", "Video"), ("mpeg", "Video"), ("mpg", "Video"), // Archives ("zip", "Archives"), ("tar", "Archives"), ("gz", "Archives"), ("rar", "Archives"), ("7z", "Archives"), ("bz2", "Archives"), ("xz", "Archives"), ("tgz", "Archives"), ("zst", "Archives"), // Code ("rs", "Code"), ("py", "Code"), ("js", "Code"), ("ts", "Code"), ("java", "Code"), ("c", "Code"), ("cpp", "Code"), ("h", "Code"), ("hpp", "Code"), ("go", "Code"), ("rb", "Code"), ("php", "Code"), ("html", "Code"), ("css", "Code"), ("json", "Code"), ("yaml", "Code"), ("yml", "Code"), ("toml", "Code"), ("xml", "Code"), ("sh", "Code"), ("bash", "Code"), ("sql", "Code"), ]) }); /// Categorizes a file by its extension. /// Returns `Some(category)` if the extension is known, `None` otherwise. pub fn categorize_by_extension(filename: &str) -> Option<&'static str> { Path::new(filename) .extension() .and_then(|ext| ext.to_str()) .map(|ext| ext.to_lowercase()) .as_deref() .and_then(|ext| EXTENSION_MAP.get(ext).copied()) } /// Result of offline categorization pub struct OfflineCategorizationResult { pub plan: OrganizationPlan, pub skipped: Vec, } /// Categorizes a list of filenames using extension-based rules. /// Returns categorized files and a list of skipped filenames. pub fn categorize_files_offline(filenames: Vec) -> OfflineCategorizationResult { let mut files = Vec::with_capacity(filenames.len()); let mut skipped = Vec::new(); for filename in filenames { match categorize_by_extension(&filename) { Some(category) => { files.push(FileCategory { filename, category: category.to_string(), sub_category: String::new(), }); } None => { skipped.push(filename); } } } OfflineCategorizationResult { plan: OrganizationPlan { files }, skipped, } } #[cfg(test)] mod tests { use super::*; #[test] fn test_categorize_known_extensions() { assert_eq!(categorize_by_extension("photo.jpg"), Some("Images")); assert_eq!(categorize_by_extension("document.pdf"), Some("Documents")); assert_eq!(categorize_by_extension("setup.exe"), Some("Installers")); assert_eq!(categorize_by_extension("song.mp3"), Some("Music")); assert_eq!(categorize_by_extension("movie.mp4"), Some("Video")); assert_eq!(categorize_by_extension("archive.zip"), Some("Archives")); assert_eq!(categorize_by_extension("main.rs"), Some("Code")); } #[test] fn test_categorize_case_insensitive() { assert_eq!(categorize_by_extension("PHOTO.JPG"), Some("Images")); assert_eq!(categorize_by_extension("Photo.Png"), Some("Images")); } #[test] fn test_categorize_unknown_extension() { assert_eq!(categorize_by_extension("file.xyz"), None); assert_eq!(categorize_by_extension("file.unknown"), None); } #[test] fn test_categorize_no_extension() { assert_eq!(categorize_by_extension("README"), None); assert_eq!(categorize_by_extension("Makefile"), None); } #[test] fn test_categorize_files_offline() { let filenames = vec![ "photo.jpg".to_string(), "doc.pdf".to_string(), "unknown".to_string(), "file.xyz".to_string(), ]; let result = categorize_files_offline(filenames); assert_eq!(result.plan.files.len(), 2); assert_eq!(result.skipped.len(), 2); assert!(result.skipped.contains(&"unknown".to_string())); assert!(result.skipped.contains(&"file.xyz".to_string())); } }