feat: add offline mode with extension-based file categorization
- Add --offline flag to force offline mode - Automatic detection when Gemini API is unavailable - Prompt user to continue with offline mode on API failure - Extension-based categorization for 80+ file extensions - Add Video category to default categories - Skip files with unknown extensions in offline mode
This commit is contained in:
@@ -23,6 +23,9 @@ pub struct Args {
|
||||
#[arg(long, help = "Change api key")]
|
||||
pub change_key: bool,
|
||||
|
||||
#[arg(long, help = "Use offline mode (extension-based categorization)")]
|
||||
pub offline: bool,
|
||||
|
||||
/// Optional path to organize instead of the configured download folder
|
||||
///
|
||||
/// If provided, this path will be used instead of the download folder
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
use crate::cli::Args;
|
||||
use crate::files::{FileBatch, execute_move, is_text_file, read_file_sample};
|
||||
use crate::files::{
|
||||
FileBatch, categorize_files_offline, execute_move, is_text_file, read_file_sample,
|
||||
};
|
||||
use crate::gemini::GeminiClient;
|
||||
use crate::models::OrganizationPlan;
|
||||
use crate::settings::Config;
|
||||
use crate::settings::{Config, Prompter};
|
||||
use crate::storage::{Cache, UndoLog};
|
||||
use colored::*;
|
||||
use futures::future::join_all;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Validates that a path exists and is a readable directory
|
||||
@@ -128,8 +130,6 @@ pub async fn handle_organization(
|
||||
args: Args,
|
||||
config: Config,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let client: GeminiClient = GeminiClient::new(config.api_key, config.categories.clone());
|
||||
|
||||
let data_dir = Config::get_data_dir()?;
|
||||
let cache_path = data_dir.join(".noentropy_cache.json");
|
||||
let mut cache = Cache::load_or_create(&cache_path);
|
||||
@@ -144,7 +144,11 @@ pub async fn handle_organization(
|
||||
undo_log.cleanup_old_entries(UNDO_LOG_RETENTION_SECONDS);
|
||||
|
||||
// Use custom path if provided, otherwise fall back to configured download folder
|
||||
let target_path = args.path.unwrap_or(config.download_folder);
|
||||
let target_path = args
|
||||
.path
|
||||
.as_ref()
|
||||
.cloned()
|
||||
.unwrap_or_else(|| config.download_folder.clone());
|
||||
|
||||
// Validate and normalize the target path early
|
||||
let target_path = match validate_and_normalize_path(&target_path) {
|
||||
@@ -162,19 +166,140 @@ pub async fn handle_organization(
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("Found {} files to organize.", batch.count());
|
||||
|
||||
// Determine if we should use offline mode
|
||||
let use_offline = if args.offline {
|
||||
println!("{}", "Using offline mode (--offline flag).".cyan());
|
||||
true
|
||||
} else {
|
||||
let client = GeminiClient::new(config.api_key.clone(), config.categories.clone());
|
||||
match client.check_connectivity().await {
|
||||
Ok(()) => false,
|
||||
Err(e) => {
|
||||
if Prompter::prompt_offline_mode(&e.to_string()) {
|
||||
true
|
||||
} else {
|
||||
println!("{}", "Exiting.".yellow());
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let plan = if use_offline {
|
||||
handle_offline_organization(&batch, &target_path, args.dry_run, &mut undo_log)?
|
||||
} else {
|
||||
handle_online_organization(
|
||||
&args,
|
||||
&config,
|
||||
batch,
|
||||
&target_path,
|
||||
&mut cache,
|
||||
&mut undo_log,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
|
||||
// Only save if we have a plan (online mode returns None after moving)
|
||||
if plan.is_none()
|
||||
&& let Err(e) = cache.save(cache_path.as_path())
|
||||
{
|
||||
eprintln!("Warning: Failed to save cache: {}", e);
|
||||
}
|
||||
|
||||
if let Err(e) = undo_log.save(&undo_log_path) {
|
||||
eprintln!("Warning: Failed to save undo log: {}", e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_offline_organization(
|
||||
batch: &FileBatch,
|
||||
target_path: &Path,
|
||||
dry_run: bool,
|
||||
undo_log: &mut UndoLog,
|
||||
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
|
||||
println!("{}", "Categorizing files by extension...".cyan());
|
||||
|
||||
let result = categorize_files_offline(&batch.filenames);
|
||||
|
||||
if result.plan.files.is_empty() {
|
||||
println!("{}", "No files could be categorized offline.".yellow());
|
||||
print_skipped_files(&result.skipped);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Print categorization summary
|
||||
print_categorization_summary(&result.plan);
|
||||
print_skipped_files(&result.skipped);
|
||||
|
||||
if dry_run {
|
||||
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
|
||||
} else {
|
||||
execute_move(target_path, result.plan, Some(undo_log));
|
||||
}
|
||||
|
||||
println!("{}", "Done!".green().bold());
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn print_categorization_summary(plan: &OrganizationPlan) {
|
||||
use std::collections::HashMap;
|
||||
|
||||
let mut counts: HashMap<&str, usize> = HashMap::new();
|
||||
for file in &plan.files {
|
||||
*counts.entry(file.category.as_str()).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
println!();
|
||||
println!("{}", "Categorized files:".green());
|
||||
for (category, count) in &counts {
|
||||
println!(" {}: {} file(s)", category.cyan(), count);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
fn print_skipped_files(skipped: &[String]) {
|
||||
if skipped.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
println!(
|
||||
"Found {} files. Asking Gemini to organize...",
|
||||
batch.count()
|
||||
"{} {} file(s) with unknown extension:",
|
||||
"Skipped".yellow(),
|
||||
skipped.len()
|
||||
);
|
||||
for filename in skipped.iter().take(10) {
|
||||
println!(" - {}", filename);
|
||||
}
|
||||
if skipped.len() > 10 {
|
||||
println!(" ... and {} more", skipped.len() - 10);
|
||||
}
|
||||
println!();
|
||||
}
|
||||
|
||||
async fn handle_online_organization(
|
||||
args: &Args,
|
||||
config: &Config,
|
||||
batch: FileBatch,
|
||||
target_path: &Path,
|
||||
cache: &mut Cache,
|
||||
undo_log: &mut UndoLog,
|
||||
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
|
||||
let client = GeminiClient::new(config.api_key.clone(), config.categories.clone());
|
||||
|
||||
println!("Asking Gemini to organize...");
|
||||
|
||||
let mut plan: OrganizationPlan = match client
|
||||
.organize_files_in_batches(batch.filenames, Some(&mut cache), Some(&target_path))
|
||||
.organize_files_in_batches(batch.filenames, Some(cache), Some(target_path))
|
||||
.await
|
||||
{
|
||||
Ok(plan) => plan,
|
||||
Err(e) => {
|
||||
handle_gemini_error(e);
|
||||
return Ok(());
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -229,19 +354,11 @@ pub async fn handle_organization(
|
||||
if args.dry_run {
|
||||
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
|
||||
} else {
|
||||
execute_move(&target_path, plan, Some(&mut undo_log));
|
||||
execute_move(target_path, plan, Some(undo_log));
|
||||
}
|
||||
println!("{}", "Done!".green().bold());
|
||||
|
||||
if let Err(e) = cache.save(cache_path.as_path()) {
|
||||
eprintln!("Warning: Failed to save cache: {}", e);
|
||||
}
|
||||
|
||||
if let Err(e) = undo_log.save(&undo_log_path) {
|
||||
eprintln!("Warning: Failed to save undo log: {}", e);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
pub async fn handle_undo(
|
||||
|
||||
197
src/files/categorizer.rs
Normal file
197
src/files/categorizer.rs
Normal file
@@ -0,0 +1,197 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use crate::models::{FileCategory, OrganizationPlan};
|
||||
|
||||
/// Lookup table from a lowercase file extension (without the leading dot) to
/// the name of the category that extension belongs to.
///
/// The map is built once, on first access, from the per-category rows below.
static EXTENSION_MAP: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    // One row per category, so every category name is spelled exactly once.
    const GROUPS: &[(&str, &[&str])] = &[
        (
            "Images",
            &[
                "jpg", "jpeg", "png", "gif", "bmp", "svg", "webp", "ico", "tiff", "tif", "raw",
                "heic", "heif",
            ],
        ),
        (
            "Documents",
            &[
                "pdf", "doc", "docx", "txt", "rtf", "odt", "xls", "xlsx", "ppt", "pptx", "csv",
                "md", "epub",
            ],
        ),
        (
            "Installers",
            &[
                "exe", "msi", "dmg", "deb", "rpm", "app", "appimage", "pkg", "snap",
            ],
        ),
        (
            "Music",
            &[
                "mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff",
            ],
        ),
        (
            "Video",
            &[
                "mp4", "mkv", "avi", "mov", "wmv", "flv", "webm", "m4v", "mpeg", "mpg",
            ],
        ),
        (
            "Archives",
            &["zip", "tar", "gz", "rar", "7z", "bz2", "xz", "tgz", "zst"],
        ),
        (
            "Code",
            &[
                "rs", "py", "js", "ts", "java", "c", "cpp", "h", "hpp", "go", "rb", "php", "html",
                "css", "json", "yaml", "yml", "toml", "xml", "sh", "bash", "sql",
            ],
        ),
    ];

    // Flatten the rows into (extension, category) pairs.
    GROUPS
        .iter()
        .flat_map(|&(category, extensions)| extensions.iter().map(move |&ext| (ext, category)))
        .collect()
});
|
||||
|
||||
/// Categorizes a file by its extension.
|
||||
/// Returns `Some(category)` if the extension is known, `None` otherwise.
|
||||
pub fn categorize_by_extension(filename: &str) -> Option<&'static str> {
|
||||
Path::new(filename)
|
||||
.extension()
|
||||
.and_then(|ext| ext.to_str())
|
||||
.map(|ext| ext.to_lowercase())
|
||||
.as_deref()
|
||||
.and_then(|ext| EXTENSION_MAP.get(ext).copied())
|
||||
}
|
||||
|
||||
/// Result of offline categorization
|
||||
pub struct OfflineCategorizationResult {
|
||||
pub plan: OrganizationPlan,
|
||||
pub skipped: Vec<String>,
|
||||
}
|
||||
|
||||
/// Categorizes a list of filenames using extension-based rules.
|
||||
/// Returns categorized files and a list of skipped filenames.
|
||||
pub fn categorize_files_offline(filenames: &[String]) -> OfflineCategorizationResult {
|
||||
let mut files = Vec::with_capacity(filenames.len());
|
||||
let mut skipped = Vec::new();
|
||||
|
||||
for filename in filenames {
|
||||
match categorize_by_extension(filename) {
|
||||
Some(category) => {
|
||||
files.push(FileCategory {
|
||||
filename: filename.clone(),
|
||||
category: category.to_string(),
|
||||
sub_category: String::new(),
|
||||
});
|
||||
}
|
||||
None => {
|
||||
skipped.push(filename.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OfflineCategorizationResult {
|
||||
plan: OrganizationPlan { files },
|
||||
skipped,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(filename, expected)` pair categorizes as expected.
    fn assert_categories(cases: &[(&str, Option<&'static str>)]) {
        for &(filename, expected) in cases {
            assert_eq!(categorize_by_extension(filename), expected);
        }
    }

    #[test]
    fn test_categorize_known_extensions() {
        assert_categories(&[
            ("photo.jpg", Some("Images")),
            ("document.pdf", Some("Documents")),
            ("setup.exe", Some("Installers")),
            ("song.mp3", Some("Music")),
            ("movie.mp4", Some("Video")),
            ("archive.zip", Some("Archives")),
            ("main.rs", Some("Code")),
        ]);
    }

    #[test]
    fn test_categorize_case_insensitive() {
        assert_categories(&[
            ("PHOTO.JPG", Some("Images")),
            ("Photo.Png", Some("Images")),
        ]);
    }

    #[test]
    fn test_categorize_unknown_extension() {
        assert_categories(&[("file.xyz", None), ("file.unknown", None)]);
    }

    #[test]
    fn test_categorize_no_extension() {
        assert_categories(&[("README", None), ("Makefile", None)]);
    }

    #[test]
    fn test_categorize_files_offline() {
        let filenames: Vec<String> = ["photo.jpg", "doc.pdf", "unknown", "file.xyz"]
            .iter()
            .map(|name| name.to_string())
            .collect();

        let result = categorize_files_offline(&filenames);

        assert_eq!(result.plan.files.len(), 2);
        assert_eq!(result.skipped.len(), 2);
        for expected_skip in ["unknown", "file.xyz"] {
            assert!(result.skipped.contains(&expected_skip.to_string()));
        }
    }
}
|
||||
@@ -1,9 +1,11 @@
|
||||
pub mod batch;
|
||||
pub mod categorizer;
|
||||
pub mod detector;
|
||||
pub mod mover;
|
||||
pub mod undo;
|
||||
|
||||
pub use batch::FileBatch;
|
||||
pub use categorizer::{OfflineCategorizationResult, categorize_files_offline};
|
||||
pub use detector::{is_text_file, read_file_sample};
|
||||
pub use mover::execute_move;
|
||||
pub use undo::undo_moves;
|
||||
|
||||
@@ -25,6 +25,22 @@ pub struct GeminiClient {
|
||||
}
|
||||
|
||||
impl GeminiClient {
|
||||
/// Checks if the Gemini API is reachable and the API key is valid.
|
||||
/// Makes a minimal request to verify connectivity.
|
||||
pub async fn check_connectivity(&self) -> Result<(), GeminiError> {
|
||||
let url = self.build_url();
|
||||
let request_body = json!({
|
||||
"contents": [{ "parts": [{ "text": "ping" }] }],
|
||||
"generationConfig": { "maxOutputTokens": 1 }
|
||||
});
|
||||
|
||||
match self.client.post(&url).json(&request_body).send().await {
|
||||
Ok(response) if response.status().is_success() => Ok(()),
|
||||
Ok(response) => Err(GeminiError::from_response(response).await),
|
||||
Err(e) => Err(GeminiError::NetworkError(e)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(api_key: String, categories: Vec<String>) -> Self {
|
||||
Self::with_model(api_key, DEFAULT_MODEL.to_string(), categories)
|
||||
}
|
||||
|
||||
@@ -11,6 +11,7 @@ pub fn default_categories() -> Vec<String> {
|
||||
"Documents".to_string(),
|
||||
"Installers".to_string(),
|
||||
"Music".to_string(),
|
||||
"Video".to_string(),
|
||||
"Archives".to_string(),
|
||||
"Code".to_string(),
|
||||
"Misc".to_string(),
|
||||
|
||||
@@ -8,6 +8,33 @@ const MAX_RETRIES: u32 = 3;
|
||||
pub struct Prompter;
|
||||
|
||||
impl Prompter {
|
||||
pub fn prompt_offline_mode(error_msg: &str) -> bool {
|
||||
println!();
|
||||
println!(
|
||||
"{} Unable to connect to Gemini API: {}",
|
||||
"WARNING:".yellow(),
|
||||
error_msg
|
||||
);
|
||||
println!();
|
||||
println!(
|
||||
"Continue with {} (extension-based categorization)?",
|
||||
"offline mode".cyan()
|
||||
);
|
||||
println!("Note: Files with unknown extensions will be skipped.");
|
||||
print!("[y/N]: ");
|
||||
|
||||
if std::io::stdout().flush().is_err() {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut input = String::new();
|
||||
if std::io::stdin().read_line(&mut input).is_err() {
|
||||
return false;
|
||||
}
|
||||
|
||||
matches!(input.trim().to_lowercase().as_str(), "y" | "yes")
|
||||
}
|
||||
|
||||
pub fn prompt_api_key() -> Result<String, Box<dyn std::error::Error>> {
|
||||
println!();
|
||||
println!(
|
||||
|
||||
Reference in New Issue
Block a user