feat: add offline mode with extension-based file categorization

- Add --offline flag to force offline mode
- Automatic detection when Gemini API is unavailable
- Prompt user to continue with offline mode on API failure
- Extension-based categorization for 80+ file extensions
- Add Video category to default categories
- Skip files with unknown extensions in offline mode
This commit is contained in:
2026-01-08 22:30:22 +05:30
parent 2ad9761d2d
commit eb5db4f4e6
7 changed files with 383 additions and 20 deletions

View File

@@ -23,6 +23,9 @@ pub struct Args {
#[arg(long, help = "Change api key")] #[arg(long, help = "Change api key")]
pub change_key: bool, pub change_key: bool,
#[arg(long, help = "Use offline mode (extension-based categorization)")]
pub offline: bool,
/// Optional path to organize instead of the configured download folder /// Optional path to organize instead of the configured download folder
/// ///
/// If provided, this path will be used instead of the download folder /// If provided, this path will be used instead of the download folder

View File

@@ -1,13 +1,15 @@
use crate::cli::Args; use crate::cli::Args;
use crate::files::{FileBatch, execute_move, is_text_file, read_file_sample}; use crate::files::{
FileBatch, categorize_files_offline, execute_move, is_text_file, read_file_sample,
};
use crate::gemini::GeminiClient; use crate::gemini::GeminiClient;
use crate::models::OrganizationPlan; use crate::models::OrganizationPlan;
use crate::settings::Config; use crate::settings::{Config, Prompter};
use crate::storage::{Cache, UndoLog}; use crate::storage::{Cache, UndoLog};
use colored::*; use colored::*;
use futures::future::join_all; use futures::future::join_all;
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
/// Validates that a path exists and is a readable directory /// Validates that a path exists and is a readable directory
@@ -128,8 +130,6 @@ pub async fn handle_organization(
args: Args, args: Args,
config: Config, config: Config,
) -> Result<(), Box<dyn std::error::Error>> { ) -> Result<(), Box<dyn std::error::Error>> {
let client: GeminiClient = GeminiClient::new(config.api_key, config.categories.clone());
let data_dir = Config::get_data_dir()?; let data_dir = Config::get_data_dir()?;
let cache_path = data_dir.join(".noentropy_cache.json"); let cache_path = data_dir.join(".noentropy_cache.json");
let mut cache = Cache::load_or_create(&cache_path); let mut cache = Cache::load_or_create(&cache_path);
@@ -144,7 +144,11 @@ pub async fn handle_organization(
undo_log.cleanup_old_entries(UNDO_LOG_RETENTION_SECONDS); undo_log.cleanup_old_entries(UNDO_LOG_RETENTION_SECONDS);
// Use custom path if provided, otherwise fall back to configured download folder // Use custom path if provided, otherwise fall back to configured download folder
let target_path = args.path.unwrap_or(config.download_folder); let target_path = args
.path
.as_ref()
.cloned()
.unwrap_or_else(|| config.download_folder.clone());
// Validate and normalize the target path early // Validate and normalize the target path early
let target_path = match validate_and_normalize_path(&target_path) { let target_path = match validate_and_normalize_path(&target_path) {
@@ -162,19 +166,140 @@ pub async fn handle_organization(
return Ok(()); return Ok(());
} }
println!("Found {} files to organize.", batch.count());
// Determine if we should use offline mode
let use_offline = if args.offline {
println!("{}", "Using offline mode (--offline flag).".cyan());
true
} else {
let client = GeminiClient::new(config.api_key.clone(), config.categories.clone());
match client.check_connectivity().await {
Ok(()) => false,
Err(e) => {
if Prompter::prompt_offline_mode(&e.to_string()) {
true
} else {
println!("{}", "Exiting.".yellow());
return Ok(());
}
}
}
};
let plan = if use_offline {
handle_offline_organization(&batch, &target_path, args.dry_run, &mut undo_log)?
} else {
handle_online_organization(
&args,
&config,
batch,
&target_path,
&mut cache,
&mut undo_log,
)
.await?
};
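// Save the cache only when no plan was returned (online mode returns None after moving).
// NOTE(review): every return path visible in this change yields `None` from both the
// offline and the online handler, so this condition appears to always hold - confirm.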
if plan.is_none()
&& let Err(e) = cache.save(cache_path.as_path())
{
eprintln!("Warning: Failed to save cache: {}", e);
}
if let Err(e) = undo_log.save(&undo_log_path) {
eprintln!("Warning: Failed to save undo log: {}", e);
}
Ok(())
}
/// Organizes the files in `batch` using extension-based categorization only.
///
/// Prints a per-category summary (or a notice when nothing could be
/// categorized) followed by the skipped files. Unless `dry_run` is set, the
/// resulting plan is then handed to `execute_move` together with
/// `target_path` and `undo_log`.
///
/// Every path through this function returns `Ok(None)`.
fn handle_offline_organization(
    batch: &FileBatch,
    target_path: &Path,
    dry_run: bool,
    undo_log: &mut UndoLog,
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
    println!("{}", "Categorizing files by extension...".cyan());

    let outcome = categorize_files_offline(&batch.filenames);
    let nothing_to_move = outcome.plan.files.is_empty();

    // The skipped list is reported in both cases; only the line before it differs.
    if nothing_to_move {
        println!("{}", "No files could be categorized offline.".yellow());
    } else {
        print_categorization_summary(&outcome.plan);
    }
    print_skipped_files(&outcome.skipped);

    if nothing_to_move {
        return Ok(None);
    }

    if dry_run {
        println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
    } else {
        execute_move(target_path, outcome.plan, Some(undo_log));
    }
    println!("{}", "Done!".green().bold());

    Ok(None)
}
/// Prints how many files in `plan` were assigned to each category.
///
/// Categories are listed in alphabetical order so the summary reads the same
/// on every run; iterating a `HashMap` would yield them in arbitrary order.
fn print_categorization_summary(plan: &OrganizationPlan) {
    use std::collections::BTreeMap;

    // A BTreeMap iterates its keys in sorted order, which gives the stable listing.
    let mut counts: BTreeMap<&str, usize> = BTreeMap::new();
    for file in &plan.files {
        *counts.entry(file.category.as_str()).or_default() += 1;
    }

    println!();
    println!("{}", "Categorized files:".green());
    for (category, count) in &counts {
        println!(" {}: {} file(s)", category.cyan(), count);
    }
    println!();
}
fn print_skipped_files(skipped: &[String]) {
if skipped.is_empty() {
return;
}
println!( println!(
"Found {} files. Asking Gemini to organize...", "{} {} file(s) with unknown extension:",
batch.count() "Skipped".yellow(),
skipped.len()
); );
for filename in skipped.iter().take(10) {
println!(" - {}", filename);
}
if skipped.len() > 10 {
println!(" ... and {} more", skipped.len() - 10);
}
println!();
}
async fn handle_online_organization(
args: &Args,
config: &Config,
batch: FileBatch,
target_path: &Path,
cache: &mut Cache,
undo_log: &mut UndoLog,
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
let client = GeminiClient::new(config.api_key.clone(), config.categories.clone());
println!("Asking Gemini to organize...");
let mut plan: OrganizationPlan = match client let mut plan: OrganizationPlan = match client
.organize_files_in_batches(batch.filenames, Some(&mut cache), Some(&target_path)) .organize_files_in_batches(batch.filenames, Some(cache), Some(target_path))
.await .await
{ {
Ok(plan) => plan, Ok(plan) => plan,
Err(e) => { Err(e) => {
handle_gemini_error(e); handle_gemini_error(e);
return Ok(()); return Ok(None);
} }
}; };
@@ -229,19 +354,11 @@ pub async fn handle_organization(
if args.dry_run { if args.dry_run {
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan()); println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
} else { } else {
execute_move(&target_path, plan, Some(&mut undo_log)); execute_move(target_path, plan, Some(undo_log));
} }
println!("{}", "Done!".green().bold()); println!("{}", "Done!".green().bold());
if let Err(e) = cache.save(cache_path.as_path()) { Ok(None)
eprintln!("Warning: Failed to save cache: {}", e);
}
if let Err(e) = undo_log.save(&undo_log_path) {
eprintln!("Warning: Failed to save undo log: {}", e);
}
Ok(())
} }
pub async fn handle_undo( pub async fn handle_undo(

197
src/files/categorizer.rs Normal file
View File

@@ -0,0 +1,197 @@
use std::collections::HashMap;
use std::path::Path;
use std::sync::LazyLock;
use crate::models::{FileCategory, OrganizationPlan};
/// Lookup table from a lowercase file extension (without the leading dot) to
/// the name of the category it is filed under.
///
/// The map is built on first access from the grouped table below.
static EXTENSION_MAP: LazyLock<HashMap<&'static str, &'static str>> = LazyLock::new(|| {
    // Each row pairs a category name with every extension that belongs to it.
    const GROUPS: &[(&str, &[&str])] = &[
        (
            "Images",
            &[
                "jpg", "jpeg", "png", "gif", "bmp", "svg", "webp", "ico", "tiff", "tif", "raw",
                "heic", "heif",
            ],
        ),
        (
            "Documents",
            &[
                "pdf", "doc", "docx", "txt", "rtf", "odt", "xls", "xlsx", "ppt", "pptx", "csv",
                "md", "epub",
            ],
        ),
        (
            "Installers",
            &[
                "exe", "msi", "dmg", "deb", "rpm", "app", "appimage", "pkg", "snap",
            ],
        ),
        (
            "Music",
            &[
                "mp3", "wav", "flac", "aac", "ogg", "wma", "m4a", "opus", "aiff",
            ],
        ),
        (
            "Video",
            &[
                "mp4", "mkv", "avi", "mov", "wmv", "flv", "webm", "m4v", "mpeg", "mpg",
            ],
        ),
        (
            "Archives",
            &["zip", "tar", "gz", "rar", "7z", "bz2", "xz", "tgz", "zst"],
        ),
        (
            "Code",
            &[
                "rs", "py", "js", "ts", "java", "c", "cpp", "h", "hpp", "go", "rb", "php", "html",
                "css", "json", "yaml", "yml", "toml", "xml", "sh", "bash", "sql",
            ],
        ),
    ];

    // Flatten the grouped table into (extension, category) pairs.
    GROUPS
        .iter()
        .flat_map(|&(category, extensions)| extensions.iter().map(move |&ext| (ext, category)))
        .collect::<HashMap<_, _>>()
});
/// Looks up the category for `filename` based on its extension.
///
/// The extension is lowercased before the lookup, so matching is
/// case-insensitive. Returns `None` when the name has no extension, when the
/// extension is not valid UTF-8, or when it is not present in `EXTENSION_MAP`.
pub fn categorize_by_extension(filename: &str) -> Option<&'static str> {
    let extension = Path::new(filename).extension()?.to_str()?;
    let normalized = extension.to_lowercase();
    EXTENSION_MAP.get(normalized.as_str()).copied()
}
/// Result of offline categorization
pub struct OfflineCategorizationResult {
/// Organization plan covering the files that were categorized.
pub plan: OrganizationPlan,
/// Filenames that were skipped and are therefore not part of `plan`.
pub skipped: Vec<String>,
}
/// Splits `filenames` into a plan of categorized files and a list of skipped ones.
///
/// A filename goes into the plan, with an empty `sub_category`, when
/// `categorize_by_extension` recognizes it; otherwise it is added to
/// `skipped`. Input order is preserved in both outputs.
pub fn categorize_files_offline(filenames: &[String]) -> OfflineCategorizationResult {
    let mut plan = OrganizationPlan {
        files: Vec::with_capacity(filenames.len()),
    };
    let mut skipped = Vec::new();

    for name in filenames {
        let Some(category) = categorize_by_extension(name) else {
            skipped.push(name.clone());
            continue;
        };
        plan.files.push(FileCategory {
            filename: name.clone(),
            category: category.to_string(),
            sub_category: String::new(),
        });
    }

    OfflineCategorizationResult { plan, skipped }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// One representative extension per category maps to that category.
    #[test]
    fn test_categorize_known_extensions() {
        let cases = [
            ("photo.jpg", "Images"),
            ("document.pdf", "Documents"),
            ("setup.exe", "Installers"),
            ("song.mp3", "Music"),
            ("movie.mp4", "Video"),
            ("archive.zip", "Archives"),
            ("main.rs", "Code"),
        ];
        for (filename, expected) in cases {
            assert_eq!(
                categorize_by_extension(filename),
                Some(expected),
                "unexpected category for {filename}"
            );
        }
    }

    /// Extension matching ignores letter case.
    #[test]
    fn test_categorize_case_insensitive() {
        assert_eq!(categorize_by_extension("PHOTO.JPG"), Some("Images"));
        assert_eq!(categorize_by_extension("Photo.Png"), Some("Images"));
    }

    /// Extensions missing from the table yield `None`.
    #[test]
    fn test_categorize_unknown_extension() {
        assert_eq!(categorize_by_extension("file.xyz"), None);
        assert_eq!(categorize_by_extension("file.unknown"), None);
    }

    /// Names without any extension yield `None`.
    #[test]
    fn test_categorize_no_extension() {
        assert_eq!(categorize_by_extension("README"), None);
        assert_eq!(categorize_by_extension("Makefile"), None);
    }

    /// Only the part after the final dot is considered.
    #[test]
    fn test_categorize_compound_extension() {
        assert_eq!(categorize_by_extension("backup.tar.gz"), Some("Archives"));
        assert_eq!(categorize_by_extension("report.final.PDF"), Some("Documents"));
    }

    /// Dotfiles have no extension, and a trailing dot gives an empty one.
    #[test]
    fn test_categorize_dotfile_and_trailing_dot() {
        assert_eq!(categorize_by_extension(".gitignore"), None);
        assert_eq!(categorize_by_extension("notes."), None);
    }

    /// Known files land in the plan with their category; the rest are skipped.
    #[test]
    fn test_categorize_files_offline() {
        let filenames = vec![
            "photo.jpg".to_string(),
            "doc.pdf".to_string(),
            "unknown".to_string(),
            "file.xyz".to_string(),
        ];
        let result = categorize_files_offline(&filenames);

        assert_eq!(result.plan.files.len(), 2);
        assert_eq!(result.skipped.len(), 2);
        assert!(result.skipped.contains(&"unknown".to_string()));
        assert!(result.skipped.contains(&"file.xyz".to_string()));

        // Input order is kept and each entry carries the looked-up category.
        let planned: Vec<(&str, &str)> = result
            .plan
            .files
            .iter()
            .map(|file| (file.filename.as_str(), file.category.as_str()))
            .collect();
        assert_eq!(
            planned,
            vec![("photo.jpg", "Images"), ("doc.pdf", "Documents")]
        );
        assert!(result.plan.files.iter().all(|file| file.sub_category.is_empty()));
        assert_eq!(result.skipped, ["unknown", "file.xyz"]);
    }

    /// An empty input produces an empty plan and nothing skipped.
    #[test]
    fn test_categorize_files_offline_empty_input() {
        let result = categorize_files_offline(&[]);
        assert!(result.plan.files.is_empty());
        assert!(result.skipped.is_empty());
    }
}

View File

@@ -1,9 +1,11 @@
pub mod batch; pub mod batch;
pub mod categorizer;
pub mod detector; pub mod detector;
pub mod mover; pub mod mover;
pub mod undo; pub mod undo;
pub use batch::FileBatch; pub use batch::FileBatch;
pub use categorizer::{OfflineCategorizationResult, categorize_files_offline};
pub use detector::{is_text_file, read_file_sample}; pub use detector::{is_text_file, read_file_sample};
pub use mover::execute_move; pub use mover::execute_move;
pub use undo::undo_moves; pub use undo::undo_moves;

View File

@@ -25,6 +25,22 @@ pub struct GeminiClient {
} }
impl GeminiClient { impl GeminiClient {
/// Probes the Gemini API with a minimal generation request.
///
/// Posts a one-token "ping" prompt to the URL from `build_url`. Returns
/// `Ok(())` on a success status, `GeminiError::NetworkError` when the request
/// could not be sent, and the error built by `GeminiError::from_response` for
/// any other status.
/// NOTE(review): a rejected API key is presumably reported through the
/// non-success path - confirm against `from_response`.
pub async fn check_connectivity(&self) -> Result<(), GeminiError> {
    let endpoint = self.build_url();
    let probe = json!({
        "contents": [{ "parts": [{ "text": "ping" }] }],
        "generationConfig": { "maxOutputTokens": 1 }
    });

    let response = self
        .client
        .post(&endpoint)
        .json(&probe)
        .send()
        .await
        .map_err(GeminiError::NetworkError)?;

    if response.status().is_success() {
        return Ok(());
    }
    Err(GeminiError::from_response(response).await)
}
pub fn new(api_key: String, categories: Vec<String>) -> Self { pub fn new(api_key: String, categories: Vec<String>) -> Self {
Self::with_model(api_key, DEFAULT_MODEL.to_string(), categories) Self::with_model(api_key, DEFAULT_MODEL.to_string(), categories)
} }

View File

@@ -11,6 +11,7 @@ pub fn default_categories() -> Vec<String> {
"Documents".to_string(), "Documents".to_string(),
"Installers".to_string(), "Installers".to_string(),
"Music".to_string(), "Music".to_string(),
"Video".to_string(),
"Archives".to_string(), "Archives".to_string(),
"Code".to_string(), "Code".to_string(),
"Misc".to_string(), "Misc".to_string(),

View File

@@ -8,6 +8,33 @@ const MAX_RETRIES: u32 = 3;
pub struct Prompter; pub struct Prompter;
impl Prompter { impl Prompter {
/// Asks the user whether to fall back to offline mode after an API failure.
///
/// Prints `error_msg` as a warning, explains the fallback, and reads one
/// line from stdin. Returns `true` only for "y" or "yes" (any letter case,
/// surrounding whitespace ignored). Returns `false` for any other answer and
/// when flushing stdout or reading stdin fails.
pub fn prompt_offline_mode(error_msg: &str) -> bool {
    println!();
    println!(
        "{} Unable to connect to Gemini API: {}",
        "WARNING:".yellow(),
        error_msg
    );
    println!();
    println!(
        "Continue with {} (extension-based categorization)?",
        "offline mode".cyan()
    );
    println!("Note: Files with unknown extensions will be skipped.");
    print!("[y/N]: ");

    // Short-circuits: stdin is not read when the prompt could not be flushed.
    let mut answer = String::new();
    let io_ok = std::io::stdout().flush().is_ok()
        && std::io::stdin().read_line(&mut answer).is_ok();
    if !io_ok {
        return false;
    }

    let answer = answer.trim().to_lowercase();
    answer == "y" || answer == "yes"
}
pub fn prompt_api_key() -> Result<String, Box<dyn std::error::Error>> { pub fn prompt_api_key() -> Result<String, Box<dyn std::error::Error>> {
println!(); println!();
println!( println!(