refactor: modularize CLI and optimize cache metadata lookups
- Extract error handling, path validation, and handlers into separate modules
- Add CacheCheckResult to pre-fetch metadata and avoid double lookups
- Deprecate legacy cache methods in favor of optimized alternatives
- Enable tokio fs feature for async file operations
- Remove the `debug = true` setting from the release profile
This commit is contained in:
@@ -13,12 +13,9 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"
|
|||||||
serde = { version = "1.0.228", features = ["derive"] }
|
serde = { version = "1.0.228", features = ["derive"] }
|
||||||
serde_json = "1.0.145"
|
serde_json = "1.0.145"
|
||||||
thiserror = "2.0.11"
|
thiserror = "2.0.11"
|
||||||
tokio = { version = "1.48.0", features = ["rt-multi-thread", "macros", "sync", "time"] }
|
tokio = { version = "1.48.0", features = ["rt-multi-thread", "macros", "sync", "time", "fs"] }
|
||||||
toml = "0.8.19"
|
toml = "0.8.19"
|
||||||
walkdir = "2.5.0"
|
walkdir = "2.5.0"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.15"
|
tempfile = "3.15"
|
||||||
|
|
||||||
[profile.release]
|
|
||||||
debug = true
|
|
||||||
|
|||||||
79
src/cli/errors.rs
Normal file
79
src/cli/errors.rs
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
use colored::*;
|
||||||
|
|
||||||
|
pub fn handle_gemini_error(error: crate::gemini::GeminiError) {
|
||||||
|
match error {
|
||||||
|
crate::gemini::GeminiError::RateLimitExceeded { retry_after } => {
|
||||||
|
println!(
|
||||||
|
"{} API rate limit exceeded. Please wait {} seconds before trying again.",
|
||||||
|
"ERROR:".red(),
|
||||||
|
retry_after
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::QuotaExceeded { limit } => {
|
||||||
|
println!(
|
||||||
|
"{} Quota exceeded: {}. Please check your Gemini API usage.",
|
||||||
|
"ERROR:".red(),
|
||||||
|
limit
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::ModelNotFound { model } => {
|
||||||
|
println!(
|
||||||
|
"{} Model '{}' not found. Please check the model name in the configuration.",
|
||||||
|
"ERROR:".red(),
|
||||||
|
model
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::InvalidApiKey => {
|
||||||
|
println!(
|
||||||
|
"{} Invalid API key. Please check your GEMINI_API_KEY environment variable.",
|
||||||
|
"ERROR:".red()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::ContentPolicyViolation { reason } => {
|
||||||
|
println!("{} Content policy violation: {}", "ERROR:".red(), reason);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::ServiceUnavailable { reason } => {
|
||||||
|
println!(
|
||||||
|
"{} Gemini service is temporarily unavailable: {}",
|
||||||
|
"ERROR:".red(),
|
||||||
|
reason
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::NetworkError(e) => {
|
||||||
|
println!("{} Network error: {}", "ERROR:".red(), e);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::Timeout { seconds } => {
|
||||||
|
println!(
|
||||||
|
"{} Request timed out after {} seconds.",
|
||||||
|
"ERROR:".red(),
|
||||||
|
seconds
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::InvalidRequest { details } => {
|
||||||
|
println!("{} Invalid request: {}", "ERROR:".red(), details);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::ApiError { status, message } => {
|
||||||
|
println!(
|
||||||
|
"{} API error (HTTP {}): {}",
|
||||||
|
"ERROR:".red(),
|
||||||
|
status,
|
||||||
|
message
|
||||||
|
);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::InvalidResponse(msg) => {
|
||||||
|
println!("{} Invalid response from Gemini: {}", "ERROR:".red(), msg);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::InternalError { details } => {
|
||||||
|
println!("{} Internal server error: {}", "ERROR:".red(), details);
|
||||||
|
}
|
||||||
|
crate::gemini::GeminiError::SerializationError(e) => {
|
||||||
|
println!("{} JSON serialization error: {}", "ERROR:".red(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("\n{} Check the following:", "HINT:".yellow());
|
||||||
|
println!(" - Your GEMINI_API_KEY is correctly set");
|
||||||
|
println!(" - Your internet connection is working");
|
||||||
|
println!(" - Gemini API service is available");
|
||||||
|
println!(" - You haven't exceeded your API quota");
|
||||||
|
}
|
||||||
7
src/cli/handlers/mod.rs
Normal file
7
src/cli/handlers/mod.rs
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
mod offline;
|
||||||
|
mod online;
|
||||||
|
mod undo;
|
||||||
|
|
||||||
|
pub use offline::handle_offline_organization;
|
||||||
|
pub use online::handle_online_organization;
|
||||||
|
pub use undo::handle_undo;
|
||||||
69
src/cli/handlers/offline.rs
Normal file
69
src/cli/handlers/offline.rs
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
use crate::files::{FileBatch, categorize_files_offline, execute_move};
|
||||||
|
use crate::models::OrganizationPlan;
|
||||||
|
use crate::storage::UndoLog;
|
||||||
|
use colored::*;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
pub fn handle_offline_organization(
|
||||||
|
batch: &FileBatch,
|
||||||
|
target_path: &Path,
|
||||||
|
dry_run: bool,
|
||||||
|
undo_log: &mut UndoLog,
|
||||||
|
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
|
||||||
|
println!("{}", "Categorizing files by extension...".cyan());
|
||||||
|
|
||||||
|
let result = categorize_files_offline(&batch.filenames);
|
||||||
|
|
||||||
|
if result.plan.files.is_empty() {
|
||||||
|
println!("{}", "No files could be categorized offline.".yellow());
|
||||||
|
print_skipped_files(&result.skipped);
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print categorization summary
|
||||||
|
print_categorization_summary(&result.plan);
|
||||||
|
print_skipped_files(&result.skipped);
|
||||||
|
|
||||||
|
if dry_run {
|
||||||
|
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
|
||||||
|
} else {
|
||||||
|
execute_move(target_path, result.plan, Some(undo_log));
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("{}", "Done!".green().bold());
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_categorization_summary(plan: &OrganizationPlan) {
|
||||||
|
let mut counts: HashMap<&str, usize> = HashMap::new();
|
||||||
|
for file in &plan.files {
|
||||||
|
*counts.entry(file.category.as_str()).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("{}", "Categorized files:".green());
|
||||||
|
for (category, count) in &counts {
|
||||||
|
println!(" {}: {} file(s)", category.cyan(), count);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn print_skipped_files(skipped: &[String]) {
|
||||||
|
if skipped.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"{} {} file(s) with unknown extension:",
|
||||||
|
"Skipped".yellow(),
|
||||||
|
skipped.len()
|
||||||
|
);
|
||||||
|
for filename in skipped.iter().take(10) {
|
||||||
|
println!(" - {}", filename);
|
||||||
|
}
|
||||||
|
if skipped.len() > 10 {
|
||||||
|
println!(" ... and {} more", skipped.len() - 10);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
92
src/cli/handlers/online.rs
Normal file
92
src/cli/handlers/online.rs
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
use crate::cli::Args;
|
||||||
|
use crate::cli::errors::handle_gemini_error;
|
||||||
|
use crate::files::{FileBatch, execute_move, is_text_file, read_file_sample};
|
||||||
|
use crate::gemini::GeminiClient;
|
||||||
|
use crate::models::OrganizationPlan;
|
||||||
|
use crate::settings::Config;
|
||||||
|
use crate::storage::{Cache, UndoLog};
|
||||||
|
use colored::*;
|
||||||
|
use futures::future::join_all;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
pub async fn handle_online_organization(
|
||||||
|
args: &Args,
|
||||||
|
config: &Config,
|
||||||
|
batch: FileBatch,
|
||||||
|
target_path: &Path,
|
||||||
|
cache: &mut Cache,
|
||||||
|
undo_log: &mut UndoLog,
|
||||||
|
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
|
||||||
|
let client = GeminiClient::new(config.api_key.clone(), config.categories.clone());
|
||||||
|
|
||||||
|
println!("Asking Gemini to organize...");
|
||||||
|
|
||||||
|
let mut plan: OrganizationPlan = match client
|
||||||
|
.organize_files_in_batches(batch.filenames, Some(cache), Some(target_path))
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(plan) => plan,
|
||||||
|
Err(e) => {
|
||||||
|
handle_gemini_error(e);
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
"Gemini Plan received! Performing deep inspection...".green()
|
||||||
|
);
|
||||||
|
|
||||||
|
let client_arc: Arc<GeminiClient> = Arc::new(client);
|
||||||
|
let semaphore: Arc<tokio::sync::Semaphore> =
|
||||||
|
Arc::new(tokio::sync::Semaphore::new(args.max_concurrent));
|
||||||
|
|
||||||
|
let tasks: Vec<_> = plan
|
||||||
|
.files
|
||||||
|
.iter_mut()
|
||||||
|
.zip(batch.paths.iter())
|
||||||
|
.map(
|
||||||
|
|(file_category, path): (&mut crate::models::FileCategory, &PathBuf)| {
|
||||||
|
let client: Arc<GeminiClient> = Arc::clone(&client_arc);
|
||||||
|
let filename: String = file_category.filename.clone();
|
||||||
|
let category: String = file_category.category.clone();
|
||||||
|
let path: PathBuf = path.clone();
|
||||||
|
let semaphore: Arc<tokio::sync::Semaphore> = Arc::clone(&semaphore);
|
||||||
|
|
||||||
|
async move {
|
||||||
|
if is_text_file(&path) {
|
||||||
|
let _permit = semaphore.acquire().await.unwrap();
|
||||||
|
if let Some(content) = read_file_sample(&path, 5000) {
|
||||||
|
println!("Reading content of {}...", filename.green());
|
||||||
|
client
|
||||||
|
.get_ai_sub_category(&filename, &category, &content)
|
||||||
|
.await
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let sub_categories: Vec<String> = join_all(tasks).await;
|
||||||
|
|
||||||
|
for (file_category, sub_category) in plan.files.iter_mut().zip(sub_categories) {
|
||||||
|
file_category.sub_category = sub_category;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("{}", "Deep inspection complete! Moving Files.....".green());
|
||||||
|
|
||||||
|
if args.dry_run {
|
||||||
|
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
|
||||||
|
} else {
|
||||||
|
execute_move(target_path, plan, Some(undo_log));
|
||||||
|
}
|
||||||
|
println!("{}", "Done!".green().bold());
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
53
src/cli/handlers/undo.rs
Normal file
53
src/cli/handlers/undo.rs
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
use crate::cli::Args;
|
||||||
|
use crate::cli::path_utils::validate_and_normalize_path;
|
||||||
|
use crate::settings::Config;
|
||||||
|
use crate::storage::UndoLog;
|
||||||
|
use colored::*;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
pub async fn handle_undo(
|
||||||
|
args: Args,
|
||||||
|
download_path: PathBuf,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let undo_log_path = Config::get_undo_log_path()?;
|
||||||
|
|
||||||
|
if !undo_log_path.exists() {
|
||||||
|
println!("{}", "No undo log found. Nothing to undo.".yellow());
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut undo_log = UndoLog::load_or_create(&undo_log_path);
|
||||||
|
|
||||||
|
if !undo_log.has_completed_moves() {
|
||||||
|
println!("{}", "No completed moves to undo.".yellow());
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use custom path if provided, otherwise use the configured download path
|
||||||
|
let target_path = args.path.unwrap_or(download_path);
|
||||||
|
|
||||||
|
// Validate and normalize the target path early
|
||||||
|
let target_path = match validate_and_normalize_path(&target_path).await {
|
||||||
|
Ok(normalized) => normalized,
|
||||||
|
Err(e) => {
|
||||||
|
println!("{}", format!("ERROR: {}", e).red());
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
crate::files::undo_moves(&target_path, &mut undo_log, args.dry_run)?;
|
||||||
|
|
||||||
|
if let Err(e) = undo_log.save(&undo_log_path) {
|
||||||
|
eprintln!(
|
||||||
|
"{}",
|
||||||
|
format!(
|
||||||
|
"WARNING: Failed to save undo log to '{}': {}. Your undo history may be incomplete.",
|
||||||
|
undo_log_path.display(),
|
||||||
|
e
|
||||||
|
)
|
||||||
|
.yellow()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
@@ -1,5 +1,10 @@
|
|||||||
pub mod args;
|
pub mod args;
|
||||||
|
pub mod errors;
|
||||||
|
mod handlers;
|
||||||
pub mod orchestrator;
|
pub mod orchestrator;
|
||||||
|
pub mod path_utils;
|
||||||
|
|
||||||
pub use args::Args;
|
pub use args::Args;
|
||||||
pub use orchestrator::{handle_gemini_error, handle_organization, handle_undo};
|
pub use errors::handle_gemini_error;
|
||||||
|
pub use handlers::handle_undo;
|
||||||
|
pub use orchestrator::handle_organization;
|
||||||
|
|||||||
@@ -1,131 +1,14 @@
|
|||||||
use crate::cli::Args;
|
use crate::cli::Args;
|
||||||
use crate::files::{
|
use crate::cli::handlers::{handle_offline_organization, handle_online_organization};
|
||||||
FileBatch, categorize_files_offline, execute_move, is_text_file, read_file_sample,
|
use crate::cli::path_utils::validate_and_normalize_path;
|
||||||
};
|
use crate::files::FileBatch;
|
||||||
use crate::gemini::GeminiClient;
|
use crate::gemini::GeminiClient;
|
||||||
use crate::models::OrganizationPlan;
|
|
||||||
use crate::settings::{Config, Prompter};
|
use crate::settings::{Config, Prompter};
|
||||||
use crate::storage::{Cache, UndoLog};
|
use crate::storage::{Cache, UndoLog};
|
||||||
use colored::*;
|
use colored::*;
|
||||||
use futures::future::join_all;
|
|
||||||
use std::fs;
|
|
||||||
use std::path::{Path, PathBuf};
|
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
/// Validates that a path exists and is a readable directory
|
|
||||||
/// Returns the canonicalized path if validation succeeds
|
|
||||||
fn validate_and_normalize_path(path: &PathBuf) -> Result<PathBuf, String> {
|
|
||||||
if !path.exists() {
|
|
||||||
return Err(format!("Path '{}' does not exist", path.display()));
|
|
||||||
}
|
|
||||||
|
|
||||||
if !path.is_dir() {
|
|
||||||
return Err(format!("Path '{}' is not a directory", path.display()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we can read the directory
|
|
||||||
match fs::read_dir(path) {
|
|
||||||
Ok(_) => (),
|
|
||||||
Err(e) => {
|
|
||||||
return Err(format!(
|
|
||||||
"Cannot access directory '{}': {}",
|
|
||||||
path.display(),
|
|
||||||
e
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize the path to resolve ., .., and symlinks
|
|
||||||
match path.canonicalize() {
|
|
||||||
Ok(canonical) => Ok(canonical),
|
|
||||||
Err(e) => Err(format!(
|
|
||||||
"Failed to normalize path '{}': {}",
|
|
||||||
path.display(),
|
|
||||||
e
|
|
||||||
)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn handle_gemini_error(error: crate::gemini::GeminiError) {
|
|
||||||
use colored::*;
|
|
||||||
|
|
||||||
match error {
|
|
||||||
crate::gemini::GeminiError::RateLimitExceeded { retry_after } => {
|
|
||||||
println!(
|
|
||||||
"{} API rate limit exceeded. Please wait {} seconds before trying again.",
|
|
||||||
"ERROR:".red(),
|
|
||||||
retry_after
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::QuotaExceeded { limit } => {
|
|
||||||
println!(
|
|
||||||
"{} Quota exceeded: {}. Please check your Gemini API usage.",
|
|
||||||
"ERROR:".red(),
|
|
||||||
limit
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::ModelNotFound { model } => {
|
|
||||||
println!(
|
|
||||||
"{} Model '{}' not found. Please check the model name in the configuration.",
|
|
||||||
"ERROR:".red(),
|
|
||||||
model
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::InvalidApiKey => {
|
|
||||||
println!(
|
|
||||||
"{} Invalid API key. Please check your GEMINI_API_KEY environment variable.",
|
|
||||||
"ERROR:".red()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::ContentPolicyViolation { reason } => {
|
|
||||||
println!("{} Content policy violation: {}", "ERROR:".red(), reason);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::ServiceUnavailable { reason } => {
|
|
||||||
println!(
|
|
||||||
"{} Gemini service is temporarily unavailable: {}",
|
|
||||||
"ERROR:".red(),
|
|
||||||
reason
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::NetworkError(e) => {
|
|
||||||
println!("{} Network error: {}", "ERROR:".red(), e);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::Timeout { seconds } => {
|
|
||||||
println!(
|
|
||||||
"{} Request timed out after {} seconds.",
|
|
||||||
"ERROR:".red(),
|
|
||||||
seconds
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::InvalidRequest { details } => {
|
|
||||||
println!("{} Invalid request: {}", "ERROR:".red(), details);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::ApiError { status, message } => {
|
|
||||||
println!(
|
|
||||||
"{} API error (HTTP {}): {}",
|
|
||||||
"ERROR:".red(),
|
|
||||||
status,
|
|
||||||
message
|
|
||||||
);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::InvalidResponse(msg) => {
|
|
||||||
println!("{} Invalid response from Gemini: {}", "ERROR:".red(), msg);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::InternalError { details } => {
|
|
||||||
println!("{} Internal server error: {}", "ERROR:".red(), details);
|
|
||||||
}
|
|
||||||
crate::gemini::GeminiError::SerializationError(e) => {
|
|
||||||
println!("{} JSON serialization error: {}", "ERROR:".red(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("\n{} Check the following:", "HINT:".yellow());
|
|
||||||
println!(" • Your GEMINI_API_KEY is correctly set");
|
|
||||||
println!(" • Your internet connection is working");
|
|
||||||
println!(" • Gemini API service is available");
|
|
||||||
println!(" • You haven't exceeded your API quota");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/// Main entry point for file organization.
|
||||||
|
/// Coordinates cache, undo log, and delegates to online/offline handlers.
|
||||||
pub async fn handle_organization(
|
pub async fn handle_organization(
|
||||||
args: Args,
|
args: Args,
|
||||||
config: Config,
|
config: Config,
|
||||||
@@ -151,7 +34,7 @@ pub async fn handle_organization(
|
|||||||
.unwrap_or_else(|| config.download_folder.clone());
|
.unwrap_or_else(|| config.download_folder.clone());
|
||||||
|
|
||||||
// Validate and normalize the target path early
|
// Validate and normalize the target path early
|
||||||
let target_path = match validate_and_normalize_path(&target_path) {
|
let target_path = match validate_and_normalize_path(&target_path).await {
|
||||||
Ok(normalized) => normalized,
|
Ok(normalized) => normalized,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("{}", format!("ERROR: {}", e).red());
|
println!("{}", format!("ERROR: {}", e).red());
|
||||||
@@ -214,195 +97,3 @@ pub async fn handle_organization(
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_offline_organization(
|
|
||||||
batch: &FileBatch,
|
|
||||||
target_path: &Path,
|
|
||||||
dry_run: bool,
|
|
||||||
undo_log: &mut UndoLog,
|
|
||||||
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
|
|
||||||
println!("{}", "Categorizing files by extension...".cyan());
|
|
||||||
|
|
||||||
let result = categorize_files_offline(&batch.filenames);
|
|
||||||
|
|
||||||
if result.plan.files.is_empty() {
|
|
||||||
println!("{}", "No files could be categorized offline.".yellow());
|
|
||||||
print_skipped_files(&result.skipped);
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print categorization summary
|
|
||||||
print_categorization_summary(&result.plan);
|
|
||||||
print_skipped_files(&result.skipped);
|
|
||||||
|
|
||||||
if dry_run {
|
|
||||||
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
|
|
||||||
} else {
|
|
||||||
execute_move(target_path, result.plan, Some(undo_log));
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("{}", "Done!".green().bold());
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn print_categorization_summary(plan: &OrganizationPlan) {
|
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
let mut counts: HashMap<&str, usize> = HashMap::new();
|
|
||||||
for file in &plan.files {
|
|
||||||
*counts.entry(file.category.as_str()).or_insert(0) += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
println!();
|
|
||||||
println!("{}", "Categorized files:".green());
|
|
||||||
for (category, count) in &counts {
|
|
||||||
println!(" {}: {} file(s)", category.cyan(), count);
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
|
|
||||||
fn print_skipped_files(skipped: &[String]) {
|
|
||||||
if skipped.is_empty() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"{} {} file(s) with unknown extension:",
|
|
||||||
"Skipped".yellow(),
|
|
||||||
skipped.len()
|
|
||||||
);
|
|
||||||
for filename in skipped.iter().take(10) {
|
|
||||||
println!(" - {}", filename);
|
|
||||||
}
|
|
||||||
if skipped.len() > 10 {
|
|
||||||
println!(" ... and {} more", skipped.len() - 10);
|
|
||||||
}
|
|
||||||
println!();
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn handle_online_organization(
|
|
||||||
args: &Args,
|
|
||||||
config: &Config,
|
|
||||||
batch: FileBatch,
|
|
||||||
target_path: &Path,
|
|
||||||
cache: &mut Cache,
|
|
||||||
undo_log: &mut UndoLog,
|
|
||||||
) -> Result<Option<OrganizationPlan>, Box<dyn std::error::Error>> {
|
|
||||||
let client = GeminiClient::new(config.api_key.clone(), config.categories.clone());
|
|
||||||
|
|
||||||
println!("Asking Gemini to organize...");
|
|
||||||
|
|
||||||
let mut plan: OrganizationPlan = match client
|
|
||||||
.organize_files_in_batches(batch.filenames, Some(cache), Some(target_path))
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
Ok(plan) => plan,
|
|
||||||
Err(e) => {
|
|
||||||
handle_gemini_error(e);
|
|
||||||
return Ok(None);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
println!(
|
|
||||||
"{}",
|
|
||||||
"Gemini Plan received! Performing deep inspection...".green()
|
|
||||||
);
|
|
||||||
|
|
||||||
let client_arc: Arc<GeminiClient> = Arc::new(client);
|
|
||||||
let semaphore: Arc<tokio::sync::Semaphore> =
|
|
||||||
Arc::new(tokio::sync::Semaphore::new(args.max_concurrent));
|
|
||||||
|
|
||||||
let tasks: Vec<_> = plan
|
|
||||||
.files
|
|
||||||
.iter_mut()
|
|
||||||
.zip(batch.paths.iter())
|
|
||||||
.map(
|
|
||||||
|(file_category, path): (&mut crate::models::FileCategory, &PathBuf)| {
|
|
||||||
let client: Arc<GeminiClient> = Arc::clone(&client_arc);
|
|
||||||
let filename: String = file_category.filename.clone();
|
|
||||||
let category: String = file_category.category.clone();
|
|
||||||
let path: PathBuf = path.clone();
|
|
||||||
let semaphore: Arc<tokio::sync::Semaphore> = Arc::clone(&semaphore);
|
|
||||||
|
|
||||||
async move {
|
|
||||||
if is_text_file(&path) {
|
|
||||||
let _permit = semaphore.acquire().await.unwrap();
|
|
||||||
if let Some(content) = read_file_sample(&path, 5000) {
|
|
||||||
println!("Reading content of {}...", filename.green());
|
|
||||||
client
|
|
||||||
.get_ai_sub_category(&filename, &category, &content)
|
|
||||||
.await
|
|
||||||
} else {
|
|
||||||
String::new()
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
String::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let sub_categories: Vec<String> = join_all(tasks).await;
|
|
||||||
|
|
||||||
for (file_category, sub_category) in plan.files.iter_mut().zip(sub_categories) {
|
|
||||||
file_category.sub_category = sub_category;
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("{}", "Deep inspection complete! Moving Files.....".green());
|
|
||||||
|
|
||||||
if args.dry_run {
|
|
||||||
println!("{} Dry run mode - skipping file moves.", "INFO:".cyan());
|
|
||||||
} else {
|
|
||||||
execute_move(target_path, plan, Some(undo_log));
|
|
||||||
}
|
|
||||||
println!("{}", "Done!".green().bold());
|
|
||||||
|
|
||||||
Ok(None)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn handle_undo(
|
|
||||||
args: Args,
|
|
||||||
download_path: PathBuf,
|
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
|
||||||
let undo_log_path = Config::get_undo_log_path()?;
|
|
||||||
|
|
||||||
if !undo_log_path.exists() {
|
|
||||||
println!("{}", "No undo log found. Nothing to undo.".yellow());
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut undo_log = UndoLog::load_or_create(&undo_log_path);
|
|
||||||
|
|
||||||
if !undo_log.has_completed_moves() {
|
|
||||||
println!("{}", "No completed moves to undo.".yellow());
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use custom path if provided, otherwise use the configured download path
|
|
||||||
let target_path = args.path.unwrap_or(download_path);
|
|
||||||
|
|
||||||
// Validate and normalize the target path early
|
|
||||||
let target_path = match validate_and_normalize_path(&target_path) {
|
|
||||||
Ok(normalized) => normalized,
|
|
||||||
Err(e) => {
|
|
||||||
println!("{}", format!("ERROR: {}", e).red());
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
crate::files::undo_moves(&target_path, &mut undo_log, args.dry_run)?;
|
|
||||||
|
|
||||||
if let Err(e) = undo_log.save(&undo_log_path) {
|
|
||||||
eprintln!(
|
|
||||||
"{}",
|
|
||||||
format!(
|
|
||||||
"WARNING: Failed to save undo log to '{}': {}. Your undo history may be incomplete.",
|
|
||||||
undo_log_path.display(),
|
|
||||||
e
|
|
||||||
).yellow()
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|||||||
30
src/cli/path_utils.rs
Normal file
30
src/cli/path_utils.rs
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
/// Validates that a path exists and is a readable directory.
|
||||||
|
/// Returns the canonicalized path if validation succeeds.
|
||||||
|
pub async fn validate_and_normalize_path(path: &Path) -> Result<PathBuf, String> {
|
||||||
|
// Use tokio::fs for async file operations
|
||||||
|
let metadata = tokio::fs::metadata(path).await.map_err(|e| {
|
||||||
|
if e.kind() == std::io::ErrorKind::NotFound {
|
||||||
|
format!("Path '{}' does not exist", path.display())
|
||||||
|
} else {
|
||||||
|
format!("Cannot access '{}': {}", path.display(), e)
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
if !metadata.is_dir() {
|
||||||
|
return Err(format!("Path '{}' is not a directory", path.display()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we can read the directory
|
||||||
|
let _ = tokio::fs::read_dir(path)
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Cannot access directory '{}': {}", path.display(), e))?;
|
||||||
|
|
||||||
|
// canonicalize is sync-only, use spawn_blocking
|
||||||
|
let path_owned = path.to_path_buf();
|
||||||
|
tokio::task::spawn_blocking(move || path_owned.canonicalize())
|
||||||
|
.await
|
||||||
|
.map_err(|e| format!("Task failed: {}", e))?
|
||||||
|
.map_err(|e| format!("Failed to normalize path '{}': {}", path.display(), e))
|
||||||
|
}
|
||||||
@@ -89,10 +89,17 @@ impl GeminiClient {
|
|||||||
) -> Result<OrganizationPlan, GeminiError> {
|
) -> Result<OrganizationPlan, GeminiError> {
|
||||||
let url = self.build_url();
|
let url = self.build_url();
|
||||||
|
|
||||||
if let (Some(cache), Some(base_path)) = (cache.as_ref(), base_path)
|
// Check cache and get pre-fetched metadata in one pass
|
||||||
&& let Some(cached_response) = cache.get_cached_response(&filenames, base_path)
|
let cache_result = match (cache.as_ref(), base_path) {
|
||||||
|
(Some(c), Some(bp)) => Some(c.check_cache(&filenames, bp)),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Return cached response if valid
|
||||||
|
if let Some(ref result) = cache_result
|
||||||
|
&& let Some(ref cached_response) = result.cached_response
|
||||||
{
|
{
|
||||||
return Ok(cached_response);
|
return Ok(cached_response.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
let prompt =
|
let prompt =
|
||||||
@@ -102,8 +109,9 @@ impl GeminiClient {
|
|||||||
let res = self.send_request_with_retry(&url, &request_body).await?;
|
let res = self.send_request_with_retry(&url, &request_body).await?;
|
||||||
let plan = self.parse_categorization_response(res).await?;
|
let plan = self.parse_categorization_response(res).await?;
|
||||||
|
|
||||||
if let (Some(cache), Some(base_path)) = (cache.as_mut(), base_path) {
|
// Cache response using pre-fetched metadata (no second metadata lookup)
|
||||||
cache.cache_response(&filenames, plan.clone(), base_path);
|
if let (Some(cache), Some(result)) = (cache.as_mut(), cache_result) {
|
||||||
|
cache.cache_response_with_metadata(&filenames, plan.clone(), result.file_metadata);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(plan)
|
Ok(plan)
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use noentropy::cli::{
|
use noentropy::cli::{Args, handle_organization, handle_undo};
|
||||||
Args,
|
|
||||||
orchestrator::{handle_organization, handle_undo},
|
|
||||||
};
|
|
||||||
use noentropy::settings::config::change_and_prompt_api_key;
|
use noentropy::settings::config::change_and_prompt_api_key;
|
||||||
use noentropy::settings::{get_or_prompt_config, get_or_prompt_download_folder};
|
use noentropy::settings::{get_or_prompt_config, get_or_prompt_download_folder};
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,12 @@ use std::fs;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::time::{SystemTime, UNIX_EPOCH};
|
use std::time::{SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
|
/// Result of checking the cache - includes pre-fetched metadata to avoid double lookups
|
||||||
|
pub struct CacheCheckResult {
|
||||||
|
pub cached_response: Option<OrganizationPlan>,
|
||||||
|
pub file_metadata: HashMap<String, FileMetadata>,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct Cache {
|
pub struct Cache {
|
||||||
entries: HashMap<String, CacheEntry>,
|
entries: HashMap<String, CacheEntry>,
|
||||||
@@ -64,43 +70,92 @@ impl Cache {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks cache and returns pre-fetched metadata to avoid double lookups.
|
||||||
|
/// The returned metadata can be passed to `cache_response_with_metadata` on cache miss.
|
||||||
|
pub fn check_cache(&self, filenames: &[String], base_path: &Path) -> CacheCheckResult {
|
||||||
|
// Fetch metadata once for all files
|
||||||
|
let file_metadata: HashMap<String, FileMetadata> = filenames
|
||||||
|
.iter()
|
||||||
|
.filter_map(|filename| {
|
||||||
|
let file_path = base_path.join(filename);
|
||||||
|
Self::get_file_metadata(&file_path)
|
||||||
|
.ok()
|
||||||
|
.map(|m| (filename.clone(), m))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let cache_key = self.generate_cache_key(filenames);
|
||||||
|
|
||||||
|
let cached_response = self.entries.get(&cache_key).and_then(|entry| {
|
||||||
|
// Validate all files are unchanged using pre-fetched metadata
|
||||||
|
let all_unchanged = filenames.iter().all(|filename| {
|
||||||
|
match (
|
||||||
|
file_metadata.get(filename),
|
||||||
|
entry.file_metadata.get(filename),
|
||||||
|
) {
|
||||||
|
(Some(current), Some(cached)) => current == cached,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
if all_unchanged {
|
||||||
|
println!("Using cached response (timestamp: {})", entry.timestamp);
|
||||||
|
Some(entry.response.clone())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
CacheCheckResult {
|
||||||
|
cached_response,
|
||||||
|
file_metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cache response using pre-fetched metadata (avoids double metadata lookup)
|
||||||
|
pub fn cache_response_with_metadata(
|
||||||
|
&mut self,
|
||||||
|
filenames: &[String],
|
||||||
|
response: OrganizationPlan,
|
||||||
|
file_metadata: HashMap<String, FileMetadata>,
|
||||||
|
) {
|
||||||
|
let cache_key = self.generate_cache_key(filenames);
|
||||||
|
|
||||||
|
let timestamp = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_secs();
|
||||||
|
|
||||||
|
let entry = CacheEntry {
|
||||||
|
response,
|
||||||
|
timestamp,
|
||||||
|
file_metadata,
|
||||||
|
};
|
||||||
|
|
||||||
|
self.entries.insert(cache_key, entry);
|
||||||
|
|
||||||
|
if self.entries.len() > self.max_entries {
|
||||||
|
self.evict_oldest();
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("Cached response for {} files", filenames.len());
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Legacy method - checks cache for a response (fetches metadata internally)
|
||||||
|
#[deprecated(
|
||||||
|
note = "Use check_cache() + cache_response_with_metadata() to avoid double metadata lookups"
|
||||||
|
)]
|
||||||
pub fn get_cached_response(
|
pub fn get_cached_response(
|
||||||
&self,
|
&self,
|
||||||
filenames: &[String],
|
filenames: &[String],
|
||||||
base_path: &Path,
|
base_path: &Path,
|
||||||
) -> Option<OrganizationPlan> {
|
) -> Option<OrganizationPlan> {
|
||||||
let cache_key = self.generate_cache_key(filenames);
|
let result = self.check_cache(filenames, base_path);
|
||||||
|
result.cached_response
|
||||||
if let Some(entry) = self.entries.get(&cache_key) {
|
|
||||||
let mut all_files_unchanged = true;
|
|
||||||
|
|
||||||
for filename in filenames {
|
|
||||||
let file_path = base_path.join(filename);
|
|
||||||
if let Ok(current_metadata) = Self::get_file_metadata(&file_path) {
|
|
||||||
if let Some(cached_metadata) = entry.file_metadata.get(filename) {
|
|
||||||
if current_metadata != *cached_metadata {
|
|
||||||
all_files_unchanged = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
all_files_unchanged = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
all_files_unchanged = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if all_files_unchanged {
|
|
||||||
println!("Using cached response (timestamp: {})", entry.timestamp);
|
|
||||||
return Some(entry.response.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Legacy method - caches a response (fetches metadata internally)
|
||||||
|
#[deprecated(note = "Use cache_response_with_metadata() with pre-fetched metadata")]
|
||||||
pub fn cache_response(
|
pub fn cache_response(
|
||||||
&mut self,
|
&mut self,
|
||||||
filenames: &[String],
|
filenames: &[String],
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
pub mod cache;
|
pub mod cache;
|
||||||
pub mod undo_log;
|
pub mod undo_log;
|
||||||
|
|
||||||
pub use cache::Cache;
|
pub use cache::{Cache, CacheCheckResult};
|
||||||
pub use undo_log::UndoLog;
|
pub use undo_log::UndoLog;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
Reference in New Issue
Block a user