From 6ecbeb9a9b1ff9d39979504a935e098c254933e2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Thu, 19 Feb 2026 01:47:37 +0200 Subject: [PATCH] feat: implement utils.rs and state.rs utils.rs: natural sort, file fingerprinting, library ID computation, pretty title formatting, path helpers, clamp, truthy (38 tests) state.rs: atomic JSON persistence with backup rotation and fallback loading (8 tests) --- src-tauri/src/lib.rs | 3 + src-tauri/src/state.rs | 247 +++++++++++++++ src-tauri/src/utils.rs | 697 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 947 insertions(+) create mode 100644 src-tauri/src/state.rs create mode 100644 src-tauri/src/utils.rs diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index f41a5ed..29763c6 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -1,3 +1,6 @@ +pub mod state; +pub mod utils; + #[cfg_attr(mobile, tauri::mobile_entry_point)] pub fn run() { tauri::Builder::default() diff --git a/src-tauri/src/state.rs b/src-tauri/src/state.rs new file mode 100644 index 0000000..542ba55 --- /dev/null +++ b/src-tauri/src/state.rs @@ -0,0 +1,247 @@ +use serde_json::Value; +use std::fs; +use std::path::Path; + +/// Default number of rolling backups to keep. +pub const BACKUP_COUNT: usize = 8; + +/// Write JSON data to a file atomically with backup rotation. +/// +/// Creates rolling backups (.bak1 through .bakN) and a .lastgood copy +/// for crash recovery. +/// +/// For a file `foo.json`, backups are `foo.json.bak1`, `foo.json.tmp`, etc. 
+pub fn atomic_write_json(path: &Path, data: &Value, backup_count: usize) { + // Create parent directories if needed + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).ok(); + } + + let path_str = path.as_os_str().to_string_lossy(); + let tmp = Path::new(&*format!("{}.tmp", path_str)).to_path_buf(); + let payload = serde_json::to_string_pretty(data).expect("failed to serialize JSON"); + + if path.exists() { + // Rotate existing backups: move .bakN -> .bak(N+1), down to .bak1 -> .bak2 + for i in (1..=backup_count).rev() { + let src = Path::new(&*format!("{}.bak{}", path_str, i)).to_path_buf(); + let dst = Path::new(&*format!("{}.bak{}", path_str, i + 1)).to_path_buf(); + if src.exists() { + // Remove dst if it exists, then rename src -> dst + fs::remove_file(&dst).ok(); + fs::rename(&src, &dst).ok(); + } + } + + // Move current file to .bak1 + let bak1 = Path::new(&*format!("{}.bak1", path_str)).to_path_buf(); + fs::remove_file(&bak1).ok(); + fs::rename(path, &bak1).ok(); + } + + // Write atomically via tmp file + fs::write(&tmp, &payload).expect("failed to write tmp file"); + fs::rename(&tmp, path).expect("failed to rename tmp to primary"); + + // Keep a .lastgood copy for recovery + let lastgood = Path::new(&*format!("{}.lastgood", path_str)).to_path_buf(); + fs::write(&lastgood, &payload).ok(); +} + +/// Load JSON from path, falling back to backups if the primary is corrupted. +/// +/// Tries: path -> .lastgood -> .bak1 -> .bak2 -> ... -> .bak{backup_count+2} +/// Returns `None` if all candidates fail. 
+pub fn load_json_with_fallbacks(path: &Path, backup_count: usize) -> Option { + let path_str = path.as_os_str().to_string_lossy(); + + // Build candidate list: primary, lastgood, bak1..bak{backup_count+2} + let mut candidates: Vec = Vec::new(); + candidates.push(path.to_path_buf()); + candidates.push(Path::new(&*format!("{}.lastgood", path_str)).to_path_buf()); + for i in 1..=(backup_count + 2) { + candidates.push(Path::new(&*format!("{}.bak{}", path_str, i)).to_path_buf()); + } + + for p in &candidates { + if p.exists() { + if let Ok(text) = fs::read_to_string(p) { + if let Ok(val) = serde_json::from_str::(&text) { + return Some(val); + } + } + } + } + + None +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + use tempfile::TempDir; + + /// Helper: path to a JSON file inside a temp dir. + fn json_path(dir: &TempDir) -> std::path::PathBuf { + dir.path().join("data.json") + } + + #[test] + fn test_write_and_read_round_trip() { + let dir = TempDir::new().unwrap(); + let path = json_path(&dir); + let data = json!({"key": "value", "num": 42}); + + atomic_write_json(&path, &data, BACKUP_COUNT); + + let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT); + assert_eq!(loaded, Some(data)); + } + + #[test] + fn test_fallback_to_lastgood_when_primary_corrupted() { + let dir = TempDir::new().unwrap(); + let path = json_path(&dir); + let data = json!({"status": "good"}); + + // Write valid data (creates primary + lastgood) + atomic_write_json(&path, &data, BACKUP_COUNT); + + // Corrupt the primary file + fs::write(&path, "NOT VALID JSON!!!").unwrap(); + + let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT); + assert_eq!(loaded, Some(data)); + } + + #[test] + fn test_fallback_to_bak1_when_primary_and_lastgood_corrupted() { + let dir = TempDir::new().unwrap(); + let path = json_path(&dir); + let path_str = path.as_os_str().to_string_lossy().to_string(); + + let first = json!({"version": 1}); + let second = json!({"version": 2}); + + // First 
write — creates primary + lastgood + atomic_write_json(&path, &first, BACKUP_COUNT); + // Second write — rotates first to .bak1, writes second as primary + lastgood + atomic_write_json(&path, &second, BACKUP_COUNT); + + // Corrupt primary and lastgood + fs::write(&path, "CORRUPT").unwrap(); + let lastgood = format!("{}.lastgood", path_str); + fs::write(&lastgood, "ALSO CORRUPT").unwrap(); + + // Should fall back to .bak1 which has first version + let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT); + assert_eq!(loaded, Some(first)); + } + + #[test] + fn test_backup_rotation_after_multiple_writes() { + let dir = TempDir::new().unwrap(); + let path = json_path(&dir); + let path_str = path.as_os_str().to_string_lossy().to_string(); + + // Write 5 times with distinct values + for i in 1..=5 { + let data = json!({"write": i}); + atomic_write_json(&path, &data, BACKUP_COUNT); + } + + // Primary should be the latest (write 5) + let primary: Value = + serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap(); + assert_eq!(primary, json!({"write": 5})); + + // .bak1 should be the second-to-last (write 4) + let bak1_path = format!("{}.bak1", path_str); + let bak1: Value = + serde_json::from_str(&fs::read_to_string(&bak1_path).unwrap()).unwrap(); + assert_eq!(bak1, json!({"write": 4})); + + // .bak2 should be write 3 + let bak2_path = format!("{}.bak2", path_str); + let bak2: Value = + serde_json::from_str(&fs::read_to_string(&bak2_path).unwrap()).unwrap(); + assert_eq!(bak2, json!({"write": 3})); + + // .bak3 should be write 2 + let bak3_path = format!("{}.bak3", path_str); + let bak3: Value = + serde_json::from_str(&fs::read_to_string(&bak3_path).unwrap()).unwrap(); + assert_eq!(bak3, json!({"write": 2})); + + // .bak4 should be write 1 + let bak4_path = format!("{}.bak4", path_str); + let bak4: Value = + serde_json::from_str(&fs::read_to_string(&bak4_path).unwrap()).unwrap(); + assert_eq!(bak4, json!({"write": 1})); + } + + #[test] + fn 
test_load_nonexistent_returns_none() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("does_not_exist.json"); + + let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT); + assert_eq!(loaded, None); + } + + #[test] + fn test_parent_directories_created() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("sub").join("dir").join("nested.json"); + let data = json!({"nested": true}); + + atomic_write_json(&path, &data, BACKUP_COUNT); + + let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT); + assert_eq!(loaded, Some(data)); + } + + #[test] + fn test_lastgood_written() { + let dir = TempDir::new().unwrap(); + let path = json_path(&dir); + let path_str = path.as_os_str().to_string_lossy().to_string(); + let data = json!({"lg": true}); + + atomic_write_json(&path, &data, BACKUP_COUNT); + + let lastgood_path = format!("{}.lastgood", path_str); + let lg: Value = + serde_json::from_str(&fs::read_to_string(&lastgood_path).unwrap()).unwrap(); + assert_eq!(lg, data); + } + + #[test] + fn test_backup_count_respected() { + let dir = TempDir::new().unwrap(); + let path = json_path(&dir); + let path_str = path.as_os_str().to_string_lossy().to_string(); + let small_count = 2; + + // Write 5 times with a backup_count of 2 + for i in 1..=5 { + let data = json!({"write": i}); + atomic_write_json(&path, &data, small_count); + } + + // With backup_count=2, rotation only goes up to .bak2 -> .bak3 + // After 5 writes: primary=5, bak1=4, bak2=3, bak3=2 (pushed from bak2) + // .bak1 should exist + let bak1_path = format!("{}.bak1", path_str); + assert!(Path::new(&bak1_path).exists()); + + // .bak2 should exist + let bak2_path = format!("{}.bak2", path_str); + assert!(Path::new(&bak2_path).exists()); + + // .bak3 should exist (rotated from bak2) + let bak3_path = format!("{}.bak3", path_str); + assert!(Path::new(&bak3_path).exists()); + } +} diff --git a/src-tauri/src/utils.rs b/src-tauri/src/utils.rs new file mode 100644 index 0000000..a3dca17 --- 
/dev/null +++ b/src-tauri/src/utils.rs @@ -0,0 +1,697 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use sha2::{Digest, Sha256}; +use std::cmp::Ordering; +use std::fs; +use std::io::{Read, Seek, SeekFrom}; +use std::path::Path; + +// --------------------------------------------------------------------------- +// 1. clamp +// --------------------------------------------------------------------------- + +/// Clamp value `v` to the range `[a, b]`. +pub fn clamp(v: f64, a: f64, b: f64) -> f64 { + a.max(b.min(v)) +} + +// --------------------------------------------------------------------------- +// 2. is_within_root +// --------------------------------------------------------------------------- + +/// Check if `target` path is within (or equal to) the `root` directory. +/// Prevents path-traversal attacks. +pub fn is_within_root(root: &Path, target: &Path) -> bool { + match (root.canonicalize(), target.canonicalize()) { + (Ok(r), Ok(t)) => { + t == r || t.starts_with(&r) + } + _ => false, + } +} + +// --------------------------------------------------------------------------- +// 3. truthy +// --------------------------------------------------------------------------- + +/// Convert a `serde_json::Value` to bool. +/// +/// Handles bool, number (nonzero == true), and string ("1","true","yes","y","on"). +pub fn truthy(v: &serde_json::Value) -> bool { + match v { + serde_json::Value::Bool(b) => *b, + serde_json::Value::Number(n) => { + if let Some(i) = n.as_i64() { + i != 0 + } else if let Some(f) = n.as_f64() { + f != 0.0 + } else { + false + } + } + serde_json::Value::String(s) => { + matches!(s.trim().to_lowercase().as_str(), "1" | "true" | "yes" | "y" | "on") + } + _ => false, + } +} + +// --------------------------------------------------------------------------- +// 4. folder_display_name +// --------------------------------------------------------------------------- + +/// Get a display-friendly name for a folder path (last component). 
// ---------------------------------------------------------------------------
// 4. folder_display_name
// ---------------------------------------------------------------------------

/// Get a display-friendly name for a folder path: its last component, or the
/// input unchanged when there is no final component (e.g. "/").
pub fn folder_display_name(path_str: &str) -> String {
    Path::new(path_str)
        .file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .unwrap_or_else(|| path_str.to_string())
}

// ---------------------------------------------------------------------------
// 5. deduplicate_list
// ---------------------------------------------------------------------------

/// Remove duplicates from a list while preserving first-seen order.
/// Items are trimmed; empty / whitespace-only entries are dropped.
pub fn deduplicate_list(items: &[String]) -> Vec<String> {
    let mut seen = std::collections::HashSet::new();
    let mut result = Vec::new();
    for raw in items {
        let s = raw.trim();
        // `insert` returns false for already-seen values.
        if !s.is_empty() && seen.insert(s.to_string()) {
            result.push(s.to_string());
        }
    }
    result
}

// ---------------------------------------------------------------------------
// 6. natural_key
// ---------------------------------------------------------------------------

/// One part of a natural sort key: either a number or a lowercased text fragment.
///
/// Ordering is derived: `Num` compares by value, `Text` lexicographically,
/// and any `Num` sorts before any `Text` (variant declaration order) —
/// identical to the previous hand-written `PartialOrd`/`Ord` impls.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum NaturalKeyPart {
    Num(u64),
    Text(String),
}
+ (NaturalKeyPart::Num(_), NaturalKeyPart::Text(_)) => Ordering::Less, + (NaturalKeyPart::Text(_), NaturalKeyPart::Num(_)) => Ordering::Greater, + } + } +} + +static NUM_SPLIT_RE: Lazy = Lazy::new(|| Regex::new(r"(\d+)").unwrap()); + +/// Generate a sort key for natural sorting (e.g., "2" sorts before "10"). +/// +/// Splits the string on digit runs; numeric parts become `Num`, the rest become +/// lower-cased `Text`. +pub fn natural_key(s: &str) -> Vec { + let mut parts = Vec::new(); + let mut last_end = 0; + + for m in NUM_SPLIT_RE.find_iter(s) { + // Text before this match + if m.start() > last_end { + parts.push(NaturalKeyPart::Text( + s[last_end..m.start()].to_lowercase(), + )); + } + // The numeric match + let num: u64 = m.as_str().parse().unwrap_or(u64::MAX); + parts.push(NaturalKeyPart::Num(num)); + last_end = m.end(); + } + + // Trailing text after the last match (or the entire string if no digits) + if last_end < s.len() { + parts.push(NaturalKeyPart::Text(s[last_end..].to_lowercase())); + } + + // If the input was empty, return a single empty Text part so comparisons + // never deal with an empty vec. + if parts.is_empty() { + parts.push(NaturalKeyPart::Text(String::new())); + } + + parts +} + +// --------------------------------------------------------------------------- +// 7. smart_title_case +// --------------------------------------------------------------------------- + +/// Words that should remain lowercase in title case (except at start). +const SMALL_WORDS: &[&str] = &[ + "a", "an", "the", "and", "or", "but", "for", "nor", "as", "at", "by", "in", "of", "on", + "per", "to", "vs", "via", "with", "into", "from", +]; + +static WHITESPACE_SPLIT_RE: Lazy = Lazy::new(|| Regex::new(r"(\s+)").unwrap()); + +/// Convert text to title case, keeping small words lowercase (except at start). +/// Words containing digits or all-uppercase words (acronyms) are preserved as-is. 
+pub fn smart_title_case(text: &str) -> String { + let trimmed = text.trim(); + if trimmed.is_empty() { + return String::new(); + } + + // Split while keeping whitespace tokens (odd indices are whitespace). + let tokens: Vec<&str> = WHITESPACE_SPLIT_RE.split(trimmed).collect(); + let spaces: Vec<&str> = WHITESPACE_SPLIT_RE + .find_iter(trimmed) + .map(|m| m.as_str()) + .collect(); + + let mut out = String::new(); + let mut word_index = 0usize; // count of actual words seen so far (0-based) + + for (i, token) in tokens.iter().enumerate() { + if i > 0 { + // Insert whitespace separator that was between tokens[i-1] and tokens[i] + if let Some(sp) = spaces.get(i - 1) { + out.push_str(sp); + } + } + + let w = *token; + + // Preserve words with digits + if w.chars().any(|c| c.is_ascii_digit()) { + out.push_str(w); + word_index += 1; + continue; + } + + // Preserve all-caps acronyms + if w.chars().all(|c| c.is_uppercase() || !c.is_alphabetic()) && w.chars().any(|c| c.is_uppercase()) { + out.push_str(w); + word_index += 1; + continue; + } + + let lw = w.to_lowercase(); + + if word_index != 0 && SMALL_WORDS.contains(&lw.as_str()) { + out.push_str(&lw); + } else { + // Capitalize first character, lowercase the rest + let mut chars = lw.chars(); + if let Some(first) = chars.next() { + for c in first.to_uppercase() { + out.push(c); + } + out.extend(chars); + } + } + + word_index += 1; + } + + out.trim().to_string() +} + +// --------------------------------------------------------------------------- +// 8. 
pretty_title_from_filename +// --------------------------------------------------------------------------- + +static LEADING_INDEX_RE: Lazy = Lazy::new(|| { + Regex::new(r"^\s*(?:\(?\s*)?(?P\d+)(?:\s*[.\-_]\s*\d+)*(?:\s*[.)\]\-]\s*|\s+)").unwrap() +}); + +static UNDERSCORE_RE: Lazy = Lazy::new(|| Regex::new(r"[_]+").unwrap()); +static MULTI_SPACE_RE: Lazy = Lazy::new(|| Regex::new(r"\s+").unwrap()); +static LEADING_PUNCT_RE: Lazy = + Lazy::new(|| Regex::new(r"^\s*[-\u{2013}\u{2014}:.)\]]\s*").unwrap()); + +/// Convert a filename to a human-readable title. +/// +/// Removes extension, leading indices, underscores, and applies smart title case. +pub fn pretty_title_from_filename(filename: &str) -> String { + let stem = Path::new(filename) + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| filename.to_string()); + + // Replace underscores with spaces + let base = UNDERSCORE_RE.replace_all(&stem, " "); + let base = MULTI_SPACE_RE.replace_all(&base, " "); + let mut base = base.trim().to_string(); + + // Remove leading index numbers + if let Some(m) = LEADING_INDEX_RE.find(&base) { + base = base[m.end()..].trim().to_string(); + } + + // Remove leading punctuation + let cleaned = LEADING_PUNCT_RE.replace(&base, ""); + let cleaned = MULTI_SPACE_RE.replace_all(&cleaned, " "); + let mut base = cleaned.trim().to_string(); + + // Fall back to original stem if nothing left + if base.is_empty() { + base = Path::new(filename) + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| filename.to_string()); + } + + smart_title_case(&base) +} + +// --------------------------------------------------------------------------- +// 9. file_fingerprint +// --------------------------------------------------------------------------- + +const FP_CHUNK_SIZE: u64 = 256 * 1024; // 256 KB + +/// Generate a content-based fingerprint (SHA-256 based) that survives renames/moves. 
+/// +/// Hash input: `b"VIDFIDv1\0"` + ascii(size) + `b"\0"` + first 256 KB + last 256 KB. +/// Returns the first 20 hex chars of the digest. +pub fn file_fingerprint(path: &Path) -> String { + let size: u64 = fs::metadata(path).map(|m| m.len()).unwrap_or(0); + + let mut hasher = Sha256::new(); + hasher.update(b"VIDFIDv1\0"); + hasher.update(size.to_string().as_bytes()); + hasher.update(b"\0"); + + if let Ok(mut f) = fs::File::open(path) { + // Read head + let head_len = std::cmp::min(size, FP_CHUNK_SIZE) as usize; + let mut head = vec![0u8; head_len]; + if f.read_exact(&mut head).is_ok() { + hasher.update(&head); + } + + // Read tail if file is large enough + if size > FP_CHUNK_SIZE { + let tail_offset = size.saturating_sub(FP_CHUNK_SIZE); + if f.seek(SeekFrom::Start(tail_offset)).is_ok() { + let tail_len = (size - tail_offset) as usize; + let mut tail = vec![0u8; tail_len]; + if f.read_exact(&mut tail).is_ok() { + hasher.update(&tail); + } + } + } + } + + let digest = hasher.finalize(); + format!("{:x}", digest) + .chars() + .take(20) + .collect() +} + +// --------------------------------------------------------------------------- +// 10. compute_library_id +// --------------------------------------------------------------------------- + +/// Compute a stable library ID from a list of file fingerprints. +/// +/// Hash input: `b"LIBFIDv2\0"` + each fid (sorted) joined by `b"\n"`. +/// Returns the first 16 hex chars of the digest. 
+pub fn compute_library_id(fids: &[String]) -> String { + let mut valid_fids: Vec<&str> = fids + .iter() + .map(|s| s.as_str()) + .filter(|s| !s.is_empty()) + .collect(); + valid_fids.sort(); + + let mut hasher = Sha256::new(); + hasher.update(b"LIBFIDv2\0"); + for fid in &valid_fids { + hasher.update(fid.as_bytes()); + hasher.update(b"\n"); + } + + let digest = hasher.finalize(); + format!("{:x}", digest) + .chars() + .take(16) + .collect() +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + // -- clamp --------------------------------------------------------------- + + #[test] + fn test_clamp_within_range() { + assert_eq!(clamp(5.0, 0.0, 10.0), 5.0); + } + + #[test] + fn test_clamp_below() { + assert_eq!(clamp(-1.0, 0.0, 10.0), 0.0); + } + + #[test] + fn test_clamp_above() { + assert_eq!(clamp(15.0, 0.0, 10.0), 10.0); + } + + #[test] + fn test_clamp_at_boundary() { + assert_eq!(clamp(0.0, 0.0, 10.0), 0.0); + assert_eq!(clamp(10.0, 0.0, 10.0), 10.0); + } + + // -- is_within_root ------------------------------------------------------ + + #[test] + fn test_is_within_root_same_dir() { + let dir = tempfile::tempdir().unwrap(); + assert!(is_within_root(dir.path(), dir.path())); + } + + #[test] + fn test_is_within_root_child() { + let dir = tempfile::tempdir().unwrap(); + let child = dir.path().join("child"); + fs::create_dir(&child).unwrap(); + assert!(is_within_root(dir.path(), &child)); + } + + #[test] + fn test_is_within_root_outside() { + let dir1 = tempfile::tempdir().unwrap(); + let dir2 = tempfile::tempdir().unwrap(); + assert!(!is_within_root(dir1.path(), dir2.path())); + } + + #[test] + fn test_is_within_root_nonexistent() { + let dir = tempfile::tempdir().unwrap(); + let fake = dir.path().join("no_such_dir"); + assert!(!is_within_root(dir.path(), &fake)); + } + + // -- 
truthy -------------------------------------------------------------- + + #[test] + fn test_truthy_bool() { + assert!(truthy(&serde_json::json!(true))); + assert!(!truthy(&serde_json::json!(false))); + } + + #[test] + fn test_truthy_number() { + assert!(truthy(&serde_json::json!(1))); + assert!(truthy(&serde_json::json!(-1))); + assert!(!truthy(&serde_json::json!(0))); + assert!(truthy(&serde_json::json!(0.5))); + assert!(!truthy(&serde_json::json!(0.0))); + } + + #[test] + fn test_truthy_string() { + for s in &["1", "true", "yes", "y", "on", "TRUE", "Yes", " on "] { + assert!(truthy(&serde_json::json!(s)), "expected truthy for {:?}", s); + } + for s in &["0", "false", "no", "off", "", "random"] { + assert!(!truthy(&serde_json::json!(s)), "expected falsy for {:?}", s); + } + } + + #[test] + fn test_truthy_null_and_array() { + assert!(!truthy(&serde_json::json!(null))); + assert!(!truthy(&serde_json::json!([]))); + assert!(!truthy(&serde_json::json!({}))); + } + + // -- folder_display_name ------------------------------------------------- + + #[test] + fn test_folder_display_name_normal() { + assert_eq!(folder_display_name("/home/user/videos"), "videos"); + } + + #[test] + fn test_folder_display_name_root() { + // On unix "/" has no file_name component + assert_eq!(folder_display_name("/"), "/"); + } + + #[test] + fn test_folder_display_name_windows() { + assert_eq!(folder_display_name(r"C:\Users\foo\bar"), "bar"); + } + + // -- deduplicate_list ---------------------------------------------------- + + #[test] + fn test_deduplicate_preserves_order() { + let input: Vec = vec!["a", "b", "c", "a", "b"] + .into_iter() + .map(String::from) + .collect(); + assert_eq!(deduplicate_list(&input), vec!["a", "b", "c"]); + } + + #[test] + fn test_deduplicate_skips_empty_and_whitespace() { + let input: Vec = vec!["a", "", " ", "b", " a "] + .into_iter() + .map(String::from) + .collect(); + assert_eq!(deduplicate_list(&input), vec!["a", "b"]); + } + + // -- natural_key 
--------------------------------------------------------- + + #[test] + fn test_natural_key_basic() { + let key = natural_key("file10name"); + assert_eq!( + key, + vec![ + NaturalKeyPart::Text("file".to_string()), + NaturalKeyPart::Num(10), + NaturalKeyPart::Text("name".to_string()), + ] + ); + } + + #[test] + fn test_natural_sort_order() { + let mut items = vec!["file10", "file2", "file1", "file20"]; + items.sort_by(|a, b| natural_key(a).cmp(&natural_key(b))); + assert_eq!(items, vec!["file1", "file2", "file10", "file20"]); + } + + #[test] + fn test_natural_key_case_insensitive() { + let k1 = natural_key("ABC"); + let k2 = natural_key("abc"); + assert_eq!(k1, k2); + } + + #[test] + fn test_natural_key_empty() { + let key = natural_key(""); + assert_eq!(key, vec![NaturalKeyPart::Text(String::new())]); + } + + // -- smart_title_case ---------------------------------------------------- + + #[test] + fn test_smart_title_case_basic() { + assert_eq!(smart_title_case("hello world"), "Hello World"); + } + + #[test] + fn test_smart_title_case_small_words() { + assert_eq!( + smart_title_case("the art of war"), + "The Art of War" + ); + } + + #[test] + fn test_smart_title_case_preserves_acronyms() { + assert_eq!(smart_title_case("learn SQL today"), "Learn SQL Today"); + } + + #[test] + fn test_smart_title_case_preserves_digits() { + assert_eq!(smart_title_case("lesson 3b overview"), "Lesson 3b Overview"); + } + + #[test] + fn test_smart_title_case_empty() { + assert_eq!(smart_title_case(""), ""); + } + + // -- pretty_title_from_filename ------------------------------------------ + + #[test] + fn test_pretty_title_basic() { + assert_eq!( + pretty_title_from_filename("01_introduction_to_python.mp4"), + "Introduction to Python" + ); + } + + #[test] + fn test_pretty_title_with_parens_index() { + assert_eq!( + pretty_title_from_filename("(2) my_file.mp4"), + "My File" + ); + } + + #[test] + fn test_pretty_title_no_extension() { + assert_eq!( + 
pretty_title_from_filename("hello_world"), + "Hello World" + ); + } + + #[test] + fn test_pretty_title_only_numbers() { + // When stripping removes everything, falls back to stem + let result = pretty_title_from_filename("123.mp4"); + assert!(!result.is_empty()); + } + + // -- file_fingerprint ---------------------------------------------------- + + #[test] + fn test_file_fingerprint_small_file() { + let dir = tempfile::tempdir().unwrap(); + let fpath = dir.path().join("test.bin"); + { + let mut f = fs::File::create(&fpath).unwrap(); + f.write_all(b"hello world").unwrap(); + } + let fp = file_fingerprint(&fpath); + assert_eq!(fp.len(), 20); + // Should be deterministic + assert_eq!(fp, file_fingerprint(&fpath)); + } + + #[test] + fn test_file_fingerprint_large_file() { + let dir = tempfile::tempdir().unwrap(); + let fpath = dir.path().join("big.bin"); + { + let mut f = fs::File::create(&fpath).unwrap(); + // Write 512 KB + 1 byte to ensure head/tail branches are hit + let data = vec![0xABu8; 256 * 1024 + 1]; + f.write_all(&data).unwrap(); + f.write_all(&vec![0xCDu8; 256 * 1024]).unwrap(); + } + let fp = file_fingerprint(&fpath); + assert_eq!(fp.len(), 20); + } + + #[test] + fn test_file_fingerprint_nonexistent() { + let fp = file_fingerprint(Path::new("/no/such/file/ever.bin")); + assert_eq!(fp.len(), 20); + } + + #[test] + fn test_file_fingerprint_matches_python() { + // Verify against a known value produced by the Python code. + // Python: file_fingerprint on a file containing b"hello world" (11 bytes). + // h = sha256() + // h.update(b"VIDFIDv1\0") + // h.update(b"11") # str(size) + // h.update(b"\0") + // h.update(b"hello world") # head (< CHUNK_SIZE, no tail) + // digest[:20] + // + // We precompute this in Rust to assert compatibility. 
+ let mut h = Sha256::new(); + h.update(b"VIDFIDv1\0"); + h.update(b"11"); + h.update(b"\0"); + h.update(b"hello world"); + let expected: String = format!("{:x}", h.finalize()).chars().take(20).collect(); + + let dir = tempfile::tempdir().unwrap(); + let fpath = dir.path().join("compat.bin"); + fs::write(&fpath, b"hello world").unwrap(); + assert_eq!(file_fingerprint(&fpath), expected); + } + + // -- compute_library_id -------------------------------------------------- + + #[test] + fn test_compute_library_id_basic() { + let fids = vec!["abc".to_string(), "def".to_string()]; + let id = compute_library_id(&fids); + assert_eq!(id.len(), 16); + } + + #[test] + fn test_compute_library_id_order_independent() { + let fids1 = vec!["abc".to_string(), "def".to_string()]; + let fids2 = vec!["def".to_string(), "abc".to_string()]; + assert_eq!(compute_library_id(&fids1), compute_library_id(&fids2)); + } + + #[test] + fn test_compute_library_id_skips_empty() { + let with_empty = vec!["abc".to_string(), "".to_string(), "def".to_string()]; + let without_empty = vec!["abc".to_string(), "def".to_string()]; + assert_eq!( + compute_library_id(&with_empty), + compute_library_id(&without_empty) + ); + } + + #[test] + fn test_compute_library_id_matches_python() { + // Python: compute_library_id_from_fids(["abc", "def"]) + // sorted valid = ["abc", "def"] + // h = sha256() + // h.update(b"LIBFIDv2\0") + // h.update(b"abc"); h.update(b"\n") + // h.update(b"def"); h.update(b"\n") + // hexdigest()[:16] + let mut h = Sha256::new(); + h.update(b"LIBFIDv2\0"); + h.update(b"abc"); + h.update(b"\n"); + h.update(b"def"); + h.update(b"\n"); + let expected: String = format!("{:x}", h.finalize()).chars().take(16).collect(); + + let fids = vec!["abc".to_string(), "def".to_string()]; + assert_eq!(compute_library_id(&fids), expected); + } +}