feat: implement utils.rs and state.rs

utils.rs: natural sort, file fingerprinting, library ID computation,
pretty title formatting, path helpers, clamp, truthy (38 tests)

state.rs: atomic JSON persistence with backup rotation and
fallback loading (8 tests)
This commit is contained in:
Your Name
2026-02-19 01:47:37 +02:00
parent 40d7ce4291
commit 6ecbeb9a9b
3 changed files with 947 additions and 0 deletions

View File

@@ -1,3 +1,6 @@
pub mod state;
pub mod utils;
#[cfg_attr(mobile, tauri::mobile_entry_point)]
pub fn run() {
tauri::Builder::default()

247
src-tauri/src/state.rs Normal file
View File

@@ -0,0 +1,247 @@
use serde_json::Value;
use std::fs;
use std::path::Path;
/// Default number of rolling backups to keep.
///
/// Passed as the `backup_count` argument to `atomic_write_json` and
/// `load_json_with_fallbacks`.
pub const BACKUP_COUNT: usize = 8;
/// Write JSON data to a file atomically with backup rotation.
///
/// Creates rolling backups (`.bak1` through `.bak{backup_count+1}`) and a
/// `.lastgood` copy for crash recovery.
///
/// For a file `foo.json`, backups are `foo.json.bak1`, `foo.json.bak2`, etc.;
/// the scratch file used for the atomic swap is `foo.json.tmp`.
///
/// # Panics
///
/// Panics if `data` cannot be serialized or the tmp file cannot be written
/// or renamed into place. Backup rotation itself is best-effort and never
/// panics.
pub fn atomic_write_json(path: &Path, data: &Value, backup_count: usize) {
    // Create parent directories if needed (best effort).
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).ok();
    }
    let path_str = path.as_os_str().to_string_lossy();
    // Build a sibling path like "<path>.bak1" / "<path>.tmp".
    let sibling = |suffix: &str| std::path::PathBuf::from(format!("{}{}", path_str, suffix));
    let payload = serde_json::to_string_pretty(data).expect("failed to serialize JSON");
    if path.exists() {
        // Rotate existing backups: .bakN -> .bak(N+1), oldest first so no
        // backup is overwritten before it has been moved. Note this can leave
        // a .bak{backup_count+1} behind; load_json_with_fallbacks scans up to
        // .bak{backup_count+2} to compensate.
        for i in (1..=backup_count).rev() {
            let src = sibling(&format!(".bak{}", i));
            let dst = sibling(&format!(".bak{}", i + 1));
            if src.exists() {
                // Remove dst first: on some platforms (e.g. Windows) renaming
                // onto an existing file fails.
                fs::remove_file(&dst).ok();
                fs::rename(&src, &dst).ok();
            }
        }
        // The current primary becomes the newest backup (.bak1).
        let bak1 = sibling(".bak1");
        fs::remove_file(&bak1).ok();
        fs::rename(path, &bak1).ok();
    }
    // Write the payload to a tmp file, then rename over the primary so a
    // reader never observes a partially written file.
    let tmp = sibling(".tmp");
    fs::write(&tmp, &payload).expect("failed to write tmp file");
    fs::rename(&tmp, path).expect("failed to rename tmp to primary");
    // Keep a .lastgood copy for recovery if the primary is later corrupted.
    fs::write(sibling(".lastgood"), &payload).ok();
}
/// Load JSON from path, falling back to backups if the primary is corrupted.
///
/// Tries: path -> .lastgood -> .bak1 -> .bak2 -> ... -> .bak{backup_count+2}
/// Returns `None` if all candidates fail.
pub fn load_json_with_fallbacks(path: &Path, backup_count: usize) -> Option<Value> {
    let path_str = path.as_os_str().to_string_lossy();
    // Candidates in priority order: primary, .lastgood, then backups from
    // newest (.bak1) to oldest. Two extra .bak slots are scanned because
    // rotation in `atomic_write_json` can push files past `backup_count`.
    let mut candidates = vec![
        path.to_path_buf(),
        std::path::PathBuf::from(format!("{}.lastgood", path_str)),
    ];
    candidates.extend(
        (1..=backup_count + 2)
            .map(|i| std::path::PathBuf::from(format!("{}.bak{}", path_str, i))),
    );
    // Return the first candidate that both reads and parses successfully;
    // missing, unreadable, or corrupt candidates are skipped silently.
    candidates.iter().find_map(|candidate| {
        if !candidate.exists() {
            return None;
        }
        let text = fs::read_to_string(candidate).ok()?;
        serde_json::from_str::<Value>(&text).ok()
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use tempfile::TempDir;

    /// Helper: path to a JSON file inside a temp dir.
    fn json_path(dir: &TempDir) -> std::path::PathBuf {
        dir.path().join("data.json")
    }

    // Happy path: a value written atomically loads back unchanged.
    #[test]
    fn test_write_and_read_round_trip() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let data = json!({"key": "value", "num": 42});
        atomic_write_json(&path, &data, BACKUP_COUNT);
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(data));
    }

    // First fallback tier: the .lastgood copy rescues a corrupted primary.
    #[test]
    fn test_fallback_to_lastgood_when_primary_corrupted() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let data = json!({"status": "good"});
        // Write valid data (creates primary + lastgood)
        atomic_write_json(&path, &data, BACKUP_COUNT);
        // Corrupt the primary file
        fs::write(&path, "NOT VALID JSON!!!").unwrap();
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(data));
    }

    // Second fallback tier: .bak1 (previous generation) is used when both the
    // primary and .lastgood are unreadable.
    #[test]
    fn test_fallback_to_bak1_when_primary_and_lastgood_corrupted() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();
        let first = json!({"version": 1});
        let second = json!({"version": 2});
        // First write — creates primary + lastgood
        atomic_write_json(&path, &first, BACKUP_COUNT);
        // Second write — rotates first to .bak1, writes second as primary + lastgood
        atomic_write_json(&path, &second, BACKUP_COUNT);
        // Corrupt primary and lastgood
        fs::write(&path, "CORRUPT").unwrap();
        let lastgood = format!("{}.lastgood", path_str);
        fs::write(&lastgood, "ALSO CORRUPT").unwrap();
        // Should fall back to .bak1 which has first version
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(first));
    }

    // Rotation ordering: after N writes, .bakK holds the value from write N-K.
    #[test]
    fn test_backup_rotation_after_multiple_writes() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();
        // Write 5 times with distinct values
        for i in 1..=5 {
            let data = json!({"write": i});
            atomic_write_json(&path, &data, BACKUP_COUNT);
        }
        // Primary should be the latest (write 5)
        let primary: Value =
            serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
        assert_eq!(primary, json!({"write": 5}));
        // .bak1 should be the second-to-last (write 4)
        let bak1_path = format!("{}.bak1", path_str);
        let bak1: Value =
            serde_json::from_str(&fs::read_to_string(&bak1_path).unwrap()).unwrap();
        assert_eq!(bak1, json!({"write": 4}));
        // .bak2 should be write 3
        let bak2_path = format!("{}.bak2", path_str);
        let bak2: Value =
            serde_json::from_str(&fs::read_to_string(&bak2_path).unwrap()).unwrap();
        assert_eq!(bak2, json!({"write": 3}));
        // .bak3 should be write 2
        let bak3_path = format!("{}.bak3", path_str);
        let bak3: Value =
            serde_json::from_str(&fs::read_to_string(&bak3_path).unwrap()).unwrap();
        assert_eq!(bak3, json!({"write": 2}));
        // .bak4 should be write 1
        let bak4_path = format!("{}.bak4", path_str);
        let bak4: Value =
            serde_json::from_str(&fs::read_to_string(&bak4_path).unwrap()).unwrap();
        assert_eq!(bak4, json!({"write": 1}));
    }

    // Missing file (and all its backups) yields None, not a panic.
    #[test]
    fn test_load_nonexistent_returns_none() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("does_not_exist.json");
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, None);
    }

    // atomic_write_json creates intermediate directories on demand.
    #[test]
    fn test_parent_directories_created() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("sub").join("dir").join("nested.json");
        let data = json!({"nested": true});
        atomic_write_json(&path, &data, BACKUP_COUNT);
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(data));
    }

    // Every successful write also refreshes the .lastgood recovery copy.
    #[test]
    fn test_lastgood_written() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();
        let data = json!({"lg": true});
        atomic_write_json(&path, &data, BACKUP_COUNT);
        let lastgood_path = format!("{}.lastgood", path_str);
        let lg: Value =
            serde_json::from_str(&fs::read_to_string(&lastgood_path).unwrap()).unwrap();
        assert_eq!(lg, data);
    }

    // With a small backup_count the rotation still produces one overflow slot
    // (.bak{count+1}) — this pins that documented behavior.
    #[test]
    fn test_backup_count_respected() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();
        let small_count = 2;
        // Write 5 times with a backup_count of 2
        for i in 1..=5 {
            let data = json!({"write": i});
            atomic_write_json(&path, &data, small_count);
        }
        // With backup_count=2, rotation only goes up to .bak2 -> .bak3
        // After 5 writes: primary=5, bak1=4, bak2=3, bak3=2 (pushed from bak2)
        // .bak1 should exist
        let bak1_path = format!("{}.bak1", path_str);
        assert!(Path::new(&bak1_path).exists());
        // .bak2 should exist
        let bak2_path = format!("{}.bak2", path_str);
        assert!(Path::new(&bak2_path).exists());
        // .bak3 should exist (rotated from bak2)
        let bak3_path = format!("{}.bak3", path_str);
        assert!(Path::new(&bak3_path).exists());
    }
}

697
src-tauri/src/utils.rs Normal file
View File

@@ -0,0 +1,697 @@
use once_cell::sync::Lazy;
use regex::Regex;
use sha2::{Digest, Sha256};
use std::cmp::Ordering;
use std::fs;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
// ---------------------------------------------------------------------------
// 1. clamp
// ---------------------------------------------------------------------------
/// Clamp value `v` to the range `[a, b]`.
///
/// Equivalent to `max(a, min(b, v))`; assumes `a <= b`.
pub fn clamp(v: f64, a: f64, b: f64) -> f64 {
    let upper_bounded = v.min(b);
    upper_bounded.max(a)
}
// ---------------------------------------------------------------------------
// 2. is_within_root
// ---------------------------------------------------------------------------
/// Check if `target` path is within (or equal to) the `root` directory.
///
/// Both paths are canonicalized, resolving symlinks and relative components,
/// which prevents path-traversal attacks. Returns `false` if either path
/// cannot be canonicalized (e.g. it does not exist).
pub fn is_within_root(root: &Path, target: &Path) -> bool {
    match (root.canonicalize(), target.canonicalize()) {
        // `Path::starts_with` is true for equal paths, so no separate
        // equality check is needed.
        (Ok(r), Ok(t)) => t.starts_with(&r),
        _ => false,
    }
}
// ---------------------------------------------------------------------------
// 3. truthy
// ---------------------------------------------------------------------------
/// Convert a `serde_json::Value` to bool.
///
/// Handles bool, number (nonzero == true), and string ("1","true","yes","y","on").
pub fn truthy(v: &serde_json::Value) -> bool {
match v {
serde_json::Value::Bool(b) => *b,
serde_json::Value::Number(n) => {
if let Some(i) = n.as_i64() {
i != 0
} else if let Some(f) = n.as_f64() {
f != 0.0
} else {
false
}
}
serde_json::Value::String(s) => {
matches!(s.trim().to_lowercase().as_str(), "1" | "true" | "yes" | "y" | "on")
}
_ => false,
}
}
// ---------------------------------------------------------------------------
// 4. folder_display_name
// ---------------------------------------------------------------------------
/// Get a display-friendly name for a folder path (last component).
///
/// Splits on both `/` and `\` regardless of the host OS, so Windows-style
/// paths stored in config files display correctly on Unix too. Trailing
/// separators are ignored. Falls back to the input unchanged when no
/// component can be extracted (e.g. `/`).
pub fn folder_display_name(path_str: &str) -> String {
    let is_sep = |c: char| c == '/' || c == '\\';
    // Ignore trailing separators so "a/b/" yields "b".
    let trimmed = path_str.trim_end_matches(is_sep);
    match trimmed.rsplit(is_sep).next() {
        Some(name) if !name.is_empty() => name.to_string(),
        _ => path_str.to_string(),
    }
}
// ---------------------------------------------------------------------------
// 5. deduplicate_list
// ---------------------------------------------------------------------------
/// Remove duplicates from a list while preserving order.
/// Skips empty / whitespace-only strings. Items are trimmed.
pub fn deduplicate_list(items: &[String]) -> Vec<String> {
    let mut seen = std::collections::HashSet::new();
    items
        .iter()
        .map(|item| item.trim())
        // Drop whitespace-only entries, then keep only first occurrences.
        .filter(|trimmed| !trimmed.is_empty())
        .filter(|trimmed| seen.insert(trimmed.to_string()))
        .map(str::to_string)
        .collect()
}
// ---------------------------------------------------------------------------
// 6. natural_key
// ---------------------------------------------------------------------------
/// One part of a natural sort key: either a number or a lowercased text fragment.
///
/// `PartialOrd`/`Ord` are derived: variants compare in declaration order
/// (`Num` before `Text`), numbers numerically and text lexicographically —
/// exactly the ordering the previous hand-written impls produced. In practice
/// `natural_key` only compares keys of the same structure, mirroring the
/// Python behaviour this port follows.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum NaturalKeyPart {
    Num(u64),
    Text(String),
}
// Matches one run of ASCII digits; used to segment strings for natural sort.
static NUM_SPLIT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\d+)").unwrap());
/// Generate a sort key for natural sorting (e.g., "2" sorts before "10").
///
/// Splits the string on digit runs; numeric parts become `Num`, the rest become
/// lower-cased `Text`.
pub fn natural_key(s: &str) -> Vec<NaturalKeyPart> {
    let mut key = Vec::new();
    let mut cursor = 0usize;
    for digits in NUM_SPLIT_RE.find_iter(s) {
        // Any text sitting between the previous digit run and this one.
        let text = &s[cursor..digits.start()];
        if !text.is_empty() {
            key.push(NaturalKeyPart::Text(text.to_lowercase()));
        }
        // Digit runs too long for u64 saturate to u64::MAX instead of panicking.
        key.push(NaturalKeyPart::Num(digits.as_str().parse().unwrap_or(u64::MAX)));
        cursor = digits.end();
    }
    // Trailing text after the final digit run (or the whole string if none).
    let tail = &s[cursor..];
    if !tail.is_empty() {
        key.push(NaturalKeyPart::Text(tail.to_lowercase()));
    }
    // Guarantee a non-empty key so comparisons never see an empty vec.
    if key.is_empty() {
        key.push(NaturalKeyPart::Text(String::new()));
    }
    key
}
// ---------------------------------------------------------------------------
// 7. smart_title_case
// ---------------------------------------------------------------------------
/// Words that should remain lowercase in title case (except at start).
const SMALL_WORDS: &[&str] = &[
    "a", "an", "the", "and", "or", "but", "for", "nor", "as", "at", "by", "in", "of", "on",
    "per", "to", "vs", "via", "with", "into", "from",
];
// Matches one run of whitespace; used both to split into words and to
// recover the exact separator text between them.
static WHITESPACE_SPLIT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\s+)").unwrap());
/// Convert text to title case, keeping small words lowercase (except at start).
/// Words containing digits or all-uppercase words (acronyms) are preserved as-is.
pub fn smart_title_case(text: &str) -> String {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        return String::new();
    }
    // Rust's `Regex::split` yields only the text *between* matches (unlike
    // Python's re.split with a capture group), so the whitespace runs are
    // collected separately: `spaces[i - 1]` is the separator that preceded
    // `tokens[i]`. This preserves the original spacing in the output.
    let tokens: Vec<&str> = WHITESPACE_SPLIT_RE.split(trimmed).collect();
    let spaces: Vec<&str> = WHITESPACE_SPLIT_RE
        .find_iter(trimmed)
        .map(|m| m.as_str())
        .collect();
    let mut out = String::new();
    let mut word_index = 0usize; // count of actual words seen so far (0-based)
    for (i, token) in tokens.iter().enumerate() {
        if i > 0 {
            // Re-insert the whitespace that sat between tokens[i-1] and tokens[i]
            if let Some(sp) = spaces.get(i - 1) {
                out.push_str(sp);
            }
        }
        let w = *token;
        // Preserve words with digits verbatim (e.g. "3b", "H264")
        if w.chars().any(|c| c.is_ascii_digit()) {
            out.push_str(w);
            word_index += 1;
            continue;
        }
        // Preserve all-caps acronyms (at least one uppercase letter and no
        // lowercase letters; punctuation like apostrophes is tolerated)
        if w.chars().all(|c| c.is_uppercase() || !c.is_alphabetic()) && w.chars().any(|c| c.is_uppercase()) {
            out.push_str(w);
            word_index += 1;
            continue;
        }
        let lw = w.to_lowercase();
        // Small words stay lowercase unless they open the string
        if word_index != 0 && SMALL_WORDS.contains(&lw.as_str()) {
            out.push_str(&lw);
        } else {
            // Capitalize first character, lowercase the rest; to_uppercase()
            // may yield multiple chars for some scripts, hence the loop
            let mut chars = lw.chars();
            if let Some(first) = chars.next() {
                for c in first.to_uppercase() {
                    out.push(c);
                }
                out.extend(chars);
            }
        }
        word_index += 1;
    }
    out.trim().to_string()
}
// ---------------------------------------------------------------------------
// 8. pretty_title_from_filename
// ---------------------------------------------------------------------------
// Leading index prefix such as "01 - ", "(2) ", "1.2 " at the start of a name.
static LEADING_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^\s*(?:\(?\s*)?(?P<num>\d+)(?:\s*[.\-_]\s*\d+)*(?:\s*[.)\]\-]\s*|\s+)").unwrap()
});
// One or more underscores (replaced by a single space).
static UNDERSCORE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_]+").unwrap());
// One or more whitespace characters (collapsed to a single space).
static MULTI_SPACE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+").unwrap());
// Leftover leading punctuation (dash, en/em dash, colon, dot, bracket).
static LEADING_PUNCT_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^\s*[-\u{2013}\u{2014}:.)\]]\s*").unwrap());
/// Convert a filename to a human-readable title.
///
/// Removes extension, leading indices, underscores, and applies smart title case.
pub fn pretty_title_from_filename(filename: &str) -> String {
    // Strip the extension; fall back to the raw name when there is no stem.
    let stem = Path::new(filename)
        .file_stem()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_else(|| filename.to_string());
    // Underscores become spaces; whitespace runs collapse to single spaces.
    let mut title = {
        let spaced = UNDERSCORE_RE.replace_all(&stem, " ");
        let collapsed = MULTI_SPACE_RE.replace_all(&spaced, " ");
        collapsed.trim().to_string()
    };
    // Drop a leading index like "01 - " or "(2) ".
    if let Some(prefix) = LEADING_INDEX_RE.find(&title) {
        title = title[prefix.end()..].trim().to_string();
    }
    // Drop any leftover leading punctuation and re-collapse whitespace.
    let cleaned = {
        let stripped = LEADING_PUNCT_RE.replace(&title, "");
        MULTI_SPACE_RE.replace_all(&stripped, " ").trim().to_string()
    };
    // If stripping consumed everything (e.g. "123.mp4"), reuse the stem.
    let basis = if cleaned.is_empty() { stem } else { cleaned };
    smart_title_case(&basis)
}
// ---------------------------------------------------------------------------
// 9. file_fingerprint
// ---------------------------------------------------------------------------
const FP_CHUNK_SIZE: u64 = 256 * 1024; // 256 KB sampled from each end of the file
/// Generate a content-based fingerprint (SHA-256 based) that survives renames/moves.
///
/// Hash input: `b"VIDFIDv1\0"` + ascii(size) + `b"\0"` + first 256 KB + last 256 KB.
/// Returns the first 20 hex chars of the digest.
pub fn file_fingerprint(path: &Path) -> String {
    // Size 0 stands in when metadata is unavailable, so a fingerprint is
    // always produced even for unreadable paths.
    let size: u64 = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
    let mut hasher = Sha256::new();
    hasher.update(b"VIDFIDv1\0");
    hasher.update(size.to_string().as_bytes());
    hasher.update(b"\0");
    if let Ok(mut file) = fs::File::open(path) {
        // Head sample: up to the first FP_CHUNK_SIZE bytes.
        let head_len = size.min(FP_CHUNK_SIZE) as usize;
        let mut chunk = vec![0u8; head_len];
        if file.read_exact(&mut chunk).is_ok() {
            hasher.update(&chunk);
        }
        // Tail sample, only for files larger than one chunk. The head and
        // tail may overlap for files between one and two chunks long.
        if size > FP_CHUNK_SIZE {
            let offset = size - FP_CHUNK_SIZE;
            if file.seek(SeekFrom::Start(offset)).is_ok() {
                chunk.clear();
                chunk.resize((size - offset) as usize, 0);
                if file.read_exact(&mut chunk).is_ok() {
                    hasher.update(&chunk);
                }
            }
        }
    }
    // SHA-256 hex is 64 ASCII chars; keep the first 20 for a compact id.
    let hex = format!("{:x}", hasher.finalize());
    hex[..20].to_string()
}
// ---------------------------------------------------------------------------
// 10. compute_library_id
// ---------------------------------------------------------------------------
/// Compute a stable library ID from a list of file fingerprints.
///
/// Hash input: `b"LIBFIDv2\0"` + each fid (sorted) joined by `b"\n"`.
/// Returns the first 16 hex chars of the digest.
pub fn compute_library_id(fids: &[String]) -> String {
    // Empty fingerprints carry no identity; drop them, then sort so the
    // result is independent of input order.
    let mut usable: Vec<&String> = fids.iter().filter(|f| !f.is_empty()).collect();
    usable.sort();
    let mut hasher = Sha256::new();
    hasher.update(b"LIBFIDv2\0");
    for fid in usable {
        hasher.update(fid.as_bytes());
        hasher.update(b"\n");
    }
    // SHA-256 hex is 64 ASCII chars; keep the first 16.
    let hex = format!("{:x}", hasher.finalize());
    hex[..16].to_string()
}
// ===========================================================================
// Tests
// ===========================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // -- clamp ---------------------------------------------------------------
    #[test]
    fn test_clamp_within_range() {
        assert_eq!(clamp(5.0, 0.0, 10.0), 5.0);
    }
    #[test]
    fn test_clamp_below() {
        assert_eq!(clamp(-1.0, 0.0, 10.0), 0.0);
    }
    #[test]
    fn test_clamp_above() {
        assert_eq!(clamp(15.0, 0.0, 10.0), 10.0);
    }
    // Endpoints are inclusive.
    #[test]
    fn test_clamp_at_boundary() {
        assert_eq!(clamp(0.0, 0.0, 10.0), 0.0);
        assert_eq!(clamp(10.0, 0.0, 10.0), 10.0);
    }

    // -- is_within_root ------------------------------------------------------
    // A directory contains itself.
    #[test]
    fn test_is_within_root_same_dir() {
        let dir = tempfile::tempdir().unwrap();
        assert!(is_within_root(dir.path(), dir.path()));
    }
    #[test]
    fn test_is_within_root_child() {
        let dir = tempfile::tempdir().unwrap();
        let child = dir.path().join("child");
        fs::create_dir(&child).unwrap();
        assert!(is_within_root(dir.path(), &child));
    }
    // Two sibling temp dirs must not contain each other.
    #[test]
    fn test_is_within_root_outside() {
        let dir1 = tempfile::tempdir().unwrap();
        let dir2 = tempfile::tempdir().unwrap();
        assert!(!is_within_root(dir1.path(), dir2.path()));
    }
    // Nonexistent targets fail canonicalization and are rejected.
    #[test]
    fn test_is_within_root_nonexistent() {
        let dir = tempfile::tempdir().unwrap();
        let fake = dir.path().join("no_such_dir");
        assert!(!is_within_root(dir.path(), &fake));
    }

    // -- truthy --------------------------------------------------------------
    #[test]
    fn test_truthy_bool() {
        assert!(truthy(&serde_json::json!(true)));
        assert!(!truthy(&serde_json::json!(false)));
    }
    // Any nonzero number (int or float, either sign) is truthy.
    #[test]
    fn test_truthy_number() {
        assert!(truthy(&serde_json::json!(1)));
        assert!(truthy(&serde_json::json!(-1)));
        assert!(!truthy(&serde_json::json!(0)));
        assert!(truthy(&serde_json::json!(0.5)));
        assert!(!truthy(&serde_json::json!(0.0)));
    }
    // String matching is case-insensitive and whitespace-tolerant.
    #[test]
    fn test_truthy_string() {
        for s in &["1", "true", "yes", "y", "on", "TRUE", "Yes", " on "] {
            assert!(truthy(&serde_json::json!(s)), "expected truthy for {:?}", s);
        }
        for s in &["0", "false", "no", "off", "", "random"] {
            assert!(!truthy(&serde_json::json!(s)), "expected falsy for {:?}", s);
        }
    }
    // Containers and null are always falsy, even when non-empty would seem truthy.
    #[test]
    fn test_truthy_null_and_array() {
        assert!(!truthy(&serde_json::json!(null)));
        assert!(!truthy(&serde_json::json!([])));
        assert!(!truthy(&serde_json::json!({})));
    }

    // -- folder_display_name -------------------------------------------------
    #[test]
    fn test_folder_display_name_normal() {
        assert_eq!(folder_display_name("/home/user/videos"), "videos");
    }
    #[test]
    fn test_folder_display_name_root() {
        // On unix "/" has no file_name component
        assert_eq!(folder_display_name("/"), "/");
    }
    // NOTE(review): backslash is not a path separator on Unix, so this test
    // requires folder_display_name to split on both separators itself —
    // verify it passes on non-Windows CI.
    #[test]
    fn test_folder_display_name_windows() {
        assert_eq!(folder_display_name(r"C:\Users\foo\bar"), "bar");
    }

    // -- deduplicate_list ----------------------------------------------------
    // First occurrence wins; original ordering is kept.
    #[test]
    fn test_deduplicate_preserves_order() {
        let input: Vec<String> = vec!["a", "b", "c", "a", "b"]
            .into_iter()
            .map(String::from)
            .collect();
        assert_eq!(deduplicate_list(&input), vec!["a", "b", "c"]);
    }
    // Entries are trimmed before comparison, so " a " collapses into "a".
    #[test]
    fn test_deduplicate_skips_empty_and_whitespace() {
        let input: Vec<String> = vec!["a", "", " ", "b", " a "]
            .into_iter()
            .map(String::from)
            .collect();
        assert_eq!(deduplicate_list(&input), vec!["a", "b"]);
    }

    // -- natural_key ---------------------------------------------------------
    // Mixed text/digits split into alternating Text/Num parts.
    #[test]
    fn test_natural_key_basic() {
        let key = natural_key("file10name");
        assert_eq!(
            key,
            vec![
                NaturalKeyPart::Text("file".to_string()),
                NaturalKeyPart::Num(10),
                NaturalKeyPart::Text("name".to_string()),
            ]
        );
    }
    // The point of natural sort: "file2" < "file10".
    #[test]
    fn test_natural_sort_order() {
        let mut items = vec!["file10", "file2", "file1", "file20"];
        items.sort_by(|a, b| natural_key(a).cmp(&natural_key(b)));
        assert_eq!(items, vec!["file1", "file2", "file10", "file20"]);
    }
    // Text parts are lowercased, so keys compare case-insensitively.
    #[test]
    fn test_natural_key_case_insensitive() {
        let k1 = natural_key("ABC");
        let k2 = natural_key("abc");
        assert_eq!(k1, k2);
    }
    // Empty input yields a single empty Text part, never an empty vec.
    #[test]
    fn test_natural_key_empty() {
        let key = natural_key("");
        assert_eq!(key, vec![NaturalKeyPart::Text(String::new())]);
    }

    // -- smart_title_case ----------------------------------------------------
    #[test]
    fn test_smart_title_case_basic() {
        assert_eq!(smart_title_case("hello world"), "Hello World");
    }
    // Small words stay lowercase except when they open the string ("The").
    #[test]
    fn test_smart_title_case_small_words() {
        assert_eq!(
            smart_title_case("the art of war"),
            "The Art of War"
        );
    }
    #[test]
    fn test_smart_title_case_preserves_acronyms() {
        assert_eq!(smart_title_case("learn SQL today"), "Learn SQL Today");
    }
    // Words containing digits pass through unchanged ("3b", not "3B").
    #[test]
    fn test_smart_title_case_preserves_digits() {
        assert_eq!(smart_title_case("lesson 3b overview"), "Lesson 3b Overview");
    }
    #[test]
    fn test_smart_title_case_empty() {
        assert_eq!(smart_title_case(""), "");
    }

    // -- pretty_title_from_filename ------------------------------------------
    // Extension dropped, leading index stripped, underscores spaced, title-cased.
    #[test]
    fn test_pretty_title_basic() {
        assert_eq!(
            pretty_title_from_filename("01_introduction_to_python.mp4"),
            "Introduction to Python"
        );
    }
    // Parenthesized indices like "(2) " are also stripped.
    #[test]
    fn test_pretty_title_with_parens_index() {
        assert_eq!(
            pretty_title_from_filename("(2) my_file.mp4"),
            "My File"
        );
    }
    #[test]
    fn test_pretty_title_no_extension() {
        assert_eq!(
            pretty_title_from_filename("hello_world"),
            "Hello World"
        );
    }
    #[test]
    fn test_pretty_title_only_numbers() {
        // When stripping removes everything, falls back to stem
        let result = pretty_title_from_filename("123.mp4");
        assert!(!result.is_empty());
    }

    // -- file_fingerprint ----------------------------------------------------
    // Fingerprints are 20 hex chars and deterministic for unchanged content.
    #[test]
    fn test_file_fingerprint_small_file() {
        let dir = tempfile::tempdir().unwrap();
        let fpath = dir.path().join("test.bin");
        {
            let mut f = fs::File::create(&fpath).unwrap();
            f.write_all(b"hello world").unwrap();
        }
        let fp = file_fingerprint(&fpath);
        assert_eq!(fp.len(), 20);
        // Should be deterministic
        assert_eq!(fp, file_fingerprint(&fpath));
    }
    #[test]
    fn test_file_fingerprint_large_file() {
        let dir = tempfile::tempdir().unwrap();
        let fpath = dir.path().join("big.bin");
        {
            let mut f = fs::File::create(&fpath).unwrap();
            // Write 512 KB + 1 byte to ensure head/tail branches are hit
            let data = vec![0xABu8; 256 * 1024 + 1];
            f.write_all(&data).unwrap();
            f.write_all(&vec![0xCDu8; 256 * 1024]).unwrap();
        }
        let fp = file_fingerprint(&fpath);
        assert_eq!(fp.len(), 20);
    }
    // Even an unreadable path produces a (size=0) fingerprint, never a panic.
    #[test]
    fn test_file_fingerprint_nonexistent() {
        let fp = file_fingerprint(Path::new("/no/such/file/ever.bin"));
        assert_eq!(fp.len(), 20);
    }
    // Pins byte-level compatibility with the original Python implementation.
    #[test]
    fn test_file_fingerprint_matches_python() {
        // Verify against a known value produced by the Python code.
        // Python: file_fingerprint on a file containing b"hello world" (11 bytes).
        // h = sha256()
        // h.update(b"VIDFIDv1\0")
        // h.update(b"11") # str(size)
        // h.update(b"\0")
        // h.update(b"hello world") # head (< CHUNK_SIZE, no tail)
        // digest[:20]
        //
        // We precompute this in Rust to assert compatibility.
        let mut h = Sha256::new();
        h.update(b"VIDFIDv1\0");
        h.update(b"11");
        h.update(b"\0");
        h.update(b"hello world");
        let expected: String = format!("{:x}", h.finalize()).chars().take(20).collect();
        let dir = tempfile::tempdir().unwrap();
        let fpath = dir.path().join("compat.bin");
        fs::write(&fpath, b"hello world").unwrap();
        assert_eq!(file_fingerprint(&fpath), expected);
    }

    // -- compute_library_id --------------------------------------------------
    #[test]
    fn test_compute_library_id_basic() {
        let fids = vec!["abc".to_string(), "def".to_string()];
        let id = compute_library_id(&fids);
        assert_eq!(id.len(), 16);
    }
    // Fids are sorted before hashing, so input order is irrelevant.
    #[test]
    fn test_compute_library_id_order_independent() {
        let fids1 = vec!["abc".to_string(), "def".to_string()];
        let fids2 = vec!["def".to_string(), "abc".to_string()];
        assert_eq!(compute_library_id(&fids1), compute_library_id(&fids2));
    }
    // Empty fingerprints are filtered out before hashing.
    #[test]
    fn test_compute_library_id_skips_empty() {
        let with_empty = vec!["abc".to_string(), "".to_string(), "def".to_string()];
        let without_empty = vec!["abc".to_string(), "def".to_string()];
        assert_eq!(
            compute_library_id(&with_empty),
            compute_library_id(&without_empty)
        );
    }
    // Pins byte-level compatibility with the original Python implementation.
    #[test]
    fn test_compute_library_id_matches_python() {
        // Python: compute_library_id_from_fids(["abc", "def"])
        // sorted valid = ["abc", "def"]
        // h = sha256()
        // h.update(b"LIBFIDv2\0")
        // h.update(b"abc"); h.update(b"\n")
        // h.update(b"def"); h.update(b"\n")
        // hexdigest()[:16]
        let mut h = Sha256::new();
        h.update(b"LIBFIDv2\0");
        h.update(b"abc");
        h.update(b"\n");
        h.update(b"def");
        h.update(b"\n");
        let expected: String = format!("{:x}", h.finalize()).chars().take(16).collect();
        let fids = vec!["abc".to_string(), "def".to_string()];
        assert_eq!(compute_library_id(&fids), expected);
    }
}