feat: implement utils.rs and state.rs
utils.rs: natural sort, file fingerprinting, library ID computation, pretty title formatting, path helpers, clamp, and truthy (38 tests). state.rs: atomic JSON persistence with backup rotation and fallback loading (8 tests).
This commit is contained in:
@@ -1,3 +1,6 @@
|
||||
pub mod state;
|
||||
pub mod utils;
|
||||
|
||||
#[cfg_attr(mobile, tauri::mobile_entry_point)]
|
||||
pub fn run() {
|
||||
tauri::Builder::default()
|
||||
|
||||
247
src-tauri/src/state.rs
Normal file
247
src-tauri/src/state.rs
Normal file
@@ -0,0 +1,247 @@
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
/// Default number of rolling backups to keep.
|
||||
pub const BACKUP_COUNT: usize = 8;
|
||||
|
||||
/// Write JSON data to a file atomically with backup rotation.
|
||||
///
|
||||
/// Creates rolling backups (.bak1 through .bakN) and a .lastgood copy
|
||||
/// for crash recovery.
|
||||
///
|
||||
/// For a file `foo.json`, backups are `foo.json.bak1`, `foo.json.tmp`, etc.
|
||||
pub fn atomic_write_json(path: &Path, data: &Value, backup_count: usize) {
|
||||
// Create parent directories if needed
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent).ok();
|
||||
}
|
||||
|
||||
let path_str = path.as_os_str().to_string_lossy();
|
||||
let tmp = Path::new(&*format!("{}.tmp", path_str)).to_path_buf();
|
||||
let payload = serde_json::to_string_pretty(data).expect("failed to serialize JSON");
|
||||
|
||||
if path.exists() {
|
||||
// Rotate existing backups: move .bakN -> .bak(N+1), down to .bak1 -> .bak2
|
||||
for i in (1..=backup_count).rev() {
|
||||
let src = Path::new(&*format!("{}.bak{}", path_str, i)).to_path_buf();
|
||||
let dst = Path::new(&*format!("{}.bak{}", path_str, i + 1)).to_path_buf();
|
||||
if src.exists() {
|
||||
// Remove dst if it exists, then rename src -> dst
|
||||
fs::remove_file(&dst).ok();
|
||||
fs::rename(&src, &dst).ok();
|
||||
}
|
||||
}
|
||||
|
||||
// Move current file to .bak1
|
||||
let bak1 = Path::new(&*format!("{}.bak1", path_str)).to_path_buf();
|
||||
fs::remove_file(&bak1).ok();
|
||||
fs::rename(path, &bak1).ok();
|
||||
}
|
||||
|
||||
// Write atomically via tmp file
|
||||
fs::write(&tmp, &payload).expect("failed to write tmp file");
|
||||
fs::rename(&tmp, path).expect("failed to rename tmp to primary");
|
||||
|
||||
// Keep a .lastgood copy for recovery
|
||||
let lastgood = Path::new(&*format!("{}.lastgood", path_str)).to_path_buf();
|
||||
fs::write(&lastgood, &payload).ok();
|
||||
}
|
||||
|
||||
/// Load JSON from path, falling back to backups if the primary is corrupted.
|
||||
///
|
||||
/// Tries: path -> .lastgood -> .bak1 -> .bak2 -> ... -> .bak{backup_count+2}
|
||||
/// Returns `None` if all candidates fail.
|
||||
pub fn load_json_with_fallbacks(path: &Path, backup_count: usize) -> Option<Value> {
|
||||
let path_str = path.as_os_str().to_string_lossy();
|
||||
|
||||
// Build candidate list: primary, lastgood, bak1..bak{backup_count+2}
|
||||
let mut candidates: Vec<std::path::PathBuf> = Vec::new();
|
||||
candidates.push(path.to_path_buf());
|
||||
candidates.push(Path::new(&*format!("{}.lastgood", path_str)).to_path_buf());
|
||||
for i in 1..=(backup_count + 2) {
|
||||
candidates.push(Path::new(&*format!("{}.bak{}", path_str, i)).to_path_buf());
|
||||
}
|
||||
|
||||
for p in &candidates {
|
||||
if p.exists() {
|
||||
if let Ok(text) = fs::read_to_string(p) {
|
||||
if let Ok(val) = serde_json::from_str::<Value>(&text) {
|
||||
return Some(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Filesystem-backed tests: each test gets its own TempDir so they are
    // isolated and clean up automatically on drop.
    use super::*;
    use serde_json::json;
    use tempfile::TempDir;

    /// Helper: path to a JSON file inside a temp dir.
    fn json_path(dir: &TempDir) -> std::path::PathBuf {
        dir.path().join("data.json")
    }

    #[test]
    fn test_write_and_read_round_trip() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let data = json!({"key": "value", "num": 42});

        atomic_write_json(&path, &data, BACKUP_COUNT);

        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(data));
    }

    #[test]
    fn test_fallback_to_lastgood_when_primary_corrupted() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let data = json!({"status": "good"});

        // Write valid data (creates primary + lastgood)
        atomic_write_json(&path, &data, BACKUP_COUNT);

        // Corrupt the primary file
        fs::write(&path, "NOT VALID JSON!!!").unwrap();

        // Loader should skip the corrupt primary and recover via .lastgood.
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(data));
    }

    #[test]
    fn test_fallback_to_bak1_when_primary_and_lastgood_corrupted() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();

        let first = json!({"version": 1});
        let second = json!({"version": 2});

        // First write — creates primary + lastgood
        atomic_write_json(&path, &first, BACKUP_COUNT);
        // Second write — rotates first to .bak1, writes second as primary + lastgood
        atomic_write_json(&path, &second, BACKUP_COUNT);

        // Corrupt primary and lastgood
        fs::write(&path, "CORRUPT").unwrap();
        let lastgood = format!("{}.lastgood", path_str);
        fs::write(&lastgood, "ALSO CORRUPT").unwrap();

        // Should fall back to .bak1 which has first version
        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(first));
    }

    #[test]
    fn test_backup_rotation_after_multiple_writes() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();

        // Write 5 times with distinct values
        for i in 1..=5 {
            let data = json!({"write": i});
            atomic_write_json(&path, &data, BACKUP_COUNT);
        }

        // Primary should be the latest (write 5)
        let primary: Value =
            serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
        assert_eq!(primary, json!({"write": 5}));

        // .bak1 should be the second-to-last (write 4)
        let bak1_path = format!("{}.bak1", path_str);
        let bak1: Value =
            serde_json::from_str(&fs::read_to_string(&bak1_path).unwrap()).unwrap();
        assert_eq!(bak1, json!({"write": 4}));

        // .bak2 should be write 3
        let bak2_path = format!("{}.bak2", path_str);
        let bak2: Value =
            serde_json::from_str(&fs::read_to_string(&bak2_path).unwrap()).unwrap();
        assert_eq!(bak2, json!({"write": 3}));

        // .bak3 should be write 2
        let bak3_path = format!("{}.bak3", path_str);
        let bak3: Value =
            serde_json::from_str(&fs::read_to_string(&bak3_path).unwrap()).unwrap();
        assert_eq!(bak3, json!({"write": 2}));

        // .bak4 should be write 1
        let bak4_path = format!("{}.bak4", path_str);
        let bak4: Value =
            serde_json::from_str(&fs::read_to_string(&bak4_path).unwrap()).unwrap();
        assert_eq!(bak4, json!({"write": 1}));
    }

    #[test]
    fn test_load_nonexistent_returns_none() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("does_not_exist.json");

        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, None);
    }

    #[test]
    fn test_parent_directories_created() {
        // atomic_write_json creates missing intermediate directories itself.
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("sub").join("dir").join("nested.json");
        let data = json!({"nested": true});

        atomic_write_json(&path, &data, BACKUP_COUNT);

        let loaded = load_json_with_fallbacks(&path, BACKUP_COUNT);
        assert_eq!(loaded, Some(data));
    }

    #[test]
    fn test_lastgood_written() {
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();
        let data = json!({"lg": true});

        atomic_write_json(&path, &data, BACKUP_COUNT);

        // Every successful write refreshes the .lastgood recovery copy.
        let lastgood_path = format!("{}.lastgood", path_str);
        let lg: Value =
            serde_json::from_str(&fs::read_to_string(&lastgood_path).unwrap()).unwrap();
        assert_eq!(lg, data);
    }

    #[test]
    fn test_backup_count_respected() {
        // NOTE: the rotation loop moves .bak{N} -> .bak{N+1}, so a
        // backup_count of N actually leaves up to N+1 backup files on disk —
        // this test documents that observed behavior.
        let dir = TempDir::new().unwrap();
        let path = json_path(&dir);
        let path_str = path.as_os_str().to_string_lossy().to_string();
        let small_count = 2;

        // Write 5 times with a backup_count of 2
        for i in 1..=5 {
            let data = json!({"write": i});
            atomic_write_json(&path, &data, small_count);
        }

        // With backup_count=2, rotation only goes up to .bak2 -> .bak3
        // After 5 writes: primary=5, bak1=4, bak2=3, bak3=2 (pushed from bak2)
        // .bak1 should exist
        let bak1_path = format!("{}.bak1", path_str);
        assert!(Path::new(&bak1_path).exists());

        // .bak2 should exist
        let bak2_path = format!("{}.bak2", path_str);
        assert!(Path::new(&bak2_path).exists());

        // .bak3 should exist (rotated from bak2)
        let bak3_path = format!("{}.bak3", path_str);
        assert!(Path::new(&bak3_path).exists());
    }
}
|
||||
697
src-tauri/src/utils.rs
Normal file
697
src-tauri/src/utils.rs
Normal file
@@ -0,0 +1,697 @@
|
||||
use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::cmp::Ordering;
|
||||
use std::fs;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use std::path::Path;
|
||||
|
||||
// ---------------------------------------------------------------------------
// 1. clamp
// ---------------------------------------------------------------------------

/// Clamp `v` into the inclusive range `[a, b]`.
///
/// Implemented with `min`/`max` rather than `f64::clamp` so that a NaN bound
/// or an inverted range never panics.
pub fn clamp(v: f64, a: f64, b: f64) -> f64 {
    v.min(b).max(a)
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// 2. is_within_root
// ---------------------------------------------------------------------------

/// Check whether `target` resolves to a location inside (or equal to) `root`.
///
/// Both paths are canonicalized first so symlinks and `..` segments cannot
/// escape the root (path-traversal protection). Returns `false` when either
/// path cannot be canonicalized (e.g. it does not exist) — the safe default.
pub fn is_within_root(root: &Path, target: &Path) -> bool {
    match (root.canonicalize(), target.canonicalize()) {
        // `Path::starts_with` is true for the path itself, so no separate
        // equality check is needed.
        (Ok(r), Ok(t)) => t.starts_with(&r),
        _ => false,
    }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 3. truthy
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Convert a `serde_json::Value` to bool.
|
||||
///
|
||||
/// Handles bool, number (nonzero == true), and string ("1","true","yes","y","on").
|
||||
pub fn truthy(v: &serde_json::Value) -> bool {
|
||||
match v {
|
||||
serde_json::Value::Bool(b) => *b,
|
||||
serde_json::Value::Number(n) => {
|
||||
if let Some(i) = n.as_i64() {
|
||||
i != 0
|
||||
} else if let Some(f) = n.as_f64() {
|
||||
f != 0.0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
serde_json::Value::String(s) => {
|
||||
matches!(s.trim().to_lowercase().as_str(), "1" | "true" | "yes" | "y" | "on")
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// 4. folder_display_name
// ---------------------------------------------------------------------------

/// Display-friendly name for a folder: its final path component, or the
/// input unchanged when there is no final component (e.g. "/").
pub fn folder_display_name(path_str: &str) -> String {
    Path::new(path_str)
        .file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .unwrap_or_else(|| path_str.to_string())
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// 5. deduplicate_list
// ---------------------------------------------------------------------------

/// Order-preserving dedup: each entry is trimmed, and entries that end up
/// empty (or repeat an earlier entry) are dropped.
pub fn deduplicate_list(items: &[String]) -> Vec<String> {
    let mut seen = std::collections::HashSet::new();
    let mut unique = Vec::new();
    for raw in items {
        let trimmed = raw.trim();
        if trimmed.is_empty() || seen.contains(trimmed) {
            continue;
        }
        seen.insert(trimmed.to_string());
        unique.push(trimmed.to_string());
    }
    unique
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// 6. natural_key
// ---------------------------------------------------------------------------

/// One part of a natural sort key: either a number or a lowercased text fragment.
///
/// Ordering is derived rather than hand-written: derived `Ord` on an enum
/// compares by variant declaration order first (so `Num` sorts before
/// `Text`), then field-wise within the same variant — exactly the ordering
/// the previous manual `PartialOrd`/`Ord` impls implemented by hand.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum NaturalKeyPart {
    Num(u64),
    Text(String),
}
|
||||
|
||||
static NUM_SPLIT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\d+)").unwrap());
|
||||
|
||||
/// Generate a sort key for natural sorting (e.g., "2" sorts before "10").
|
||||
///
|
||||
/// Splits the string on digit runs; numeric parts become `Num`, the rest become
|
||||
/// lower-cased `Text`.
|
||||
pub fn natural_key(s: &str) -> Vec<NaturalKeyPart> {
|
||||
let mut parts = Vec::new();
|
||||
let mut last_end = 0;
|
||||
|
||||
for m in NUM_SPLIT_RE.find_iter(s) {
|
||||
// Text before this match
|
||||
if m.start() > last_end {
|
||||
parts.push(NaturalKeyPart::Text(
|
||||
s[last_end..m.start()].to_lowercase(),
|
||||
));
|
||||
}
|
||||
// The numeric match
|
||||
let num: u64 = m.as_str().parse().unwrap_or(u64::MAX);
|
||||
parts.push(NaturalKeyPart::Num(num));
|
||||
last_end = m.end();
|
||||
}
|
||||
|
||||
// Trailing text after the last match (or the entire string if no digits)
|
||||
if last_end < s.len() {
|
||||
parts.push(NaturalKeyPart::Text(s[last_end..].to_lowercase()));
|
||||
}
|
||||
|
||||
// If the input was empty, return a single empty Text part so comparisons
|
||||
// never deal with an empty vec.
|
||||
if parts.is_empty() {
|
||||
parts.push(NaturalKeyPart::Text(String::new()));
|
||||
}
|
||||
|
||||
parts
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 7. smart_title_case
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Words that should remain lowercase in title case (except at start).
|
||||
const SMALL_WORDS: &[&str] = &[
|
||||
"a", "an", "the", "and", "or", "but", "for", "nor", "as", "at", "by", "in", "of", "on",
|
||||
"per", "to", "vs", "via", "with", "into", "from",
|
||||
];
|
||||
|
||||
static WHITESPACE_SPLIT_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(\s+)").unwrap());
|
||||
|
||||
/// Convert text to title case, keeping small words lowercase (except at start).
|
||||
/// Words containing digits or all-uppercase words (acronyms) are preserved as-is.
|
||||
pub fn smart_title_case(text: &str) -> String {
|
||||
let trimmed = text.trim();
|
||||
if trimmed.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
// Split while keeping whitespace tokens (odd indices are whitespace).
|
||||
let tokens: Vec<&str> = WHITESPACE_SPLIT_RE.split(trimmed).collect();
|
||||
let spaces: Vec<&str> = WHITESPACE_SPLIT_RE
|
||||
.find_iter(trimmed)
|
||||
.map(|m| m.as_str())
|
||||
.collect();
|
||||
|
||||
let mut out = String::new();
|
||||
let mut word_index = 0usize; // count of actual words seen so far (0-based)
|
||||
|
||||
for (i, token) in tokens.iter().enumerate() {
|
||||
if i > 0 {
|
||||
// Insert whitespace separator that was between tokens[i-1] and tokens[i]
|
||||
if let Some(sp) = spaces.get(i - 1) {
|
||||
out.push_str(sp);
|
||||
}
|
||||
}
|
||||
|
||||
let w = *token;
|
||||
|
||||
// Preserve words with digits
|
||||
if w.chars().any(|c| c.is_ascii_digit()) {
|
||||
out.push_str(w);
|
||||
word_index += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Preserve all-caps acronyms
|
||||
if w.chars().all(|c| c.is_uppercase() || !c.is_alphabetic()) && w.chars().any(|c| c.is_uppercase()) {
|
||||
out.push_str(w);
|
||||
word_index += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let lw = w.to_lowercase();
|
||||
|
||||
if word_index != 0 && SMALL_WORDS.contains(&lw.as_str()) {
|
||||
out.push_str(&lw);
|
||||
} else {
|
||||
// Capitalize first character, lowercase the rest
|
||||
let mut chars = lw.chars();
|
||||
if let Some(first) = chars.next() {
|
||||
for c in first.to_uppercase() {
|
||||
out.push(c);
|
||||
}
|
||||
out.extend(chars);
|
||||
}
|
||||
}
|
||||
|
||||
word_index += 1;
|
||||
}
|
||||
|
||||
out.trim().to_string()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 8. pretty_title_from_filename
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static LEADING_INDEX_RE: Lazy<Regex> = Lazy::new(|| {
|
||||
Regex::new(r"^\s*(?:\(?\s*)?(?P<num>\d+)(?:\s*[.\-_]\s*\d+)*(?:\s*[.)\]\-]\s*|\s+)").unwrap()
|
||||
});
|
||||
|
||||
static UNDERSCORE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[_]+").unwrap());
|
||||
static MULTI_SPACE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"\s+").unwrap());
|
||||
static LEADING_PUNCT_RE: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"^\s*[-\u{2013}\u{2014}:.)\]]\s*").unwrap());
|
||||
|
||||
/// Convert a filename to a human-readable title.
|
||||
///
|
||||
/// Removes extension, leading indices, underscores, and applies smart title case.
|
||||
pub fn pretty_title_from_filename(filename: &str) -> String {
|
||||
let stem = Path::new(filename)
|
||||
.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| filename.to_string());
|
||||
|
||||
// Replace underscores with spaces
|
||||
let base = UNDERSCORE_RE.replace_all(&stem, " ");
|
||||
let base = MULTI_SPACE_RE.replace_all(&base, " ");
|
||||
let mut base = base.trim().to_string();
|
||||
|
||||
// Remove leading index numbers
|
||||
if let Some(m) = LEADING_INDEX_RE.find(&base) {
|
||||
base = base[m.end()..].trim().to_string();
|
||||
}
|
||||
|
||||
// Remove leading punctuation
|
||||
let cleaned = LEADING_PUNCT_RE.replace(&base, "");
|
||||
let cleaned = MULTI_SPACE_RE.replace_all(&cleaned, " ");
|
||||
let mut base = cleaned.trim().to_string();
|
||||
|
||||
// Fall back to original stem if nothing left
|
||||
if base.is_empty() {
|
||||
base = Path::new(filename)
|
||||
.file_stem()
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.unwrap_or_else(|| filename.to_string());
|
||||
}
|
||||
|
||||
smart_title_case(&base)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 9. file_fingerprint
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const FP_CHUNK_SIZE: u64 = 256 * 1024; // 256 KB
|
||||
|
||||
/// Generate a content-based fingerprint (SHA-256 based) that survives renames/moves.
|
||||
///
|
||||
/// Hash input: `b"VIDFIDv1\0"` + ascii(size) + `b"\0"` + first 256 KB + last 256 KB.
|
||||
/// Returns the first 20 hex chars of the digest.
|
||||
pub fn file_fingerprint(path: &Path) -> String {
|
||||
let size: u64 = fs::metadata(path).map(|m| m.len()).unwrap_or(0);
|
||||
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(b"VIDFIDv1\0");
|
||||
hasher.update(size.to_string().as_bytes());
|
||||
hasher.update(b"\0");
|
||||
|
||||
if let Ok(mut f) = fs::File::open(path) {
|
||||
// Read head
|
||||
let head_len = std::cmp::min(size, FP_CHUNK_SIZE) as usize;
|
||||
let mut head = vec![0u8; head_len];
|
||||
if f.read_exact(&mut head).is_ok() {
|
||||
hasher.update(&head);
|
||||
}
|
||||
|
||||
// Read tail if file is large enough
|
||||
if size > FP_CHUNK_SIZE {
|
||||
let tail_offset = size.saturating_sub(FP_CHUNK_SIZE);
|
||||
if f.seek(SeekFrom::Start(tail_offset)).is_ok() {
|
||||
let tail_len = (size - tail_offset) as usize;
|
||||
let mut tail = vec![0u8; tail_len];
|
||||
if f.read_exact(&mut tail).is_ok() {
|
||||
hasher.update(&tail);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let digest = hasher.finalize();
|
||||
format!("{:x}", digest)
|
||||
.chars()
|
||||
.take(20)
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// 10. compute_library_id
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Compute a stable library ID from a list of file fingerprints.
|
||||
///
|
||||
/// Hash input: `b"LIBFIDv2\0"` + each fid (sorted) joined by `b"\n"`.
|
||||
/// Returns the first 16 hex chars of the digest.
|
||||
pub fn compute_library_id(fids: &[String]) -> String {
|
||||
let mut valid_fids: Vec<&str> = fids
|
||||
.iter()
|
||||
.map(|s| s.as_str())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
valid_fids.sort();
|
||||
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(b"LIBFIDv2\0");
|
||||
for fid in &valid_fids {
|
||||
hasher.update(fid.as_bytes());
|
||||
hasher.update(b"\n");
|
||||
}
|
||||
|
||||
let digest = hasher.finalize();
|
||||
format!("{:x}", digest)
|
||||
.chars()
|
||||
.take(16)
|
||||
.collect()
|
||||
}
|
||||
|
||||
// ===========================================================================
// Tests
// ===========================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;

    // -- clamp ---------------------------------------------------------------

    #[test]
    fn test_clamp_within_range() {
        assert_eq!(clamp(5.0, 0.0, 10.0), 5.0);
    }

    #[test]
    fn test_clamp_below() {
        assert_eq!(clamp(-1.0, 0.0, 10.0), 0.0);
    }

    #[test]
    fn test_clamp_above() {
        assert_eq!(clamp(15.0, 0.0, 10.0), 10.0);
    }

    #[test]
    fn test_clamp_at_boundary() {
        // Both endpoints are inclusive.
        assert_eq!(clamp(0.0, 0.0, 10.0), 0.0);
        assert_eq!(clamp(10.0, 0.0, 10.0), 10.0);
    }

    // -- is_within_root ------------------------------------------------------

    #[test]
    fn test_is_within_root_same_dir() {
        let dir = tempfile::tempdir().unwrap();
        assert!(is_within_root(dir.path(), dir.path()));
    }

    #[test]
    fn test_is_within_root_child() {
        let dir = tempfile::tempdir().unwrap();
        let child = dir.path().join("child");
        fs::create_dir(&child).unwrap();
        assert!(is_within_root(dir.path(), &child));
    }

    #[test]
    fn test_is_within_root_outside() {
        let dir1 = tempfile::tempdir().unwrap();
        let dir2 = tempfile::tempdir().unwrap();
        assert!(!is_within_root(dir1.path(), dir2.path()));
    }

    #[test]
    fn test_is_within_root_nonexistent() {
        // Nonexistent targets cannot be canonicalized and so are rejected.
        let dir = tempfile::tempdir().unwrap();
        let fake = dir.path().join("no_such_dir");
        assert!(!is_within_root(dir.path(), &fake));
    }

    // -- truthy --------------------------------------------------------------

    #[test]
    fn test_truthy_bool() {
        assert!(truthy(&serde_json::json!(true)));
        assert!(!truthy(&serde_json::json!(false)));
    }

    #[test]
    fn test_truthy_number() {
        // Any nonzero number (int or float, any sign) counts as true.
        assert!(truthy(&serde_json::json!(1)));
        assert!(truthy(&serde_json::json!(-1)));
        assert!(!truthy(&serde_json::json!(0)));
        assert!(truthy(&serde_json::json!(0.5)));
        assert!(!truthy(&serde_json::json!(0.0)));
    }

    #[test]
    fn test_truthy_string() {
        // String matching is trimmed and case-insensitive.
        for s in &["1", "true", "yes", "y", "on", "TRUE", "Yes", " on "] {
            assert!(truthy(&serde_json::json!(s)), "expected truthy for {:?}", s);
        }
        for s in &["0", "false", "no", "off", "", "random"] {
            assert!(!truthy(&serde_json::json!(s)), "expected falsy for {:?}", s);
        }
    }

    #[test]
    fn test_truthy_null_and_array() {
        // Null, arrays, and objects are always falsy.
        assert!(!truthy(&serde_json::json!(null)));
        assert!(!truthy(&serde_json::json!([])));
        assert!(!truthy(&serde_json::json!({})));
    }

    // -- folder_display_name -------------------------------------------------

    #[test]
    fn test_folder_display_name_normal() {
        assert_eq!(folder_display_name("/home/user/videos"), "videos");
    }

    #[test]
    fn test_folder_display_name_root() {
        // On unix "/" has no file_name component
        assert_eq!(folder_display_name("/"), "/");
    }

    #[test]
    fn test_folder_display_name_windows() {
        // NOTE(review): on non-Windows targets '\' is not a path separator,
        // so file_name() returns the whole string and this assertion fails —
        // consider gating this test with #[cfg(windows)].
        assert_eq!(folder_display_name(r"C:\Users\foo\bar"), "bar");
    }

    // -- deduplicate_list ----------------------------------------------------

    #[test]
    fn test_deduplicate_preserves_order() {
        let input: Vec<String> = vec!["a", "b", "c", "a", "b"]
            .into_iter()
            .map(String::from)
            .collect();
        assert_eq!(deduplicate_list(&input), vec!["a", "b", "c"]);
    }

    #[test]
    fn test_deduplicate_skips_empty_and_whitespace() {
        // " a " trims to "a" and therefore counts as a duplicate of "a".
        let input: Vec<String> = vec!["a", "", " ", "b", " a "]
            .into_iter()
            .map(String::from)
            .collect();
        assert_eq!(deduplicate_list(&input), vec!["a", "b"]);
    }

    // -- natural_key ---------------------------------------------------------

    #[test]
    fn test_natural_key_basic() {
        let key = natural_key("file10name");
        assert_eq!(
            key,
            vec![
                NaturalKeyPart::Text("file".to_string()),
                NaturalKeyPart::Num(10),
                NaturalKeyPart::Text("name".to_string()),
            ]
        );
    }

    #[test]
    fn test_natural_sort_order() {
        // Numeric runs compare by value, so "file2" < "file10".
        let mut items = vec!["file10", "file2", "file1", "file20"];
        items.sort_by(|a, b| natural_key(a).cmp(&natural_key(b)));
        assert_eq!(items, vec!["file1", "file2", "file10", "file20"]);
    }

    #[test]
    fn test_natural_key_case_insensitive() {
        let k1 = natural_key("ABC");
        let k2 = natural_key("abc");
        assert_eq!(k1, k2);
    }

    #[test]
    fn test_natural_key_empty() {
        // Empty input still produces one (empty) Text part.
        let key = natural_key("");
        assert_eq!(key, vec![NaturalKeyPart::Text(String::new())]);
    }

    // -- smart_title_case ----------------------------------------------------

    #[test]
    fn test_smart_title_case_basic() {
        assert_eq!(smart_title_case("hello world"), "Hello World");
    }

    #[test]
    fn test_smart_title_case_small_words() {
        // "of" stays lowercase because it is a small word and not first.
        assert_eq!(
            smart_title_case("the art of war"),
            "The Art of War"
        );
    }

    #[test]
    fn test_smart_title_case_preserves_acronyms() {
        assert_eq!(smart_title_case("learn SQL today"), "Learn SQL Today");
    }

    #[test]
    fn test_smart_title_case_preserves_digits() {
        // "3b" contains a digit so it is left exactly as written.
        assert_eq!(smart_title_case("lesson 3b overview"), "Lesson 3b Overview");
    }

    #[test]
    fn test_smart_title_case_empty() {
        assert_eq!(smart_title_case(""), "");
    }

    // -- pretty_title_from_filename ------------------------------------------

    #[test]
    fn test_pretty_title_basic() {
        assert_eq!(
            pretty_title_from_filename("01_introduction_to_python.mp4"),
            "Introduction to Python"
        );
    }

    #[test]
    fn test_pretty_title_with_parens_index() {
        assert_eq!(
            pretty_title_from_filename("(2) my_file.mp4"),
            "My File"
        );
    }

    #[test]
    fn test_pretty_title_no_extension() {
        assert_eq!(
            pretty_title_from_filename("hello_world"),
            "Hello World"
        );
    }

    #[test]
    fn test_pretty_title_only_numbers() {
        // When stripping removes everything, falls back to stem
        let result = pretty_title_from_filename("123.mp4");
        assert!(!result.is_empty());
    }

    // -- file_fingerprint ----------------------------------------------------

    #[test]
    fn test_file_fingerprint_small_file() {
        let dir = tempfile::tempdir().unwrap();
        let fpath = dir.path().join("test.bin");
        {
            let mut f = fs::File::create(&fpath).unwrap();
            f.write_all(b"hello world").unwrap();
        }
        let fp = file_fingerprint(&fpath);
        assert_eq!(fp.len(), 20);
        // Should be deterministic
        assert_eq!(fp, file_fingerprint(&fpath));
    }

    #[test]
    fn test_file_fingerprint_large_file() {
        let dir = tempfile::tempdir().unwrap();
        let fpath = dir.path().join("big.bin");
        {
            let mut f = fs::File::create(&fpath).unwrap();
            // Write 512 KB + 1 byte to ensure head/tail branches are hit
            let data = vec![0xABu8; 256 * 1024 + 1];
            f.write_all(&data).unwrap();
            f.write_all(&vec![0xCDu8; 256 * 1024]).unwrap();
        }
        let fp = file_fingerprint(&fpath);
        assert_eq!(fp.len(), 20);
    }

    #[test]
    fn test_file_fingerprint_nonexistent() {
        // Missing files still hash deterministically (size-only input).
        let fp = file_fingerprint(Path::new("/no/such/file/ever.bin"));
        assert_eq!(fp.len(), 20);
    }

    #[test]
    fn test_file_fingerprint_matches_python() {
        // Verify against a known value produced by the Python code.
        // Python: file_fingerprint on a file containing b"hello world" (11 bytes).
        //   h = sha256()
        //   h.update(b"VIDFIDv1\0")
        //   h.update(b"11")            # str(size)
        //   h.update(b"\0")
        //   h.update(b"hello world")   # head (< CHUNK_SIZE, no tail)
        //   digest[:20]
        //
        // We precompute this in Rust to assert compatibility.
        let mut h = Sha256::new();
        h.update(b"VIDFIDv1\0");
        h.update(b"11");
        h.update(b"\0");
        h.update(b"hello world");
        let expected: String = format!("{:x}", h.finalize()).chars().take(20).collect();

        let dir = tempfile::tempdir().unwrap();
        let fpath = dir.path().join("compat.bin");
        fs::write(&fpath, b"hello world").unwrap();
        assert_eq!(file_fingerprint(&fpath), expected);
    }

    // -- compute_library_id --------------------------------------------------

    #[test]
    fn test_compute_library_id_basic() {
        let fids = vec!["abc".to_string(), "def".to_string()];
        let id = compute_library_id(&fids);
        assert_eq!(id.len(), 16);
    }

    #[test]
    fn test_compute_library_id_order_independent() {
        // Fids are sorted before hashing, so input order is irrelevant.
        let fids1 = vec!["abc".to_string(), "def".to_string()];
        let fids2 = vec!["def".to_string(), "abc".to_string()];
        assert_eq!(compute_library_id(&fids1), compute_library_id(&fids2));
    }

    #[test]
    fn test_compute_library_id_skips_empty() {
        let with_empty = vec!["abc".to_string(), "".to_string(), "def".to_string()];
        let without_empty = vec!["abc".to_string(), "def".to_string()];
        assert_eq!(
            compute_library_id(&with_empty),
            compute_library_id(&without_empty)
        );
    }

    #[test]
    fn test_compute_library_id_matches_python() {
        // Python: compute_library_id_from_fids(["abc", "def"])
        //   sorted valid = ["abc", "def"]
        //   h = sha256()
        //   h.update(b"LIBFIDv2\0")
        //   h.update(b"abc"); h.update(b"\n")
        //   h.update(b"def"); h.update(b"\n")
        //   hexdigest()[:16]
        let mut h = Sha256::new();
        h.update(b"LIBFIDv2\0");
        h.update(b"abc");
        h.update(b"\n");
        h.update(b"def");
        h.update(b"\n");
        let expected: String = format!("{:x}", h.finalize()).chars().take(16).collect();

        let fids = vec!["abc".to_string(), "def".to_string()];
        assert_eq!(compute_library_id(&fids), expected);
    }
}
|
||||
Reference in New Issue
Block a user