use std::io::Write;
use std::path::PathBuf;
use std::process::Command;

/// Extract all monetary amounts from a receipt image using tesseract OCR.
///
/// The image bytes are written to a uniquely named temporary file, tesseract
/// is run on it with output sent to stdout, and the recognised text is scanned
/// for amounts. The temporary file is removed again on every path, including
/// when writing the image or launching tesseract fails.
///
/// Returns each amount paired with the line of text it was found on (trimmed).
/// Results are sorted: lines containing "total" first, then by amount descending.
///
/// Returns `None` if tesseract is unavailable, the temporary file cannot be
/// created or written, tesseract fails, or no amounts are found.
pub fn extract_amounts_from_image(image_bytes: &[u8]) -> Option<Vec<(f64, String)>> {
    // Distinguishes the temp files of concurrent calls inside this process;
    // the process id in the file name distinguishes separate processes.
    static NEXT_TMP_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let tesseract = find_tesseract()?;

    let tmp_id = NEXT_TMP_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let tmp_path = std::env::temp_dir().join(format!(
        "outlay_ocr_tmp_{}_{}.png",
        std::process::id(),
        tmp_id
    ));

    let mut cmd = Command::new(&tesseract);
    cmd.arg(&tmp_path).arg("stdout");

    // If using bundled tesseract, point TESSDATA_PREFIX at the bundle.
    // The bare "tesseract" PATH fallback has an empty parent; skip it so a
    // `tessdata` directory in the current working directory is never mistaken
    // for bundled data.
    // NOTE(review): this passes the directory *containing* `tessdata`. Tesseract
    // 4+ appears to expect the `tessdata` directory itself - confirm against the
    // bundled tesseract version.
    if let Some(parent) = tesseract.parent().filter(|p| !p.as_os_str().is_empty()) {
        if parent.join("tessdata").is_dir() {
            cmd.env("TESSDATA_PREFIX", parent);
        }
    }

    // `create_new` refuses to reuse an existing path, so a file or symlink
    // planted in the shared temp dir is never followed. If the open fails,
    // nothing was created and there is nothing to clean up.
    let mut file = std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(&tmp_path)
        .ok()?;
    let written = file.write_all(image_bytes);
    // Close the handle before tesseract opens the file.
    drop(file);

    // Run tesseract only if the image was written, but remove the temp file
    // in either case before propagating any failure.
    let run = written.and_then(|()| cmd.output());
    let _ = std::fs::remove_file(&tmp_path);
    let output = run.ok()?;

    if !output.status.success() {
        return None;
    }

    let text = String::from_utf8_lossy(&output.stdout);
    let results = parse_all_amounts(&text);
    if results.is_empty() {
        None
    } else {
        Some(results)
    }
}

/// Returns true if tesseract is available (bundled or system).
pub fn is_available() -> bool {
    find_tesseract().is_some()
}

/// Locate the tesseract executable.
///
/// Checks, in order: a `tesseract` file next to the current executable, a
/// `../lib/tesseract` file relative to the executable's directory, and finally
/// a `tesseract` command on the system PATH (probed by running
/// `tesseract --version`). Returns `None` if none of these is usable.
fn find_tesseract() -> Option<PathBuf> {
    // Check for bundled tesseract next to our binary (AppImage layout)
    if let Ok(exe) = std::env::current_exe() {
        if let Some(bin_dir) = exe.parent() {
            let bundled = bin_dir.join("tesseract");
            if bundled.is_file() {
                return Some(bundled);
            }
            // Also check ../lib/tesseract (AppImage usr/lib layout)
            if let Ok(lib_bundled) = bin_dir.join("../lib/tesseract").canonicalize() {
                if lib_bundled.is_file() {
                    return Some(lib_bundled);
                }
            }
        }
    }

    // Fall back to system PATH
    Command::new("tesseract")
        .arg("--version")
        .output()
        .ok()
        .filter(|o| o.status.success())
        .map(|_| PathBuf::from("tesseract"))
}

/// Collect every distinct amount found in `text`, paired with its source line.
///
/// Each non-empty line is trimmed and scanned for amounts. Amounts that differ
/// by less than 0.001 are treated as duplicates and reported once. When a
/// duplicate shows up on a line containing "total" (case-insensitive) and the
/// entry recorded so far came from a non-total line, the entry is switched to
/// the total line so the amount keeps its "total" priority.
///
/// The result is sorted with "total" lines first, then by amount descending.
fn parse_all_amounts(text: &str) -> Vec<(f64, String)> {
    let mut results: Vec<(f64, String, bool)> = Vec::new();

    for line in text.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }

        let is_total = trimmed.to_lowercase().contains("total");

        for amt in extract_amounts_from_line(trimmed) {
            match results.iter().position(|(a, _, _)| (*a - amt).abs() < 0.001) {
                Some(idx) => {
                    // Deduplicate, but let a "total" line take over an entry
                    // first seen on an ordinary line (e.g. a single-item receipt
                    // where the item price equals the total).
                    let existing = &mut results[idx];
                    if is_total && !existing.2 {
                        existing.1 = trimmed.to_string();
                        existing.2 = true;
                    }
                }
                None => results.push((amt, trimmed.to_string(), is_total)),
            }
        }
    }

    // Sort: "total" lines first, then by amount descending
    results.sort_by(|a, b| {
        b.2.cmp(&a.2)
            .then(b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal))
    });

    results.into_iter().map(|(amt, line, _)| (amt, line)).collect()
}

/// True for the characters accepted as decimal or thousands separators.
fn is_amount_separator(c: char) -> bool {
    c == '.' || c == ','
}

/// Number of consecutive ASCII digits in `chars` starting at index `from`
/// (0 when `from` is at or past the end).
fn digit_run(chars: &[char], from: usize) -> usize {
    chars
        .get(from..)
        .map_or(0, |rest| rest.iter().take_while(|c| c.is_ascii_digit()).count())
}

/// Try to read an amount written with thousands separators at `start`.
///
/// Accepts 1-3 leading digits, one or more groups of a separator plus exactly
/// three digits, then the *other* separator plus exactly two digits, e.g.
/// `1,250.00` or `1.250,00`. Returns the value and the index just past it.
fn parse_grouped_amount(chars: &[char], start: usize) -> Option<(f64, usize)> {
    let lead = digit_run(chars, start);
    if !(1..=3).contains(&lead) {
        return None;
    }

    let mut pos = start + lead;
    let group_sep = *chars.get(pos)?;
    if !is_amount_separator(group_sep) {
        return None;
    }
    // The decimal separator must differ from the grouping one; otherwise
    // "1,250,00" would be ambiguous.
    let decimal_sep = if group_sep == ',' { '.' } else { ',' };

    let mut int_digits: String = chars[start..pos].iter().collect();
    let mut groups = 0;
    while chars.get(pos) == Some(&group_sep) && digit_run(chars, pos + 1) == 3 {
        int_digits.extend(&chars[pos + 1..pos + 4]);
        pos += 4;
        groups += 1;
    }

    if groups == 0 || chars.get(pos) != Some(&decimal_sep) || digit_run(chars, pos + 1) != 2 {
        return None;
    }

    let cents: String = chars[pos + 1..pos + 3].iter().collect();
    let value = format!("{}.{}", int_digits, cents).parse::<f64>().ok()?;
    Some((value, pos + 3))
}

/// Extract every positive monetary amount from a single line of text.
///
/// An amount is a run of digits, a `.` or `,` separator, and exactly two more
/// digits (`5.99`, `12,99`). Amounts with thousands separators (`1,250.00`,
/// `1.250,00`) are recognised as a whole. Zero amounts are ignored.
fn extract_amounts_from_line(line: &str) -> Vec<f64> {
    let chars: Vec<char> = line.chars().collect();
    let len = chars.len();
    let mut results = Vec::new();
    let mut i = 0;

    while i < len {
        if !chars[i].is_ascii_digit() {
            i += 1;
            continue;
        }
        let start = i;

        // Prefer the grouped form so "1,250.00" is read as 1250.00 instead of
        // being rejected for having three digits after the first separator.
        if let Some((value, end)) = parse_grouped_amount(&chars, start) {
            if value > 0.0 {
                results.push(value);
            }
            i = end;
            continue;
        }

        // Plain form: integer digits, a separator, exactly two decimal digits.
        let sep = start + digit_run(&chars, start);
        i = sep;
        if sep < len && is_amount_separator(chars[sep]) {
            let decimal_start = sep + 1;
            i = decimal_start + digit_run(&chars, decimal_start);

            if i - decimal_start == 2 {
                let int_part: String = chars[start..sep].iter().collect();
                let dec_part: String = chars[decimal_start..i].iter().collect();
                if let Ok(val) = format!("{}.{}", int_part, dec_part).parse::<f64>() {
                    if val > 0.0 {
                        results.push(val);
                    }
                }
            }
        }
    }

    results
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_all_returns_sorted() {
        let text = "Item 1 5.99\nItem 2 3.50\nTotal 9.49\n";
        let results = parse_all_amounts(text);
        // "Total" line should come first
        assert_eq!(results[0].0, 9.49);
        assert!(results[0].1.contains("Total"));
        assert_eq!(results.len(), 3);
    }

    #[test]
    fn test_parse_comma_separator() {
        let text = "Total: 12,99\n";
        let results = parse_all_amounts(text);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 12.99);
    }

    #[test]
    fn test_no_total_sorts_by_amount() {
        let text = "Coffee 4.50\nSandwich 8.99\n";
        let results = parse_all_amounts(text);
        assert_eq!(results[0].0, 8.99);
        assert_eq!(results[1].0, 4.50);
    }

    #[test]
    fn test_no_amounts() {
        let text = "Hello world\nNo numbers here\n";
        let results = parse_all_amounts(text);
        assert!(results.is_empty());
    }

    #[test]
    fn test_total_case_insensitive() {
        let text = "Sub 5.00\nTOTAL 15.00\nChange 5.00\n";
        let results = parse_all_amounts(text);
        // TOTAL line first
        assert_eq!(results[0].0, 15.00);
        assert!(results[0].1.contains("TOTAL"));
    }

    #[test]
    fn test_deduplicates_amounts() {
        let text = "Subtotal 10.00\nTotal 10.00\n";
        let results = parse_all_amounts(text);
        // Same amount on two lines - should deduplicate
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 10.00);
    }

    #[test]
    fn test_large_amount() {
        let text = "Grand Total 1250.00\n";
        let results = parse_all_amounts(text);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].0, 1250.00);
    }

    #[test]
    fn test_total_line_replaces_earlier_duplicate() {
        let text = "Widget 10.00\nCoffee 12.50\nTotal 10.00\n";
        let results = parse_all_amounts(text);
        // The duplicate amount must be reported on its "Total" line, first
        assert_eq!(results.len(), 2);
        assert_eq!(results[0].0, 10.00);
        assert_eq!(results[0].1, "Total 10.00");
        assert_eq!(results[1].0, 12.50);
    }

    #[test]
    fn test_thousands_separators() {
        assert_eq!(extract_amounts_from_line("Grand Total 1,250.00"), vec![1250.00]);
        assert_eq!(extract_amounts_from_line("Summe 1.250,00"), vec![1250.00]);
    }
}