Files
outlay/outlay-core/src/ocr.rs
lashman 10a76e3003 Add feature batch 2, subscription/recurring sync, smooth charts, and app icon
- Implement subscriptions view with bidirectional recurring transaction sync
- Add cascade delete/pause/resume between subscriptions and recurring
- Fix foreign key constraints when deleting recurring transactions
- Add cross-view instant refresh via callback pattern
- Replace Bezier chart smoothing with Fritsch-Carlson monotone Hermite interpolation
- Smooth budget sparklines using shared monotone_subdivide function
- Add vertical spacing to budget rows
- Add app icon (receipt on GNOME blue) in all sizes for desktop, web, and AppImage
- Add calendar, credit cards, forecast, goals, insights, and wishlist views
- Add date picker, numpad, quick-add, category combo, and edit dialog components
- Add import/export for CSV, JSON, OFX, QIF formats
- Add NLP transaction parsing, OCR receipt scanning, expression evaluator
- Add notification support, Sankey chart, tray icon
- Add demo data seeder with full DB wipe
- Expand database schema with subscriptions, goals, credit cards, and more
2026-03-03 21:18:37 +02:00

207 lines
6.4 KiB
Rust

use std::io::Write;
use std::path::PathBuf;
use std::process::Command;
/// Extract all monetary amounts from a receipt image using tesseract OCR.
///
/// The image bytes are written to a uniquely named temporary file, tesseract
/// is invoked with that file as input and `stdout` as its output target, and
/// the captured text is passed to `parse_all_amounts`. The temporary file is
/// removed on every path once it has been created.
///
/// Returns each amount paired with the line of text it was found on (trimmed).
/// Results are sorted: lines containing "total" first, then by amount descending.
/// Returns None if tesseract is unavailable, the temporary file cannot be
/// written, tesseract exits unsuccessfully, or no amounts are found.
pub fn extract_amounts_from_image(image_bytes: &[u8]) -> Option<Vec<(f64, String)>> {
    // Per-process counter so overlapping scans never share a temp file.
    static NEXT_SCAN_ID: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);

    let tesseract = find_tesseract()?;

    // Build a name that is unique across processes (pid), across scans in
    // this process (counter), and across runs that reuse a pid (timestamp).
    let scan_id = NEXT_SCAN_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let nanos = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or(0);
    let tmp_path = std::env::temp_dir().join(format!(
        "outlay_ocr_{}_{}_{}.png",
        std::process::id(),
        scan_id,
        nanos
    ));

    // `create_new` fails instead of truncating or following something that
    // already exists at this path.
    let mut file = std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open(&tmp_path)
        .ok()?;
    let written = file.write_all(image_bytes);
    // Close the handle before tesseract opens the file.
    drop(file);
    if written.is_err() {
        let _ = std::fs::remove_file(&tmp_path);
        return None;
    }

    let mut cmd = Command::new(&tesseract);
    cmd.arg(&tmp_path).arg("stdout");

    // If using bundled tesseract, point TESSDATA_PREFIX to bundled tessdata.
    // A bare "tesseract" resolved through PATH has an empty parent; skip it so
    // a `tessdata` directory in the current working directory is not picked up.
    let bundle_dir = tesseract
        .parent()
        .filter(|dir| !dir.as_os_str().is_empty());
    if let Some(dir) = bundle_dir {
        if dir.join("tessdata").is_dir() {
            cmd.env("TESSDATA_PREFIX", dir);
        }
    }

    // Remove the temp file before inspecting the result so a failed launch
    // does not leave it behind.
    let output = cmd.output();
    let _ = std::fs::remove_file(&tmp_path);
    let output = output.ok()?;
    if !output.status.success() {
        return None;
    }

    let text = String::from_utf8_lossy(&output.stdout);
    let results = parse_all_amounts(&text);
    if results.is_empty() {
        None
    } else {
        Some(results)
    }
}
/// Reports whether a tesseract executable could be located, either bundled
/// alongside the application or reachable as a system command.
pub fn is_available() -> bool {
    let located = find_tesseract();
    located.is_some()
}
/// Locate the tesseract executable to run.
///
/// Candidates are tried in order:
/// 1. a file named `tesseract` in the directory of the current executable,
/// 2. `../lib/tesseract` relative to that directory (canonicalized),
/// 3. the bare command `tesseract`, accepted only if `tesseract --version`
///    can be spawned and exits successfully.
///
/// Returns `None` when none of the candidates is usable.
fn find_tesseract() -> Option<PathBuf> {
    // Bundled copies shipped with the application (AppImage layouts).
    let bundled = std::env::current_exe().ok().and_then(|exe| {
        let bin_dir = exe.parent()?;

        let beside_binary = bin_dir.join("tesseract");
        if beside_binary.is_file() {
            return Some(beside_binary);
        }

        // AppImage usr/lib layout: the executable lives one level up in lib/.
        bin_dir
            .join("../lib/tesseract")
            .canonicalize()
            .ok()
            .filter(|candidate| candidate.is_file())
    });
    if bundled.is_some() {
        return bundled;
    }

    // No bundled copy: probe the system PATH by asking for the version.
    match Command::new("tesseract").arg("--version").output() {
        Ok(probe) if probe.status.success() => Some(PathBuf::from("tesseract")),
        _ => None,
    }
}
/// Collect every distinct amount found in OCR `text`, one entry per amount.
///
/// Each non-empty line is trimmed and scanned with `extract_amounts_from_line`.
/// Amounts within 0.001 of one already recorded are treated as duplicates. A
/// duplicate normally keeps the line it was first seen on, but if the earlier
/// line did not mention "total" and the current one does, the entry is moved
/// to the total line so that a total equal to an item price is still labelled
/// and ranked as a total.
///
/// The result is ordered with amounts from lines containing "total"
/// (case-insensitive) first, then by amount descending. Each tuple is
/// `(amount, trimmed source line)`.
fn parse_all_amounts(text: &str) -> Vec<(f64, String)> {
    // (amount, source line, line mentions "total")
    let mut results: Vec<(f64, String, bool)> = Vec::new();

    for line in text.lines().map(str::trim).filter(|l| !l.is_empty()) {
        let is_total = line.to_lowercase().contains("total");

        for amt in extract_amounts_from_line(line) {
            let existing = results
                .iter()
                .position(|(seen, _, _)| (*seen - amt).abs() < 0.001);

            match existing {
                Some(idx) => {
                    // Deduplicate, but never lose the "total" association.
                    let entry = &mut results[idx];
                    if is_total && !entry.2 {
                        entry.1 = line.to_string();
                        entry.2 = true;
                    }
                }
                None => results.push((amt, line.to_string(), is_total)),
            }
        }
    }

    // Sort: "total" lines first, then by amount descending. The sort is
    // stable, so ties keep their order of first appearance.
    results.sort_by(|a, b| {
        b.2.cmp(&a.2)
            .then_with(|| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal))
    });

    results
        .into_iter()
        .map(|(amt, line, _)| (amt, line))
        .collect()
}
/// Scan a single line of text for monetary amounts.
///
/// Two shapes are recognised, with `.` and `,` both accepted as separators:
///
/// * plain: a run of digits, one separator, then exactly two digits
///   (`5.99`, `12,99`, `1250.00`);
/// * grouped: one to three digits, one or more groups of a separator plus
///   exactly three digits, then the *other* separator and exactly two digits
///   (`1,250.00`, `1.250,00`, `1,234,567.89`). The grouping separators are
///   dropped and the final separator is taken as the decimal point.
///
/// Anything else (three decimals, no decimals, same separator used for both
/// grouping and decimals) yields nothing for that token. Only values greater
/// than zero are returned, in the order they appear on the line.
fn extract_amounts_from_line(line: &str) -> Vec<f64> {
    let chars: Vec<char> = line.chars().collect();
    let len = chars.len();

    let is_sep = |c: char| c == '.' || c == ',';
    // Number of consecutive ASCII digits starting at index `from`.
    let digit_run = |from: usize| {
        chars
            .iter()
            .skip(from)
            .take_while(|c| c.is_ascii_digit())
            .count()
    };
    // Join the digits of `whole` (separators skipped) with `cents` and parse;
    // non-positive values are discarded.
    let to_amount = |whole: &[char], cents: &[char]| -> Option<f64> {
        let mut number: String = whole.iter().filter(|c| c.is_ascii_digit()).collect();
        number.push('.');
        number.extend(cents.iter());
        number.parse::<f64>().ok().filter(|val| *val > 0.0)
    };

    let mut results = Vec::new();
    let mut i = 0;
    while i < len {
        if !chars[i].is_ascii_digit() {
            i += 1;
            continue;
        }

        // Consume the leading digit run.
        let start = i;
        i += digit_run(i);
        if i >= len || !is_sep(chars[i]) {
            continue;
        }
        let first_sep = chars[i];

        // Grouped form: only attempted when the leading run could be the
        // head of a thousands-grouped number (at most three digits).
        if i - start <= 3 {
            let mut j = i;
            while j < len && chars[j] == first_sep && digit_run(j + 1) == 3 {
                j += 4; // separator + three digits
            }
            let decimal_follows = j > i
                && j < len
                && is_sep(chars[j])
                && chars[j] != first_sep
                && digit_run(j + 1) == 2;
            if decimal_follows {
                results.extend(to_amount(&chars[start..j], &chars[j + 1..j + 3]));
                i = j + 3;
                continue;
            }
        }

        // Plain form: separator followed by exactly two digits. The digits
        // after the separator are consumed whether or not they form a match.
        let decimal_start = i + 1;
        let decimal_end = decimal_start + digit_run(decimal_start);
        if decimal_end - decimal_start == 2 {
            results.extend(to_amount(
                &chars[start..i],
                &chars[decimal_start..decimal_end],
            ));
        }
        i = decimal_end;
    }
    results
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An amount on a "Total" line is ranked ahead of the item amounts.
    #[test]
    fn test_parse_all_returns_sorted() {
        let parsed = parse_all_amounts("Item 1 5.99\nItem 2 3.50\nTotal 9.49\n");
        let (amount, line) = &parsed[0];
        assert_eq!(*amount, 9.49);
        assert!(line.contains("Total"));
        assert_eq!(parsed.len(), 3);
    }

    /// A comma is accepted as the decimal separator.
    #[test]
    fn test_parse_comma_separator() {
        let parsed = parse_all_amounts("Total: 12,99\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, 12.99);
    }

    /// Without any "total" line the ordering is purely by amount, descending.
    #[test]
    fn test_no_total_sorts_by_amount() {
        let parsed = parse_all_amounts("Coffee 4.50\nSandwich 8.99\n");
        assert_eq!(parsed[0].0, 8.99);
        assert_eq!(parsed[1].0, 4.50);
    }

    /// Text without any amount yields an empty result.
    #[test]
    fn test_no_amounts() {
        let parsed = parse_all_amounts("Hello world\nNo numbers here\n");
        assert!(parsed.is_empty());
    }

    /// The "total" keyword is matched regardless of case.
    #[test]
    fn test_total_case_insensitive() {
        let parsed = parse_all_amounts("Sub 5.00\nTOTAL 15.00\nChange 5.00\n");
        let (amount, line) = &parsed[0];
        assert_eq!(*amount, 15.00);
        assert!(line.contains("TOTAL"));
    }

    /// The same amount appearing on two lines is reported once.
    #[test]
    fn test_deduplicates_amounts() {
        let parsed = parse_all_amounts("Subtotal 10.00\nTotal 10.00\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, 10.00);
    }

    /// Integer parts longer than three digits are parsed as-is.
    #[test]
    fn test_large_amount() {
        let parsed = parse_all_amounts("Grand Total 1250.00\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, 1250.00);
    }
}