- Implement subscriptions view with bidirectional recurring transaction sync - Add cascade delete/pause/resume between subscriptions and recurring - Fix foreign key constraints when deleting recurring transactions - Add cross-view instant refresh via callback pattern - Replace Bezier chart smoothing with Fritsch-Carlson monotone Hermite interpolation - Smooth budget sparklines using shared monotone_subdivide function - Add vertical spacing to budget rows - Add app icon (receipt on GNOME blue) in all sizes for desktop, web, and AppImage - Add calendar, credit cards, forecast, goals, insights, and wishlist views - Add date picker, numpad, quick-add, category combo, and edit dialog components - Add import/export for CSV, JSON, OFX, QIF formats - Add NLP transaction parsing, OCR receipt scanning, expression evaluator - Add notification support, Sankey chart, tray icon - Add demo data seeder with full DB wipe - Expand database schema with subscriptions, goals, credit cards, and more
207 lines
6.4 KiB
Rust
207 lines
6.4 KiB
Rust
use std::io::Write;
|
|
use std::path::PathBuf;
|
|
use std::process::Command;
|
|
|
|
/// Extract all monetary amounts from a receipt image using tesseract OCR.
|
|
/// Returns each amount paired with the line of text it was found on (trimmed).
|
|
/// Results are sorted: lines containing "total" first, then by amount descending.
|
|
/// Returns None if tesseract is unavailable or no amounts are found.
|
|
pub fn extract_amounts_from_image(image_bytes: &[u8]) -> Option<Vec<(f64, String)>> {
|
|
let tesseract = find_tesseract()?;
|
|
|
|
// Write image to a temp file
|
|
let tmp_dir = std::env::temp_dir();
|
|
let tmp_path = tmp_dir.join("outlay_ocr_tmp.png");
|
|
let mut file = std::fs::File::create(&tmp_path).ok()?;
|
|
file.write_all(image_bytes).ok()?;
|
|
drop(file);
|
|
|
|
let mut cmd = Command::new(&tesseract);
|
|
cmd.arg(&tmp_path).arg("stdout");
|
|
|
|
// If using bundled tesseract, point TESSDATA_PREFIX to bundled tessdata
|
|
if let Some(parent) = tesseract.parent() {
|
|
let tessdata = parent.join("tessdata");
|
|
if tessdata.is_dir() {
|
|
cmd.env("TESSDATA_PREFIX", parent);
|
|
}
|
|
}
|
|
|
|
let output = cmd.output().ok()?;
|
|
let _ = std::fs::remove_file(&tmp_path);
|
|
|
|
if !output.status.success() {
|
|
return None;
|
|
}
|
|
|
|
let text = String::from_utf8_lossy(&output.stdout);
|
|
let results = parse_all_amounts(&text);
|
|
if results.is_empty() {
|
|
None
|
|
} else {
|
|
Some(results)
|
|
}
|
|
}
|
|
|
|
/// Returns true if tesseract is available (bundled or system).
|
|
pub fn is_available() -> bool {
|
|
find_tesseract().is_some()
|
|
}
|
|
|
|
/// Locates the tesseract executable to run.
///
/// Candidates are tried in this order:
/// 1. `tesseract` in the same directory as the running executable
///    (AppImage layout),
/// 2. `../lib/tesseract` relative to that directory, resolved to a canonical
///    path (AppImage usr/lib layout),
/// 3. plain `tesseract`, accepted only if `tesseract --version` can be run and
///    exits successfully.
///
/// Returns `None` when none of the candidates is usable.
fn find_tesseract() -> Option<PathBuf> {
    // Directory holding our own binary, if it can be determined.
    let exe_dir = std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.to_path_buf()));

    if let Some(dir) = exe_dir {
        let sibling = dir.join("tesseract");
        if sibling.is_file() {
            return Some(sibling);
        }

        // `canonicalize` fails when the path does not exist, which simply
        // means this candidate is skipped.
        if let Ok(resolved) = dir.join("../lib/tesseract").canonicalize() {
            if resolved.is_file() {
                return Some(resolved);
            }
        }
    }

    // Fall back to whatever `tesseract` resolves to for a spawned command.
    let probe = Command::new("tesseract").arg("--version").output();
    match probe {
        Ok(out) if out.status.success() => Some(PathBuf::from("tesseract")),
        _ => None,
    }
}
|
|
|
|
fn parse_all_amounts(text: &str) -> Vec<(f64, String)> {
|
|
let mut results: Vec<(f64, String, bool)> = Vec::new();
|
|
|
|
for line in text.lines() {
|
|
let trimmed = line.trim();
|
|
if trimmed.is_empty() {
|
|
continue;
|
|
}
|
|
let line_amounts = extract_amounts_from_line(trimmed);
|
|
let is_total = trimmed.to_lowercase().contains("total");
|
|
for amt in line_amounts {
|
|
// Deduplicate: skip if we already have this exact amount
|
|
if !results.iter().any(|(a, _, _)| (*a - amt).abs() < 0.001) {
|
|
results.push((amt, trimmed.to_string(), is_total));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Sort: "total" lines first, then by amount descending
|
|
results.sort_by(|a, b| {
|
|
b.2.cmp(&a.2).then(b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal))
|
|
});
|
|
|
|
results.into_iter().map(|(amt, line, _)| (amt, line)).collect()
|
|
}
|
|
|
|
/// Scans one line of text for monetary amounts and returns them in order of
/// appearance.
///
/// Two shapes are recognised, with either `.` or `,` as the separator:
/// * `<digits><sep><exactly 2 digits>`, e.g. `12.99`, `12,99`, `1250.00`;
/// * a grouped number: 1-3 leading digits, one or more `<sep><3 digits>`
///   groups that all use the same separator, then the *other* separator and
///   exactly two digits, e.g. `1,250.00` or `1.250,00`.
///
/// Only ASCII digits are considered. Values that are zero or not finite are
/// skipped.
fn extract_amounts_from_line(line: &str) -> Vec<f64> {
    /// Length of the run of ASCII digits starting at `from` (0 if out of range).
    fn digit_run(bytes: &[u8], from: usize) -> usize {
        bytes.get(from..).map_or(0, |rest| {
            rest.iter().take_while(|b| b.is_ascii_digit()).count()
        })
    }

    /// Parses `<whole>.<cents>` and stores it when it is positive and finite.
    fn record(amounts: &mut Vec<f64>, whole: &str, cents: &str) {
        if let Ok(value) = format!("{}.{}", whole, cents).parse::<f64>() {
            if value > 0.0 && value.is_finite() {
                amounts.push(value);
            }
        }
    }

    // Everything we match is ASCII, and bytes of multi-byte UTF-8 characters are
    // never ASCII, so scanning bytes is safe and every index used for slicing
    // `line` below sits on a character boundary.
    let bytes = line.as_bytes();
    let len = bytes.len();
    let mut amounts = Vec::new();
    let mut pos = 0;

    while pos < len {
        if !bytes[pos].is_ascii_digit() {
            pos += 1;
            continue;
        }

        // Integer part (or the leading group of a grouped number).
        let int_start = pos;
        let int_end = pos + digit_run(bytes, pos);
        pos = int_end;

        if int_end >= len || !(bytes[int_end] == b'.' || bytes[int_end] == b',') {
            continue;
        }
        let sep = bytes[int_end];

        // Digits directly after the first separator.
        let first_start = int_end + 1;
        let first_end = first_start + digit_run(bytes, first_start);
        // Unless a grouped number is matched below, scanning resumes here.
        pos = first_end;

        match first_end - first_start {
            // Plain amount: separator followed by exactly two digits.
            2 => record(
                &mut amounts,
                &line[int_start..int_end],
                &line[first_start..first_end],
            ),
            // Possible thousands grouping such as "1,250.00".
            3 if int_end - int_start <= 3 => {
                let mut whole = String::from(&line[int_start..int_end]);
                whole.push_str(&line[first_start..first_end]);

                // Further groups must use the same separator and have 3 digits.
                let mut cursor = first_end;
                while cursor < len && bytes[cursor] == sep && digit_run(bytes, cursor + 1) == 3 {
                    whole.push_str(&line[cursor + 1..cursor + 4]);
                    cursor += 4;
                }

                // The decimal part needs the other separator and exactly two
                // digits; otherwise this was not an amount after all.
                let has_cents = cursor < len
                    && (bytes[cursor] == b'.' || bytes[cursor] == b',')
                    && bytes[cursor] != sep
                    && digit_run(bytes, cursor + 1) == 2;
                if has_cents {
                    record(&mut amounts, &whole, &line[cursor + 1..cursor + 3]);
                    pos = cursor + 3;
                }
            }
            _ => {}
        }
    }

    amounts
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A line mentioning "Total" is ranked ahead of larger or earlier amounts.
    #[test]
    fn test_parse_all_returns_sorted() {
        let parsed = parse_all_amounts("Item 1 5.99\nItem 2 3.50\nTotal 9.49\n");
        let (top_amount, top_line) = &parsed[0];
        assert_eq!(*top_amount, 9.49);
        assert!(top_line.contains("Total"));
        assert_eq!(parsed.len(), 3);
    }

    /// A comma is accepted as the decimal separator.
    #[test]
    fn test_parse_comma_separator() {
        let parsed = parse_all_amounts("Total: 12,99\n");
        assert_eq!(parsed.len(), 1);
        let (amount, _) = &parsed[0];
        assert_eq!(*amount, 12.99);
    }

    /// Without any "total" line the amounts come back largest first.
    #[test]
    fn test_no_total_sorts_by_amount() {
        let parsed = parse_all_amounts("Coffee 4.50\nSandwich 8.99\n");
        let (largest, _) = &parsed[0];
        let (smallest, _) = &parsed[1];
        assert_eq!(*largest, 8.99);
        assert_eq!(*smallest, 4.50);
    }

    /// Text without numbers yields no results.
    #[test]
    fn test_no_amounts() {
        let parsed = parse_all_amounts("Hello world\nNo numbers here\n");
        assert!(parsed.is_empty());
    }

    /// The "total" keyword is matched regardless of case.
    #[test]
    fn test_total_case_insensitive() {
        let parsed = parse_all_amounts("Sub 5.00\nTOTAL 15.00\nChange 5.00\n");
        let (top_amount, top_line) = &parsed[0];
        assert_eq!(*top_amount, 15.00);
        assert!(top_line.contains("TOTAL"));
    }

    /// The same amount on two lines is reported only once.
    #[test]
    fn test_deduplicates_amounts() {
        let parsed = parse_all_amounts("Subtotal 10.00\nTotal 10.00\n");
        assert_eq!(parsed.len(), 1);
        let (amount, _) = &parsed[0];
        assert_eq!(*amount, 10.00);
    }

    /// Amounts with more than three integer digits are parsed in full.
    #[test]
    fn test_large_amount() {
        let parsed = parse_all_amounts("Grand Total 1250.00\n");
        assert_eq!(parsed.len(), 1);
        let (amount, _) = &parsed[0];
        assert_eq!(*amount, 1250.00);
    }
}
|