Add feature batch 2, subscription/recurring sync, smooth charts, and app icon
- Implement subscriptions view with bidirectional recurring transaction sync - Add cascade delete/pause/resume between subscriptions and recurring - Fix foreign key constraints when deleting recurring transactions - Add cross-view instant refresh via callback pattern - Replace Bezier chart smoothing with Fritsch-Carlson monotone Hermite interpolation - Smooth budget sparklines using shared monotone_subdivide function - Add vertical spacing to budget rows - Add app icon (receipt on GNOME blue) in all sizes for desktop, web, and AppImage - Add calendar, credit cards, forecast, goals, insights, and wishlist views - Add date picker, numpad, quick-add, category combo, and edit dialog components - Add import/export for CSV, JSON, OFX, QIF formats - Add NLP transaction parsing, OCR receipt scanning, expression evaluator - Add notification support, Sankey chart, tray icon - Add demo data seeder with full DB wipe - Expand database schema with subscriptions, goals, credit cards, and more
This commit is contained in:
206
outlay-core/src/ocr.rs
Normal file
206
outlay-core/src/ocr.rs
Normal file
@@ -0,0 +1,206 @@
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
/// Extract all monetary amounts from a receipt image using tesseract OCR.
|
||||
/// Returns each amount paired with the line of text it was found on (trimmed).
|
||||
/// Results are sorted: lines containing "total" first, then by amount descending.
|
||||
/// Returns None if tesseract is unavailable or no amounts are found.
|
||||
pub fn extract_amounts_from_image(image_bytes: &[u8]) -> Option<Vec<(f64, String)>> {
|
||||
let tesseract = find_tesseract()?;
|
||||
|
||||
// Write image to a temp file
|
||||
let tmp_dir = std::env::temp_dir();
|
||||
let tmp_path = tmp_dir.join("outlay_ocr_tmp.png");
|
||||
let mut file = std::fs::File::create(&tmp_path).ok()?;
|
||||
file.write_all(image_bytes).ok()?;
|
||||
drop(file);
|
||||
|
||||
let mut cmd = Command::new(&tesseract);
|
||||
cmd.arg(&tmp_path).arg("stdout");
|
||||
|
||||
// If using bundled tesseract, point TESSDATA_PREFIX to bundled tessdata
|
||||
if let Some(parent) = tesseract.parent() {
|
||||
let tessdata = parent.join("tessdata");
|
||||
if tessdata.is_dir() {
|
||||
cmd.env("TESSDATA_PREFIX", parent);
|
||||
}
|
||||
}
|
||||
|
||||
let output = cmd.output().ok()?;
|
||||
let _ = std::fs::remove_file(&tmp_path);
|
||||
|
||||
if !output.status.success() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let text = String::from_utf8_lossy(&output.stdout);
|
||||
let results = parse_all_amounts(&text);
|
||||
if results.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(results)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if tesseract is available (bundled or system).
|
||||
pub fn is_available() -> bool {
|
||||
find_tesseract().is_some()
|
||||
}
|
||||
|
||||
/// Locate a tesseract executable.
///
/// Lookup order:
/// 1. `tesseract` in the directory of the current executable (AppImage layout).
/// 2. `../lib/tesseract` relative to that directory, canonicalized
///    (AppImage usr/lib layout).
/// 3. The bare name `tesseract`, accepted only if running
///    `tesseract --version` exits successfully (system PATH).
///
/// Returns None when none of the three is usable.
fn find_tesseract() -> Option<PathBuf> {
    let bundled = std::env::current_exe().ok().and_then(|exe| {
        let bin_dir = exe.parent()?;

        let beside_binary = bin_dir.join("tesseract");
        if beside_binary.is_file() {
            return Some(beside_binary);
        }

        // canonicalize() fails when the path does not exist, which simply
        // means this candidate is skipped.
        bin_dir
            .join("../lib/tesseract")
            .canonicalize()
            .ok()
            .filter(|candidate| candidate.is_file())
    });
    if bundled.is_some() {
        return bundled;
    }

    // Fall back to system PATH: probe by actually running the command.
    match Command::new("tesseract").arg("--version").output() {
        Ok(probe) if probe.status.success() => Some(PathBuf::from("tesseract")),
        _ => None,
    }
}
|
||||
|
||||
fn parse_all_amounts(text: &str) -> Vec<(f64, String)> {
|
||||
let mut results: Vec<(f64, String, bool)> = Vec::new();
|
||||
|
||||
for line in text.lines() {
|
||||
let trimmed = line.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let line_amounts = extract_amounts_from_line(trimmed);
|
||||
let is_total = trimmed.to_lowercase().contains("total");
|
||||
for amt in line_amounts {
|
||||
// Deduplicate: skip if we already have this exact amount
|
||||
if !results.iter().any(|(a, _, _)| (*a - amt).abs() < 0.001) {
|
||||
results.push((amt, trimmed.to_string(), is_total));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort: "total" lines first, then by amount descending
|
||||
results.sort_by(|a, b| {
|
||||
b.2.cmp(&a.2).then(b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal))
|
||||
});
|
||||
|
||||
results.into_iter().map(|(amt, line, _)| (amt, line)).collect()
|
||||
}
|
||||
|
||||
/// Scan one line of text for monetary amounts and return them in order of
/// appearance.
///
/// Two shapes are recognised, where a separator is `.` or `,`:
///
/// * plain: a run of digits, a separator, then exactly two digits
///   (`12.99`, `12,99`, `1250.00`);
/// * grouped: one to three digits, one or more groups of a separator plus
///   exactly three digits, then the *other* separator and exactly two digits
///   (`1,250.00`, `1.250,00`, `1,250,000.99`). The grouping separators are
///   dropped.
///
/// Anything else (no decimals, one or three decimals, ...) is ignored, as are
/// amounts that parse to zero.
fn extract_amounts_from_line(line: &str) -> Vec<f64> {
    let is_sep = |c: char| c == '.' || c == ',';
    let chars: Vec<char> = line.chars().collect();
    let len = chars.len();
    // Length of the run of ASCII digits starting at `from` (`from <= len`).
    let digit_run = |from: usize| {
        chars[from..]
            .iter()
            .take_while(|c| c.is_ascii_digit())
            .count()
    };

    let mut results = Vec::new();
    let mut push_amount = |int_part: &str, dec_part: &str| {
        if let Ok(val) = format!("{}.{}", int_part, dec_part).parse::<f64>() {
            if val > 0.0 {
                results.push(val);
            }
        }
    };

    let mut i = 0;
    while i < len {
        if !chars[i].is_ascii_digit() {
            i += 1;
            continue;
        }

        // Consume integer part (or the leading thousands group).
        let start = i;
        let lead = digit_run(start);
        i = start + lead;
        if i >= len || !is_sep(chars[i]) {
            continue;
        }

        // Grouped form: only considered when the leading group has at most
        // three digits and is followed by at least one `<sep><3 digits>` group.
        if lead <= 3 {
            let group_sep = chars[i];
            let mut digits: String = chars[start..i].iter().collect();
            let mut j = i;
            while j < len && chars[j] == group_sep && digit_run(j + 1) == 3 {
                digits.extend(chars[j + 1..j + 4].iter());
                j += 4;
            }
            // The decimal separator must differ from the grouping one, which
            // keeps strings such as `1.234.56` from being read as an amount.
            let has_groups = j > i;
            if has_groups
                && j < len
                && is_sep(chars[j])
                && chars[j] != group_sep
                && digit_run(j + 1) == 2
            {
                let dec_part: String = chars[j + 1..j + 3].iter().collect();
                push_amount(&digits, &dec_part);
                i = j + 3;
                continue;
            }
        }

        // Plain form: decimal separator followed by exactly 2 digits.
        let sep = i;
        let decimal_start = sep + 1;
        let decimal_len = digit_run(decimal_start);
        i = decimal_start + decimal_len;
        if decimal_len == 2 {
            let int_part: String = chars[start..sep].iter().collect();
            let dec_part: String = chars[decimal_start..i].iter().collect();
            push_amount(&int_part, &dec_part);
        }
    }

    results
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A line mentioning "Total" is ranked ahead of larger-or-smaller item lines.
    #[test]
    fn test_parse_all_returns_sorted() {
        let parsed = parse_all_amounts("Item 1 5.99\nItem 2 3.50\nTotal 9.49\n");
        assert_eq!(parsed.len(), 3);

        let (top_amount, top_line) = &parsed[0];
        assert_eq!(*top_amount, 9.49);
        assert!(top_line.contains("Total"));
    }

    /// A comma is accepted as the decimal separator.
    #[test]
    fn test_parse_comma_separator() {
        let parsed = parse_all_amounts("Total: 12,99\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, 12.99);
    }

    /// Without any "total" line the ordering is purely by amount, descending.
    #[test]
    fn test_no_total_sorts_by_amount() {
        let parsed = parse_all_amounts("Coffee 4.50\nSandwich 8.99\n");
        let first = parsed[0].0;
        let second = parsed[1].0;
        assert_eq!(first, 8.99);
        assert_eq!(second, 4.50);
    }

    /// Text without any amount yields an empty result.
    #[test]
    fn test_no_amounts() {
        let parsed = parse_all_amounts("Hello world\nNo numbers here\n");
        assert!(parsed.is_empty());
    }

    /// The "total" keyword is matched regardless of case.
    #[test]
    fn test_total_case_insensitive() {
        let parsed = parse_all_amounts("Sub 5.00\nTOTAL 15.00\nChange 5.00\n");
        let (top_amount, top_line) = &parsed[0];
        assert_eq!(*top_amount, 15.00);
        assert!(top_line.contains("TOTAL"));
    }

    /// The same amount appearing on two lines is reported once.
    #[test]
    fn test_deduplicates_amounts() {
        let parsed = parse_all_amounts("Subtotal 10.00\nTotal 10.00\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, 10.00);
    }

    /// Integer parts longer than three digits are parsed as a whole.
    #[test]
    fn test_large_amount() {
        let parsed = parse_all_amounts("Grand Total 1250.00\n");
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].0, 1250.00);
    }
}
|
||||
Reference in New Issue
Block a user