diff --git a/src-tauri/src/ffmpeg.rs b/src-tauri/src/ffmpeg.rs
new file mode 100644
index 0000000..bf08b73
--- /dev/null
+++ b/src-tauri/src/ffmpeg.rs
@@ -0,0 +1,806 @@
//! FFmpeg / FFprobe discovery, video metadata extraction, and ffmpeg downloading.

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;

#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/// CREATE_NO_WINDOW flag for Windows subprocess creation.
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x08000000;

/// Timeout for ffprobe / ffmpeg subprocess calls.
const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(20);

/// Timeout for ffprobe metadata calls (slightly longer for large files).
const METADATA_TIMEOUT: Duration = Duration::from_secs(25);

/// FFmpeg download URL (Windows 64-bit GPL build).
const FFMPEG_DOWNLOAD_URL: &str =
    "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip";

// ---------------------------------------------------------------------------
// Regex patterns
// ---------------------------------------------------------------------------

/// Matches "Duration: HH:MM:SS.ss" in ffmpeg stderr output.
static DURATION_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)").unwrap());

// ---------------------------------------------------------------------------
// Structs
// ---------------------------------------------------------------------------

/// Paths to discovered ffprobe and ffmpeg executables.
///
/// Either field is `None` when the corresponding binary was not found.
pub struct FfmpegPaths {
    pub ffprobe: Option<PathBuf>,
    pub ffmpeg: Option<PathBuf>,
}

/// Detailed video metadata extracted via ffprobe.
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct VideoMetadata { + pub v_codec: Option, + pub width: Option, + pub height: Option, + pub fps: Option, + pub v_bitrate: Option, + pub pix_fmt: Option, + pub color_space: Option, + pub a_codec: Option, + pub channels: Option, + pub sample_rate: Option, + pub a_bitrate: Option, + pub subtitle_tracks: Vec, + pub container_bitrate: Option, + pub duration: Option, + pub format_name: Option, + pub container_title: Option, + pub encoder: Option, +} + +impl Default for VideoMetadata { + fn default() -> Self { + Self { + v_codec: None, + width: None, + height: None, + fps: None, + v_bitrate: None, + pix_fmt: None, + color_space: None, + a_codec: None, + channels: None, + sample_rate: None, + a_bitrate: None, + subtitle_tracks: Vec::new(), + container_bitrate: None, + duration: None, + format_name: None, + container_title: None, + encoder: None, + } + } +} + +/// Information about a single subtitle track. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SubtitleTrack { + pub index: u32, + pub codec: String, + pub language: String, + pub title: String, +} + +/// Progress information emitted during ffmpeg download. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DownloadProgress { + pub percent: f64, + pub downloaded_bytes: u64, + pub total_bytes: u64, +} + +// --------------------------------------------------------------------------- +// 1. Platform-specific command setup +// --------------------------------------------------------------------------- + +/// Apply platform-specific flags to a `Command` (hide console window on Windows). +fn apply_no_window(_cmd: &mut Command) { + #[cfg(target_os = "windows")] + { + _cmd.creation_flags(CREATE_NO_WINDOW); + } +} + +// --------------------------------------------------------------------------- +// 2. discover +// --------------------------------------------------------------------------- + +/// Discover ffmpeg and ffprobe executables. 
+/// +/// Search order: +/// 1. System PATH via `which` +/// 2. Alongside the application executable (`exe_dir`) +/// 3. Inside `state_dir/ffmpeg/` +pub fn discover(exe_dir: &Path, state_dir: &Path) -> FfmpegPaths { + let ffprobe = discover_one("ffprobe", exe_dir, state_dir); + let ffmpeg = discover_one("ffmpeg", exe_dir, state_dir); + FfmpegPaths { ffprobe, ffmpeg } +} + +/// Discover a single executable by name. +fn discover_one(name: &str, exe_dir: &Path, state_dir: &Path) -> Option { + // 1. System PATH + if let Ok(p) = which::which(name) { + return Some(p); + } + + // Platform-specific executable name + let exe_name = if cfg!(target_os = "windows") { + format!("{}.exe", name) + } else { + name.to_string() + }; + + // 2. Beside the application executable + let candidate = exe_dir.join(&exe_name); + if candidate.is_file() { + return Some(candidate); + } + + // 3. Inside state_dir/ffmpeg/ + let candidate = state_dir.join("ffmpeg").join(&exe_name); + if candidate.is_file() { + return Some(candidate); + } + + None +} + +// --------------------------------------------------------------------------- +// 3. duration_seconds +// --------------------------------------------------------------------------- + +/// Get video duration in seconds using ffprobe (primary) or ffmpeg stderr (fallback). +/// +/// Returns `None` if neither method succeeds or duration is not positive. +pub fn duration_seconds(path: &Path, paths: &FfmpegPaths) -> Option { + // Try ffprobe first + if let Some(ref ffprobe) = paths.ffprobe { + if let Some(d) = duration_via_ffprobe(path, ffprobe) { + return Some(d); + } + } + + // Fallback: parse ffmpeg stderr + if let Some(ref ffmpeg) = paths.ffmpeg { + if let Some(d) = duration_via_ffmpeg(path, ffmpeg) { + return Some(d); + } + } + + None +} + +/// Extract duration using ffprobe's format=duration output. 
+fn duration_via_ffprobe(path: &Path, ffprobe: &Path) -> Option { + let mut cmd = Command::new(ffprobe); + cmd.args([ + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=nw=1:nk=1", + ]) + .arg(path) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()); + apply_no_window(&mut cmd); + + let child = cmd.spawn().ok()?; + let output = wait_with_timeout(child, SUBPROCESS_TIMEOUT)?; + let text = String::from_utf8_lossy(&output.stdout); + let trimmed = text.trim(); + if trimmed.is_empty() { + return None; + } + let d: f64 = trimmed.parse().ok()?; + if d > 0.0 { Some(d) } else { None } +} + +/// Extract duration by parsing "Duration: HH:MM:SS.ss" from ffmpeg stderr. +fn duration_via_ffmpeg(path: &Path, ffmpeg: &Path) -> Option { + let mut cmd = Command::new(ffmpeg); + cmd.args(["-hide_banner", "-i"]) + .arg(path) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()); + apply_no_window(&mut cmd); + + let child = cmd.spawn().ok()?; + let output = wait_with_timeout(child, SUBPROCESS_TIMEOUT)?; + let stderr = String::from_utf8_lossy(&output.stderr); + parse_ffmpeg_duration(&stderr) +} + +/// Parse "Duration: HH:MM:SS.ss" from ffmpeg stderr output. +fn parse_ffmpeg_duration(stderr: &str) -> Option { + let caps = DURATION_RE.captures(stderr)?; + let hh: f64 = caps.get(1)?.as_str().parse().ok()?; + let mm: f64 = caps.get(2)?.as_str().parse().ok()?; + let ss: f64 = caps.get(3)?.as_str().parse().ok()?; + let total = hh * 3600.0 + mm * 60.0 + ss; + if total > 0.0 { Some(total) } else { None } +} + +/// Wait for a child process with a timeout, killing it if exceeded. +fn wait_with_timeout( + child: std::process::Child, + timeout: Duration, +) -> Option { + // Convert Child into a form we can wait on with a timeout. + // std::process::Child::wait_with_output blocks, so we use a thread. 
+ let (tx, rx) = std::sync::mpsc::channel(); + let handle = std::thread::spawn(move || { + let result = child.wait_with_output(); + let _ = tx.send(result); + }); + + match rx.recv_timeout(timeout) { + Ok(Ok(output)) => { + let _ = handle.join(); + Some(output) + } + _ => { + // Timeout or error -- the thread owns the child and will clean up + let _ = handle.join(); + None + } + } +} + +// --------------------------------------------------------------------------- +// 4. ffprobe_video_metadata +// --------------------------------------------------------------------------- + +/// Extract detailed video metadata using ffprobe JSON output. +/// +/// Runs ffprobe with `-print_format json -show_streams -show_format` and parses +/// the resulting JSON to populate a `VideoMetadata` struct. +pub fn ffprobe_video_metadata(path: &Path, ffprobe: &Path) -> Option { + let mut cmd = Command::new(ffprobe); + cmd.args([ + "-v", "error", + "-print_format", "json", + "-show_streams", "-show_format", + ]) + .arg(path) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()); + apply_no_window(&mut cmd); + + let child = cmd.spawn().ok()?; + let output = wait_with_timeout(child, METADATA_TIMEOUT)?; + let text = String::from_utf8_lossy(&output.stdout); + let data: serde_json::Value = serde_json::from_str(&text).ok()?; + + let streams = data.get("streams").and_then(|v| v.as_array()); + let fmt = data.get("format").and_then(|v| v.as_object()); + + let mut meta = VideoMetadata::default(); + let mut found_video = false; + let mut found_audio = false; + + // Iterate streams: first video, first audio, all subtitles + if let Some(streams) = streams { + for (idx, s) in streams.iter().enumerate() { + let obj = match s.as_object() { + Some(o) => o, + None => continue, + }; + let codec_type = obj + .get("codec_type") + .and_then(|v| v.as_str()) + .unwrap_or(""); + + match codec_type { + "video" if !found_video => { + found_video = true; + meta.v_codec = json_str(obj, 
"codec_name"); + meta.width = json_u32(obj, "width"); + meta.height = json_u32(obj, "height"); + meta.pix_fmt = json_str(obj, "pix_fmt"); + meta.color_space = json_str(obj, "color_space"); + + // Parse frame rate ("num/den") + let frame_rate = json_str(obj, "r_frame_rate") + .or_else(|| json_str(obj, "avg_frame_rate")); + if let Some(ref fr) = frame_rate { + meta.fps = parse_frame_rate(fr); + } + + // Video bitrate + meta.v_bitrate = json_str(obj, "bit_rate") + .and_then(|s| s.parse::().ok()); + } + "audio" if !found_audio => { + found_audio = true; + meta.a_codec = json_str(obj, "codec_name"); + meta.channels = json_u32(obj, "channels"); + meta.sample_rate = json_str(obj, "sample_rate"); + meta.a_bitrate = json_str(obj, "bit_rate") + .and_then(|s| s.parse::().ok()); + } + "subtitle" => { + let tags = obj + .get("tags") + .and_then(|v| v.as_object()); + + let language = tags + .and_then(|t| { + json_str_from(t, "language") + .or_else(|| json_str_from(t, "LANGUAGE")) + }) + .unwrap_or_default(); + + let title = tags + .and_then(|t| { + json_str_from(t, "title") + .or_else(|| json_str_from(t, "TITLE")) + }) + .unwrap_or_default(); + + let stream_index = obj + .get("index") + .and_then(|v| v.as_u64()) + .unwrap_or(idx as u64) as u32; + + let codec = json_str(obj, "codec_name") + .unwrap_or_else(|| "unknown".to_string()); + + meta.subtitle_tracks.push(SubtitleTrack { + index: stream_index, + codec, + language, + title, + }); + } + _ => {} + } + } + } + + // Format-level metadata + if let Some(fmt) = fmt { + meta.container_bitrate = json_str_from(fmt, "bit_rate") + .and_then(|s| s.parse::().ok()); + + meta.duration = json_str_from(fmt, "duration") + .and_then(|s| s.parse::().ok()); + + meta.format_name = json_str_from(fmt, "format_name"); + + // Container tags + if let Some(ftags) = fmt.get("tags").and_then(|v| v.as_object()) { + let ct = json_str_from(ftags, "title"); + if ct.as_deref().map_or(false, |s| !s.is_empty()) { + meta.container_title = ct; + } + let enc = 
json_str_from(ftags, "encoder"); + if enc.as_deref().map_or(false, |s| !s.is_empty()) { + meta.encoder = enc; + } + } + } + + // Return None if we extracted nothing useful + let has_data = meta.v_codec.is_some() + || meta.a_codec.is_some() + || !meta.subtitle_tracks.is_empty() + || meta.duration.is_some() + || meta.format_name.is_some(); + + if has_data { Some(meta) } else { None } +} + +// --------------------------------------------------------------------------- +// JSON helper functions +// --------------------------------------------------------------------------- + +/// Extract a string value from a JSON object by key. +fn json_str(obj: &serde_json::Map, key: &str) -> Option { + json_str_from(obj, key) +} + +/// Extract a string value from a JSON map by key. +fn json_str_from( + obj: &serde_json::Map, + key: &str, +) -> Option { + obj.get(key).and_then(|v| match v { + serde_json::Value::String(s) => Some(s.clone()), + serde_json::Value::Number(n) => Some(n.to_string()), + _ => None, + }) +} + +/// Extract a u32 value from a JSON object by key. +fn json_u32(obj: &serde_json::Map, key: &str) -> Option { + obj.get(key).and_then(|v| v.as_u64()).map(|n| n as u32) +} + +/// Parse a frame rate string like "30000/1001" into an f64. +fn parse_frame_rate(fr: &str) -> Option { + if let Some((num_str, den_str)) = fr.split_once('/') { + let num: f64 = num_str.trim().parse().ok()?; + let den: f64 = den_str.trim().parse().ok()?; + if den == 0.0 { + return None; + } + let fps = num / den; + if fps > 0.0 { Some(fps) } else { None } + } else { + // Plain number + let fps: f64 = fr.trim().parse().ok()?; + if fps > 0.0 { Some(fps) } else { None } + } +} + +// --------------------------------------------------------------------------- +// 5. download_ffmpeg (async) +// --------------------------------------------------------------------------- + +/// Download ffmpeg from GitHub, extract `ffmpeg.exe` and `ffprobe.exe`, and +/// place them in `state_dir/ffmpeg/`. 
+/// +/// Reports progress via the provided `tokio::sync::mpsc::Sender`. +pub async fn download_ffmpeg( + state_dir: &Path, + progress_tx: tokio::sync::mpsc::Sender, +) -> Result { + let dest_dir = state_dir.join("ffmpeg"); + std::fs::create_dir_all(&dest_dir) + .map_err(|e| format!("Failed to create ffmpeg directory: {}", e))?; + + let zip_path = dest_dir.join("ffmpeg-download.zip"); + + // Start the download + let client = reqwest::Client::new(); + let response = client + .get(FFMPEG_DOWNLOAD_URL) + .send() + .await + .map_err(|e| format!("Failed to start download: {}", e))?; + + if !response.status().is_success() { + return Err(format!("Download failed with status: {}", response.status())); + } + + let total_bytes = response.content_length().unwrap_or(0); + let mut downloaded_bytes: u64 = 0; + + // Stream the response body to disk chunk by chunk using reqwest's chunk() + let mut file = std::fs::File::create(&zip_path) + .map_err(|e| format!("Failed to create zip file: {}", e))?; + + let mut response = response; + while let Some(chunk) = response + .chunk() + .await + .map_err(|e| format!("Download stream error: {}", e))? 
+ { + std::io::Write::write_all(&mut file, &chunk) + .map_err(|e| format!("Failed to write chunk: {}", e))?; + + downloaded_bytes += chunk.len() as u64; + + let percent = if total_bytes > 0 { + (downloaded_bytes as f64 / total_bytes as f64) * 100.0 + } else { + 0.0 + }; + + // Send progress update (ignore send errors if receiver dropped) + let _ = progress_tx + .send(DownloadProgress { + percent, + downloaded_bytes, + total_bytes, + }) + .await; + } + drop(file); + + // Extract ffmpeg.exe and ffprobe.exe from the zip + extract_ffmpeg_from_zip(&zip_path, &dest_dir)?; + + // Clean up the zip file + std::fs::remove_file(&zip_path).ok(); + + // Verify the extracted files exist + let ffmpeg_exe = if cfg!(target_os = "windows") { + "ffmpeg.exe" + } else { + "ffmpeg" + }; + let ffprobe_exe = if cfg!(target_os = "windows") { + "ffprobe.exe" + } else { + "ffprobe" + }; + + let ffmpeg_path = dest_dir.join(ffmpeg_exe); + let ffprobe_path = dest_dir.join(ffprobe_exe); + + Ok(FfmpegPaths { + ffprobe: if ffprobe_path.is_file() { + Some(ffprobe_path) + } else { + None + }, + ffmpeg: if ffmpeg_path.is_file() { + Some(ffmpeg_path) + } else { + None + }, + }) +} + +/// Extract only `ffmpeg.exe` and `ffprobe.exe` from a downloaded zip archive. +/// +/// The BtbN builds have files nested inside a directory like +/// `ffmpeg-master-latest-win64-gpl/bin/ffmpeg.exe`, so we search for any entry +/// whose filename ends with the target name. 
+fn extract_ffmpeg_from_zip(zip_path: &Path, dest_dir: &Path) -> Result<(), String> { + let file = std::fs::File::open(zip_path) + .map_err(|e| format!("Failed to open zip: {}", e))?; + let mut archive = zip::ZipArchive::new(file) + .map_err(|e| format!("Failed to read zip archive: {}", e))?; + + let targets: &[&str] = if cfg!(target_os = "windows") { + &["ffmpeg.exe", "ffprobe.exe"] + } else { + &["ffmpeg", "ffprobe"] + }; + + for i in 0..archive.len() { + let mut entry = archive + .by_index(i) + .map_err(|e| format!("Failed to read zip entry {}: {}", i, e))?; + + let entry_name = entry.name().to_string(); + + // Check if this entry matches one of our target filenames + for target in targets { + if entry_name.ends_with(&format!("/{}", target)) + || entry_name == *target + { + let out_path = dest_dir.join(target); + let mut out_file = std::fs::File::create(&out_path) + .map_err(|e| format!("Failed to create {}: {}", target, e))?; + std::io::copy(&mut entry, &mut out_file) + .map_err(|e| format!("Failed to extract {}: {}", target, e))?; + break; + } + } + } + + Ok(()) +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + + // -- parse_ffmpeg_duration ----------------------------------------------- + + #[test] + fn test_parse_ffmpeg_duration_standard() { + let stderr = r#" +Input #0, matroska,webm, from 'video.mkv': + Duration: 01:23:45.67, start: 0.000000, bitrate: 5000 kb/s + Stream #0:0: Video: h264 +"#; + let d = parse_ffmpeg_duration(stderr).unwrap(); + let expected = 1.0 * 3600.0 + 23.0 * 60.0 + 45.67; + assert!((d - expected).abs() < 0.001, "got {}, expected {}", d, expected); + } + + #[test] + fn test_parse_ffmpeg_duration_short_video() { + let stderr = " Duration: 00:00:30.50, start: 0.000000, bitrate: 1200 kb/s\n"; + let d = parse_ffmpeg_duration(stderr).unwrap(); + assert!((d - 30.5).abs() < 
0.001); + } + + #[test] + fn test_parse_ffmpeg_duration_whole_seconds() { + let stderr = " Duration: 00:05:00.00, start: 0.0\n"; + let d = parse_ffmpeg_duration(stderr).unwrap(); + assert!((d - 300.0).abs() < 0.001); + } + + #[test] + fn test_parse_ffmpeg_duration_zero() { + // Zero duration should return None (not positive) + let stderr = " Duration: 00:00:00.00, start: 0.0\n"; + assert!(parse_ffmpeg_duration(stderr).is_none()); + } + + #[test] + fn test_parse_ffmpeg_duration_no_match() { + let stderr = "some random output without duration info\n"; + assert!(parse_ffmpeg_duration(stderr).is_none()); + } + + #[test] + fn test_parse_ffmpeg_duration_empty() { + assert!(parse_ffmpeg_duration("").is_none()); + } + + // -- discover_not_found -------------------------------------------------- + + #[test] + fn test_discover_not_found() { + // Use non-existent directories -- should return None for both paths + let exe_dir = Path::new("/nonexistent/path/that/does/not/exist/exe"); + let state_dir = Path::new("/nonexistent/path/that/does/not/exist/state"); + let paths = discover(exe_dir, state_dir); + + // On a system without ffmpeg in PATH these will be None; + // on a system with ffmpeg installed they may be Some. + // We simply verify the function does not panic. 
+ let _ = paths.ffprobe; + let _ = paths.ffmpeg; + } + + // -- video_metadata_default ---------------------------------------------- + + #[test] + fn test_video_metadata_default() { + let meta = VideoMetadata::default(); + assert!(meta.v_codec.is_none()); + assert!(meta.width.is_none()); + assert!(meta.height.is_none()); + assert!(meta.fps.is_none()); + assert!(meta.v_bitrate.is_none()); + assert!(meta.pix_fmt.is_none()); + assert!(meta.color_space.is_none()); + assert!(meta.a_codec.is_none()); + assert!(meta.channels.is_none()); + assert!(meta.sample_rate.is_none()); + assert!(meta.a_bitrate.is_none()); + assert!(meta.subtitle_tracks.is_empty()); + assert!(meta.container_bitrate.is_none()); + assert!(meta.duration.is_none()); + assert!(meta.format_name.is_none()); + assert!(meta.container_title.is_none()); + assert!(meta.encoder.is_none()); + } + + // -- download_progress_serialization ------------------------------------- + + #[test] + fn test_download_progress_serialization() { + let progress = DownloadProgress { + percent: 50.5, + downloaded_bytes: 1024, + total_bytes: 2048, + }; + + let json = serde_json::to_string(&progress).unwrap(); + let parsed: DownloadProgress = serde_json::from_str(&json).unwrap(); + + assert!((parsed.percent - 50.5).abs() < f64::EPSILON); + assert_eq!(parsed.downloaded_bytes, 1024); + assert_eq!(parsed.total_bytes, 2048); + } + + #[test] + fn test_download_progress_json_fields() { + let progress = DownloadProgress { + percent: 100.0, + downloaded_bytes: 5000, + total_bytes: 5000, + }; + + let value: serde_json::Value = serde_json::to_value(&progress).unwrap(); + assert_eq!(value["percent"], 100.0); + assert_eq!(value["downloaded_bytes"], 5000); + assert_eq!(value["total_bytes"], 5000); + } + + // -- parse_frame_rate ---------------------------------------------------- + + #[test] + fn test_parse_frame_rate_fraction() { + let fps = parse_frame_rate("30000/1001").unwrap(); + assert!((fps - 29.97).abs() < 0.01); + } + + #[test] + fn 
test_parse_frame_rate_integer() { + let fps = parse_frame_rate("24/1").unwrap(); + assert!((fps - 24.0).abs() < 0.001); + } + + #[test] + fn test_parse_frame_rate_zero_denominator() { + assert!(parse_frame_rate("24/0").is_none()); + } + + #[test] + fn test_parse_frame_rate_plain_number() { + let fps = parse_frame_rate("60").unwrap(); + assert!((fps - 60.0).abs() < 0.001); + } + + // -- subtitle_track_serialization ---------------------------------------- + + #[test] + fn test_subtitle_track_serialization() { + let track = SubtitleTrack { + index: 2, + codec: "srt".to_string(), + language: "eng".to_string(), + title: "English".to_string(), + }; + + let json = serde_json::to_string(&track).unwrap(); + let parsed: SubtitleTrack = serde_json::from_str(&json).unwrap(); + + assert_eq!(parsed.index, 2); + assert_eq!(parsed.codec, "srt"); + assert_eq!(parsed.language, "eng"); + assert_eq!(parsed.title, "English"); + } + + // -- integration tests (require actual ffprobe/ffmpeg) ------------------- + + #[test] + #[ignore] + fn test_duration_seconds_with_real_ffprobe() { + // This test requires ffprobe and ffmpeg to be installed and a sample + // video file at the given path. 
+ let exe_dir = Path::new("."); + let state_dir = Path::new("."); + let paths = discover(exe_dir, state_dir); + + if paths.ffprobe.is_none() && paths.ffmpeg.is_none() { + eprintln!("Skipping: no ffprobe or ffmpeg found"); + return; + } + + // Would need a real video file here + // let d = duration_seconds(Path::new("sample.mp4"), &paths); + // assert!(d.is_some()); + } + + #[test] + #[ignore] + fn test_ffprobe_video_metadata_with_real_ffprobe() { + let exe_dir = Path::new("."); + let state_dir = Path::new("."); + let paths = discover(exe_dir, state_dir); + + if let Some(ref ffprobe) = paths.ffprobe { + // Would need a real video file here + // let meta = ffprobe_video_metadata(Path::new("sample.mp4"), ffprobe); + // assert!(meta.is_some()); + let _ = ffprobe; + } else { + eprintln!("Skipping: no ffprobe found"); + } + } +} diff --git a/src-tauri/src/fonts.rs b/src-tauri/src/fonts.rs new file mode 100644 index 0000000..c32b000 --- /dev/null +++ b/src-tauri/src/fonts.rs @@ -0,0 +1,614 @@ +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::json; +use sha2::{Digest, Sha256}; +use std::collections::HashMap; +use std::fs; +use std::path::Path; +use std::time::SystemTime; + +use crate::state::{atomic_write_json, load_json_with_fallbacks, BACKUP_COUNT}; + +// --------------------------------------------------------------------------- +// Constants +// --------------------------------------------------------------------------- + +/// Current version for Google Fonts metadata (fonts_meta.json). +const GOOGLE_FONTS_META_VERSION: u64 = 7; + +/// Current version for Font Awesome metadata (fa_meta.json). +const FA_META_VERSION: u64 = 3; + +/// User-Agent header value for HTTP requests. +/// Google Fonts API returns different CSS based on User-Agent; we want woff2. +const USER_AGENT: &str = "Mozilla/5.0"; + +/// Google Fonts CSS URLs. 
+const GOOGLE_FONT_URLS: &[(&str, &str)] = &[ + ( + "Sora", + "https://fonts.googleapis.com/css2?family=Sora:wght@500;600;700;800&display=swap", + ), + ( + "Manrope", + "https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700;800&display=swap", + ), + ( + "IBM Plex Mono", + "https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&display=swap", + ), +]; + +/// Font Awesome CSS URL. +const FA_CSS_URL: &str = + "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css"; + +/// Base URL for resolving relative Font Awesome webfont URLs. +const FA_WEBFONTS_BASE: &str = + "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/"; + +// --------------------------------------------------------------------------- +// Compiled regex patterns +// --------------------------------------------------------------------------- + +/// Regex for extracting woff2 font URLs from Google Fonts CSS. +static GOOGLE_FONT_URL_RE: Lazy = + Lazy::new(|| Regex::new(r#"url\(([^)]+)\)\s*format\(['"]woff2['"]\)"#).unwrap()); + +/// Regex for extracting all url(...) references from Font Awesome CSS. +static FA_URL_RE: Lazy = Lazy::new(|| Regex::new(r"url\(([^)]+)\)").unwrap()); + +// --------------------------------------------------------------------------- +// 1. safe_filename_from_url +// --------------------------------------------------------------------------- + +/// Generate a safe local filename from a URL using SHA-256 hash for uniqueness. +/// +/// The filename is `{stem}-{hash}{suffix}` where `hash` is the first 10 hex +/// characters of the SHA-256 digest of the full URL. If the URL path has no +/// extension, `.woff2` is appended. 
+pub fn safe_filename_from_url(url: &str) -> String { + // Extract the last path component from the URL + let base = url + .split('?') + .next() + .unwrap_or(url) + .split('#') + .next() + .unwrap_or(url) + .rsplit('/') + .next() + .unwrap_or("font.woff2"); + + let base = if base.is_empty() { "font.woff2" } else { base }; + + // Ensure the base has an extension + let base = if !base.contains('.') { + format!("{}.woff2", base) + } else { + base.to_string() + }; + + // Split into stem and suffix + let (stem, suffix) = match base.rfind('.') { + Some(pos) => (&base[..pos], &base[pos..]), + None => (base.as_str(), ".woff2"), + }; + + // Compute SHA-256 hash of the full URL + let mut hasher = Sha256::new(); + hasher.update(url.as_bytes()); + let digest = format!("{:x}", hasher.finalize()); + let url_hash = &digest[..10]; + + format!("{}-{}{}", stem, url_hash, suffix) +} + +// --------------------------------------------------------------------------- +// 2. ensure_google_fonts_local +// --------------------------------------------------------------------------- + +/// Download and cache Google Fonts (Sora, Manrope, IBM Plex Mono) locally. +/// +/// The `fonts_dir` is the directory where `fonts.css`, `fonts_meta.json`, and +/// individual `.woff2` files are stored. +/// +/// If already cached (version matches, ok=true, CSS file exists), this is a +/// no-op. Otherwise, downloads each font family's CSS from the Google Fonts +/// API, extracts woff2 URLs, downloads each font file, rewrites the CSS to +/// use local paths, and writes the combined CSS and metadata. 
+pub async fn ensure_google_fonts_local(fonts_dir: &Path) -> Result<(), String> { + fs::create_dir_all(fonts_dir).map_err(|e| format!("Failed to create fonts dir: {}", e))?; + + let meta_path = fonts_dir.join("fonts_meta.json"); + let css_path = fonts_dir.join("fonts.css"); + + // Check if already cached + if let Some(meta) = load_json_with_fallbacks(&meta_path, BACKUP_COUNT) { + if let Some(obj) = meta.as_object() { + let version_ok = obj + .get("version") + .and_then(|v| v.as_u64()) + .map(|v| v == GOOGLE_FONTS_META_VERSION) + .unwrap_or(false); + let ok_flag = obj + .get("ok") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + if version_ok && ok_flag && css_path.exists() { + return Ok(()); + } + } + } + + let client = reqwest::Client::builder() + .user_agent(USER_AGENT) + .build() + .map_err(|e| format!("Failed to build HTTP client: {}", e))?; + + let mut all_css_parts: Vec = Vec::new(); + let mut downloaded_files: Vec = Vec::new(); + let mut errors: Vec = Vec::new(); + + for (family, css_url) in GOOGLE_FONT_URLS { + // Download the CSS for this font family + let css_text = match client.get(*css_url).send().await { + Ok(resp) => match resp.text().await { + Ok(text) => text, + Err(e) => { + errors.push(format!("Failed to read CSS for {}: {}", family, e)); + continue; + } + }, + Err(e) => { + errors.push(format!("Failed to download CSS for {}: {}", family, e)); + continue; + } + }; + + // Find all woff2 url(...) 
references and download each font file + let mut rewritten_css = css_text.clone(); + let mut replacements: Vec<(String, String)> = Vec::new(); + + for cap in GOOGLE_FONT_URL_RE.captures_iter(&css_text) { + let raw_url = cap[1].trim().trim_matches('\'').trim_matches('"'); + + let safe_name = safe_filename_from_url(raw_url); + let local_path = fonts_dir.join(&safe_name); + + // Download the font file + match client.get(raw_url).send().await { + Ok(resp) => match resp.bytes().await { + Ok(bytes) => { + if let Err(e) = fs::write(&local_path, &bytes) { + errors.push(format!("Failed to write {}: {}", safe_name, e)); + continue; + } + downloaded_files.push(safe_name.clone()); + } + Err(e) => { + errors.push(format!("Failed to read bytes for {}: {}", safe_name, e)); + continue; + } + }, + Err(e) => { + errors.push(format!("Failed to download {}: {}", raw_url, e)); + continue; + } + } + + // Record the replacement: original url(...) content -> local path + let replacement_url = format!("/fonts/{}", safe_name); + replacements.push((cap[1].to_string(), replacement_url)); + } + + // Apply all URL replacements to the CSS + for (original, replacement) in &replacements { + let old = format!("url({}) format", original); + let new = format!("url({}) format", replacement); + rewritten_css = rewritten_css.replace(&old, &new); + } + + all_css_parts.push(rewritten_css); + } + + // Write combined CSS + let combined_css = all_css_parts.join("\n"); + fs::write(&css_path, &combined_css) + .map_err(|e| format!("Failed to write fonts.css: {}", e))?; + + // Write metadata + let timestamp = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + + let ok = errors.is_empty(); + let meta = json!({ + "version": GOOGLE_FONTS_META_VERSION, + "ok": ok, + "timestamp": timestamp, + "downloaded": downloaded_files, + "errors": errors, + }); + + atomic_write_json(&meta_path, &meta, BACKUP_COUNT); + + if ok { + Ok(()) + } else { + Err(format!( + "Google Fonts 
download completed with errors: {}", + errors.join("; ") + )) + } +} + +// --------------------------------------------------------------------------- +// 3. ensure_fontawesome_local +// --------------------------------------------------------------------------- + +/// Clean and resolve a Font Awesome URL reference. +/// +/// Strips whitespace and quotes, resolves relative URLs against the FA +/// webfonts base URL. Returns the URL unchanged if it is a `data:` URI. +fn clean_fa_url(u: &str) -> String { + let u = u.trim().trim_matches('\'').trim_matches('"'); + + if u.starts_with("data:") { + return u.to_string(); + } + if u.starts_with("//") { + return format!("https:{}", u); + } + if u.starts_with("http://") || u.starts_with("https://") { + return u.to_string(); + } + + // Relative URL: strip leading "./" and "../" then join with base + let cleaned = u + .trim_start_matches("./") + .replace("../", ""); + format!("{}{}", FA_WEBFONTS_BASE, cleaned) +} + +/// Download and cache Font Awesome 6.5.2 locally. +/// +/// The `fa_dir` is the directory where `fa.css` and `fa_meta.json` live. +/// The `fa_dir/webfonts/` subdirectory holds individual webfont files. +/// +/// If already cached (version matches, ok=true, CSS file exists), this is a +/// no-op. Otherwise, downloads the Font Awesome CSS, extracts all `url(...)` +/// references, downloads each font file (skipping `data:` URIs), rewrites +/// the CSS to use local paths, and writes the CSS and metadata. 
pub async fn ensure_fontawesome_local(fa_dir: &Path) -> Result<(), String> {
    fs::create_dir_all(fa_dir).map_err(|e| format!("Failed to create fa dir: {}", e))?;

    let webfonts_dir = fa_dir.join("webfonts");
    fs::create_dir_all(&webfonts_dir)
        .map_err(|e| format!("Failed to create webfonts dir: {}", e))?;

    let meta_path = fa_dir.join("fa_meta.json");
    let css_path = fa_dir.join("fa.css");

    // Check if already cached: requires matching meta version, ok=true from a
    // previous complete run, and the CSS file still on disk.
    if let Some(meta) = load_json_with_fallbacks(&meta_path, BACKUP_COUNT) {
        if let Some(obj) = meta.as_object() {
            let version_ok = obj
                .get("version")
                .and_then(|v| v.as_u64())
                .map(|v| v == FA_META_VERSION)
                .unwrap_or(false);
            let ok_flag = obj
                .get("ok")
                .and_then(|v| v.as_bool())
                .unwrap_or(false);

            if version_ok && ok_flag && css_path.exists() {
                return Ok(());
            }
        }
    }

    let client = reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|e| format!("Failed to build HTTP client: {}", e))?;

    // Download the Font Awesome CSS
    let css_text = client
        .get(FA_CSS_URL)
        .send()
        .await
        .map_err(|e| format!("Failed to download FA CSS: {}", e))?
        .text()
        .await
        .map_err(|e| format!("Failed to read FA CSS: {}", e))?;

    let mut downloaded_files: Vec<String> = Vec::new();
    let mut errors: Vec<String> = Vec::new();
    // Maps the raw url(...) payload to its local replacement path.
    let mut replacements: HashMap<String, String> = HashMap::new();

    for cap in FA_URL_RE.captures_iter(&css_text) {
        let raw_url = &cap[1];
        let resolved = clean_fa_url(raw_url);

        // Skip data: URIs
        if resolved.starts_with("data:") {
            continue;
        }

        // Determine the filename from the resolved URL: drop the query string
        // and fragment, then take the last path component.
        let filename = resolved
            .split('?')
            .next()
            .unwrap_or(&resolved)
            .split('#')
            .next()
            .unwrap_or(&resolved)
            .rsplit('/')
            .next()
            .unwrap_or("font.woff2")
            .to_string();

        if filename.is_empty() {
            continue;
        }

        let local_path = webfonts_dir.join(&filename);

        // Only download each file once
        // NOTE(review): distinct source URLs that resolve to the same
        // basename overwrite each other in webfonts/ — confirm FA never
        // serves two different files under one name.
        if !replacements.contains_key(raw_url) {
            match client.get(&resolved).send().await {
                Ok(resp) => match resp.bytes().await {
                    Ok(bytes) => {
                        if let Err(e) = fs::write(&local_path, &bytes) {
                            errors.push(format!("Failed to write {}: {}", filename, e));
                            continue;
                        }
                        downloaded_files.push(filename.clone());
                    }
                    Err(e) => {
                        errors.push(format!("Failed to read bytes for {}: {}", filename, e));
                        continue;
                    }
                },
                Err(e) => {
                    errors.push(format!("Failed to download {}: {}", resolved, e));
                    continue;
                }
            }

            let replacement = format!("/fa/webfonts/{}", filename);
            replacements.insert(raw_url.to_string(), replacement);
        }
    }

    // Rewrite CSS with local paths
    let mut rewritten_css = css_text.clone();
    for (original, replacement) in &replacements {
        let old = format!("url({})", original);
        let new = format!("url({})", replacement);
        rewritten_css = rewritten_css.replace(&old, &new);
    }

    // Write rewritten CSS
    fs::write(&css_path, &rewritten_css)
        .map_err(|e| format!("Failed to write fa.css: {}", e))?;

    // Write metadata
    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);

    // ok=false leaves the cache invalid so a later call retries the download.
    let ok = errors.is_empty();
    let meta = json!({
        "version": FA_META_VERSION,
        "ok": ok,
        "timestamp": timestamp,
        "downloaded": downloaded_files,
        "errors": errors,
    });

    atomic_write_json(&meta_path, &meta, BACKUP_COUNT);

    if ok {
        Ok(())
    } else {
        Err(format!(
            "Font Awesome download completed with errors: {}",
            errors.join("; ")
        ))
    }
}

// ===========================================================================
// Tests
// ===========================================================================

#[cfg(test)]
mod tests {
    use super::*;

    // -- safe_filename_from_url -----------------------------------------------

    #[test]
    fn test_safe_filename_from_url_basic() {
        let url = "https://fonts.gstatic.com/s/sora/v12/abc123.woff2";
        let result = safe_filename_from_url(url);

        // Should contain the original stem
        assert!(result.starts_with("abc123-"));
        // Should end with .woff2
        assert!(result.ends_with(".woff2"));
        // Should contain a 10-char hash between stem and extension
        let parts: Vec<&str> = result.rsplitn(2, '.').collect();
        let before_ext = parts[1]; // "abc123-{hash}"
        let hash_part = before_ext.rsplit('-').next().unwrap();
        assert_eq!(hash_part.len(), 10);
    }

    #[test]
    fn test_safe_filename_from_url_no_extension() {
        let url = "https://example.com/fontfile";
        let result = safe_filename_from_url(url);

        // Should have .woff2 appended
        assert!(result.ends_with(".woff2"));
        assert!(result.starts_with("fontfile-"));
    }

    #[test]
    fn test_safe_filename_from_url_deterministic() {
        let url = "https://fonts.gstatic.com/s/sora/v12/abc.woff2";
        let result1 = safe_filename_from_url(url);
        let result2 = safe_filename_from_url(url);
        assert_eq!(result1, result2);
    }

    #[test]
    fn test_safe_filename_different_urls() {
        let url1 = "https://fonts.gstatic.com/s/sora/v12/abc.woff2";
        let url2 = "https://fonts.gstatic.com/s/manrope/v14/def.woff2";
        let result1 = safe_filename_from_url(url1);
        let result2
 = safe_filename_from_url(url2);
        assert_ne!(result1, result2);
    }

    // -- clean_fa_url ---------------------------------------------------------

    #[test]
    fn test_clean_fa_url_data() {
        let result = clean_fa_url("data:font/woff2;base64,abc");
        assert_eq!(result, "data:font/woff2;base64,abc");
    }

    #[test]
    fn test_clean_fa_url_protocol_relative() {
        let result = clean_fa_url("//example.com/font.woff2");
        assert_eq!(result, "https://example.com/font.woff2");
    }

    #[test]
    fn test_clean_fa_url_absolute() {
        let result = clean_fa_url("https://example.com/font.woff2");
        assert_eq!(result, "https://example.com/font.woff2");
    }

    // NOTE(review): the two tests below pin a doubled "/webfonts/webfonts/"
    // segment produced by clean_fa_url joining the already-webfonts-suffixed
    // base with a "webfonts/"-prefixed relative path — verify this URL is
    // actually fetchable on the CDN.
    #[test]
    fn test_clean_fa_url_relative() {
        let result = clean_fa_url("../webfonts/fa-solid-900.woff2");
        assert_eq!(
            result,
            "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/webfonts/fa-solid-900.woff2"
        );
    }

    #[test]
    fn test_clean_fa_url_relative_dot_slash() {
        let result = clean_fa_url("./webfonts/fa-solid-900.woff2");
        assert_eq!(
            result,
            "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/webfonts/fa-solid-900.woff2"
        );
    }

    #[test]
    fn test_clean_fa_url_strips_quotes() {
        let result = clean_fa_url("'https://example.com/font.woff2'");
        assert_eq!(result, "https://example.com/font.woff2");
    }

    // -- Integration tests (require network) ----------------------------------

    #[tokio::test]
    #[ignore]
    async fn test_google_fonts_download() {
        let dir = tempfile::tempdir().unwrap();
        let fonts_dir = dir.path().join("fonts");

        let result = ensure_google_fonts_local(&fonts_dir).await;
        assert!(result.is_ok(), "Google Fonts download failed: {:?}", result);

        // Verify fonts.css was created
        let css_path = fonts_dir.join("fonts.css");
        assert!(css_path.exists(), "fonts.css should exist");

        let css_content = fs::read_to_string(&css_path).unwrap();
        assert!(!css_content.is_empty(), "fonts.css should not be empty");
        // CSS should contain rewritten local paths
        assert!(
            css_content.contains("/fonts/"),
            "CSS should contain /fonts/ local paths"
        );

        // Verify metadata was created
        let meta_path = fonts_dir.join("fonts_meta.json");
        assert!(meta_path.exists(), "fonts_meta.json should exist");

        let meta = load_json_with_fallbacks(&meta_path, BACKUP_COUNT).unwrap();
        assert_eq!(meta["version"], GOOGLE_FONTS_META_VERSION);
        assert_eq!(meta["ok"], true);
        assert!(
            meta["downloaded"].as_array().unwrap().len() > 0,
            "Should have downloaded at least one font file"
        );

        // Second call should be a no-op (cached)
        let result2 = ensure_google_fonts_local(&fonts_dir).await;
        assert!(result2.is_ok());
    }

    #[tokio::test]
    #[ignore]
    async fn test_fontawesome_download() {
        let dir = tempfile::tempdir().unwrap();
        let fa_dir = dir.path().join("fa");

        let result = ensure_fontawesome_local(&fa_dir).await;
        assert!(
            result.is_ok(),
            "Font Awesome download failed: {:?}",
            result
        );

        // Verify fa.css was created
        let css_path = fa_dir.join("fa.css");
        assert!(css_path.exists(), "fa.css should exist");

        let css_content = fs::read_to_string(&css_path).unwrap();
        assert!(!css_content.is_empty(), "fa.css should not be empty");
        // CSS should contain rewritten local paths
        assert!(
            css_content.contains("/fa/webfonts/"),
            "CSS should contain /fa/webfonts/ local paths"
        );

        // Verify webfonts directory has files
        let webfonts_dir = fa_dir.join("webfonts");
        assert!(webfonts_dir.exists(), "webfonts dir should exist");

        let webfont_files: Vec<_> = fs::read_dir(&webfonts_dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .collect();
        assert!(
            !webfont_files.is_empty(),
            "Should have downloaded at least one webfont file"
        );

        // Verify metadata was created
        let meta_path = fa_dir.join("fa_meta.json");
        assert!(meta_path.exists(), "fa_meta.json should exist");

        let meta = load_json_with_fallbacks(&meta_path, BACKUP_COUNT).unwrap();
        assert_eq!(meta["version"], FA_META_VERSION);
        assert_eq!(meta["ok"], true);

        // Second call should be a no-op (cached)
        let result2 = ensure_fontawesome_local(&fa_dir).await;
        assert!(result2.is_ok());
    }
}
diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs
index 06265f2..23d94d5 100644
--- a/src-tauri/src/lib.rs
+++ b/src-tauri/src/lib.rs
@@ -1,6 +1,9 @@
+pub mod ffmpeg;
+pub mod fonts;
 pub mod prefs;
 pub mod recents;
 pub mod state;
+pub mod subtitles;
 pub mod utils;
 
 #[cfg_attr(mobile, tauri::mobile_entry_point)]
diff --git a/src-tauri/src/subtitles.rs b/src-tauri/src/subtitles.rs
new file mode 100644
index 0000000..e3190b8
--- /dev/null
+++ b/src-tauri/src/subtitles.rs
@@ -0,0 +1,652 @@
//! Subtitle handling: SRT-to-VTT conversion, sidecar discovery, storage,
//! and embedded subtitle extraction via ffmpeg.

use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;

#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/// Supported subtitle file extensions.
pub const SUB_EXTS: &[&str] = &[".srt", ".vtt"];

/// Languages considered "English" for sidecar priority.
const ENGLISH_LANGS: &[&str] = &["en", "eng", "english"];

/// All language suffixes to strip when normalizing subtitle basenames.
const ALL_LANG_SUFFIXES: &[&str] = &[
    "en", "eng", "english", "fr", "de", "es", "it", "pt", "ru", "ja", "ko", "zh",
];

/// Windows CREATE_NO_WINDOW flag for subprocess creation.
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x08000000;

// ---------------------------------------------------------------------------
// Structs
// ---------------------------------------------------------------------------

/// Result of storing a subtitle file for a video.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubtitleStored {
    /// Relative path like `"subtitles/{fid}_{name}.vtt"`.
    pub vtt: String,
    /// Display label (source filename).
    pub label: String,
}

// ---------------------------------------------------------------------------
// Compiled regex patterns
// ---------------------------------------------------------------------------

/// Matches a line that is only digits (SRT cue index).
/// Note: the regex crate's `\d` matches any Unicode decimal digit, not just
/// ASCII.
static CUE_INDEX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d+$").unwrap());

/// Matches characters that are NOT alphanumeric, dot, underscore, or hyphen.
/// Every replacement produced with this pattern is ASCII, which is what makes
/// the byte-truncation in `sanitize_name` safe.
static SANITIZE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^a-zA-Z0-9._\-]").unwrap());

/// Collapses runs of whitespace and dash/underscore into a single space for
/// normalized comparison of subtitle stems.
static NORMALIZE_SEP_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-_\s]+").unwrap());

// ---------------------------------------------------------------------------
// 1. srt_to_vtt
// ---------------------------------------------------------------------------

/// Convert SRT subtitle text to WebVTT format string.
///
/// - Removes BOM (`\u{FEFF}`) if present.
/// - Adds the `WEBVTT` header.
/// - Skips cue index numbers (lines that are just digits).
/// - Converts timestamp separators: comma → dot.
/// - Collects subtitle text between timestamp lines and empty lines.
+pub fn srt_to_vtt(srt_text: &str) -> String { + let text = srt_text.replace('\u{FEFF}', ""); + let lines: Vec<&str> = text.lines().collect(); + let mut out: Vec = vec!["WEBVTT".to_string(), String::new()]; + + let mut i = 0; + while i < lines.len() { + let line = lines[i].trim_end_matches('\r'); + + // Empty line → blank line in output + if line.trim().is_empty() { + out.push(String::new()); + i += 1; + continue; + } + + // Skip cue index (pure digit line) + if CUE_INDEX_RE.is_match(line.trim()) { + i += 1; + if i >= lines.len() { + break; + } + // Re-read the next line as a potential timestamp + let line = lines[i].trim_end_matches('\r'); + + if line.contains("-->") { + let ts_line = line.replace(',', "."); + out.push(ts_line); + i += 1; + // Collect subtitle text until blank line + while i < lines.len() { + let t = lines[i].trim_end_matches('\r'); + if t.trim().is_empty() { + out.push(String::new()); + i += 1; + break; + } + out.push(t.to_string()); + i += 1; + } + } else { + i += 1; + } + } else if line.contains("-->") { + // Timestamp line without preceding cue index + let ts_line = line.replace(',', "."); + out.push(ts_line); + i += 1; + while i < lines.len() { + let t = lines[i].trim_end_matches('\r'); + if t.trim().is_empty() { + out.push(String::new()); + i += 1; + break; + } + out.push(t.to_string()); + i += 1; + } + } else { + i += 1; + } + } + + let joined = out.join("\n"); + format!("{}\n", joined.trim()) +} + +// --------------------------------------------------------------------------- +// 2. auto_subtitle_sidecar (helpers) +// --------------------------------------------------------------------------- + +/// Normalize a string for fuzzy subtitle matching: lowercase, replace `-` and +/// `_` with space, collapse whitespace. +fn normalize_stem(s: &str) -> String { + let lower = s.to_lowercase(); + let replaced = NORMALIZE_SEP_RE.replace_all(&lower, " "); + replaced.trim().to_string() +} + +/// Strip a trailing language suffix from a subtitle stem. 
+/// +/// For example, `"video.en"` → `Some(("video", "en"))`. +/// Returns `None` if no known language suffix is found. +fn strip_lang_suffix(stem: &str) -> Option<(String, String)> { + if let Some(dot_pos) = stem.rfind('.') { + let base = &stem[..dot_pos]; + let suffix = &stem[dot_pos + 1..]; + let suffix_lower = suffix.to_lowercase(); + if ALL_LANG_SUFFIXES.contains(&suffix_lower.as_str()) { + return Some((base.to_string(), suffix_lower)); + } + } + None +} + +/// Find a subtitle sidecar file matching the given video path. +/// +/// Returns the best matching subtitle file path, or `None`. +/// +/// Priority (lower is better): +/// - 0: Exact stem match (case-insensitive) +/// - 1: Normalized exact match +/// - 2: English language suffix with exact base +/// - 3: English language suffix with normalized base +/// - 4: Other language suffix with exact base +/// - 5: Other/no language with normalized base +pub fn auto_subtitle_sidecar(video_path: &Path) -> Option { + let parent = video_path.parent()?; + let video_stem = video_path.file_stem()?.to_string_lossy().to_string(); + let video_stem_lower = video_stem.to_lowercase(); + let video_stem_norm = normalize_stem(&video_stem); + + // Collect all subtitle files in the same directory. + let entries = fs::read_dir(parent).ok()?; + + let mut best: Option<(u8, PathBuf)> = None; + + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + + let fname = match path.file_name() { + Some(n) => n.to_string_lossy().to_string(), + None => continue, + }; + let fname_lower = fname.to_lowercase(); + + // Must end with a supported subtitle extension. + let is_sub = SUB_EXTS + .iter() + .any(|ext| fname_lower.ends_with(ext)); + if !is_sub { + continue; + } + + // Extract the stem (without the subtitle extension). 
+ let sub_stem = match path.file_stem() { + Some(s) => s.to_string_lossy().to_string(), + None => continue, + }; + let sub_stem_lower = sub_stem.to_lowercase(); + + // Priority 0: exact stem match (case-insensitive). + if sub_stem_lower == video_stem_lower { + let priority = 0u8; + if best.as_ref().map_or(true, |(bp, _)| priority < *bp) { + best = Some((priority, path.clone())); + } + continue; + } + + // Check for language suffix. + if let Some((base, lang)) = strip_lang_suffix(&sub_stem) { + let base_lower = base.to_lowercase(); + let base_norm = normalize_stem(&base); + let is_english = ENGLISH_LANGS.contains(&lang.as_str()); + + if is_english { + // Priority 2: English suffix, exact base. + if base_lower == video_stem_lower { + let priority = 2u8; + if best.as_ref().map_or(true, |(bp, _)| priority < *bp) { + best = Some((priority, path.clone())); + } + continue; + } + // Priority 3: English suffix, normalized base. + if base_norm == video_stem_norm { + let priority = 3u8; + if best.as_ref().map_or(true, |(bp, _)| priority < *bp) { + best = Some((priority, path.clone())); + } + continue; + } + } else { + // Priority 4: Other language suffix, exact base. + if base_lower == video_stem_lower { + let priority = 4u8; + if best.as_ref().map_or(true, |(bp, _)| priority < *bp) { + best = Some((priority, path.clone())); + } + continue; + } + // Priority 5: Other language suffix, normalized base. + if base_norm == video_stem_norm { + let priority = 5u8; + if best.as_ref().map_or(true, |(bp, _)| priority < *bp) { + best = Some((priority, path.clone())); + } + continue; + } + } + } + + // Priority 1: Normalized match (no language suffix). 
+ let sub_stem_norm = normalize_stem(&sub_stem); + if sub_stem_norm == video_stem_norm { + let priority = 1u8; + if best.as_ref().map_or(true, |(bp, _)| priority < *bp) { + best = Some((priority, path.clone())); + } + } + + // Priority 5 fallback: normalized match for subtitle files whose + // language suffix was not recognised above (handled by the + // strip_lang_suffix branch already for known languages). + } + + best.map(|(_, p)| p) +} + +// --------------------------------------------------------------------------- +// 3. store_subtitle_for_fid +// --------------------------------------------------------------------------- + +/// Sanitize a filename component: replace non-alphanumeric chars (except +/// `._-`) with `_`, then truncate to 60 characters. +fn sanitize_name(name: &str) -> String { + let sanitized = SANITIZE_RE.replace_all(name, "_"); + let s = sanitized.as_ref(); + if s.len() > 60 { + s[..60].to_string() + } else { + s.to_string() + } +} + +/// Store a subtitle file for a given fid. Converts SRT→VTT if needed. +/// +/// The output file is written as `{fid}_{sanitized_name}.vtt` inside +/// `subs_dir`. Returns `SubtitleStored` with the relative path (from the +/// parent of `subs_dir`) and a display label. +/// +/// Returns `None` if the source file extension is not supported or reading +/// the source fails. +pub fn store_subtitle_for_fid( + fid: &str, + src_path: &Path, + subs_dir: &Path, +) -> Option { + let ext_lower = src_path + .extension() + .map(|e| format!(".{}", e.to_string_lossy().to_lowercase()))?; + + if !SUB_EXTS.contains(&ext_lower.as_str()) { + return None; + } + + let src_filename = src_path + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_default(); + + let src_stem = src_path + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "subtitle".to_string()); + + let sanitized = sanitize_name(&src_stem); + let out_name = format!("{}_{}.vtt", fid, sanitized); + + // Ensure subs_dir exists. 
+ let _ = fs::create_dir_all(subs_dir); + + let out_path = subs_dir.join(&out_name); + + let content = fs::read_to_string(src_path).ok()?; + + let vtt_content = if ext_lower == ".srt" { + srt_to_vtt(&content) + } else { + // Already VTT — use as-is. + content + }; + + fs::write(&out_path, vtt_content.as_bytes()).ok()?; + + // Build relative path: "subtitles/{out_name}". + let subs_dir_name = subs_dir + .file_name() + .map(|n| n.to_string_lossy().to_string()) + .unwrap_or_else(|| "subtitles".to_string()); + + let vtt_rel = format!("{}/{}", subs_dir_name, out_name); + + Some(SubtitleStored { + vtt: vtt_rel, + label: src_filename, + }) +} + +// --------------------------------------------------------------------------- +// 4. extract_embedded_subtitle +// --------------------------------------------------------------------------- + +/// Extract an embedded subtitle track from a video using ffmpeg. +/// +/// Runs: `ffmpeg -y -i {video_path} -map 0:{track_index} -c:s webvtt {output_path}` +/// +/// The output file is `{fid}_embedded_{track_index}.vtt` inside `subs_dir`. +/// On Windows, the process is created with `CREATE_NO_WINDOW`. +/// +/// Returns `SubtitleStored` on success, or an error message string. 
pub fn extract_embedded_subtitle(
    video_path: &Path,
    track_index: u32,
    ffmpeg_path: &Path,
    subs_dir: &Path,
    fid: &str,
) -> Result<SubtitleStored, String> {
    // Best-effort; a failure surfaces when ffmpeg cannot write the output.
    let _ = fs::create_dir_all(subs_dir);

    let out_name = format!("{}_embedded_{}.vtt", fid, track_index);
    let out_path = subs_dir.join(&out_name);

    // NOTE(review): "0:{track_index}" selects a stream by absolute index in
    // input 0, not by subtitle-relative index — confirm callers pass the
    // stream index reported by ffprobe.
    let mut cmd = Command::new(ffmpeg_path);
    cmd.args([
        "-y",
        "-i",
        &video_path.to_string_lossy(),
        "-map",
        &format!("0:{}", track_index),
        "-c:s",
        "webvtt",
        &out_path.to_string_lossy(),
    ]);

    #[cfg(target_os = "windows")]
    {
        cmd.creation_flags(CREATE_NO_WINDOW);
    }

    // Blocks until ffmpeg exits, capturing stdout/stderr.
    let output = cmd
        .output()
        .map_err(|e| format!("Failed to run ffmpeg: {}", e))?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "ffmpeg exited with status {}: {}",
            output.status, stderr
        ));
    }

    if !out_path.exists() {
        return Err("ffmpeg did not produce an output file".to_string());
    }

    let subs_dir_name = subs_dir
        .file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "subtitles".to_string());

    let vtt_rel = format!("{}/{}", subs_dir_name, out_name);

    Ok(SubtitleStored {
        vtt: vtt_rel,
        label: format!("Embedded track {}", track_index),
    })
}

// ===========================================================================
// Tests
// ===========================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // -- srt_to_vtt ----------------------------------------------------------

    #[test]
    fn test_srt_to_vtt_basic() {
        let srt = "\
1
00:00:01,000 --> 00:00:04,000
Hello, world!

2
00:00:05,000 --> 00:00:08,000
This is a test.
";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT\n"));
        assert!(vtt.contains("00:00:01.000 --> 00:00:04.000"));
        assert!(vtt.contains("Hello, world!"));
        assert!(vtt.contains("00:00:05.000 --> 00:00:08.000"));
        assert!(vtt.contains("This is a test."));
        // Timestamp commas must be converted to dots.
        assert!(!vtt.contains("00:00:01,000"));
        assert!(!vtt.contains("00:00:04,000"));
    }

    #[test]
    fn test_srt_to_vtt_bom() {
        let srt = "\u{FEFF}1\n00:00:01,000 --> 00:00:02,000\nHello\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        // BOM must be removed.
        assert!(!vtt.contains('\u{FEFF}'));
        assert!(vtt.contains("Hello"));
    }

    #[test]
    fn test_srt_to_vtt_empty() {
        let vtt = srt_to_vtt("");
        assert!(vtt.starts_with("WEBVTT"));
        // Should be just the header.
        assert_eq!(vtt.trim(), "WEBVTT");
    }

    #[test]
    fn test_srt_to_vtt_windows_line_endings() {
        let srt = "1\r\n00:00:01,000 --> 00:00:02,000\r\nHello\r\n\r\n\
            2\r\n00:00:03,000 --> 00:00:04,000\r\nWorld\r\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        assert!(vtt.contains("00:00:01.000 --> 00:00:02.000"));
        assert!(vtt.contains("Hello"));
        assert!(vtt.contains("00:00:03.000 --> 00:00:04.000"));
        assert!(vtt.contains("World"));
    }

    #[test]
    fn test_srt_to_vtt_no_cue_indices() {
        // Some SRT files omit cue numbers entirely.
        let srt = "\
00:00:01,500 --> 00:00:03,500
First line

00:00:04,000 --> 00:00:06,000
Second line
";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        assert!(vtt.contains("00:00:01.500 --> 00:00:03.500"));
        assert!(vtt.contains("First line"));
        assert!(vtt.contains("00:00:04.000 --> 00:00:06.000"));
        assert!(vtt.contains("Second line"));
    }

    // -- auto_subtitle_sidecar -----------------------------------------------

    #[test]
    fn test_auto_subtitle_sidecar_exact_match() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        let sub = dir.path().join("lecture.srt");
        fs::write(&video, b"video").unwrap();
        fs::write(&sub, b"1\n00:00:00,000 --> 00:00:01,000\nhi\n").unwrap();

        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), sub);
    }

    #[test]
    fn test_auto_subtitle_sidecar_english_suffix() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        let sub = dir.path().join("lecture.en.srt");
        fs::write(&video, b"video").unwrap();
        fs::write(&sub, b"sub content").unwrap();

        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), sub);
    }

    #[test]
    fn test_auto_subtitle_sidecar_no_match() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        fs::write(&video, b"video").unwrap();
        // No subtitle files at all.
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_none());
    }

    #[test]
    fn test_auto_subtitle_sidecar_priority_order() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        fs::write(&video, b"video").unwrap();

        // Priority 0: exact stem match.
        let exact = dir.path().join("lecture.srt");
        // Priority 2: English suffix with exact base.
        let en_suffix = dir.path().join("lecture.en.srt");
        // Priority 4: Other language suffix with exact base.
        let fr_suffix = dir.path().join("lecture.fr.srt");

        fs::write(&exact, b"exact").unwrap();
        fs::write(&en_suffix, b"english").unwrap();
        fs::write(&fr_suffix, b"french").unwrap();

        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        // Should pick priority 0 (exact match) over others.
        assert_eq!(result.unwrap(), exact);

        // Remove exact match → should pick English suffix (priority 2).
        fs::remove_file(&exact).unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), en_suffix);

        // Remove English suffix → should pick French suffix (priority 4).
        fs::remove_file(&en_suffix).unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), fr_suffix);
    }

    // -- store_subtitle_for_fid ----------------------------------------------

    #[test]
    fn test_store_subtitle_srt_converts_to_vtt() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("my_sub.srt");

        let srt_content = "1\n00:00:01,000 --> 00:00:02,000\nHello\n";
        fs::write(&src, srt_content).unwrap();

        let result = store_subtitle_for_fid("abc123", &src, &subs_dir);
        assert!(result.is_some());

        let stored = result.unwrap();
        assert!(stored.vtt.ends_with(".vtt"));
        assert!(stored.vtt.starts_with("subtitles/"));
        assert_eq!(stored.label, "my_sub.srt");

        // Verify the VTT output file was actually created and converted.
        let out_path = subs_dir.join(format!("abc123_{}.vtt", "my_sub"));
        assert!(out_path.exists());

        let vtt_content = fs::read_to_string(&out_path).unwrap();
        assert!(vtt_content.starts_with("WEBVTT"));
        assert!(vtt_content.contains("00:00:01.000 --> 00:00:02.000"));
        assert!(vtt_content.contains("Hello"));
    }

    #[test]
    fn test_store_subtitle_vtt_copies() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("my_sub.vtt");

        let vtt_content = "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nHello\n";
        fs::write(&src, vtt_content).unwrap();

        let result = store_subtitle_for_fid("def456", &src, &subs_dir);
        assert!(result.is_some());

        let stored = result.unwrap();
        assert!(stored.vtt.ends_with(".vtt"));
        assert_eq!(stored.label, "my_sub.vtt");

        // Verify the output file has the same content (not SRT-converted).
        let out_path = subs_dir.join("def456_my_sub.vtt");
        assert!(out_path.exists());

        let content = fs::read_to_string(&out_path).unwrap();
        assert_eq!(content, vtt_content);
    }

    #[test]
    fn test_store_subtitle_unsupported_ext() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("notes.txt");
        fs::write(&src, "Some notes").unwrap();

        let result = store_subtitle_for_fid("xyz789", &src, &subs_dir);
        assert!(result.is_none());
    }
}