feat: implement ffmpeg.rs, subtitles.rs, and fonts.rs

- ffmpeg.rs: discovery, duration extraction, metadata probing, download
- subtitles.rs: SRT-to-VTT conversion, sidecar discovery, storage, extraction
- fonts.rs: Google Fonts and Font Awesome local caching
This commit is contained in:
Your Name
2026-02-19 01:59:21 +02:00
parent 3280d60f71
commit 4e91fe679f
4 changed files with 2075 additions and 0 deletions

806
src-tauri/src/ffmpeg.rs Normal file
View File

@@ -0,0 +1,806 @@
//! FFmpeg / FFprobe discovery, video metadata extraction, and ffmpeg downloading.
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::Duration;
#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/// CREATE_NO_WINDOW flag for Windows subprocess creation.
/// (Win32 process-creation flag 0x08000000; prevents a console window from
/// flashing when spawning ffmpeg/ffprobe from a GUI app.)
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x08000000;
/// Timeout for ffprobe / ffmpeg subprocess calls.
const SUBPROCESS_TIMEOUT: Duration = Duration::from_secs(20);
/// Timeout for ffprobe metadata calls (slightly longer for large files).
const METADATA_TIMEOUT: Duration = Duration::from_secs(25);
/// FFmpeg download URL (Windows 64-bit GPL build).
/// NOTE(review): this artifact is win64-only; confirm the download path is
/// never taken on other platforms.
const FFMPEG_DOWNLOAD_URL: &str =
    "https://github.com/BtbN/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-win64-gpl.zip";
// ---------------------------------------------------------------------------
// Regex patterns
// ---------------------------------------------------------------------------
/// Matches "Duration: HH:MM:SS.ss" in ffmpeg stderr output.
/// Groups: 1 = hours, 2 = minutes, 3 = seconds (with optional fraction).
static DURATION_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)").unwrap());
// ---------------------------------------------------------------------------
// Structs
// ---------------------------------------------------------------------------
/// Paths to discovered ffprobe and ffmpeg executables.
///
/// Either field may be `None` when the corresponding tool was not found by
/// [`discover`].
#[derive(Debug, Clone, Default)]
pub struct FfmpegPaths {
    /// Path to the `ffprobe` executable, if discovered.
    pub ffprobe: Option<PathBuf>,
    /// Path to the `ffmpeg` executable, if discovered.
    pub ffmpeg: Option<PathBuf>,
}
/// Detailed video metadata extracted via ffprobe.
///
/// Every field is optional: ffprobe output varies per container/codec, and
/// any value that is missing or fails to parse is simply left `None`/empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoMetadata {
    /// Codec name of the first video stream (ffprobe `codec_name`).
    pub v_codec: Option<String>,
    /// Frame width in pixels.
    pub width: Option<u32>,
    /// Frame height in pixels.
    pub height: Option<u32>,
    /// Frame rate, parsed from `r_frame_rate` (fallback `avg_frame_rate`).
    pub fps: Option<f64>,
    /// Video stream `bit_rate`, parsed to an integer.
    pub v_bitrate: Option<u64>,
    /// Pixel format string (ffprobe `pix_fmt`).
    pub pix_fmt: Option<String>,
    /// Color space string (ffprobe `color_space`).
    pub color_space: Option<String>,
    /// Codec name of the first audio stream.
    pub a_codec: Option<String>,
    /// Audio channel count.
    pub channels: Option<u32>,
    /// Audio sample rate, kept as the string ffprobe reports.
    pub sample_rate: Option<String>,
    /// Audio stream `bit_rate`, parsed to an integer.
    pub a_bitrate: Option<u64>,
    /// All subtitle streams found in the container.
    pub subtitle_tracks: Vec<SubtitleTrack>,
    /// Container-level `bit_rate` from the format section.
    pub container_bitrate: Option<u64>,
    /// Container duration in seconds (format section `duration`).
    pub duration: Option<f64>,
    /// Container format name (ffprobe `format_name`).
    pub format_name: Option<String>,
    /// Container-level "title" tag; only set when present and non-empty.
    pub container_title: Option<String>,
    /// Container-level "encoder" tag; only set when present and non-empty.
    pub encoder: Option<String>,
}
// NOTE(review): every field defaults to `None` / empty `Vec`, so this manual
// impl is equivalent to `#[derive(Default)]` on the struct; kept explicit here
// to avoid changing two items at once.
impl Default for VideoMetadata {
    fn default() -> Self {
        Self {
            v_codec: None,
            width: None,
            height: None,
            fps: None,
            v_bitrate: None,
            pix_fmt: None,
            color_space: None,
            a_codec: None,
            channels: None,
            sample_rate: None,
            a_bitrate: None,
            subtitle_tracks: Vec::new(),
            container_bitrate: None,
            duration: None,
            format_name: None,
            container_title: None,
            encoder: None,
        }
    }
}
/// Information about a single subtitle track.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubtitleTrack {
    /// Stream index within the container (ffprobe `index`, falling back to
    /// the enumeration position when absent).
    pub index: u32,
    /// Subtitle codec name; `"unknown"` when ffprobe omits it.
    pub codec: String,
    /// Language tag (`language`/`LANGUAGE`); empty string when absent.
    pub language: String,
    /// Track title tag (`title`/`TITLE`); empty string when absent.
    pub title: String,
}
/// Progress information emitted during ffmpeg download.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DownloadProgress {
    /// Completion percentage (0.0-100.0); stays 0.0 when the total size is
    /// unknown (no Content-Length header).
    pub percent: f64,
    /// Bytes written to disk so far.
    pub downloaded_bytes: u64,
    /// Total download size from Content-Length; 0 when unknown.
    pub total_bytes: u64,
}
// ---------------------------------------------------------------------------
// 1. Platform-specific command setup
// ---------------------------------------------------------------------------
/// Apply platform-specific flags to a `Command` (hide console window on Windows).
///
/// On Windows this sets `CREATE_NO_WINDOW` so spawned ffmpeg/ffprobe
/// processes do not flash a console window. On all other platforms this is a
/// no-op; the parameter is `_`-prefixed to silence the unused warning there.
fn apply_no_window(_cmd: &mut Command) {
    #[cfg(target_os = "windows")]
    {
        _cmd.creation_flags(CREATE_NO_WINDOW);
    }
}
// ---------------------------------------------------------------------------
// 2. discover
// ---------------------------------------------------------------------------
/// Discover ffmpeg and ffprobe executables.
///
/// Search order (per tool):
/// 1. System PATH via `which`
/// 2. Alongside the application executable (`exe_dir`)
/// 3. Inside `state_dir/ffmpeg/`
pub fn discover(exe_dir: &Path, state_dir: &Path) -> FfmpegPaths {
    FfmpegPaths {
        ffprobe: discover_one("ffprobe", exe_dir, state_dir),
        ffmpeg: discover_one("ffmpeg", exe_dir, state_dir),
    }
}
/// Discover a single executable by name.
///
/// Checks the system PATH first, then `exe_dir`, then `state_dir/ffmpeg/`,
/// using the platform-appropriate file name (`.exe` suffix on Windows).
fn discover_one(name: &str, exe_dir: &Path, state_dir: &Path) -> Option<PathBuf> {
    // 1. System PATH wins.
    if let Ok(found) = which::which(name) {
        return Some(found);
    }
    // Platform-specific executable file name.
    let exe_name = if cfg!(target_os = "windows") {
        format!("{}.exe", name)
    } else {
        name.to_owned()
    };
    // 2. Beside the application executable, then 3. state_dir/ffmpeg/.
    [
        exe_dir.join(&exe_name),
        state_dir.join("ffmpeg").join(&exe_name),
    ]
    .into_iter()
    .find(|candidate| candidate.is_file())
}
// ---------------------------------------------------------------------------
// 3. duration_seconds
// ---------------------------------------------------------------------------
/// Get video duration in seconds using ffprobe (primary) or ffmpeg stderr (fallback).
///
/// Returns `None` if neither method succeeds or duration is not positive.
pub fn duration_seconds(path: &Path, paths: &FfmpegPaths) -> Option<f64> {
    // Prefer ffprobe's machine-readable duration output.
    let via_probe = paths
        .ffprobe
        .as_deref()
        .and_then(|probe| duration_via_ffprobe(path, probe));
    via_probe.or_else(|| {
        // Fallback: scrape the "Duration:" line from ffmpeg's stderr banner.
        paths
            .ffmpeg
            .as_deref()
            .and_then(|ff| duration_via_ffmpeg(path, ff))
    })
}
/// Extract duration using ffprobe's format=duration output.
///
/// `default=nw=1:nk=1` suppresses section wrappers and keys so stdout is the
/// bare numeric duration.
fn duration_via_ffprobe(path: &Path, ffprobe: &Path) -> Option<f64> {
    let mut probe = Command::new(ffprobe);
    probe
        .args([
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=nw=1:nk=1",
        ])
        .arg(path)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped());
    apply_no_window(&mut probe);
    let output = wait_with_timeout(probe.spawn().ok()?, SUBPROCESS_TIMEOUT)?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    // An empty or malformed stdout fails the parse and yields None.
    let secs: f64 = stdout.trim().parse().ok()?;
    (secs > 0.0).then_some(secs)
}
/// Extract duration by parsing "Duration: HH:MM:SS.ss" from ffmpeg stderr.
///
/// `ffmpeg -i <file>` with no output exits with an error but still prints the
/// input banner (including the Duration line) to stderr.
fn duration_via_ffmpeg(path: &Path, ffmpeg: &Path) -> Option<f64> {
    let mut banner = Command::new(ffmpeg);
    banner
        .args(["-hide_banner", "-i"])
        .arg(path)
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped());
    apply_no_window(&mut banner);
    let output = wait_with_timeout(banner.spawn().ok()?, SUBPROCESS_TIMEOUT)?;
    parse_ffmpeg_duration(&String::from_utf8_lossy(&output.stderr))
}
/// Parse "Duration: HH:MM:SS.ss" from ffmpeg stderr output.
///
/// Returns `None` when no duration line is present or the total is not
/// positive.
fn parse_ffmpeg_duration(stderr: &str) -> Option<f64> {
    let caps = DURATION_RE.captures(stderr)?;
    // Capture groups 1..=3 are hours, minutes, seconds (seconds may carry a
    // fractional part).
    let field = |i: usize| -> Option<f64> { caps.get(i)?.as_str().parse().ok() };
    let (hh, mm, ss) = (field(1)?, field(2)?, field(3)?);
    let total = hh * 3600.0 + mm * 60.0 + ss;
    (total > 0.0).then_some(total)
}
/// Wait for a child process with a timeout, killing (and reaping) it if the
/// timeout is exceeded.
///
/// Fix: the previous version joined the waiter thread on timeout, which
/// blocked until the child exited on its own -- defeating the timeout and
/// never killing the child. This version drains the child's stdout/stderr
/// pipes on background threads (so the child can never deadlock on a full
/// pipe buffer), polls `try_wait` until `timeout` elapses, and kills + reaps
/// the child on expiry.
///
/// Returns `None` on timeout or if waiting on the child fails.
fn wait_with_timeout(
    mut child: std::process::Child,
    timeout: Duration,
) -> Option<std::process::Output> {
    use std::io::Read;
    // Drain each pipe on its own thread; ffmpeg writing more than the OS pipe
    // buffer would otherwise stall forever and turn every call into a timeout.
    let stdout_pipe = child.stdout.take();
    let stderr_pipe = child.stderr.take();
    let stdout_thread = std::thread::spawn(move || {
        let mut buf = Vec::new();
        if let Some(mut pipe) = stdout_pipe {
            let _ = pipe.read_to_end(&mut buf);
        }
        buf
    });
    let stderr_thread = std::thread::spawn(move || {
        let mut buf = Vec::new();
        if let Some(mut pipe) = stderr_pipe {
            let _ = pipe.read_to_end(&mut buf);
        }
        buf
    });
    // Poll for exit until the deadline; kill on expiry so a hung ffprobe or
    // ffmpeg cannot keep running (or linger as a zombie) in the background.
    let deadline = std::time::Instant::now() + timeout;
    let status = loop {
        match child.try_wait() {
            Ok(Some(status)) => break status,
            Ok(None) => {
                if std::time::Instant::now() >= deadline {
                    let _ = child.kill();
                    let _ = child.wait(); // reap to avoid a zombie
                    return None;
                }
                std::thread::sleep(Duration::from_millis(20));
            }
            Err(_) => return None,
        }
    };
    let stdout = stdout_thread.join().unwrap_or_default();
    let stderr = stderr_thread.join().unwrap_or_default();
    Some(std::process::Output {
        status,
        stdout,
        stderr,
    })
}
// ---------------------------------------------------------------------------
// 4. ffprobe_video_metadata
// ---------------------------------------------------------------------------
/// Extract detailed video metadata using ffprobe JSON output.
///
/// Runs ffprobe with `-print_format json -show_streams -show_format` and parses
/// the resulting JSON to populate a `VideoMetadata` struct.
///
/// Returns `None` when spawning fails, the timeout is hit, the JSON cannot be
/// parsed, or no useful field at all could be extracted.
pub fn ffprobe_video_metadata(path: &Path, ffprobe: &Path) -> Option<VideoMetadata> {
    let mut cmd = Command::new(ffprobe);
    cmd.args([
        "-v", "error",
        "-print_format", "json",
        "-show_streams", "-show_format",
    ])
    .arg(path)
    .stdout(std::process::Stdio::piped())
    .stderr(std::process::Stdio::piped());
    apply_no_window(&mut cmd);
    let child = cmd.spawn().ok()?;
    let output = wait_with_timeout(child, METADATA_TIMEOUT)?;
    let text = String::from_utf8_lossy(&output.stdout);
    let data: serde_json::Value = serde_json::from_str(&text).ok()?;
    let streams = data.get("streams").and_then(|v| v.as_array());
    let fmt = data.get("format").and_then(|v| v.as_object());
    let mut meta = VideoMetadata::default();
    // Only the FIRST video and FIRST audio stream populate the scalar fields;
    // every subtitle stream is collected.
    let mut found_video = false;
    let mut found_audio = false;
    // Iterate streams: first video, first audio, all subtitles
    if let Some(streams) = streams {
        for (idx, s) in streams.iter().enumerate() {
            let obj = match s.as_object() {
                Some(o) => o,
                None => continue,
            };
            let codec_type = obj
                .get("codec_type")
                .and_then(|v| v.as_str())
                .unwrap_or("");
            match codec_type {
                "video" if !found_video => {
                    found_video = true;
                    meta.v_codec = json_str(obj, "codec_name");
                    meta.width = json_u32(obj, "width");
                    meta.height = json_u32(obj, "height");
                    meta.pix_fmt = json_str(obj, "pix_fmt");
                    meta.color_space = json_str(obj, "color_space");
                    // Parse frame rate ("num/den")
                    let frame_rate = json_str(obj, "r_frame_rate")
                        .or_else(|| json_str(obj, "avg_frame_rate"));
                    if let Some(ref fr) = frame_rate {
                        meta.fps = parse_frame_rate(fr);
                    }
                    // Video bitrate (ffprobe emits bit_rate as a string)
                    meta.v_bitrate = json_str(obj, "bit_rate")
                        .and_then(|s| s.parse::<u64>().ok());
                }
                "audio" if !found_audio => {
                    found_audio = true;
                    meta.a_codec = json_str(obj, "codec_name");
                    meta.channels = json_u32(obj, "channels");
                    meta.sample_rate = json_str(obj, "sample_rate");
                    meta.a_bitrate = json_str(obj, "bit_rate")
                        .and_then(|s| s.parse::<u64>().ok());
                }
                "subtitle" => {
                    // Tags may use either lowercase or uppercase keys
                    // depending on the muxer.
                    let tags = obj
                        .get("tags")
                        .and_then(|v| v.as_object());
                    let language = tags
                        .and_then(|t| {
                            json_str_from(t, "language")
                                .or_else(|| json_str_from(t, "LANGUAGE"))
                        })
                        .unwrap_or_default();
                    let title = tags
                        .and_then(|t| {
                            json_str_from(t, "title")
                                .or_else(|| json_str_from(t, "TITLE"))
                        })
                        .unwrap_or_default();
                    // Fall back to the enumeration position if ffprobe omits
                    // the stream "index".
                    let stream_index = obj
                        .get("index")
                        .and_then(|v| v.as_u64())
                        .unwrap_or(idx as u64) as u32;
                    let codec = json_str(obj, "codec_name")
                        .unwrap_or_else(|| "unknown".to_string());
                    meta.subtitle_tracks.push(SubtitleTrack {
                        index: stream_index,
                        codec,
                        language,
                        title,
                    });
                }
                _ => {}
            }
        }
    }
    // Format-level metadata
    if let Some(fmt) = fmt {
        meta.container_bitrate = json_str_from(fmt, "bit_rate")
            .and_then(|s| s.parse::<u64>().ok());
        meta.duration = json_str_from(fmt, "duration")
            .and_then(|s| s.parse::<f64>().ok());
        meta.format_name = json_str_from(fmt, "format_name");
        // Container tags (only keep non-empty values)
        if let Some(ftags) = fmt.get("tags").and_then(|v| v.as_object()) {
            let ct = json_str_from(ftags, "title");
            if ct.as_deref().map_or(false, |s| !s.is_empty()) {
                meta.container_title = ct;
            }
            let enc = json_str_from(ftags, "encoder");
            if enc.as_deref().map_or(false, |s| !s.is_empty()) {
                meta.encoder = enc;
            }
        }
    }
    // Return None if we extracted nothing useful
    let has_data = meta.v_codec.is_some()
        || meta.a_codec.is_some()
        || !meta.subtitle_tracks.is_empty()
        || meta.duration.is_some()
        || meta.format_name.is_some();
    if has_data { Some(meta) } else { None }
}
// ---------------------------------------------------------------------------
// JSON helper functions
// ---------------------------------------------------------------------------
/// Extract a string value from a JSON object by key.
///
/// Thin alias for [`json_str_from`], kept for call-site brevity in the
/// stream-parsing code above.
fn json_str(obj: &serde_json::Map<String, serde_json::Value>, key: &str) -> Option<String> {
    json_str_from(obj, key)
}
/// Extract a string value from a JSON map by key.
///
/// JSON strings are returned as-is; numbers are stringified; every other
/// value type yields `None`.
fn json_str_from(
    obj: &serde_json::Map<String, serde_json::Value>,
    key: &str,
) -> Option<String> {
    match obj.get(key)? {
        serde_json::Value::String(s) => Some(s.clone()),
        serde_json::Value::Number(n) => Some(n.to_string()),
        _ => None,
    }
}
/// Extract a u32 value from a JSON object by key (truncating wider integers).
fn json_u32(obj: &serde_json::Map<String, serde_json::Value>, key: &str) -> Option<u32> {
    let n = obj.get(key)?.as_u64()?;
    Some(n as u32)
}
/// Parse a frame rate string like "30000/1001" into an f64.
///
/// Accepts either a "num/den" fraction or a bare number; returns `None` for
/// malformed input, a zero denominator, or a non-positive rate.
fn parse_frame_rate(fr: &str) -> Option<f64> {
    let fps = match fr.split_once('/') {
        Some((num_part, den_part)) => {
            let num: f64 = num_part.trim().parse().ok()?;
            let den: f64 = den_part.trim().parse().ok()?;
            if den == 0.0 {
                return None;
            }
            num / den
        }
        // No slash: treat the whole string as a plain number.
        None => fr.trim().parse().ok()?,
    };
    (fps > 0.0).then_some(fps)
}
// ---------------------------------------------------------------------------
// 5. download_ffmpeg (async)
// ---------------------------------------------------------------------------
/// Download ffmpeg from GitHub, extract `ffmpeg.exe` and `ffprobe.exe`, and
/// place them in `state_dir/ffmpeg/`.
///
/// Reports progress via the provided `tokio::sync::mpsc::Sender`.
///
/// NOTE(review): `FFMPEG_DOWNLOAD_URL` is a win64-only build, yet the
/// verification below also handles un-suffixed Unix binary names -- confirm
/// this function is only reachable on Windows.
pub async fn download_ffmpeg(
    state_dir: &Path,
    progress_tx: tokio::sync::mpsc::Sender<DownloadProgress>,
) -> Result<FfmpegPaths, String> {
    let dest_dir = state_dir.join("ffmpeg");
    std::fs::create_dir_all(&dest_dir)
        .map_err(|e| format!("Failed to create ffmpeg directory: {}", e))?;
    let zip_path = dest_dir.join("ffmpeg-download.zip");
    // Start the download
    let client = reqwest::Client::new();
    let response = client
        .get(FFMPEG_DOWNLOAD_URL)
        .send()
        .await
        .map_err(|e| format!("Failed to start download: {}", e))?;
    if !response.status().is_success() {
        return Err(format!("Download failed with status: {}", response.status()));
    }
    // total_bytes stays 0 (and percent stays 0.0) when Content-Length is absent.
    let total_bytes = response.content_length().unwrap_or(0);
    let mut downloaded_bytes: u64 = 0;
    // Stream the response body to disk chunk by chunk using reqwest's chunk()
    let mut file = std::fs::File::create(&zip_path)
        .map_err(|e| format!("Failed to create zip file: {}", e))?;
    let mut response = response;
    while let Some(chunk) = response
        .chunk()
        .await
        .map_err(|e| format!("Download stream error: {}", e))?
    {
        std::io::Write::write_all(&mut file, &chunk)
            .map_err(|e| format!("Failed to write chunk: {}", e))?;
        downloaded_bytes += chunk.len() as u64;
        let percent = if total_bytes > 0 {
            (downloaded_bytes as f64 / total_bytes as f64) * 100.0
        } else {
            0.0
        };
        // Send progress update (ignore send errors if receiver dropped)
        let _ = progress_tx
            .send(DownloadProgress {
                percent,
                downloaded_bytes,
                total_bytes,
            })
            .await;
    }
    // Close the file before the zip reader opens it.
    drop(file);
    // Extract ffmpeg.exe and ffprobe.exe from the zip
    extract_ffmpeg_from_zip(&zip_path, &dest_dir)?;
    // Clean up the zip file
    std::fs::remove_file(&zip_path).ok();
    // Verify the extracted files exist
    let ffmpeg_exe = if cfg!(target_os = "windows") {
        "ffmpeg.exe"
    } else {
        "ffmpeg"
    };
    let ffprobe_exe = if cfg!(target_os = "windows") {
        "ffprobe.exe"
    } else {
        "ffprobe"
    };
    let ffmpeg_path = dest_dir.join(ffmpeg_exe);
    let ffprobe_path = dest_dir.join(ffprobe_exe);
    // Missing files are reported as None rather than an error so a partial
    // extraction still yields whatever was found.
    Ok(FfmpegPaths {
        ffprobe: if ffprobe_path.is_file() {
            Some(ffprobe_path)
        } else {
            None
        },
        ffmpeg: if ffmpeg_path.is_file() {
            Some(ffmpeg_path)
        } else {
            None
        },
    })
}
/// Extract only `ffmpeg.exe` and `ffprobe.exe` (or `ffmpeg` / `ffprobe` on
/// non-Windows) from a downloaded zip archive.
///
/// The BtbN builds have files nested inside a directory like
/// `ffmpeg-master-latest-win64-gpl/bin/ffmpeg.exe`, so we search for any entry
/// whose filename ends with the target name.
///
/// # Errors
/// Returns a human-readable message if the archive cannot be opened or read,
/// or if an extracted file cannot be written.
fn extract_ffmpeg_from_zip(zip_path: &Path, dest_dir: &Path) -> Result<(), String> {
    let file = std::fs::File::open(zip_path)
        .map_err(|e| format!("Failed to open zip: {}", e))?;
    let mut archive = zip::ZipArchive::new(file)
        .map_err(|e| format!("Failed to read zip archive: {}", e))?;
    let targets: &[&str] = if cfg!(target_os = "windows") {
        &["ffmpeg.exe", "ffprobe.exe"]
    } else {
        &["ffmpeg", "ffprobe"]
    };
    for i in 0..archive.len() {
        let mut entry = archive
            .by_index(i)
            .map_err(|e| format!("Failed to read zip entry {}: {}", i, e))?;
        let entry_name = entry.name().to_string();
        // Check if this entry matches one of our target filenames
        for target in targets {
            if entry_name.ends_with(&format!("/{}", target))
                || entry_name == *target
            {
                let out_path = dest_dir.join(target);
                let mut out_file = std::fs::File::create(&out_path)
                    .map_err(|e| format!("Failed to create {}: {}", target, e))?;
                std::io::copy(&mut entry, &mut out_file)
                    .map_err(|e| format!("Failed to extract {}: {}", target, e))?;
                // Fix: File::create + io::copy does not carry over the
                // executable bit, so restore it on Unix or the extracted
                // binaries cannot be spawned.
                #[cfg(unix)]
                {
                    use std::os::unix::fs::PermissionsExt;
                    let _ = std::fs::set_permissions(
                        &out_path,
                        std::fs::Permissions::from_mode(0o755),
                    );
                }
                break;
            }
        }
    }
    Ok(())
}
// ===========================================================================
// Tests
// ===========================================================================
/// Unit tests for the pure helpers; tests needing real ffprobe/ffmpeg
/// binaries are `#[ignore]`d so CI without ffmpeg stays green.
#[cfg(test)]
mod tests {
    use super::*;
    // -- parse_ffmpeg_duration -----------------------------------------------
    #[test]
    fn test_parse_ffmpeg_duration_standard() {
        let stderr = r#"
Input #0, matroska,webm, from 'video.mkv':
  Duration: 01:23:45.67, start: 0.000000, bitrate: 5000 kb/s
    Stream #0:0: Video: h264
"#;
        let d = parse_ffmpeg_duration(stderr).unwrap();
        let expected = 1.0 * 3600.0 + 23.0 * 60.0 + 45.67;
        assert!((d - expected).abs() < 0.001, "got {}, expected {}", d, expected);
    }
    #[test]
    fn test_parse_ffmpeg_duration_short_video() {
        let stderr = " Duration: 00:00:30.50, start: 0.000000, bitrate: 1200 kb/s\n";
        let d = parse_ffmpeg_duration(stderr).unwrap();
        assert!((d - 30.5).abs() < 0.001);
    }
    #[test]
    fn test_parse_ffmpeg_duration_whole_seconds() {
        let stderr = " Duration: 00:05:00.00, start: 0.0\n";
        let d = parse_ffmpeg_duration(stderr).unwrap();
        assert!((d - 300.0).abs() < 0.001);
    }
    #[test]
    fn test_parse_ffmpeg_duration_zero() {
        // Zero duration should return None (not positive)
        let stderr = " Duration: 00:00:00.00, start: 0.0\n";
        assert!(parse_ffmpeg_duration(stderr).is_none());
    }
    #[test]
    fn test_parse_ffmpeg_duration_no_match() {
        let stderr = "some random output without duration info\n";
        assert!(parse_ffmpeg_duration(stderr).is_none());
    }
    #[test]
    fn test_parse_ffmpeg_duration_empty() {
        assert!(parse_ffmpeg_duration("").is_none());
    }
    // -- discover_not_found --------------------------------------------------
    #[test]
    fn test_discover_not_found() {
        // Use non-existent directories -- should return None for both paths
        let exe_dir = Path::new("/nonexistent/path/that/does/not/exist/exe");
        let state_dir = Path::new("/nonexistent/path/that/does/not/exist/state");
        let paths = discover(exe_dir, state_dir);
        // On a system without ffmpeg in PATH these will be None;
        // on a system with ffmpeg installed they may be Some.
        // We simply verify the function does not panic.
        let _ = paths.ffprobe;
        let _ = paths.ffmpeg;
    }
    // -- video_metadata_default ----------------------------------------------
    #[test]
    fn test_video_metadata_default() {
        // Every field of the default metadata must be empty/None.
        let meta = VideoMetadata::default();
        assert!(meta.v_codec.is_none());
        assert!(meta.width.is_none());
        assert!(meta.height.is_none());
        assert!(meta.fps.is_none());
        assert!(meta.v_bitrate.is_none());
        assert!(meta.pix_fmt.is_none());
        assert!(meta.color_space.is_none());
        assert!(meta.a_codec.is_none());
        assert!(meta.channels.is_none());
        assert!(meta.sample_rate.is_none());
        assert!(meta.a_bitrate.is_none());
        assert!(meta.subtitle_tracks.is_empty());
        assert!(meta.container_bitrate.is_none());
        assert!(meta.duration.is_none());
        assert!(meta.format_name.is_none());
        assert!(meta.container_title.is_none());
        assert!(meta.encoder.is_none());
    }
    // -- download_progress_serialization -------------------------------------
    #[test]
    fn test_download_progress_serialization() {
        // Round-trip through serde_json must preserve all fields.
        let progress = DownloadProgress {
            percent: 50.5,
            downloaded_bytes: 1024,
            total_bytes: 2048,
        };
        let json = serde_json::to_string(&progress).unwrap();
        let parsed: DownloadProgress = serde_json::from_str(&json).unwrap();
        assert!((parsed.percent - 50.5).abs() < f64::EPSILON);
        assert_eq!(parsed.downloaded_bytes, 1024);
        assert_eq!(parsed.total_bytes, 2048);
    }
    #[test]
    fn test_download_progress_json_fields() {
        // Field names in the JSON must match what the frontend expects.
        let progress = DownloadProgress {
            percent: 100.0,
            downloaded_bytes: 5000,
            total_bytes: 5000,
        };
        let value: serde_json::Value = serde_json::to_value(&progress).unwrap();
        assert_eq!(value["percent"], 100.0);
        assert_eq!(value["downloaded_bytes"], 5000);
        assert_eq!(value["total_bytes"], 5000);
    }
    // -- parse_frame_rate ----------------------------------------------------
    #[test]
    fn test_parse_frame_rate_fraction() {
        // NTSC rate expressed as the usual rational.
        let fps = parse_frame_rate("30000/1001").unwrap();
        assert!((fps - 29.97).abs() < 0.01);
    }
    #[test]
    fn test_parse_frame_rate_integer() {
        let fps = parse_frame_rate("24/1").unwrap();
        assert!((fps - 24.0).abs() < 0.001);
    }
    #[test]
    fn test_parse_frame_rate_zero_denominator() {
        assert!(parse_frame_rate("24/0").is_none());
    }
    #[test]
    fn test_parse_frame_rate_plain_number() {
        let fps = parse_frame_rate("60").unwrap();
        assert!((fps - 60.0).abs() < 0.001);
    }
    // -- subtitle_track_serialization ----------------------------------------
    #[test]
    fn test_subtitle_track_serialization() {
        let track = SubtitleTrack {
            index: 2,
            codec: "srt".to_string(),
            language: "eng".to_string(),
            title: "English".to_string(),
        };
        let json = serde_json::to_string(&track).unwrap();
        let parsed: SubtitleTrack = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed.index, 2);
        assert_eq!(parsed.codec, "srt");
        assert_eq!(parsed.language, "eng");
        assert_eq!(parsed.title, "English");
    }
    // -- integration tests (require actual ffprobe/ffmpeg) -------------------
    #[test]
    #[ignore]
    fn test_duration_seconds_with_real_ffprobe() {
        // This test requires ffprobe and ffmpeg to be installed and a sample
        // video file at the given path.
        let exe_dir = Path::new(".");
        let state_dir = Path::new(".");
        let paths = discover(exe_dir, state_dir);
        if paths.ffprobe.is_none() && paths.ffmpeg.is_none() {
            eprintln!("Skipping: no ffprobe or ffmpeg found");
            return;
        }
        // Would need a real video file here
        // let d = duration_seconds(Path::new("sample.mp4"), &paths);
        // assert!(d.is_some());
    }
    #[test]
    #[ignore]
    fn test_ffprobe_video_metadata_with_real_ffprobe() {
        let exe_dir = Path::new(".");
        let state_dir = Path::new(".");
        let paths = discover(exe_dir, state_dir);
        if let Some(ref ffprobe) = paths.ffprobe {
            // Would need a real video file here
            // let meta = ffprobe_video_metadata(Path::new("sample.mp4"), ffprobe);
            // assert!(meta.is_some());
            let _ = ffprobe;
        } else {
            eprintln!("Skipping: no ffprobe found");
        }
    }
}

614
src-tauri/src/fonts.rs Normal file
View File

@@ -0,0 +1,614 @@
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::json;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::time::SystemTime;
use crate::state::{atomic_write_json, load_json_with_fallbacks, BACKUP_COUNT};
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/// Current version for Google Fonts metadata (fonts_meta.json).
/// Bumping this invalidates the cache check below and forces a re-download.
const GOOGLE_FONTS_META_VERSION: u64 = 7;
/// Current version for Font Awesome metadata (fa_meta.json).
/// Bumping this invalidates the cache check below and forces a re-download.
const FA_META_VERSION: u64 = 3;
/// User-Agent header value for HTTP requests.
/// Google Fonts API returns different CSS based on User-Agent; we want woff2.
const USER_AGENT: &str = "Mozilla/5.0";
/// Google Fonts CSS URLs, one entry per cached font family.
const GOOGLE_FONT_URLS: &[(&str, &str)] = &[
    (
        "Sora",
        "https://fonts.googleapis.com/css2?family=Sora:wght@500;600;700;800&display=swap",
    ),
    (
        "Manrope",
        "https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700;800&display=swap",
    ),
    (
        "IBM Plex Mono",
        "https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;500;600&display=swap",
    ),
];
/// Font Awesome CSS URL (pinned to 6.5.2).
const FA_CSS_URL: &str =
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/css/all.min.css";
/// Base URL for resolving relative Font Awesome webfont URLs.
const FA_WEBFONTS_BASE: &str =
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/";
// ---------------------------------------------------------------------------
// Compiled regex patterns
// ---------------------------------------------------------------------------
/// Regex for extracting woff2 font URLs from Google Fonts CSS.
/// Group 1 is the (possibly quoted) URL inside `url(...)`.
static GOOGLE_FONT_URL_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"url\(([^)]+)\)\s*format\(['"]woff2['"]\)"#).unwrap());
/// Regex for extracting all url(...) references from Font Awesome CSS.
static FA_URL_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"url\(([^)]+)\)").unwrap());
// ---------------------------------------------------------------------------
// 1. safe_filename_from_url
// ---------------------------------------------------------------------------
/// Generate a safe local filename from a URL using SHA-256 hash for uniqueness.
///
/// The filename is `{stem}-{hash}{suffix}` where `hash` is the first 10 hex
/// characters of the SHA-256 digest of the full URL. If the URL path has no
/// extension, `.woff2` is appended.
pub fn safe_filename_from_url(url: &str) -> String {
    // Strip query string and fragment, then take the final path component.
    let no_query = url.split('?').next().unwrap_or(url);
    let no_fragment = no_query.split('#').next().unwrap_or(no_query);
    let mut base = match no_fragment.rsplit('/').next() {
        Some(last) if !last.is_empty() => last.to_string(),
        _ => "font.woff2".to_string(),
    };
    // Guarantee an extension so the stem/suffix split below always matches.
    if !base.contains('.') {
        base.push_str(".woff2");
    }
    let (stem, suffix) = match base.rfind('.') {
        Some(pos) => (&base[..pos], &base[pos..]),
        None => (base.as_str(), ".woff2"),
    };
    // 10 hex chars of SHA-256(url) keep distinct URLs from colliding on disk.
    let digest = format!("{:x}", Sha256::digest(url.as_bytes()));
    format!("{}-{}{}", stem, &digest[..10], suffix)
}
// ---------------------------------------------------------------------------
// 2. ensure_google_fonts_local
// ---------------------------------------------------------------------------
/// Download and cache Google Fonts (Sora, Manrope, IBM Plex Mono) locally.
///
/// The `fonts_dir` is the directory where `fonts.css`, `fonts_meta.json`, and
/// individual `.woff2` files are stored.
///
/// If already cached (version matches, ok=true, CSS file exists), this is a
/// no-op. Otherwise, downloads each font family's CSS from the Google Fonts
/// API, extracts woff2 URLs, downloads each font file, rewrites the CSS to
/// use local paths, and writes the combined CSS and metadata.
///
/// Per-file failures are collected into `errors` rather than aborting, so a
/// partial cache is still written; the metadata's `ok` flag records whether
/// everything succeeded (and gates the cache short-circuit on the next run).
pub async fn ensure_google_fonts_local(fonts_dir: &Path) -> Result<(), String> {
    fs::create_dir_all(fonts_dir).map_err(|e| format!("Failed to create fonts dir: {}", e))?;
    let meta_path = fonts_dir.join("fonts_meta.json");
    let css_path = fonts_dir.join("fonts.css");
    // Check if already cached
    if let Some(meta) = load_json_with_fallbacks(&meta_path, BACKUP_COUNT) {
        if let Some(obj) = meta.as_object() {
            let version_ok = obj
                .get("version")
                .and_then(|v| v.as_u64())
                .map(|v| v == GOOGLE_FONTS_META_VERSION)
                .unwrap_or(false);
            let ok_flag = obj
                .get("ok")
                .and_then(|v| v.as_bool())
                .unwrap_or(false);
            if version_ok && ok_flag && css_path.exists() {
                return Ok(());
            }
        }
    }
    // Custom User-Agent so Google serves woff2-flavored CSS.
    let client = reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|e| format!("Failed to build HTTP client: {}", e))?;
    let mut all_css_parts: Vec<String> = Vec::new();
    let mut downloaded_files: Vec<String> = Vec::new();
    let mut errors: Vec<String> = Vec::new();
    for (family, css_url) in GOOGLE_FONT_URLS {
        // Download the CSS for this font family
        let css_text = match client.get(*css_url).send().await {
            Ok(resp) => match resp.text().await {
                Ok(text) => text,
                Err(e) => {
                    errors.push(format!("Failed to read CSS for {}: {}", family, e));
                    continue;
                }
            },
            Err(e) => {
                errors.push(format!("Failed to download CSS for {}: {}", family, e));
                continue;
            }
        };
        // Find all woff2 url(...) references and download each font file
        let mut rewritten_css = css_text.clone();
        let mut replacements: Vec<(String, String)> = Vec::new();
        for cap in GOOGLE_FONT_URL_RE.captures_iter(&css_text) {
            let raw_url = cap[1].trim().trim_matches('\'').trim_matches('"');
            let safe_name = safe_filename_from_url(raw_url);
            let local_path = fonts_dir.join(&safe_name);
            // Download the font file. On any failure we `continue` so the
            // replacement below is NOT recorded and the CSS keeps the remote
            // URL for that face.
            match client.get(raw_url).send().await {
                Ok(resp) => match resp.bytes().await {
                    Ok(bytes) => {
                        if let Err(e) = fs::write(&local_path, &bytes) {
                            errors.push(format!("Failed to write {}: {}", safe_name, e));
                            continue;
                        }
                        downloaded_files.push(safe_name.clone());
                    }
                    Err(e) => {
                        errors.push(format!("Failed to read bytes for {}: {}", safe_name, e));
                        continue;
                    }
                },
                Err(e) => {
                    errors.push(format!("Failed to download {}: {}", raw_url, e));
                    continue;
                }
            }
            // Record the replacement: original url(...) content -> local path
            // NOTE(review): "/fonts/..." assumes the app serves `fonts_dir`
            // under that route -- confirm against the asset handler.
            let replacement_url = format!("/fonts/{}", safe_name);
            replacements.push((cap[1].to_string(), replacement_url));
        }
        // Apply all URL replacements to the CSS
        for (original, replacement) in &replacements {
            let old = format!("url({}) format", original);
            let new = format!("url({}) format", replacement);
            rewritten_css = rewritten_css.replace(&old, &new);
        }
        all_css_parts.push(rewritten_css);
    }
    // Write combined CSS
    let combined_css = all_css_parts.join("\n");
    fs::write(&css_path, &combined_css)
        .map_err(|e| format!("Failed to write fonts.css: {}", e))?;
    // Write metadata (timestamp in seconds since the Unix epoch)
    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);
    let ok = errors.is_empty();
    let meta = json!({
        "version": GOOGLE_FONTS_META_VERSION,
        "ok": ok,
        "timestamp": timestamp,
        "downloaded": downloaded_files,
        "errors": errors,
    });
    atomic_write_json(&meta_path, &meta, BACKUP_COUNT);
    if ok {
        Ok(())
    } else {
        Err(format!(
            "Google Fonts download completed with errors: {}",
            errors.join("; ")
        ))
    }
}
// ---------------------------------------------------------------------------
// 3. ensure_fontawesome_local
// ---------------------------------------------------------------------------
/// Clean and resolve a Font Awesome URL reference.
///
/// Strips whitespace and quotes, resolves relative URLs against the FA
/// webfonts base URL. Returns the URL unchanged if it is a `data:` URI or
/// already absolute; protocol-relative (`//...`) URLs get an `https:` scheme.
fn clean_fa_url(u: &str) -> String {
    let trimmed = u.trim().trim_matches('\'').trim_matches('"');
    // data: URIs and absolute http(s) URLs pass through untouched.
    if trimmed.starts_with("data:")
        || trimmed.starts_with("http://")
        || trimmed.starts_with("https://")
    {
        return trimmed.to_string();
    }
    // Protocol-relative URL: pin it to https.
    if trimmed.starts_with("//") {
        return format!("https:{}", trimmed);
    }
    // Relative URL: drop leading "./" and any "../" segments, then join.
    let relative = trimmed.trim_start_matches("./").replace("../", "");
    format!("{}{}", FA_WEBFONTS_BASE, relative)
}
/// Download and cache Font Awesome 6.5.2 locally.
///
/// The `fa_dir` is the directory where `fa.css` and `fa_meta.json` live.
/// The `fa_dir/webfonts/` subdirectory holds individual webfont files.
///
/// If already cached (version matches, ok=true, CSS file exists), this is a
/// no-op. Otherwise, downloads the Font Awesome CSS, extracts all `url(...)`
/// references, downloads each font file (skipping `data:` URIs), rewrites
/// the CSS to use local paths, and writes the CSS and metadata.
///
/// Non-2xx HTTP responses are treated as download failures (previously a
/// 404 error page could be cached as if it were the real CSS or a font).
pub async fn ensure_fontawesome_local(fa_dir: &Path) -> Result<(), String> {
    fs::create_dir_all(fa_dir).map_err(|e| format!("Failed to create fa dir: {}", e))?;
    let webfonts_dir = fa_dir.join("webfonts");
    fs::create_dir_all(&webfonts_dir)
        .map_err(|e| format!("Failed to create webfonts dir: {}", e))?;
    let meta_path = fa_dir.join("fa_meta.json");
    let css_path = fa_dir.join("fa.css");
    // Fast path: a previous run recorded success for this asset version.
    if let Some(meta) = load_json_with_fallbacks(&meta_path, BACKUP_COUNT) {
        if let Some(obj) = meta.as_object() {
            let version_ok = obj
                .get("version")
                .and_then(|v| v.as_u64())
                .map(|v| v == FA_META_VERSION)
                .unwrap_or(false);
            let ok_flag = obj
                .get("ok")
                .and_then(|v| v.as_bool())
                .unwrap_or(false);
            if version_ok && ok_flag && css_path.exists() {
                return Ok(());
            }
        }
    }
    let client = reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .build()
        .map_err(|e| format!("Failed to build HTTP client: {}", e))?;
    // Download the Font Awesome CSS. `error_for_status` ensures an HTTP
    // error page is not silently cached as the stylesheet.
    let css_text = client
        .get(FA_CSS_URL)
        .send()
        .await
        .and_then(|r| r.error_for_status())
        .map_err(|e| format!("Failed to download FA CSS: {}", e))?
        .text()
        .await
        .map_err(|e| format!("Failed to read FA CSS: {}", e))?;
    let mut downloaded_files: Vec<String> = Vec::new();
    let mut errors: Vec<String> = Vec::new();
    // Maps each raw `url(...)` capture to its local replacement path.
    let mut replacements: HashMap<String, String> = HashMap::new();
    for cap in FA_URL_RE.captures_iter(&css_text) {
        let raw_url = &cap[1];
        let resolved = clean_fa_url(raw_url);
        // Skip data: URIs — already self-contained, nothing to fetch.
        if resolved.starts_with("data:") {
            continue;
        }
        // Derive the filename: strip query string and fragment, then take
        // the final path segment.
        let filename = resolved
            .split('?')
            .next()
            .unwrap_or(&resolved)
            .split('#')
            .next()
            .unwrap_or(&resolved)
            .rsplit('/')
            .next()
            .unwrap_or("font.woff2")
            .to_string();
        if filename.is_empty() {
            continue;
        }
        let local_path = webfonts_dir.join(&filename);
        // Only download each distinct url(...) reference once.
        if !replacements.contains_key(raw_url) {
            match client
                .get(&resolved)
                .send()
                .await
                .and_then(|r| r.error_for_status())
            {
                Ok(resp) => match resp.bytes().await {
                    Ok(bytes) => {
                        if let Err(e) = fs::write(&local_path, &bytes) {
                            errors.push(format!("Failed to write {}: {}", filename, e));
                            continue;
                        }
                        downloaded_files.push(filename.clone());
                    }
                    Err(e) => {
                        errors.push(format!("Failed to read bytes for {}: {}", filename, e));
                        continue;
                    }
                },
                Err(e) => {
                    errors.push(format!("Failed to download {}: {}", resolved, e));
                    continue;
                }
            }
            let replacement = format!("/fa/webfonts/{}", filename);
            replacements.insert(raw_url.to_string(), replacement);
        }
    }
    // Rewrite the CSS so url(...) references point at the local cache.
    let mut rewritten_css = css_text.clone();
    for (original, replacement) in &replacements {
        let old = format!("url({})", original);
        let new = format!("url({})", replacement);
        rewritten_css = rewritten_css.replace(&old, &new);
    }
    fs::write(&css_path, &rewritten_css)
        .map_err(|e| format!("Failed to write fa.css: {}", e))?;
    // Record the outcome so the fast path above can skip future work.
    let timestamp = SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .map(|d| d.as_secs())
        .unwrap_or(0);
    let ok = errors.is_empty();
    let meta = json!({
        "version": FA_META_VERSION,
        "ok": ok,
        "timestamp": timestamp,
        "downloaded": downloaded_files,
        "errors": errors,
    });
    atomic_write_json(&meta_path, &meta, BACKUP_COUNT);
    if ok {
        Ok(())
    } else {
        Err(format!(
            "Font Awesome download completed with errors: {}",
            errors.join("; ")
        ))
    }
}
// ===========================================================================
// Tests
// ===========================================================================
#[cfg(test)]
mod tests {
    use super::*;
    // -- safe_filename_from_url -----------------------------------------------
    #[test]
    fn test_safe_filename_from_url_basic() {
        let url = "https://fonts.gstatic.com/s/sora/v12/abc123.woff2";
        let result = safe_filename_from_url(url);
        // Should contain the original stem
        assert!(result.starts_with("abc123-"));
        // Should end with .woff2
        assert!(result.ends_with(".woff2"));
        // Should contain a 10-char hash between stem and extension
        let parts: Vec<&str> = result.rsplitn(2, '.').collect();
        let before_ext = parts[1]; // "abc123-{hash}"
        let hash_part = before_ext.rsplit('-').next().unwrap();
        assert_eq!(hash_part.len(), 10);
    }
    #[test]
    fn test_safe_filename_from_url_no_extension() {
        let url = "https://example.com/fontfile";
        let result = safe_filename_from_url(url);
        // Should have .woff2 appended
        assert!(result.ends_with(".woff2"));
        assert!(result.starts_with("fontfile-"));
    }
    #[test]
    fn test_safe_filename_from_url_deterministic() {
        let url = "https://fonts.gstatic.com/s/sora/v12/abc.woff2";
        let result1 = safe_filename_from_url(url);
        let result2 = safe_filename_from_url(url);
        assert_eq!(result1, result2);
    }
    #[test]
    fn test_safe_filename_different_urls() {
        let url1 = "https://fonts.gstatic.com/s/sora/v12/abc.woff2";
        let url2 = "https://fonts.gstatic.com/s/manrope/v14/def.woff2";
        let result1 = safe_filename_from_url(url1);
        let result2 = safe_filename_from_url(url2);
        assert_ne!(result1, result2);
    }
    // -- clean_fa_url ---------------------------------------------------------
    #[test]
    fn test_clean_fa_url_data() {
        let result = clean_fa_url("data:font/woff2;base64,abc");
        assert_eq!(result, "data:font/woff2;base64,abc");
    }
    #[test]
    fn test_clean_fa_url_protocol_relative() {
        let result = clean_fa_url("//example.com/font.woff2");
        assert_eq!(result, "https://example.com/font.woff2");
    }
    #[test]
    fn test_clean_fa_url_absolute() {
        let result = clean_fa_url("https://example.com/font.woff2");
        assert_eq!(result, "https://example.com/font.woff2");
    }
    #[test]
    fn test_clean_fa_url_relative() {
        let result = clean_fa_url("../webfonts/fa-solid-900.woff2");
        assert_eq!(
            result,
            "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/webfonts/fa-solid-900.woff2"
        );
    }
    #[test]
    fn test_clean_fa_url_relative_dot_slash() {
        let result = clean_fa_url("./webfonts/fa-solid-900.woff2");
        assert_eq!(
            result,
            "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.2/webfonts/webfonts/fa-solid-900.woff2"
        );
    }
    #[test]
    fn test_clean_fa_url_strips_quotes() {
        let result = clean_fa_url("'https://example.com/font.woff2'");
        assert_eq!(result, "https://example.com/font.woff2");
    }
    // -- Integration tests (require network) ----------------------------------
    #[tokio::test]
    #[ignore]
    async fn test_google_fonts_download() {
        let dir = tempfile::tempdir().unwrap();
        let fonts_dir = dir.path().join("fonts");
        let result = ensure_google_fonts_local(&fonts_dir).await;
        assert!(result.is_ok(), "Google Fonts download failed: {:?}", result);
        // Verify fonts.css was created
        let css_path = fonts_dir.join("fonts.css");
        assert!(css_path.exists(), "fonts.css should exist");
        let css_content = fs::read_to_string(&css_path).unwrap();
        assert!(!css_content.is_empty(), "fonts.css should not be empty");
        // CSS should contain rewritten local paths
        assert!(
            css_content.contains("/fonts/"),
            "CSS should contain /fonts/ local paths"
        );
        // Verify metadata was created
        let meta_path = fonts_dir.join("fonts_meta.json");
        assert!(meta_path.exists(), "fonts_meta.json should exist");
        let meta = load_json_with_fallbacks(&meta_path, BACKUP_COUNT).unwrap();
        assert_eq!(meta["version"], GOOGLE_FONTS_META_VERSION);
        assert_eq!(meta["ok"], true);
        assert!(
            !meta["downloaded"].as_array().unwrap().is_empty(),
            "Should have downloaded at least one font file"
        );
        // Second call should be a no-op (cached)
        let result2 = ensure_google_fonts_local(&fonts_dir).await;
        assert!(result2.is_ok());
    }
    #[tokio::test]
    #[ignore]
    async fn test_fontawesome_download() {
        let dir = tempfile::tempdir().unwrap();
        let fa_dir = dir.path().join("fa");
        let result = ensure_fontawesome_local(&fa_dir).await;
        assert!(
            result.is_ok(),
            "Font Awesome download failed: {:?}",
            result
        );
        // Verify fa.css was created
        let css_path = fa_dir.join("fa.css");
        assert!(css_path.exists(), "fa.css should exist");
        let css_content = fs::read_to_string(&css_path).unwrap();
        assert!(!css_content.is_empty(), "fa.css should not be empty");
        // CSS should contain rewritten local paths
        assert!(
            css_content.contains("/fa/webfonts/"),
            "CSS should contain /fa/webfonts/ local paths"
        );
        // Verify webfonts directory has files
        let webfonts_dir = fa_dir.join("webfonts");
        assert!(webfonts_dir.exists(), "webfonts dir should exist");
        let webfont_files: Vec<_> = fs::read_dir(&webfonts_dir)
            .unwrap()
            .filter_map(|e| e.ok())
            .collect();
        assert!(
            !webfont_files.is_empty(),
            "Should have downloaded at least one webfont file"
        );
        // Verify metadata was created
        let meta_path = fa_dir.join("fa_meta.json");
        assert!(meta_path.exists(), "fa_meta.json should exist");
        let meta = load_json_with_fallbacks(&meta_path, BACKUP_COUNT).unwrap();
        assert_eq!(meta["version"], FA_META_VERSION);
        assert_eq!(meta["ok"], true);
        // Second call should be a no-op (cached)
        let result2 = ensure_fontawesome_local(&fa_dir).await;
        assert!(result2.is_ok());
    }
}

View File

@@ -1,6 +1,9 @@
pub mod ffmpeg;
pub mod fonts;
pub mod prefs;
pub mod recents;
pub mod state;
pub mod subtitles;
pub mod utils;
#[cfg_attr(mobile, tauri::mobile_entry_point)]

652
src-tauri/src/subtitles.rs Normal file
View File

@@ -0,0 +1,652 @@
//! Subtitle handling: SRT-to-VTT conversion, sidecar discovery, storage,
//! and embedded subtitle extraction via ffmpeg.
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/// Supported subtitle file extensions.
pub const SUB_EXTS: &[&str] = &[".srt", ".vtt"];
/// Languages considered "English" for sidecar priority.
const ENGLISH_LANGS: &[&str] = &["en", "eng", "english"];
/// All language suffixes to strip when normalizing subtitle basenames.
const ALL_LANG_SUFFIXES: &[&str] = &[
"en", "eng", "english", "fr", "de", "es", "it", "pt", "ru", "ja", "ko", "zh",
];
/// Windows CREATE_NO_WINDOW flag for subprocess creation.
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x08000000;
// ---------------------------------------------------------------------------
// Structs
// ---------------------------------------------------------------------------
/// Result of storing a subtitle file for a video.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubtitleStored {
    /// Relative path like `"subtitles/{fid}_{name}.vtt"`.
    /// Built with a forward-slash separator regardless of platform.
    pub vtt: String,
    /// Display label (source filename).
    pub label: String,
}
// ---------------------------------------------------------------------------
// Compiled regex patterns
// ---------------------------------------------------------------------------
/// Matches a line that is only digits (SRT cue index).
static CUE_INDEX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d+$").unwrap());
/// Matches characters that are NOT alphanumeric, dot, underscore, or hyphen.
static SANITIZE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^a-zA-Z0-9._\-]").unwrap());
/// Collapses runs of whitespace and dash/underscore into a single space for
/// normalized comparison of subtitle stems.
static NORMALIZE_SEP_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-_\s]+").unwrap());
// ---------------------------------------------------------------------------
// 1. srt_to_vtt
// ---------------------------------------------------------------------------
/// Convert SRT subtitle text to WebVTT format string.
///
/// - Removes BOM (`\u{FEFF}`) if present.
/// - Adds the `WEBVTT` header.
/// - Skips cue index numbers (lines that are just ASCII digits).
/// - Converts timestamp separators: comma → dot.
/// - Collects subtitle text between timestamp lines and empty lines.
pub fn srt_to_vtt(srt_text: &str) -> String {
    let text = srt_text.replace('\u{FEFF}', "");
    let lines: Vec<&str> = text.lines().collect();
    let mut out: Vec<String> = vec!["WEBVTT".to_string(), String::new()];
    let mut i = 0;
    while i < lines.len() {
        let line = lines[i].trim_end_matches('\r');
        // Empty line → blank line in output.
        if line.trim().is_empty() {
            out.push(String::new());
            i += 1;
            continue;
        }
        if is_cue_index(line.trim()) {
            // Skip the cue index; the next line should be the timestamp.
            i += 1;
            if i >= lines.len() {
                break;
            }
            let line = lines[i].trim_end_matches('\r');
            if line.contains("-->") {
                out.push(line.replace(',', "."));
                i += 1;
                collect_cue_text(&lines, &mut i, &mut out);
            } else {
                // Digit line not followed by a timestamp: drop it.
                i += 1;
            }
        } else if line.contains("-->") {
            // Timestamp line without a preceding cue index.
            out.push(line.replace(',', "."));
            i += 1;
            collect_cue_text(&lines, &mut i, &mut out);
        } else {
            i += 1;
        }
    }
    let joined = out.join("\n");
    format!("{}\n", joined.trim())
}

/// True if `line` is an SRT cue index: a non-empty run of ASCII digits.
fn is_cue_index(line: &str) -> bool {
    !line.is_empty() && line.bytes().all(|b| b.is_ascii_digit())
}

/// Append cue text lines starting at `lines[*i]` to `out` until a blank
/// line (emitted as an empty output line) or end of input is reached.
fn collect_cue_text(lines: &[&str], i: &mut usize, out: &mut Vec<String>) {
    while *i < lines.len() {
        let t = lines[*i].trim_end_matches('\r');
        if t.trim().is_empty() {
            out.push(String::new());
            *i += 1;
            break;
        }
        out.push(t.to_string());
        *i += 1;
    }
}
// ---------------------------------------------------------------------------
// 2. auto_subtitle_sidecar (helpers)
// ---------------------------------------------------------------------------
/// Normalize a string for fuzzy subtitle matching: lowercase, treat `-`,
/// `_`, and whitespace as separators, and collapse runs of them into a
/// single space (with no leading/trailing separator).
fn normalize_stem(s: &str) -> String {
    s.to_lowercase()
        .split(|c: char| c == '-' || c == '_' || c.is_whitespace())
        .filter(|part| !part.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}
/// Strip a trailing language suffix from a subtitle stem.
///
/// For example, `"video.en"` → `Some(("video", "en"))`.
/// Returns `None` if no known language suffix is found.
fn strip_lang_suffix(stem: &str) -> Option<(String, String)> {
    // Split at the last '.' — everything after it is the candidate suffix.
    let (base, suffix) = stem.rsplit_once('.')?;
    let lang = suffix.to_lowercase();
    if ALL_LANG_SUFFIXES.contains(&lang.as_str()) {
        Some((base.to_string(), lang))
    } else {
        None
    }
}
/// Find a subtitle sidecar file matching the given video path.
///
/// Scans the video's directory for `.srt`/`.vtt` files and returns the best
/// matching subtitle file path, or `None` if the directory cannot be read or
/// nothing matches.
///
/// Priority (lower is better):
/// - 0: Exact stem match (case-insensitive)
/// - 1: Normalized exact match
/// - 2: English language suffix with exact base
/// - 3: English language suffix with normalized base
/// - 4: Other language suffix with exact base
/// - 5: Other language suffix with normalized base
pub fn auto_subtitle_sidecar(video_path: &Path) -> Option<PathBuf> {
    let parent = video_path.parent()?;
    let video_stem = video_path.file_stem()?.to_string_lossy().to_string();
    let video_stem_lower = video_stem.to_lowercase();
    let video_stem_norm = normalize_stem(&video_stem);
    let entries = fs::read_dir(parent).ok()?;
    // Best candidate so far: (priority, path). A strict `<` comparison keeps
    // the first hit on ties, preserving directory iteration order.
    let mut best: Option<(u8, PathBuf)> = None;
    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }
        let fname_lower = match path.file_name() {
            Some(n) => n.to_string_lossy().to_lowercase(),
            None => continue,
        };
        // Must end with a supported subtitle extension.
        if !SUB_EXTS.iter().any(|ext| fname_lower.ends_with(ext)) {
            continue;
        }
        // Extract the stem (without the subtitle extension).
        let sub_stem = match path.file_stem() {
            Some(s) => s.to_string_lossy().to_string(),
            None => continue,
        };
        // Classify this candidate; skip it when it does not match at all.
        let priority =
            match candidate_priority(&sub_stem, &video_stem_lower, &video_stem_norm) {
                Some(p) => p,
                None => continue,
            };
        if best.as_ref().map_or(true, |(bp, _)| priority < *bp) {
            best = Some((priority, path));
        }
    }
    best.map(|(_, p)| p)
}

/// Compute the match priority (lower is better) of a subtitle stem against
/// the video stem's lowercased and normalized forms. Returns `None` when the
/// candidate does not match under any rule.
fn candidate_priority(
    sub_stem: &str,
    video_stem_lower: &str,
    video_stem_norm: &str,
) -> Option<u8> {
    // Priority 0: exact stem match (case-insensitive).
    if sub_stem.to_lowercase() == video_stem_lower {
        return Some(0);
    }
    // Language-suffixed candidates: "movie.en" → base "movie", lang "en".
    if let Some((base, lang)) = strip_lang_suffix(sub_stem) {
        let is_english = ENGLISH_LANGS.contains(&lang.as_str());
        // Priorities 2/4: exact base; 3/5: normalized base.
        if base.to_lowercase() == video_stem_lower {
            return Some(if is_english { 2 } else { 4 });
        }
        if normalize_stem(&base) == video_stem_norm {
            return Some(if is_english { 3 } else { 5 });
        }
        // A recognised language suffix whose base does not match still falls
        // through to the normalized full-stem check below.
    }
    // Priority 1: normalized match on the full stem (no language suffix).
    if normalize_stem(sub_stem) == video_stem_norm {
        Some(1)
    } else {
        None
    }
}
// ---------------------------------------------------------------------------
// 3. store_subtitle_for_fid
// ---------------------------------------------------------------------------
/// Sanitize a filename component: replace every character that is not ASCII
/// alphanumeric or one of `.`, `_`, `-` with `_`, then truncate to 60
/// characters.
///
/// The filtered output is pure ASCII, so the 60-character truncation is also
/// a 60-byte truncation and can never split a multi-byte character.
fn sanitize_name(name: &str) -> String {
    // A simple per-character map replaces the previous regex; the allowed
    // set is identical to `[a-zA-Z0-9._-]`.
    name.chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || matches!(c, '.' | '_' | '-') {
                c
            } else {
                '_'
            }
        })
        .take(60)
        .collect()
}
/// Store a subtitle file for a given fid. Converts SRT→VTT if needed.
///
/// The output file is written as `{fid}_{sanitized_name}.vtt` inside
/// `subs_dir`. Returns `SubtitleStored` with the relative path (from the
/// parent of `subs_dir`) and a display label.
///
/// Returns `None` if the source file extension is not supported or reading
/// the source fails (e.g. the file is not valid UTF-8).
pub fn store_subtitle_for_fid(
    fid: &str,
    src_path: &Path,
    subs_dir: &Path,
) -> Option<SubtitleStored> {
    // Reject anything that is not a supported subtitle extension.
    let ext_dotted = format!(
        ".{}",
        src_path.extension()?.to_string_lossy().to_lowercase()
    );
    if !SUB_EXTS.contains(&ext_dotted.as_str()) {
        return None;
    }
    let label = src_path
        .file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_default();
    let stem = src_path
        .file_stem()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_else(|| "subtitle".to_string());
    let out_name = format!("{}_{}.vtt", fid, sanitize_name(&stem));
    // Ensure the destination directory exists before writing.
    let _ = fs::create_dir_all(subs_dir);
    let out_path = subs_dir.join(&out_name);
    // Read the source; SRT is converted, VTT is passed through verbatim.
    let raw = fs::read_to_string(src_path).ok()?;
    let vtt = match ext_dotted.as_str() {
        ".srt" => srt_to_vtt(&raw),
        _ => raw,
    };
    fs::write(&out_path, vtt.as_bytes()).ok()?;
    // The stored path is relative to the parent of `subs_dir`.
    let dir_name = subs_dir
        .file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "subtitles".to_string());
    Some(SubtitleStored {
        vtt: format!("{}/{}", dir_name, out_name),
        label,
    })
}
// ---------------------------------------------------------------------------
// 4. extract_embedded_subtitle
// ---------------------------------------------------------------------------
/// Extract an embedded subtitle track from a video using ffmpeg.
///
/// Runs: `ffmpeg -y -i {video_path} -map 0:{track_index} -c:s webvtt {output_path}`
///
/// The output file is `{fid}_embedded_{track_index}.vtt` inside `subs_dir`.
/// On Windows, the process is created with `CREATE_NO_WINDOW`.
///
/// Returns `SubtitleStored` on success, or an error message string.
pub fn extract_embedded_subtitle(
    video_path: &Path,
    track_index: u32,
    ffmpeg_path: &Path,
    subs_dir: &Path,
    fid: &str,
) -> Result<SubtitleStored, String> {
    let _ = fs::create_dir_all(subs_dir);
    let out_name = format!("{}_embedded_{}.vtt", fid, track_index);
    let out_path = subs_dir.join(&out_name);
    let mut cmd = Command::new(ffmpeg_path);
    // Pass paths as OsStr via `arg` instead of `to_string_lossy()` strings,
    // so paths containing non-Unicode components are not mangled.
    cmd.arg("-y")
        .arg("-i")
        .arg(video_path)
        .arg("-map")
        .arg(format!("0:{}", track_index))
        .arg("-c:s")
        .arg("webvtt")
        .arg(&out_path);
    #[cfg(target_os = "windows")]
    {
        // Prevent a console window from flashing up on Windows.
        cmd.creation_flags(CREATE_NO_WINDOW);
    }
    let output = cmd
        .output()
        .map_err(|e| format!("Failed to run ffmpeg: {}", e))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "ffmpeg exited with status {}: {}",
            output.status, stderr
        ));
    }
    if !out_path.exists() {
        return Err("ffmpeg did not produce an output file".to_string());
    }
    let subs_dir_name = subs_dir
        .file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "subtitles".to_string());
    let vtt_rel = format!("{}/{}", subs_dir_name, out_name);
    Ok(SubtitleStored {
        vtt: vtt_rel,
        label: format!("Embedded track {}", track_index),
    })
}
// ===========================================================================
// Tests
// ===========================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;
    // -- srt_to_vtt ----------------------------------------------------------
    #[test]
    fn test_srt_to_vtt_basic() {
        // Cues are separated by blank lines, as in real SRT files; without
        // the separator the second cue would be swallowed as cue-1 text.
        let srt = "\
1
00:00:01,000 --> 00:00:04,000
Hello, world!

2
00:00:05,000 --> 00:00:08,000
This is a test.
";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT\n"));
        assert!(vtt.contains("00:00:01.000 --> 00:00:04.000"));
        assert!(vtt.contains("Hello, world!"));
        assert!(vtt.contains("00:00:05.000 --> 00:00:08.000"));
        assert!(vtt.contains("This is a test."));
        // Timestamp commas must be converted to dots.
        assert!(!vtt.contains("00:00:01,000"));
        assert!(!vtt.contains("00:00:04,000"));
    }
    #[test]
    fn test_srt_to_vtt_bom() {
        let srt = "\u{FEFF}1\n00:00:01,000 --> 00:00:02,000\nHello\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        // BOM must be removed.
        assert!(!vtt.contains('\u{FEFF}'));
        assert!(vtt.contains("Hello"));
    }
    #[test]
    fn test_srt_to_vtt_empty() {
        let vtt = srt_to_vtt("");
        assert!(vtt.starts_with("WEBVTT"));
        // Should be just the header.
        assert_eq!(vtt.trim(), "WEBVTT");
    }
    #[test]
    fn test_srt_to_vtt_windows_line_endings() {
        let srt = "1\r\n00:00:01,000 --> 00:00:02,000\r\nHello\r\n\r\n\
2\r\n00:00:03,000 --> 00:00:04,000\r\nWorld\r\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        assert!(vtt.contains("00:00:01.000 --> 00:00:02.000"));
        assert!(vtt.contains("Hello"));
        assert!(vtt.contains("00:00:03.000 --> 00:00:04.000"));
        assert!(vtt.contains("World"));
    }
    #[test]
    fn test_srt_to_vtt_no_cue_indices() {
        // Some SRT files omit cue numbers entirely; cues are still separated
        // by a blank line.
        let srt = "\
00:00:01,500 --> 00:00:03,500
First line

00:00:04,000 --> 00:00:06,000
Second line
";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        assert!(vtt.contains("00:00:01.500 --> 00:00:03.500"));
        assert!(vtt.contains("First line"));
        assert!(vtt.contains("00:00:04.000 --> 00:00:06.000"));
        assert!(vtt.contains("Second line"));
    }
    // -- auto_subtitle_sidecar -----------------------------------------------
    #[test]
    fn test_auto_subtitle_sidecar_exact_match() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        let sub = dir.path().join("lecture.srt");
        fs::write(&video, b"video").unwrap();
        fs::write(&sub, b"1\n00:00:00,000 --> 00:00:01,000\nhi\n").unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), sub);
    }
    #[test]
    fn test_auto_subtitle_sidecar_english_suffix() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        let sub = dir.path().join("lecture.en.srt");
        fs::write(&video, b"video").unwrap();
        fs::write(&sub, b"sub content").unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), sub);
    }
    #[test]
    fn test_auto_subtitle_sidecar_no_match() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        fs::write(&video, b"video").unwrap();
        // No subtitle files at all.
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_none());
    }
    #[test]
    fn test_auto_subtitle_sidecar_priority_order() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        fs::write(&video, b"video").unwrap();
        // Priority 0: exact stem match.
        let exact = dir.path().join("lecture.srt");
        // Priority 2: English suffix with exact base.
        let en_suffix = dir.path().join("lecture.en.srt");
        // Priority 4: Other language suffix with exact base.
        let fr_suffix = dir.path().join("lecture.fr.srt");
        fs::write(&exact, b"exact").unwrap();
        fs::write(&en_suffix, b"english").unwrap();
        fs::write(&fr_suffix, b"french").unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        // Should pick priority 0 (exact match) over others.
        assert_eq!(result.unwrap(), exact);
        // Remove exact match → should pick English suffix (priority 2).
        fs::remove_file(&exact).unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), en_suffix);
        // Remove English suffix → should pick French suffix (priority 4).
        fs::remove_file(&en_suffix).unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), fr_suffix);
    }
    // -- store_subtitle_for_fid ----------------------------------------------
    #[test]
    fn test_store_subtitle_srt_converts_to_vtt() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("my_sub.srt");
        let srt_content = "1\n00:00:01,000 --> 00:00:02,000\nHello\n";
        fs::write(&src, srt_content).unwrap();
        let result = store_subtitle_for_fid("abc123", &src, &subs_dir);
        assert!(result.is_some());
        let stored = result.unwrap();
        assert!(stored.vtt.ends_with(".vtt"));
        assert!(stored.vtt.starts_with("subtitles/"));
        assert_eq!(stored.label, "my_sub.srt");
        // Verify the VTT output file was actually created and converted.
        let out_path = subs_dir.join(format!("abc123_{}.vtt", "my_sub"));
        assert!(out_path.exists());
        let vtt_content = fs::read_to_string(&out_path).unwrap();
        assert!(vtt_content.starts_with("WEBVTT"));
        assert!(vtt_content.contains("00:00:01.000 --> 00:00:02.000"));
        assert!(vtt_content.contains("Hello"));
    }
    #[test]
    fn test_store_subtitle_vtt_copies() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("my_sub.vtt");
        let vtt_content = "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nHello\n";
        fs::write(&src, vtt_content).unwrap();
        let result = store_subtitle_for_fid("def456", &src, &subs_dir);
        assert!(result.is_some());
        let stored = result.unwrap();
        assert!(stored.vtt.ends_with(".vtt"));
        assert_eq!(stored.label, "my_sub.vtt");
        // Verify the output file has the same content (not SRT-converted).
        let out_path = subs_dir.join("def456_my_sub.vtt");
        assert!(out_path.exists());
        let content = fs::read_to_string(&out_path).unwrap();
        assert_eq!(content, vtt_content);
    }
    #[test]
    fn test_store_subtitle_unsupported_ext() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("notes.txt");
        fs::write(&src, "Some notes").unwrap();
        let result = store_subtitle_for_fid("xyz789", &src, &subs_dir);
        assert!(result.is_none());
    }
}