add ffmpeg.rs, subtitles.rs, and fonts.rs

This commit is contained in:
2026-02-19 01:59:21 +02:00
parent 4fd04f55c8
commit ffa37e6061
4 changed files with 2075 additions and 0 deletions

652
src-tauri/src/subtitles.rs Normal file
View File

@@ -0,0 +1,652 @@
//! Subtitle handling: SRT-to-VTT conversion, sidecar discovery, storage,
//! and embedded subtitle extraction via ffmpeg.
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
#[cfg(target_os = "windows")]
use std::os::windows::process::CommandExt;
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/// Supported subtitle file extensions, each written in lowercase and
/// including the leading dot. Callers lowercase the file name or extension
/// before comparing against these entries.
pub const SUB_EXTS: &[&str] = &[".srt", ".vtt"];
/// Language suffixes treated as "English". In `auto_subtitle_sidecar`, a
/// sidecar carrying one of these ranks ahead of other-language sidecars.
const ENGLISH_LANGS: &[&str] = &["en", "eng", "english"];
/// Every language suffix recognised by `strip_lang_suffix`, in lowercase (the
/// candidate suffix is lowercased before the lookup). A suffix that is not
/// listed here is left as part of the subtitle stem.
const ALL_LANG_SUFFIXES: &[&str] = &[
"en", "eng", "english", "fr", "de", "es", "it", "pt", "ru", "ja", "ko", "zh",
];
/// Windows `CREATE_NO_WINDOW` process-creation flag, passed to
/// `creation_flags` when spawning ffmpeg.
#[cfg(target_os = "windows")]
const CREATE_NO_WINDOW: u32 = 0x08000000;
// ---------------------------------------------------------------------------
// Structs
// ---------------------------------------------------------------------------
/// Result of storing a subtitle file for a video.
///
/// Returned by `store_subtitle_for_fid` (sidecar files) and
/// `extract_embedded_subtitle` (tracks pulled out of the video by ffmpeg).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SubtitleStored {
/// Location of the stored VTT file, built as the subtitles directory's own
/// name and the output file name joined with `/`, for example
/// `"subtitles/{fid}_{name}.vtt"`.
pub vtt: String,
/// Display label: the source file name for a sidecar file, or
/// `"Embedded track {n}"` for an extracted track.
pub label: String,
}
// ---------------------------------------------------------------------------
// Compiled regex patterns
// ---------------------------------------------------------------------------
/// Matches a string that consists solely of one or more digits (the shape of
/// an SRT cue index line).
static CUE_INDEX_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d+$").unwrap());
/// Matches any single character that is NOT an ASCII letter, an ASCII digit,
/// a dot, an underscore, or a hyphen.
static SANITIZE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[^a-zA-Z0-9._\-]").unwrap());
/// Matches a run of one or more hyphens, underscores, and/or whitespace
/// characters, so that each run can be replaced by a single space when
/// subtitle stems are normalized for comparison.
static NORMALIZE_SEP_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"[-_\s]+").unwrap());
// ---------------------------------------------------------------------------
// 1. srt_to_vtt
// ---------------------------------------------------------------------------
/// Convert SRT subtitle text to a WebVTT document.
///
/// - Removes every BOM character (`\u{FEFF}`).
/// - Emits the `WEBVTT` header followed by a blank line.
/// - Drops cue index lines (lines made up only of ASCII digits).
/// - Rewrites timing lines by replacing `,` with `.`.
/// - Copies cue text verbatim; a blank line ends the cue.
/// - Also ends a cue when the next one starts without a separating blank
///   line (a timing line, or an index line directly followed by a timing
///   line), so that cue is converted instead of being absorbed as text.
/// - Discards any other text that appears outside a cue.
///
/// The returned string always ends with exactly one newline; for empty input
/// it is just the header.
pub fn srt_to_vtt(srt_text: &str) -> String {
    let text = srt_text.replace('\u{FEFF}', "");
    let lines: Vec<&str> = text.lines().collect();
    let mut out: Vec<String> = vec!["WEBVTT".to_string(), String::new()];
    let mut in_cue = false;

    for (idx, raw) in lines.iter().enumerate() {
        // `lines()` already strips `\r\n`; this also removes stray extra `\r`s.
        let line = raw.trim_end_matches('\r');
        let trimmed = line.trim();

        if trimmed.is_empty() {
            // Blank lines are passed through and always terminate a cue.
            out.push(String::new());
            in_cue = false;
        } else if in_cue {
            let next_is_timing = lines
                .get(idx + 1)
                .map_or(false, |next| is_srt_timing_line(next));
            if is_srt_timing_line(line) {
                // A new cue began without a blank separator: close the
                // previous cue explicitly, then start the next one.
                out.push(String::new());
                out.push(line.replace(',', "."));
            } else if is_srt_cue_index(trimmed) && next_is_timing {
                // Index of the next cue with no blank separator before it.
                // A digits-only line NOT followed by a timing line is kept
                // below as ordinary cue text (e.g. a caption reading "42").
                out.push(String::new());
                in_cue = false;
            } else {
                out.push(line.to_string());
            }
        } else if line.contains("-->") {
            out.push(line.replace(',', "."));
            in_cue = true;
        }
        // Anything else outside a cue (cue indices, stray text) is dropped.
    }

    format!("{}\n", out.join("\n").trim())
}

/// True when `line` looks like an SRT timing line: it contains `-->` and its
/// first non-whitespace character is an ASCII digit.
fn is_srt_timing_line(line: &str) -> bool {
    line.contains("-->") && line.trim_start().starts_with(|c: char| c.is_ascii_digit())
}

/// True when `trimmed` is non-empty and consists only of ASCII digits.
fn is_srt_cue_index(trimmed: &str) -> bool {
    !trimmed.is_empty() && trimmed.bytes().all(|b| b.is_ascii_digit())
}
// ---------------------------------------------------------------------------
// 2. auto_subtitle_sidecar (helpers)
// ---------------------------------------------------------------------------
/// Produce the comparison key used for fuzzy subtitle matching.
///
/// The input is lowercased, every run of hyphens, underscores, and
/// whitespace becomes a single space, and surrounding whitespace is trimmed.
fn normalize_stem(s: &str) -> String {
    NORMALIZE_SEP_RE
        .replace_all(&s.to_lowercase(), " ")
        .trim()
        .to_owned()
}
/// Split a known language suffix off the end of a subtitle stem.
///
/// The stem is split at its last `.`; when the part after the dot, compared
/// case-insensitively, is one of `ALL_LANG_SUFFIXES`, the result is
/// `Some((base, lowercase_suffix))`, e.g. `"video.EN"` -> `("video", "en")`.
/// Returns `None` when the stem has no dot or the suffix is not recognised.
fn strip_lang_suffix(stem: &str) -> Option<(String, String)> {
    let (base, suffix) = stem.rsplit_once('.')?;
    let lang = suffix.to_lowercase();
    if ALL_LANG_SUFFIXES.contains(&lang.as_str()) {
        Some((base.to_string(), lang))
    } else {
        None
    }
}
/// Locate the subtitle sidecar that best matches `video_path`.
///
/// Scans the video's directory for regular files whose lowercased name ends
/// in one of `SUB_EXTS` and ranks each by how its stem relates to the video's
/// stem. The lowest rank wins; among equal ranks the first file yielded by
/// the directory listing is kept.
///
/// Ranks:
/// - 0: stem equals the video stem (case-insensitive)
/// - 1: stem equals the video stem after normalization
/// - 2: English language suffix, base equals the video stem
/// - 3: English language suffix, base equals it after normalization
/// - 4: other known language suffix, base equals the video stem
/// - 5: other known language suffix, base equals it after normalization
///
/// Returns `None` when the path has no parent or file stem, the directory
/// cannot be read, or no candidate matches.
pub fn auto_subtitle_sidecar(video_path: &Path) -> Option<PathBuf> {
    let dir = video_path.parent()?;
    let wanted = video_path.file_stem()?.to_string_lossy().to_string();
    let wanted_lower = wanted.to_lowercase();
    let wanted_norm = normalize_stem(&wanted);

    // Rank of a candidate stem, or `None` when it does not match the video.
    let rank_of = |stem: &str| -> Option<u8> {
        if stem.to_lowercase() == wanted_lower {
            return Some(0);
        }
        if let Some((base, lang)) = strip_lang_suffix(stem) {
            // English suffixes occupy ranks 2/3, other languages ranks 4/5.
            let exact_rank: u8 = if ENGLISH_LANGS.contains(&lang.as_str()) {
                2
            } else {
                4
            };
            if base.to_lowercase() == wanted_lower {
                return Some(exact_rank);
            }
            if normalize_stem(&base) == wanted_norm {
                return Some(exact_rank + 1);
            }
        }
        // No usable language suffix: compare the whole stem, normalized.
        if normalize_stem(stem) == wanted_norm {
            Some(1)
        } else {
            None
        }
    };

    let mut winner: Option<(u8, PathBuf)> = None;
    for candidate in fs::read_dir(dir).ok()?.flatten().map(|entry| entry.path()) {
        if !candidate.is_file() {
            continue;
        }
        let name_lower = match candidate.file_name() {
            Some(name) => name.to_string_lossy().to_lowercase(),
            None => continue,
        };
        if !SUB_EXTS.iter().any(|ext| name_lower.ends_with(ext)) {
            continue;
        }
        let stem = match candidate.file_stem() {
            Some(stem) => stem.to_string_lossy().to_string(),
            None => continue,
        };
        if let Some(rank) = rank_of(&stem) {
            // Strictly-better only, so an earlier file of equal rank is kept.
            let improves = match &winner {
                Some((held, _)) => rank < *held,
                None => true,
            };
            if improves {
                winner = Some((rank, candidate));
            }
        }
    }
    winner.map(|(_, path)| path)
}
// ---------------------------------------------------------------------------
// 3. store_subtitle_for_fid
// ---------------------------------------------------------------------------
/// Make `name` safe to embed in an output file name.
///
/// Every character other than ASCII letters, ASCII digits, `.`, `_`, and `-`
/// is replaced with `_`, and the result is cut to at most 60 characters.
fn sanitize_name(name: &str) -> String {
    let mut cleaned = SANITIZE_RE.replace_all(name, "_").into_owned();
    // After replacement the string is pure ASCII, so a byte-length cut always
    // falls on a character boundary.
    cleaned.truncate(60);
    cleaned
}
/// Store a sidecar subtitle for `fid` as a VTT file inside `subs_dir`.
///
/// `.srt` sources are converted with `srt_to_vtt`; `.vtt` sources are written
/// unchanged. The output file is named `{fid}_{sanitized_stem}.vtt`, and
/// `subs_dir` is created if it does not exist yet.
///
/// Returns a `SubtitleStored` whose `vtt` is `"{subs_dir name}/{file name}"`
/// and whose `label` is the source file name.
///
/// Returns `None` when the source has no extension or an unsupported one,
/// when it cannot be read as UTF-8 text, or when the output cannot be
/// written.
pub fn store_subtitle_for_fid(
    fid: &str,
    src_path: &Path,
    subs_dir: &Path,
) -> Option<SubtitleStored> {
    let dotted_ext = format!(
        ".{}",
        src_path.extension()?.to_string_lossy().to_lowercase()
    );
    if !SUB_EXTS.iter().any(|known| *known == dotted_ext) {
        return None;
    }

    let label = match src_path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => String::new(),
    };
    let stem = match src_path.file_stem() {
        Some(stem) => stem.to_string_lossy().into_owned(),
        None => "subtitle".to_string(),
    };
    let out_name = format!("{}_{}.vtt", fid, sanitize_name(&stem));

    // Best effort: if the directory cannot be created, the write below fails
    // and the function returns `None`.
    let _ = fs::create_dir_all(subs_dir);

    let raw = fs::read_to_string(src_path).ok()?;
    let vtt_text = match dotted_ext.as_str() {
        ".srt" => srt_to_vtt(&raw),
        // Already VTT: stored as-is.
        _ => raw,
    };
    fs::write(subs_dir.join(&out_name), vtt_text.as_bytes()).ok()?;

    // Relative path is "{directory name}/{out_name}", always with `/`.
    let dir_name = match subs_dir.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => "subtitles".to_string(),
    };
    Some(SubtitleStored {
        vtt: format!("{}/{}", dir_name, out_name),
        label,
    })
}
// ---------------------------------------------------------------------------
// 4. extract_embedded_subtitle
// ---------------------------------------------------------------------------
/// Extract an embedded subtitle track from a video using ffmpeg.
///
/// Runs: `ffmpeg -y -i {video_path} -map 0:{track_index} -c:s webvtt {output_path}`
///
/// The output file is `{fid}_embedded_{track_index}.vtt` inside `subs_dir`,
/// which is created first if necessary. Paths are handed to ffmpeg as OS
/// strings, so paths that are not valid UTF-8 are passed through unchanged.
/// On Windows, the process is created with `CREATE_NO_WINDOW`.
///
/// # Errors
///
/// Returns a message string when `subs_dir` cannot be created, ffmpeg cannot
/// be started, ffmpeg exits unsuccessfully (its stderr is included), or no
/// output file exists afterwards.
pub fn extract_embedded_subtitle(
    video_path: &Path,
    track_index: u32,
    ffmpeg_path: &Path,
    subs_dir: &Path,
    fid: &str,
) -> Result<SubtitleStored, String> {
    // Report a directory failure directly instead of letting ffmpeg fail
    // later with a less specific error.
    fs::create_dir_all(subs_dir).map_err(|e| {
        format!(
            "Failed to create subtitle directory {}: {}",
            subs_dir.display(),
            e
        )
    })?;

    let out_name = format!("{}_embedded_{}.vtt", fid, track_index);
    let out_path = subs_dir.join(&out_name);

    let mut cmd = Command::new(ffmpeg_path);
    // `arg` takes `AsRef<OsStr>`, so the paths go through without the lossy
    // UTF-8 conversion that would corrupt non-UTF-8 file names.
    cmd.arg("-y")
        .arg("-i")
        .arg(video_path)
        .arg("-map")
        .arg(format!("0:{}", track_index))
        .args(["-c:s", "webvtt"])
        .arg(&out_path);
    #[cfg(target_os = "windows")]
    {
        cmd.creation_flags(CREATE_NO_WINDOW);
    }

    let output = cmd
        .output()
        .map_err(|e| format!("Failed to run ffmpeg: {}", e))?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(format!(
            "ffmpeg exited with status {}: {}",
            output.status, stderr
        ));
    }
    if !out_path.exists() {
        return Err("ffmpeg did not produce an output file".to_string());
    }

    // Relative path is "{directory name}/{out_name}", always with `/`.
    let subs_dir_name = subs_dir
        .file_name()
        .map(|n| n.to_string_lossy().to_string())
        .unwrap_or_else(|| "subtitles".to_string());
    Ok(SubtitleStored {
        vtt: format!("{}/{}", subs_dir_name, out_name),
        label: format!("Embedded track {}", track_index),
    })
}
// ===========================================================================
// Tests
// ===========================================================================
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // -- srt_to_vtt ----------------------------------------------------------

    /// Two indexed cues, separated by a blank line, are converted to WebVTT.
    #[test]
    fn test_srt_to_vtt_basic() {
        // The blank separator line is spelled with explicit `\n` escapes so it
        // cannot be lost to source formatting.
        let srt = "1\n00:00:01,000 --> 00:00:04,000\nHello, world!\n\n\
                   2\n00:00:05,000 --> 00:00:08,000\nThis is a test.\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT\n"));
        assert!(vtt.contains("00:00:01.000 --> 00:00:04.000"));
        assert!(vtt.contains("Hello, world!"));
        assert!(vtt.contains("00:00:05.000 --> 00:00:08.000"));
        assert!(vtt.contains("This is a test."));
        // Timestamp commas must be converted to dots.
        assert!(!vtt.contains("00:00:01,000"));
        assert!(!vtt.contains("00:00:04,000"));
    }

    /// A leading byte-order mark is removed from the output.
    #[test]
    fn test_srt_to_vtt_bom() {
        let srt = "\u{FEFF}1\n00:00:01,000 --> 00:00:02,000\nHello\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        // BOM must be removed.
        assert!(!vtt.contains('\u{FEFF}'));
        assert!(vtt.contains("Hello"));
    }

    /// Empty input yields only the header.
    #[test]
    fn test_srt_to_vtt_empty() {
        let vtt = srt_to_vtt("");
        assert!(vtt.starts_with("WEBVTT"));
        // Should be just the header.
        assert_eq!(vtt.trim(), "WEBVTT");
    }

    /// CRLF line endings are handled like LF.
    #[test]
    fn test_srt_to_vtt_windows_line_endings() {
        let srt = "1\r\n00:00:01,000 --> 00:00:02,000\r\nHello\r\n\r\n\
                   2\r\n00:00:03,000 --> 00:00:04,000\r\nWorld\r\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        assert!(vtt.contains("00:00:01.000 --> 00:00:02.000"));
        assert!(vtt.contains("Hello"));
        assert!(vtt.contains("00:00:03.000 --> 00:00:04.000"));
        assert!(vtt.contains("World"));
    }

    /// Cues that omit their index numbers are still converted.
    #[test]
    fn test_srt_to_vtt_no_cue_indices() {
        // Some SRT files omit cue numbers entirely; cues stay separated by a
        // blank line, written here with explicit `\n` escapes.
        let srt = "00:00:01,500 --> 00:00:03,500\nFirst line\n\n\
                   00:00:04,000 --> 00:00:06,000\nSecond line\n";
        let vtt = srt_to_vtt(srt);
        assert!(vtt.starts_with("WEBVTT"));
        assert!(vtt.contains("00:00:01.500 --> 00:00:03.500"));
        assert!(vtt.contains("First line"));
        assert!(vtt.contains("00:00:04.000 --> 00:00:06.000"));
        assert!(vtt.contains("Second line"));
    }

    // -- auto_subtitle_sidecar -----------------------------------------------

    /// A subtitle whose stem equals the video stem is found.
    #[test]
    fn test_auto_subtitle_sidecar_exact_match() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        let sub = dir.path().join("lecture.srt");
        fs::write(&video, b"video").unwrap();
        fs::write(&sub, b"1\n00:00:00,000 --> 00:00:01,000\nhi\n").unwrap();

        let result = auto_subtitle_sidecar(&video);
        assert_eq!(result.as_deref(), Some(sub.as_path()));
    }

    /// A subtitle carrying an English language suffix is found.
    #[test]
    fn test_auto_subtitle_sidecar_english_suffix() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        let sub = dir.path().join("lecture.en.srt");
        fs::write(&video, b"video").unwrap();
        fs::write(&sub, b"sub content").unwrap();

        let result = auto_subtitle_sidecar(&video);
        assert_eq!(result.as_deref(), Some(sub.as_path()));
    }

    /// With no subtitle files in the directory, nothing is returned.
    #[test]
    fn test_auto_subtitle_sidecar_no_match() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        fs::write(&video, b"video").unwrap();

        // No subtitle files at all.
        assert!(auto_subtitle_sidecar(&video).is_none());
    }

    /// Exact match beats English suffix, which beats another language.
    #[test]
    fn test_auto_subtitle_sidecar_priority_order() {
        let dir = TempDir::new().unwrap();
        let video = dir.path().join("lecture.mp4");
        fs::write(&video, b"video").unwrap();

        // Priority 0: exact stem match.
        let exact = dir.path().join("lecture.srt");
        // Priority 2: English suffix with exact base.
        let en_suffix = dir.path().join("lecture.en.srt");
        // Priority 4: Other language suffix with exact base.
        let fr_suffix = dir.path().join("lecture.fr.srt");
        fs::write(&exact, b"exact").unwrap();
        fs::write(&en_suffix, b"english").unwrap();
        fs::write(&fr_suffix, b"french").unwrap();

        // Should pick priority 0 (exact match) over others.
        let result = auto_subtitle_sidecar(&video);
        assert_eq!(result.as_deref(), Some(exact.as_path()));

        // Remove exact match -> should pick English suffix (priority 2).
        fs::remove_file(&exact).unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert_eq!(result.as_deref(), Some(en_suffix.as_path()));

        // Remove English suffix -> should pick French suffix (priority 4).
        fs::remove_file(&en_suffix).unwrap();
        let result = auto_subtitle_sidecar(&video);
        assert_eq!(result.as_deref(), Some(fr_suffix.as_path()));
    }

    // -- store_subtitle_for_fid ----------------------------------------------

    /// An SRT source is converted and stored as VTT.
    #[test]
    fn test_store_subtitle_srt_converts_to_vtt() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("my_sub.srt");
        let srt_content = "1\n00:00:01,000 --> 00:00:02,000\nHello\n";
        fs::write(&src, srt_content).unwrap();

        let stored = store_subtitle_for_fid("abc123", &src, &subs_dir)
            .expect("an .srt source must be stored");
        assert_eq!(stored.vtt, "subtitles/abc123_my_sub.vtt");
        assert_eq!(stored.label, "my_sub.srt");

        // Verify the VTT output file was actually created and converted.
        let out_path = subs_dir.join("abc123_my_sub.vtt");
        assert!(out_path.exists());
        let vtt_content = fs::read_to_string(&out_path).unwrap();
        assert!(vtt_content.starts_with("WEBVTT"));
        assert!(vtt_content.contains("00:00:01.000 --> 00:00:02.000"));
        assert!(vtt_content.contains("Hello"));
    }

    /// A VTT source is stored with its content unchanged.
    #[test]
    fn test_store_subtitle_vtt_copies() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("my_sub.vtt");
        let vtt_content = "WEBVTT\n\n00:00:01.000 --> 00:00:02.000\nHello\n";
        fs::write(&src, vtt_content).unwrap();

        let stored = store_subtitle_for_fid("def456", &src, &subs_dir)
            .expect("a .vtt source must be stored");
        assert_eq!(stored.vtt, "subtitles/def456_my_sub.vtt");
        assert_eq!(stored.label, "my_sub.vtt");

        // Verify the output file has the same content (not SRT-converted).
        let out_path = subs_dir.join("def456_my_sub.vtt");
        assert!(out_path.exists());
        let content = fs::read_to_string(&out_path).unwrap();
        assert_eq!(content, vtt_content);
    }

    /// A source with an unsupported extension is rejected.
    #[test]
    fn test_store_subtitle_unsupported_ext() {
        let dir = TempDir::new().unwrap();
        let subs_dir = dir.path().join("subtitles");
        let src = dir.path().join("notes.txt");
        fs::write(&src, "Some notes").unwrap();

        assert!(store_subtitle_for_fid("xyz789", &src, &subs_dir).is_none());
    }
}