Files
driftwood/src/core/github_enrichment.rs
lashman abb69dc753 Add tags, export/import, and changelog features
- Tag editor in detail view with add/remove pill chips
- Tag filter chips in library view for filtering by tag
- Shared backup module for app list export/import (JSON v2)
- CLI export/import refactored to use shared module
- GUI export/import via file picker dialogs in hamburger menu
- GitHub release history enrichment for catalog apps
- Changelog preview in updates view with expandable rows
- DB migration v19 for catalog release_history column
2026-03-01 01:01:43 +02:00

527 lines
17 KiB
Rust

use super::database::Database;
// --- API response structs ---
/// Repository metadata deserialized by `fetch_repo_info` from the
/// `https://api.github.com/repos/{owner}/{repo}` response body.
///
/// Only the keys declared here are read from the JSON payload.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubRepoInfo {
/// Star count of the repository. Required: no serde default is declared,
/// so a response without this key fails to parse.
pub stargazers_count: i64,
/// Raw `pushed_at` string from the response, if present.
/// NOTE(review): presumably an ISO 8601 timestamp - confirm against the API docs.
pub pushed_at: Option<String>,
/// Repository description, if present.
pub description: Option<String>,
}
/// A single release as deserialized by `fetch_release_info` from the
/// `.../releases/latest` endpoint.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubReleaseInfo {
/// Git tag of the release. `enrich_app_release_info` strips one leading `v`
/// from it to obtain the stored version string.
pub tag_name: String,
/// Raw `published_at` string from the response, if present.
pub published_at: Option<String>,
/// Files attached to the release. Required: no serde default is declared.
pub assets: Vec<GitHubReleaseAsset>,
}
/// One downloadable file attached to a GitHub release.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubReleaseAsset {
/// File name of the asset; `filter_appimage_assets` inspects its suffix.
pub name: String,
/// Download URL; copied into `AppImageAsset::url` for AppImage files.
pub browser_download_url: String,
/// Download counter for this asset; summed across all assets of a release
/// by `enrich_app_release_info`.
pub download_count: i64,
/// Asset size as reported by the API.
/// NOTE(review): presumably bytes - confirm against the API docs.
pub size: i64,
}
// --- URL parsing ---
/// Extract (owner, repo) from a GitHub URL.
/// Tries download_url first (most reliable for GitHub releases), then homepage.
pub fn extract_github_repo(homepage: Option<&str>, download_url: &str) -> Option<(String, String)> {
// Try download URL first - most AppImageHub entries point to GitHub releases
if let Some(pair) = parse_github_url(download_url) {
return Some(pair);
}
// Fallback to homepage
if let Some(hp) = homepage {
if let Some(pair) = parse_github_url(hp) {
return Some(pair);
}
}
None
}
/// Parse `github.com/{owner}/{repo}` out of a URL.
///
/// Accepts URLs with or without an `http://` / `https://` scheme. Anything
/// after the repository segment (further path components, a `?query`, or a
/// `#fragment`) is ignored, and a trailing `.git` on the repository name is
/// removed.
///
/// Returns `None` when the host is not exactly `github.com`, or when the owner
/// or repository segment is missing or empty.
fn parse_github_url(url: &str) -> Option<(String, String)> {
    let without_scheme = url
        .trim_start_matches("https://")
        .trim_start_matches("http://");
    let path = without_scheme.strip_prefix("github.com/")?;

    // Only the first two path segments matter; the rest (if any) is lumped
    // into a third piece that is never inspected.
    let mut segments = path.splitn(3, '/');
    let owner = segments.next()?;
    let repo_segment = segments.next()?;

    // Cut the query string / fragment off *before* removing `.git`; otherwise
    // `repo.git?tab=readme` would keep its `.git` suffix.
    let repo = repo_segment
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(repo_segment)
        .trim_end_matches(".git");

    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some((owner.to_string(), repo.to_string()))
}
// --- API calls ---
/// Issue a GET request against the GitHub API and return the response body
/// together with the remaining rate-limit budget.
///
/// An `Authorization: Bearer` header is attached only when `token` is
/// non-empty. The second tuple element is the parsed value of the
/// `x-ratelimit-remaining` response header, or `0` when the header is absent
/// or not a valid `u32`.
///
/// # Errors
/// Returns a human-readable message when the request fails or the body cannot
/// be read as a string.
fn github_get(url: &str, token: &str) -> Result<(String, u32), String> {
    let base = ureq::get(url)
        .header("Accept", "application/vnd.github+json")
        .header("User-Agent", "Driftwood-AppImage-Manager");
    let request = if token.is_empty() {
        base
    } else {
        base.header("Authorization", &format!("Bearer {}", token))
    };

    let mut response = request
        .call()
        .map_err(|err| format!("GitHub API error: {}", err))?;

    // A missing or malformed header is treated as "no budget left".
    let remaining: u32 = match response.headers().get("x-ratelimit-remaining") {
        Some(value) => value
            .to_str()
            .ok()
            .and_then(|text| text.parse().ok())
            .unwrap_or(0),
        None => 0,
    };

    let body = response
        .body_mut()
        .read_to_string()
        .map_err(|err| format!("Read error: {}", err))?;

    Ok((body, remaining))
}
/// Fetch repository metadata for `owner/repo` from the GitHub API.
///
/// Returns the parsed [`GitHubRepoInfo`] plus the remaining rate-limit budget
/// reported by the response.
///
/// # Errors
/// Propagates request errors from `github_get` and reports JSON decoding
/// failures as `"Parse error: ..."`.
pub fn fetch_repo_info(owner: &str, repo: &str, token: &str) -> Result<(GitHubRepoInfo, u32), String> {
    let endpoint = format!("https://api.github.com/repos/{}/{}", owner, repo);
    github_get(&endpoint, token).and_then(|(payload, remaining)| {
        serde_json::from_str::<GitHubRepoInfo>(&payload)
            .map(|info| (info, remaining))
            .map_err(|e| format!("Parse error: {}", e))
    })
}
/// Fetch the latest release of `owner/repo` from the GitHub API.
///
/// Returns the parsed [`GitHubReleaseInfo`] plus the remaining rate-limit
/// budget reported by the response.
///
/// # Errors
/// Propagates request errors from `github_get` and reports JSON decoding
/// failures as `"Parse error: ..."`.
pub fn fetch_release_info(owner: &str, repo: &str, token: &str) -> Result<(GitHubReleaseInfo, u32), String> {
    let endpoint = format!("https://api.github.com/repos/{}/{}/releases/latest", owner, repo);
    github_get(&endpoint, token).and_then(|(payload, remaining)| {
        serde_json::from_str::<GitHubReleaseInfo>(&payload)
            .map(|release| (release, remaining))
            .map_err(|e| format!("Parse error: {}", e))
    })
}
/// JSON shape of the `.../readme` endpoint response, as consumed by `fetch_readme`.
#[derive(Debug, serde::Deserialize)]
struct GitHubReadmeResponse {
/// Encoded README text. `fetch_readme` removes `\n` characters from it
/// and then base64-decodes the remainder.
content: String,
/// Name of the encoding used for `content`. Falls back to the empty string
/// when the key is absent; `fetch_readme` rejects anything but `"base64"`.
#[serde(default)]
encoding: String,
}
/// Fetch the README content for a repo (decoded from base64).
pub fn fetch_readme(owner: &str, repo: &str, token: &str) -> Result<(String, u32), String> {
let url = format!("https://api.github.com/repos/{}/{}/readme", owner, repo);
let (body, remaining) = github_get(&url, token)?;
let resp: GitHubReadmeResponse = serde_json::from_str(&body)
.map_err(|e| format!("Parse error: {}", e))?;
if resp.encoding != "base64" {
return Err(format!("Unexpected encoding: {}", resp.encoding));
}
// GitHub returns base64 with newlines; strip them before decoding
let clean = resp.content.replace('\n', "");
let decoded = base64_decode(&clean)
.map_err(|e| format!("Base64 decode error: {}", e))?;
let text = String::from_utf8(decoded)
.map_err(|e| format!("UTF-8 error: {}", e))?;
Ok((text, remaining))
}
/// Decode standard-alphabet base64 (`A-Z a-z 0-9 + /`).
///
/// Padding is optional: decoding stops at the first `=` and everything after
/// it is ignored. Leftover bits that do not form a whole byte are dropped.
///
/// # Errors
/// Returns `"Invalid base64 char: <c>"` for the first byte before any `=` that
/// is not part of the alphabet (whitespace included).
fn base64_decode(input: &str) -> Result<Vec<u8>, String> {
    /// Map one alphabet byte to its 6-bit value.
    fn sextet(byte: u8) -> Option<u32> {
        let value = match byte {
            b'A'..=b'Z' => byte - b'A',
            b'a'..=b'z' => byte - b'a' + 26,
            b'0'..=b'9' => byte - b'0' + 52,
            b'+' => 62,
            b'/' => 63,
            _ => return None,
        };
        Some(u32::from(value))
    }

    let mut decoded = Vec::with_capacity(input.len() * 3 / 4);
    let mut accumulator = 0u32;
    let mut pending_bits = 0u32;

    for byte in input.bytes().take_while(|&b| b != b'=') {
        let value = sextet(byte)
            .ok_or_else(|| format!("Invalid base64 char: {}", byte as char))?;
        accumulator = (accumulator << 6) | value;
        pending_bits += 6;
        if pending_bits >= 8 {
            pending_bits -= 8;
            decoded.push((accumulator >> pending_bits) as u8);
            // Keep only the bits that have not been emitted yet.
            accumulator &= (1 << pending_bits) - 1;
        }
    }

    Ok(decoded)
}
// --- AppImage asset filtering ---
/// A simplified release asset for storage (JSON-serializable).
///
/// Built by `filter_appimage_assets` from a `GitHubReleaseAsset`; a list of
/// these is serialized to JSON by `enrich_app_release_info`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AppImageAsset {
/// Asset file name, copied from `GitHubReleaseAsset::name`.
pub name: String,
/// Download URL, copied from `GitHubReleaseAsset::browser_download_url`.
pub url: String,
/// Asset size, copied from `GitHubReleaseAsset::size`.
pub size: i64,
}
/// Keep only the release assets that are AppImage files.
///
/// An asset qualifies when its name ends with `.appimage`, compared
/// case-insensitively. Companion files such as `*.AppImage.zsync` therefore
/// do not qualify. Input order is preserved.
pub fn filter_appimage_assets(assets: &[GitHubReleaseAsset]) -> Vec<AppImageAsset> {
    assets
        .iter()
        // One suffix test is enough: anything ending in `.zsync` cannot also
        // end in `.appimage`, so no separate exclusion pass is needed.
        .filter(|asset| asset.name.to_lowercase().ends_with(".appimage"))
        .map(|asset| AppImageAsset {
            name: asset.name.clone(),
            url: asset.browser_download_url.clone(),
            size: asset.size,
        })
        .collect()
}
/// Name of the CPU architecture this binary was compiled for, used to match
/// AppImage file names (for example `"x86_64"` or `"aarch64"`).
pub fn detect_arch() -> &'static str {
    // `ARCH` already yields "x86_64" / "aarch64" on those targets, and the
    // target's own name everywhere else.
    std::env::consts::ARCH
}
/// Pick the AppImage asset that best fits the architecture of this build.
///
/// Selection order, all comparisons case-insensitive on the asset name:
/// 1. the first asset whose name contains the architecture string returned by
///    `detect_arch()` (e.g. `x86_64`);
/// 2. otherwise the first asset whose name contains a well-known alias of that
///    architecture (`amd64` for `x86_64`, `arm64` for `aarch64`);
/// 3. otherwise the first asset in the slice.
///
/// Returns `None` only when `assets` is empty.
pub fn pick_best_asset(assets: &[AppImageAsset]) -> Option<&AppImageAsset> {
    // Lowercase once instead of once per candidate.
    let arch = detect_arch().to_lowercase();
    // Release pipelines frequently name artifacts with these alternative
    // spellings, which a plain substring match on `arch` would miss.
    let aliases: &[&str] = match arch.as_str() {
        "x86_64" => &["amd64"],
        "aarch64" => &["arm64"],
        _ => &[],
    };

    let lowered: Vec<String> = assets.iter().map(|a| a.name.to_lowercase()).collect();

    let exact = lowered.iter().position(|name| name.contains(arch.as_str()));
    let by_alias = || {
        lowered
            .iter()
            .position(|name| aliases.iter().any(|alias| name.contains(*alias)))
    };

    exact
        .or_else(by_alias)
        .and_then(|index| assets.get(index))
        .or_else(|| assets.first())
}
// --- Enrichment logic ---
/// Enrich a catalog app with repo-level info (stars, pushed_at, description).
pub fn enrich_app_repo_info(
db: &Database,
app_id: i64,
owner: &str,
repo: &str,
token: &str,
) -> Result<u32, String> {
let (info, remaining) = fetch_repo_info(owner, repo, token)?;
db.update_catalog_app_github_metadata(
app_id, info.stargazers_count, info.pushed_at.as_deref(), info.description.as_deref(),
).map_err(|e| format!("DB error: {}", e))?;
Ok(remaining)
}
/// Fetch the latest release of `owner/repo` and store its details on the
/// catalog app `app_id`.
///
/// Stored values:
/// - version: the release tag with one leading `v` removed;
/// - publication date: passed through as received;
/// - download count: sum over all assets, or `None` when that sum is not positive;
/// - download URL: URL of the asset chosen by `pick_best_asset`, if any;
/// - assets JSON: the AppImage assets serialized as JSON, or `None` when
///   there are none (or serialization fails).
///
/// Returns the remaining GitHub rate-limit budget reported by the request.
///
/// # Errors
/// Propagates fetch errors unchanged and reports database failures as
/// `"DB error: ..."`.
pub fn enrich_app_release_info(
    db: &Database,
    app_id: i64,
    owner: &str,
    repo: &str,
    token: &str,
) -> Result<u32, String> {
    let (release, remaining) = fetch_release_info(owner, repo, token)?;

    // "v1.2.3" -> "1.2.3"; tags without the prefix are kept verbatim.
    let version = match release.tag_name.strip_prefix('v') {
        Some(rest) => rest.to_string(),
        None => release.tag_name.clone(),
    };

    let total_downloads: i64 = release
        .assets
        .iter()
        .fold(0, |acc, asset| acc + asset.download_count);
    let download_count = (total_downloads > 0).then_some(total_downloads);

    let appimage_assets = filter_appimage_assets(&release.assets);
    let best_url = pick_best_asset(&appimage_assets).map(|asset| asset.url.as_str());
    let assets_json = (!appimage_assets.is_empty())
        .then(|| serde_json::to_string(&appimage_assets).ok())
        .flatten();

    db.update_catalog_app_release_info(
        app_id,
        Some(&version),
        release.published_at.as_deref(),
        download_count,
        best_url,
        assets_json.as_deref(),
    )
    .map_err(|e| format!("DB error: {}", e))?;

    Ok(remaining)
}
/// A GitHub release with body text for changelog display.
///
/// Deserialized by `fetch_recent_releases` from the `.../releases` list endpoint.
#[derive(Debug, serde::Deserialize)]
struct GitHubRelease {
/// Git tag of the release; one leading `v` is stripped when building the
/// stored history entry.
tag_name: String,
/// Raw `published_at` string, if present; only the part before the first
/// `T` is kept as the history entry's date.
published_at: Option<String>,
/// Release notes text, if present; stored as the entry's `description`
/// when non-empty.
body: Option<String>,
}
/// Fetch the most recent releases of `owner/repo` (the request asks for at
/// most 10 per page).
///
/// Returns the parsed releases plus the remaining rate-limit budget reported
/// by the response.
///
/// # Errors
/// Propagates request errors from `github_get` and reports JSON decoding
/// failures as `"Parse error: ..."`.
fn fetch_recent_releases(owner: &str, repo: &str, token: &str) -> Result<(Vec<GitHubRelease>, u32), String> {
    let endpoint = format!("https://api.github.com/repos/{}/{}/releases?per_page=10", owner, repo);
    github_get(&endpoint, token).and_then(|(payload, remaining)| {
        serde_json::from_str::<Vec<GitHubRelease>>(&payload)
            .map(|releases| (releases, remaining))
            .map_err(|e| format!("Parse error: {}", e))
    })
}
/// Enrich a catalog app with release history (version, date, description for last 10 releases).
/// Only populates if the existing release_history is empty.
pub fn enrich_app_release_history(
db: &Database,
app_id: i64,
owner: &str,
repo: &str,
token: &str,
) -> Result<u32, String> {
// Check if release_history is already populated (from AppStream or prior enrichment)
if let Ok(Some(app)) = db.get_catalog_app(app_id) {
if app.release_history.as_ref().is_some_and(|h| !h.is_empty()) {
return Ok(u32::MAX); // already has data, skip
}
}
let (releases, remaining) = fetch_recent_releases(owner, repo, token)?;
if releases.is_empty() {
return Ok(remaining);
}
// Convert to the same JSON format used by AppStream: [{version, date, description}]
let history: Vec<serde_json::Value> = releases.iter().map(|r| {
let version = r.tag_name.strip_prefix('v')
.unwrap_or(&r.tag_name)
.to_string();
let date = r.published_at.as_deref()
.and_then(|d| d.split('T').next())
.unwrap_or("");
let mut obj = serde_json::json!({
"version": version,
"date": date,
});
if let Some(ref body) = r.body {
if !body.is_empty() {
obj["description"] = serde_json::Value::String(body.clone());
}
}
obj
}).collect();
let json = serde_json::to_string(&history)
.map_err(|e| format!("JSON error: {}", e))?;
db.update_catalog_app_release_history(app_id, &json)
.map_err(|e| format!("DB error: {}", e))?;
Ok(remaining)
}
/// Download the README of `owner/repo` and store it on catalog app `app_id`.
///
/// Returns the remaining GitHub rate-limit budget reported by the request.
///
/// # Errors
/// Propagates fetch/decoding errors unchanged and reports database failures
/// as `"DB error: ..."`.
pub fn enrich_app_readme(
    db: &Database,
    app_id: i64,
    owner: &str,
    repo: &str,
    token: &str,
) -> Result<u32, String> {
    let (text, remaining) = fetch_readme(owner, repo, token)?;
    match db.update_catalog_app_readme(app_id, &text) {
        Ok(_) => Ok(remaining),
        Err(e) => Err(format!("DB error: {}", e)),
    }
}
/// Background enrichment: process one batch of catalog apps that have not been
/// enriched yet.
///
/// For every app in the batch that has both a GitHub owner and repo recorded,
/// repository metadata is fetched and stored, followed by a best-effort fetch
/// of the release history. `on_progress(done, total)` is invoked after each
/// successfully enriched app when the progress query succeeds. A one-second
/// pause follows every processed app to stay polite towards the API.
///
/// Returns `(count_enriched, should_continue)`. `should_continue` is `false`
/// when there was nothing to do, when the remaining rate-limit budget dropped
/// below 5, or when no app in the batch could be enriched.
///
/// # Errors
/// Returns `"DB error: ..."` when the batch of unenriched apps cannot be
/// loaded. Per-app failures are logged and do not abort the batch.
pub fn background_enrich_batch(
    db: &Database,
    token: &str,
    batch_size: i32,
    on_progress: &dyn Fn(i64, i64),
) -> Result<(u32, bool), String> {
    let apps = db
        .get_unenriched_catalog_apps(batch_size)
        .map_err(|e| format!("DB error: {}", e))?;
    if apps.is_empty() {
        return Ok((0, false));
    }

    let mut enriched = 0u32;
    for app in &apps {
        // Apps without a known GitHub location cannot be enriched.
        let (Some(owner), Some(repo)) = (app.github_owner.as_deref(), app.github_repo.as_deref())
        else {
            continue;
        };

        match enrich_app_repo_info(db, app.id, owner, repo, token) {
            Ok(repo_remaining) => {
                enriched += 1;

                // Release history (changelog data) is best-effort: a failure
                // is logged but does not undo the successful enrichment.
                // The history call reports `u32::MAX` when it made no request,
                // so `min` always yields the most recent real budget.
                let remaining = match enrich_app_release_history(db, app.id, owner, repo, token) {
                    Ok(history_remaining) => repo_remaining.min(history_remaining),
                    Err(e) => {
                        log::warn!(
                            "Failed to fetch release history for {}/{}: {}",
                            owner, repo, e
                        );
                        repo_remaining
                    }
                };

                if let Ok((done, total)) = db.catalog_enrichment_progress() {
                    on_progress(done, total);
                }

                // Stop if rate limit is getting low
                if remaining < 5 {
                    log::info!("GitHub rate limit low ({}), pausing enrichment", remaining);
                    return Ok((enriched, false));
                }
            }
            Err(e) => {
                log::warn!("Failed to enrich {}/{}: {}", owner, repo, e);
                // Mark as enriched anyway so we don't retry forever
                db.update_catalog_app_github_metadata(app.id, 0, None, None).ok();
            }
        }

        // Sleep between calls to be respectful
        std::thread::sleep(std::time::Duration::from_secs(1));
    }

    Ok((enriched, enriched > 0))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected `(owner, repo)` result wrapped the way the parsers return it.
    fn repo_pair(owner: &str, repo: &str) -> Option<(String, String)> {
        Some((owner.to_owned(), repo.to_owned()))
    }

    /// Release asset fixture hosted under the dummy `u/r` repository.
    fn release_asset(name: &str, download_count: i64, size: i64) -> GitHubReleaseAsset {
        GitHubReleaseAsset {
            name: name.to_owned(),
            browser_download_url: format!("https://github.com/u/r/releases/download/v1/{}", name),
            download_count,
            size,
        }
    }

    /// Stored-asset fixture.
    fn appimage(name: &str, url: &str, size: i64) -> AppImageAsset {
        AppImageAsset {
            name: name.to_owned(),
            url: url.to_owned(),
            size,
        }
    }

    #[test]
    fn test_extract_github_repo_from_download() {
        let download =
            "https://github.com/nickvdp/deno-spreadsheets/releases/download/v0.3.0/app.AppImage";
        assert_eq!(
            extract_github_repo(None, download),
            repo_pair("nickvdp", "deno-spreadsheets")
        );
    }

    #[test]
    fn test_extract_github_repo_from_homepage() {
        let homepage = Some("https://github.com/nickvdp/deno-spreadsheets");
        assert_eq!(
            extract_github_repo(homepage, "https://example.com/download.AppImage"),
            repo_pair("nickvdp", "deno-spreadsheets")
        );
    }

    #[test]
    fn test_extract_github_repo_with_git_suffix() {
        let homepage = Some("https://github.com/user/repo.git");
        assert_eq!(
            extract_github_repo(homepage, "https://example.com/download.AppImage"),
            repo_pair("user", "repo")
        );
    }

    #[test]
    fn test_extract_github_repo_non_github() {
        let homepage = Some("https://gitlab.com/user/repo");
        assert_eq!(
            extract_github_repo(homepage, "https://sourceforge.net/download.AppImage"),
            None
        );
    }

    #[test]
    fn test_parse_github_url_empty() {
        for url in ["", "https://github.com/", "https://github.com/user"] {
            assert_eq!(parse_github_url(url), None);
        }
    }

    #[test]
    fn test_filter_appimage_assets() {
        let assets = vec![
            release_asset("app-x86_64.AppImage", 100, 50_000_000),
            release_asset("app-aarch64.AppImage", 20, 48_000_000),
            release_asset("app-x86_64.AppImage.zsync", 5, 1000),
            release_asset("source.tar.gz", 10, 2_000_000),
        ];

        let filtered = filter_appimage_assets(&assets);

        assert_eq!(filtered.len(), 2);
        assert_eq!(filtered[0].name, "app-x86_64.AppImage");
        assert_eq!(filtered[1].name, "app-aarch64.AppImage");
    }

    #[test]
    fn test_pick_best_asset_prefers_arch() {
        let assets = vec![
            appimage("app-aarch64.AppImage", "https://example.com/aarch64", 48_000_000),
            appimage("app-x86_64.AppImage", "https://example.com/x86_64", 50_000_000),
        ];

        let best = pick_best_asset(&assets).unwrap();

        // Whichever of the two architectures the tests run on must be chosen.
        assert!(best.name.contains(detect_arch()));
    }

    #[test]
    fn test_pick_best_asset_empty() {
        let assets: Vec<AppImageAsset> = Vec::new();
        assert!(pick_best_asset(&assets).is_none());
    }

    #[test]
    fn test_pick_best_asset_single() {
        let assets = vec![appimage("app.AppImage", "https://example.com/app", 50_000_000)];

        let best = pick_best_asset(&assets).unwrap();

        assert_eq!(best.name, "app.AppImage");
    }
}