Add GitHub metadata enrichment for catalog apps

This commit is contained in:
2026-02-28 16:49:13 +02:00
parent f22438d960
commit 848f4e7de7
15 changed files with 3027 additions and 224 deletions

View File

@@ -3,6 +3,7 @@ use std::io::Write;
use std::path::{Path, PathBuf};
use super::database::Database;
use super::github_enrichment;
/// A catalog source that can be synced to discover available AppImages.
#[derive(Debug, Clone)]
@@ -53,27 +54,71 @@ pub struct CatalogApp {
pub homepage: Option<String>,
pub file_size: Option<u64>,
pub architecture: Option<String>,
pub screenshots: Vec<String>,
pub license: Option<String>,
/// GitHub link URL from the feed (e.g. "https://github.com/user/repo")
pub github_link: Option<String>,
}
/// Default AppImageHub registry URL.
const APPIMAGEHUB_API_URL: &str = "https://appimage.github.io/feed.json";
/// Sync a catalog source - fetch the index and store entries in the database.
/// Progress updates sent during catalog sync.
///
/// Emitted in order by `sync_catalog_with_progress`: `FetchingFeed`, then
/// `FeedFetched`, then one `CachingIcon` per app, then one `SavingApps` per
/// app, and finally `Done`.
#[derive(Debug, Clone)]
pub enum SyncProgress {
    /// Fetching the feed from the remote source.
    FetchingFeed,
    /// Feed fetched, total number of apps found.
    FeedFetched { total: u32 },
    /// Caching icon for an app. `current` is 1-based.
    CachingIcon { current: u32, total: u32, app_name: String },
    /// Saving apps to the database. `current` is 1-based.
    SavingApps { current: u32, total: u32 },
    /// Sync complete. `total` is the number of apps processed.
    Done { total: u32 },
}
pub fn sync_catalog(db: &Database, source: &CatalogSource) -> Result<u32, CatalogError> {
sync_catalog_with_progress(db, source, &|_| {})
}
pub fn sync_catalog_with_progress(
db: &Database,
source: &CatalogSource,
on_progress: &dyn Fn(SyncProgress),
) -> Result<u32, CatalogError> {
on_progress(SyncProgress::FetchingFeed);
let apps = match source.source_type {
CatalogType::AppImageHub => fetch_appimage_hub()?,
CatalogType::Custom => fetch_custom_catalog(&source.url)?,
CatalogType::GitHubSearch => {
// GitHub search requires a token and is more complex - stub for now
log::warn!("GitHub catalog search not yet implemented");
Vec::new()
}
};
let total = apps.len() as u32;
on_progress(SyncProgress::FeedFetched { total });
// Cache icons with progress reporting
let icon_count = cache_catalog_icons_with_progress(&apps, on_progress);
log::info!("Cached {} catalog icons", icon_count);
let source_id = source.id.ok_or(CatalogError::NoSourceId)?;
let mut count = 0u32;
for app in &apps {
count += 1;
on_progress(SyncProgress::SavingApps { current: count, total });
let screenshots_str = if app.screenshots.is_empty() {
None
} else {
Some(app.screenshots.join(";"))
};
db.insert_catalog_app(
source_id,
&app.name,
@@ -85,12 +130,25 @@ pub fn sync_catalog(db: &Database, source: &CatalogSource) -> Result<u32, Catalo
app.homepage.as_deref(),
app.file_size.map(|s| s as i64),
app.architecture.as_deref(),
screenshots_str.as_deref(),
app.license.as_deref(),
).ok();
count += 1;
// Extract and store GitHub owner/repo
if let Some((owner, repo)) = github_enrichment::extract_github_repo(
app.github_link.as_deref().or(app.homepage.as_deref()),
&app.download_url,
) {
// Get the app ID we just inserted/updated
if let Ok(Some(db_app)) = db.get_catalog_app_by_source_and_name(source_id, &app.name) {
db.update_catalog_app_github_repo(db_app, &owner, &repo).ok();
}
}
}
db.update_catalog_source_sync(source_id, count as i32).ok();
on_progress(SyncProgress::Done { total: count });
Ok(count)
}
@@ -152,9 +210,15 @@ fn fetch_appimage_hub() -> Result<Vec<CatalogApp>, CatalogError> {
let apps: Vec<CatalogApp> = feed.items.into_iter().filter_map(|item| {
// AppImageHub items need at least a name and a link
let name = item.name?;
let download_url = item.links.unwrap_or_default().into_iter()
let links = item.links.unwrap_or_default();
let download_url = links.iter()
.find(|l| l.r#type == "Download")
.map(|l| l.url)?;
.map(|l| l.url.clone())?;
// Extract GitHub link from feed links
let github_link = links.iter()
.find(|l| l.r#type.to_lowercase().contains("github"))
.map(|l| l.url.clone());
Some(CatalogApp {
name,
@@ -172,6 +236,9 @@ fn fetch_appimage_hub() -> Result<Vec<CatalogApp>, CatalogError> {
}),
file_size: None,
architecture: None,
screenshots: item.screenshots.unwrap_or_default().into_iter().flatten().collect(),
license: item.license,
github_link,
})
}).collect();
@@ -200,6 +267,9 @@ fn fetch_custom_catalog(url: &str) -> Result<Vec<CatalogApp>, CatalogError> {
homepage: item.homepage,
file_size: item.file_size,
architecture: item.architecture,
screenshots: Vec::new(),
license: None,
github_link: None,
}).collect())
}
@@ -226,6 +296,126 @@ pub fn get_sources(db: &Database) -> Vec<CatalogSource> {
}).collect()
}
/// Base URL for AppImageHub database assets (icons, screenshots).
pub const APPIMAGEHUB_DATABASE_URL: &str = "https://appimage.github.io/database/";
/// Get the icon cache directory, creating it if needed.
///
/// Resolves to `<cache_dir>/driftwood/icons`, falling back to `/tmp` when no
/// platform cache directory can be determined.
pub fn icon_cache_dir() -> PathBuf {
    let base = dirs::cache_dir().unwrap_or_else(|| PathBuf::from("/tmp"));
    let dir = base.join("driftwood").join("icons");
    // Best-effort create; a failure surfaces later when files are written.
    fs::create_dir_all(&dir).ok();
    dir
}
/// Get the screenshot cache directory, creating it if needed.
///
/// Resolves to `<cache_dir>/driftwood/screenshots`, falling back to `/tmp`
/// when no platform cache directory can be determined.
pub fn screenshot_cache_dir() -> PathBuf {
    let base = dirs::cache_dir().unwrap_or_else(|| PathBuf::from("/tmp"));
    let dir = base.join("driftwood").join("screenshots");
    // Best-effort create; a failure surfaces later when files are written.
    fs::create_dir_all(&dir).ok();
    dir
}
/// Resolve an asset path to a full URL (handles relative paths from AppImageHub).
///
/// Absolute http(s) URLs pass through untouched; anything else is treated as
/// a path relative to [`APPIMAGEHUB_DATABASE_URL`].
fn resolve_asset_url(path: &str) -> String {
    let is_absolute = path.starts_with("http://") || path.starts_with("https://");
    match is_absolute {
        true => path.to_string(),
        false => format!("{}{}", APPIMAGEHUB_DATABASE_URL, path),
    }
}
/// Download a file from a URL to a local path.
///
/// Streams the response body to disk in 64 KiB chunks. Read failures map to
/// `CatalogError::Network`, filesystem failures to `CatalogError::Io`.
fn download_file(url: &str, dest: &Path) -> Result<(), CatalogError> {
    let response = ureq::get(url)
        .call()
        .map_err(|e| CatalogError::Network(e.to_string()))?;
    let mut out = fs::File::create(dest)
        .map_err(|e| CatalogError::Io(e.to_string()))?;
    let mut body = response.into_body().into_reader();
    let mut chunk = [0u8; 65536];
    loop {
        let read = body.read(&mut chunk)
            .map_err(|e| CatalogError::Network(e.to_string()))?;
        if read == 0 {
            // End of stream — the whole body has been written out.
            return Ok(());
        }
        out.write_all(&chunk[..read])
            .map_err(|e| CatalogError::Io(e.to_string()))?;
    }
}
/// Sanitize a name for use as a filename.
///
/// Alphanumeric characters (Unicode), `-` and `_` pass through unchanged;
/// every other character becomes `_`.
pub fn sanitize_filename(name: &str) -> String {
    name.chars()
        .map(|c| match c {
            '-' | '_' => c,
            c if c.is_alphanumeric() => c,
            _ => '_',
        })
        .collect()
}
/// Download icons for all catalog apps that have icon_url set.
/// Saves to ~/.cache/driftwood/icons/{sanitized_name}.png
fn cache_catalog_icons(apps: &[CatalogApp]) -> u32 {
cache_catalog_icons_with_progress(apps, &|_| {})
}
/// Download icons for the given apps, reporting a `CachingIcon` progress
/// event per app. Returns the number of icons present in the cache afterwards
/// (newly downloaded plus already cached).
fn cache_catalog_icons_with_progress(apps: &[CatalogApp], on_progress: &dyn Fn(SyncProgress)) -> u32 {
    let cache_dir = icon_cache_dir();
    let total = apps.len() as u32;
    let mut cached = 0u32;
    for (idx, app) in apps.iter().enumerate() {
        on_progress(SyncProgress::CachingIcon {
            current: idx as u32 + 1,
            total,
            app_name: app.name.clone(),
        });
        // Apps without an icon URL have nothing to cache.
        let icon_url = match app.icon_url {
            Some(ref url) => url,
            None => continue,
        };
        let dest = cache_dir.join(format!("{}.png", sanitize_filename(&app.name)));
        // Skip the download when a previous sync already cached this icon.
        if dest.exists() {
            cached += 1;
            continue;
        }
        match download_file(&resolve_asset_url(icon_url), &dest) {
            Ok(()) => {
                cached += 1;
                log::debug!("Cached icon for {}", app.name);
            }
            Err(e) => {
                // Icon failures are non-fatal; the sync continues without it.
                log::debug!("Failed to cache icon for {}: {}", app.name, e);
            }
        }
    }
    cached
}
/// Download a screenshot to the cache. Returns the local path on success.
///
/// The file is stored as `{sanitized_app_name}_{index}.png`; an existing
/// cached file short-circuits the download.
pub fn cache_screenshot(app_name: &str, screenshot_path: &str, index: usize) -> Result<PathBuf, CatalogError> {
    let dest = screenshot_cache_dir()
        .join(format!("{}_{}.png", sanitize_filename(app_name), index));
    if !dest.exists() {
        download_file(&resolve_asset_url(screenshot_path), &dest)?;
    }
    Ok(dest)
}
// --- AppImageHub feed format ---
#[derive(Debug, serde::Deserialize)]
@@ -241,6 +431,8 @@ struct AppImageHubItem {
authors: Option<Vec<AppImageHubAuthor>>,
links: Option<Vec<AppImageHubLink>>,
icons: Option<Vec<Option<String>>>,
screenshots: Option<Vec<Option<String>>>,
license: Option<String>,
}
#[derive(Debug, serde::Deserialize)]

View File

@@ -98,6 +98,16 @@ pub struct CatalogApp {
pub icon_url: Option<String>,
pub homepage: Option<String>,
pub license: Option<String>,
pub screenshots: Option<String>,
pub github_owner: Option<String>,
pub github_repo: Option<String>,
pub github_stars: Option<i64>,
pub github_downloads: Option<i64>,
pub latest_version: Option<String>,
pub release_date: Option<String>,
pub github_enriched_at: Option<String>,
pub github_download_url: Option<String>,
pub github_release_assets: Option<String>,
}
#[derive(Debug, Clone)]
@@ -400,6 +410,22 @@ impl Database {
self.migrate_to_v11()?;
}
if current_version < 12 {
self.migrate_to_v12()?;
}
if current_version < 13 {
self.migrate_to_v13()?;
}
if current_version < 14 {
self.migrate_to_v14()?;
}
if current_version < 15 {
self.migrate_to_v15()?;
}
// Ensure all expected columns exist (repairs DBs where a migration
// was updated after it had already run on this database)
self.ensure_columns()?;
@@ -838,6 +864,72 @@ impl Database {
Ok(())
}
/// v12: add screenshot and license columns to catalog_apps.
fn migrate_to_v12(&self) -> SqlResult<()> {
    // ALTER TABLE errors are deliberately ignored: the column may already
    // exist on a database that ran an earlier build of this migration.
    for definition in ["screenshots TEXT", "license TEXT"] {
        let stmt = format!("ALTER TABLE catalog_apps ADD COLUMN {}", definition);
        self.conn.execute(&stmt, []).ok();
    }
    self.conn
        .execute("UPDATE schema_version SET version = ?1", params![12])?;
    Ok(())
}
/// v13: deduplicate catalog_apps and enforce (source_id, name) uniqueness,
/// which lets insert_catalog_app rely on ON CONFLICT upserts.
fn migrate_to_v13(&self) -> SqlResult<()> {
    // Remove duplicate catalog_apps entries, keeping the row with the highest id
    // (most recent insert) per (source_id, name) pair
    self.conn.execute_batch(
        "DELETE FROM catalog_apps WHERE id NOT IN (
            SELECT MAX(id) FROM catalog_apps GROUP BY source_id, name
        );
        CREATE UNIQUE INDEX IF NOT EXISTS idx_catalog_apps_source_name
        ON catalog_apps(source_id, name);
        UPDATE schema_version SET version = 13;"
    )?;
    Ok(())
}
/// v14: add GitHub repository metadata columns to catalog_apps.
fn migrate_to_v14(&self) -> SqlResult<()> {
    let columns = [
        "github_owner TEXT",
        "github_repo TEXT",
        "github_stars INTEGER",
        "github_downloads INTEGER",
        "release_date TEXT",
        "github_enriched_at TEXT",
    ];
    // ALTER TABLE errors are deliberately ignored: the column may already
    // exist on a database that ran an earlier build of this migration.
    for definition in columns {
        let stmt = format!("ALTER TABLE catalog_apps ADD COLUMN {}", definition);
        self.conn.execute(&stmt, []).ok();
    }
    self.conn
        .execute("UPDATE schema_version SET version = ?1", params![14])?;
    Ok(())
}
/// v15: add GitHub release download columns to catalog_apps.
fn migrate_to_v15(&self) -> SqlResult<()> {
    // ALTER TABLE errors are deliberately ignored: the column may already
    // exist on a database that ran an earlier build of this migration.
    for definition in ["github_download_url TEXT", "github_release_assets TEXT"] {
        let stmt = format!("ALTER TABLE catalog_apps ADD COLUMN {}", definition);
        self.conn.execute(&stmt, []).ok();
    }
    self.conn
        .execute("UPDATE schema_version SET version = ?1", params![15])?;
    Ok(())
}
pub fn upsert_appimage(
&self,
path: &str,
@@ -2069,7 +2161,8 @@ impl Database {
limit: i32,
) -> SqlResult<Vec<CatalogApp>> {
let mut sql = String::from(
"SELECT id, name, description, categories, download_url, icon_url, homepage, architecture
"SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
FROM catalog_apps WHERE 1=1"
);
let mut params_list: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
@@ -2101,6 +2194,16 @@ impl Database {
icon_url: row.get(5)?,
homepage: row.get(6)?,
license: row.get(7)?,
screenshots: row.get(8)?,
github_owner: row.get(9)?,
github_repo: row.get(10)?,
github_stars: row.get(11)?,
github_downloads: row.get(12)?,
latest_version: row.get(13)?,
release_date: row.get(14)?,
github_enriched_at: row.get(15)?,
github_download_url: row.get(16)?,
github_release_assets: row.get(17)?,
})
})?;
@@ -2113,7 +2216,8 @@ impl Database {
pub fn get_catalog_app(&self, id: i64) -> SqlResult<Option<CatalogApp>> {
let result = self.conn.query_row(
"SELECT id, name, description, categories, download_url, icon_url, homepage, architecture
"SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
FROM catalog_apps WHERE id = ?1",
params![id],
|row| {
@@ -2126,6 +2230,16 @@ impl Database {
icon_url: row.get(5)?,
homepage: row.get(6)?,
license: row.get(7)?,
screenshots: row.get(8)?,
github_owner: row.get(9)?,
github_repo: row.get(10)?,
github_stars: row.get(11)?,
github_downloads: row.get(12)?,
latest_version: row.get(13)?,
release_date: row.get(14)?,
github_enriched_at: row.get(15)?,
github_download_url: row.get(16)?,
github_release_assets: row.get(17)?,
})
},
);
@@ -2136,6 +2250,65 @@ impl Database {
}
}
/// Get featured catalog apps. Apps with GitHub stars sort first (by stars desc),
/// then unenriched apps get a deterministic shuffle that rotates every 15 minutes.
pub fn get_featured_catalog_apps(&self, limit: i32) -> SqlResult<Vec<CatalogApp>> {
    // Time seed rotates every 15 minutes (900 seconds)
    let time_seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs() / 900;
    // Only apps with an icon, a description AND screenshots qualify as
    // featured material.
    let mut stmt = self.conn.prepare(
        "SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
        github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
        FROM catalog_apps
        WHERE icon_url IS NOT NULL AND icon_url != ''
        AND description IS NOT NULL AND description != ''
        AND screenshots IS NOT NULL AND screenshots != ''"
    )?;
    // Positional column indices must match the SELECT list above exactly.
    let rows = stmt.query_map([], |row| {
        Ok(CatalogApp {
            id: row.get(0)?,
            name: row.get(1)?,
            description: row.get(2)?,
            categories: row.get(3)?,
            download_url: row.get(4)?,
            icon_url: row.get(5)?,
            homepage: row.get(6)?,
            license: row.get(7)?,
            screenshots: row.get(8)?,
            github_owner: row.get(9)?,
            github_repo: row.get(10)?,
            github_stars: row.get(11)?,
            github_downloads: row.get(12)?,
            latest_version: row.get(13)?,
            release_date: row.get(14)?,
            github_enriched_at: row.get(15)?,
            github_download_url: row.get(16)?,
            github_release_assets: row.get(17)?,
        })
    })?;
    let mut apps: Vec<CatalogApp> = rows.collect::<SqlResult<Vec<_>>>()?;
    // Enriched apps (with stars) sort first by stars descending,
    // unenriched apps get the deterministic shuffle after them
    apps.sort_by(|a, b| {
        match (a.github_stars, b.github_stars) {
            (Some(sa), Some(sb)) => sb.cmp(&sa),
            (Some(_), None) => std::cmp::Ordering::Less,
            (None, Some(_)) => std::cmp::Ordering::Greater,
            (None, None) => {
                // Deterministic pseudo-shuffle: XOR the row id with the
                // time seed, then multiply by a large odd constant. The
                // ordering is stable within one 15-minute window and
                // changes between windows.
                let ha = (a.id as u64 ^ time_seed).wrapping_mul(0x517cc1b727220a95);
                let hb = (b.id as u64 ^ time_seed).wrapping_mul(0x517cc1b727220a95);
                ha.cmp(&hb)
            }
        }
    });
    apps.truncate(limit as usize);
    Ok(apps)
}
pub fn get_catalog_categories(&self) -> SqlResult<Vec<(String, u32)>> {
let mut stmt = self.conn.prepare(
"SELECT categories FROM catalog_apps WHERE categories IS NOT NULL AND categories != ''"
@@ -2172,12 +2345,26 @@ impl Database {
homepage: Option<&str>,
file_size: Option<i64>,
architecture: Option<&str>,
screenshots: Option<&str>,
license: Option<&str>,
) -> SqlResult<()> {
self.conn.execute(
"INSERT OR REPLACE INTO catalog_apps
(source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture, cached_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, datetime('now'))",
params![source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture],
"INSERT INTO catalog_apps
(source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture, screenshots, license, cached_at)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, datetime('now'))
ON CONFLICT(source_id, name) DO UPDATE SET
description = COALESCE(excluded.description, description),
categories = COALESCE(excluded.categories, categories),
latest_version = COALESCE(excluded.latest_version, latest_version),
download_url = excluded.download_url,
icon_url = COALESCE(excluded.icon_url, icon_url),
homepage = COALESCE(excluded.homepage, homepage),
file_size = COALESCE(excluded.file_size, file_size),
architecture = COALESCE(excluded.architecture, architecture),
screenshots = COALESCE(excluded.screenshots, screenshots),
license = COALESCE(excluded.license, license),
cached_at = datetime('now')",
params![source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture, screenshots, license],
)?;
Ok(())
}
@@ -2232,6 +2419,125 @@ impl Database {
})?;
rows.collect()
}
/// Look up a catalog app's row id by its (source_id, name) pair.
/// Returns `Ok(None)` when no matching row exists.
pub fn get_catalog_app_by_source_and_name(&self, source_id: i64, name: &str) -> SqlResult<Option<i64>> {
    match self.conn.query_row(
        "SELECT id FROM catalog_apps WHERE source_id = ?1 AND name = ?2",
        params![source_id, name],
        |row| row.get(0),
    ) {
        // A missing row is a normal outcome, not an error.
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        other => other.map(Some),
    }
}
// --- GitHub enrichment methods ---
/// Record the GitHub (owner, repo) pair for a catalog app row.
pub fn update_catalog_app_github_repo(
    &self,
    app_id: i64,
    owner: &str,
    repo: &str,
) -> SqlResult<()> {
    self.conn
        .execute(
            "UPDATE catalog_apps SET github_owner = ?2, github_repo = ?3 WHERE id = ?1",
            params![app_id, owner, repo],
        )
        // The affected-row count is irrelevant here.
        .map(|_| ())
}
/// Store repo-level GitHub metadata for a catalog app.
///
/// Sets the star count and stamps `github_enriched_at`. `pushed_at` is only
/// used as a fallback `release_date`: the COALESCE keeps any value that is
/// already stored.
pub fn update_catalog_app_github_metadata(
    &self,
    app_id: i64,
    stars: i64,
    pushed_at: Option<&str>,
) -> SqlResult<()> {
    self.conn.execute(
        "UPDATE catalog_apps SET github_stars = ?2, github_enriched_at = datetime('now') WHERE id = ?1",
        params![app_id, stars],
    )?;
    // Store pushed_at in release_date if no release info yet
    if let Some(pushed) = pushed_at {
        self.conn.execute(
            "UPDATE catalog_apps SET release_date = COALESCE(release_date, ?2) WHERE id = ?1",
            params![app_id, pushed],
        )?;
    }
    Ok(())
}
/// Store release-level GitHub metadata for a catalog app.
///
/// Every optional field goes through COALESCE, so passing `None` never
/// erases a previously stored value; `github_enriched_at` is always
/// refreshed to the current time.
pub fn update_catalog_app_release_info(
    &self,
    app_id: i64,
    version: Option<&str>,
    date: Option<&str>,
    downloads: Option<i64>,
    github_download_url: Option<&str>,
    github_release_assets: Option<&str>,
) -> SqlResult<()> {
    self.conn.execute(
        "UPDATE catalog_apps SET
        latest_version = COALESCE(?2, latest_version),
        release_date = COALESCE(?3, release_date),
        github_downloads = COALESCE(?4, github_downloads),
        github_download_url = COALESCE(?5, github_download_url),
        github_release_assets = COALESCE(?6, github_release_assets),
        github_enriched_at = datetime('now')
        WHERE id = ?1",
        params![app_id, version, date, downloads, github_download_url, github_release_assets],
    )?;
    Ok(())
}
/// Fetch up to `limit` catalog apps that have a GitHub repo linked
/// (`github_owner` set) but have never been enriched
/// (`github_enriched_at` is still NULL), in id order.
pub fn get_unenriched_catalog_apps(&self, limit: i32) -> SqlResult<Vec<CatalogApp>> {
    let mut stmt = self.conn.prepare(
        "SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
        github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
        FROM catalog_apps
        WHERE github_owner IS NOT NULL AND github_enriched_at IS NULL
        ORDER BY id
        LIMIT ?1"
    )?;
    // Positional column indices must match the SELECT list above exactly.
    let rows = stmt.query_map(params![limit], |row| {
        Ok(CatalogApp {
            id: row.get(0)?,
            name: row.get(1)?,
            description: row.get(2)?,
            categories: row.get(3)?,
            download_url: row.get(4)?,
            icon_url: row.get(5)?,
            homepage: row.get(6)?,
            license: row.get(7)?,
            screenshots: row.get(8)?,
            github_owner: row.get(9)?,
            github_repo: row.get(10)?,
            github_stars: row.get(11)?,
            github_downloads: row.get(12)?,
            latest_version: row.get(13)?,
            release_date: row.get(14)?,
            github_enriched_at: row.get(15)?,
            github_download_url: row.get(16)?,
            github_release_assets: row.get(17)?,
        })
    })?;
    rows.collect()
}
/// Report GitHub enrichment progress as `(enriched, total_with_github)`:
/// how many GitHub-linked catalog apps have been enriched versus how many
/// have a GitHub repo linked at all.
pub fn catalog_enrichment_progress(&self) -> SqlResult<(i64, i64)> {
    // Both queries are single-value COUNT(*) lookups.
    let count = |sql: &str| -> SqlResult<i64> {
        self.conn.query_row(sql, [], |row| row.get(0))
    };
    let enriched = count(
        "SELECT COUNT(*) FROM catalog_apps WHERE github_owner IS NOT NULL AND github_enriched_at IS NOT NULL",
    )?;
    let total_with_github = count(
        "SELECT COUNT(*) FROM catalog_apps WHERE github_owner IS NOT NULL",
    )?;
    Ok((enriched, total_with_github))
}
}
#[cfg(test)]
@@ -2402,7 +2708,7 @@ mod tests {
[],
|row| row.get(0),
).unwrap();
assert_eq!(version, 11);
assert_eq!(version, 15);
// All tables that should exist after the full v1-v7 migration chain
let expected_tables = [

View File

@@ -0,0 +1,394 @@
use super::database::Database;
// --- API response structs ---
/// Subset of the GitHub "get repository" API response
/// (`https://api.github.com/repos/{owner}/{repo}`); serde ignores all
/// other response fields.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubRepoInfo {
    /// Star count of the repository.
    pub stargazers_count: i64,
    /// ISO-8601 timestamp of the most recent push, if reported.
    pub pushed_at: Option<String>,
    /// Repository description, if set.
    pub description: Option<String>,
}
/// Subset of the GitHub "latest release" API response
/// (`/repos/{owner}/{repo}/releases/latest`).
#[derive(Debug, serde::Deserialize)]
pub struct GitHubReleaseInfo {
    /// Release tag, e.g. "v1.2.3".
    pub tag_name: String,
    /// ISO-8601 publish timestamp, if present.
    pub published_at: Option<String>,
    /// Downloadable files attached to the release.
    pub assets: Vec<GitHubReleaseAsset>,
}
/// A single downloadable asset attached to a GitHub release.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubReleaseAsset {
    /// Asset filename, e.g. "app-x86_64.AppImage".
    pub name: String,
    /// Direct download URL for the asset.
    pub browser_download_url: String,
    /// Number of times this asset has been downloaded.
    pub download_count: i64,
    /// Asset size in bytes.
    pub size: i64,
}
// --- URL parsing ---
/// Extract (owner, repo) from a GitHub URL.
/// Tries download_url first (most reliable for GitHub releases), then homepage.
pub fn extract_github_repo(homepage: Option<&str>, download_url: &str) -> Option<(String, String)> {
// Try download URL first - most AppImageHub entries point to GitHub releases
if let Some(pair) = parse_github_url(download_url) {
return Some(pair);
}
// Fallback to homepage
if let Some(hp) = homepage {
if let Some(pair) = parse_github_url(hp) {
return Some(pair);
}
}
None
}
/// Parse `github.com/{owner}/{repo}` from a URL, stripping .git suffix if present.
///
/// Query strings (`?...`) and fragments (`#...`) are dropped from the repo
/// segment *before* the ".git" suffix is trimmed, so
/// "github.com/user/repo.git?tab=readme" parses as ("user", "repo").
/// Returns `None` for non-GitHub URLs and for URLs missing an owner or repo.
fn parse_github_url(url: &str) -> Option<(String, String)> {
    // Accept both schemes; anything not hosted on github.com is rejected.
    let stripped = url
        .trim_start_matches("https://")
        .trim_start_matches("http://");
    let path = stripped.strip_prefix("github.com/")?;
    // Only the first two path segments matter: owner and repo.
    let mut segments = path.splitn(3, '/');
    let owner = segments.next()?;
    let raw_repo = segments.next()?;
    // Drop "?query" / "#fragment" first so a trailing ".git" is still trimmed.
    let repo = raw_repo
        .split(|c: char| c == '?' || c == '#')
        .next()
        .unwrap_or(raw_repo)
        .trim_end_matches(".git");
    if owner.is_empty() || repo.is_empty() {
        return None;
    }
    Some((owner.to_string(), repo.to_string()))
}
// --- API calls ---
/// Perform a GET request against the GitHub API.
///
/// Returns the response body together with the remaining rate-limit budget
/// parsed from the `x-ratelimit-remaining` header. An empty `token` means
/// unauthenticated access (lower rate limit); otherwise a Bearer token is
/// attached.
fn github_get(url: &str, token: &str) -> Result<(String, u32), String> {
    let mut req = ureq::get(url)
        .header("Accept", "application/vnd.github+json")
        .header("User-Agent", "Driftwood-AppImage-Manager");
    if !token.is_empty() {
        req = req.header("Authorization", &format!("Bearer {}", token));
    }
    let mut response = req.call()
        .map_err(|e| format!("GitHub API error: {}", e))?;
    // Parse rate limit header. A missing/unparseable header yields 0, which
    // callers treat as "budget exhausted" — conservative, stops enrichment.
    let remaining: u32 = response.headers()
        .get("x-ratelimit-remaining")
        .and_then(|v| v.to_str().ok())
        .and_then(|v| v.parse().ok())
        .unwrap_or(0);
    let body = response.body_mut().read_to_string()
        .map_err(|e| format!("Read error: {}", e))?;
    Ok((body, remaining))
}
/// Fetch repository-level metadata (stars, last push) from the GitHub API.
/// Returns the parsed info plus the remaining rate-limit budget.
pub fn fetch_repo_info(owner: &str, repo: &str, token: &str) -> Result<(GitHubRepoInfo, u32), String> {
    let endpoint = format!("https://api.github.com/repos/{}/{}", owner, repo);
    let (body, remaining) = github_get(&endpoint, token)?;
    serde_json::from_str::<GitHubRepoInfo>(&body)
        .map(|info| (info, remaining))
        .map_err(|e| format!("Parse error: {}", e))
}
/// Fetch the latest release (tag, publish date, assets) from the GitHub API.
/// Returns the parsed info plus the remaining rate-limit budget.
pub fn fetch_release_info(owner: &str, repo: &str, token: &str) -> Result<(GitHubReleaseInfo, u32), String> {
    let endpoint = format!("https://api.github.com/repos/{}/{}/releases/latest", owner, repo);
    let (body, remaining) = github_get(&endpoint, token)?;
    serde_json::from_str::<GitHubReleaseInfo>(&body)
        .map(|info| (info, remaining))
        .map_err(|e| format!("Parse error: {}", e))
}
// --- AppImage asset filtering ---
/// A simplified release asset for storage (JSON-serializable).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AppImageAsset {
    /// Asset filename.
    pub name: String,
    /// Direct download URL.
    pub url: String,
    /// Size in bytes.
    pub size: i64,
}
/// Filter release assets to only AppImage files.
///
/// Keeps assets whose name ends with ".appimage" (case-insensitive). A
/// single suffix check suffices: "x.appimage.zsync" does not end with
/// ".appimage", so zsync delta files are excluded without a second pass
/// (the previous include-then-exclude of ".zsync" was redundant).
pub fn filter_appimage_assets(assets: &[GitHubReleaseAsset]) -> Vec<AppImageAsset> {
    assets.iter()
        .filter(|a| a.name.to_lowercase().ends_with(".appimage"))
        .map(|a| AppImageAsset {
            name: a.name.clone(),
            url: a.browser_download_url.clone(),
            size: a.size,
        })
        .collect()
}
/// Detect the current system architecture string as used in AppImage filenames.
///
/// Resolved at compile time: exactly one of the cfg'd block expressions
/// survives and becomes the function's tail expression. "x86_64" and
/// "aarch64" are spelled out to match common AppImage naming; any other
/// target falls back to the compiler-reported architecture string.
pub fn detect_arch() -> &'static str {
    #[cfg(target_arch = "x86_64")]
    { "x86_64" }
    #[cfg(target_arch = "aarch64")]
    { "aarch64" }
    #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
    { std::env::consts::ARCH }
}
/// Pick the best AppImage asset for the current architecture.
/// Returns the matching asset, or the first one if no arch match.
/// Returns `None` when `assets` is empty.
pub fn pick_best_asset(assets: &[AppImageAsset]) -> Option<&AppImageAsset> {
    // Hoisted out of the closure: previously arch.to_lowercase() was
    // recomputed once per asset.
    let arch = detect_arch().to_lowercase();
    assets.iter()
        .find(|a| a.name.to_lowercase().contains(&arch))
        // Fall back to the first asset; on an empty slice this is None,
        // which subsumes the old explicit empty-check.
        .or_else(|| assets.first())
}
// --- Enrichment logic ---
/// Enrich a catalog app with repo-level info (stars, pushed_at).
///
/// Fetches the repository metadata from GitHub and persists it, returning
/// the remaining GitHub rate-limit budget.
pub fn enrich_app_repo_info(
    db: &Database,
    app_id: i64,
    owner: &str,
    repo: &str,
    token: &str,
) -> Result<u32, String> {
    let (repo_info, remaining) = fetch_repo_info(owner, repo, token)?;
    let stars = repo_info.stargazers_count;
    db.update_catalog_app_github_metadata(app_id, stars, repo_info.pushed_at.as_deref())
        .map_err(|e| format!("DB error: {}", e))
        .map(|_| remaining)
}
/// Enrich a catalog app with release info (version, date, downloads, assets).
///
/// Fetches the latest GitHub release, derives the version, total download
/// count and best AppImage asset, persists them, and returns the remaining
/// rate-limit budget.
pub fn enrich_app_release_info(
    db: &Database,
    app_id: i64,
    owner: &str,
    repo: &str,
    token: &str,
) -> Result<u32, String> {
    let (release, remaining) = fetch_release_info(owner, repo, token)?;

    // Normalize "v1.2.3"-style tags by stripping a single leading 'v'.
    let version = match release.tag_name.strip_prefix('v') {
        Some(rest) => rest.to_string(),
        None => release.tag_name.clone(),
    };

    // Total download count across every asset of the release; zero is
    // stored as None so it never overwrites an existing value.
    let download_total: i64 = release.assets.iter().map(|a| a.download_count).sum();
    let downloads = if download_total > 0 { Some(download_total) } else { None };

    // Keep only AppImage assets and remember the best match for this machine.
    let appimages = filter_appimage_assets(&release.assets);
    let best_url = pick_best_asset(&appimages).map(|a| a.url.as_str());
    let assets_json = if appimages.is_empty() {
        None
    } else {
        serde_json::to_string(&appimages).ok()
    };

    db.update_catalog_app_release_info(
        app_id,
        Some(&version),
        release.published_at.as_deref(),
        downloads,
        best_url,
        assets_json.as_deref(),
    )
    .map_err(|e| format!("DB error: {}", e))?;
    Ok(remaining)
}
/// Background enrichment: process a batch of unenriched apps.
/// Returns (count_enriched, should_continue).
///
/// For each app with a known GitHub owner/repo, fetches repo-level metadata
/// and stores it. Stops early (should_continue = false) when the remaining
/// GitHub rate-limit budget drops below 5, or when no unenriched apps are
/// left. Sleeps one second between API calls.
pub fn background_enrich_batch(
    db: &Database,
    token: &str,
    batch_size: i32,
    on_progress: &dyn Fn(i64, i64),
) -> Result<(u32, bool), String> {
    let apps = db.get_unenriched_catalog_apps(batch_size)
        .map_err(|e| format!("DB error: {}", e))?;
    if apps.is_empty() {
        // Nothing left to enrich — tell the caller to stop.
        return Ok((0, false));
    }
    let mut enriched = 0u32;
    for app in &apps {
        // Apps missing either half of the repo reference cannot be enriched.
        let owner = match app.github_owner.as_deref() {
            Some(o) => o,
            None => continue,
        };
        let repo = match app.github_repo.as_deref() {
            Some(r) => r,
            None => continue,
        };
        match enrich_app_repo_info(db, app.id, owner, repo, token) {
            Ok(remaining) => {
                enriched += 1;
                // Report progress
                if let Ok((done, total)) = db.catalog_enrichment_progress() {
                    on_progress(done, total);
                }
                // Stop if rate limit is getting low
                if remaining < 5 {
                    log::info!("GitHub rate limit low ({}), pausing enrichment", remaining);
                    return Ok((enriched, false));
                }
            }
            Err(e) => {
                log::warn!("Failed to enrich {}/{}: {}", owner, repo, e);
                // Mark as enriched anyway so we don't retry forever
                db.update_catalog_app_github_metadata(app.id, 0, None).ok();
            }
        }
        // Sleep between calls to be respectful
        std::thread::sleep(std::time::Duration::from_secs(1));
    }
    // NOTE(review): if every app in this batch failed, `enriched` is 0 and
    // should_continue is false even though more unenriched apps may remain
    // (failed rows were marked enriched, so they won't be refetched) —
    // confirm this early stop is intended.
    Ok((enriched, enriched > 0))
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- extract_github_repo / parse_github_url ---

    #[test]
    fn test_extract_github_repo_from_download() {
        // Release download URLs carry the owner/repo in the first two segments.
        let result = extract_github_repo(
            None,
            "https://github.com/nickvdp/deno-spreadsheets/releases/download/v0.3.0/app.AppImage",
        );
        assert_eq!(result, Some(("nickvdp".to_string(), "deno-spreadsheets".to_string())));
    }

    #[test]
    fn test_extract_github_repo_from_homepage() {
        // A non-GitHub download URL forces the homepage fallback path.
        let result = extract_github_repo(
            Some("https://github.com/nickvdp/deno-spreadsheets"),
            "https://example.com/download.AppImage",
        );
        assert_eq!(result, Some(("nickvdp".to_string(), "deno-spreadsheets".to_string())));
    }

    #[test]
    fn test_extract_github_repo_with_git_suffix() {
        // A trailing ".git" must be stripped from the repo name.
        let result = extract_github_repo(
            Some("https://github.com/user/repo.git"),
            "https://example.com/download.AppImage",
        );
        assert_eq!(result, Some(("user".to_string(), "repo".to_string())));
    }

    #[test]
    fn test_extract_github_repo_non_github() {
        // Neither URL is on github.com, so nothing can be extracted.
        let result = extract_github_repo(
            Some("https://gitlab.com/user/repo"),
            "https://sourceforge.net/download.AppImage",
        );
        assert_eq!(result, None);
    }

    #[test]
    fn test_parse_github_url_empty() {
        // Empty and owner-only URLs must not yield a pair.
        assert_eq!(parse_github_url(""), None);
        assert_eq!(parse_github_url("https://github.com/"), None);
        assert_eq!(parse_github_url("https://github.com/user"), None);
    }

    // --- filter_appimage_assets ---

    #[test]
    fn test_filter_appimage_assets() {
        let assets = vec![
            GitHubReleaseAsset {
                name: "app-x86_64.AppImage".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/app-x86_64.AppImage".to_string(),
                download_count: 100,
                size: 50_000_000,
            },
            GitHubReleaseAsset {
                name: "app-aarch64.AppImage".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/app-aarch64.AppImage".to_string(),
                download_count: 20,
                size: 48_000_000,
            },
            // zsync delta files and source tarballs must be filtered out.
            GitHubReleaseAsset {
                name: "app-x86_64.AppImage.zsync".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/app-x86_64.AppImage.zsync".to_string(),
                download_count: 5,
                size: 1000,
            },
            GitHubReleaseAsset {
                name: "source.tar.gz".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/source.tar.gz".to_string(),
                download_count: 10,
                size: 2_000_000,
            },
        ];
        let filtered = filter_appimage_assets(&assets);
        assert_eq!(filtered.len(), 2);
        assert_eq!(filtered[0].name, "app-x86_64.AppImage");
        assert_eq!(filtered[1].name, "app-aarch64.AppImage");
    }

    // --- pick_best_asset ---

    #[test]
    fn test_pick_best_asset_prefers_arch() {
        let assets = vec![
            AppImageAsset {
                name: "app-aarch64.AppImage".to_string(),
                url: "https://example.com/aarch64".to_string(),
                size: 48_000_000,
            },
            AppImageAsset {
                name: "app-x86_64.AppImage".to_string(),
                url: "https://example.com/x86_64".to_string(),
                size: 50_000_000,
            },
        ];
        let best = pick_best_asset(&assets).unwrap();
        // On x86_64 systems this should pick x86_64, on aarch64 it picks aarch64
        let arch = detect_arch();
        assert!(best.name.contains(arch));
    }

    #[test]
    fn test_pick_best_asset_empty() {
        // No assets -> no pick.
        let assets: Vec<AppImageAsset> = vec![];
        assert!(pick_best_asset(&assets).is_none());
    }

    #[test]
    fn test_pick_best_asset_single() {
        // A lone asset is returned even without an architecture match.
        let assets = vec![
            AppImageAsset {
                name: "app.AppImage".to_string(),
                url: "https://example.com/app".to_string(),
                size: 50_000_000,
            },
        ];
        let best = pick_best_asset(&assets).unwrap();
        assert_eq!(best.name, "app.AppImage");
    }
}

View File

@@ -4,6 +4,7 @@ pub mod backup;
pub mod catalog;
pub mod database;
pub mod discovery;
pub mod github_enrichment;
pub mod duplicates;
pub mod footprint;
pub mod fuse;