Add GitHub metadata enrichment for catalog apps
This commit is contained in:
@@ -3,6 +3,7 @@ use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use super::database::Database;
|
||||
use super::github_enrichment;
|
||||
|
||||
/// A catalog source that can be synced to discover available AppImages.
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -53,27 +54,71 @@ pub struct CatalogApp {
|
||||
pub homepage: Option<String>,
|
||||
pub file_size: Option<u64>,
|
||||
pub architecture: Option<String>,
|
||||
pub screenshots: Vec<String>,
|
||||
pub license: Option<String>,
|
||||
/// GitHub link URL from the feed (e.g. "https://github.com/user/repo")
|
||||
pub github_link: Option<String>,
|
||||
}
|
||||
|
||||
/// Default AppImageHub registry URL.
|
||||
const APPIMAGEHUB_API_URL: &str = "https://appimage.github.io/feed.json";
|
||||
|
||||
/// Sync a catalog source - fetch the index and store entries in the database.
|
||||
/// Progress updates sent during catalog sync.
///
/// Emitted through the callback of `sync_catalog_with_progress` so a UI can
/// display the fetch → icon-cache → save phases of a sync.
#[derive(Debug, Clone)]
pub enum SyncProgress {
    /// Fetching the feed from the remote source.
    FetchingFeed,
    /// Feed fetched, total number of apps found.
    FeedFetched { total: u32 },
    /// Caching icon for an app. `current` is 1-based (see the icon cache loop).
    CachingIcon { current: u32, total: u32, app_name: String },
    /// Saving apps to the database. `current` counts processed apps so far.
    SavingApps { current: u32, total: u32 },
    /// Sync complete. `total` is the number of apps processed.
    Done { total: u32 },
}
|
||||
|
||||
/// Sync a catalog source without progress reporting.
///
/// Convenience wrapper around [`sync_catalog_with_progress`] that supplies a
/// no-op callback. Returns the app count reported by the underlying sync.
pub fn sync_catalog(db: &Database, source: &CatalogSource) -> Result<u32, CatalogError> {
    sync_catalog_with_progress(db, source, &|_| {})
}
|
||||
|
||||
pub fn sync_catalog_with_progress(
|
||||
db: &Database,
|
||||
source: &CatalogSource,
|
||||
on_progress: &dyn Fn(SyncProgress),
|
||||
) -> Result<u32, CatalogError> {
|
||||
on_progress(SyncProgress::FetchingFeed);
|
||||
|
||||
let apps = match source.source_type {
|
||||
CatalogType::AppImageHub => fetch_appimage_hub()?,
|
||||
CatalogType::Custom => fetch_custom_catalog(&source.url)?,
|
||||
CatalogType::GitHubSearch => {
|
||||
// GitHub search requires a token and is more complex - stub for now
|
||||
log::warn!("GitHub catalog search not yet implemented");
|
||||
Vec::new()
|
||||
}
|
||||
};
|
||||
|
||||
let total = apps.len() as u32;
|
||||
on_progress(SyncProgress::FeedFetched { total });
|
||||
|
||||
// Cache icons with progress reporting
|
||||
let icon_count = cache_catalog_icons_with_progress(&apps, on_progress);
|
||||
log::info!("Cached {} catalog icons", icon_count);
|
||||
|
||||
let source_id = source.id.ok_or(CatalogError::NoSourceId)?;
|
||||
let mut count = 0u32;
|
||||
|
||||
for app in &apps {
|
||||
count += 1;
|
||||
on_progress(SyncProgress::SavingApps { current: count, total });
|
||||
|
||||
let screenshots_str = if app.screenshots.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(app.screenshots.join(";"))
|
||||
};
|
||||
|
||||
db.insert_catalog_app(
|
||||
source_id,
|
||||
&app.name,
|
||||
@@ -85,12 +130,25 @@ pub fn sync_catalog(db: &Database, source: &CatalogSource) -> Result<u32, Catalo
|
||||
app.homepage.as_deref(),
|
||||
app.file_size.map(|s| s as i64),
|
||||
app.architecture.as_deref(),
|
||||
screenshots_str.as_deref(),
|
||||
app.license.as_deref(),
|
||||
).ok();
|
||||
count += 1;
|
||||
|
||||
// Extract and store GitHub owner/repo
|
||||
if let Some((owner, repo)) = github_enrichment::extract_github_repo(
|
||||
app.github_link.as_deref().or(app.homepage.as_deref()),
|
||||
&app.download_url,
|
||||
) {
|
||||
// Get the app ID we just inserted/updated
|
||||
if let Ok(Some(db_app)) = db.get_catalog_app_by_source_and_name(source_id, &app.name) {
|
||||
db.update_catalog_app_github_repo(db_app, &owner, &repo).ok();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
db.update_catalog_source_sync(source_id, count as i32).ok();
|
||||
|
||||
on_progress(SyncProgress::Done { total: count });
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
@@ -152,9 +210,15 @@ fn fetch_appimage_hub() -> Result<Vec<CatalogApp>, CatalogError> {
|
||||
let apps: Vec<CatalogApp> = feed.items.into_iter().filter_map(|item| {
|
||||
// AppImageHub items need at least a name and a link
|
||||
let name = item.name?;
|
||||
let download_url = item.links.unwrap_or_default().into_iter()
|
||||
let links = item.links.unwrap_or_default();
|
||||
let download_url = links.iter()
|
||||
.find(|l| l.r#type == "Download")
|
||||
.map(|l| l.url)?;
|
||||
.map(|l| l.url.clone())?;
|
||||
|
||||
// Extract GitHub link from feed links
|
||||
let github_link = links.iter()
|
||||
.find(|l| l.r#type.to_lowercase().contains("github"))
|
||||
.map(|l| l.url.clone());
|
||||
|
||||
Some(CatalogApp {
|
||||
name,
|
||||
@@ -172,6 +236,9 @@ fn fetch_appimage_hub() -> Result<Vec<CatalogApp>, CatalogError> {
|
||||
}),
|
||||
file_size: None,
|
||||
architecture: None,
|
||||
screenshots: item.screenshots.unwrap_or_default().into_iter().flatten().collect(),
|
||||
license: item.license,
|
||||
github_link,
|
||||
})
|
||||
}).collect();
|
||||
|
||||
@@ -200,6 +267,9 @@ fn fetch_custom_catalog(url: &str) -> Result<Vec<CatalogApp>, CatalogError> {
|
||||
homepage: item.homepage,
|
||||
file_size: item.file_size,
|
||||
architecture: item.architecture,
|
||||
screenshots: Vec::new(),
|
||||
license: None,
|
||||
github_link: None,
|
||||
}).collect())
|
||||
}
|
||||
|
||||
@@ -226,6 +296,126 @@ pub fn get_sources(db: &Database) -> Vec<CatalogSource> {
|
||||
}).collect()
|
||||
}
|
||||
|
||||
/// Base URL for AppImageHub database assets (icons, screenshots).
|
||||
pub const APPIMAGEHUB_DATABASE_URL: &str = "https://appimage.github.io/database/";
|
||||
|
||||
/// Get the icon cache directory, creating it if needed.
|
||||
pub fn icon_cache_dir() -> PathBuf {
|
||||
let dir = dirs::cache_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("/tmp"))
|
||||
.join("driftwood")
|
||||
.join("icons");
|
||||
fs::create_dir_all(&dir).ok();
|
||||
dir
|
||||
}
|
||||
|
||||
/// Get the screenshot cache directory, creating it if needed.
|
||||
pub fn screenshot_cache_dir() -> PathBuf {
|
||||
let dir = dirs::cache_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("/tmp"))
|
||||
.join("driftwood")
|
||||
.join("screenshots");
|
||||
fs::create_dir_all(&dir).ok();
|
||||
dir
|
||||
}
|
||||
|
||||
/// Resolve an asset path to a full URL (handles relative paths from AppImageHub).
|
||||
fn resolve_asset_url(path: &str) -> String {
|
||||
if path.starts_with("http://") || path.starts_with("https://") {
|
||||
path.to_string()
|
||||
} else {
|
||||
format!("{}{}", APPIMAGEHUB_DATABASE_URL, path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Download a file from a URL to a local path.
|
||||
fn download_file(url: &str, dest: &Path) -> Result<(), CatalogError> {
|
||||
let response = ureq::get(url)
|
||||
.call()
|
||||
.map_err(|e| CatalogError::Network(e.to_string()))?;
|
||||
|
||||
let mut file = fs::File::create(dest)
|
||||
.map_err(|e| CatalogError::Io(e.to_string()))?;
|
||||
|
||||
let mut reader = response.into_body().into_reader();
|
||||
let mut buf = [0u8; 65536];
|
||||
loop {
|
||||
let n = reader.read(&mut buf)
|
||||
.map_err(|e| CatalogError::Network(e.to_string()))?;
|
||||
if n == 0 { break; }
|
||||
file.write_all(&buf[..n])
|
||||
.map_err(|e| CatalogError::Io(e.to_string()))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Sanitize a name for use as a filename.
///
/// Alphanumeric characters, '-' and '_' pass through unchanged; every other
/// character is replaced by '_'. The result has the same character count.
pub fn sanitize_filename(name: &str) -> String {
    let mut out = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch.is_alphanumeric() || matches!(ch, '-' | '_');
        out.push(if keep { ch } else { '_' });
    }
    out
}
|
||||
|
||||
/// Download icons for all catalog apps that have icon_url set.
/// Saves to ~/.cache/driftwood/icons/{sanitized_name}.png
///
/// Wrapper around [`cache_catalog_icons_with_progress`] with a no-op
/// progress callback. Returns the number of icons present in the cache.
fn cache_catalog_icons(apps: &[CatalogApp]) -> u32 {
    cache_catalog_icons_with_progress(apps, &|_| {})
}
|
||||
|
||||
fn cache_catalog_icons_with_progress(apps: &[CatalogApp], on_progress: &dyn Fn(SyncProgress)) -> u32 {
|
||||
let cache_dir = icon_cache_dir();
|
||||
let mut count = 0u32;
|
||||
let total = apps.len() as u32;
|
||||
|
||||
for (i, app) in apps.iter().enumerate() {
|
||||
on_progress(SyncProgress::CachingIcon {
|
||||
current: i as u32 + 1,
|
||||
total,
|
||||
app_name: app.name.clone(),
|
||||
});
|
||||
|
||||
if let Some(ref icon_url) = app.icon_url {
|
||||
let sanitized = sanitize_filename(&app.name);
|
||||
let dest = cache_dir.join(format!("{}.png", sanitized));
|
||||
|
||||
// Skip if already cached
|
||||
if dest.exists() {
|
||||
count += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let url = resolve_asset_url(icon_url);
|
||||
match download_file(&url, &dest) {
|
||||
Ok(_) => {
|
||||
count += 1;
|
||||
log::debug!("Cached icon for {}", app.name);
|
||||
}
|
||||
Err(e) => {
|
||||
log::debug!("Failed to cache icon for {}: {}", app.name, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
count
|
||||
}
|
||||
|
||||
/// Download a screenshot to the cache. Returns the local path on success.
|
||||
pub fn cache_screenshot(app_name: &str, screenshot_path: &str, index: usize) -> Result<PathBuf, CatalogError> {
|
||||
let cache_dir = screenshot_cache_dir();
|
||||
let sanitized = sanitize_filename(app_name);
|
||||
let dest = cache_dir.join(format!("{}_{}.png", sanitized, index));
|
||||
|
||||
if dest.exists() {
|
||||
return Ok(dest);
|
||||
}
|
||||
|
||||
let url = resolve_asset_url(screenshot_path);
|
||||
download_file(&url, &dest)?;
|
||||
Ok(dest)
|
||||
}
|
||||
|
||||
// --- AppImageHub feed format ---
|
||||
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
@@ -241,6 +431,8 @@ struct AppImageHubItem {
|
||||
authors: Option<Vec<AppImageHubAuthor>>,
|
||||
links: Option<Vec<AppImageHubLink>>,
|
||||
icons: Option<Vec<Option<String>>>,
|
||||
screenshots: Option<Vec<Option<String>>>,
|
||||
license: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
|
||||
@@ -98,6 +98,16 @@ pub struct CatalogApp {
|
||||
pub icon_url: Option<String>,
|
||||
pub homepage: Option<String>,
|
||||
pub license: Option<String>,
|
||||
pub screenshots: Option<String>,
|
||||
pub github_owner: Option<String>,
|
||||
pub github_repo: Option<String>,
|
||||
pub github_stars: Option<i64>,
|
||||
pub github_downloads: Option<i64>,
|
||||
pub latest_version: Option<String>,
|
||||
pub release_date: Option<String>,
|
||||
pub github_enriched_at: Option<String>,
|
||||
pub github_download_url: Option<String>,
|
||||
pub github_release_assets: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -400,6 +410,22 @@ impl Database {
|
||||
self.migrate_to_v11()?;
|
||||
}
|
||||
|
||||
if current_version < 12 {
|
||||
self.migrate_to_v12()?;
|
||||
}
|
||||
|
||||
if current_version < 13 {
|
||||
self.migrate_to_v13()?;
|
||||
}
|
||||
|
||||
if current_version < 14 {
|
||||
self.migrate_to_v14()?;
|
||||
}
|
||||
|
||||
if current_version < 15 {
|
||||
self.migrate_to_v15()?;
|
||||
}
|
||||
|
||||
// Ensure all expected columns exist (repairs DBs where a migration
|
||||
// was updated after it had already run on this database)
|
||||
self.ensure_columns()?;
|
||||
@@ -838,6 +864,72 @@ impl Database {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn migrate_to_v12(&self) -> SqlResult<()> {
|
||||
let new_columns = [
|
||||
"screenshots TEXT",
|
||||
"license TEXT",
|
||||
];
|
||||
for col in &new_columns {
|
||||
let sql = format!("ALTER TABLE catalog_apps ADD COLUMN {}", col);
|
||||
self.conn.execute(&sql, []).ok();
|
||||
}
|
||||
self.conn.execute(
|
||||
"UPDATE schema_version SET version = ?1",
|
||||
params![12],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Schema v13: deduplicate `catalog_apps` and enforce uniqueness per
/// (source_id, name) so later syncs can upsert instead of piling up rows.
fn migrate_to_v13(&self) -> SqlResult<()> {
    // Remove duplicate catalog_apps entries, keeping the row with the highest id
    // (most recent insert) per (source_id, name) pair
    self.conn.execute_batch(
        "DELETE FROM catalog_apps WHERE id NOT IN (
            SELECT MAX(id) FROM catalog_apps GROUP BY source_id, name
        );
        CREATE UNIQUE INDEX IF NOT EXISTS idx_catalog_apps_source_name
            ON catalog_apps(source_id, name);
        UPDATE schema_version SET version = 13;"
    )?;
    Ok(())
}
|
||||
|
||||
fn migrate_to_v14(&self) -> SqlResult<()> {
|
||||
let new_columns = [
|
||||
"github_owner TEXT",
|
||||
"github_repo TEXT",
|
||||
"github_stars INTEGER",
|
||||
"github_downloads INTEGER",
|
||||
"release_date TEXT",
|
||||
"github_enriched_at TEXT",
|
||||
];
|
||||
for col in &new_columns {
|
||||
let sql = format!("ALTER TABLE catalog_apps ADD COLUMN {}", col);
|
||||
self.conn.execute(&sql, []).ok();
|
||||
}
|
||||
self.conn.execute(
|
||||
"UPDATE schema_version SET version = ?1",
|
||||
params![14],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn migrate_to_v15(&self) -> SqlResult<()> {
|
||||
let new_columns = [
|
||||
"github_download_url TEXT",
|
||||
"github_release_assets TEXT",
|
||||
];
|
||||
for col in &new_columns {
|
||||
let sql = format!("ALTER TABLE catalog_apps ADD COLUMN {}", col);
|
||||
self.conn.execute(&sql, []).ok();
|
||||
}
|
||||
self.conn.execute(
|
||||
"UPDATE schema_version SET version = ?1",
|
||||
params![15],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn upsert_appimage(
|
||||
&self,
|
||||
path: &str,
|
||||
@@ -2069,7 +2161,8 @@ impl Database {
|
||||
limit: i32,
|
||||
) -> SqlResult<Vec<CatalogApp>> {
|
||||
let mut sql = String::from(
|
||||
"SELECT id, name, description, categories, download_url, icon_url, homepage, architecture
|
||||
"SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
|
||||
github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
|
||||
FROM catalog_apps WHERE 1=1"
|
||||
);
|
||||
let mut params_list: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
|
||||
@@ -2101,6 +2194,16 @@ impl Database {
|
||||
icon_url: row.get(5)?,
|
||||
homepage: row.get(6)?,
|
||||
license: row.get(7)?,
|
||||
screenshots: row.get(8)?,
|
||||
github_owner: row.get(9)?,
|
||||
github_repo: row.get(10)?,
|
||||
github_stars: row.get(11)?,
|
||||
github_downloads: row.get(12)?,
|
||||
latest_version: row.get(13)?,
|
||||
release_date: row.get(14)?,
|
||||
github_enriched_at: row.get(15)?,
|
||||
github_download_url: row.get(16)?,
|
||||
github_release_assets: row.get(17)?,
|
||||
})
|
||||
})?;
|
||||
|
||||
@@ -2113,7 +2216,8 @@ impl Database {
|
||||
|
||||
pub fn get_catalog_app(&self, id: i64) -> SqlResult<Option<CatalogApp>> {
|
||||
let result = self.conn.query_row(
|
||||
"SELECT id, name, description, categories, download_url, icon_url, homepage, architecture
|
||||
"SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
|
||||
github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
|
||||
FROM catalog_apps WHERE id = ?1",
|
||||
params![id],
|
||||
|row| {
|
||||
@@ -2126,6 +2230,16 @@ impl Database {
|
||||
icon_url: row.get(5)?,
|
||||
homepage: row.get(6)?,
|
||||
license: row.get(7)?,
|
||||
screenshots: row.get(8)?,
|
||||
github_owner: row.get(9)?,
|
||||
github_repo: row.get(10)?,
|
||||
github_stars: row.get(11)?,
|
||||
github_downloads: row.get(12)?,
|
||||
latest_version: row.get(13)?,
|
||||
release_date: row.get(14)?,
|
||||
github_enriched_at: row.get(15)?,
|
||||
github_download_url: row.get(16)?,
|
||||
github_release_assets: row.get(17)?,
|
||||
})
|
||||
},
|
||||
);
|
||||
@@ -2136,6 +2250,65 @@ impl Database {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get featured catalog apps. Apps with GitHub stars sort first (by stars desc),
/// then unenriched apps get a deterministic shuffle that rotates every 15 minutes.
///
/// Only apps with a non-empty icon, description, and screenshot list qualify.
/// Returns at most `limit` rows.
pub fn get_featured_catalog_apps(&self, limit: i32) -> SqlResult<Vec<CatalogApp>> {
    // Time seed rotates every 15 minutes (900 seconds)
    let time_seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs() / 900;

    let mut stmt = self.conn.prepare(
        "SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
        github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
        FROM catalog_apps
        WHERE icon_url IS NOT NULL AND icon_url != ''
        AND description IS NOT NULL AND description != ''
        AND screenshots IS NOT NULL AND screenshots != ''"
    )?;
    let rows = stmt.query_map([], |row| {
        Ok(CatalogApp {
            id: row.get(0)?,
            name: row.get(1)?,
            description: row.get(2)?,
            categories: row.get(3)?,
            download_url: row.get(4)?,
            icon_url: row.get(5)?,
            homepage: row.get(6)?,
            license: row.get(7)?,
            screenshots: row.get(8)?,
            github_owner: row.get(9)?,
            github_repo: row.get(10)?,
            github_stars: row.get(11)?,
            github_downloads: row.get(12)?,
            latest_version: row.get(13)?,
            release_date: row.get(14)?,
            github_enriched_at: row.get(15)?,
            github_download_url: row.get(16)?,
            github_release_assets: row.get(17)?,
        })
    })?;
    let mut apps: Vec<CatalogApp> = rows.collect::<SqlResult<Vec<_>>>()?;

    // Enriched apps (with stars) sort first by stars descending,
    // unenriched apps get the deterministic shuffle after them
    apps.sort_by(|a, b| {
        match (a.github_stars, b.github_stars) {
            (Some(sa), Some(sb)) => sb.cmp(&sa),
            (Some(_), None) => std::cmp::Ordering::Less,
            (None, Some(_)) => std::cmp::Ordering::Greater,
            (None, None) => {
                // Multiplicative hash of (id ^ seed): stable pseudo-random
                // order for a given 15-minute window, reshuffled afterwards.
                let ha = (a.id as u64 ^ time_seed).wrapping_mul(0x517cc1b727220a95);
                let hb = (b.id as u64 ^ time_seed).wrapping_mul(0x517cc1b727220a95);
                ha.cmp(&hb)
            }
        }
    });
    apps.truncate(limit as usize);
    Ok(apps)
}
|
||||
|
||||
pub fn get_catalog_categories(&self) -> SqlResult<Vec<(String, u32)>> {
|
||||
let mut stmt = self.conn.prepare(
|
||||
"SELECT categories FROM catalog_apps WHERE categories IS NOT NULL AND categories != ''"
|
||||
@@ -2172,12 +2345,26 @@ impl Database {
|
||||
homepage: Option<&str>,
|
||||
file_size: Option<i64>,
|
||||
architecture: Option<&str>,
|
||||
screenshots: Option<&str>,
|
||||
license: Option<&str>,
|
||||
) -> SqlResult<()> {
|
||||
self.conn.execute(
|
||||
"INSERT OR REPLACE INTO catalog_apps
|
||||
(source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture, cached_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, datetime('now'))",
|
||||
params![source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture],
|
||||
"INSERT INTO catalog_apps
|
||||
(source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture, screenshots, license, cached_at)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, datetime('now'))
|
||||
ON CONFLICT(source_id, name) DO UPDATE SET
|
||||
description = COALESCE(excluded.description, description),
|
||||
categories = COALESCE(excluded.categories, categories),
|
||||
latest_version = COALESCE(excluded.latest_version, latest_version),
|
||||
download_url = excluded.download_url,
|
||||
icon_url = COALESCE(excluded.icon_url, icon_url),
|
||||
homepage = COALESCE(excluded.homepage, homepage),
|
||||
file_size = COALESCE(excluded.file_size, file_size),
|
||||
architecture = COALESCE(excluded.architecture, architecture),
|
||||
screenshots = COALESCE(excluded.screenshots, screenshots),
|
||||
license = COALESCE(excluded.license, license),
|
||||
cached_at = datetime('now')",
|
||||
params![source_id, name, description, categories, latest_version, download_url, icon_url, homepage, file_size, architecture, screenshots, license],
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
@@ -2232,6 +2419,125 @@ impl Database {
|
||||
})?;
|
||||
rows.collect()
|
||||
}
|
||||
|
||||
pub fn get_catalog_app_by_source_and_name(&self, source_id: i64, name: &str) -> SqlResult<Option<i64>> {
|
||||
let result = self.conn.query_row(
|
||||
"SELECT id FROM catalog_apps WHERE source_id = ?1 AND name = ?2",
|
||||
params![source_id, name],
|
||||
|row| row.get(0),
|
||||
);
|
||||
match result {
|
||||
Ok(id) => Ok(Some(id)),
|
||||
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
// --- GitHub enrichment methods ---
|
||||
|
||||
/// Store the extracted GitHub (owner, repo) pair for a catalog app row.
///
/// Overwrites any previously stored values for `app_id`.
pub fn update_catalog_app_github_repo(
    &self,
    app_id: i64,
    owner: &str,
    repo: &str,
) -> SqlResult<()> {
    self.conn.execute(
        "UPDATE catalog_apps SET github_owner = ?2, github_repo = ?3 WHERE id = ?1",
        params![app_id, owner, repo],
    )?;
    Ok(())
}
|
||||
|
||||
/// Record repo-level GitHub metadata (star count) and stamp the app as
/// enriched (`github_enriched_at = now`).
///
/// `pushed_at` only fills `release_date` when none is stored yet, so real
/// release dates (from `update_catalog_app_release_info`) are never
/// overwritten by the weaker "last push" timestamp.
///
/// NOTE(review): enrichment callers also invoke this with `stars = 0` after a
/// failed lookup to mark the app enriched and avoid endless retries.
pub fn update_catalog_app_github_metadata(
    &self,
    app_id: i64,
    stars: i64,
    pushed_at: Option<&str>,
) -> SqlResult<()> {
    self.conn.execute(
        "UPDATE catalog_apps SET github_stars = ?2, github_enriched_at = datetime('now') WHERE id = ?1",
        params![app_id, stars],
    )?;
    // Store pushed_at in release_date if no release info yet
    if let Some(pushed) = pushed_at {
        self.conn.execute(
            "UPDATE catalog_apps SET release_date = COALESCE(release_date, ?2) WHERE id = ?1",
            params![app_id, pushed],
        )?;
    }
    Ok(())
}
|
||||
|
||||
/// Store latest-release info for a catalog app and stamp it as enriched.
///
/// Each field uses `COALESCE(new, old)`: passing `None` keeps the existing
/// value instead of clearing it, so partial release data never erases data
/// recorded earlier. `github_release_assets` is expected to be a
/// JSON-encoded asset list (see the enrichment module).
pub fn update_catalog_app_release_info(
    &self,
    app_id: i64,
    version: Option<&str>,
    date: Option<&str>,
    downloads: Option<i64>,
    github_download_url: Option<&str>,
    github_release_assets: Option<&str>,
) -> SqlResult<()> {
    self.conn.execute(
        "UPDATE catalog_apps SET
        latest_version = COALESCE(?2, latest_version),
        release_date = COALESCE(?3, release_date),
        github_downloads = COALESCE(?4, github_downloads),
        github_download_url = COALESCE(?5, github_download_url),
        github_release_assets = COALESCE(?6, github_release_assets),
        github_enriched_at = datetime('now')
        WHERE id = ?1",
        params![app_id, version, date, downloads, github_download_url, github_release_assets],
    )?;
    Ok(())
}
|
||||
|
||||
/// Fetch up to `limit` catalog apps that have a GitHub repo recorded
/// (`github_owner` set) but have never been enriched
/// (`github_enriched_at` IS NULL), in stable id order for batch processing.
pub fn get_unenriched_catalog_apps(&self, limit: i32) -> SqlResult<Vec<CatalogApp>> {
    let mut stmt = self.conn.prepare(
        "SELECT id, name, description, categories, download_url, icon_url, homepage, license, screenshots,
        github_owner, github_repo, github_stars, github_downloads, latest_version, release_date, github_enriched_at, github_download_url, github_release_assets
        FROM catalog_apps
        WHERE github_owner IS NOT NULL AND github_enriched_at IS NULL
        ORDER BY id
        LIMIT ?1"
    )?;
    let rows = stmt.query_map(params![limit], |row| {
        Ok(CatalogApp {
            id: row.get(0)?,
            name: row.get(1)?,
            description: row.get(2)?,
            categories: row.get(3)?,
            download_url: row.get(4)?,
            icon_url: row.get(5)?,
            homepage: row.get(6)?,
            license: row.get(7)?,
            screenshots: row.get(8)?,
            github_owner: row.get(9)?,
            github_repo: row.get(10)?,
            github_stars: row.get(11)?,
            github_downloads: row.get(12)?,
            latest_version: row.get(13)?,
            release_date: row.get(14)?,
            github_enriched_at: row.get(15)?,
            github_download_url: row.get(16)?,
            github_release_assets: row.get(17)?,
        })
    })?;
    rows.collect()
}
|
||||
|
||||
/// Return `(enriched, total_with_github)` counts over catalog apps that have
/// a GitHub repo recorded, for progress display.
pub fn catalog_enrichment_progress(&self) -> SqlResult<(i64, i64)> {
    // Both queries are simple COUNT(*) scalars; share the row mapping.
    let count = |sql: &str| -> SqlResult<i64> {
        self.conn.query_row(sql, [], |row| row.get(0))
    };
    let enriched = count(
        "SELECT COUNT(*) FROM catalog_apps WHERE github_owner IS NOT NULL AND github_enriched_at IS NOT NULL",
    )?;
    let total_with_github =
        count("SELECT COUNT(*) FROM catalog_apps WHERE github_owner IS NOT NULL")?;
    Ok((enriched, total_with_github))
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -2402,7 +2708,7 @@ mod tests {
|
||||
[],
|
||||
|row| row.get(0),
|
||||
).unwrap();
|
||||
assert_eq!(version, 11);
|
||||
assert_eq!(version, 15);
|
||||
|
||||
// All tables that should exist after the full v1-v7 migration chain
|
||||
let expected_tables = [
|
||||
|
||||
394
src/core/github_enrichment.rs
Normal file
394
src/core/github_enrichment.rs
Normal file
@@ -0,0 +1,394 @@
|
||||
use super::database::Database;
|
||||
|
||||
// --- API response structs ---
|
||||
|
||||
/// Subset of the GitHub "get repository" API response that we consume.
/// Fields not listed here are ignored during deserialization.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubRepoInfo {
    pub stargazers_count: i64,
    /// Timestamp of the last push, if any (ISO-8601 string from the API).
    pub pushed_at: Option<String>,
    pub description: Option<String>,
}
|
||||
|
||||
/// Subset of the GitHub "latest release" API response that we consume.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubReleaseInfo {
    /// Release tag, e.g. "v1.2.3" (callers strip a leading 'v').
    pub tag_name: String,
    pub published_at: Option<String>,
    /// Downloadable files attached to the release.
    pub assets: Vec<GitHubReleaseAsset>,
}
|
||||
|
||||
/// A single downloadable file attached to a GitHub release.
#[derive(Debug, serde::Deserialize)]
pub struct GitHubReleaseAsset {
    pub name: String,
    pub browser_download_url: String,
    /// Lifetime download count reported by the API.
    pub download_count: i64,
    /// File size in bytes.
    pub size: i64,
}
|
||||
|
||||
// --- URL parsing ---
|
||||
|
||||
/// Extract (owner, repo) from a GitHub URL.
|
||||
/// Tries download_url first (most reliable for GitHub releases), then homepage.
|
||||
pub fn extract_github_repo(homepage: Option<&str>, download_url: &str) -> Option<(String, String)> {
|
||||
// Try download URL first - most AppImageHub entries point to GitHub releases
|
||||
if let Some(pair) = parse_github_url(download_url) {
|
||||
return Some(pair);
|
||||
}
|
||||
// Fallback to homepage
|
||||
if let Some(hp) = homepage {
|
||||
if let Some(pair) = parse_github_url(hp) {
|
||||
return Some(pair);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse `github.com/{owner}/{repo}` from a URL.
///
/// Accepts optional `http(s)://` and `www.` prefixes, and strips a trailing
/// `.git` suffix plus any `?query` or `#fragment` from the repo segment.
/// Returns `None` when the URL is not a github.com repository path.
fn parse_github_url(url: &str) -> Option<(String, String)> {
    let stripped = url
        .trim_start_matches("https://")
        .trim_start_matches("http://")
        // Generalization: real-world links often use "www.github.com".
        .trim_start_matches("www.");

    let path = stripped.strip_prefix("github.com/")?;
    let mut segments = path.splitn(3, '/');
    let owner = segments.next()?;
    let repo_raw = segments.next()?;

    // Drop query string / fragment first, then a trailing ".git".
    let repo = repo_raw
        .split(|c| c == '?' || c == '#')
        .next()
        .unwrap_or(repo_raw)
        .trim_end_matches(".git");

    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some((owner.to_string(), repo.to_string()))
}
|
||||
|
||||
// --- API calls ---
|
||||
|
||||
/// Perform a GET against the GitHub REST API.
///
/// Returns the response body together with the value of the
/// `x-ratelimit-remaining` header (0 when the header is missing or does not
/// parse). An empty `token` sends the request unauthenticated; otherwise a
/// `Bearer` token is attached.
fn github_get(url: &str, token: &str) -> Result<(String, u32), String> {
    let mut req = ureq::get(url)
        .header("Accept", "application/vnd.github+json")
        .header("User-Agent", "Driftwood-AppImage-Manager");
    if !token.is_empty() {
        req = req.header("Authorization", &format!("Bearer {}", token));
    }
    let mut response = req.call()
        .map_err(|e| format!("GitHub API error: {}", e))?;

    // Parse rate limit header
    let remaining: u32 = response.headers()
        .get("x-ratelimit-remaining")
        .and_then(|v| v.to_str().ok())
        .and_then(|v| v.parse().ok())
        .unwrap_or(0);

    let body = response.body_mut().read_to_string()
        .map_err(|e| format!("Read error: {}", e))?;

    Ok((body, remaining))
}
|
||||
|
||||
pub fn fetch_repo_info(owner: &str, repo: &str, token: &str) -> Result<(GitHubRepoInfo, u32), String> {
|
||||
let url = format!("https://api.github.com/repos/{}/{}", owner, repo);
|
||||
let (body, remaining) = github_get(&url, token)?;
|
||||
let info: GitHubRepoInfo = serde_json::from_str(&body)
|
||||
.map_err(|e| format!("Parse error: {}", e))?;
|
||||
Ok((info, remaining))
|
||||
}
|
||||
|
||||
pub fn fetch_release_info(owner: &str, repo: &str, token: &str) -> Result<(GitHubReleaseInfo, u32), String> {
|
||||
let url = format!("https://api.github.com/repos/{}/{}/releases/latest", owner, repo);
|
||||
let (body, remaining) = github_get(&url, token)?;
|
||||
let info: GitHubReleaseInfo = serde_json::from_str(&body)
|
||||
.map_err(|e| format!("Parse error: {}", e))?;
|
||||
Ok((info, remaining))
|
||||
}
|
||||
|
||||
// --- AppImage asset filtering ---
|
||||
|
||||
/// A simplified release asset for storage (JSON-serializable).
///
/// Persisted as a JSON array in the `github_release_assets` column.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AppImageAsset {
    pub name: String,
    /// Direct browser download URL for the asset.
    pub url: String,
    /// File size in bytes.
    pub size: i64,
}
|
||||
|
||||
/// Filter release assets to only AppImage files.
|
||||
pub fn filter_appimage_assets(assets: &[GitHubReleaseAsset]) -> Vec<AppImageAsset> {
|
||||
assets.iter()
|
||||
.filter(|a| {
|
||||
let lower = a.name.to_lowercase();
|
||||
lower.ends_with(".appimage") || lower.ends_with(".appimage.zsync")
|
||||
})
|
||||
.filter(|a| !a.name.to_lowercase().ends_with(".zsync"))
|
||||
.map(|a| AppImageAsset {
|
||||
name: a.name.clone(),
|
||||
url: a.browser_download_url.clone(),
|
||||
size: a.size,
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Detect the current system architecture string as used in AppImage filenames.
///
/// `std::env::consts::ARCH` already yields "x86_64" / "aarch64" on those
/// targets, so the previous per-target `#[cfg]` blocks were redundant — the
/// constant alone produces the same value on every architecture.
pub fn detect_arch() -> &'static str {
    std::env::consts::ARCH
}
|
||||
|
||||
/// Pick the best AppImage asset for the current architecture.
|
||||
/// Returns the matching asset, or the first one if no arch match.
|
||||
pub fn pick_best_asset(assets: &[AppImageAsset]) -> Option<&AppImageAsset> {
|
||||
if assets.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let arch = detect_arch();
|
||||
// Prefer exact arch match in filename
|
||||
let arch_match = assets.iter().find(|a| {
|
||||
let lower = a.name.to_lowercase();
|
||||
lower.contains(&arch.to_lowercase())
|
||||
});
|
||||
arch_match.or(assets.first())
|
||||
}
|
||||
|
||||
// --- Enrichment logic ---
|
||||
|
||||
/// Enrich a catalog app with repo-level info (stars, pushed_at).
///
/// Fetches `GET /repos/{owner}/{repo}` and persists the result via
/// `Database::update_catalog_app_github_metadata`, which also stamps the app
/// as enriched. Returns the remaining GitHub rate-limit budget so callers
/// can throttle.
pub fn enrich_app_repo_info(
    db: &Database,
    app_id: i64,
    owner: &str,
    repo: &str,
    token: &str,
) -> Result<u32, String> {
    let (info, remaining) = fetch_repo_info(owner, repo, token)?;
    db.update_catalog_app_github_metadata(app_id, info.stargazers_count, info.pushed_at.as_deref())
        .map_err(|e| format!("DB error: {}", e))?;
    Ok(remaining)
}
|
||||
|
||||
/// Enrich a catalog app with release info (version, date, downloads, assets).
///
/// Fetches the latest release, normalizes the version tag, totals asset
/// download counts, stores the AppImage asset list as JSON, and persists via
/// `Database::update_catalog_app_release_info`. Returns the remaining GitHub
/// rate-limit budget.
pub fn enrich_app_release_info(
    db: &Database,
    app_id: i64,
    owner: &str,
    repo: &str,
    token: &str,
) -> Result<u32, String> {
    let (info, remaining) = fetch_release_info(owner, repo, token)?;

    // Clean version string (strip leading "v")
    let version = info.tag_name.strip_prefix('v')
        .unwrap_or(&info.tag_name)
        .to_string();

    // Sum download counts across all assets
    let total_downloads: i64 = info.assets.iter().map(|a| a.download_count).sum();

    // Extract AppImage assets and pick the best download URL
    let appimage_assets = filter_appimage_assets(&info.assets);
    let best_url = pick_best_asset(&appimage_assets).map(|a| a.url.as_str());
    let assets_json = if appimage_assets.is_empty() {
        None
    } else {
        serde_json::to_string(&appimage_assets).ok()
    };

    // A zero download total is stored as None so COALESCE in the DB layer
    // keeps any previously recorded count.
    db.update_catalog_app_release_info(
        app_id,
        Some(&version),
        info.published_at.as_deref(),
        if total_downloads > 0 { Some(total_downloads) } else { None },
        best_url,
        assets_json.as_deref(),
    ).map_err(|e| format!("DB error: {}", e))?;

    Ok(remaining)
}
|
||||
|
||||
/// Background enrichment: process a batch of unenriched apps.
|
||||
/// Returns (count_enriched, should_continue).
|
||||
pub fn background_enrich_batch(
|
||||
db: &Database,
|
||||
token: &str,
|
||||
batch_size: i32,
|
||||
on_progress: &dyn Fn(i64, i64),
|
||||
) -> Result<(u32, bool), String> {
|
||||
let apps = db.get_unenriched_catalog_apps(batch_size)
|
||||
.map_err(|e| format!("DB error: {}", e))?;
|
||||
|
||||
if apps.is_empty() {
|
||||
return Ok((0, false));
|
||||
}
|
||||
|
||||
let mut enriched = 0u32;
|
||||
|
||||
for app in &apps {
|
||||
let owner = match app.github_owner.as_deref() {
|
||||
Some(o) => o,
|
||||
None => continue,
|
||||
};
|
||||
let repo = match app.github_repo.as_deref() {
|
||||
Some(r) => r,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
match enrich_app_repo_info(db, app.id, owner, repo, token) {
|
||||
Ok(remaining) => {
|
||||
enriched += 1;
|
||||
|
||||
// Report progress
|
||||
if let Ok((done, total)) = db.catalog_enrichment_progress() {
|
||||
on_progress(done, total);
|
||||
}
|
||||
|
||||
// Stop if rate limit is getting low
|
||||
if remaining < 5 {
|
||||
log::info!("GitHub rate limit low ({}), pausing enrichment", remaining);
|
||||
return Ok((enriched, false));
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
log::warn!("Failed to enrich {}/{}: {}", owner, repo, e);
|
||||
// Mark as enriched anyway so we don't retry forever
|
||||
db.update_catalog_app_github_metadata(app.id, 0, None).ok();
|
||||
}
|
||||
}
|
||||
|
||||
// Sleep between calls to be respectful
|
||||
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||
}
|
||||
|
||||
Ok((enriched, enriched > 0))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Extraction: owner/repo recoverable from a GitHub release download URL
    // even when no homepage is provided.
    #[test]
    fn test_extract_github_repo_from_download() {
        let result = extract_github_repo(
            None,
            "https://github.com/nickvdp/deno-spreadsheets/releases/download/v0.3.0/app.AppImage",
        );
        assert_eq!(result, Some(("nickvdp".to_string(), "deno-spreadsheets".to_string())));
    }

    // Extraction: falls back to the homepage URL when the download URL is
    // not hosted on GitHub.
    #[test]
    fn test_extract_github_repo_from_homepage() {
        let result = extract_github_repo(
            Some("https://github.com/nickvdp/deno-spreadsheets"),
            "https://example.com/download.AppImage",
        );
        assert_eq!(result, Some(("nickvdp".to_string(), "deno-spreadsheets".to_string())));
    }

    // Extraction: a trailing ".git" clone suffix is stripped from the repo name.
    #[test]
    fn test_extract_github_repo_with_git_suffix() {
        let result = extract_github_repo(
            Some("https://github.com/user/repo.git"),
            "https://example.com/download.AppImage",
        );
        assert_eq!(result, Some(("user".to_string(), "repo".to_string())));
    }

    // Extraction: non-GitHub hosts (GitLab, SourceForge) must yield None,
    // not a bogus owner/repo pair.
    #[test]
    fn test_extract_github_repo_non_github() {
        let result = extract_github_repo(
            Some("https://gitlab.com/user/repo"),
            "https://sourceforge.net/download.AppImage",
        );
        assert_eq!(result, None);
    }

    // URL parsing edge cases: empty string, bare host, and owner-only paths
    // all lack a repo segment and must return None.
    #[test]
    fn test_parse_github_url_empty() {
        assert_eq!(parse_github_url(""), None);
        assert_eq!(parse_github_url("https://github.com/"), None);
        assert_eq!(parse_github_url("https://github.com/user"), None);
    }

    // Filtering: keeps real .AppImage assets for both architectures, but
    // drops .zsync delta files and non-AppImage assets (source tarball).
    #[test]
    fn test_filter_appimage_assets() {
        let assets = vec![
            GitHubReleaseAsset {
                name: "app-x86_64.AppImage".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/app-x86_64.AppImage".to_string(),
                download_count: 100,
                size: 50_000_000,
            },
            GitHubReleaseAsset {
                name: "app-aarch64.AppImage".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/app-aarch64.AppImage".to_string(),
                download_count: 20,
                size: 48_000_000,
            },
            // Should be rejected: zsync metadata, not a runnable AppImage.
            GitHubReleaseAsset {
                name: "app-x86_64.AppImage.zsync".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/app-x86_64.AppImage.zsync".to_string(),
                download_count: 5,
                size: 1000,
            },
            // Should be rejected: not an AppImage at all.
            GitHubReleaseAsset {
                name: "source.tar.gz".to_string(),
                browser_download_url: "https://github.com/u/r/releases/download/v1/source.tar.gz".to_string(),
                download_count: 10,
                size: 2_000_000,
            },
        ];
        let filtered = filter_appimage_assets(&assets);
        assert_eq!(filtered.len(), 2);
        assert_eq!(filtered[0].name, "app-x86_64.AppImage");
        assert_eq!(filtered[1].name, "app-aarch64.AppImage");
    }

    // NOTE(review): this test is arch-dependent — it only passes when
    // detect_arch() returns a string contained in one of the two fixture
    // names (x86_64 or aarch64); on other architectures the fallback asset
    // would not contain the arch string. Confirm CI runs on those arches.
    #[test]
    fn test_pick_best_asset_prefers_arch() {
        let assets = vec![
            AppImageAsset {
                name: "app-aarch64.AppImage".to_string(),
                url: "https://example.com/aarch64".to_string(),
                size: 48_000_000,
            },
            AppImageAsset {
                name: "app-x86_64.AppImage".to_string(),
                url: "https://example.com/x86_64".to_string(),
                size: 50_000_000,
            },
        ];
        let best = pick_best_asset(&assets).unwrap();
        // On x86_64 systems this should pick x86_64, on aarch64 it picks aarch64
        let arch = detect_arch();
        assert!(best.name.contains(arch));
    }

    // Empty input must return None rather than panicking.
    #[test]
    fn test_pick_best_asset_empty() {
        let assets: Vec<AppImageAsset> = vec![];
        assert!(pick_best_asset(&assets).is_none());
    }

    // A single asset is returned even when its name carries no arch marker
    // (fallback-to-first behavior).
    #[test]
    fn test_pick_best_asset_single() {
        let assets = vec![
            AppImageAsset {
                name: "app.AppImage".to_string(),
                url: "https://example.com/app".to_string(),
                size: 50_000_000,
            },
        ];
        let best = pick_best_asset(&assets).unwrap();
        assert_eq!(best.name, "app.AppImage");
    }
}
|
||||
@@ -4,6 +4,7 @@ pub mod backup;
|
||||
pub mod catalog;
|
||||
pub mod database;
|
||||
pub mod discovery;
|
||||
pub mod github_enrichment;
|
||||
pub mod duplicates;
|
||||
pub mod footprint;
|
||||
pub mod fuse;
|
||||
|
||||
Reference in New Issue
Block a user