use super::database::{AppImageRecord, Database}; use std::collections::HashMap; /// A group of AppImages that appear to be the same application. #[derive(Debug, Clone)] pub struct DuplicateGroup { /// Canonical app name for this group. pub app_name: String, /// All records in this group, sorted by version (newest first). pub members: Vec, /// Reason these were grouped together. pub match_reason: MatchReason, /// Total disk space used by all members. pub total_size: u64, /// Potential space savings if only keeping the newest. pub potential_savings: u64, } #[derive(Debug, Clone)] pub struct DuplicateMember { pub record: AppImageRecord, /// Whether this is the recommended one to keep. pub is_recommended: bool, /// Why we recommend keeping or removing this one. pub recommendation: MemberRecommendation, } #[derive(Debug, Clone, PartialEq)] pub enum MatchReason { /// Same app name, different versions. MultiVersion, /// Same SHA256 hash (exact duplicates in different locations). ExactDuplicate, /// Same app name, same version, different paths. SameVersionDifferentPath, } impl MatchReason { pub fn label(&self) -> &'static str { match self { Self::MultiVersion => "Multiple versions", Self::ExactDuplicate => "Exact duplicates", Self::SameVersionDifferentPath => "Same version, different locations", } } } #[derive(Debug, Clone, PartialEq)] pub enum MemberRecommendation { /// This is the newest version - keep it. KeepNewest, /// This is the only integrated copy - keep it. KeepIntegrated, /// Older version that can be removed. RemoveOlder, /// Duplicate that can be removed. RemoveDuplicate, /// No clear recommendation. 
UserChoice, } impl MemberRecommendation { pub fn label(&self) -> &'static str { match self { Self::KeepNewest => "Keep (newest)", Self::KeepIntegrated => "Keep (integrated)", Self::RemoveOlder => "Remove (older version)", Self::RemoveDuplicate => "Remove (duplicate)", Self::UserChoice => "Your choice", } } } /// Detect duplicate and multi-version AppImages from the database. pub fn detect_duplicates(db: &Database) -> Vec { let records = match db.get_all_appimages() { Ok(r) => r, Err(e) => { log::error!("Failed to query appimages for duplicate detection: {}", e); return Vec::new(); } }; if records.len() < 2 { return Vec::new(); } let mut groups = Vec::new(); // Phase 1: Find exact duplicates by SHA256 hash let hash_groups = group_by_hash(&records); for (hash, members) in &hash_groups { if members.len() > 1 { groups.push(build_exact_duplicate_group(hash, members)); } } // Phase 2: Find same app name groups (excluding already-found exact dupes) let exact_dupe_ids: std::collections::HashSet = groups .iter() .flat_map(|g| g.members.iter().map(|m| m.record.id)) .collect(); let name_groups = group_by_name(&records); for (name, members) in &name_groups { // Skip if all members are already in exact duplicate groups let remaining: Vec<&AppImageRecord> = members .iter() .filter(|r| !exact_dupe_ids.contains(&r.id)) .collect(); if remaining.len() > 1 { groups.push(build_name_group(name, &remaining)); } } // Sort groups by potential savings (largest first) groups.sort_by(|a, b| b.potential_savings.cmp(&a.potential_savings)); groups } /// Group records by SHA256 hash. fn group_by_hash(records: &[AppImageRecord]) -> HashMap> { let mut map: HashMap> = HashMap::new(); for record in records { if let Some(ref hash) = record.sha256 { if !hash.is_empty() { map.entry(hash.clone()) .or_default() .push(record.clone()); } } } map } /// Group records by normalized app name. 
fn group_by_name(records: &[AppImageRecord]) -> HashMap> { let mut map: HashMap> = HashMap::new(); for record in records { let name = normalize_app_name(record); map.entry(name).or_default().push(record.clone()); } map } /// Normalize an app name for grouping purposes. /// Strips version numbers, architecture suffixes, and normalizes case. fn normalize_app_name(record: &AppImageRecord) -> String { let name = record .app_name .as_deref() .unwrap_or(&record.filename); // Lowercase and trim let mut normalized = name.to_lowercase().trim().to_string(); // Remove common suffixes for suffix in &[ ".appimage", "-x86_64", "-aarch64", "-armhf", "-i386", "-i686", "_x86_64", "_aarch64", ] { if let Some(stripped) = normalized.strip_suffix(suffix) { normalized = stripped.to_string(); } } // Remove trailing version-like patterns (e.g., "-1.2.3", "_v2.0") if let Some(pos) = find_version_suffix(&normalized) { normalized = normalized[..pos].to_string(); } // Remove trailing hyphens/underscores normalized = normalized.trim_end_matches(|c: char| c == '-' || c == '_').to_string(); normalized } /// Find the start position of a trailing version suffix. fn find_version_suffix(s: &str) -> Option { // Look for patterns like -1.2.3, _v2.0, -24.02.1 at the end let bytes = s.as_bytes(); let mut i = bytes.len(); // Walk backwards past version characters (digits, dots) while i > 0 && (bytes[i - 1].is_ascii_digit() || bytes[i - 1] == b'.') { i -= 1; } // Check if we found a version separator if i > 0 && i < bytes.len() { // Skip optional 'v' prefix if i > 0 && bytes[i - 1] == b'v' { i -= 1; } // Must have a separator before the version if i > 0 && (bytes[i - 1] == b'-' || bytes[i - 1] == b'_') { // Verify it looks like a version (has at least one dot) let version_part = &s[i..]; if version_part.contains('.') || version_part.starts_with('v') { return Some(i - 1); } } } None } /// Build a DuplicateGroup for exact hash duplicates. 
fn build_exact_duplicate_group(_hash: &str, records: &[AppImageRecord]) -> DuplicateGroup { let total_size: u64 = records.iter().map(|r| r.size_bytes as u64).sum(); // Keep the one that's integrated, or the one with the shortest path let keep_idx = records .iter() .position(|r| r.integrated) .unwrap_or(0); let members: Vec = records .iter() .enumerate() .map(|(i, r)| DuplicateMember { record: r.clone(), is_recommended: i == keep_idx, recommendation: if i == keep_idx { if r.integrated { MemberRecommendation::KeepIntegrated } else { MemberRecommendation::UserChoice } } else { MemberRecommendation::RemoveDuplicate }, }) .collect(); let savings = total_size - records[keep_idx].size_bytes as u64; let app_name = records[0] .app_name .clone() .unwrap_or_else(|| records[0].filename.clone()); DuplicateGroup { app_name, members, match_reason: MatchReason::ExactDuplicate, total_size, potential_savings: savings, } } /// Build a DuplicateGroup for same-name groups. fn build_name_group(name: &str, records: &[&AppImageRecord]) -> DuplicateGroup { let total_size: u64 = records.iter().map(|r| r.size_bytes as u64).sum(); // Sort by version (newest first) let mut sorted: Vec<&AppImageRecord> = records.to_vec(); sorted.sort_by(|a, b| { let va = a.app_version.as_deref().unwrap_or("0"); let vb = b.app_version.as_deref().unwrap_or("0"); // Compare versions - newer should come first compare_versions(vb, va) }); // Determine if this is multi-version or same-version-different-path let versions: std::collections::HashSet = sorted .iter() .filter_map(|r| r.app_version.clone()) .collect(); let match_reason = if versions.len() <= 1 { MatchReason::SameVersionDifferentPath } else { MatchReason::MultiVersion }; let members: Vec = sorted .iter() .enumerate() .map(|(i, r)| { let (is_recommended, recommendation) = if i == 0 { // First (newest) version (true, MemberRecommendation::KeepNewest) } else if r.integrated { // Older but integrated (false, MemberRecommendation::KeepIntegrated) } else if 
match_reason == MatchReason::SameVersionDifferentPath { (false, MemberRecommendation::RemoveDuplicate) } else { (false, MemberRecommendation::RemoveOlder) }; DuplicateMember { record: (*r).clone(), is_recommended, recommendation, } }) .collect(); let savings = if !members.is_empty() { total_size - members[0].record.size_bytes as u64 } else { 0 }; // Use the prettiest app name from the group let app_name = sorted .iter() .filter_map(|r| r.app_name.as_ref()) .next() .cloned() .unwrap_or_else(|| name.to_string()); DuplicateGroup { app_name, members, match_reason, total_size, potential_savings: savings, } } /// Compare two version strings for ordering. /// Falls back to lexicographic comparison of cleaned versions to guarantee /// the total ordering contract (antisymmetry) required by sort_by. fn compare_versions(a: &str, b: &str) -> std::cmp::Ordering { use super::updater::{clean_version, version_is_newer}; let ca = clean_version(a); let cb = clean_version(b); if ca == cb { std::cmp::Ordering::Equal } else if version_is_newer(a, b) { std::cmp::Ordering::Greater } else if version_is_newer(b, a) { std::cmp::Ordering::Less } else { // Neither is newer (unparseable components) - use lexicographic fallback ca.cmp(&cb) } } /// Summary of duplicate detection results. 
///
/// Produced by [`summarize_duplicates`].
#[derive(Debug, Clone)]
pub struct DuplicateSummary {
    /// Number of groups, regardless of match reason.
    pub total_groups: usize,
    /// Number of groups whose match reason is `ExactDuplicate`.
    pub exact_duplicates: usize,
    /// Number of groups whose match reason is `MultiVersion`.
    pub multi_version: usize,
    /// Sum of `potential_savings` over all groups.
    pub total_potential_savings: u64,
}

/// Aggregate a list of duplicate groups into counts and total savings.
///
/// Groups with the `SameVersionDifferentPath` reason count towards
/// `total_groups` and the savings total, but have no counter of their own.
pub fn summarize_duplicates(groups: &[DuplicateGroup]) -> DuplicateSummary {
    let mut exact_duplicates = 0usize;
    let mut multi_version = 0usize;
    let mut total_potential_savings = 0u64;

    // Single pass: classify each group and accumulate its savings.
    for group in groups {
        if group.match_reason == MatchReason::ExactDuplicate {
            exact_duplicates += 1;
        } else if group.match_reason == MatchReason::MultiVersion {
            multi_version += 1;
        }
        total_potential_savings += group.potential_savings;
    }

    DuplicateSummary {
        total_groups: groups.len(),
        exact_duplicates,
        multi_version,
        total_potential_savings,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal record where only the app name and filename matter.
    fn make_record(name: &str, filename: &str) -> AppImageRecord {
        AppImageRecord {
            id: 0,
            path: String::new(),
            filename: filename.to_string(),
            app_name: Some(name.to_string()),
            app_version: None,
            appimage_type: None,
            size_bytes: 0,
            sha256: None,
            icon_path: None,
            desktop_file: None,
            integrated: false,
            integrated_at: None,
            is_executable: true,
            desktop_entry_content: None,
            categories: None,
            description: None,
            developer: None,
            architecture: None,
            first_seen: String::new(),
            last_scanned: String::new(),
            file_modified: None,
            fuse_status: None,
            wayland_status: None,
            update_info: None,
            update_type: None,
            latest_version: None,
            update_checked: None,
            update_url: None,
            notes: None,
            sandbox_mode: None,
            runtime_wayland_status: None,
            runtime_wayland_checked: None,
            analysis_status: None,
            launch_args: None,
            tags: None,
            pinned: false,
            avg_startup_ms: None,
            appstream_id: None,
            appstream_description: None,
            generic_name: None,
            license: None,
            homepage_url: None,
            bugtracker_url: None,
            donation_url: None,
            help_url: None,
            vcs_url: None,
            keywords: None,
            mime_types: None,
            content_rating: None,
            project_group: None,
            release_history: None,
            desktop_actions: None,
            has_signature: false,
            screenshot_urls: None,
            previous_version_path: None,
            source_url: None,
            autostart: false,
            startup_wm_class: None,
            verification_status: None,
            first_run_prompted: false,
            system_wide: false,
            is_portable: false,
            mount_point: None,
        }
    }

    #[test]
    fn test_normalize_app_name() {
        let firefox = make_record("Firefox", "Firefox.AppImage");
        assert_eq!(normalize_app_name(&firefox), "firefox");

        let inkscape = make_record("Inkscape", "Inkscape-1.3.2-x86_64.AppImage");
        assert_eq!(normalize_app_name(&inkscape), "inkscape");
    }

    #[test]
    fn test_find_version_suffix() {
        assert_eq!(find_version_suffix("firefox-124.0"), Some(7));
        assert_eq!(find_version_suffix("app-v2.0.0"), Some(3));
        assert_eq!(find_version_suffix("firefox"), None);
        assert_eq!(find_version_suffix("app_1.2.3"), Some(3));
    }

    #[test]
    fn test_match_reason_labels() {
        assert_eq!(MatchReason::MultiVersion.label(), "Multiple versions");
        assert_eq!(MatchReason::ExactDuplicate.label(), "Exact duplicates");
    }

    #[test]
    fn test_member_recommendation_labels() {
        assert_eq!(MemberRecommendation::KeepNewest.label(), "Keep (newest)");
        assert_eq!(
            MemberRecommendation::RemoveOlder.label(),
            "Remove (older version)"
        );
    }
}