Coverage for src/lilbee/catalog.py: 100%

545 statements


"""Model catalog — discovers available GGUF models from HuggingFace.

Three levels:
1. Featured models — curated favorites (hardcoded, always available)
2. HF API models — fetched from HuggingFace API, paginated and filterable
3. Combined catalog — featured first, then HF results
"""

import fnmatch
import functools
import io
import logging
import os
import re
import threading
import time
from collections.abc import Callable
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, NamedTuple

import httpx
from huggingface_hub import ModelInfo
from huggingface_hub.hf_api import RepoSibling
from huggingface_hub.utils import HFValidationError, validate_repo_id
from pydantic import BaseModel
from tqdm.auto import tqdm as _base_tqdm

from lilbee.cancellation import TaskCancelled
from lilbee.model_manager import ModelSource
from lilbee.models import ModelTask
from lilbee.registry import ModelManifest, ModelRegistry

# circular: config.py -> catalog (via the per-role task validator). cfg is
# imported lazily so this module can load before Config() finishes init.

log = logging.getLogger(__name__)


def _cfg() -> Any:
    """Lazy accessor for the global ``cfg`` singleton (see circular-import note)."""
    from lilbee.config import cfg

    return cfg


def __getattr__(name: str) -> Any:
    """Expose ``catalog.cfg`` lazily so ``monkeypatch.setattr(catalog.cfg, ...)`` still works."""
    if name == "cfg":
        from lilbee.config import cfg

        return cfg
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


HF_API_URL = "https://huggingface.co/api/models"


@dataclass
class DownloadProgress:
    """Human-readable snapshot of download progress.

    ``percent`` is a float (0.0 to 100.0) so the ProgressBar renders smooth
    fractional movement during multi-GB downloads. Call sites that need
    an integer for display format it themselves.
    """

    percent: float
    detail: str
    is_cache_hit: bool


ProgressCallback = Callable[[int, int], None]
_BYTES_PER_MB = 1024 * 1024


def make_download_callback(
    on_update: Callable[[DownloadProgress], None],
    *,
    throttle_interval: float = 0.1,
) -> ProgressCallback:
    """Build a download progress callback that converts bytes to human-readable state.
    *on_update(progress: DownloadProgress)* is called at most once per
    ``throttle_interval`` seconds with a float percentage (0.0 to 100.0), a
    ``"<done>/<total> MB"`` detail string, and a cache-hit flag. Both the
    catalog and setup screens use this so byte-to-MB conversion and
    cache-hit detection aren't duplicated.
    """
    last_update_time = 0.0
    seen_partial = False

    def _on_progress(downloaded: int, total: int) -> None:
        nonlocal last_update_time, seen_partial

        if total > 0 and downloaded >= total and not seen_partial:
            on_update(
                DownloadProgress(percent=100.0, detail="already downloaded", is_cache_hit=True)
            )
            return
        seen_partial = True

        now = time.monotonic()
        if now - last_update_time < throttle_interval:
            return
        last_update_time = now

        mb_done = downloaded / _BYTES_PER_MB
        if total > 0:
            pct = min(downloaded * 100.0 / total, 100.0)
            mb_total = total / _BYTES_PER_MB
            on_update(
                DownloadProgress(
                    percent=pct,
                    detail=f"{mb_done:.0f}/{mb_total:.0f} MB",
                    is_cache_hit=False,
                )
            )
        else:
            on_update(DownloadProgress(percent=0.0, detail=f"{mb_done:.0f} MB", is_cache_hit=False))

    return _on_progress
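
# Example (sketch): wiring the callback into a download. `_show` and `entry` are
# placeholder names; `_show` stands in for whatever UI widget a call site updates,
# and `entry` for some CatalogModel.
#
#     def _show(p: DownloadProgress) -> None:
#         print(f"{p.percent:.0f}%  {p.detail}" + ("  (cached)" if p.is_cache_hit else ""))
#
#     progress_cb = make_download_callback(_show, throttle_interval=0.5)
#     download_model(entry, on_progress=progress_cb)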



class _CallbackProgressBar(_base_tqdm):
    """tqdm subclass that forwards progress to a plain callback.
    Fully suppresses terminal output by disabling tqdm rendering and redirecting
    its file handle to an in-memory sink — prevents ANSI escape sequences from leaking
    into Textual's managed terminal.

    Overrides ``get_lock`` to return a threading lock instead of tqdm's default
    multiprocessing lock. Vanilla tqdm acquires ``self._lock`` even on the
    ``disable=True`` path (std.py:988), and the multiprocessing lock's lazy init
    raises ``ValueError`` when ``sys.stderr.fileno() == -1`` (Textual, Jupyter,
    pytest capture). A thread lock sidesteps that fd handling entirely.
    """

    _lock = threading.RLock()
    _callback: Any = None

    @classmethod
    def get_lock(cls) -> threading.RLock:
        return cls._lock

    def __init__(self, *args: Any, **kwargs: Any):
        kwargs["disable"] = True
        kwargs["file"] = io.StringIO()  # absorb any accidental tqdm output
        super().__init__(*args, **kwargs)
        self._cumulative = 0

    def update(self, n: float = 1) -> bool | None:
        self._cumulative += int(n)
        if self._callback is not None:
            total = self.total if self.total is not None else 0
            self._callback(int(self._cumulative), int(total))
        return None


class _ProgressTracker:
    """Wraps a tqdm_class to detect whether progress updates actually fired."""

    def __init__(self, callback: Any) -> None:
        self.was_used = False
        self._callback = callback

    def make_tqdm_class(self) -> type[_base_tqdm]:
        tracker = self

        class _Cls(_CallbackProgressBar):
            _callback = staticmethod(tracker._callback)

            def update(self, n: float = 1) -> bool | None:
                tracker.was_used = True
                return super().update(n)

        return _Cls


class _HfGgufMeta(BaseModel):
    """GGUF metadata returned by the HF API when expand=gguf is requested.

    ModelInfo.gguf is typed as ``dict | None`` upstream, so we validate it ourselves.
    """

    total: int = 0
    architecture: str = ""
    context_length: int = 0


class DownloadConfig(BaseModel):
    model_config = {"arbitrary_types_allowed": True}

    repo_id: str
    filename: str
    token: str | None
    force_download: bool = False
    cache_dir: str | None = None
    tqdm_class: Any = None


_DEFAULT_TIMEOUT = 30.0

# Fields to request from the HF listing API via ?expand=.
# Without expand, the default response omits siblings, cardData, and gguf.
_HF_EXPAND_FIELDS: list[str] = ["gguf", "siblings", "downloads", "pipeline_tag", "cardData"]


@dataclass(frozen=True)
class CatalogModel:
    """One catalog entry, keyed by HuggingFace repo. ``gguf_filename`` may be a glob."""

    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: str
    recommended: bool = False

    @property
    def ref(self) -> str:
        """Browse-time ref (the HF repo); concrete filename is resolved at install."""
        return self.hf_repo

    @property
    def display_name(self) -> str:
        """Human-readable label derived from the HuggingFace repo id."""
        return clean_display_name(self.hf_repo)


@dataclass(frozen=True)
class CatalogResult:
    """Paginated catalog result."""

    total: int
    limit: int
    offset: int
    models: list[CatalogModel]
    has_more: bool = False


@dataclass(frozen=True)
class _HfPage:
    """Internal: one page of HuggingFace API results."""

    models: list[CatalogModel]
    has_more: bool


@dataclass(frozen=True)
class ModelVariant:
    """One quantization within a model family. ``filename`` may be a glob."""

    hf_repo: str
    filename: str
    param_count: str
    quant: str
    size_mb: int
    recommended: bool
    mmproj_filename: str = ""


@dataclass(frozen=True)
class ModelFamily:
    """A group of related model variants (e.g. Qwen3 in multiple sizes)."""

    slug: str  # family slug for building refs: "qwen3"
    name: str  # display name: "Qwen3"
    task: str
    description: str
    variants: tuple[ModelVariant, ...]


def _load_featured() -> tuple[
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
]:
    """Load featured models from the TOML file; called once at import time."""
    import tomllib

    toml_path = Path(__file__).parent / "featured_models.toml"
    with open(toml_path, "rb") as f:
        data = tomllib.load(f)

    def _build(task: ModelTask) -> tuple[CatalogModel, ...]:
        return tuple(
            CatalogModel(
                hf_repo=m["hf_repo"],
                gguf_filename=m["gguf_filename"],
                size_gb=m["size_gb"],
                min_ram_gb=m["min_ram_gb"],
                description=m["description"],
                featured=True,
                downloads=0,
                task=task,
                recommended=m.get("recommended", False),
            )
            for m in data.get(task, [])
        )

    return (
        _build(ModelTask.CHAT),
        _build(ModelTask.EMBEDDING),
        _build(ModelTask.VISION),
        _build(ModelTask.RERANK),
    )


FEATURED_CHAT, FEATURED_EMBEDDING, FEATURED_VISION, FEATURED_RERANK = _load_featured()


# Maps vision catalog entries to their mmproj (CLIP projection) filenames.
# Vision models need both the main GGUF and the mmproj file to work.
# Keys are hf_repo identifiers; values are glob patterns resolved at download time.
# Every FEATURED_VISION entry MUST have a corresponding key here.
_DEFAULT_MMPROJ_PATTERN = "*mmproj*.gguf"

VISION_MMPROJ_FILES: dict[str, str] = {
    "noctrex/LightOnOCR-2-1B-GGUF": _DEFAULT_MMPROJ_PATTERN,
}

FEATURED_ALL: tuple[CatalogModel, ...] = (
    FEATURED_CHAT + FEATURED_EMBEDDING + FEATURED_VISION + FEATURED_RERANK
)

_FAMILY_NAME_RE = re.compile(r"^(.+?)\s+\d")
PARAM_COUNT_RE = re.compile(r"(\d+\.?\d*B)", re.IGNORECASE)


def _extract_family_name(model_name: str) -> str:
    """Extract the family name by stripping the trailing parameter count.
    Applies clean_display_name first to strip -GGUF, -Instruct, etc.

    "Qwen3 8B" -> "Qwen3", "Qwen3-Coder 30B A3B" -> "Qwen3-Coder",
    "Nomic Embed Text v1.5" -> "Nomic Embed Text v1.5" (no trailing number pattern).
    """
    cleaned = clean_display_name(model_name)
    m = _FAMILY_NAME_RE.match(cleaned)
    return m.group(1) if m else cleaned


def extract_quant(filename: str) -> str:
    """Extract the GGUF quantization label (e.g. ``Q4_K_M``) from a filename."""
    m = re.search(r"(Q\d[A-Z0-9_]*)", filename, re.IGNORECASE)
    return m.group(1).upper() if m else ""



def _derive_param_count(model: CatalogModel) -> str:
    """Parse the ``7B``-style param count from the display name; ``""`` if absent."""
    match = PARAM_COUNT_RE.search(model.display_name)
    return match.group(1) if match else ""


def _catalog_to_variant(model: CatalogModel) -> ModelVariant:
    """Convert a CatalogModel to a ModelVariant."""
    return ModelVariant(
        hf_repo=model.hf_repo,
        filename=model.gguf_filename,
        param_count=_derive_param_count(model),
        quant=extract_quant(model.gguf_filename),
        size_mb=int(model.size_gb * 1024),
        recommended=model.recommended,
    )


def _family_slug(display_name: str) -> str:
    """Stable slug for a family, derived from its display name."""
    return _extract_family_name(display_name).lower().replace(" ", "-")


def _build_families(models: tuple[CatalogModel, ...], task: str) -> list[ModelFamily]:
    """Group CatalogModels into families by display-derived family name."""
    groups: dict[str, list[CatalogModel]] = {}
    order: list[str] = []
    for m in models:
        family = _extract_family_name(m.display_name)
        if family not in groups:
            order.append(family)
        groups.setdefault(family, []).append(m)

    families: list[ModelFamily] = []
    for family_name in order:
        members = groups[family_name]
        representative = next((m for m in members if m.recommended), members[0])
        variants = [_catalog_to_variant(m) for m in members]
        families.append(
            ModelFamily(
                slug=_family_slug(representative.display_name),
                name=family_name,
                task=task,
                description=representative.description,
                variants=tuple(variants),
            )
        )
    return families


def get_families() -> list[ModelFamily]:
    """Get all featured models grouped into families.
    Returns families ordered: chat, then embedding, then vision, then reranker.
    Within each family, variants are ordered smallest to largest, with
    the largest marked as recommended (for multi-variant families).
    """
    return (
        _build_families(FEATURED_CHAT, ModelTask.CHAT)
        + _build_families(FEATURED_EMBEDDING, ModelTask.EMBEDDING)
        + _build_families(FEATURED_VISION, ModelTask.VISION)
        + _build_families(FEATURED_RERANK, ModelTask.RERANK)
    )


_SIZE_RANGES: dict[str, tuple[float, float]] = {
    "small": (0.0, 3.0),
    "medium": (3.0, 10.0),
    "large": (10.0, float("inf")),
}


def _hf_token() -> str | None:
    """Read HuggingFace token from env vars or huggingface_hub login cache."""
    token = os.environ.get("LILBEE_HF_TOKEN") or os.environ.get("HF_TOKEN") or None
    if token:
        return token
    try:
        from huggingface_hub import get_token

        return get_token()
    except Exception:
        return None


def _hf_headers() -> dict[str, str]:
    """Build HTTP headers for HuggingFace API requests."""
    token = _hf_token()
    if token:
        return {"Authorization": f"Bearer {token}"}
    return {}


# TTL cache for HuggingFace API results (5 minutes). The lock guards the
# evict-then-insert path so concurrent TUI workers can't race and hit
# ``RuntimeError: dictionary changed size during iteration``.
_HF_CACHE_TTL = 300
_HF_CACHE_MAX_ENTRIES = 50
_hf_cache: dict[str, tuple[float, _HfPage]] = {}
_hf_cache_lock = threading.Lock()

_EMPTY_HF_PAGE = _HfPage(models=[], has_more=False)

# HF ``?search=`` is a single space-tokenized substring match on the model id.
# Multiple ``search=`` params are silently ignored, so the user's query is
# space-joined onto the GGUF filter into one param value.
_HF_GGUF_SEARCH_TERM = "GGUF"


def _hf_search_value(search: str) -> str:
    """Build the HF ``search=`` value: GGUF plus the user's tokens, space-joined."""
    tokens = [_HF_GGUF_SEARCH_TERM, *search.split()]
    return " ".join(tokens)
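
# Example (sketch): a user query of "qwen 7b" becomes a single search param value,
#     _hf_search_value("qwen 7b")  ->  "GGUF qwen 7b"
# so the GGUF filter and the user's tokens travel in one ``search=`` parameter.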



def _fetch_hf_models(
    pipeline_tag: str = "text-generation",
    sort: str = "downloads",
    limit: int = 50,
    offset: int = 0,
    library: str | None = None,
    search: str = "",
) -> _HfPage:
    """Fetch GGUF models from HuggingFace API with 5-minute cache.

    Returns an ``_HfPage`` with a ``has_more`` flag derived from the
    ``Link: <...>; rel="next"`` response header (RFC 5988), the same
    mechanism the ``huggingface_hub`` library uses internally.
    """
    search_value = _hf_search_value(search)
    cache_key = f"{pipeline_tag}:{sort}:{limit}:{offset}:{library}:{search_value}"
    now = time.monotonic()
    with _hf_cache_lock:
        expired = [k for k, (ts, _) in _hf_cache.items() if now - ts >= _HF_CACHE_TTL]
        for k in expired:
            del _hf_cache[k]

        cached = _hf_cache.get(cache_key)
        if cached and now - cached[0] < _HF_CACHE_TTL:
            return cached[1]

    params = httpx.QueryParams(
        pipeline_tag=pipeline_tag,
        search=search_value,
        sort=sort,
        limit=limit,
        skip=offset,
        expand=_HF_EXPAND_FIELDS,
    )
    if library:
        params = params.add("library", library)
    try:
        resp = httpx.get(HF_API_URL, params=params, timeout=_DEFAULT_TIMEOUT, headers=_hf_headers())
        if resp.status_code >= 400:
            log.warning("HuggingFace API returned HTTP %d", resp.status_code)
            return _EMPTY_HF_PAGE
        data = resp.json()
    except (httpx.HTTPError, ValueError) as exc:
        log.warning("Failed to fetch models from HuggingFace: %s", exc)
        return _EMPTY_HF_PAGE

    has_more = "next" in resp.links

    models: list[CatalogModel] = []
    for raw in data:
        if not raw.get("id"):
            continue
        item = ModelInfo(**raw)
        card_desc = item.card_data.get("description", "") if item.card_data else ""
        model_desc = card_desc
        gguf_meta = _HfGgufMeta(**(item.gguf or {}))
        if gguf_meta.total > 0:
            size_gb = round(gguf_meta.total / (1024**3), 1)
        else:
            size_gb = _estimate_size_from_siblings(item.siblings or [])
        task = _pipeline_to_task(item.pipeline_tag or "")
        models.append(
            CatalogModel(
                hf_repo=item.id,
                gguf_filename="*.gguf",
                size_gb=size_gb,
                min_ram_gb=max(2.0, size_gb * 1.5),
                description=model_desc[:120] if model_desc else "",
                featured=False,
                downloads=item.downloads or 0,
                task=task,
            )
        )
    page = _HfPage(models=models, has_more=has_more)
    with _hf_cache_lock:
        _hf_cache[cache_key] = (now, page)
        if len(_hf_cache) > _HF_CACHE_MAX_ENTRIES:
            oldest_key = min(_hf_cache, key=lambda k: _hf_cache[k][0])
            del _hf_cache[oldest_key]
    return page
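
# Example (sketch): identical arguments within the 5-minute TTL hit the in-process
# cache instead of the network, assuming the first call succeeded and was stored.
#     page_1 = _fetch_hf_models(pipeline_tag="text-generation", search="llama")
#     page_2 = _fetch_hf_models(pipeline_tag="text-generation", search="llama")
#     assert page_2 is page_1  # second call served from _hf_cache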



def _has_gguf_siblings(siblings: list[RepoSibling]) -> bool:
    """Return True if the sibling list contains at least one .gguf file."""
    return any(s.rfilename.endswith(".gguf") for s in siblings)


def _estimate_size_from_siblings(siblings: list[RepoSibling]) -> float:
    """Estimate model size in GB from the largest GGUF file in siblings."""
    max_bytes = 0
    for sib in siblings:
        if sib.rfilename.endswith(".gguf"):
            max_bytes = max(max_bytes, sib.size or 0)
    if max_bytes > 0:
        return round(max_bytes / (1024**3), 1)
    return 0.0  # unknown — display as "?" in UI


def _search_blob(m: CatalogModel) -> str:
    """Lowercased join of searchable fields on a catalog row.

    Null char joins the fields so a search term never straddles them.
    """
    return f"{m.display_name}\0{m.hf_repo}\0{m.description}".lower()


def get_catalog(
    task: str | None = None,
    *,
    search: str = "",
    size: str | None = None,
    installed: bool | None = None,
    featured: bool | None = None,
    sort: str = "featured",
    limit: int = 20,
    offset: int = 0,
    model_manager: Any = None,
) -> CatalogResult:
    """Get paginated, filtered catalog of models."""
    # Featured models only on the first page
    all_models = list(FEATURED_ALL) if offset == 0 else []
    hf_has_more = False

    # Optionally fetch from HF API
    if not featured:
        hf_task, hf_library = _task_to_pipeline(task)
        hf_page = _fetch_hf_models(
            pipeline_tag=hf_task,
            limit=limit,
            offset=offset,
            library=hf_library,
            search=search,
        )
        hf_has_more = hf_page.has_more
        # Deduplicate: skip HF models whose repo matches a featured model
        featured_repos = {m.hf_repo for m in FEATURED_ALL}
        hf_models = [m for m in hf_page.models if m.hf_repo not in featured_repos]
        all_models.extend(hf_models)

    # Filter by task
    if task:
        all_models = [m for m in all_models if m.task == task]

    # Filter by search. Single join+lower per model per keystroke instead
    # of four separate lowers + substring checks; the no-match path
    # (the common case) runs four times fewer ``str.lower()`` calls.
    if search:
        search_lower = search.lower()
        all_models = [m for m in all_models if search_lower in _search_blob(m)]

    # Filter by size
    if size and size in _SIZE_RANGES:
        lo, hi = _SIZE_RANGES[size]
        all_models = [m for m in all_models if lo <= m.size_gb < hi]

    # A repo is "installed" if any of its quants has a manifest.
    if installed is not None and model_manager is not None:
        installed_repos = {ref.rsplit("/", 1)[0] for ref in _get_installed_models(model_manager)}
        if installed:
            all_models = [m for m in all_models if m.hf_repo in installed_repos]
        else:
            all_models = [m for m in all_models if m.hf_repo not in installed_repos]

    # Filter by featured status
    if featured is not None:
        all_models = [m for m in all_models if m.featured == featured]

    # Sort
    all_models = _sort_models(all_models, sort)

    total = len(all_models)

    # When HF API pagination is active (offset passed to API), skip local slicing
    # to avoid double-applying the offset. Only slice for featured-only requests.
    paginated = all_models[offset : offset + limit] if featured else all_models[:limit]

    return CatalogResult(
        total=total, limit=limit, offset=offset, models=paginated, has_more=hf_has_more
    )
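
# Example (sketch): first page of chat models under 3 GB matching "qwen",
# sorted by downloads. Argument values are illustrative only.
#     result = get_catalog(ModelTask.CHAT, search="qwen", size="small", sort="downloads")
#     for m in result.models:
#         print(m.display_name, m.size_gb, "GB")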



def _task_to_pipeline(task: str | None) -> tuple[str, str | None]:
    """Map task name to HuggingFace pipeline tag and library filter."""
    mapping: dict[str, tuple[str, str | None]] = {
        ModelTask.CHAT: ("text-generation", None),
        ModelTask.EMBEDDING: ("feature-extraction", "sentence-transformers"),
        ModelTask.VISION: ("image-text-to-text", None),
        ModelTask.RERANK: ("text-classification", None),
    }
    return mapping.get(task or ModelTask.CHAT, ("text-generation", None))


_PIPELINE_TO_TASK: dict[str, str] = {
    "text-generation": ModelTask.CHAT,
    "feature-extraction": ModelTask.EMBEDDING,
    "sentence-similarity": ModelTask.EMBEDDING,
    "image-text-to-text": ModelTask.VISION,
    "image-to-text": ModelTask.VISION,
    "text-classification": ModelTask.RERANK,
    "text-ranking": ModelTask.RERANK,
}


def _pipeline_to_task(pipeline_tag: str) -> str:
    """Map HuggingFace pipeline tag to internal task name."""
    return _PIPELINE_TO_TASK.get(pipeline_tag, ModelTask.CHAT)


def _get_installed_models(model_manager: Any) -> set[str]:
    """Get set of installed model names from model_manager."""
    try:
        return set(model_manager.list_installed())
    except Exception:
        return set()


_SORT_KEYS: dict[str, tuple] = {
    "downloads": (lambda m: m.downloads, True),
    "name": (lambda m: m.display_name.lower(), False),
    "size_asc": (lambda m: m.size_gb, False),
    "size_desc": (lambda m: m.size_gb, True),
    "featured": (lambda m: (not m.featured, -m.downloads), False),
}


def _sort_models(models: list[CatalogModel], sort: str) -> list[CatalogModel]:
    """Sort models according to the specified sort order."""
    key_fn, reverse = _SORT_KEYS.get(sort, _SORT_KEYS["featured"])
    return sorted(models, key=key_fn, reverse=reverse)


class CatalogIndex(NamedTuple):
    """Case-insensitive lookup indexes for find_catalog_entry."""

    by_hf_repo: dict[str, CatalogModel]
    by_full_ref: dict[str, CatalogModel]  # repo + concrete filename


@functools.cache
def _build_catalog_index() -> CatalogIndex:
    """Build case-insensitive lookup indexes for find_catalog_entry."""
    by_hf_repo: dict[str, CatalogModel] = {}
    by_full_ref: dict[str, CatalogModel] = {}
    for m in FEATURED_ALL:
        by_hf_repo.setdefault(m.hf_repo.lower(), m)
        if "*" not in m.gguf_filename:
            by_full_ref[f"{m.hf_repo}/{m.gguf_filename}".lower()] = m
    return CatalogIndex(by_hf_repo, by_full_ref)


def find_catalog_entry(query: str) -> CatalogModel | None:
    """Find a featured model by hf_repo or by ``hf_repo/filename`` ref.

    Tries the query as-is, then strips a trailing ``/<filename>.gguf``,
    then strips a leading non-HF provider prefix (``ollama/``, etc.).
    Case-insensitive; returns ``None`` on miss.
    """
    if not query:
        return None
    idx = _build_catalog_index()
    q = query.lower()
    candidates = [q]
    # Strip the filename for ``<repo>/<filename>.gguf`` queries so the
    # bare-repo index catches featured entries whose gguf_filename is a
    # glob (most are).
    if q.endswith(".gguf") and q.count("/") >= 2:
        candidates.append(q.rsplit("/", 1)[0])
    if "/" in q:
        prefix, rest = q.split("/", 1)
        hf_owners = {r.split("/", 1)[0] for r in idx.by_hf_repo if "/" in r}
        if prefix not in hf_owners:
            candidates.append(rest)
    for c in candidates:
        hit = idx.by_full_ref.get(c) or idx.by_hf_repo.get(c)
        if hit is not None:
            return hit
    return None
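
# Example (sketch): all three query shapes resolve to the same featured entry,
# assuming "Qwen/Qwen3-8B-GGUF" is listed in featured_models.toml.
#     find_catalog_entry("Qwen/Qwen3-8B-GGUF")                       # bare repo
#     find_catalog_entry("qwen/qwen3-8b-gguf/qwen3-8b-q4_k_m.gguf")  # repo + filename
#     find_catalog_entry("ollama/Qwen/Qwen3-8B-GGUF")                # provider prefix stripped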



def is_rerank_ref(model_ref: str) -> bool:
    """Return True iff *model_ref* resolves to a rerank catalog entry."""
    if not model_ref:
        return False
    entry = find_catalog_entry(model_ref)
    return entry is not None and entry.task == ModelTask.RERANK


def _is_hf_repo_id(value: str) -> bool:
    """True if *value* is a well-formed ``owner/name`` HuggingFace repo id."""
    if "/" not in value:
        return False
    try:
        validate_repo_id(value)
    except HFValidationError:
        return False
    return True


def build_adhoc_entry(hf_repo: str, *, task: str = ModelTask.CHAT) -> CatalogModel:
    """Minimal CatalogModel for a non-featured HuggingFace GGUF repo."""
    return CatalogModel(
        hf_repo=hf_repo,
        gguf_filename="*.gguf",
        size_gb=0.0,
        min_ram_gb=2.0,
        description="",
        featured=False,
        downloads=0,
        task=task,
    )


def resolve_pull_target(model: str) -> CatalogModel | None:
    """Resolve *model* to a pullable entry: featured first, then ad-hoc HF."""
    featured = find_catalog_entry(model)
    if featured is not None:
        return featured
    return build_adhoc_entry(model) if _is_hf_repo_id(model) else None


def download_model(entry: CatalogModel, *, on_progress: ProgressCallback | None = None) -> Path:
    """Download a GGUF model from HuggingFace to cfg.models_dir.
    Uses huggingface_hub for resumable downloads, caching, and auth.
    The optional *on_progress(downloaded, total)* callback receives byte counts.
    For vision models, also downloads the mmproj (CLIP projection) file.

    Raises:
        PermissionError: gated repo requiring authentication
        RuntimeError: repo not found or download failure with details
    """
    from huggingface_hub import hf_hub_download
    from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

    _cfg().models_dir.mkdir(parents=True, exist_ok=True)

    filename = resolve_filename(entry)
    dest = _cfg().models_dir / filename
    if dest.exists():
        log.info("Model already downloaded: %s", dest)
        if on_progress is not None:
            size = dest.stat().st_size
            on_progress(size, size)  # Report 100% immediately
        return _finalize_download(entry, dest, on_progress=on_progress)

    log.info("Downloading %s/%s → %s", entry.hf_repo, filename, _cfg().models_dir)
    token = _hf_token()

    tracker = _ProgressTracker(on_progress) if on_progress else None
    config = DownloadConfig(
        repo_id=entry.hf_repo,
        filename=filename,
        token=token,
        cache_dir=str(_cfg().models_dir),
        tqdm_class=tracker.make_tqdm_class() if tracker else None,
    )

    try:
        # HF_HUB_DISABLE_XET is set in lilbee/__init__.py at import time.
        # Setting it here is too late — huggingface_hub.constants already
        # captured the value when this module first imported it.
        cached = Path(hf_hub_download(**config.model_dump(exclude_none=True)))
    except TaskCancelled:
        raise
    except GatedRepoError:
        raise PermissionError(
            f"{entry.hf_repo} requires HuggingFace authentication. "
            "Set HF_TOKEN env var or visit the repo page to request access."
        ) from None
    except RepositoryNotFoundError:
        raise RuntimeError(f"Repository {entry.hf_repo!r} not found on HuggingFace.") from None
    except (httpx.TimeoutException, httpx.ConnectError) as exc:
        raise RuntimeError(f"Network error downloading {entry.hf_repo}: {exc}") from None
    except OSError as exc:
        raise RuntimeError(f"I/O error downloading {entry.hf_repo}: {exc}") from None
    except Exception as exc:
        raise RuntimeError(
            f"Failed to download {entry.hf_repo}: {type(exc).__name__}: {exc}"
        ) from None

    if on_progress:
        actual_size = cached.stat().st_size
        if not tracker or not tracker.was_used:
            log.info("Model found in HuggingFace cache: %s", cached)
        on_progress(actual_size, actual_size)
    dest = cached
    return _finalize_download(entry, dest, on_progress=on_progress)

852 

853def _finalize_download( 

854 entry: CatalogModel, 

855 dest: Path, 

856 *, 

857 on_progress: ProgressCallback | None = None, 

858) -> Path: 

859 """Register the model in the manifest and download mmproj for vision models.""" 

860 _register_model(entry, dest) 

861 if entry.task == ModelTask.VISION: 

862 _download_mmproj(entry, on_progress=on_progress) 

863 return dest 

864 

865 

866def _register_model(entry: CatalogModel, file_path: Path) -> None: 

867 """Create a registry manifest for a downloaded model.""" 

868 registry = ModelRegistry(_cfg().models_dir) 

869 manifest = ModelManifest( 

870 hf_repo=entry.hf_repo, 

871 gguf_filename=file_path.name, 

872 size_bytes=file_path.stat().st_size, 

873 task=entry.task, 

874 downloaded_at=datetime.now(UTC).isoformat(), 

875 ) 

876 try: 

877 registry.install(entry.hf_repo, file_path.name, file_path, manifest) 

878 log.info("Registered %s/%s in manifest", entry.hf_repo, file_path.name) 

879 except Exception: 

880 log.warning("Failed to register manifest for %s", entry.hf_repo, exc_info=True) 

881 

882 

883def _download_mmproj( 

884 entry: CatalogModel, 

885 *, 

886 on_progress: ProgressCallback | None = None, 

887) -> Path | None: 

888 """Download the mmproj (CLIP projection) file for a vision model. 

889 Returns the path to the downloaded file, or None if no mmproj is configured. 

890 The optional ``on_progress`` callback receives ``(downloaded, total)`` byte 

891 counts and is wired through the same tqdm hook used by the main download. 

892 """ 

893 mmproj_pattern = VISION_MMPROJ_FILES.get(entry.hf_repo, _DEFAULT_MMPROJ_PATTERN) 

894 

895 mmproj_filename = _resolve_mmproj_filename(entry.hf_repo, mmproj_pattern) 

896 if not mmproj_filename: 

897 log.warning("Could not resolve mmproj file for %s", entry.hf_repo) 

898 return None 

899 

900 from huggingface_hub import hf_hub_download 

901 

902 tracker = _ProgressTracker(on_progress) if on_progress else None 

903 log.info("Downloading mmproj %s/%s → %s", entry.hf_repo, mmproj_filename, _cfg().models_dir) 

904 path = Path( 

905 hf_hub_download( 

906 repo_id=entry.hf_repo, 

907 filename=mmproj_filename, 

908 cache_dir=str(_cfg().models_dir), 

909 token=_hf_token(), 

910 tqdm_class=tracker.make_tqdm_class() if tracker else None, 

911 ) 

912 ) 

913 if on_progress is not None and (not tracker or not tracker.was_used): 

914 # Cache hit — HF returned the cached path without invoking tqdm. 

915 size = path.stat().st_size 

916 on_progress(size, size) 

917 return path 

918 

919 

920def _resolve_mmproj_filename(hf_repo: str, pattern: str) -> str | None: 

921 """Resolve an mmproj filename pattern to a concrete filename via the HF API.""" 

922 if "*" not in pattern: 

923 return pattern 

924 

925 try: 

926 resp = httpx.get( 

927 f"https://huggingface.co/api/models/{hf_repo}", 

928 timeout=_DEFAULT_TIMEOUT, 

929 headers=_hf_headers(), 

930 ) 

931 resp.raise_for_status() 

932 siblings = resp.json().get("siblings", []) 

933 except Exception as exc: 

934 log.warning("Cannot query mmproj files for %s: %s", hf_repo, exc) 

935 return None 

936 

937 mmproj_files: list[str] = [ 

938 s.get("rfilename", "") for s in siblings if fnmatch.fnmatch(s.get("rfilename", ""), pattern) 

939 ] 

940 if not mmproj_files: 

941 return None 

942 

943 # Prefer F16 over F32 (smaller), and any over BF16 

944 for preference in ("f16", "F16"): 

945 for f in mmproj_files: 

946 if preference in f: 

947 return f 

948 return mmproj_files[0] 

949 

950 

951def _mmproj_in_models_dir_matching(pattern: str) -> Path | None: 

952 """Return the first ``*.gguf`` under ``_cfg().models_dir`` that matches.""" 

953 models_dir: Path = _cfg().models_dir 

954 for p in models_dir.rglob("*.gguf"): 

955 if fnmatch.fnmatch(p.name, pattern) or "mmproj" in p.name.lower(): 

956 return p 

957 return None 

958 

959 

960def find_mmproj_file(model_ref: str) -> Path | None: 

961 """Find the mmproj for a ``FEATURED_VISION`` entry under ``_cfg().models_dir``. 

962 

963 *model_ref* is matched against each featured vision entry's 

964 ``hf_repo``. Returns ``None`` when nothing matches. Never falls back 

965 to an arbitrary mmproj: that cross-contaminates non-vision chat 

966 models (e.g. a chat model would inherit a vision model's mmproj and 

967 be misreported as vision-capable). 

968 """ 

969 if not _cfg().models_dir.exists(): 

970 return None 

971 for entry in FEATURED_VISION: 

972 if model_ref not in entry.hf_repo and entry.hf_repo not in model_ref: 

973 continue 

974 pattern = VISION_MMPROJ_FILES.get(entry.hf_repo, _DEFAULT_MMPROJ_PATTERN) 

975 match = _mmproj_in_models_dir_matching(pattern) 

976 if match is not None: 

977 return match 

978 return None 

979 

980 

981_QUANT_PREFERENCE = ("Q4_K_M", "Q4_K_S", "Q5_K_M", "Q5_K_S", "Q8_0", "Q6_K", "Q3_K_M") 

982 

983 

984def resolve_filename(entry: CatalogModel) -> str: 

985 """Resolve a GGUF filename pattern to the best concrete filename. 

986 For exact filenames, return as-is. For wildcards, query the HF API 

987 and pick the best quantization (prefer Q4_K_M for balance of size/quality). 

988 """ 

989 if "*" not in entry.gguf_filename: 

990 return entry.gguf_filename 

991 

992 try: 

993 resp = httpx.get( 

994 f"https://huggingface.co/api/models/{entry.hf_repo}", 

995 timeout=_DEFAULT_TIMEOUT, 

996 headers=_hf_headers(), 

997 ) 

998 if resp.status_code == 401: 

999 raise PermissionError( 

1000 f"{entry.hf_repo} requires HuggingFace authentication. " 

1001 "Set HF_TOKEN env var or visit the repo page to request access." 

1002 ) 

1003 resp.raise_for_status() 

1004 siblings = resp.json().get("siblings", []) 

1005 except PermissionError: 

1006 raise 

1007 except Exception as exc: 

1008 raise RuntimeError(f"Cannot query files for {entry.hf_repo}: {exc}") from exc 

1009 

1010 gguf_files = [ 

1011 s.get("rfilename", "") for s in siblings if s.get("rfilename", "").endswith(".gguf") 

1012 ] 

1013 if not gguf_files: 

1014 raise RuntimeError(f"No GGUF files found in {entry.hf_repo}") 

1015 

1016 return _pick_best_gguf(gguf_files) 

1017 

1018 

1019def _pick_best_gguf(filenames: list[str]) -> str: 

1020 """Pick the best GGUF file by quantization preference.""" 

1021 for quant in _QUANT_PREFERENCE: 

1022 for f in filenames: 

1023 if quant in f: 

1024 return f 

1025 return filenames[0] 
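
# Example (sketch): the preference order above decides ties, assuming a repo
# ships these three files.
#     _pick_best_gguf(["model-Q8_0.gguf", "model-Q4_K_M.gguf", "model-Q6_K.gguf"])
#     # -> "model-Q4_K_M.gguf"  (Q4_K_M outranks Q8_0 and Q6_K in _QUANT_PREFERENCE)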



def fetch_model_file_size(hf_repo: str) -> float:
    """Fetch the best GGUF file size from HuggingFace tree API.
    Returns size in GB, or 0.0 if unavailable.
    """
    try:
        resp = httpx.get(
            f"https://huggingface.co/api/models/{hf_repo}/tree/main",
            timeout=_DEFAULT_TIMEOUT,
            headers=_hf_headers(),
        )
        resp.raise_for_status()
        files = resp.json()
    except Exception:
        return 0.0

    gguf_files = [
        (f.get("path", ""), f.get("size", 0) or f.get("lfs", {}).get("size", 0))
        for f in files
        if isinstance(f, dict) and f.get("path", "").endswith(".gguf")
    ]
    if not gguf_files:
        return 0.0

    best_name = _pick_best_gguf([name for name, _ in gguf_files])
    size_bytes = next((s for n, s in gguf_files if n == best_name), 0)
    return round(size_bytes / (1024**3), 1) if size_bytes else 0.0


_DISPLAY_NAME_SUFFIXES = re.compile(r"-(GGUF|Instruct|Chat)(?=-|$)", re.IGNORECASE)
_DISPLAY_NAME_DATE_SUFFIX = re.compile(r"-\d{4}$")
_DISPLAY_NAME_META_PREFIX = re.compile(r"^Meta-", re.IGNORECASE)


def clean_display_name(repo_id: str) -> str:
    """Derive a human-friendly display name from a HuggingFace repo ID.
    Strips org prefix, -GGUF/-Instruct/-Chat suffixes, date suffixes (-2507),
    and Meta- prefix. Replaces hyphens with spaces.

    Examples:
        "Qwen/Qwen2.5-7B-Instruct-GGUF" -> "Qwen2.5 7B"
        "meta-llama/Meta-Llama-3-8B" -> "Llama 3 8B"
    """
    name = repo_id.split("/")[-1]
    name = _DISPLAY_NAME_SUFFIXES.sub("", name)
    name = _DISPLAY_NAME_DATE_SUFFIX.sub("", name)
    name = _DISPLAY_NAME_META_PREFIX.sub("", name)
    name = name.replace("-", " ").strip()
    return re.sub(r"\s+", " ", name)


def display_label_for_ref(ref: str) -> str:
    """Render any model ref as a short, human-friendly UI label.

    - Native HF ref (``<repo>/<file>.gguf``): cleaned repo name.
    - Provider-prefixed (``ollama/``, ``openai/`` ...): the part after the prefix.
    - Anything else: returned unchanged.
    """
    if not ref:
        return ""
    if ref.endswith(".gguf") and ref.count("/") >= 2:
        return clean_display_name(ref.rsplit("/", 1)[0])
    if "/" in ref:
        return ref.split("/", 1)[1]
    return ref
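
# Example (sketch): the three ref shapes the docstring describes; refs are illustrative.
#     display_label_for_ref("Qwen/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m.gguf")
#     # -> "Qwen2.5 7B"
#     display_label_for_ref("ollama/llama3.2:3b")   # -> "llama3.2:3b"
#     display_label_for_ref("gpt-4o-mini")          # -> "gpt-4o-mini"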



QUANT_TIERS: dict[str, str] = {
    "Q2_K": "compact",
    "Q3_K_S": "compact",
    "Q3_K_M": "compact",
    "Q3_K_L": "compact",
    "Q4_K_S": "balanced",
    "Q4_K_M": "balanced",
    "Q4_0": "balanced",
    "Q5_K_S": "high quality",
    "Q5_K_M": "high quality",
    "Q6_K": "high quality",
    "Q8_0": "full precision",
    "F16": "unquantized",
    "F32": "unquantized",
}


def quant_tier(quant: str) -> str:
    """Map a quantization label to a human-readable quality tier."""
    if not quant:
        return "—"
    return QUANT_TIERS.get(quant, "—")


@dataclass(frozen=True)
class EnrichedModel:
    """A catalog model enriched with display metadata and install status."""

    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: str
    display_name: str
    param_count: str
    quality_tier: str
    installed: bool
    source: str


def enrich_catalog(result: CatalogResult, installed_refs: set[str]) -> list[EnrichedModel]:
    """Enrich catalog models with display names, quality tiers, and install status.

    *installed_refs* contains the ``hf_repo/filename`` refs returned by
    ``model_manager.list_installed()``. A repo is considered installed
    when at least one of its quants has a manifest.
    """
    installed_repos = {ref.rsplit("/", 1)[0] for ref in installed_refs}
    enriched: list[EnrichedModel] = []
    for m in result.models:
        enriched.append(
            EnrichedModel(
                hf_repo=m.hf_repo,
                gguf_filename=m.gguf_filename,
                size_gb=m.size_gb,
                min_ram_gb=m.min_ram_gb,
                description=m.description,
                featured=m.featured,
                downloads=m.downloads,
                task=m.task,
                display_name=m.display_name,
                param_count=_derive_param_count(m),
                quality_tier=quant_tier(extract_quant(m.gguf_filename)),
                installed=m.hf_repo in installed_repos,
                source=ModelSource.NATIVE.value,
            )
        )
    return enriched
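
# Example (sketch): typical TUI flow, combining get_catalog with enrich_catalog.
# ``manager`` is a placeholder for a ModelManager with a list_installed() method.
#     result = get_catalog(ModelTask.CHAT, limit=20)
#     rows = enrich_catalog(result, set(manager.list_installed()))
#     for row in rows:
#         print(row.display_name, row.quality_tier, "installed" if row.installed else "")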