Coverage for src/lilbee/catalog.py: 100%

545 statements


"""Model catalog — discovers available GGUF models from HuggingFace.

Three levels:
1. Featured models — curated favorites (hardcoded, always available)
2. HF API models — fetched from HuggingFace API, paginated and filterable
3. Combined catalog — featured first, then HF results
"""

import fnmatch
import functools
import io
import logging
import os
import re
import threading
import time
from collections.abc import Callable
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
from typing import Any, NamedTuple

import httpx
from huggingface_hub import ModelInfo
from huggingface_hub.hf_api import RepoSibling
from huggingface_hub.utils import HFValidationError, validate_repo_id
from pydantic import BaseModel
from tqdm.auto import tqdm as _base_tqdm

from lilbee.cancellation import TaskCancelled
from lilbee.model_manager import ModelSource
from lilbee.models import ModelTask
from lilbee.registry import ModelManifest, ModelRegistry

# circular: config.py -> catalog (via the per-role task validator). cfg is
# imported lazily so this module can load before Config() finishes init.

log = logging.getLogger(__name__)


def _cfg() -> Any:
    """Lazy accessor for the global ``cfg`` singleton (see circular-import note)."""
    from lilbee.config import cfg

    return cfg


def __getattr__(name: str) -> Any:
    """Expose ``catalog.cfg`` lazily so ``monkeypatch.setattr(catalog.cfg, ...)`` still works."""
    if name == "cfg":
        from lilbee.config import cfg

        return cfg
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


HF_API_URL = "https://huggingface.co/api/models"


@dataclass
class DownloadProgress:
    """Human-readable snapshot of download progress.

    ``percent`` is a float (0.0 to 100.0) so the ProgressBar renders smooth
    fractional movement during multi-GB downloads. Call sites that need
    an integer for display format it themselves.
    """

    percent: float
    detail: str
    is_cache_hit: bool


ProgressCallback = Callable[[int, int], None]
_BYTES_PER_MB = 1024 * 1024


def make_download_callback(
    on_update: Callable[[DownloadProgress], None],
    *,
    throttle_interval: float = 0.1,
) -> ProgressCallback:
    """Build a download progress callback that converts bytes to human-readable state.
    *on_update(progress: DownloadProgress)* is called at most once per
    ``throttle_interval`` seconds with a float percentage (0.0 to 100.0), a
    ``"<done>/<total> MB"`` detail string, and a cache-hit flag. Both the
    catalog and setup screens use this so byte-to-MB conversion and
    cache-hit detection aren't duplicated.
    """
    last_update_time = 0.0
    seen_partial = False

    def _on_progress(downloaded: int, total: int) -> None:
        nonlocal last_update_time, seen_partial

        if total > 0 and downloaded >= total and not seen_partial:
            on_update(
                DownloadProgress(percent=100.0, detail="already downloaded", is_cache_hit=True)
            )
            return
        seen_partial = True

        now = time.monotonic()
        if now - last_update_time < throttle_interval:
            return
        last_update_time = now

        mb_done = downloaded / _BYTES_PER_MB
        if total > 0:
            pct = min(downloaded * 100.0 / total, 100.0)
            mb_total = total / _BYTES_PER_MB
            on_update(
                DownloadProgress(
                    percent=pct,
                    detail=f"{mb_done:.0f}/{mb_total:.0f} MB",
                    is_cache_hit=False,
                )
            )
        else:
            on_update(DownloadProgress(percent=0.0, detail=f"{mb_done:.0f} MB", is_cache_hit=False))

    return _on_progress
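
# Example (sketch): wiring the callback into a download. `_show` and `entry` are
# placeholder names; `_show` stands in for whatever UI widget a call site updates,
# and `entry` for some CatalogModel.
#
#     def _show(p: DownloadProgress) -> None:
#         print(f"{p.percent:.0f}%  {p.detail}" + ("  (cached)" if p.is_cache_hit else ""))
#
#     progress_cb = make_download_callback(_show, throttle_interval=0.5)
#     download_model(entry, on_progress=progress_cb)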



class _CallbackProgressBar(_base_tqdm):
    """tqdm subclass that forwards progress to a plain callback.
    Fully suppresses terminal output by disabling tqdm rendering and redirecting
    its file handle to an in-memory sink — prevents ANSI escape sequences from leaking
    into Textual's managed terminal.

    Overrides ``get_lock`` to return a threading lock instead of tqdm's default
    multiprocessing lock. Vanilla tqdm acquires ``self._lock`` even on the
    ``disable=True`` path (std.py:988), and the multiprocessing lock's lazy init
    raises ``ValueError`` when ``sys.stderr.fileno() == -1`` (Textual, Jupyter,
    pytest capture). A thread lock sidesteps that fd handling entirely.
    """

    _lock = threading.RLock()
    _callback: Any = None

    @classmethod
    def get_lock(cls) -> threading.RLock:
        return cls._lock

    def __init__(self, *args: Any, **kwargs: Any):
        kwargs["disable"] = True
        kwargs["file"] = io.StringIO()  # absorb any accidental tqdm output
        super().__init__(*args, **kwargs)
        self._cumulative = 0

    def update(self, n: float = 1) -> bool | None:
        self._cumulative += int(n)
        if self._callback is not None:
            total = self.total if self.total is not None else 0
            self._callback(int(self._cumulative), int(total))
        return None


class _ProgressTracker:
    """Wraps a tqdm_class to detect whether progress updates actually fired."""

    def __init__(self, callback: Any) -> None:
        self.was_used = False
        self._callback = callback

    def make_tqdm_class(self) -> type[_base_tqdm]:
        tracker = self

        class _Cls(_CallbackProgressBar):
            _callback = staticmethod(tracker._callback)

            def update(self, n: float = 1) -> bool | None:
                tracker.was_used = True
                return super().update(n)

        return _Cls


class _HfGgufMeta(BaseModel):
    """GGUF metadata returned by the HF API when expand=gguf is requested.

    ModelInfo.gguf is typed as ``dict | None`` upstream, so we validate it ourselves.
    """

    total: int = 0
    architecture: str = ""
    context_length: int = 0


class DownloadConfig(BaseModel):
    model_config = {"arbitrary_types_allowed": True}

    repo_id: str
    filename: str
    token: str | None
    force_download: bool = False
    cache_dir: str | None = None
    tqdm_class: Any = None


_DEFAULT_TIMEOUT = 30.0

# Fields to request from the HF listing API via ?expand=.
# Without expand, the default response omits siblings, cardData, and gguf.
_HF_EXPAND_FIELDS: list[str] = ["gguf", "siblings", "downloads", "pipeline_tag", "cardData"]


@dataclass(frozen=True)
class CatalogModel:
    """One catalog entry, keyed by HuggingFace repo. ``gguf_filename`` may be a glob."""

    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: str
    recommended: bool = False

    @property
    def ref(self) -> str:
        """Browse-time ref (the HF repo); concrete filename is resolved at install."""
        return self.hf_repo

    @property
    def display_name(self) -> str:
        """Human-readable label derived from the HuggingFace repo id."""
        return clean_display_name(self.hf_repo)


@dataclass(frozen=True)
class CatalogResult:
    """Paginated catalog result."""

    total: int
    limit: int
    offset: int
    models: list[CatalogModel]
    has_more: bool = False


@dataclass(frozen=True)
class _HfPage:
    """Internal: one page of HuggingFace API results."""

    models: list[CatalogModel]
    has_more: bool


@dataclass(frozen=True)
class ModelVariant:
    """One quantization within a model family. ``filename`` may be a glob."""

    hf_repo: str
    filename: str
    param_count: str
    quant: str
    size_mb: int
    recommended: bool
    mmproj_filename: str = ""


@dataclass(frozen=True)
class ModelFamily:
    """A group of related model variants (e.g. Qwen3 in multiple sizes)."""

    slug: str  # family slug for building refs: "qwen3"
    name: str  # display name: "Qwen3"
    task: str
    description: str
    variants: tuple[ModelVariant, ...]


def _load_featured() -> tuple[
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
]:
    """Load featured models from the TOML file; called once at import time."""
    import tomllib

    toml_path = Path(__file__).parent / "featured_models.toml"
    with open(toml_path, "rb") as f:
        data = tomllib.load(f)

    def _build(task: ModelTask) -> tuple[CatalogModel, ...]:
        return tuple(
            CatalogModel(
                hf_repo=m["hf_repo"],
                gguf_filename=m["gguf_filename"],
                size_gb=m["size_gb"],
                min_ram_gb=m["min_ram_gb"],
                description=m["description"],
                featured=True,
                downloads=0,
                task=task,
                recommended=m.get("recommended", False),
            )
            for m in data.get(task, [])
        )

    return (
        _build(ModelTask.CHAT),
        _build(ModelTask.EMBEDDING),
        _build(ModelTask.VISION),
        _build(ModelTask.RERANK),
    )


FEATURED_CHAT, FEATURED_EMBEDDING, FEATURED_VISION, FEATURED_RERANK = _load_featured()


# Maps vision catalog entries to their mmproj (CLIP projection) filenames.
# Vision models need both the main GGUF and the mmproj file to work.
# Keys are hf_repo identifiers; values are glob patterns resolved at download time.
# Every FEATURED_VISION entry MUST have a corresponding key here.
_DEFAULT_MMPROJ_PATTERN = "*mmproj*.gguf"

VISION_MMPROJ_FILES: dict[str, str] = {
    "noctrex/LightOnOCR-2-1B-GGUF": _DEFAULT_MMPROJ_PATTERN,
}

FEATURED_ALL: tuple[CatalogModel, ...] = (
    FEATURED_CHAT + FEATURED_EMBEDDING + FEATURED_VISION + FEATURED_RERANK
)

_FAMILY_NAME_RE = re.compile(r"^(.+?)\s+\d")
PARAM_COUNT_RE = re.compile(r"(\d+\.?\d*B)", re.IGNORECASE)


def _extract_family_name(model_name: str) -> str:
    """Extract the family name by stripping the trailing parameter count.
    Applies clean_display_name first to strip -GGUF, -Instruct, etc.

    "Qwen3 8B" -> "Qwen3", "Qwen3-Coder 30B A3B" -> "Qwen3-Coder",
    "Nomic Embed Text v1.5" -> "Nomic Embed Text v1.5" (no trailing number pattern).
    """
    cleaned = clean_display_name(model_name)
    m = _FAMILY_NAME_RE.match(cleaned)
    return m.group(1) if m else cleaned


def extract_quant(filename: str) -> str:
    """Extract the GGUF quantization label (e.g. ``Q4_K_M``) from a filename."""
    m = re.search(r"(Q\d[A-Z0-9_]*)", filename, re.IGNORECASE)
    return m.group(1).upper() if m else ""



def _derive_param_count(model: CatalogModel) -> str:
    """Parse the ``7B``-style param count from the display name; ``""`` if absent."""
    match = PARAM_COUNT_RE.search(model.display_name)
    return match.group(1) if match else ""


def _catalog_to_variant(model: CatalogModel) -> ModelVariant:
    """Convert a CatalogModel to a ModelVariant."""
    return ModelVariant(
        hf_repo=model.hf_repo,
        filename=model.gguf_filename,
        param_count=_derive_param_count(model),
        quant=extract_quant(model.gguf_filename),
        size_mb=int(model.size_gb * 1024),
        recommended=model.recommended,
    )


def _family_slug(display_name: str) -> str:
    """Stable slug for a family, derived from its display name."""
    return _extract_family_name(display_name).lower().replace(" ", "-")


def _build_families(models: tuple[CatalogModel, ...], task: str) -> list[ModelFamily]:
    """Group CatalogModels into families by display-derived family name."""
    groups: dict[str, list[CatalogModel]] = {}
    order: list[str] = []
    for m in models:
        family = _extract_family_name(m.display_name)
        if family not in groups:
            order.append(family)
        groups.setdefault(family, []).append(m)

    families: list[ModelFamily] = []
    for family_name in order:
        members = groups[family_name]
        representative = next((m for m in members if m.recommended), members[0])
        variants = [_catalog_to_variant(m) for m in members]
        families.append(
            ModelFamily(
                slug=_family_slug(representative.display_name),
                name=family_name,
                task=task,
                description=representative.description,
                variants=tuple(variants),
            )
        )
    return families


def get_families() -> list[ModelFamily]:
    """Get all featured models grouped into families.
    Returns families ordered: chat, then embedding, then vision, then reranker.
    Within each family, variants are ordered smallest to largest, with
    the largest marked as recommended (for multi-variant families).
    """
    return (
        _build_families(FEATURED_CHAT, ModelTask.CHAT)
        + _build_families(FEATURED_EMBEDDING, ModelTask.EMBEDDING)
        + _build_families(FEATURED_VISION, ModelTask.VISION)
        + _build_families(FEATURED_RERANK, ModelTask.RERANK)
    )


_SIZE_RANGES: dict[str, tuple[float, float]] = {
    "small": (0.0, 3.0),
    "medium": (3.0, 10.0),
    "large": (10.0, float("inf")),
}


def _hf_token() -> str | None:
    """Read HuggingFace token from env vars or huggingface_hub login cache."""
    token = os.environ.get("LILBEE_HF_TOKEN") or os.environ.get("HF_TOKEN") or None
    if token:
        return token
    try:
        from huggingface_hub import get_token

        return get_token()
    except Exception:
        return None


def _hf_headers() -> dict[str, str]:
    """Build HTTP headers for HuggingFace API requests."""
    token = _hf_token()
    if token:
        return {"Authorization": f"Bearer {token}"}
    return {}


# TTL cache for HuggingFace API results (5 minutes). The lock guards the
# evict-then-insert path so concurrent TUI workers can't race and hit
# ``RuntimeError: dictionary changed size during iteration``.
_HF_CACHE_TTL = 300
_HF_CACHE_MAX_ENTRIES = 50
_hf_cache: dict[str, tuple[float, _HfPage]] = {}
_hf_cache_lock = threading.Lock()

_EMPTY_HF_PAGE = _HfPage(models=[], has_more=False)

# HF ``?search=`` is a single space-tokenized substring match on the model id.
# Multiple ``search=`` params are silently ignored, so the user's query is
# space-joined onto the GGUF filter into one param value.
_HF_GGUF_SEARCH_TERM = "GGUF"


def _hf_search_value(search: str) -> str:
    """Build the HF ``search=`` value: GGUF plus the user's tokens, space-joined."""
    tokens = [_HF_GGUF_SEARCH_TERM, *search.split()]
    return " ".join(tokens)
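
# Example (sketch): a user query of "qwen 7b" becomes a single search param value,
#     _hf_search_value("qwen 7b")  ->  "GGUF qwen 7b"
# so the GGUF filter and the user's tokens travel in one ``search=`` parameter.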



def _fetch_hf_models(
    pipeline_tag: str = "text-generation",
    sort: str = "downloads",
    limit: int = 50,
    offset: int = 0,
    library: str | None = None,
    search: str = "",
) -> _HfPage:
    """Fetch GGUF models from HuggingFace API with 5-minute cache.

    Returns an ``_HfPage`` with a ``has_more`` flag derived from the
    ``Link: <...>; rel="next"`` response header (RFC 5988), the same
    mechanism the ``huggingface_hub`` library uses internally.
    """
    search_value = _hf_search_value(search)
    cache_key = f"{pipeline_tag}:{sort}:{limit}:{offset}:{library}:{search_value}"
    now = time.monotonic()
    with _hf_cache_lock:
        expired = [k for k, (ts, _) in _hf_cache.items() if now - ts >= _HF_CACHE_TTL]
        for k in expired:
            del _hf_cache[k]

        cached = _hf_cache.get(cache_key)
        if cached and now - cached[0] < _HF_CACHE_TTL:
            return cached[1]

    params = httpx.QueryParams(
        pipeline_tag=pipeline_tag,
        search=search_value,
        sort=sort,
        limit=limit,
        skip=offset,
        expand=_HF_EXPAND_FIELDS,
    )
    if library:
        params = params.add("library", library)
    try:
        resp = httpx.get(HF_API_URL, params=params, timeout=_DEFAULT_TIMEOUT, headers=_hf_headers())
        if resp.status_code >= 400:
            log.warning("HuggingFace API returned HTTP %d", resp.status_code)
            return _EMPTY_HF_PAGE
        data = resp.json()
    except (httpx.HTTPError, ValueError) as exc:
        log.warning("Failed to fetch models from HuggingFace: %s", exc)
        return _EMPTY_HF_PAGE

    has_more = "next" in resp.links

    models: list[CatalogModel] = []
    for raw in data:
        if not raw.get("id"):
            continue
        item = ModelInfo(**raw)
        card_desc = item.card_data.get("description", "") if item.card_data else ""
        model_desc = card_desc
        gguf_meta = _HfGgufMeta(**(item.gguf or {}))
        if gguf_meta.total > 0:
            size_gb = round(gguf_meta.total / (1024**3), 1)
        else:
            size_gb = _estimate_size_from_siblings(item.siblings or [])
        task = _pipeline_to_task(item.pipeline_tag or "")
        models.append(
            CatalogModel(
                hf_repo=item.id,
                gguf_filename="*.gguf",
                size_gb=size_gb,
                min_ram_gb=max(2.0, size_gb * 1.5),
                description=model_desc[:120] if model_desc else "",
                featured=False,
                downloads=item.downloads or 0,
                task=task,
            )
        )
    page = _HfPage(models=models, has_more=has_more)
    with _hf_cache_lock:
        _hf_cache[cache_key] = (now, page)
        if len(_hf_cache) > _HF_CACHE_MAX_ENTRIES:
            oldest_key = min(_hf_cache, key=lambda k: _hf_cache[k][0])
            del _hf_cache[oldest_key]
    return page
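
# Example (sketch): identical arguments within the 5-minute TTL hit the in-process
# cache instead of the network, assuming the first call succeeded and was stored.
#     page_1 = _fetch_hf_models(pipeline_tag="text-generation", search="llama")
#     page_2 = _fetch_hf_models(pipeline_tag="text-generation", search="llama")
#     assert page_2 is page_1  # second call served from _hf_cache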



def _has_gguf_siblings(siblings: list[RepoSibling]) -> bool:
    """Return True if the sibling list contains at least one .gguf file."""
    return any(s.rfilename.endswith(".gguf") for s in siblings)


def _estimate_size_from_siblings(siblings: list[RepoSibling]) -> float:
    """Estimate model size in GB from the largest GGUF file in siblings."""
    max_bytes = 0
    for sib in siblings:
        if sib.rfilename.endswith(".gguf"):
            max_bytes = max(max_bytes, sib.size or 0)
    if max_bytes > 0:
        return round(max_bytes / (1024**3), 1)
    return 0.0  # unknown — display as "?" in UI


def _search_blob(m: CatalogModel) -> str:
    """Lowercased join of searchable fields on a catalog row.

    Null char joins the fields so a search term never straddles them.
    """
    return f"{m.display_name}\0{m.hf_repo}\0{m.description}".lower()


def get_catalog(
    task: str | None = None,
    *,
    search: str = "",
    size: str | None = None,
    installed: bool | None = None,
    featured: bool | None = None,
    sort: str = "featured",
    limit: int = 20,
    offset: int = 0,
    model_manager: Any = None,
) -> CatalogResult:
    """Get paginated, filtered catalog of models."""
    # Featured models only on the first page
    all_models = list(FEATURED_ALL) if offset == 0 else []
    hf_has_more = False

    # Optionally fetch from HF API
    if not featured:
        hf_task, hf_library = _task_to_pipeline(task)
        hf_page = _fetch_hf_models(
            pipeline_tag=hf_task,
            limit=limit,
            offset=offset,
            library=hf_library,
            search=search,
        )
        hf_has_more = hf_page.has_more
        # Deduplicate: skip HF models whose repo matches a featured model
        featured_repos = {m.hf_repo for m in FEATURED_ALL}
        hf_models = [m for m in hf_page.models if m.hf_repo not in featured_repos]
        all_models.extend(hf_models)

    # Filter by task
    if task:
        all_models = [m for m in all_models if m.task == task]

    # Filter by search. Single join+lower per model per keystroke instead
    # of four separate lowers + substring checks; the no-match path
    # (the common case) runs four times fewer ``str.lower()`` calls.
    if search:
        search_lower = search.lower()
        all_models = [m for m in all_models if search_lower in _search_blob(m)]

    # Filter by size
    if size and size in _SIZE_RANGES:
        lo, hi = _SIZE_RANGES[size]
        all_models = [m for m in all_models if lo <= m.size_gb < hi]

    # A repo is "installed" if any of its quants has a manifest.
    if installed is not None and model_manager is not None:
        installed_repos = {ref.rsplit("/", 1)[0] for ref in _get_installed_models(model_manager)}
        if installed:
            all_models = [m for m in all_models if m.hf_repo in installed_repos]
        else:
            all_models = [m for m in all_models if m.hf_repo not in installed_repos]

    # Filter by featured status
    if featured is not None:
        all_models = [m for m in all_models if m.featured == featured]

    # Sort
    all_models = _sort_models(all_models, sort)

    total = len(all_models)

    # When HF API pagination is active (offset passed to API), skip local slicing
    # to avoid double-applying the offset. Only slice for featured-only requests.
    paginated = all_models[offset : offset + limit] if featured else all_models[:limit]

    return CatalogResult(
        total=total, limit=limit, offset=offset, models=paginated, has_more=hf_has_more
    )
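
# Example (sketch): first page of chat models under 3 GB matching "qwen",
# sorted by downloads. Argument values are illustrative only.
#     result = get_catalog(ModelTask.CHAT, search="qwen", size="small", sort="downloads")
#     for m in result.models:
#         print(m.display_name, m.size_gb, "GB")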



def _task_to_pipeline(task: str | None) -> tuple[str, str | None]:
    """Map task name to HuggingFace pipeline tag and library filter."""
    mapping: dict[str, tuple[str, str | None]] = {
        ModelTask.CHAT: ("text-generation", None),
        ModelTask.EMBEDDING: ("feature-extraction", "sentence-transformers"),
        ModelTask.VISION: ("image-text-to-text", None),
        ModelTask.RERANK: ("text-classification", None),
    }
    return mapping.get(task or ModelTask.CHAT, ("text-generation", None))


_PIPELINE_TO_TASK: dict[str, str] = {
    "text-generation": ModelTask.CHAT,
    "feature-extraction": ModelTask.EMBEDDING,
    "sentence-similarity": ModelTask.EMBEDDING,
    "image-text-to-text": ModelTask.VISION,
    "image-to-text": ModelTask.VISION,
    "text-classification": ModelTask.RERANK,
    "text-ranking": ModelTask.RERANK,
}


def _pipeline_to_task(pipeline_tag: str) -> str:
    """Map HuggingFace pipeline tag to internal task name."""
    return _PIPELINE_TO_TASK.get(pipeline_tag, ModelTask.CHAT)


def _get_installed_models(model_manager: Any) -> set[str]:
    """Get set of installed model names from model_manager."""
    try:
        return set(model_manager.list_installed())
    except Exception:
        return set()


_SORT_KEYS: dict[str, tuple] = {
    "downloads": (lambda m: m.downloads, True),
    "name": (lambda m: m.display_name.lower(), False),
    "size_asc": (lambda m: m.size_gb, False),
    "size_desc": (lambda m: m.size_gb, True),
    "featured": (lambda m: (not m.featured, -m.downloads), False),
}


def _sort_models(models: list[CatalogModel], sort: str) -> list[CatalogModel]:
    """Sort models according to the specified sort order."""
    key_fn, reverse = _SORT_KEYS.get(sort, _SORT_KEYS["featured"])
    return sorted(models, key=key_fn, reverse=reverse)


class CatalogIndex(NamedTuple):
    """Case-insensitive lookup indexes for find_catalog_entry."""

    by_hf_repo: dict[str, CatalogModel]
    by_full_ref: dict[str, CatalogModel]  # repo + concrete filename


@functools.cache
def _build_catalog_index() -> CatalogIndex:
    """Build case-insensitive lookup indexes for find_catalog_entry."""
    by_hf_repo: dict[str, CatalogModel] = {}
    by_full_ref: dict[str, CatalogModel] = {}
    for m in FEATURED_ALL:
        by_hf_repo.setdefault(m.hf_repo.lower(), m)
        if "*" not in m.gguf_filename:
            by_full_ref[f"{m.hf_repo}/{m.gguf_filename}".lower()] = m
    return CatalogIndex(by_hf_repo, by_full_ref)


def find_catalog_entry(query: str) -> CatalogModel | None:
    """Find a featured model by hf_repo or by ``hf_repo/filename`` ref.

    Tries the query as-is, then strips a trailing ``/<filename>.gguf``,
    then strips a leading non-HF provider prefix (``ollama/``, etc.).
    Case-insensitive; returns ``None`` on miss.
    """
    if not query:
        return None
    idx = _build_catalog_index()
    q = query.lower()
    candidates = [q]
    # Strip the filename for ``<repo>/<filename>.gguf`` queries so the
    # bare-repo index catches featured entries whose gguf_filename is a
    # glob (most are).
    if q.endswith(".gguf") and q.count("/") >= 2:
        candidates.append(q.rsplit("/", 1)[0])
    if "/" in q:
        prefix, rest = q.split("/", 1)
        hf_owners = {r.split("/", 1)[0] for r in idx.by_hf_repo if "/" in r}
        if prefix not in hf_owners:
            candidates.append(rest)
    for c in candidates:
        hit = idx.by_full_ref.get(c) or idx.by_hf_repo.get(c)
        if hit is not None:
            return hit
    return None
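
# Example (sketch): all three query shapes resolve to the same featured entry,
# assuming "Qwen/Qwen3-8B-GGUF" is listed in featured_models.toml.
#     find_catalog_entry("Qwen/Qwen3-8B-GGUF")                       # bare repo
#     find_catalog_entry("qwen/qwen3-8b-gguf/qwen3-8b-q4_k_m.gguf")  # repo + filename
#     find_catalog_entry("ollama/Qwen/Qwen3-8B-GGUF")                # provider prefix stripped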



def is_rerank_ref(model_ref: str) -> bool:
    """Return True iff *model_ref* resolves to a rerank catalog entry."""
    if not model_ref:
        return False
    entry = find_catalog_entry(model_ref)
    return entry is not None and entry.task == ModelTask.RERANK


def _is_hf_repo_id(value: str) -> bool:
    """True if *value* is a well-formed ``owner/name`` HuggingFace repo id."""
    if "/" not in value:
        return False
    try:
        validate_repo_id(value)
    except HFValidationError:
        return False
    return True


def build_adhoc_entry(hf_repo: str, *, task: str = ModelTask.CHAT) -> CatalogModel:
    """Minimal CatalogModel for a non-featured HuggingFace GGUF repo."""
    return CatalogModel(
        hf_repo=hf_repo,
        gguf_filename="*.gguf",
        size_gb=0.0,
        min_ram_gb=2.0,
        description="",
        featured=False,
        downloads=0,
        task=task,
    )


def resolve_pull_target(model: str) -> CatalogModel | None:
    """Resolve *model* to a pullable entry: featured first, then ad-hoc HF."""
    featured = find_catalog_entry(model)
    if featured is not None:
        return featured
    return build_adhoc_entry(model) if _is_hf_repo_id(model) else None


def download_model(entry: CatalogModel, *, on_progress: ProgressCallback | None = None) -> Path:
    """Download a GGUF model from HuggingFace to cfg.models_dir.
    Uses huggingface_hub for resumable downloads, caching, and auth.
    The optional *on_progress(downloaded, total)* callback receives byte counts.
    For vision models, also downloads the mmproj (CLIP projection) file.

    Raises:
        PermissionError: gated repo requiring authentication
        RuntimeError: repo not found or download failure with details
    """
    from huggingface_hub import hf_hub_download
    from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

    _cfg().models_dir.mkdir(parents=True, exist_ok=True)

    filename = resolve_filename(entry)
    dest = _cfg().models_dir / filename
    if dest.exists():
        log.info("Model already downloaded: %s", dest)
        if on_progress is not None:
            size = dest.stat().st_size
            on_progress(size, size)  # Report 100% immediately
        return _finalize_download(entry, dest, on_progress=on_progress)

    log.info("Downloading %s/%s → %s", entry.hf_repo, filename, _cfg().models_dir)
    token = _hf_token()

    tracker = _ProgressTracker(on_progress) if on_progress else None
    config = DownloadConfig(
        repo_id=entry.hf_repo,
        filename=filename,
        token=token,
        cache_dir=str(_cfg().models_dir),
        tqdm_class=tracker.make_tqdm_class() if tracker else None,
    )

    try:
        # HF_HUB_DISABLE_XET is set in lilbee/__init__.py at import time.
        # Setting it here is too late — huggingface_hub.constants already
        # captured the value when this module first imported it.
        cached = Path(hf_hub_download(**config.model_dump(exclude_none=True)))
    except TaskCancelled:
        raise
    except GatedRepoError:
        raise PermissionError(
            f"{entry.hf_repo} requires HuggingFace authentication. "
            "Set HF_TOKEN env var or visit the repo page to request access."
        ) from None
    except RepositoryNotFoundError:
        raise RuntimeError(f"Repository {entry.hf_repo!r} not found on HuggingFace.") from None
    except (httpx.TimeoutException, httpx.ConnectError) as exc:
        raise RuntimeError(f"Network error downloading {entry.hf_repo}: {exc}") from None
    except OSError as exc:
        raise RuntimeError(f"I/O error downloading {entry.hf_repo}: {exc}") from None
    except Exception as exc:
        raise RuntimeError(
            f"Failed to download {entry.hf_repo}: {type(exc).__name__}: {exc}"
        ) from None

    if on_progress:
        actual_size = cached.stat().st_size
        if not tracker or not tracker.was_used:
            log.info("Model found in HuggingFace cache: %s", cached)
        on_progress(actual_size, actual_size)
    dest = cached
    return _finalize_download(entry, dest, on_progress=on_progress)

852 

853def _finalize_download( 

854 entry: CatalogModel, 

855 dest: Path, 

856 *, 

857 on_progress: ProgressCallback | None = None, 

858) -> Path: 

859 """Register the model in the manifest and download mmproj for vision models.""" 

860 _register_model(entry, dest) 

861 if entry.task == ModelTask.VISION: 

862 _download_mmproj(entry, on_progress=on_progress) 

863 return dest 

864 

865 

866def _register_model(entry: CatalogModel, file_path: Path) -> None: 

867 """Create a registry manifest for a downloaded model.""" 

868 registry = ModelRegistry(_cfg().models_dir) 

869 manifest = ModelManifest( 

870 hf_repo=entry.hf_repo, 

871 gguf_filename=file_path.name, 

872 size_bytes=file_path.stat().st_size, 

873 task=entry.task, 

874 downloaded_at=datetime.now(UTC).isoformat(), 

875 ) 

876 try: 

877 registry.install(entry.hf_repo, file_path.name, file_path, manifest) 

878 log.info("Registered %s/%s in manifest", entry.hf_repo, file_path.name) 

879 except Exception: 

880 log.warning("Failed to register manifest for %s", entry.hf_repo, exc_info=True) 

881 

882 

883def _download_mmproj( 

884 entry: CatalogModel, 

885 *, 

886 on_progress: ProgressCallback | None = None, 

887) -> Path | None: 

888 """Download the mmproj (CLIP projection) file for a vision model. 

889 Returns the path to the downloaded file, or None if no mmproj is configured. 

890 The optional ``on_progress`` callback receives ``(downloaded, total)`` byte 

891 counts and is wired through the same tqdm hook used by the main download. 

892 """ 

893 mmproj_pattern = VISION_MMPROJ_FILES.get(entry.hf_repo, _DEFAULT_MMPROJ_PATTERN) 

894 

895 mmproj_filename = _resolve_mmproj_filename(entry.hf_repo, mmproj_pattern) 

896 if not mmproj_filename: 

897 log.warning("Could not resolve mmproj file for %s", entry.hf_repo) 

898 return None 

899 

900 from huggingface_hub import hf_hub_download 

901 

902 tracker = _ProgressTracker(on_progress) if on_progress else None 

903 log.info("Downloading mmproj %s/%s → %s", entry.hf_repo, mmproj_filename, _cfg().models_dir) 

904 path = Path( 

905 hf_hub_download( 

906 repo_id=entry.hf_repo, 

907 filename=mmproj_filename, 

908 cache_dir=str(_cfg().models_dir), 

909 token=_hf_token(), 

910 tqdm_class=tracker.make_tqdm_class() if tracker else None, 

911 ) 

912 ) 

913 if on_progress is not None and (not tracker or not tracker.was_used): 

914 # Cache hit — HF returned the cached path without invoking tqdm. 

915 size = path.stat().st_size 

916 on_progress(size, size) 

917 return path 

918 

919 

920def _resolve_mmproj_filename(hf_repo: str, pattern: str) -> str | None: 

921 """Resolve an mmproj filename pattern to a concrete filename via the HF API.""" 

922 if "*" not in pattern: 

923 return pattern 

924 

925 try: 

926 resp = httpx.get( 

927 f"https://huggingface.co/api/models/{hf_repo}", 

928 timeout=_DEFAULT_TIMEOUT, 

929 headers=_hf_headers(), 

930 ) 

931 resp.raise_for_status() 

932 siblings = resp.json().get("siblings", []) 

933 except Exception as exc: 

934 log.warning("Cannot query mmproj files for %s: %s", hf_repo, exc) 

935 return None 

936 

937 mmproj_files: list[str] = [ 

938 s.get("rfilename", "") for s in siblings if fnmatch.fnmatch(s.get("rfilename", ""), pattern) 

939 ] 

940 if not mmproj_files: 

941 return None 

942 

943 # Prefer F16 over F32 (smaller), and any over BF16 

944 for preference in ("f16", "F16"): 

945 for f in mmproj_files: 

946 if preference in f: 

947 return f 

948 return mmproj_files[0] 

949 

950 

951def _mmproj_in_models_dir_matching(pattern: str) -> Path | None: 

952 """Return the first ``*.gguf`` under ``_cfg().models_dir`` that matches.""" 

953 models_dir: Path = _cfg().models_dir 

954 for p in models_dir.rglob("*.gguf"): 

955 if fnmatch.fnmatch(p.name, pattern) or "mmproj" in p.name.lower(): 

956 return p 

957 return None 

958 

959 

960def find_mmproj_file(model_ref: str) -> Path | None: 

961 """Find the mmproj for a ``FEATURED_VISION`` entry under ``_cfg().models_dir``. 

962 

963 *model_ref* is matched against each featured vision entry's 

964 ``hf_repo``. Returns ``None`` when nothing matches. Never falls back 

965 to an arbitrary mmproj: that cross-contaminates non-vision chat 

966 models (e.g. a chat model would inherit a vision model's mmproj and 

967 be misreported as vision-capable). 

968 """ 

969 if not _cfg().models_dir.exists(): 

970 return None 

971 for entry in FEATURED_VISION: 

972 if model_ref not in entry.hf_repo and entry.hf_repo not in model_ref: 

973 continue 

974 pattern = VISION_MMPROJ_FILES.get(entry.hf_repo, _DEFAULT_MMPROJ_PATTERN) 

975 match = _mmproj_in_models_dir_matching(pattern) 

976 if match is not None: 

977 return match 

978 return None 

979 

980 

981_QUANT_PREFERENCE = ("Q4_K_M", "Q4_K_S", "Q5_K_M", "Q5_K_S", "Q8_0", "Q6_K", "Q3_K_M") 

982 

983 

984def resolve_filename(entry: CatalogModel) -> str: 

985 """Resolve a GGUF filename pattern to the best concrete filename. 

986 For exact filenames, return as-is. For wildcards, query the HF API 

987 and pick the best quantization (prefer Q4_K_M for balance of size/quality). 

988 """ 

989 if "*" not in entry.gguf_filename: 

990 return entry.gguf_filename 

991 

992 try: 

993 resp = httpx.get( 

994 f"https://huggingface.co/api/models/{entry.hf_repo}", 

995 timeout=_DEFAULT_TIMEOUT, 

996 headers=_hf_headers(), 

997 ) 

998 if resp.status_code == 401: 

999 raise PermissionError( 

1000 f"{entry.hf_repo} requires HuggingFace authentication. " 

1001 "Set HF_TOKEN env var or visit the repo page to request access." 

1002 ) 

1003 resp.raise_for_status() 

1004 siblings = resp.json().get("siblings", []) 

1005 except PermissionError: 

1006 raise 

1007 except Exception as exc: 

1008 raise RuntimeError(f"Cannot query files for {entry.hf_repo}: {exc}") from exc 

1009 

1010 gguf_files = [ 

1011 s.get("rfilename", "") for s in siblings if s.get("rfilename", "").endswith(".gguf") 

1012 ] 

1013 if not gguf_files: 

1014 raise RuntimeError(f"No GGUF files found in {entry.hf_repo}") 

1015 

1016 return _pick_best_gguf(gguf_files) 

1017 

1018 

1019def _pick_best_gguf(filenames: list[str]) -> str: 

1020 """Pick the best GGUF file by quantization preference.""" 

1021 for quant in _QUANT_PREFERENCE: 

1022 for f in filenames: 

1023 if quant in f: 

1024 return f 

1025 return filenames[0] 
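
# Example (sketch): the preference order above decides ties, assuming a repo
# ships these three files.
#     _pick_best_gguf(["model-Q8_0.gguf", "model-Q4_K_M.gguf", "model-Q6_K.gguf"])
#     # -> "model-Q4_K_M.gguf"  (Q4_K_M outranks Q8_0 and Q6_K in _QUANT_PREFERENCE)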



def fetch_model_file_size(hf_repo: str) -> float:
    """Fetch the best GGUF file size from HuggingFace tree API.
    Returns size in GB, or 0.0 if unavailable.
    """
    try:
        resp = httpx.get(
            f"https://huggingface.co/api/models/{hf_repo}/tree/main",
            timeout=_DEFAULT_TIMEOUT,
            headers=_hf_headers(),
        )
        resp.raise_for_status()
        files = resp.json()
    except Exception:
        return 0.0

    gguf_files = [
        (f.get("path", ""), f.get("size", 0) or f.get("lfs", {}).get("size", 0))
        for f in files
        if isinstance(f, dict) and f.get("path", "").endswith(".gguf")
    ]
    if not gguf_files:
        return 0.0

    best_name = _pick_best_gguf([name for name, _ in gguf_files])
    size_bytes = next((s for n, s in gguf_files if n == best_name), 0)
    return round(size_bytes / (1024**3), 1) if size_bytes else 0.0


_DISPLAY_NAME_SUFFIXES = re.compile(r"-(GGUF|Instruct|Chat)(?=-|$)", re.IGNORECASE)
_DISPLAY_NAME_DATE_SUFFIX = re.compile(r"-\d{4}$")
_DISPLAY_NAME_META_PREFIX = re.compile(r"^Meta-", re.IGNORECASE)


def clean_display_name(repo_id: str) -> str:
    """Derive a human-friendly display name from a HuggingFace repo ID.
    Strips org prefix, -GGUF/-Instruct/-Chat suffixes, date suffixes (-2507),
    and Meta- prefix. Replaces hyphens with spaces.

    Examples:
        "Qwen/Qwen2.5-7B-Instruct-GGUF" -> "Qwen2.5 7B"
        "meta-llama/Meta-Llama-3-8B" -> "Llama 3 8B"
    """
    name = repo_id.split("/")[-1]
    name = _DISPLAY_NAME_SUFFIXES.sub("", name)
    name = _DISPLAY_NAME_DATE_SUFFIX.sub("", name)
    name = _DISPLAY_NAME_META_PREFIX.sub("", name)
    name = name.replace("-", " ").strip()
    return re.sub(r"\s+", " ", name)


def display_label_for_ref(ref: str) -> str:
    """Render any model ref as a short, human-friendly UI label.

    - Native HF ref (``<repo>/<file>.gguf``): cleaned repo name.
    - Provider-prefixed (``ollama/``, ``openai/`` ...): the part after the prefix.
    - Anything else: returned unchanged.
    """
    if not ref:
        return ""
    if ref.endswith(".gguf") and ref.count("/") >= 2:
        return clean_display_name(ref.rsplit("/", 1)[0])
    if "/" in ref:
        return ref.split("/", 1)[1]
    return ref
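
# Example (sketch): the three ref shapes the docstring describes; refs are illustrative.
#     display_label_for_ref("Qwen/Qwen2.5-7B-Instruct-GGUF/qwen2.5-7b-instruct-q4_k_m.gguf")
#     # -> "Qwen2.5 7B"
#     display_label_for_ref("ollama/llama3.2:3b")   # -> "llama3.2:3b"
#     display_label_for_ref("gpt-4o-mini")          # -> "gpt-4o-mini"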



QUANT_TIERS: dict[str, str] = {
    "Q2_K": "compact",
    "Q3_K_S": "compact",
    "Q3_K_M": "compact",
    "Q3_K_L": "compact",
    "Q4_K_S": "balanced",
    "Q4_K_M": "balanced",
    "Q4_0": "balanced",
    "Q5_K_S": "high quality",
    "Q5_K_M": "high quality",
    "Q6_K": "high quality",
    "Q8_0": "full precision",
    "F16": "unquantized",
    "F32": "unquantized",
}


def quant_tier(quant: str) -> str:
    """Map a quantization label to a human-readable quality tier."""
    if not quant:
        return "—"
    return QUANT_TIERS.get(quant, "—")


@dataclass(frozen=True)
class EnrichedModel:
    """A catalog model enriched with display metadata and install status."""

    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: str
    display_name: str
    param_count: str
    quality_tier: str
    installed: bool
    source: str


def enrich_catalog(result: CatalogResult, installed_refs: set[str]) -> list[EnrichedModel]:
    """Enrich catalog models with display names, quality tiers, and install status.

    *installed_refs* contains the ``hf_repo/filename`` refs returned by
    ``model_manager.list_installed()``. A repo is considered installed
    when at least one of its quants has a manifest.
    """
    installed_repos = {ref.rsplit("/", 1)[0] for ref in installed_refs}
    enriched: list[EnrichedModel] = []
    for m in result.models:
        enriched.append(
            EnrichedModel(
                hf_repo=m.hf_repo,
                gguf_filename=m.gguf_filename,
                size_gb=m.size_gb,
                min_ram_gb=m.min_ram_gb,
                description=m.description,
                featured=m.featured,
                downloads=m.downloads,
                task=m.task,
                display_name=m.display_name,
                param_count=_derive_param_count(m),
                quality_tier=quant_tier(extract_quant(m.gguf_filename)),
                installed=m.hf_repo in installed_repos,
                source=ModelSource.NATIVE.value,
            )
        )
    return enriched
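
# Example (sketch): typical TUI flow, combining get_catalog with enrich_catalog.
# ``manager`` is a placeholder for a ModelManager with a list_installed() method.
#     result = get_catalog(ModelTask.CHAT, limit=20)
#     rows = enrich_catalog(result, set(manager.list_installed()))
#     for row in rows:
#         print(row.display_name, row.quality_tier, "installed" if row.installed else "")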