Coverage for src/lilbee/catalog.py: 100% (545 statements)
1"""Model catalog — discovers available GGUF models from HuggingFace.
3Three levels:
41. Featured models — curated favorites (hardcoded, always available)
52. HF API models — fetched from HuggingFace API, paginated and filterable
63. Combined catalog — featured first, then HF results
7"""
9import fnmatch
10import functools
11import io
12import logging
13import os
14import re
15import threading
16import time
17from collections.abc import Callable
18from dataclasses import dataclass
19from datetime import UTC, datetime
20from pathlib import Path
21from typing import Any, NamedTuple
23import httpx
24from huggingface_hub import ModelInfo
25from huggingface_hub.hf_api import RepoSibling
26from huggingface_hub.utils import HFValidationError, validate_repo_id
27from pydantic import BaseModel
28from tqdm.auto import tqdm as _base_tqdm
30from lilbee.cancellation import TaskCancelled
31from lilbee.model_manager import ModelSource
32from lilbee.models import ModelTask
33from lilbee.registry import ModelManifest, ModelRegistry
35# circular: config.py -> catalog (via the per-role task validator). cfg is
36# imported lazily so this module can load before Config() finishes init.
38log = logging.getLogger(__name__)
41def _cfg() -> Any:
42 """Lazy accessor for the global ``cfg`` singleton (see circular-import note)."""
43 from lilbee.config import cfg
45 return cfg
48def __getattr__(name: str) -> Any:
49 """Expose ``catalog.cfg`` lazily so ``monkeypatch.setattr(catalog.cfg, ...)`` still works."""
50 if name == "cfg":
51 from lilbee.config import cfg
53 return cfg
54 raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
57HF_API_URL = "https://huggingface.co/api/models"
60@dataclass
61class DownloadProgress:
62 """Human-readable snapshot of download progress.
64 ``percent`` is a float (0.0 to 100.0) so the ProgressBar renders smooth
65 fractional movement during multi-GB downloads. Call sites that need
66 an integer for display format it themselves.
67 """
69 percent: float
70 detail: str
71 is_cache_hit: bool
74ProgressCallback = Callable[[int, int], None]
75_BYTES_PER_MB = 1024 * 1024
78def make_download_callback(
79 on_update: Callable[[DownloadProgress], None],
80 *,
81 throttle_interval: float = 0.1,
82) -> ProgressCallback:
83 """Build a download progress callback that converts bytes to human-readable state.
84 *on_update(progress: DownloadProgress)* is called at most once per
85 ``throttle_interval`` seconds with a float percentage (0.0 to 100.0), a
86 ``"<done>/<total> MB"`` detail string, and a cache-hit flag. Both the
87 catalog and setup screens use this so byte-to-MB conversion and
88 cache-hit detection aren't duplicated.
89 """
90 last_update_time = 0.0
91 seen_partial = False
93 def _on_progress(downloaded: int, total: int) -> None:
94 nonlocal last_update_time, seen_partial
96 if total > 0 and downloaded >= total and not seen_partial:
97 on_update(
98 DownloadProgress(percent=100.0, detail="already downloaded", is_cache_hit=True)
99 )
100 return
101 seen_partial = True
103 now = time.monotonic()
104 if now - last_update_time < throttle_interval:
105 return
106 last_update_time = now
108 mb_done = downloaded / _BYTES_PER_MB
109 if total > 0:
110 pct = min(downloaded * 100.0 / total, 100.0)
111 mb_total = total / _BYTES_PER_MB
112 on_update(
113 DownloadProgress(
114 percent=pct,
115 detail=f"{mb_done:.0f}/{mb_total:.0f} MB",
116 is_cache_hit=False,
117 )
118 )
119 else:
120 on_update(DownloadProgress(percent=0.0, detail=f"{mb_done:.0f} MB", is_cache_hit=False))
122 return _on_progress
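
# Usage sketch (illustrative only; the ``_render`` sink below is hypothetical and
# stands in for the TUI progress widget that real call sites pass in):
#
#     def _render(p: DownloadProgress) -> None:
#         suffix = " (cache hit)" if p.is_cache_hit else ""
#         print(f"{p.percent:5.1f}%  {p.detail}{suffix}")
#
#     callback = make_download_callback(_render, throttle_interval=0.25)
#     callback(50 * _BYTES_PER_MB, 200 * _BYTES_PER_MB)  # prints " 25.0%  50/200 MB"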


class _CallbackProgressBar(_base_tqdm):
    """tqdm subclass that forwards progress to a plain callback.

    Fully suppresses terminal output by disabling tqdm rendering and redirecting
    its file handle to a devnull sink — prevents ANSI escape sequences from leaking
    into Textual's managed terminal.

    Overrides ``get_lock`` to return a threading lock instead of tqdm's default
    multiprocessing lock. Vanilla tqdm acquires ``self._lock`` even on the
    ``disable=True`` path (std.py:988), and the multiprocessing lock's lazy init
    raises ``ValueError`` when ``sys.stderr.fileno() == -1`` (Textual, Jupyter,
    pytest capture). A thread lock sidesteps that fd handling entirely.
    """

    _lock = threading.RLock()
    _callback: Any = None

    @classmethod
    def get_lock(cls) -> threading.RLock:
        return cls._lock

    def __init__(self, *args: Any, **kwargs: Any):
        kwargs["disable"] = True
        kwargs["file"] = io.StringIO()  # absorb any accidental tqdm output
        super().__init__(*args, **kwargs)
        self._cumulative = 0

    def update(self, n: float = 1) -> bool | None:
        self._cumulative += int(n)
        if self._callback is not None:
            total = self.total if self.total is not None else 0
            self._callback(int(self._cumulative), int(total))
        return None


class _ProgressTracker:
    """Wraps a tqdm_class to detect whether progress updates actually fired."""

    def __init__(self, callback: Any) -> None:
        self.was_used = False
        self._callback = callback

    def make_tqdm_class(self) -> type[_base_tqdm]:
        tracker = self

        class _Cls(_CallbackProgressBar):
            _callback = staticmethod(tracker._callback)

            def update(self, n: float = 1) -> bool | None:
                tracker.was_used = True
                return super().update(n)

        return _Cls


class _HfGgufMeta(BaseModel):
    """GGUF metadata returned by the HF API when expand=gguf is requested.

    ModelInfo.gguf is typed as ``dict | None`` upstream, so we validate it ourselves.
    """

    total: int = 0
    architecture: str = ""
    context_length: int = 0


class DownloadConfig(BaseModel):
    model_config = {"arbitrary_types_allowed": True}

    repo_id: str
    filename: str
    token: str | None
    force_download: bool = False
    cache_dir: str | None = None
    tqdm_class: Any = None


_DEFAULT_TIMEOUT = 30.0

# Fields to request from the HF listing API via ?expand=.
# Without expand, the default response omits siblings, cardData, and gguf.
_HF_EXPAND_FIELDS: list[str] = ["gguf", "siblings", "downloads", "pipeline_tag", "cardData"]


@dataclass(frozen=True)
class CatalogModel:
    """One catalog entry, keyed by HuggingFace repo. ``gguf_filename`` may be a glob."""

    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: str
    recommended: bool = False

    @property
    def ref(self) -> str:
        """Browse-time ref (the HF repo); concrete filename is resolved at install."""
        return self.hf_repo

    @property
    def display_name(self) -> str:
        """Human-readable label derived from the HuggingFace repo id."""
        return clean_display_name(self.hf_repo)


@dataclass(frozen=True)
class CatalogResult:
    """Paginated catalog result."""

    total: int
    limit: int
    offset: int
    models: list[CatalogModel]
    has_more: bool = False


@dataclass(frozen=True)
class _HfPage:
    """Internal: one page of HuggingFace API results."""

    models: list[CatalogModel]
    has_more: bool


@dataclass(frozen=True)
class ModelVariant:
    """One quantization within a model family. ``filename`` may be a glob."""

    hf_repo: str
    filename: str
    param_count: str
    quant: str
    size_mb: int
    recommended: bool
    mmproj_filename: str = ""


@dataclass(frozen=True)
class ModelFamily:
    """A group of related model variants (e.g. Qwen3 in multiple sizes)."""

    slug: str  # family slug for building refs: "qwen3"
    name: str  # display name: "Qwen3"
    task: str
    description: str
    variants: tuple[ModelVariant, ...]


def _load_featured() -> tuple[
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
    tuple[CatalogModel, ...],
]:
    """Load featured models from the TOML file, cached after first call."""
    import tomllib

    toml_path = Path(__file__).parent / "featured_models.toml"
    with open(toml_path, "rb") as f:
        data = tomllib.load(f)

    def _build(task: ModelTask) -> tuple[CatalogModel, ...]:
        return tuple(
            CatalogModel(
                hf_repo=m["hf_repo"],
                gguf_filename=m["gguf_filename"],
                size_gb=m["size_gb"],
                min_ram_gb=m["min_ram_gb"],
                description=m["description"],
                featured=True,
                downloads=0,
                task=task,
                recommended=m.get("recommended", False),
            )
            for m in data.get(task, [])
        )

    return (
        _build(ModelTask.CHAT),
        _build(ModelTask.EMBEDDING),
        _build(ModelTask.VISION),
        _build(ModelTask.RERANK),
    )


FEATURED_CHAT, FEATURED_EMBEDDING, FEATURED_VISION, FEATURED_RERANK = _load_featured()

# Maps vision catalog entries to their mmproj (CLIP projection) filenames.
# Vision models need both the main GGUF and the mmproj file to work.
# Keys are hf_repo identifiers; values are glob patterns resolved at download time.
# Every FEATURED_VISION entry MUST have a corresponding key here.
_DEFAULT_MMPROJ_PATTERN = "*mmproj*.gguf"

VISION_MMPROJ_FILES: dict[str, str] = {
    "noctrex/LightOnOCR-2-1B-GGUF": _DEFAULT_MMPROJ_PATTERN,
}

FEATURED_ALL: tuple[CatalogModel, ...] = (
    FEATURED_CHAT + FEATURED_EMBEDDING + FEATURED_VISION + FEATURED_RERANK
)


_FAMILY_NAME_RE = re.compile(r"^(.+?)\s+\d")
PARAM_COUNT_RE = re.compile(r"(\d+\.?\d*B)", re.IGNORECASE)


def _extract_family_name(model_name: str) -> str:
    """Extract the family name by stripping the trailing parameter count.

    Applies clean_display_name first to strip -GGUF, -Instruct, etc.

    "Qwen3 8B" -> "Qwen3", "Qwen3-Coder 30B A3B" -> "Qwen3-Coder",
    "Nomic Embed Text v1.5" -> "Nomic Embed Text v1.5" (no trailing number pattern).
    """
    cleaned = clean_display_name(model_name)
    m = _FAMILY_NAME_RE.match(cleaned)
    return m.group(1) if m else cleaned


def extract_quant(filename: str) -> str:
    """Extract the GGUF quantization label (e.g. ``Q4_K_M``) from a filename."""
    m = re.search(r"(Q\d[A-Z0-9_]*)", filename, re.IGNORECASE)
    return m.group(1).upper() if m else ""


def _derive_param_count(model: CatalogModel) -> str:
    """Parse the ``7B``-style param count from the display name; ``""`` if absent."""
    match = PARAM_COUNT_RE.search(model.display_name)
    return match.group(1) if match else ""


def _catalog_to_variant(model: CatalogModel) -> ModelVariant:
    """Convert a CatalogModel to a ModelVariant."""
    return ModelVariant(
        hf_repo=model.hf_repo,
        filename=model.gguf_filename,
        param_count=_derive_param_count(model),
        quant=extract_quant(model.gguf_filename),
        size_mb=int(model.size_gb * 1024),
        recommended=model.recommended,
    )


def _family_slug(display_name: str) -> str:
    """Stable slug for a family, derived from its display name."""
    return _extract_family_name(display_name).lower().replace(" ", "-")


def _build_families(models: tuple[CatalogModel, ...], task: str) -> list[ModelFamily]:
    """Group CatalogModels into families by display-derived family name."""
    groups: dict[str, list[CatalogModel]] = {}
    order: list[str] = []
    for m in models:
        family = _extract_family_name(m.display_name)
        if family not in groups:
            order.append(family)
        groups.setdefault(family, []).append(m)

    families: list[ModelFamily] = []
    for family_name in order:
        members = groups[family_name]
        representative = next((m for m in members if m.recommended), members[0])
        variants = [_catalog_to_variant(m) for m in members]
        families.append(
            ModelFamily(
                slug=_family_slug(representative.display_name),
                name=family_name,
                task=task,
                description=representative.description,
                variants=tuple(variants),
            )
        )
    return families


def get_families() -> list[ModelFamily]:
    """Get all featured models grouped into families.

    Returns families ordered: chat, then embedding, then vision, then reranker.
    Within each family, variants are ordered smallest to largest, with
    the largest marked as recommended (for multi-variant families).
    """
    return (
        _build_families(FEATURED_CHAT, ModelTask.CHAT)
        + _build_families(FEATURED_EMBEDDING, ModelTask.EMBEDDING)
        + _build_families(FEATURED_VISION, ModelTask.VISION)
        + _build_families(FEATURED_RERANK, ModelTask.RERANK)
    )
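
# Illustrative sketch of the grouping (the family slug and variant sizes below are
# hypothetical; the real entries come from featured_models.toml): several featured
# chat entries sharing a display-derived family name collapse into one ModelFamily.
#
#     families = get_families()
#     qwen3 = next(f for f in families if f.slug == "qwen3")
#     # qwen3.task == ModelTask.CHAT
#     # [v.param_count for v in qwen3.variants] -> e.g. ["4B", "8B"]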


_SIZE_RANGES: dict[str, tuple[float, float]] = {
    "small": (0.0, 3.0),
    "medium": (3.0, 10.0),
    "large": (10.0, float("inf")),
}


def _hf_token() -> str | None:
    """Read HuggingFace token from env vars or huggingface_hub login cache."""
    token = os.environ.get("LILBEE_HF_TOKEN") or os.environ.get("HF_TOKEN") or None
    if token:
        return token
    try:
        from huggingface_hub import get_token

        return get_token()
    except Exception:
        return None


def _hf_headers() -> dict[str, str]:
    """Build HTTP headers for HuggingFace API requests."""
    token = _hf_token()
    if token:
        return {"Authorization": f"Bearer {token}"}
    return {}


# TTL cache for HuggingFace API results (5 minutes). The lock guards the
# evict-then-insert path so concurrent TUI workers can't race and hit
# ``RuntimeError: dictionary changed size during iteration``.
_HF_CACHE_TTL = 300
_HF_CACHE_MAX_ENTRIES = 50
_hf_cache: dict[str, tuple[float, _HfPage]] = {}
_hf_cache_lock = threading.Lock()

_EMPTY_HF_PAGE = _HfPage(models=[], has_more=False)

# HF ``?search=`` is a single space-tokenized substring match on the model id.
# Multiple ``search=`` params are silently ignored, so the user's query is
# space-joined onto the GGUF filter into one param value.
_HF_GGUF_SEARCH_TERM = "GGUF"


def _hf_search_value(search: str) -> str:
    """Build the HF ``search=`` value: GGUF plus the user's tokens, space-joined."""
    tokens = [_HF_GGUF_SEARCH_TERM, *search.split()]
    return " ".join(tokens)
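
# Example of the combined search value (no network involved):
#
#     _hf_search_value("qwen coder")  # -> "GGUF qwen coder"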


def _fetch_hf_models(
    pipeline_tag: str = "text-generation",
    sort: str = "downloads",
    limit: int = 50,
    offset: int = 0,
    library: str | None = None,
    search: str = "",
) -> _HfPage:
    """Fetch GGUF models from HuggingFace API with 5-minute cache.

    Returns an ``_HfPage`` with a ``has_more`` flag derived from the
    ``Link: <...>; rel="next"`` response header (RFC 5988), the same
    mechanism the ``huggingface_hub`` library uses internally.
    """
    search_value = _hf_search_value(search)
    cache_key = f"{pipeline_tag}:{sort}:{limit}:{offset}:{library}:{search_value}"
    now = time.monotonic()
    with _hf_cache_lock:
        expired = [k for k, (ts, _) in _hf_cache.items() if now - ts >= _HF_CACHE_TTL]
        for k in expired:
            del _hf_cache[k]

        cached = _hf_cache.get(cache_key)
        if cached and now - cached[0] < _HF_CACHE_TTL:
            return cached[1]

    params = httpx.QueryParams(
        pipeline_tag=pipeline_tag,
        search=search_value,
        sort=sort,
        limit=limit,
        skip=offset,
        expand=_HF_EXPAND_FIELDS,
    )
    if library:
        params = params.add("library", library)
    try:
        resp = httpx.get(HF_API_URL, params=params, timeout=_DEFAULT_TIMEOUT, headers=_hf_headers())
        if resp.status_code >= 400:
            log.warning("HuggingFace API returned HTTP %d", resp.status_code)
            return _EMPTY_HF_PAGE
        data = resp.json()
    except (httpx.HTTPError, ValueError) as exc:
        log.warning("Failed to fetch models from HuggingFace: %s", exc)
        return _EMPTY_HF_PAGE

    has_more = "next" in resp.links

    models: list[CatalogModel] = []
    for raw in data:
        if not raw.get("id"):
            continue
        item = ModelInfo(**raw)
        card_desc = item.card_data.get("description", "") if item.card_data else ""
        model_desc = card_desc
        gguf_meta = _HfGgufMeta(**(item.gguf or {}))
        if gguf_meta.total > 0:
            size_gb = round(gguf_meta.total / (1024**3), 1)
        else:
            size_gb = _estimate_size_from_siblings(item.siblings or [])
        task = _pipeline_to_task(item.pipeline_tag or "")
        models.append(
            CatalogModel(
                hf_repo=item.id,
                gguf_filename="*.gguf",
                size_gb=size_gb,
                min_ram_gb=max(2.0, size_gb * 1.5),
                description=model_desc[:120] if model_desc else "",
                featured=False,
                downloads=item.downloads or 0,
                task=task,
            )
        )
    page = _HfPage(models=models, has_more=has_more)
    with _hf_cache_lock:
        _hf_cache[cache_key] = (now, page)
        if len(_hf_cache) > _HF_CACHE_MAX_ENTRIES:
            oldest_key = min(_hf_cache, key=lambda k: _hf_cache[k][0])
            del _hf_cache[oldest_key]
    return page


def _has_gguf_siblings(siblings: list[RepoSibling]) -> bool:
    """Return True if the sibling list contains at least one .gguf file."""
    return any(s.rfilename.endswith(".gguf") for s in siblings)


def _estimate_size_from_siblings(siblings: list[RepoSibling]) -> float:
    """Estimate model size in GB from the largest GGUF file in siblings."""
    max_bytes = 0
    for sib in siblings:
        if sib.rfilename.endswith(".gguf"):
            max_bytes = max(max_bytes, sib.size or 0)
    if max_bytes > 0:
        return round(max_bytes / (1024**3), 1)
    return 0.0  # unknown — display as "?" in UI


def _search_blob(m: CatalogModel) -> str:
    """Lowercased join of searchable fields on a catalog row.

    Null char joins the fields so a search term never straddles them.
    """
    return f"{m.display_name}\0{m.hf_repo}\0{m.description}".lower()


def get_catalog(
    task: str | None = None,
    *,
    search: str = "",
    size: str | None = None,
    installed: bool | None = None,
    featured: bool | None = None,
    sort: str = "featured",
    limit: int = 20,
    offset: int = 0,
    model_manager: Any = None,
) -> CatalogResult:
    """Get paginated, filtered catalog of models."""
    # Featured models only on the first page
    all_models = list(FEATURED_ALL) if offset == 0 else []
    hf_has_more = False

    # Optionally fetch from HF API
    if not featured:
        hf_task, hf_library = _task_to_pipeline(task)
        hf_page = _fetch_hf_models(
            pipeline_tag=hf_task,
            limit=limit,
            offset=offset,
            library=hf_library,
            search=search,
        )
        hf_has_more = hf_page.has_more
        # Deduplicate: skip HF models whose repo matches a featured model
        featured_repos = {m.hf_repo for m in FEATURED_ALL}
        hf_models = [m for m in hf_page.models if m.hf_repo not in featured_repos]
        all_models.extend(hf_models)

    # Filter by task
    if task:
        all_models = [m for m in all_models if m.task == task]

    # Filter by search. Single join+lower per model per keystroke instead
    # of four separate lowers + substring checks; the no-match path
    # (the common case) runs four times fewer ``str.lower()`` calls.
    if search:
        search_lower = search.lower()
        all_models = [m for m in all_models if search_lower in _search_blob(m)]

    # Filter by size
    if size and size in _SIZE_RANGES:
        lo, hi = _SIZE_RANGES[size]
        all_models = [m for m in all_models if lo <= m.size_gb < hi]

    # A repo is "installed" if any of its quants has a manifest.
    if installed is not None and model_manager is not None:
        installed_repos = {ref.rsplit("/", 1)[0] for ref in _get_installed_models(model_manager)}
        if installed:
            all_models = [m for m in all_models if m.hf_repo in installed_repos]
        else:
            all_models = [m for m in all_models if m.hf_repo not in installed_repos]

    # Filter by featured status
    if featured is not None:
        all_models = [m for m in all_models if m.featured == featured]

    # Sort
    all_models = _sort_models(all_models, sort)

    total = len(all_models)

    # When HF API pagination is active (offset passed to API), skip local slicing
    # to avoid double-applying the offset. Only slice for featured-only requests.
    paginated = all_models[offset : offset + limit] if featured else all_models[:limit]

    return CatalogResult(
        total=total, limit=limit, offset=offset, models=paginated, has_more=hf_has_more
    )
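
# Usage sketch (illustrative; the filter values are assumptions about a typical
# TUI call, not fixtures from this module):
#
#     page = get_catalog(task=ModelTask.CHAT, search="qwen", size="medium", limit=20)
#     for m in page.models:
#         print(m.display_name, m.size_gb, "featured" if m.featured else "")
#     # page.has_more tells the caller whether to request offset=20 next.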


def _task_to_pipeline(task: str | None) -> tuple[str, str | None]:
    """Map task name to HuggingFace pipeline tag and library filter."""
    mapping: dict[str, tuple[str, str | None]] = {
        ModelTask.CHAT: ("text-generation", None),
        ModelTask.EMBEDDING: ("feature-extraction", "sentence-transformers"),
        ModelTask.VISION: ("image-text-to-text", None),
        ModelTask.RERANK: ("text-classification", None),
    }
    return mapping.get(task or ModelTask.CHAT, ("text-generation", None))


_PIPELINE_TO_TASK: dict[str, str] = {
    "text-generation": ModelTask.CHAT,
    "feature-extraction": ModelTask.EMBEDDING,
    "sentence-similarity": ModelTask.EMBEDDING,
    "image-text-to-text": ModelTask.VISION,
    "image-to-text": ModelTask.VISION,
    "text-classification": ModelTask.RERANK,
    "text-ranking": ModelTask.RERANK,
}


def _pipeline_to_task(pipeline_tag: str) -> str:
    """Map HuggingFace pipeline tag to internal task name."""
    return _PIPELINE_TO_TASK.get(pipeline_tag, ModelTask.CHAT)


def _get_installed_models(model_manager: Any) -> set[str]:
    """Get set of installed model names from model_manager."""
    try:
        return set(model_manager.list_installed())
    except Exception:
        return set()


_SORT_KEYS: dict[str, tuple] = {
    "downloads": (lambda m: m.downloads, True),
    "name": (lambda m: m.display_name.lower(), False),
    "size_asc": (lambda m: m.size_gb, False),
    "size_desc": (lambda m: m.size_gb, True),
    "featured": (lambda m: (not m.featured, -m.downloads), False),
}


def _sort_models(models: list[CatalogModel], sort: str) -> list[CatalogModel]:
    """Sort models according to the specified sort order."""
    key_fn, reverse = _SORT_KEYS.get(sort, _SORT_KEYS["featured"])
    return sorted(models, key=key_fn, reverse=reverse)


class CatalogIndex(NamedTuple):
    """Case-insensitive lookup indexes for find_catalog_entry."""

    by_hf_repo: dict[str, CatalogModel]
    by_full_ref: dict[str, CatalogModel]  # repo + concrete filename


@functools.cache
def _build_catalog_index() -> CatalogIndex:
    """Build case-insensitive lookup indexes for find_catalog_entry."""
    by_hf_repo: dict[str, CatalogModel] = {}
    by_full_ref: dict[str, CatalogModel] = {}
    for m in FEATURED_ALL:
        by_hf_repo.setdefault(m.hf_repo.lower(), m)
        if "*" not in m.gguf_filename:
            by_full_ref[f"{m.hf_repo}/{m.gguf_filename}".lower()] = m
    return CatalogIndex(by_hf_repo, by_full_ref)


def find_catalog_entry(query: str) -> CatalogModel | None:
    """Find a featured model by hf_repo or by ``hf_repo/filename`` ref.

    Tries the query as-is, then strips a trailing ``/<filename>.gguf``,
    then strips a leading non-HF provider prefix (``ollama/``, etc.).
    Case-insensitive; returns ``None`` on miss.
    """
    if not query:
        return None
    idx = _build_catalog_index()
    q = query.lower()
    candidates = [q]
    # Strip the filename for ``<repo>/<filename>.gguf`` queries so the
    # bare-repo index catches featured entries whose gguf_filename is a
    # glob (most are).
    if q.endswith(".gguf") and q.count("/") >= 2:
        candidates.append(q.rsplit("/", 1)[0])
    if "/" in q:
        prefix, rest = q.split("/", 1)
        hf_owners = {r.split("/", 1)[0] for r in idx.by_hf_repo if "/" in r}
        if prefix not in hf_owners:
            candidates.append(rest)
    for c in candidates:
        hit = idx.by_full_ref.get(c) or idx.by_hf_repo.get(c)
        if hit is not None:
            return hit
    return None
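
# Illustrative lookups (the repo below is a hypothetical featured entry, shown only
# to demonstrate the three accepted query shapes):
#
#     find_catalog_entry("Qwen/Qwen2.5-7B-Instruct-GGUF")                    # bare repo
#     find_catalog_entry("Qwen/Qwen2.5-7B-Instruct-GGUF/model-Q4_K_M.gguf")  # repo + filename
#     find_catalog_entry("ollama/Qwen/Qwen2.5-7B-Instruct-GGUF")             # provider prefix stripped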


def is_rerank_ref(model_ref: str) -> bool:
    """Return True iff *model_ref* resolves to a rerank catalog entry."""
    if not model_ref:
        return False
    entry = find_catalog_entry(model_ref)
    return entry is not None and entry.task == ModelTask.RERANK


def _is_hf_repo_id(value: str) -> bool:
    """True if *value* is a well-formed ``owner/name`` HuggingFace repo id."""
    if "/" not in value:
        return False
    try:
        validate_repo_id(value)
    except HFValidationError:
        return False
    return True


def build_adhoc_entry(hf_repo: str, *, task: str = ModelTask.CHAT) -> CatalogModel:
    """Minimal CatalogModel for a non-featured HuggingFace GGUF repo."""
    return CatalogModel(
        hf_repo=hf_repo,
        gguf_filename="*.gguf",
        size_gb=0.0,
        min_ram_gb=2.0,
        description="",
        featured=False,
        downloads=0,
        task=task,
    )


def resolve_pull_target(model: str) -> CatalogModel | None:
    """Resolve *model* to a pullable entry: featured first, then ad-hoc HF."""
    featured = find_catalog_entry(model)
    if featured is not None:
        return featured
    return build_adhoc_entry(model) if _is_hf_repo_id(model) else None


def download_model(entry: CatalogModel, *, on_progress: ProgressCallback | None = None) -> Path:
    """Download a GGUF model from HuggingFace to cfg.models_dir.

    Uses huggingface_hub for resumable downloads, caching, and auth.
    The optional *on_progress(downloaded, total)* callback receives byte counts.
    For vision models, also downloads the mmproj (CLIP projection) file.

    Raises:
        PermissionError: gated repo requiring authentication
        RuntimeError: repo not found or download failure with details
    """
    from huggingface_hub import hf_hub_download
    from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError

    _cfg().models_dir.mkdir(parents=True, exist_ok=True)

    filename = resolve_filename(entry)
    dest = _cfg().models_dir / filename
    if dest.exists():
        log.info("Model already downloaded: %s", dest)
        if on_progress is not None:
            size = dest.stat().st_size
            on_progress(size, size)  # Report 100% immediately
        return _finalize_download(entry, dest, on_progress=on_progress)

    log.info("Downloading %s/%s → %s", entry.hf_repo, filename, _cfg().models_dir)
    token = _hf_token()

    tracker = _ProgressTracker(on_progress) if on_progress else None
    config = DownloadConfig(
        repo_id=entry.hf_repo,
        filename=filename,
        token=token,
        cache_dir=str(_cfg().models_dir),
        tqdm_class=tracker.make_tqdm_class() if tracker else None,
    )

    try:
        # HF_HUB_DISABLE_XET is set in lilbee/__init__.py at import time.
        # Setting it here is too late — huggingface_hub.constants already
        # captured the value when this module first imported it.
        cached = Path(hf_hub_download(**config.model_dump(exclude_none=True)))
    except TaskCancelled:
        raise
    except GatedRepoError:
        raise PermissionError(
            f"{entry.hf_repo} requires HuggingFace authentication. "
            "Set HF_TOKEN env var or visit the repo page to request access."
        ) from None
    except RepositoryNotFoundError:
        raise RuntimeError(f"Repository {entry.hf_repo!r} not found on HuggingFace.") from None
    except (httpx.TimeoutException, httpx.ConnectError) as exc:
        raise RuntimeError(f"Network error downloading {entry.hf_repo}: {exc}") from None
    except OSError as exc:
        raise RuntimeError(f"I/O error downloading {entry.hf_repo}: {exc}") from None
    except Exception as exc:
        raise RuntimeError(
            f"Failed to download {entry.hf_repo}: {type(exc).__name__}: {exc}"
        ) from None

    if on_progress:
        actual_size = cached.stat().st_size
        if not tracker or not tracker.was_used:
            log.info("Model found in HuggingFace cache: %s", cached)
        on_progress(actual_size, actual_size)
    dest = cached
    return _finalize_download(entry, dest, on_progress=on_progress)


def _finalize_download(
    entry: CatalogModel,
    dest: Path,
    *,
    on_progress: ProgressCallback | None = None,
) -> Path:
    """Register the model in the manifest and download mmproj for vision models."""
    _register_model(entry, dest)
    if entry.task == ModelTask.VISION:
        _download_mmproj(entry, on_progress=on_progress)
    return dest


def _register_model(entry: CatalogModel, file_path: Path) -> None:
    """Create a registry manifest for a downloaded model."""
    registry = ModelRegistry(_cfg().models_dir)
    manifest = ModelManifest(
        hf_repo=entry.hf_repo,
        gguf_filename=file_path.name,
        size_bytes=file_path.stat().st_size,
        task=entry.task,
        downloaded_at=datetime.now(UTC).isoformat(),
    )
    try:
        registry.install(entry.hf_repo, file_path.name, file_path, manifest)
        log.info("Registered %s/%s in manifest", entry.hf_repo, file_path.name)
    except Exception:
        log.warning("Failed to register manifest for %s", entry.hf_repo, exc_info=True)


def _download_mmproj(
    entry: CatalogModel,
    *,
    on_progress: ProgressCallback | None = None,
) -> Path | None:
    """Download the mmproj (CLIP projection) file for a vision model.

    Returns the path to the downloaded file, or None if no mmproj is configured.
    The optional ``on_progress`` callback receives ``(downloaded, total)`` byte
    counts and is wired through the same tqdm hook used by the main download.
    """
    mmproj_pattern = VISION_MMPROJ_FILES.get(entry.hf_repo, _DEFAULT_MMPROJ_PATTERN)

    mmproj_filename = _resolve_mmproj_filename(entry.hf_repo, mmproj_pattern)
    if not mmproj_filename:
        log.warning("Could not resolve mmproj file for %s", entry.hf_repo)
        return None

    from huggingface_hub import hf_hub_download

    tracker = _ProgressTracker(on_progress) if on_progress else None
    log.info("Downloading mmproj %s/%s → %s", entry.hf_repo, mmproj_filename, _cfg().models_dir)
    path = Path(
        hf_hub_download(
            repo_id=entry.hf_repo,
            filename=mmproj_filename,
            cache_dir=str(_cfg().models_dir),
            token=_hf_token(),
            tqdm_class=tracker.make_tqdm_class() if tracker else None,
        )
    )
    if on_progress is not None and (not tracker or not tracker.was_used):
        # Cache hit — HF returned the cached path without invoking tqdm.
        size = path.stat().st_size
        on_progress(size, size)
    return path


def _resolve_mmproj_filename(hf_repo: str, pattern: str) -> str | None:
    """Resolve an mmproj filename pattern to a concrete filename via the HF API."""
    if "*" not in pattern:
        return pattern

    try:
        resp = httpx.get(
            f"https://huggingface.co/api/models/{hf_repo}",
            timeout=_DEFAULT_TIMEOUT,
            headers=_hf_headers(),
        )
        resp.raise_for_status()
        siblings = resp.json().get("siblings", [])
    except Exception as exc:
        log.warning("Cannot query mmproj files for %s: %s", hf_repo, exc)
        return None

    mmproj_files: list[str] = [
        s.get("rfilename", "") for s in siblings if fnmatch.fnmatch(s.get("rfilename", ""), pattern)
    ]
    if not mmproj_files:
        return None

    # Prefer F16 over F32 (smaller), and any over BF16
    for preference in ("f16", "F16"):
        for f in mmproj_files:
            if preference in f:
                return f
    return mmproj_files[0]


def _mmproj_in_models_dir_matching(pattern: str) -> Path | None:
    """Return the first ``*.gguf`` under ``_cfg().models_dir`` that matches."""
    models_dir: Path = _cfg().models_dir
    for p in models_dir.rglob("*.gguf"):
        if fnmatch.fnmatch(p.name, pattern) or "mmproj" in p.name.lower():
            return p
    return None


def find_mmproj_file(model_ref: str) -> Path | None:
    """Find the mmproj for a ``FEATURED_VISION`` entry under ``_cfg().models_dir``.

    *model_ref* is matched against each featured vision entry's
    ``hf_repo``. Returns ``None`` when nothing matches. Never falls back
    to an arbitrary mmproj: that cross-contaminates non-vision chat
    models (e.g. a chat model would inherit a vision model's mmproj and
    be misreported as vision-capable).
    """
    if not _cfg().models_dir.exists():
        return None
    for entry in FEATURED_VISION:
        if model_ref not in entry.hf_repo and entry.hf_repo not in model_ref:
            continue
        pattern = VISION_MMPROJ_FILES.get(entry.hf_repo, _DEFAULT_MMPROJ_PATTERN)
        match = _mmproj_in_models_dir_matching(pattern)
        if match is not None:
            return match
    return None


_QUANT_PREFERENCE = ("Q4_K_M", "Q4_K_S", "Q5_K_M", "Q5_K_S", "Q8_0", "Q6_K", "Q3_K_M")


def resolve_filename(entry: CatalogModel) -> str:
    """Resolve a GGUF filename pattern to the best concrete filename.

    For exact filenames, return as-is. For wildcards, query the HF API
    and pick the best quantization (prefer Q4_K_M for balance of size/quality).
    """
    if "*" not in entry.gguf_filename:
        return entry.gguf_filename

    try:
        resp = httpx.get(
            f"https://huggingface.co/api/models/{entry.hf_repo}",
            timeout=_DEFAULT_TIMEOUT,
            headers=_hf_headers(),
        )
        if resp.status_code == 401:
            raise PermissionError(
                f"{entry.hf_repo} requires HuggingFace authentication. "
                "Set HF_TOKEN env var or visit the repo page to request access."
            )
        resp.raise_for_status()
        siblings = resp.json().get("siblings", [])
    except PermissionError:
        raise
    except Exception as exc:
        raise RuntimeError(f"Cannot query files for {entry.hf_repo}: {exc}") from exc

    gguf_files = [
        s.get("rfilename", "") for s in siblings if s.get("rfilename", "").endswith(".gguf")
    ]
    if not gguf_files:
        raise RuntimeError(f"No GGUF files found in {entry.hf_repo}")

    return _pick_best_gguf(gguf_files)


def _pick_best_gguf(filenames: list[str]) -> str:
    """Pick the best GGUF file by quantization preference."""
    for quant in _QUANT_PREFERENCE:
        for f in filenames:
            if quant in f:
                return f
    return filenames[0]
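
# Illustrative selection (hypothetical filenames): Q4_K_M wins even though it is
# listed after the larger quants, because _QUANT_PREFERENCE is checked in order.
#
#     _pick_best_gguf(["model-Q8_0.gguf", "model-Q4_K_M.gguf", "model-Q6_K.gguf"])
#     # -> "model-Q4_K_M.gguf"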


def fetch_model_file_size(hf_repo: str) -> float:
    """Fetch the best GGUF file size from HuggingFace tree API.

    Returns size in GB, or 0.0 if unavailable.
    """
    try:
        resp = httpx.get(
            f"https://huggingface.co/api/models/{hf_repo}/tree/main",
            timeout=_DEFAULT_TIMEOUT,
            headers=_hf_headers(),
        )
        resp.raise_for_status()
        files = resp.json()
    except Exception:
        return 0.0

    gguf_files = [
        (f.get("path", ""), f.get("size", 0) or f.get("lfs", {}).get("size", 0))
        for f in files
        if isinstance(f, dict) and f.get("path", "").endswith(".gguf")
    ]
    if not gguf_files:
        return 0.0

    best_name = _pick_best_gguf([name for name, _ in gguf_files])
    size_bytes = next((s for n, s in gguf_files if n == best_name), 0)
    return round(size_bytes / (1024**3), 1) if size_bytes else 0.0


_DISPLAY_NAME_SUFFIXES = re.compile(r"-(GGUF|Instruct|Chat)(?=-|$)", re.IGNORECASE)
_DISPLAY_NAME_DATE_SUFFIX = re.compile(r"-\d{4}$")
_DISPLAY_NAME_META_PREFIX = re.compile(r"^Meta-", re.IGNORECASE)


def clean_display_name(repo_id: str) -> str:
    """Derive a human-friendly display name from a HuggingFace repo ID.

    Strips org prefix, -GGUF/-Instruct/-Chat suffixes, date suffixes (-2507),
    and Meta- prefix. Replaces hyphens with spaces.

    Examples:
        "Qwen/Qwen2.5-7B-Instruct-GGUF" -> "Qwen2.5 7B"
        "meta-llama/Meta-Llama-3-8B" -> "Llama 3 8B"
    """
    name = repo_id.split("/")[-1]
    name = _DISPLAY_NAME_SUFFIXES.sub("", name)
    name = _DISPLAY_NAME_DATE_SUFFIX.sub("", name)
    name = _DISPLAY_NAME_META_PREFIX.sub("", name)
    name = name.replace("-", " ").strip()
    return re.sub(r"\s+", " ", name)


def display_label_for_ref(ref: str) -> str:
    """Render any model ref as a short, human-friendly UI label.

    - Native HF ref (``<repo>/<file>.gguf``): cleaned repo name.
    - Provider-prefixed (``ollama/``, ``openai/`` ...): the part after the prefix.
    - Anything else: returned unchanged.
    """
    if not ref:
        return ""
    if ref.endswith(".gguf") and ref.count("/") >= 2:
        return clean_display_name(ref.rsplit("/", 1)[0])
    if "/" in ref:
        return ref.split("/", 1)[1]
    return ref


QUANT_TIERS: dict[str, str] = {
    "Q2_K": "compact",
    "Q3_K_S": "compact",
    "Q3_K_M": "compact",
    "Q3_K_L": "compact",
    "Q4_K_S": "balanced",
    "Q4_K_M": "balanced",
    "Q4_0": "balanced",
    "Q5_K_S": "high quality",
    "Q5_K_M": "high quality",
    "Q6_K": "high quality",
    "Q8_0": "full precision",
    "F16": "unquantized",
    "F32": "unquantized",
}


def quant_tier(quant: str) -> str:
    """Map a quantization label to a human-readable quality tier."""
    if not quant:
        return "—"
    return QUANT_TIERS.get(quant, "—")


@dataclass(frozen=True)
class EnrichedModel:
    """A catalog model enriched with display metadata and install status."""

    hf_repo: str
    gguf_filename: str
    size_gb: float
    min_ram_gb: float
    description: str
    featured: bool
    downloads: int
    task: str
    display_name: str
    param_count: str
    quality_tier: str
    installed: bool
    source: str


def enrich_catalog(result: CatalogResult, installed_refs: set[str]) -> list[EnrichedModel]:
    """Enrich catalog models with display names, quality tiers, and install status.

    *installed_refs* contains the ``hf_repo/filename`` refs returned by
    ``model_manager.list_installed()``. A repo is considered installed
    when at least one of its quants has a manifest.
    """
    installed_repos = {ref.rsplit("/", 1)[0] for ref in installed_refs}
    enriched: list[EnrichedModel] = []
    for m in result.models:
        enriched.append(
            EnrichedModel(
                hf_repo=m.hf_repo,
                gguf_filename=m.gguf_filename,
                size_gb=m.size_gb,
                min_ram_gb=m.min_ram_gb,
                description=m.description,
                featured=m.featured,
                downloads=m.downloads,
                task=m.task,
                display_name=m.display_name,
                param_count=_derive_param_count(m),
                quality_tier=quant_tier(extract_quant(m.gguf_filename)),
                installed=m.hf_repo in installed_repos,
                source=ModelSource.NATIVE.value,
            )
        )
    return enriched
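
# End-to-end sketch (illustrative; ``model_manager`` is assumed to expose the
# ``list_installed()`` method referenced above and is not defined in this module):
#
#     result = get_catalog(task=ModelTask.CHAT, limit=20)
#     rows = enrich_catalog(result, installed_refs=set(model_manager.list_installed()))
#     for row in rows:
#         print(row.display_name, row.quality_tier, "installed" if row.installed else "")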