Coverage for src/lilbee/providers/routing_provider.py: 100% (90 statements)
1"""Routing provider: prefix-based dispatch between the SDK backend and llama-cpp."""
3from __future__ import annotations
5import contextlib
6import logging
7from collections.abc import Callable
8from pathlib import Path
9from typing import Any
11from lilbee.catalog import is_rerank_ref
12from lilbee.config import cfg
13from lilbee.providers.base import ClosableIterator, LLMProvider, ProviderError
14from lilbee.providers.litellm_sdk import LitellmSdkBackend
15from lilbee.providers.model_ref import ProviderModelRef, parse_model_ref
16from lilbee.providers.sdk_llm_provider import SdkLLMProvider
18log = logging.getLogger(__name__)

class RoutingProvider(LLMProvider):
    """Dispatches calls based on the model ref prefix.

    ``ollama/``, ``openai/``, ``anthropic/``, ``gemini/`` go to the SDK
    provider. Other refs (the HuggingFace ``<org>/<repo>/<file>.gguf``
    shape) go to llama-cpp, which resolves them against the native
    registry. A registry miss surfaces the native ProviderError
    unchanged, rather than silently falling through to a remote backend.
    """

    def __init__(self) -> None:
        self._llama_cpp: LLMProvider | None = None
        self._sdk_provider: SdkLLMProvider | None = None
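
    # Both backends are constructed lazily: the llama-cpp provider is
    # imported inside its getter so the native dependency is only loaded
    # the first time it is needed, and the SDK provider is built from
    # ``cfg.remote_base_url`` / ``cfg.llm_api_key`` on first use.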

    def _get_llama_cpp(self) -> LLMProvider:
        if self._llama_cpp is None:
            from lilbee.providers.llama_cpp_provider import LlamaCppProvider

            self._llama_cpp = LlamaCppProvider()
        return self._llama_cpp

    def _get_sdk_provider(self) -> SdkLLMProvider:
        if self._sdk_provider is None:
            self._sdk_provider = SdkLLMProvider(
                LitellmSdkBackend(),
                base_url=cfg.remote_base_url,
                api_key=cfg.llm_api_key,
            )
        return self._sdk_provider

    def _pick_backend(self, ref: ProviderModelRef) -> LLMProvider:
        """Pick the backend for *ref* purely by prefix."""
        if ref.is_remote:
            return self._get_sdk_provider()
        return self._get_llama_cpp()
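
    # Routing examples (illustrative refs, not real model names):
    #   "openai/gpt-4o-mini"          -> SDK provider (remote prefix)
    #   "ollama/llama3"               -> SDK provider (remote prefix)
    #   "org/repo/model-q4_k_m.gguf"  -> llama-cpp (native registry)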

    def embed(self, texts: list[str]) -> list[list[float]]:
        ref = parse_model_ref(cfg.embedding_model)
        return self._pick_backend(ref).embed(texts)

    def chat(
        self,
        messages: list[dict[str, str]],
        *,
        stream: bool = False,
        options: dict[str, Any] | None = None,
        model: str | None = None,
    ) -> str | ClosableIterator[str]:
        ref = parse_model_ref(model or cfg.chat_model)
        return self._pick_backend(ref).chat(messages, stream=stream, options=options, model=model)

    def list_models(self) -> list[str]:
        """Return the union of native and SDK-visible models.

        Both halves are wrapped so an unreachable remote backend or a
        missing native registry does not mask the other.
        """
        native: set[str] = set()
        with contextlib.suppress(Exception):
            native = set(self._get_llama_cpp().list_models())
        sdk = self._get_sdk_provider()
        if not sdk.available():
            return sorted(native)
        try:
            remote = set(sdk.list_models())
        except Exception:
            return sorted(native)
        return sorted(native | remote)
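
    # Degradation is one-sided by design: an unreachable SDK backend still
    # returns the native registry entries, and an empty native registry
    # still lets the remote listing through.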

    def list_chat_models(self, provider: str) -> list[str]:
        """Delegate to the SDK backend; native llama-cpp has no catalog."""
        sdk = self._get_sdk_provider()
        if not sdk.available():
            return []
        return sdk.list_chat_models(provider)

    def pull_model(self, model: str, *, on_progress: Callable[..., Any] | None = None) -> None:
        """Pull via the SDK backend if installed, otherwise raise."""
        sdk = self._get_sdk_provider()
        if not sdk.available():
            raise ProviderError(f"Cannot pull model {model!r}: no pull-capable backend available")
        sdk.pull_model(model, on_progress=on_progress)

    def show_model(self, model: str) -> dict[str, Any] | None:
        """Show model info from the backend selected by the ref prefix."""
        ref = parse_model_ref(model)
        return self._pick_backend(ref).show_model(model)

    def get_capabilities(self, model: str) -> list[str]:
        """Return capability tags from the backend selected by the ref prefix."""
        ref = parse_model_ref(model)
        return self._pick_backend(ref).get_capabilities(model)

    def rerank(self, query: str, candidates: list[str]) -> list[float]:
        """Dispatch rerank to the backend that owns ``cfg.reranker_model``.

        Native GGUF refs go to llama-cpp; hosted refs go through the SDK
        provider. Raises ``ProviderError`` when ``cfg.reranker_model`` is
        empty or the selected backend does not support reranking.
        """
        if not cfg.reranker_model:
            raise ProviderError("No reranker configured. Set cfg.reranker_model first.")
        if _is_native_rerank_ref(cfg.reranker_model):
            return self._get_llama_cpp().rerank(query, candidates)
        sdk = self._get_sdk_provider()
        if not sdk.supports_rerank():
            raise ProviderError(
                f"Cannot rerank with {cfg.reranker_model!r}: "
                "hosted rerank backend not available. "
                "Install the 'litellm' extra to enable hosted reranking."
            )
        return sdk.rerank(query, candidates)
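
    # Dispatch note: refs that resolve to a featured rerank catalog entry
    # (see ``_is_native_rerank_ref``) are scored locally by llama-cpp; any
    # other ref goes through the SDK provider and needs the 'litellm' extra.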

    def supports_rerank(self) -> bool:
        """Capability probe: can the routed backend rerank if configured?

        Pure capability check, NOT "a reranker is currently active". An
        empty ``cfg.reranker_model`` returns ``True`` so the settings UI
        keeps the picker visible; callers that need to know whether
        reranking is actually configured must check ``bool(cfg.reranker_model)``
        separately. Delegates to the backend that would handle the
        configured model when one is set.
        """
        model = cfg.reranker_model
        if not model:
            return True
        if _is_native_rerank_ref(model):
            return self._get_llama_cpp().supports_rerank()
        return self._get_sdk_provider().supports_rerank()
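
    # Typical caller pattern (sketch): a settings UI keeps the reranker
    # picker visible while ``supports_rerank()`` is true and treats
    # ``bool(cfg.reranker_model)`` as the "reranking is active" signal.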

    def shutdown(self) -> None:
        """Shut down sub-providers to release resources."""
        if self._llama_cpp is not None:
            self._llama_cpp.shutdown()
        if self._sdk_provider is not None:
            self._sdk_provider.shutdown()

    def invalidate_load_cache(self, model_path: Path | None = None) -> None:
        """Forward to the native side only; the SDK side has no local cache."""
        if self._llama_cpp is not None:
            self._llama_cpp.invalidate_load_cache(model_path)

def _is_native_rerank_ref(model: str) -> bool:
    """Return True if *model* resolves to a featured rerank catalog entry."""
    return is_rerank_ref(model)
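
# Usage sketch (illustrative; assumes ``cfg`` is configured and the matching
# backend extra is installed -- the model ref below is a placeholder).
if __name__ == "__main__":
    provider = RoutingProvider()
    try:
        # A hosted prefix such as "openai/" routes to the SDK backend; a
        # "<org>/<repo>/<file>.gguf" ref would route to llama-cpp instead.
        reply = provider.chat(
            [{"role": "user", "content": "Say hello."}],
            model="openai/gpt-4o-mini",
        )
        print(reply)
    finally:
        provider.shutdown()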