Coverage for src/lilbee/providers/routing_provider.py: 100%

90 statements  

coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

1"""Routing provider: prefix-based dispatch between the SDK backend and llama-cpp.""" 

2 

3from __future__ import annotations 

4 

5import contextlib 

6import logging 

7from collections.abc import Callable 

8from pathlib import Path 

9from typing import Any 

10 

11from lilbee.catalog import is_rerank_ref 

12from lilbee.config import cfg 

13from lilbee.providers.base import ClosableIterator, LLMProvider, ProviderError 

14from lilbee.providers.litellm_sdk import LitellmSdkBackend 

15from lilbee.providers.model_ref import ProviderModelRef, parse_model_ref 

16from lilbee.providers.sdk_llm_provider import SdkLLMProvider 

17 

18log = logging.getLogger(__name__) 

19 

20 

21class RoutingProvider(LLMProvider): 

22 """Dispatches calls based on the model ref prefix. 

23 

24 ``ollama/``, ``openai/``, ``anthropic/``, ``gemini/`` go to the SDK 

25 provider. Other refs (the HuggingFace ``<org>/<repo>/<file>.gguf`` 

26 shape) go to llama-cpp, which resolves them against the native 

27 registry. A registry miss surfaces the native ProviderError 

28 unchanged, rather than silently falling through to a remote backend. 

29 """ 

30 

31 def __init__(self) -> None: 

32 self._llama_cpp: LLMProvider | None = None 

33 self._sdk_provider: SdkLLMProvider | None = None 

34 

35 def _get_llama_cpp(self) -> LLMProvider: 

36 if self._llama_cpp is None: 

37 from lilbee.providers.llama_cpp_provider import LlamaCppProvider 

38 

39 self._llama_cpp = LlamaCppProvider() 

40 return self._llama_cpp 

41 

42 def _get_sdk_provider(self) -> SdkLLMProvider: 

43 if self._sdk_provider is None: 

44 self._sdk_provider = SdkLLMProvider( 

45 LitellmSdkBackend(), 

46 base_url=cfg.remote_base_url, 

47 api_key=cfg.llm_api_key, 

48 ) 

49 return self._sdk_provider 

50 

51 def _pick_backend(self, ref: ProviderModelRef) -> LLMProvider: 

52 """Pick the backend for *ref* purely by prefix.""" 

53 if ref.is_remote: 

54 return self._get_sdk_provider() 

55 return self._get_llama_cpp() 

56 

57 def embed(self, texts: list[str]) -> list[list[float]]: 

58 ref = parse_model_ref(cfg.embedding_model) 

59 return self._pick_backend(ref).embed(texts) 

60 

61 def chat( 

62 self, 

63 messages: list[dict[str, str]], 

64 *, 

65 stream: bool = False, 

66 options: dict[str, Any] | None = None, 

67 model: str | None = None, 

68 ) -> str | ClosableIterator[str]: 

69 ref = parse_model_ref(model or cfg.chat_model) 

70 return self._pick_backend(ref).chat(messages, stream=stream, options=options, model=model) 

71 

72 def list_models(self) -> list[str]: 

73 """Return the union of native and SDK-visible models. 

74 

75 Both halves are wrapped so an unreachable remote backend or a 

76 missing native registry does not mask the other. 

77 """ 

78 native: set[str] = set() 

79 with contextlib.suppress(Exception): 

80 native = set(self._get_llama_cpp().list_models()) 

81 sdk = self._get_sdk_provider() 

82 if not sdk.available(): 

83 return sorted(native) 

84 try: 

85 remote = set(sdk.list_models()) 

86 except Exception: 

87 return sorted(native) 

88 return sorted(native | remote) 

89 

90 def list_chat_models(self, provider: str) -> list[str]: 

91 """Delegate to the SDK backend; native llama-cpp has no catalog.""" 

92 sdk = self._get_sdk_provider() 

93 if not sdk.available(): 

94 return [] 

95 return sdk.list_chat_models(provider) 

96 

97 def pull_model(self, model: str, *, on_progress: Callable[..., Any] | None = None) -> None: 

98 """Pull via the SDK backend if installed, otherwise raise.""" 

99 sdk = self._get_sdk_provider() 

100 if not sdk.available(): 

101 raise ProviderError(f"Cannot pull model {model!r}: no pull-capable backend available") 

102 sdk.pull_model(model, on_progress=on_progress) 

103 

104 def show_model(self, model: str) -> dict[str, Any] | None: 

105 """Show model info from the backend selected by the ref prefix.""" 

106 ref = parse_model_ref(model) 

107 return self._pick_backend(ref).show_model(model) 

108 

109 def get_capabilities(self, model: str) -> list[str]: 

110 """Return capability tags from the backend selected by the ref prefix.""" 

111 ref = parse_model_ref(model) 

112 return self._pick_backend(ref).get_capabilities(model) 

113 

114 def rerank(self, query: str, candidates: list[str]) -> list[float]: 

115 """Dispatch rerank to the backend that owns ``cfg.reranker_model``. 

116 

117 Native GGUF refs go to llama-cpp; hosted refs go through the SDK 

118 provider. Raises ``ProviderError`` when ``cfg.reranker_model`` is 

119 empty or the selected backend does not support reranking. 

120 """ 

121 if not cfg.reranker_model: 

122 raise ProviderError("No reranker configured. Set cfg.reranker_model first.") 

123 if _is_native_rerank_ref(cfg.reranker_model): 

124 return self._get_llama_cpp().rerank(query, candidates) 

125 sdk = self._get_sdk_provider() 

126 if not sdk.supports_rerank(): 

127 raise ProviderError( 

128 f"Cannot rerank with {cfg.reranker_model!r}: " 

129 "hosted rerank backend not available. " 

130 "Install the 'litellm' extra to enable hosted reranking." 

131 ) 

132 return sdk.rerank(query, candidates) 

133 

134 def supports_rerank(self) -> bool: 

135 """Capability probe: can the routed backend rerank if configured? 

136 

137 Pure capability check, NOT "a reranker is currently active". An 

138 empty ``cfg.reranker_model`` returns ``True`` so the settings UI 

139 keeps the picker visible; callers that need to know whether 

140 reranking is actually configured must check ``bool(cfg.reranker_model)`` 

141 separately. Delegates to the backend that would handle the 

142 configured model when one is set. 

143 """ 

144 model = cfg.reranker_model 

145 if not model: 

146 return True 

147 if _is_native_rerank_ref(model): 

148 return self._get_llama_cpp().supports_rerank() 

149 return self._get_sdk_provider().supports_rerank() 

150 

151 def shutdown(self) -> None: 

152 """Shut down sub-providers to release resources.""" 

153 if self._llama_cpp is not None: 

154 self._llama_cpp.shutdown() 

155 if self._sdk_provider is not None: 

156 self._sdk_provider.shutdown() 

157 

158 def invalidate_load_cache(self, model_path: Path | None = None) -> None: 

159 """Forward to the native side only; the SDK side has no local cache.""" 

160 if self._llama_cpp is not None: 

161 self._llama_cpp.invalidate_load_cache(model_path) 

162 

163 

164def _is_native_rerank_ref(model: str) -> bool: 

165 """Return True if *model* resolves to a featured rerank catalog entry.""" 

166 return is_rerank_ref(model)
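
A minimal usage sketch of the routed dispatch follows. It is not part of the covered module, and the model refs in it are illustrative placeholders rather than entries from any real registry or catalog.

from lilbee.providers.routing_provider import RoutingProvider

provider = RoutingProvider()

# Remote-prefixed refs ("ollama/", "openai/", "anthropic/", "gemini/") are parsed
# by parse_model_ref and routed to the SDK provider; the ref below is a placeholder.
reply = provider.chat(
    [{"role": "user", "content": "hello"}],
    model="openai/example-chat-model",
)

# A HuggingFace-shaped GGUF ref has no remote prefix, so it routes to llama-cpp;
# an unknown ref surfaces the native ProviderError rather than falling through.
info = provider.show_model("example-org/example-repo/example.gguf")

# Pure capability probe: returns True even when cfg.reranker_model is unset,
# so a settings UI can keep the reranker picker visible.
can_rerank = provider.supports_rerank()

provider.shutdown()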