Coverage for src/lilbee/providers/sdk_backend.py: 100% (56 statements)

1"""Protocol and value types for SDK-backed LLM backends. 

2 

3A backend hides one third-party SDK. The ``SdkLLMProvider`` speaks to 

4backends exclusively through the ``LlmSdkBackend`` Protocol and the 

5value types defined here, so SDK response objects never leak outside 

6the adapter. 

7 

8This module is intentionally dependency-free (no SDK imports, no 

9lilbee provider imports beyond the shared base types). 

10""" 

11 

12from __future__ import annotations 

13 

14from collections.abc import Callable, Iterator 

15from dataclasses import dataclass, field 

16from typing import Any, Protocol 

17 

18from lilbee.providers.model_ref import ProviderModelRef 

19 

# Single source of truth for per-provider API key configuration.
# Maps (provider_name, config_field, env_var, display_label). Backend-agnostic:
# OpenAI-compatible SDKs all read these env vars at call time.
PROVIDER_KEYS: tuple[tuple[str, str, str, str], ...] = (
    ("openai", "openai_api_key", "OPENAI_API_KEY", "OpenAI"),
    ("anthropic", "anthropic_api_key", "ANTHROPIC_API_KEY", "Anthropic"),
    ("gemini", "gemini_api_key", "GEMINI_API_KEY", "Gemini"),
)

# Derived set of config field names (for checking which updates touch API keys).
API_KEY_FIELDS: frozenset[str] = frozenset(t[1] for t in PROVIDER_KEYS)
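
# A minimal sketch of how a caller might resolve a key from PROVIDER_KEYS.
# Illustrative only: the ``config`` mapping and the os.environ fallback are
# assumptions about the caller, not part of this module's API.
#
#     import os
#
#     def resolve_api_key(provider: str, config: dict[str, str]) -> str | None:
#         for name, config_field, env_var, _label in PROVIDER_KEYS:
#             if name == provider:
#                 return config.get(config_field) or os.environ.get(env_var)
#         return None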

# Display names for the active backend the SDK is talking to. The
# adapter's own identity is exposed separately via provider_name.
OLLAMA_BACKEND_NAME = "Ollama"
OPENAI_BACKEND_NAME = "OpenAI"
ANTHROPIC_BACKEND_NAME = "Anthropic"
GEMINI_BACKEND_NAME = "Gemini"
REMOTE_BACKEND_NAME = "Remote"

_BACKEND_URL_PATTERNS: tuple[tuple[str, str], ...] = (
    ("localhost:11434", OLLAMA_BACKEND_NAME),
    ("ollama", OLLAMA_BACKEND_NAME),
    ("openai", OPENAI_BACKEND_NAME),
    ("anthropic", ANTHROPIC_BACKEND_NAME),
    ("googleapis", GEMINI_BACKEND_NAME),
    ("gemini", GEMINI_BACKEND_NAME),
)


def detect_backend_name(base_url: str) -> str:
    """Return the display name of the backend behind ``base_url``.

    Adapter-agnostic; any SDK implementation can delegate to this helper.
    Falls back to ``REMOTE_BACKEND_NAME`` when the URL matches none of
    the known patterns.
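
    A few illustrative inputs (the URLs here are invented; the results
    follow directly from ``_BACKEND_URL_PATTERNS``):

    >>> detect_backend_name("http://localhost:11434")
    'Ollama'
    >>> detect_backend_name("https://api.openai.com/v1")
    'OpenAI'
    >>> detect_backend_name("https://llm.internal.example/v1")
    'Remote'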

56 """ 

57 url_lower = base_url.lower() 

58 for pattern, name in _BACKEND_URL_PATTERNS: 

59 if pattern in url_lower: 

60 return name 

61 return REMOTE_BACKEND_NAME 


@dataclass(frozen=True)
class CompletionResult:
    """Single-shot chat completion result returned by a backend."""

    content: str
    finish_reason: str | None = None
    model: str | None = None


@dataclass(frozen=True)
class StreamChunk:
    """One delta yielded during a streaming chat completion."""

    content: str
    finish_reason: str | None = None


@dataclass(frozen=True)
class EmbeddingResult:
    """Embedding vectors returned by a backend for a batch of inputs."""

    vectors: list[list[float]]
    model: str | None = None


@dataclass(frozen=True)
class CompletionRequest:
    """Backend-agnostic request for a single completion call.

    ``ref`` carries the parsed model reference; the adapter converts it
    to the wire format its SDK expects. ``messages`` is the raw lilbee
    message list (may contain ``images`` bytes); the adapter formats it
    for its SDK. ``api_base`` is populated for local/Ollama deployments
    and omitted for API-hosted models.
    """

    ref: ProviderModelRef
    messages: list[dict[str, Any]]
    options: dict[str, Any] = field(default_factory=dict)
    api_base: str | None = None
    api_key: str | None = None
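
# A sketch of a CompletionRequest aimed at a local Ollama backend. Purely
# illustrative: ``some_ref`` stands in for a ProviderModelRef parsed
# elsewhere, and the message shape shown is an assumption about the raw
# lilbee message list, not a contract defined in this module.
#
#     request = CompletionRequest(
#         ref=some_ref,
#         messages=[{"role": "user", "content": "hello"}],
#         options={"temperature": 0.2},
#         api_base="http://localhost:11434",  # populated for local/Ollama
#     )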


@dataclass(frozen=True)
class EmbeddingRequest:
    """Backend-agnostic request for an embedding call."""

    ref: ProviderModelRef
    inputs: list[str]
    api_base: str | None = None
    api_key: str | None = None


@dataclass(frozen=True)
class RerankRequest:
    """Backend-agnostic rerank request."""

    ref: ProviderModelRef
    query: str
    candidates: list[str]
    api_base: str | None = None
    api_key: str | None = None


@dataclass(frozen=True)
class RerankResult:
    """Rerank scores returned by a backend, one per candidate in input order."""

    scores: list[float]
    model: str | None = None


class LlmSdkBackend(Protocol):
    """Protocol every LLM SDK adapter must satisfy.

    The provider calls these methods through the Protocol only; SDK
    response objects never cross the seam. Methods with a natural
    "not supported" signal are documented below.

    Lifecycle: ``available()`` is the cheap install check called before
    any other method; ``configure_logging`` runs once at first use.
    ``complete`` / ``complete_stream`` / ``embed`` are the hot-path
    operations. ``list_models`` / ``list_chat_models`` / ``pull_model``
    / ``show_model`` are catalog helpers and may raise
    ``NotImplementedError`` or return empty values when unsupported.

    Error contract: implementations must raise only ``ProviderError`` or
    ``NotImplementedError`` from any method. ``SdkLLMProvider`` wraps any
    other exception at the seam; adapters should translate SDK-specific
    errors (httpx errors, third-party SDK exceptions) into
    ``ProviderError`` so the provider can pass them through.
    """

    @property
    def provider_name(self) -> str:
        """Stable identifier used when wrapping errors in ``ProviderError``."""
        ...

    def active_backend_name(self, base_url: str) -> str:
        """Return the display name of the backend the adapter is talking to.

        ``"Ollama"`` for an Ollama URL, ``"OpenAI"`` for an OpenAI URL,
        etc.; unknown URLs fall back to ``"Remote"``. The adapter's own
        identity is exposed separately through ``provider_name``.
        """
        ...

    def available(self) -> bool:
        """Return True when the underlying SDK is importable."""
        ...

    def configure_logging(self, *, suppress_debug: bool) -> None:
        """Apply backend-level logging toggles (best-effort no-op if unsupported)."""
        ...

    def complete(self, request: CompletionRequest) -> CompletionResult:
        """Run a single-shot chat completion."""
        ...

    def complete_stream(self, request: CompletionRequest) -> Iterator[StreamChunk]:
        """Run a streaming chat completion, yielding content chunks."""
        ...
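
    # Typical consumption by the provider, sketched as a comment
    # (``emit`` is a made-up stand-in for the caller's output sink):
    #
    #     for chunk in backend.complete_stream(request):
    #         emit(chunk.content)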

    def embed(self, request: EmbeddingRequest) -> EmbeddingResult:
        """Embed a batch of inputs, returning one vector per input."""
        ...

    def rerank(self, request: RerankRequest) -> RerankResult:
        """Score *candidates* against *query*, returning one float per candidate.

        Raise ``NotImplementedError`` if the backend has no rerank API.
        For an empty ``request.candidates``, return ``RerankResult([])``
        without making an SDK call.
        """
        ...
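
    # Adapters typically satisfy the empty-candidates contract with a
    # short-circuit like this (illustrative sketch, not required wording):
    #
    #     if not request.candidates:
    #         return RerankResult(scores=[])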

    def list_models(self, *, base_url: str, api_key: str) -> list[str]:
        """List model identifiers visible to the backend. Return [] if unsupported."""
        ...

    def list_chat_models(self, provider: str) -> list[str]:
        """List chat-mode models from the SDK's catalog for *provider*.

        Return ``[]`` if the backend has no catalog of frontier models.
        Unlike ``list_models``, this is a static pricing/capability table,
        not a runtime HTTP probe.
        """
        ...

    def pull_model(
        self,
        model: str,
        *,
        base_url: str,
        on_progress: Callable[..., Any] | None = None,
    ) -> None:
        """Pull a model. Raise NotImplementedError if unsupported."""
        ...

    def show_model(self, model: str, *, base_url: str) -> dict[str, Any] | None:
        """Return model metadata dict or None if unsupported / not found."""
        ...
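

# A minimal sketch of how a provider might drive an adapter through the
# Protocol above. Entirely illustrative: ``run_completion`` and its error
# message are assumptions, not lilbee's real call sequence.
#
#     def run_completion(backend: LlmSdkBackend, request: CompletionRequest) -> str:
#         if not backend.available():
#             raise RuntimeError(f"{backend.provider_name} SDK is not available")
#         backend.configure_logging(suppress_debug=True)
#         return backend.complete(request).content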