Coverage for src/lilbee/server/models.py: 100%

1"""Request and response models for the lilbee HTTP API.

3Typed pydantic models so Litestar's OpenAPI schema has field-level detail.

4"""

6from __future__ import annotations

8from typing import Any, Literal

10from pydantic import BaseModel, Field, field_validator

12from lilbee.store import SearchScope

# The only ``chunk_type`` strings accepted as an actual filter value.
_VALID_CHUNK_TYPES = frozenset((SearchScope.RAW.value, SearchScope.WIKI.value))

def _validate_chunk_type(value: str | None) -> str | None:
    """Normalize and validate a ``chunk_type`` value at the HTTP boundary.

    Matches the CLI/MCP behaviour: ``"raw"`` and ``"wiki"`` are returned
    unchanged and filter the pool, while ``None`` and the UI-side ``"both"``
    both collapse to ``None`` (no filter).

    Raises:
        ValueError: if ``value`` is any other string.
    """
    # ``None`` and "both" are two spellings of "do not filter".
    no_filter = value is None or value == SearchScope.BOTH.value
    if not no_filter and value not in _VALID_CHUNK_TYPES:
        raise ValueError(
            f"chunk_type must be one of 'raw', 'wiki', 'both', or omitted; got {value!r}"
        )
    return None if no_filter else value

class AskRequest(BaseModel):
    """Request body for /api/ask.

    ``chunk_type`` is checked by ``_validate_chunk_type``, so an unknown value
    fails model validation instead of reaching the handler.
    """

    question: str
    # Bounded on both sides: a negative count has no meaning, so it is rejected
    # during validation like any other out-of-range value.
    # NOTE(review): 0 is the default and presumably means "use the configured
    # default" -- confirm against the handler.
    top_k: int = Field(default=0, ge=0, le=100)
    options: dict[str, Any] | None = None
    chunk_type: str | None = None

    @field_validator("chunk_type")
    @classmethod
    def _check_chunk_type(cls, v: str | None) -> str | None:
        """Delegate to the shared ``chunk_type`` check."""
        return _validate_chunk_type(v)

class ChatRequest(BaseModel):
    """Request body for /api/chat.

    ``chunk_type`` is checked by ``_validate_chunk_type``, so an unknown value
    fails model validation instead of reaching the handler.
    """

    question: str
    # ``ChatMessage`` is declared further down the module; the annotation is a
    # lazy forward reference thanks to ``from __future__ import annotations``.
    history: list[ChatMessage] = []
    # Bounded on both sides: a negative count has no meaning, so it is rejected
    # during validation like any other out-of-range value.
    # NOTE(review): 0 is the default and presumably means "use the configured
    # default" -- confirm against the handler.
    top_k: int = Field(default=0, ge=0, le=100)
    options: dict[str, Any] | None = None
    chunk_type: str | None = None

    @field_validator("chunk_type")
    @classmethod
    def _check_chunk_type(cls, v: str | None) -> str | None:
        """Delegate to the shared ``chunk_type`` check."""
        return _validate_chunk_type(v)

class SyncRequest(BaseModel):
    """Request body for /api/sync.

    By default the sync is incremental. Setting ``force_rebuild`` requests a
    full drop-and-reingest, equivalent to ``lilbee rebuild``. Use it to recover
    from an embedding-model switch, when the store refuses search or ingest
    because ``cfg.embedding_model`` no longer matches the persisted vectors.
    """

    enable_ocr: bool | None = None
    force_rebuild: bool = False

class AddRequest(BaseModel):
    """Payload accepted by /api/add."""

    # The only required field; the remaining fields are optional.
    paths: list[str]
    force: bool = False
    enable_ocr: bool | None = None
    ocr_timeout: float | None = None

class SetModelRequest(BaseModel):
    """Request body for /api/models/chat.

    NOTE(review): possibly shared by the other model-role routes as well;
    confirm against the handlers.
    """

    model: str

class SourceContentResponse(BaseModel):
    """JSON body for ``GET /api/source`` (``raw=0``).

    ``markdown`` is empty for binary types.
    """

    markdown: str
    content_type: str
    title: str | None = None

class ChatMessage(BaseModel):
    """One message of a chat conversation."""

    # Only these two roles validate.
    role: Literal["user", "assistant"]
    content: str

103

104

class CleanedChunk(BaseModel):
    """Search-result chunk with the vector stripped and the distance renamed."""

    source: str
    content_type: str
    chunk: str
    distance: float | None = None
    relevance_score: float | None = None
    # Location fields all default to 0.
    page_start: int = 0
    page_end: int = 0
    line_start: int = 0
    line_end: int = 0
    chunk_index: int = 0
    # Vault-relative path, populated when ``cfg.vault_base`` is set and the
    # source file lives inside the vault. Left as ``None`` when the server
    # runs headless or the source cannot be resolved as a vault file. Clients
    # use it to open the source in a native editor rather than fetching
    # ``/api/source``.
    vault_path: str | None = None

123

124

class StatusSourceInfo(BaseModel):
    """One indexed source as listed in a status response."""

    # Every field is required.
    filename: str
    file_hash: str
    chunk_count: int
    ingested_at: str

132

133

class StatusConfigInfo(BaseModel):
    """Configuration section of a status response.

    All four role-bound model fields are exposed so plugins/TUI can show
    what is active per role without a second round trip.
    """

    documents_dir: str
    data_dir: str
    # Role-bound models: chat and embedding are required, while vision and
    # reranker default to the empty string.
    chat_model: str
    embedding_model: str
    vision_model: str = ""
    reranker_model: str = ""
    enable_ocr: bool | None = None

148

149

class StatusResponse(BaseModel):
    """Body returned by GET /api/status."""

    # Defaults to the literal string "status".
    command: str = "status"
    config: StatusConfigInfo
    sources: list[StatusSourceInfo]
    total_chunks: int

157

158

class HealthResponse(BaseModel):
    """Body returned by /api/health."""

    status: str
    version: str

164

165

class AskResponse(BaseModel):
    """Body returned by both /api/ask and /api/chat."""

    answer: str
    # Returned alongside the answer, in ``CleanedChunk`` form.
    sources: list[CleanedChunk]

171

172

class SetModelResponse(BaseModel):
    """Response for PUT /api/models/{chat|embedding|vision|reranker}.

    ``reindex_required`` is ``True`` only when the new embedding model differs
    from the model that built the persisted vector store. The chat, vision,
    and reranker handlers always return ``False``, since changing those models
    does not invalidate stored vectors. The flag mirrors ``reindex_required``
    on ``ConfigUpdateResponse``.
    """

    model: str
    reindex_required: bool = False

184

185

class ConfigUpdateResponse(BaseModel):
    """Body returned by PATCH /api/config."""

    updated: list[str]
    # Required here, with no default.
    reindex_required: bool

191

192

class CrawlRequest(BaseModel):
    """Request body for /api/crawl.

    ``depth``: null / omitted means whole-site unbounded recursion, 0 means
    the single URL only, and a positive int is the maximum depth.
    ``max_pages``: null / omitted means no cap, and a positive int is an
    explicit page cap.
    """

    url: str
    # ``None`` stays the default; when a number is given it must be >= 0.
    depth: int | None = Field(None, ge=0)
    # ``None`` stays the default; when a number is given it must be >= 1.
    max_pages: int | None = Field(None, ge=1)

204

205

class DocumentInfo(BaseModel):
    """One indexed document, as it appears in a list response."""

    filename: str
    chunk_count: int = 0
    ingested_at: str = ""

212

213

class DocumentListResponse(BaseModel):
    """Body returned by GET /api/documents."""

    documents: list[DocumentInfo]
    total: int
    limit: int
    offset: int
    # Optional; False unless set explicitly.
    has_more: bool = False

222

223

class DocumentRemoveResponse(BaseModel):
    """Body returned by POST /api/documents/remove."""

    # Both lists are required.
    removed: list[str]
    not_found: list[str]

229

230

class ConfigResponse(BaseModel):
    """Body returned by GET /api/config."""

    # No fields are declared; ``extra="allow"`` keeps undeclared keys on the
    # model instead of rejecting or dropping them.
    model_config = {"extra": "allow"}

235

236

class ModelsShowResponse(BaseModel):
    """Body returned by POST /api/models/show."""

    # No fields are declared; ``extra="allow"`` keeps undeclared keys on the
    # model instead of rejecting or dropping them.
    model_config = {"extra": "allow"}

241

242

class CatalogEntryResponse(BaseModel):
    """One model entry shown in the catalog browser."""

    # Every field is required.
    hf_repo: str
    gguf_filename: str
    task: str
    display_name: str
    param_count: str
    size_gb: float
    min_ram_gb: float
    description: str
    quality_tier: str
    featured: bool
    downloads: int
    installed: bool
    source: str

259

260

class ModelsCatalogResponse(BaseModel):
    """Body returned by GET /api/models/catalog."""

    total: int
    limit: int
    offset: int
    models: list[CatalogEntryResponse]
    # Optional; False unless set explicitly.
    has_more: bool = False

269

270

class InstalledModelEntry(BaseModel):
    """One installed model."""

    name: str
    source: str

276

277

class ModelsInstalledResponse(BaseModel):
    """Body returned by GET /api/models/installed."""

    models: list[InstalledModelEntry]

282

283

class ModelsDeleteResponse(BaseModel):
    """Body returned by DELETE /api/models/{model}."""

    # Every field is required.
    deleted: bool
    model: str
    freed_gb: float

290

291

class ExternalModelsResponse(BaseModel):
    """Body returned by GET /api/models/external."""

    models: list[str]
    # Optional; ``None`` unless set explicitly.
    error: str | None = None

297

298

class SyncSummary(BaseModel):
    """Sync result embedded within an add-files response."""

    # Every field has a default, so an empty summary is valid.
    added: list[str] = []
    updated: list[str] = []
    removed: list[str] = []
    # An int, unlike the neighbouring list fields.
    unchanged: int = 0
    failed: list[str] = []

307

308

class AddSummary(BaseModel):
    """Summary produced by the add-files handler."""

    copied: list[str]
    skipped: list[str]
    errors: list[str]
    # Optional nested sync result; ``None`` unless set explicitly.
    sync: SyncSummary | None = None

316

317

class WikiPageSummary(BaseModel):
    """Wiki page summary used by list endpoints."""

    # ``slug`` is the only required field.
    slug: str
    title: str = ""
    page_type: str = "unknown"
    source_count: int = 0
    created_at: str = ""

326

327

class WikiCitationRecord(BaseModel):
    """Citation record from the store, used in reverse lookup responses."""

    # Every field has a default.
    wiki_source: str = ""
    wiki_chunk_index: int = 0
    citation_key: str = ""
    claim_type: str = "fact"
    source_filename: str = ""
    source_hash: str = ""
    page_start: int = 0
    page_end: int = 0
    line_start: int = 0
    line_end: int = 0
    excerpt: str = ""
    created_at: str = ""

343

344

class WikiPageDetail(BaseModel):
    """Complete content of one wiki page."""

    slug: str
    title: str = ""
    content: str = ""

351

352

class WikiCitationsResult(BaseModel):
    """Citations attached to one wiki page."""

    slug: str
    # Empty list unless citations are supplied.
    citations: list[WikiCitationRecord] = []

358

359

class WikiLintIssueItem(BaseModel):
    """One lint finding reported against a wiki page."""

    # Every field defaults to the empty string.
    wiki_source: str = ""
    issue_type: str = ""
    severity: str = ""
    message: str = ""

367

368

class WikiLintResult(BaseModel):
    """Outcome of a full wiki lint run."""

    issues: list[WikiLintIssueItem] = []
    # Two separate counters, both defaulting to 0.
    errors: int = 0
    warnings: int = 0

375

376

class WikiPruneRecordResponse(BaseModel):
    """One pruning action."""

    # Every field is required.
    wiki_source: str
    action: str
    reason: str

383

384

class WikiPruneResult(BaseModel):
    """Outcome of wiki pruning."""

    records: list[WikiPruneRecordResponse] = []
    # Two separate counters, both defaulting to 0.
    archived: int = 0
    flagged: int = 0

391

392

class WikiBuildResult(BaseModel):
    """Outcome of a full wiki build/update."""

    # Every field has a default.
    paths: list[str] = []
    entities: int = 0
    count: int = 0

399

400

class WikiStatusResult(BaseModel):
    """Status counters for the wiki layer."""

    # The only required field; every counter below defaults to 0.
    wiki_enabled: bool
    summaries: int = 0
    drafts: int = 0
    pages: int = 0
    lint_errors: int = 0
    lint_warnings: int = 0

410

411

class WikiSynthesizeResult(BaseModel):
    """Outcome of generating synthesis pages for cross-source concept clusters."""

    paths: list[str] = []
    count: int = 0

417

418

class DraftInfoResponse(BaseModel):
    """Metadata for one wiki draft, mirroring ``DraftInfo.to_dict()``.

    ``pending_kind`` tells drift drafts (``None``) apart from the Phase D
    batched-generation markers (``"parse"``, ``"collision"``).
    """

    # ``slug`` and ``path`` are required; everything else has a default.
    slug: str
    path: str
    drift_ratio: float | None = None
    faithfulness_score: float | None = None
    bad_title: bool = False
    published_path: str | None = None
    published_exists: bool = False
    mtime: float = 0.0
    pending_kind: str | None = None

435

436

class WikiDraftDiffResponse(BaseModel):
    """Unified diff between a draft and its published counterpart."""

    slug: str
    diff: str

442

443

class WikiDraftAcceptResponse(BaseModel):
    """Outcome of accepting a draft: where it landed and how many chunks reindexed.

    ``slug`` is the slug the content was published under, while
    ``requested_slug`` is the slug the client asked to accept. They differ
    for PENDING-COLLISION drafts: the request slug carries a
    ``-collision-<hash>`` suffix that is stripped on publish.
    """

    # Every field is required.
    slug: str
    requested_slug: str
    moved_to: str
    reindexed_chunks: int

457

458

class WikiDraftRejectResponse(BaseModel):
    """Result of rejecting a draft."""

    slug: str

Coverage for src / lilbee / server / models.py: 100%

230 statements