Coverage for src / lilbee / results.py: 100%
37 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
1from __future__ import annotations
3from pydantic import BaseModel
5from lilbee.store import SearchChunk
class Excerpt(BaseModel):
    """A single matching passage from a source, with its location and score."""

    # Text of the passage.
    content: str
    # Page span of the passage. ``_to_excerpt`` stores a chunk value of 0 as
    # ``None``.
    page_start: int | None
    page_end: int | None
    # Line span of the passage; a chunk value of 0 is likewise stored as ``None``.
    line_start: int | None
    line_end: int | None
    # Match quality on a 0.0-1.0 scale, where 1 is the best match.
    relevance: float
class DocumentResult(BaseModel):
    """All search hits for one source document, as assembled by ``group``."""

    # Source identifier shared by every chunk grouped into this result.
    source: str
    # Content type; ``group`` copies it from the first chunk seen for the source.
    content_type: str
    # Excerpts for this source; ``group`` orders them by descending relevance.
    excerpts: list[Excerpt]
    # Relevance of the top excerpt (``group`` sets it to ``excerpts[0].relevance``).
    best_relevance: float
    # Path relative to the vault, so clients can deep-link into the native UI.
    # Stays ``None`` if the server cannot resolve the source under
    # ``cfg.vault_base``.
    vault_path: str | None = None
27def _zero_to_none(val: int) -> int | None:
28 return None if val == 0 else val
def _to_excerpt(chunk: SearchChunk) -> Excerpt:
    """Build an ``Excerpt`` from one search chunk.

    The chunk's ``relevance_score`` is used when it is set. Otherwise the
    relevance is derived from ``distance`` as ``1 / (1 + distance)``, with a
    missing distance treated as 0. Page and line values of 0 are stored as
    ``None``.
    """
    score = chunk.relevance_score
    if score is None:
        # No explicit score: map the distance onto a relevance value instead.
        score = 1.0 / (1.0 + (chunk.distance or 0))

    return Excerpt(
        content=chunk.chunk,
        page_start=_zero_to_none(chunk.page_start),
        page_end=_zero_to_none(chunk.page_end),
        line_start=_zero_to_none(chunk.line_start),
        line_end=_zero_to_none(chunk.line_end),
        relevance=score,
    )
def group(chunks: list[SearchChunk]) -> list[DocumentResult]:
    """Collect raw LanceDB chunks into one ``DocumentResult`` per source.

    Chunks are bucketed by ``source``. Within each document the excerpts are
    ordered by descending relevance, and the documents themselves are returned
    ordered by the relevance of their best excerpt, highest first.
    """
    from lilbee.cli.helpers import resolve_vault_path

    # Bucket chunks per source; dict insertion order keeps first-seen order.
    buckets: dict[str, list[SearchChunk]] = {}
    for item in chunks:
        key = item.source
        bucket = buckets.get(key)
        if bucket is None:
            bucket = buckets[key] = []
        bucket.append(item)

    documents: list[DocumentResult] = []
    for src, members in buckets.items():
        ranked = [_to_excerpt(member) for member in members]
        ranked.sort(key=lambda ex: ex.relevance, reverse=True)
        documents.append(
            DocumentResult(
                source=src,
                # Every bucket holds at least one chunk, so index 0 is safe.
                content_type=members[0].content_type,
                excerpts=ranked,
                best_relevance=ranked[0].relevance,
                vault_path=resolve_vault_path(src),
            )
        )

    return sorted(documents, key=lambda doc: doc.best_relevance, reverse=True)
def to_dicts(results: list[DocumentResult]) -> list[dict[str, object]]:
    """Convert each result to a plain dict via ``model_dump`` for JSON output."""
    dumped: list[dict[str, object]] = []
    for doc in results:
        dumped.append(doc.model_dump())
    return dumped