Coverage for src / lilbee / results.py: 100%

37 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

1from __future__ import annotations 

2 

3from pydantic import BaseModel 

4 

5from lilbee.store import SearchChunk 

6 

7 

class Excerpt(BaseModel):
    # A single matched passage of a document, in client-facing form.
    # Instances are built from a ``SearchChunk`` by ``_to_excerpt``.

    # Text of the matched chunk.
    content: str
    # Page span of the passage; ``None`` when the chunk stored 0.
    page_start: int | None
    page_end: int | None
    # Line span of the passage; ``None`` when the chunk stored 0.
    line_start: int | None
    line_end: int | None
    # Match quality; higher is better.
    relevance: float  # 0.0-1.0 (1 = best match)

15 

16 

class DocumentResult(BaseModel):
    # All excerpts that matched within one source document.

    # Identifier of the document the excerpts came from.
    source: str
    # Content type reported by the first chunk seen for this source.
    content_type: str
    # Matched passages, ordered by ``group`` from most to least relevant.
    excerpts: list[Excerpt]
    # Relevance of the top excerpt; ``group`` orders documents by this value.
    best_relevance: float
    # Vault-relative path for clients to deep-link into the native UI.
    # ``None`` when the server can't resolve the source under ``cfg.vault_base``.
    vault_path: str | None = None

25 

26 

def _zero_to_none(val: int) -> int | None:
    """Return ``None`` when *val* equals 0, otherwise return *val* unchanged."""
    if val == 0:
        return None
    return val

29 

30 

def _to_excerpt(chunk: SearchChunk) -> Excerpt:
    """Convert one search chunk into an ``Excerpt``.

    Relevance is the chunk's ``relevance_score`` when it is set. Otherwise
    it is derived from the distance ``d`` as ``1 / (1 + d)``, with a missing
    distance treated as 0. Page and line bounds equal to 0 become ``None``.
    """
    score = chunk.relevance_score
    if score is None:
        # No explicit score: smaller distance maps to higher relevance.
        score = 1.0 / (1.0 + (chunk.distance or 0))

    # The four positional bounds all get the same 0 -> None treatment.
    bounds = {
        field: _zero_to_none(getattr(chunk, field))
        for field in ("page_start", "page_end", "line_start", "line_end")
    }
    return Excerpt(content=chunk.chunk, relevance=score, **bounds)

44 

45 

def group(chunks: list[SearchChunk]) -> list[DocumentResult]:
    """Collapse search chunks into one result per source document.

    Chunks sharing a ``source`` are converted to excerpts and ordered from
    most to least relevant. Each document takes its ``content_type`` from the
    first chunk seen for that source and its ``best_relevance`` from its top
    excerpt. The returned documents are ordered by ``best_relevance``,
    highest first; an empty input yields an empty list.
    """
    # NOTE(review): imported at call time rather than module level,
    # presumably to avoid an import cycle with the CLI package - confirm.
    from lilbee.cli.helpers import resolve_vault_path

    # Bucket chunks per source; dict insertion order keeps first-seen order.
    buckets: dict[str, list[SearchChunk]] = {}
    for item in chunks:
        key = item.source
        bucket = buckets.get(key)
        if bucket is None:
            bucket = buckets[key] = []
        bucket.append(item)

    documents: list[DocumentResult] = []
    for key, members in buckets.items():
        ranked = [_to_excerpt(member) for member in members]
        # Stable sort: equally relevant excerpts keep their input order.
        ranked.sort(key=lambda ex: ex.relevance, reverse=True)
        document = DocumentResult(
            source=key,
            content_type=members[0].content_type,
            excerpts=ranked,
            best_relevance=ranked[0].relevance,
            vault_path=resolve_vault_path(key),
        )
        documents.append(document)

    return sorted(documents, key=lambda doc: doc.best_relevance, reverse=True)

74 

75 

def to_dicts(results: list[DocumentResult]) -> list[dict[str, object]]:
    """Dump each DocumentResult to a dict via ``model_dump()``, keeping order."""
    dumped: list[dict[str, object]] = []
    for document in results:
        dumped.append(document.model_dump())
    return dumped

78 return [r.model_dump() for r in results]