Coverage for src / lilbee / api.py: 100%

88 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

1"""Programmatic access to lilbee's retrieval pipeline. 

2 

3Retrieval only -- no LLM chat. Search your indexed documents from Python. 

4Optional features (concept graph, reranker) activate automatically when 

5their dependencies are installed. 

6 

7Usage:: 

8 

9 from lilbee import Lilbee 

10 

11 bee = Lilbee("./docs") 

12 bee.sync() 

13 results = bee.search("authentication") 

14""" 

15 

16from __future__ import annotations 

17 

18import asyncio 

19from collections.abc import Iterator 

20from contextlib import contextmanager 

21from pathlib import Path 

22from typing import TYPE_CHECKING 

23 

24from lilbee.concepts import ConceptGraph 

25from lilbee.config import Config, cfg 

26from lilbee.embedder import Embedder 

27from lilbee.providers.factory import create_provider 

28from lilbee.query import Searcher 

29from lilbee.reranker import Reranker 

30from lilbee.security import validate_path_within 

31from lilbee.services import reset_services 

32from lilbee.store import Store 

33 

34if TYPE_CHECKING: 

35 from lilbee.ingest import SyncResult 

36 from lilbee.providers.base import LLMProvider 

37 from lilbee.store import SearchChunk 

38 

39 

@contextmanager
def _swap_config(target: Config) -> Iterator[None]:
    """Temporarily replace the global ``cfg`` fields with *target*'s values.

    Every model field of ``cfg`` is snapshotted first. Each model field of
    *target* is then copied onto ``cfg`` and ``reset_services()`` is called.
    On exit ``reset_services()`` is called again and the snapshot is written
    back, whether the ``with`` body succeeded or raised.

    Not thread-safe -- sequential use only.

    Args:
        target: Config whose field values should be active inside the
            ``with`` body.

    Yields:
        None. The global ``cfg`` carries *target*'s values for the duration
        of the ``with`` body.
    """
    snapshot = {name: getattr(cfg, name) for name in type(cfg).model_fields}
    try:
        # The overlay runs inside the ``try`` so that a failure part-way
        # through (e.g. an assignment rejected by the model) still restores
        # the original values instead of leaving ``cfg`` half-overwritten.
        for name in type(target).model_fields:
            setattr(cfg, name, getattr(target, name))
        # NOTE(review): presumably this drops services built against the
        # previous config values -- confirm against lilbee.services.
        reset_services()
        yield
    finally:
        try:
            reset_services()
        finally:
            # Restore even if reset_services() raises; the exception still
            # propagates once the snapshot has been written back.
            for name, val in snapshot.items():
                setattr(cfg, name, val)

55 

56 

class Lilbee:
    """Programmatic access to lilbee's retrieval pipeline.

    Composes Store, Embedder, Searcher, Reranker, and ConceptGraph, each
    constructed from this instance's Config.

    Note:
        ``sync``, ``search``, ``add``, ``remove``, ``status`` and ``rebuild``
        run inside ``_swap_config``, which temporarily copies this instance's
        config onto the global ``cfg`` and restores it afterwards. That swap
        is not thread-safe, so use instances sequentially.

    Usage::

        from lilbee import Lilbee

        bee = Lilbee("./docs")
        bee.sync()
        results = bee.search("authentication")
    """

    def __init__(
        self,
        documents_dir: str | Path | None = None,
        *,
        config: Config | None = None,
        provider: LLMProvider | None = None,
    ) -> None:
        """Create a lilbee instance.

        Args:
            documents_dir: Root directory for this instance. It is resolved
                to an absolute path and used as ``data_root``; documents live
                in ``<root>/documents``, data in ``<root>/data`` and the
                LanceDB files in ``<root>/data/lancedb``. All other settings
                are copied from the global ``cfg``.
            config: Full Config instance for complete control.
            provider: LLM provider instance. If not given, creates one from
                the config.

        Pass documents_dir or config, not both. If neither is given, uses
        ``Config()`` (same defaults as the CLI).

        The documents and data directories are created if they do not exist.

        Raises:
            ValueError: If both ``documents_dir`` and ``config`` are given.
        """
        if documents_dir is not None and config is not None:
            raise ValueError("Pass documents_dir or config, not both")

        if config is not None:
            self._config = config
        elif documents_dir is not None:
            root = Path(documents_dir).resolve()
            data_dir = root / "data"
            self._config = cfg.model_copy(
                update={
                    "data_root": root,
                    "documents_dir": root / "documents",
                    "data_dir": data_dir,
                    "lancedb_dir": data_dir / "lancedb",
                },
            )
        else:
            self._config = Config()

        self._config.documents_dir.mkdir(parents=True, exist_ok=True)
        self._config.data_dir.mkdir(parents=True, exist_ok=True)

        # Explicit None check: a caller-supplied provider must be honoured
        # even if the object happens to evaluate as falsy.
        if provider is not None:
            self._provider = provider
        else:
            self._provider = create_provider(self._config)
        self._store = Store(self._config)
        self._embedder = Embedder(self._config, self._provider)
        self._reranker = Reranker(self._config)
        self._concepts = ConceptGraph(self._config, self._store)
        self._searcher = Searcher(
            self._config,
            self._provider,
            self._store,
            self._embedder,
            self._reranker,
            self._concepts,
        )

    @property
    def config(self) -> Config:
        """The Config instance backing this Lilbee."""
        return self._config

    @property
    def store(self) -> Store:
        """The Store component."""
        return self._store

    @property
    def embedder(self) -> Embedder:
        """The Embedder component."""
        return self._embedder

    @property
    def searcher(self) -> Searcher:
        """The Searcher component."""
        return self._searcher

    def sync(self, *, quiet: bool = True) -> SyncResult:
        """Sync documents to the vector store. Returns what changed.

        Args:
            quiet: Forwarded unchanged to ``lilbee.ingest.sync``.

        Note:
            Drives the async sync with ``asyncio.run``, which cannot be
            called while another event loop is running in the same thread.
        """
        from lilbee.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(quiet=quiet))

    def search(self, query: str, *, top_k: int = 0) -> list[SearchChunk]:
        """Search indexed documents. Returns ranked chunks.

        Args:
            query: Text to search for.
            top_k: Forwarded unchanged to ``Searcher.search``.
                NOTE(review): ``0`` presumably means "use the configured
                default" -- confirm against Searcher.
        """
        with _swap_config(self._config):
            return self._searcher.search(query, top_k=top_k)

    def add(self, paths: list[str | Path]) -> SyncResult:
        """Add files to the knowledge base and sync.

        Each path is resolved to an absolute path, handed to ``copy_files``
        with ``force=True``, and then a quiet sync is run.

        Args:
            paths: Files to add.

        Returns:
            The result of the sync that follows the copy.
        """
        from lilbee.cli.helpers import copy_files
        from lilbee.ingest import sync as _sync

        resolved = [Path(p).resolve() for p in paths]
        with _swap_config(self._config):
            copy_files(resolved, force=True)
            return asyncio.run(_sync(quiet=True))

    def remove(self, name: str) -> None:
        """Remove a document from the index by source name.

        The index entries are deleted first. The matching file under the
        documents directory is then deleted as well, but only when
        ``validate_path_within`` accepts the path; if it raises
        ``ValueError`` the file system is left untouched.

        Args:
            name: Source name of the document, relative to the documents
                directory.
        """
        with _swap_config(self._config):
            self._store.delete_by_source(name)
            self._store.delete_source(name)
            try:
                doc_path = validate_path_within(
                    self._config.documents_dir / name, self._config.documents_dir
                )
            except ValueError:
                # Best-effort: the index entries are already gone; skip the
                # file deletion for a path that failed validation.
                return
            # missing_ok avoids the exists()/unlink() race when the file
            # disappears between the check and the delete.
            doc_path.unlink(missing_ok=True)

    def status(self) -> dict[str, object]:
        """Return index stats.

        Returns:
            A dict with ``documents_dir`` and ``data_dir`` (as strings),
            ``document_count`` (number of sources in the store) and
            ``sources`` (the ``filename`` of each source).
        """
        with _swap_config(self._config):
            sources = self._store.get_sources()
            return {
                "documents_dir": str(self._config.documents_dir),
                "data_dir": str(self._config.data_dir),
                "document_count": len(sources),
                "sources": [s["filename"] for s in sources],
            }

    def rebuild(self) -> SyncResult:
        """Rebuild the entire index from scratch.

        Runs a quiet sync with ``force_rebuild=True``.
        """
        from lilbee.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(force_rebuild=True, quiet=True))

198 return asyncio.run(_sync(force_rebuild=True, quiet=True))