Coverage for src / lilbee / api.py: 100%
88 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
1"""Programmatic access to lilbee's retrieval pipeline.
3Retrieval only -- no LLM chat. Search your indexed documents from Python.
4Optional features (concept graph, reranker) activate automatically when
5their dependencies are installed.
7Usage::
9 from lilbee import Lilbee
11 bee = Lilbee("./docs")
12 bee.sync()
13 results = bee.search("authentication")
14"""
16from __future__ import annotations
18import asyncio
19from collections.abc import Iterator
20from contextlib import contextmanager
21from pathlib import Path
22from typing import TYPE_CHECKING
24from lilbee.concepts import ConceptGraph
25from lilbee.config import Config, cfg
26from lilbee.embedder import Embedder
27from lilbee.providers.factory import create_provider
28from lilbee.query import Searcher
29from lilbee.reranker import Reranker
30from lilbee.security import validate_path_within
31from lilbee.services import reset_services
32from lilbee.store import Store
34if TYPE_CHECKING:
35 from lilbee.ingest import SyncResult
36 from lilbee.providers.base import LLMProvider
37 from lilbee.store import SearchChunk
@contextmanager
def _swap_config(target: Config) -> Iterator[None]:
    """Temporarily replace the global ``cfg`` fields with *target*'s values.

    On entry every model field of *target* is copied onto the global ``cfg``
    and the cached services are reset. On exit the services are reset again
    and the original field values are written back, whether the managed
    block completed or raised.

    Not thread-safe -- sequential use only.

    Args:
        target: Config whose field values should be active inside the block.
    """
    # Capture the current values first so they can be restored on exit.
    snapshot = {name: getattr(cfg, name) for name in type(cfg).model_fields}
    try:
        # The swap itself lives inside the ``try``: if copying a field or
        # resetting services fails part-way, the ``finally`` below still puts
        # the global config back instead of leaving it half-swapped.
        for name in type(target).model_fields:
            setattr(cfg, name, getattr(target, name))
        reset_services()
        yield
    finally:
        try:
            reset_services()
        finally:
            # Restore even when the reset above raises.
            for name, val in snapshot.items():
                setattr(cfg, name, val)
class Lilbee:
    """Programmatic access to lilbee's retrieval pipeline.

    Composes Store, Embedder, Searcher, Reranker, and ConceptGraph, each
    constructed with this instance's Config.

    The operations ``sync``, ``search``, ``add``, ``remove``, ``status`` and
    ``rebuild`` run inside ``_swap_config``, which temporarily copies this
    instance's Config onto the process-wide ``cfg`` and restores it
    afterwards. That swap is not thread-safe, so call these methods
    sequentially.

    Usage::

        from lilbee import Lilbee

        bee = Lilbee("./docs")
        bee.sync()
        results = bee.search("authentication")
    """

    def __init__(
        self,
        documents_dir: str | Path | None = None,
        *,
        config: Config | None = None,
        provider: LLMProvider | None = None,
    ) -> None:
        """Create a lilbee instance.

        Args:
            documents_dir: Root folder for this knowledge base. The path is
                resolved and a copy of the global ``cfg`` is made in which
                documents live in ``<root>/documents``, data in
                ``<root>/data`` and the LanceDB files in
                ``<root>/data/lancedb``.
            config: Full Config instance for complete control.
            provider: LLM provider instance. If not given, creates one from
                the chosen config.

        Pass documents_dir or config, not both. If neither is given, uses
        ``Config()`` (same defaults as the CLI).

        The documents and data directories are created if they are missing.

        Raises:
            ValueError: If both ``documents_dir`` and ``config`` are given.
        """
        if documents_dir is not None and config is not None:
            raise ValueError("Pass documents_dir or config, not both")

        if config is not None:
            self._config = config
        elif documents_dir is not None:
            root = Path(documents_dir).resolve()
            self._config = cfg.model_copy(
                update={
                    "data_root": root,
                    "documents_dir": root / "documents",
                    "data_dir": root / "data",
                    "lancedb_dir": root / "data" / "lancedb",
                },
            )
        else:
            self._config = Config()

        self._config.documents_dir.mkdir(parents=True, exist_ok=True)
        self._config.data_dir.mkdir(parents=True, exist_ok=True)

        # Compare against None rather than relying on truthiness, so an
        # explicitly supplied provider that happens to evaluate falsy is
        # not silently replaced by a default one.
        if provider is not None:
            self._provider = provider
        else:
            self._provider = create_provider(self._config)
        self._store = Store(self._config)
        self._embedder = Embedder(self._config, self._provider)
        self._reranker = Reranker(self._config)
        self._concepts = ConceptGraph(self._config, self._store)
        self._searcher = Searcher(
            self._config,
            self._provider,
            self._store,
            self._embedder,
            self._reranker,
            self._concepts,
        )

    @property
    def config(self) -> Config:
        """The Config instance backing this Lilbee."""
        return self._config

    @property
    def store(self) -> Store:
        """The Store component."""
        return self._store

    @property
    def embedder(self) -> Embedder:
        """The Embedder component."""
        return self._embedder

    @property
    def searcher(self) -> Searcher:
        """The Searcher component."""
        return self._searcher

    def sync(self, *, quiet: bool = True) -> SyncResult:
        """Sync documents to the vector store. Returns what changed.

        Drives the async ingest routine with ``asyncio.run``, so it must not
        be called from a thread that already has a running event loop.

        Args:
            quiet: Forwarded to the ingest ``sync`` routine.
        """
        from lilbee.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(quiet=quiet))

    def search(self, query: str, *, top_k: int = 0) -> list[SearchChunk]:
        """Search indexed documents. Returns ranked chunks.

        Args:
            query: Text to search for.
            top_k: Forwarded to ``Searcher.search``. NOTE(review): the
                default of 0 presumably defers to the configured limit --
                confirm against ``Searcher.search``.
        """
        with _swap_config(self._config):
            return self._searcher.search(query, top_k=top_k)

    def add(self, paths: list[str | Path]) -> SyncResult:
        """Add files to the knowledge base and sync.

        Resolves each path, copies it into the documents directory
        (``copy_files`` is called with ``force=True``), then runs a quiet
        sync and returns its result.
        """
        from lilbee.cli.helpers import copy_files
        from lilbee.ingest import sync as _sync

        resolved = [Path(p).resolve() for p in paths]
        with _swap_config(self._config):
            copy_files(resolved, force=True)
            return asyncio.run(_sync(quiet=True))

    def remove(self, name: str) -> None:
        """Remove a document from the index by source name.

        The store entries for *name* are always deleted. The matching file
        under the documents directory is deleted as well, unless
        ``validate_path_within`` rejects the resulting path.
        """
        with _swap_config(self._config):
            self._store.delete_by_source(name)
            self._store.delete_source(name)
            docs_dir = self._config.documents_dir
            try:
                doc_path = validate_path_within(docs_dir / name, docs_dir)
            except ValueError:
                # Path was rejected (presumably it points outside the
                # documents directory): leave the filesystem untouched.
                return
            # missing_ok avoids the exists()/unlink() race when the file
            # disappears between the check and the delete.
            doc_path.unlink(missing_ok=True)

    def status(self) -> dict[str, object]:
        """Return index stats (document count, data directory, etc.).

        The result has the keys ``documents_dir``, ``data_dir``,
        ``document_count`` and ``sources`` (the ``filename`` of every source
        reported by the store).
        """
        with _swap_config(self._config):
            sources = self._store.get_sources()
            return {
                "documents_dir": str(self._config.documents_dir),
                "data_dir": str(self._config.data_dir),
                "document_count": len(sources),
                "sources": [s["filename"] for s in sources],
            }

    def rebuild(self) -> SyncResult:
        """Rebuild the entire index from scratch.

        Runs the ingest ``sync`` routine with ``force_rebuild=True`` and
        ``quiet=True`` via ``asyncio.run``.
        """
        from lilbee.ingest import sync as _sync

        with _swap_config(self._config):
            return asyncio.run(_sync(force_rebuild=True, quiet=True))