Coverage for src / lilbee / cli / commands.py: 100%
827 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
1"""CLI command definitions registered on the app."""
3from __future__ import annotations
5import asyncio
6import json
7import sys
8from pathlib import Path
9from typing import TYPE_CHECKING, Any
11import typer
13if TYPE_CHECKING:
14 import uvicorn
16 from lilbee.wiki.entity_extractor import ExtractedEntity
17from rich.table import Table
19from lilbee.cli import theme
20from lilbee.cli.app import (
21 app,
22 apply_overrides,
23 console,
24 data_dir_option,
25 global_option,
26 model_option,
27 num_ctx_option,
28 repeat_penalty_option,
29 seed_option,
30 temperature_option,
31 top_k_sampling_option,
32 top_p_option,
33)
34from lilbee.cli.helpers import (
35 CopyResult,
36 add_paths,
37 auto_sync,
38 clean_result,
39 copy_files,
40 gather_status,
41 get_version,
42 json_output,
43 perform_reset,
44 render_status,
45 sync_result_to_json,
46)
47from lilbee.cli.tui import messages as msg
48from lilbee.config import cfg
49from lilbee.crawler import CrawlerBrowserMissing, bootstrap_chromium, chromium_installed, is_url
50from lilbee.progress import EventType, SetupProgressEvent
51from lilbee.providers.base import ProviderError
52from lilbee.services import get_services
53from lilbee.store import SearchScope, scope_to_chunk_type
54from lilbee.wiki.shared import (
55 DRAFTS_SUBDIR,
56 SUMMARIES_SUBDIR,
57)
CHUNK_PREVIEW_LEN = 80  # characters shown in human-readable search output

# Shared CLI option: force vision OCR on or off (None = keep config default).
_ocr_option = typer.Option(None, "--ocr/--no-ocr", help="Force vision OCR on/off for scanned PDFs.")
# Shared CLI option: per-page OCR timeout override (None = keep config default).
_ocr_timeout_option = typer.Option(
    None,
    "--ocr-timeout",
    help="Per-page timeout in seconds for vision OCR (default: 120, 0 = no limit).",
)
# Shared CLI option: restrict retrieval to raw chunks, wiki pages, or both.
_scope_option = typer.Option(
    SearchScope.BOTH,
    "--scope",
    "-s",
    help="Restrict the pool to raw chunks, wiki pages, or both (default).",
    case_sensitive=False,
)
def _apply_ocr_overrides(ocr: bool | None, ocr_timeout: float | None) -> None:
    """Copy explicit --ocr/--no-ocr and --ocr-timeout values onto the config.

    ``None`` means the flag was not supplied on the command line, so the
    existing config value is left untouched.
    """
    for attr, value in (("enable_ocr", ocr), ("ocr_timeout", ocr_timeout)):
        if value is not None:
            setattr(cfg, attr, value)
# Shared positional argument for commands that accept one or more inputs.
_paths_argument = typer.Argument(
    ...,
    help="Files, directories, or URLs to add to the knowledge base.",
)
@app.command()
def search(
    query: str = typer.Argument(..., help="Search query"),
    top_k: int = typer.Option(None, "--top-k", "-k", help="Number of results"),
    scope: SearchScope = _scope_option,
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Search the knowledge base for relevant chunks.

    Emits a JSON payload in --json mode; otherwise renders a Rich table with
    a preview of each chunk and its relevance score (or raw vector distance
    when no relevance score is present in the results).
    """
    apply_overrides(data_dir=data_dir, use_global=use_global)

    # Reject empty / whitespace-only queries up front.
    if not query or not query.strip():
        if cfg.json_mode:
            json_output({"error": "query must not be empty"})
            raise SystemExit(1)
        console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] query must not be empty")
        raise SystemExit(1)

    try:
        results = get_services().searcher.search(
            query,
            top_k=top_k or cfg.top_k,
            chunk_type=scope_to_chunk_type(scope),
        )
    except Exception as exc:
        if cfg.json_mode:
            json_output({"error": str(exc)})
            raise SystemExit(1) from None
        console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] {exc}")
        raise SystemExit(1) from None
    cleaned = [clean_result(r) for r in results]

    if cfg.json_mode:
        json_output({"command": "search", "query": query, "results": cleaned})
        return

    if not cleaned:
        console.print("No results found.")
        return

    has_relevance = any("relevance_score" in r for r in cleaned)
    table = Table(title="Search Results")
    table.add_column("Source", style=theme.ACCENT)
    table.add_column("Chunk", max_width=80)
    score_label = "Score" if has_relevance else "Distance"
    table.add_column(score_label, justify="right", style=theme.MUTED)

    for r in cleaned:
        chunk_text = r["chunk"]
        preview = chunk_text[:CHUNK_PREVIEW_LEN]
        if len(chunk_text) > CHUNK_PREVIEW_LEN:
            preview += "..."
        # Explicit None checks instead of `or`-chaining: a legitimate 0.0
        # relevance score is falsy and would otherwise fall through to the
        # distance value (or to 0) and display the wrong number.
        score = r.get("relevance_score")
        if score is None:
            score = r.get("distance")
        if score is None:
            score = 0
        table.add_row(r["source"], preview, f"{score:.4f}")
    console.print(table)
@app.command(name="sync")
def sync_cmd(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    ocr: bool | None = _ocr_option,
    ocr_timeout: float | None = _ocr_timeout_option,
) -> None:
    """Manually trigger document sync."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    _apply_ocr_overrides(ocr, ocr_timeout)
    from lilbee.ingest import sync

    try:
        # Suppress the sync's own progress output when emitting JSON.
        result = asyncio.run(sync(quiet=cfg.json_mode))
    except RuntimeError as exc:
        if cfg.json_mode:
            json_output({"error": str(exc)})
        else:
            console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] {exc}")
        raise SystemExit(1) from None
    if cfg.json_mode:
        json_output(sync_result_to_json(result))
    else:
        console.print(result)
@app.command()
def rebuild(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    ocr: bool | None = _ocr_option,
    ocr_timeout: float | None = _ocr_timeout_option,
) -> None:
    """Nuke the DB and re-ingest everything from documents/."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    _apply_ocr_overrides(ocr, ocr_timeout)
    from lilbee.ingest import sync

    try:
        # force_rebuild drops the DB before re-ingesting every document.
        result = asyncio.run(sync(force_rebuild=True, quiet=cfg.json_mode))
    except RuntimeError as exc:
        if cfg.json_mode:
            json_output({"error": str(exc)})
        else:
            console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] {exc}")
        raise SystemExit(1) from None
    ingested = len(result.added)
    if cfg.json_mode:
        json_output({"command": "rebuild", "ingested": ingested})
    else:
        console.print(f"Rebuilt: {ingested} documents ingested")
# Shared CLI options for the `add` command.
_force_option = typer.Option(False, "--force", "-f", help="Overwrite existing files.")
_crawl_option = typer.Option(
    False,
    "--crawl",
    help="Recursively crawl URLs (whole site by default; see --depth and --max-pages).",
)
_depth_option = typer.Option(
    None,
    "--depth",
    help="Cap link-follow depth for --crawl. Unset = unbounded; 0 = single URL only.",
)
_max_pages_option = typer.Option(
    None,
    "--max-pages",
    help="Cap total pages for --crawl. Unset = no limit; positive int = hard cap.",
)
_include_subdomains_option = typer.Option(
    False,
    "--include-subdomains",
    help=(
        "Allow --crawl to follow links into sibling subdomains of the start "
        "host (e.g. en.wikipedia.org plus af.wikipedia.org). Default scopes "
        "the crawl to the exact start host only."
    ),
)
def _partition_inputs(inputs: list[str]) -> tuple[list[Path], list[str]]:
    """Split *inputs* into local file paths and web URLs, preserving order."""
    buckets: tuple[list[Path], list[str]] = ([], [])
    for item in inputs:
        if is_url(item):
            buckets[1].append(item)
        else:
            buckets[0].append(Path(item))
    return buckets
def _crawl_urls_blocking(
    urls: list[str],
    *,
    crawl: bool,
    depth: int | None,
    max_pages: int | None,
    include_subdomains: bool = False,
) -> list[Path]:
    """Crawl URLs synchronously (for CLI), returning paths written.

    Without --crawl, each URL is fetched as a single page (depth=0).
    With --crawl, the default is whole-site unbounded (depth=None, pages=None).
    Explicit --depth / --max-pages override both.

    Ctrl-C is handled by running the crawl through _run_crawl_with_signal_cancel,
    which installs a signal.signal handler that sets a threading.Event passed
    into crawl_and_save. crawl_recursive polls the event between pages so the
    signal flows through as a clean cancel instead of asyncio.run's default
    KeyboardInterrupt-raising (which left browser contexts mid-teardown).
    """
    import threading

    from rich.progress import Progress, SpinnerColumn, TaskID, TextColumn

    from lilbee.crawler import crawl_and_save
    from lilbee.progress import CrawlPageEvent, DetailedProgressCallback, EventType, ProgressEvent

    if crawl:
        # Recursive mode: pass through whatever the user (possibly) capped.
        effective_depth = depth
        effective_pages = max_pages
    else:
        # Single-page mode: fetch only the given URL itself.
        effective_depth = 0
        effective_pages = None

    # One shared cancel event across all URLs: a Ctrl-C during any crawl
    # stops the remaining URLs too (checked at the top of the loop below).
    cancel_event = threading.Event()

    from rich.console import Console as RichConsole

    # Progress goes to stderr so stdout stays clean for piping/JSON.
    err_console = RichConsole(stderr=True)
    all_paths: list[Path] = []
    with Progress(
        SpinnerColumn(),
        TextColumn("{task.description}"),
        transient=True,
        console=err_console,
        disable=cfg.json_mode,
    ) as progress:
        for url in urls:
            if cancel_event.is_set():
                break
            ptask = progress.add_task(f"Crawling {url}...", total=None)

            # Bind the task id as a default argument so each URL's callback
            # updates its own progress row (avoids the late-binding closure
            # pitfall of capturing the loop variable directly).
            def _make_callback(_t: TaskID = ptask) -> DetailedProgressCallback:
                def on_progress(event_type: EventType, data: ProgressEvent) -> None:
                    if event_type == EventType.CRAWL_PAGE:
                        if not isinstance(data, CrawlPageEvent):
                            raise TypeError(f"Expected CrawlPageEvent, got {type(data).__name__}")
                        # total <= 0 means the crawler doesn't know the page
                        # count yet; show "?" instead.
                        total_str = str(data.total) if data.total > 0 else "?"
                        progress.update(
                            _t,
                            description=f"Crawled {data.current}/{total_str}: {data.url}",
                        )

                return on_progress

            paths = _run_crawl_with_signal_cancel(
                url,
                depth=effective_depth,
                max_pages=effective_pages,
                on_progress=_make_callback(),
                cancel_event=cancel_event,
                crawl_and_save=crawl_and_save,
                include_subdomains=include_subdomains,
            )
            all_paths.extend(paths)
            progress.update(ptask, description=f"Done: {url} ({len(paths)} pages)")
    return all_paths
def _run_crawl_with_signal_cancel(
    url: str,
    *,
    depth: int | None,
    max_pages: int | None,
    on_progress: object,
    cancel_event: object,
    crawl_and_save: object,
    include_subdomains: bool = False,
) -> list[Path]:
    """Run crawl_and_save on a dedicated event loop with a SIGINT->cancel hook.

    asyncio.run() installs its own SIGINT handler that raises
    KeyboardInterrupt, which tears the crawl down ungracefully. Registering a
    plain signal.signal handler on the main thread AND running the crawl on a
    loop we own (instead of asyncio.run) lets Ctrl-C set our threading.Event,
    which crawl_recursive polls between pages so it can close the stream and
    stop dispatch cleanly.
    """
    import signal

    # Remember whatever handler was active so it can be restored afterwards.
    previous_handler = signal.getsignal(signal.SIGINT)

    def _on_sigint(_signum: int, _frame: object) -> None:
        # Set the cancel event that crawl_recursive polls between pages, so
        # a Ctrl-C flows through as a clean cancel instead of asyncio.run's
        # default KeyboardInterrupt-raising dance.
        cancel_event.set()  # type: ignore[attr-defined]

    signal.signal(signal.SIGINT, _on_sigint)
    # Manage the event loop explicitly. In the CLI this runs once per process,
    # but under pytest-xdist the same worker thread runs many tests; leaving a
    # closed loop set as the "current" loop for the thread poisons every later
    # asyncio.get_event_loop() call and hangs macOS 3.12/3.13 unit-test CI.
    # Always clear the thread-current loop in finally.
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        coro = crawl_and_save(  # type: ignore[operator]
            url,
            depth=depth,
            max_pages=max_pages,
            on_progress=on_progress,
            cancel=cancel_event,
            quiet=cfg.json_mode,
            include_subdomains=include_subdomains,
        )
        return loop.run_until_complete(coro)
    finally:
        loop.close()
        asyncio.set_event_loop(None)
        # Restore the pre-existing SIGINT handler even on error/cancel.
        signal.signal(signal.SIGINT, previous_handler)
@app.command()
def add(
    paths: list[str] = _paths_argument,
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    force: bool = _force_option,
    ocr: bool | None = _ocr_option,
    ocr_timeout: float | None = _ocr_timeout_option,
    crawl: bool = _crawl_option,
    depth: int | None = _depth_option,
    max_pages: int | None = _max_pages_option,
    include_subdomains: bool = _include_subdomains_option,
) -> None:
    """Copy files or crawl URLs into the knowledge base and ingest them.

    Inputs may mix local paths and URLs. URLs are crawled to markdown first;
    local paths are validated, copied, and then everything is synced.
    """
    apply_overrides(data_dir=data_dir, use_global=use_global)
    _apply_ocr_overrides(ocr, ocr_timeout)

    file_paths, urls = _partition_inputs(paths)
    # Validate file paths exist before doing any crawling or copying.
    for fp in file_paths:
        if not fp.exists():
            if cfg.json_mode:
                json_output({"error": f"Path not found: {fp}"})
                raise SystemExit(1)
            console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] Path not found: {fp}")
            raise SystemExit(1)

    try:
        # Crawl URLs first (saves .md files into documents/_web/)
        crawled_paths: list[Path] = []
        if urls:
            from lilbee.crawler import crawler_available

            # Crawling is an optional extra; fail with an install hint.
            if not crawler_available():
                console.print(
                    f"[{theme.ERROR}]Web crawling requires: "
                    f"pip install 'lilbee[crawler]'[/{theme.ERROR}]"
                )
                raise SystemExit(1)
            crawled_paths = _crawl_urls_blocking(
                urls,
                crawl=crawl,
                depth=depth,
                max_pages=max_pages,
                include_subdomains=include_subdomains,
            )
            if not cfg.json_mode:
                console.print(
                    f"[{theme.MUTED}]Crawled {len(crawled_paths)} page(s)"
                    f" from {len(urls)} URL(s)[/{theme.MUTED}]"
                )

        if cfg.json_mode:
            # JSON mode: copy quietly, sync quietly, emit one summary payload.
            from lilbee.ingest import sync

            copy_result = CopyResult()
            if file_paths:
                copy_result = copy_files(file_paths, force=force)
            result = asyncio.run(sync(quiet=True))
            json_output(
                {
                    "command": "add",
                    "copied": copy_result.copied,
                    "skipped": copy_result.skipped,
                    "crawled": len(crawled_paths),
                    "sync": sync_result_to_json(result),
                }
            )
            return

        if file_paths:
            # Interactive path: delegate copy + ingest to the add_paths helper.
            add_paths(file_paths, console, force=force)
        elif urls:
            # URLs already saved; just trigger sync
            from lilbee.ingest import sync

            result = asyncio.run(sync())
            console.print(result)
    except RuntimeError as exc:
        if cfg.json_mode:
            json_output({"error": str(exc)})
            raise SystemExit(1) from None
        console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] {exc}")
        raise SystemExit(1) from None
# Positional argument for `chunks`: the indexed source name to inspect.
_chunks_source_argument = typer.Argument(..., help="Source name to inspect chunks for.")
@app.command()
def chunks(
    source: str = _chunks_source_argument,
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show chunks a document was split into (useful for debugging retrieval)."""
    apply_overrides(data_dir=data_dir, use_global=use_global)

    store = get_services().store
    if source not in {entry["filename"] for entry in store.get_sources()}:
        if cfg.json_mode:
            json_output({"error": f"Source not found: {source}"})
        else:
            console.print(f"[{theme.ERROR}]Source not found:[/{theme.ERROR}] {source}")
        raise SystemExit(1)

    # Present chunks in document order, not store order.
    cleaned = sorted(
        (clean_result(raw) for raw in store.get_chunks_by_source(source)),
        key=lambda item: item.get("chunk_index", 0),
    )

    if cfg.json_mode:
        json_output({"command": "chunks", "source": source, "chunks": cleaned})
        return

    console.print(
        f"[{theme.LABEL}]{len(cleaned)}[/{theme.LABEL}]"
        f" chunks from [{theme.ACCENT}]{source}[/{theme.ACCENT}]\n"
    )
    for item in cleaned:
        body = item.get("chunk", "")
        preview = body[:CHUNK_PREVIEW_LEN]
        if len(body) > CHUNK_PREVIEW_LEN:
            preview += "..."
        idx = item.get("chunk_index", "?")
        console.print(f" [{idx}] {preview}")
# Positional argument for `remove`: one or more indexed source names.
_remove_names_argument = typer.Argument(
    ..., help="Source name(s) to remove from the knowledge base."
)
# When set, also delete the backing file, not just the index entries.
_delete_file_option = typer.Option(
    False, "--delete", help="Also delete the file from the documents directory."
)
@app.command()
def remove(
    names: list[str] = _remove_names_argument,
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    delete_file: bool = _delete_file_option,
) -> None:
    """Remove documents from the knowledge base by source name."""
    apply_overrides(data_dir=data_dir, use_global=use_global)

    result = get_services().store.remove_documents(
        names, delete_files=delete_file, documents_dir=cfg.documents_dir
    )
    # Exit non-zero only when nothing was removed AND something was missing.
    all_missing = not result.removed and bool(result.not_found)

    if cfg.json_mode:
        payload: dict = {"command": "remove", "removed": result.removed}
        if result.not_found:
            payload["not_found"] = result.not_found
        json_output(payload)
    else:
        for name in result.removed:
            console.print(f"Removed [{theme.ACCENT}]{name}[/{theme.ACCENT}]")
        for name in result.not_found:
            console.print(f"[{theme.ERROR}]Not found:[/{theme.ERROR}] {name}")
    if all_missing:
        raise SystemExit(1)
@app.command()
def ask(
    question: str = typer.Argument(..., help="Question to ask"),
    scope: SearchScope = _scope_option,
    data_dir: Path | None = data_dir_option,
    model: str | None = model_option,
    use_global: bool = global_option,
    temperature: float | None = temperature_option,
    top_p: float | None = top_p_option,
    top_k_sampling: int | None = top_k_sampling_option,
    repeat_penalty: float | None = repeat_penalty_option,
    num_ctx: int | None = num_ctx_option,
    seed: int | None = seed_option,
) -> None:
    """Ask a one-shot question (auto-syncs first)."""
    apply_overrides(
        data_dir=data_dir,
        model=model,
        use_global=use_global,
        temperature=temperature,
        top_p=top_p,
        top_k_sampling=top_k_sampling,
        repeat_penalty=repeat_penalty,
        num_ctx=num_ctx,
        seed=seed,
    )

    try:
        from lilbee.models import ensure_chat_model

        ensure_chat_model()
        get_services().embedder.validate_model()
        if not cfg.json_mode:
            auto_sync(console)
        else:
            # JSON mode must keep stdout clean, so sync against a quiet console.
            from rich.console import Console as _QuietConsole

            auto_sync(_QuietConsole(quiet=True))

        chunk_type = scope_to_chunk_type(scope)

        if cfg.json_mode:
            result = get_services().searcher.ask_raw(question, chunk_type=chunk_type)
            json_output(
                {
                    "command": "ask",
                    "question": question,
                    "answer": result.answer,
                    "sources": [clean_result(s) for s in result.sources],
                }
            )
            return

        # Interactive: stream tokens to the terminal as they arrive.
        for token in get_services().searcher.ask_stream(question, chunk_type=chunk_type):
            console.print(token.content, end="")
        console.print()
    except (RuntimeError, ProviderError) as exc:
        if cfg.json_mode:
            json_output({"error": str(exc)})
        else:
            console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] {exc}")
        raise SystemExit(1) from None
@app.command()
def chat(
    data_dir: Path | None = data_dir_option,
    model: str | None = model_option,
    use_global: bool = global_option,
    temperature: float | None = temperature_option,
    top_p: float | None = top_p_option,
    top_k_sampling: int | None = top_k_sampling_option,
    repeat_penalty: float | None = repeat_penalty_option,
    num_ctx: int | None = num_ctx_option,
    seed: int | None = seed_option,
) -> None:
    """Interactive chat loop (auto-syncs first)."""
    apply_overrides(
        data_dir=data_dir,
        model=model,
        use_global=use_global,
        temperature=temperature,
        top_p=top_p,
        top_k_sampling=top_k_sampling,
        repeat_penalty=repeat_penalty,
        num_ctx=num_ctx,
        seed=seed,
    )

    # The TUI needs a real terminal on both ends and is incompatible with --json.
    if cfg.json_mode:
        json_output({"error": "Chat requires a terminal, not --json"})
        raise SystemExit(1)
    interactive = sys.stdin.isatty() and sys.stdout.isatty()
    if not interactive:
        console.print(f"[{theme.ERROR}]Error:[/{theme.ERROR}] Chat requires a terminal.")
        raise SystemExit(1)
    from lilbee.cli.tui import run_tui

    run_tui(auto_sync=True)
@app.command()
def version() -> None:
    """Show the lilbee version."""
    ver = get_version()
    if not cfg.json_mode:
        console.print(f"lilbee {ver}")
        return
    json_output({"command": "version", "version": ver})
# Models used by `self-check`: one decoder-style chat GGUF and one
# BERT-style embedding GGUF, so both inference paths get exercised.
_SELF_CHECK_CHAT_REPO = "bartowski/SmolLM2-135M-Instruct-GGUF"
_SELF_CHECK_CHAT_FILE = "SmolLM2-135M-Instruct-Q3_K_S.gguf"
_SELF_CHECK_EMBED_REPO = "nomic-ai/nomic-embed-text-v1.5-GGUF"
_SELF_CHECK_EMBED_FILE = "nomic-embed-text-v1.5.Q4_K_M.gguf"
def _download_self_check_model(repo: str, filename: str) -> Path:
    """Fetch a GGUF from the HuggingFace CDN via urllib (stdlib only).

    Avoids huggingface_hub / httpx entirely. Inside the PyInstaller --onefile
    bundle, huggingface_hub's retry path has re-entered a closed httpx client
    after transient DNS failures on macOS runners. urllib is synchronous,
    lives in the stdlib, and has no long-lived client to close.

    Args:
        repo: HuggingFace repo id, e.g. ``"nomic-ai/nomic-embed-text-v1.5-GGUF"``.
        filename: GGUF file name inside the repo to download.

    Returns:
        Path to the downloaded file in a fresh temporary directory.

    Raises:
        RuntimeError: if all three download attempts fail.
    """
    import tempfile
    import urllib.error  # explicit: urllib.error is referenced in the except below
    import urllib.request

    # Bug fix: the URL path must end in the requested file name; it previously
    # contained a literal placeholder, so the download could never resolve.
    url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
    dest_dir = Path(tempfile.mkdtemp(prefix="lilbee-self-check-"))
    dest = dest_dir / filename
    console.print(f"Downloading {url}")
    last_exc: BaseException | None = None
    for attempt in range(3):
        try:
            with urllib.request.urlopen(url, timeout=120) as response:  # noqa: S310 — literal https url
                dest.write_bytes(response.read())
            return dest
        except (OSError, urllib.error.URLError) as exc:
            last_exc = exc
            console.print(f"download attempt {attempt + 1} failed: {exc!r}")
    raise RuntimeError(f"GGUF download failed after 3 attempts: {last_exc!r}")
# CLI options for `self-check`; the *-path options let offline users or CI
# point at pre-downloaded GGUF files instead of hitting HuggingFace.
_self_check_chat_path_option = typer.Option(
    None,
    "--chat-model-path",
    help="Path to a chat GGUF file. Skips the HuggingFace download.",
)
_self_check_embed_path_option = typer.Option(
    None,
    "--embed-model-path",
    help="Path to an embedding GGUF file. Skips the HuggingFace download.",
)
_self_check_max_tokens_option = typer.Option(5, "--max-tokens", help="Tokens to generate.")
_self_check_skip_embedding_option = typer.Option(
    False,
    "--skip-embedding",
    help="Skip the embedding-model leg of the self-check.",
)
def _self_check_emit_failure(error: str) -> None:
    """Report a self-check failure in the active output mode (JSON or rich)."""
    if not cfg.json_mode:
        console.print(f"[{theme.ERROR}]SELF-CHECK FAILED:[/{theme.ERROR}] {error}")
        return
    json_output({"ok": False, "error": error})
@app.command("self-check")
def self_check_cmd(
    chat_model_path: Path | None = _self_check_chat_path_option,
    embed_model_path: Path | None = _self_check_embed_path_option,
    max_tokens: int = _self_check_max_tokens_option,
    skip_embedding: bool = _self_check_skip_embedding_option,
) -> None:
    """Verify the installation can load llama.cpp and run real inference.

    Two legs:

    1. **Chat**: downloads ``SmolLM2-135M-Instruct-Q3_K_S.gguf`` (~90MB) and
       runs a tiny ``create_completion`` so we know decoder-style models work
       end-to-end and the vendored shared libraries load.
    2. **Embedding**: downloads ``nomic-embed-text-v1.5.Q4_K_M.gguf`` (~84MB)
       and runs ``create_embedding``. This is the leg that catches the
       "Memory is not initialized" assert from llama-cpp-python <0.3.19, where
       BERT-style encoders trip ``kv_cache_clear`` on a context that never
       allocated memory.

    Exits 0 on success, 1 on any failure. Intended for post-install
    verification and as the end-to-end gate in release CI.
    """
    from typing import Any, cast

    try:
        # Leg 1: chat model. An explicit --chat-model-path skips the download.
        chat_path = chat_model_path or _download_self_check_model(
            _SELF_CHECK_CHAT_REPO, _SELF_CHECK_CHAT_FILE
        )
        console.print(f"Loading chat model {chat_path}")

        import llama_cpp

        from lilbee.providers.llama_cpp_provider import install_llama_log_handler

        install_llama_log_handler()
        llm = llama_cpp.Llama(model_path=str(chat_path), n_ctx=256, verbose=False)
        # stream=False (default) returns a dict, not an iterator, but
        # create_completion's return type is a union; cast to Any so the
        # indexing below type-checks without forcing llama_cpp to be a
        # typecheck-time dep of lilbee.
        out = cast(Any, llm.create_completion("2+2=", max_tokens=max_tokens))
        text: str = out["choices"][0]["text"]
    except Exception as exc:
        _self_check_emit_failure(repr(exc))
        raise typer.Exit(1) from exc

    # A loaded model that emits nothing is still a failure.
    if not text.strip():
        _self_check_emit_failure("empty inference response")
        raise typer.Exit(1)

    embedding_dims: int | None = None
    if not skip_embedding:
        try:
            # Leg 2: embedding model (BERT-style encoder path).
            embed_path = embed_model_path or _download_self_check_model(
                _SELF_CHECK_EMBED_REPO, _SELF_CHECK_EMBED_FILE
            )
            console.print(f"Loading embedding model {embed_path}")
            enc = llama_cpp.Llama(
                model_path=str(embed_path),
                embedding=True,
                n_ctx=512,
                verbose=False,
            )
            emb = cast(Any, enc.create_embedding(input=["test"]))
            vec = emb["data"][0]["embedding"]
        except Exception as exc:
            _self_check_emit_failure(repr(exc))
            raise typer.Exit(1) from exc

        if not vec:
            _self_check_emit_failure("empty embedding vector")
            raise typer.Exit(1)
        embedding_dims = len(vec)

    if cfg.json_mode:
        payload: dict[str, Any] = {
            "ok": True,
            "chat_response": text,
            "chat_model": str(chat_path),
        }
        # embedding_dims stays None when --skip-embedding was given.
        if embedding_dims is not None:
            payload["embedding_dims"] = embedding_dims
        json_output(payload)
    else:
        console.print(f"Chat response: {text!r}")
        if embedding_dims is not None:
            console.print(f"Embedding dims: {embedding_dims}")
        console.print(f"[{theme.ACCENT}]SELF-CHECK PASSED[/{theme.ACCENT}]")
@app.command()
def status(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show indexed documents, paths, and chunk counts."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    if not cfg.json_mode:
        render_status(console)
        return
    json_output(gather_status().model_dump(exclude_none=True))
# Confirmation bypass for `reset`; required when running with --json.
_yes_option = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt.")
@app.command()
def reset(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    yes: bool = _yes_option,
) -> None:
    """Delete all documents and data (full factory reset)."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    if not yes:
        # JSON mode has no interactive prompt, so --yes is mandatory there.
        if cfg.json_mode:
            json_output({"error": "Use --yes to confirm reset in JSON mode"})
            raise SystemExit(1)
        console.print(
            f"[{theme.ERROR_BOLD}]This will delete ALL documents and data.[/{theme.ERROR_BOLD}]\n"
            f" Documents: {cfg.documents_dir}\n"
            f" Data: {cfg.data_dir}"
        )
        if not typer.confirm("Are you sure?", default=False):
            console.print("Aborted.")
            raise SystemExit(0)

    result = perform_reset()

    if cfg.json_mode:
        json_output(result.model_dump())
        return

    console.print(
        f"Reset complete: {result.deleted_docs} document(s), "
        f"{result.deleted_data} data item(s) deleted."
    )
    if result.skipped:
        console.print(
            f"[{theme.WARNING}]{len(result.skipped)} item(s) could not be deleted "
            f"(locked or permission denied).[/{theme.WARNING}]"
        )
@app.command()
def init() -> None:
    """Initialize a local .lilbee/ knowledge base in the current directory."""
    root = Path.cwd() / ".lilbee"
    if root.is_dir():
        # Already set up: report and leave everything untouched.
        if cfg.json_mode:
            json_output({"command": "init", "path": str(root), "created": False})
        else:
            console.print(f"Already initialized: {root}")
        return

    for sub in ("documents", "data"):
        (root / sub).mkdir(parents=True)
    # Keep the generated data/ directory out of version control.
    (root / ".gitignore").write_text("data/\n")

    if cfg.json_mode:
        json_output({"command": "init", "path": str(root), "created": True})
    else:
        console.print(f"Initialized local knowledge base at {root}")
def _port_file() -> Path:
    """Return the path of the file recording the server's bound port."""
    return Path(cfg.data_dir, "server.port")
async def _run_server(server: uvicorn.Server, config: uvicorn.Config, host: str) -> None:
    """Start uvicorn, write port file, and clean up on shutdown.

    The server binds first, then the actual bound port (which may have been
    chosen by the OS for port 0) is written to the port file so other
    processes can discover it. The file is removed on shutdown and via atexit.
    """
    import atexit

    from lilbee.parent_monitor import parse_parent_pid, watch_parent_async

    port_path = _port_file()

    def _cleanup_port_file() -> None:
        port_path.unlink(missing_ok=True)

    # Perform the bind manually instead of uvicorn's serve() so the socket
    # exists (and its real port is readable) before the main loop starts.
    if not config.loaded:
        config.load()
    server.lifespan = config.lifespan_class(config)
    await server.startup()

    # If launched by a parent process, exit when that parent dies.
    parent_pid = parse_parent_pid()
    parent_watcher: asyncio.Task[None] | None = None
    if parent_pid is not None:

        def _on_parent_death() -> None:
            server.should_exit = True

        parent_watcher = asyncio.create_task(watch_parent_async(parent_pid, _on_parent_death))

    try:
        if server.servers:
            # Read the real bound port from the listening socket.
            sock = server.servers[0].sockets[0]
            actual_port = sock.getsockname()[1]
            port_path.parent.mkdir(parents=True, exist_ok=True)
            port_path.write_text(str(actual_port))
            atexit.register(_cleanup_port_file)
            console.print(f"Listening on http://{host}:{actual_port}")
        await server.main_loop()
    finally:
        if parent_watcher is not None and not parent_watcher.done():
            parent_watcher.cancel()
        port_path.unlink(missing_ok=True)
        await server.shutdown()
@app.command()
def serve(
    host: str = typer.Option(None, "--host", "-H", help="Bind address (default: 127.0.0.1)"),
    port: int = typer.Option(None, "--port", "-p", help="Port (default: 0/random)"),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Start the HTTP API server."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    # Explicit CLI values win over whatever the config currently holds.
    for attr, value in (("server_host", host), ("server_port", port)):
        if value is not None:
            setattr(cfg, attr, value)

    import logging

    import uvicorn

    from lilbee.server import create_app

    # Raise asyncio's log level before running the server loop.
    logging.getLogger("asyncio").setLevel(logging.ERROR)

    config = uvicorn.Config(create_app(), host=cfg.server_host, port=cfg.server_port)
    asyncio.run(_run_server(uvicorn.Server(config), config, cfg.server_host))
@app.command()
def token(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Print the auth token for a running server."""
    from lilbee.server.auth import server_json_path

    apply_overrides(data_dir=data_dir, use_global=use_global)
    path = server_json_path()
    if not path.exists():
        # No server.json means no server wrote its connection info.
        if cfg.json_mode:
            json_output({"error": "No running server found"})
        else:
            console.print("No running server found (server.json missing).")
        raise SystemExit(1)
    try:
        tok = json.loads(path.read_text()).get("token", "")
    except (json.JSONDecodeError, OSError) as exc:
        if cfg.json_mode:
            json_output({"error": f"Could not read server.json: {exc}"})
        else:
            console.print(
                f"[{theme.ERROR}]Error:[/{theme.ERROR}] Could not read server.json: {exc}"
            )
        raise SystemExit(1) from None
    if cfg.json_mode:
        json_output({"token": tok})
    else:
        console.print(tok)
@app.command()
def topics(
    query: str = typer.Argument(None, help="Optional query to find related concepts."),
    top_k: int = typer.Option(10, "--top-k", "-k", help="Number of results."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show top concept communities or concepts related to a query.

    With a query, lists concepts extracted from / related to it; without one,
    shows the top-k concept communities. Exits 1 if the concept-graph extra
    is not installed, the feature is disabled, or no graph has been built.
    """
    apply_overrides(data_dir=data_dir, use_global=use_global)

    from lilbee.concepts import concepts_available

    if not concepts_available():
        # Named `error_text` (not `msg`) so it does not shadow the
        # module-level `lilbee.cli.tui.messages as msg` import.
        error_text = "Concept graph requires: pip install 'lilbee[graph]'"
        if cfg.json_mode:
            json_output({"error": error_text})
            raise SystemExit(1)
        console.print(f"[{theme.ERROR}]{error_text}[/{theme.ERROR}]")
        raise SystemExit(1)

    if not cfg.concept_graph:
        if cfg.json_mode:
            json_output({"error": "Concept graph is disabled (LILBEE_CONCEPT_GRAPH=false)"})
            raise SystemExit(1)
        console.print(
            f"[{theme.ERROR}]Concept graph is disabled.[/{theme.ERROR}] "
            "Enable with LILBEE_CONCEPT_GRAPH=true"
        )
        raise SystemExit(1)

    if not get_services().concepts.get_graph():
        if cfg.json_mode:
            json_output({"error": "Concept graph not available"})
            raise SystemExit(1)
        console.print(f"[{theme.ERROR}]Concept graph not available.[/{theme.ERROR}]")
        raise SystemExit(1)

    if query:
        _topics_for_query(query)
    else:
        _topics_overview(top_k)
def _topics_for_query(query: str) -> None:
    """Print the direct and expanded concepts related to *query*."""
    graph = get_services().concepts
    direct = graph.extract_concepts(query)
    known = set(direct)  # O(1) membership instead of scanning the list
    combined = direct + [c for c in graph.expand_query(query) if c not in known]

    if cfg.json_mode:
        json_output({"command": "topics", "query": query, "concepts": combined})
        return
    if not combined:
        console.print("No concepts found for this query.")
        return
    console.print(f"Concepts related to [{theme.ACCENT}]{query}[/{theme.ACCENT}]:")
    for concept in combined:
        console.print(f" {concept}")
def _topics_overview(top_k: int) -> None:
    """Render the largest concept communities as JSON or a rich table."""
    from dataclasses import asdict

    communities = get_services().concepts.top_communities(k=top_k)
    if cfg.json_mode:
        json_output({"command": "topics", "communities": [asdict(c) for c in communities]})
        return
    if not communities:
        console.print("No concept communities found. Try syncing some documents first.")
        return

    table = Table(title="Concept Communities")
    table.add_column("Cluster", justify="right", style=theme.MUTED)
    table.add_column("Size", justify="right")
    table.add_column("Top Concepts", style=theme.ACCENT)
    for community in communities:
        head = ", ".join(community.concepts[:5])
        hidden = len(community.concepts) - 5
        if hidden > 0:
            head += f" (+{hidden} more)"
        table.add_row(str(community.cluster_id), str(community.size), head)
    console.print(table)
@app.command()
def login() -> None:
    """Log in to HuggingFace for access to gated models (Mistral, Llama, etc.)."""
    import webbrowser

    from huggingface_hub import get_token
    from huggingface_hub import login as hf_login

    # Offer an early exit when a token is already stored.
    if get_token():
        typer.echo("Already logged in to HuggingFace.")
        if not typer.confirm("Log in again?", default=False):
            return

    typer.echo("Opening HuggingFace token page in your browser...")
    typer.echo("Create a token with 'Read' access, then paste it below.\n")
    webbrowser.open("https://huggingface.co/settings/tokens")

    # Strip once up front; the original stripped in both the check and the call.
    token = typer.prompt("Paste your HuggingFace token", hide_input=True).strip()
    if not token:
        typer.echo("No token provided.", err=True)
        raise typer.Exit(1)

    hf_login(token=token, add_to_git_credential=False)
    typer.echo("Logged in! Gated models (Mistral, Llama, etc.) are now accessible.")
@app.command(name="mcp")
def mcp_cmd() -> None:
    """Start the MCP server (stdio transport) for agent integration."""
    # Aliased so the local name does not collide with anything else in scope.
    from lilbee.mcp import main as mcp_main

    mcp_main()
# Sub-app grouping one-time installer commands, exposed as `lilbee setup <cmd>`.
setup_app = typer.Typer(help="One-time setup for optional runtime components.")
app.add_typer(setup_app, name="setup")
@setup_app.command(name="crawler")
def setup_crawler_cmd() -> None:
    """Install Playwright's Chromium browser, needed for /crawl.

    No-op when Chromium is already present. Emits a simple progress
    readout; use '--json' mode on the top-level 'lilbee' command to get
    a single JSON blob with the final install state instead.
    """
    if chromium_installed():
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "already_installed": True}))
        else:
            typer.echo("Chromium already installed.")
        return

    # Last percentage echoed, so each value is printed at most once.
    # Uses `nonlocal` instead of the single-element-list mutation workaround.
    last_pct = -1

    def _on_progress(event_type: object, data: object) -> None:
        nonlocal last_pct
        if event_type != EventType.SETUP_PROGRESS or not isinstance(data, SetupProgressEvent):
            return
        total = data.total_bytes or 0
        pct = int(data.downloaded_bytes * 100 / total) if total > 0 else 0
        if pct != last_pct and not cfg.json_mode:
            last_pct = pct
            typer.echo(msg.SETUP_CHROMIUM_CLI_PROGRESS.format(pct=pct), err=True)

    try:
        asyncio.run(bootstrap_chromium(on_progress=_on_progress))
    except CrawlerBrowserMissing as exc:
        if cfg.json_mode:
            typer.echo(json.dumps({"component": "chromium", "error": str(exc)}))
        else:
            typer.secho(f"Install failed: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from exc

    if cfg.json_mode:
        typer.echo(json.dumps({"component": "chromium", "installed": True}))
    else:
        typer.echo("Chromium installed.")
# Sub-app grouping wiki-layer maintenance commands, exposed as `lilbee wiki <cmd>`.
wiki_app = typer.Typer(help="Wiki layer commands: generate, lint, citations, status, prune.")
app.add_typer(wiki_app, name="wiki")
@wiki_app.command(name="lint")
def wiki_lint(
    wiki_source: str = typer.Argument("", help="Wiki page path (empty = lint all)."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Lint wiki pages for stale citations, missing sources, and unmarked claims."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.lint import lint_all as _lint_all
    from lilbee.wiki.lint import lint_wiki_page

    store = get_services().store
    # One page when a path was given, otherwise the whole wiki.
    issues = lint_wiki_page(wiki_source, store) if wiki_source else _lint_all(store).issues

    if cfg.json_mode:
        json_output(
            {
                "command": "wiki_lint",
                "issues": [issue.to_dict() for issue in issues],
                "total": len(issues),
            }
        )
        return

    if not issues:
        console.print("No issues found.")
        return

    table = Table(title="Wiki Lint Issues")
    table.add_column("Page", style=theme.ACCENT)
    table.add_column("Severity")
    table.add_column("Message")
    for issue in issues:
        severity = issue.severity.value
        style = theme.ERROR if severity == "error" else theme.WARNING
        table.add_row(issue.wiki_source, f"[{style}]{severity}[/{style}]", issue.message)
    console.print(table)
@wiki_app.command(name="citations")
def wiki_citations(
    wiki_source: str = typer.Argument(..., help="Wiki page path, e.g. wiki/summaries/doc.md."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show citations for a wiki page."""
    apply_overrides(data_dir=data_dir, use_global=use_global)

    records = get_services().store.get_citations_for_wiki(wiki_source)

    if cfg.json_mode:
        json_output(
            {
                "command": "wiki_citations",
                "wiki_source": wiki_source,
                "citations": [dict(record) for record in records],
                "total": len(records),
            }
        )
        return

    if not records:
        console.print(f"No citations found for [{theme.ACCENT}]{wiki_source}[/{theme.ACCENT}]")
        return

    table = Table(title=f"Citations: {wiki_source}")
    table.add_column("Key", style=theme.ACCENT)
    table.add_column("Source")
    table.add_column("Type", style=theme.MUTED)
    table.add_column("Excerpt", max_width=60)
    for record in records:
        text = record["excerpt"]
        # Truncate to the column width, reserving three chars for the ellipsis.
        excerpt = (text[:57] + "...") if len(text) > 60 else text
        table.add_row(record["citation_key"], record["source_filename"], record["claim_type"], excerpt)
    console.print(table)
@wiki_app.command(name="status")
def wiki_status(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show wiki layer status: page counts and lint summary."""
    apply_overrides(data_dir=data_dir, use_global=use_global)

    wiki_root = cfg.data_root / cfg.wiki_dir
    if not wiki_root.exists():
        # Nothing to report until a wiki-enabled sync creates the tree.
        if cfg.json_mode:
            json_output({"wiki_enabled": cfg.wiki, "pages": 0, "issues": 0})
        else:
            console.print("Wiki directory does not exist yet. Run sync with wiki enabled.")
        return

    summaries = _count_md_files(wiki_root / SUMMARIES_SUBDIR)
    drafts = _count_md_files(wiki_root / DRAFTS_SUBDIR)

    from lilbee.wiki.lint import lint_all as _lint_all

    report = _lint_all(get_services().store)

    if cfg.json_mode:
        json_output(
            {
                "wiki_enabled": cfg.wiki,
                SUMMARIES_SUBDIR: summaries,
                DRAFTS_SUBDIR: drafts,
                "pages": summaries + drafts,
                "lint_errors": report.error_count,
                "lint_warnings": report.warning_count,
            }
        )
        return

    if cfg.wiki:
        color, label = "green", "enabled"
    else:
        color, label = "red", "disabled"
    console.print(f"Wiki: [{color}]{label}[/{color}]")
    console.print(f" Summaries: [{theme.LABEL}]{summaries}[/{theme.LABEL}]")
    console.print(f" Drafts: [{theme.LABEL}]{drafts}[/{theme.LABEL}]")
    if report.error_count or report.warning_count:
        console.print(
            f" Lint: [{theme.ERROR}]{report.error_count} error(s)[/{theme.ERROR}], "
            f"[{theme.WARNING}]{report.warning_count} warning(s)[/{theme.WARNING}]"
        )
    else:
        console.print(" Lint: all clean")
@wiki_app.command(name="synthesize")
def wiki_synthesize(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Generate synthesis pages for concept clusters spanning 3+ sources."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    if not cfg.wiki:
        _fail_wiki_disabled()
        return

    from lilbee.wiki.gen import generate_synthesis_pages

    services = get_services()
    generated = generate_synthesis_pages(services.provider, services.store, services.clusterer)

    if cfg.json_mode:
        json_output(
            {
                "command": "wiki_synthesize",
                "paths": [str(p) for p in generated],
                "count": len(generated),
            }
        )
        return

    if not generated:
        console.print("No synthesis pages generated (need 3+ sources per cluster).")
        return

    console.print(f"Generated [{theme.LABEL}]{len(generated)}[/{theme.LABEL}] synthesis pages:")
    for page in generated:
        console.print(f" {page}")
@wiki_app.command(name="prune")
def wiki_prune(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Prune stale and orphaned wiki pages."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.prune import prune_wiki

    report = prune_wiki(get_services().store)

    if cfg.json_mode:
        json_output(
            {
                "command": "wiki_prune",
                "records": [record.to_dict() for record in report.records],
                "archived": report.archived_count,
                "flagged": report.flagged_count,
            }
        )
        return

    if not report.records:
        console.print("No pages pruned.")
        return

    table = Table(title="Wiki Prune Results")
    table.add_column("Page", style=theme.ACCENT)
    table.add_column("Action")
    table.add_column("Reason")
    for record in report.records:
        action = record.action.value
        # Archived pages are the destructive case; flag them in the error color.
        style = theme.ERROR if action == "archived" else theme.WARNING
        table.add_row(record.wiki_source, f"[{style}]{action}[/{style}]", record.reason)
    console.print(table)
def _count_md_files(directory: Path) -> int:
    """Return how many markdown files live under *directory* (0 when absent)."""
    if not directory.exists():
        return 0
    # Count lazily instead of materializing the match list.
    return sum(1 for _ in directory.rglob("*.md"))
def _fail_wiki_disabled() -> None:
    """Emit the standard wiki-disabled message in the caller's output mode."""
    if cfg.json_mode:
        json_output({"error": msg.CMD_WIKI_DISABLED})
    else:
        console.print(msg.CMD_WIKI_DISABLED)
@wiki_app.command(name="build")
def wiki_build(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help=(
            "Run extraction only; skip every LLM call. Prints the NER entity candidates. "
            "LLM-curated concept pages require a build call and are not shown in dry-run."
        ),
    ),
) -> None:
    """Build the concept and entity wiki across all ingested sources."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    if not cfg.wiki:
        _fail_wiki_disabled()
        return

    if dry_run:
        from lilbee.store import SearchChunk
        from lilbee.wiki.entity_extractor import get_entity_extractor

        services = get_services()
        # Gather every chunk from every ingested source for NER extraction.
        chunks: list[SearchChunk] = []
        for source in services.store.get_sources():
            chunks.extend(services.store.get_chunks_by_source(source["filename"]))
        extractor = get_entity_extractor(cfg.wiki_entity_mode, services.provider, cfg)
        _wiki_build_dry_run_output(extractor.extract(chunks))
        return

    from lilbee.wiki import run_full_build

    result = run_full_build(cfg)

    if cfg.json_mode:
        json_output({"command": "wiki_build", **result})
        return

    generated = result["paths"]
    if not generated:
        console.print("No concept or entity pages generated.")
        return

    console.print(
        f"Generated [{theme.LABEL}]{result['count']}[/{theme.LABEL}] "
        f"wiki pages from {result['entities']} extracted records:"
    )
    for page in generated:
        console.print(f" {page}")
# Appended to every --dry-run rendering: concepts only appear in a real build
# because they come from the LLM call that the dry run deliberately skips.
_DRY_RUN_CONCEPT_NOTE = (
    "Note: LLM-curated concepts are not shown in --dry-run. "
    "Run `lilbee wiki build` to see which concepts the LLM proposes."
)
def _wiki_build_dry_run_output(entities: list[ExtractedEntity]) -> None:
    """Render the extraction result as JSON or table without calling any LLM.

    Phase D: concepts come from the per-source batched LLM call, so
    listing them would require the call we are trying to avoid. The
    dry-run surface is NER-entity only, with a trailing note so a
    user who expected concepts in the output knows why they are
    missing.
    """

    def _as_row(entity: ExtractedEntity) -> dict[str, Any]:
        # Flatten one extracted entity into the JSON/table row shape.
        return {
            "slug": entity.slug,
            "label": entity.label,
            "kind": entity.kind.value,
            "type_hint": entity.type_hint,
            "mentions": len(entity.chunk_refs),
            "sources": sorted({ref.source for ref in entity.chunk_refs}),
        }

    rows: list[dict[str, Any]] = [_as_row(entity) for entity in entities]

    if cfg.json_mode:
        json_output(
            {
                "command": "wiki_build",
                "dry_run": True,
                "entities": rows,
                "count": len(rows),
                "note": _DRY_RUN_CONCEPT_NOTE,
            }
        )
        return

    if not rows:
        console.print("No candidate entities extracted. Run sync first.")
        console.print(f"[{theme.MUTED}]{_DRY_RUN_CONCEPT_NOTE}[/{theme.MUTED}]")
        return

    table = Table(title=f"Wiki build dry-run ({len(rows)} NER entity candidates)")
    table.add_column("Slug", style=theme.ACCENT)
    table.add_column("Kind", style=theme.MUTED)
    table.add_column("Type")
    table.add_column("Mentions")
    table.add_column("Sources")
    for row in rows:
        sources: list[str] = row["sources"]
        suffix = ", ..." if len(sources) > 3 else ""
        table.add_row(
            str(row["slug"]),
            str(row["kind"]),
            str(row["type_hint"]),
            str(row["mentions"]),
            ", ".join(sources[:3]) + suffix,
        )
    console.print(table)
    console.print(
        f"Dry run: [{theme.LABEL}]{len(rows)}[/{theme.LABEL}] candidate entities. "
        "No LLM calls were made."
    )
    console.print(f"[{theme.MUTED}]{_DRY_RUN_CONCEPT_NOTE}[/{theme.MUTED}]")
@wiki_app.command(name="update")
def wiki_update(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Refresh the concept and entity wiki after an ingest.

    Currently a full rebuild. The incremental touched-slug regeneration
    lands in the ingest-hook task and will re-route this command then.
    """
    # Delegate straight to the full build until incremental updates land.
    wiki_build(data_dir, use_global, dry_run=False)
# Sub-app for the draft review workflow, exposed as `lilbee wiki drafts <cmd>`.
drafts_app = typer.Typer(help="Review wiki drafts: list, diff, accept, reject.")
wiki_app.add_typer(drafts_app, name="drafts")
@drafts_app.command(name="list")
def wiki_drafts_list(
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """List pending wiki drafts with drift, faithfulness, and pairing info."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import PENDING_KIND_DRIFT, list_drafts

    pending = list_drafts(cfg.data_root / cfg.wiki_dir)

    if cfg.json_mode:
        json_output(
            {
                "command": "wiki_drafts_list",
                "drafts": [draft.to_dict() for draft in pending],
                "total": len(pending),
            }
        )
        return

    if not pending:
        console.print("No drafts pending review.")
        return

    table = Table(title="Wiki Drafts")
    table.add_column("Slug", style=theme.ACCENT)
    table.add_column("Kind", style=theme.MUTED)
    table.add_column("Drift")
    table.add_column("Faithfulness")
    table.add_column("Published?", style=theme.MUTED)
    for draft in pending:
        drift = "-" if draft.drift_ratio is None else f"{draft.drift_ratio:.0%}"
        faith = "-" if draft.faithfulness_score is None else f"{draft.faithfulness_score:.2f}"
        table.add_row(
            draft.slug,
            draft.pending_kind or PENDING_KIND_DRIFT,
            drift,
            faith,
            "yes" if draft.published_exists else "no",
        )
    console.print(table)
@drafts_app.command(name="diff")
def wiki_drafts_diff(
    slug: str = typer.Argument(..., help="Draft slug (e.g. chevrolet)."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Show a unified diff of the draft against its published counterpart."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import diff_draft

    try:
        diff = diff_draft(slug, cfg.data_root / cfg.wiki_dir)
    except FileNotFoundError as exc:
        # Missing draft (or published page): report in the active output mode.
        if not cfg.json_mode:
            console.print(f"[{theme.ERROR}]{exc}[/{theme.ERROR}]")
        else:
            json_output({"error": str(exc)})
        raise typer.Exit(1) from None

    if not cfg.json_mode:
        console.print(diff or "(no differences)")
        return
    json_output({"command": "wiki_drafts_diff", "slug": slug, "diff": diff})
@drafts_app.command(name="accept")
def wiki_drafts_accept(
    slug: str = typer.Argument(..., help="Draft slug to accept."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Overwrite the published page with the draft and re-index its chunks."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import accept_draft

    try:
        result = accept_draft(slug, cfg.data_root / cfg.wiki_dir, get_services().store)
    except FileNotFoundError as exc:
        # No such draft: report in the active output mode and exit non-zero.
        if not cfg.json_mode:
            console.print(f"[{theme.ERROR}]{exc}[/{theme.ERROR}]")
        else:
            json_output({"error": str(exc)})
        raise typer.Exit(1) from None

    if cfg.json_mode:
        json_output({"command": "wiki_drafts_accept", **result.to_dict()})
        return
    console.print(
        f"Accepted [{theme.ACCENT}]{slug}[/{theme.ACCENT}] -> "
        f"{result.moved_to} ({result.reindexed_chunks} chunks re-indexed)"
    )
@drafts_app.command(name="reject")
def wiki_drafts_reject(
    slug: str = typer.Argument(..., help="Draft slug to reject."),
    data_dir: Path | None = data_dir_option,
    use_global: bool = global_option,
) -> None:
    """Delete the draft file. Does not touch the published page or index."""
    apply_overrides(data_dir=data_dir, use_global=use_global)
    from lilbee.wiki.drafts import reject_draft

    try:
        reject_draft(slug, cfg.data_root / cfg.wiki_dir)
    except FileNotFoundError as exc:
        # No such draft: report in the active output mode and exit non-zero.
        if not cfg.json_mode:
            console.print(f"[{theme.ERROR}]{exc}[/{theme.ERROR}]")
        else:
            json_output({"error": str(exc)})
        raise typer.Exit(1) from None

    if cfg.json_mode:
        json_output({"command": "wiki_drafts_reject", "slug": slug})
        return
    console.print(f"Rejected [{theme.ACCENT}]{slug}[/{theme.ACCENT}]")