Coverage for src / lilbee / config.py: 100%
88 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 08:27 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 08:27 +0000
1"""Application configuration for lilbee.
3All settings can be overridden via environment variables prefixed with LILBEE_.
4"""
6import logging
7import os
8from pathlib import Path
9from typing import Any
11from pydantic import BaseModel, ConfigDict, Field
13from lilbee import settings
14from lilbee.platform import default_data_dir, env, env_float, env_int, env_int_optional
16log = logging.getLogger(__name__)
# Baseline directory names for ``Config.ignore_dirs``; ``Config.from_env()``
# unions in any extra names supplied through the IGNORE setting.
DEFAULT_IGNORE_DIRS = frozenset(
    (
        "node_modules",
        "__pycache__",
        "venv",
        "build",
        "dist",
        "target",
        "vendor",
        "_build",
        "coverage",
        "htmlcov",
    )
)

# Table name constants (presumably LanceDB table names — confirm against the store code).
CHUNKS_TABLE = "chunks"
SOURCES_TABLE = "_sources"
class Config(BaseModel):
    """Validated runtime settings for lilbee.

    One singleton instance is built at import time and later mutated by CLI
    overrides; ``validate_assignment`` makes every such mutation go through the
    same field constraints as construction.
    """

    model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True)

    # Filesystem layout (``from_env`` derives all of these from ``data_root``).
    data_root: Path
    documents_dir: Path
    data_dir: Path
    lancedb_dir: Path

    # Model names and numeric settings; bounds are enforced by the Field constraints.
    chat_model: str
    embedding_model: str
    embedding_dim: int = Field(ge=1)
    chunk_size: int = Field(ge=1)
    chunk_overlap: int = Field(ge=0)
    max_embed_chars: int = Field(ge=1)
    top_k: int = Field(ge=1)
    max_distance: float = Field(ge=0.0)
    system_prompt: str
    ignore_dirs: frozenset[str]
    vision_model: str = ""
    vision_timeout: float = Field(default=120.0, ge=0.0)
    server_host: str = "127.0.0.1"
    server_port: int = Field(default=7433, ge=1, le=65535)
    json_mode: bool = False

    # Sampling options consumed by ``generation_options``; ``None`` entries are omitted there.
    temperature: float | None = Field(default=None, ge=0.0)
    top_p: float | None = Field(default=None, ge=0.0, le=1.0)
    top_k_sampling: int | None = Field(default=None, ge=1)
    repeat_penalty: float | None = Field(default=None, ge=0.0)
    num_ctx: int | None = Field(default=None, ge=1)
    seed: int | None = None

    def generation_options(self, **overrides: Any) -> dict[str, Any]:
        """Assemble the Ollama generation options dict.

        The config fields supply the base values (``top_k_sampling`` is emitted
        under Ollama's ``top_k`` key). Keyword ``overrides`` then replace or
        extend those entries, and every entry whose final value is ``None`` is
        dropped so Ollama uses its model defaults.
        """
        from_config: dict[str, Any] = {
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k_sampling,
            "repeat_penalty": self.repeat_penalty,
            "num_ctx": self.num_ctx,
            "seed": self.seed,
        }
        # Overrides win on key clashes; new keys are appended after the config keys.
        merged = {**from_config, **overrides}
        return {name: value for name, value in merged.items() if value is not None}

    @classmethod
    def from_env(cls) -> "Config":
        """Create a Config from environment variables and the settings file."""
        root = _resolve_data_root()
        chat = _load_chat_model(root)
        vision = _load_vision_model(root)
        timeout = _parse_vision_timeout()

        # IGNORE is a comma-separated list; blank entries are discarded.
        stripped_names = (part.strip() for part in env("IGNORE", "").split(","))
        ignored = DEFAULT_IGNORE_DIRS.union(name for name in stripped_names if name)

        storage_dir = root / "data"
        fallback_prompt = (
            "You are a helpful technical assistant. Answer questions using "
            "the provided context. Be specific — prefer exact numbers, part numbers, "
            "and measurements over vague references. Cite facts directly from the context. "
            "Do not make up information."
        )

        return cls(
            data_root=root,
            documents_dir=root / "documents",
            data_dir=storage_dir,
            lancedb_dir=storage_dir / "lancedb",
            chat_model=chat,
            embedding_model=env("EMBEDDING_MODEL", "nomic-embed-text"),
            embedding_dim=env_int("EMBEDDING_DIM", 768),
            chunk_size=env_int("CHUNK_SIZE", 512),
            chunk_overlap=env_int("CHUNK_OVERLAP", 100),
            max_embed_chars=env_int("MAX_EMBED_CHARS", 2000),
            top_k=env_int("TOP_K", 10),
            max_distance=float(env("MAX_DISTANCE", "0.7")),
            system_prompt=env("SYSTEM_PROMPT", fallback_prompt),
            ignore_dirs=ignored,
            vision_model=vision,
            vision_timeout=timeout,
            server_host=env("SERVER_HOST", "127.0.0.1"),
            server_port=env_int("SERVER_PORT", 7433),
            temperature=env_float("TEMPERATURE"),
            top_p=env_float("TOP_P"),
            top_k_sampling=env_int_optional("TOP_K_SAMPLING"),
            repeat_penalty=env_float("REPEAT_PENALTY"),
            num_ctx=env_int_optional("NUM_CTX"),
            seed=env_int_optional("SEED"),
        )
def _resolve_data_root() -> Path:
    """Pick the data root: DATA env setting, else a local root, else the platform default."""
    explicit = env("DATA", "")
    if not explicit:
        # Imported here, as in the original, rather than at module top level.
        from lilbee.platform import find_local_root

        nearby = find_local_root()
        return default_data_dir() if nearby is None else nearby
    return Path(explicit)
def _load_chat_model(data_root: Path) -> str:
    """Resolve chat model: LILBEE_CHAT_MODEL env > persisted setting > default.

    A LILBEE_CHAT_MODEL that is set but blank is treated as unset (the same way
    ``_load_vision_model`` treats a blank LILBEE_VISION_MODEL), so it neither
    hides the persisted setting nor produces an empty model name.

    Args:
        data_root: Passed to ``settings.get`` to look up the persisted ``chat_model``.

    Returns:
        The chat model name to use.
    """
    default = "qwen3:8b"

    # A non-blank environment value always wins.
    if os.environ.get("LILBEE_CHAT_MODEL", "").strip():
        return env("CHAT_MODEL", default)

    try:
        saved = settings.get(data_root, "chat_model")
    except (ValueError, OSError):
        # Best effort: a failed settings lookup falls through to the default.
        saved = None
    if saved:
        return saved

    # Keep going through env() for the unset case; guard against it echoing a blank value.
    fallback = env("CHAT_MODEL", default)
    return fallback if fallback.strip() else default
def _load_vision_model(data_root: Path) -> str:
    """Pick the vision model: LILBEE_VISION_MODEL env, else persisted setting, else ""."""
    override = os.environ.get("LILBEE_VISION_MODEL", "").strip()
    if not override:
        try:
            persisted = settings.get(data_root, "vision_model")
        except (ValueError, OSError):
            # A failed settings lookup means "no vision model".
            return ""
        return persisted or ""
    return override
def _parse_vision_timeout() -> float:
    """Parse LILBEE_VISION_TIMEOUT env var, returning default on invalid input.

    Invalid input covers non-numeric text as well as negative and NaN values:
    those parse as floats but would be rejected by the ``ge=0.0`` constraint on
    ``Config.vision_timeout``, which would make ``Config.from_env()`` raise.

    Returns:
        The timeout from the environment, or 120.0 when the variable is unset,
        blank, or invalid.
    """
    default = 120.0
    raw = os.environ.get("LILBEE_VISION_TIMEOUT", "").strip()
    if not raw:
        return default
    try:
        value = float(raw)
    except ValueError:
        log.warning("Invalid LILBEE_VISION_TIMEOUT=%r, ignoring", raw)
        return default
    # ``not (value >= 0.0)`` is true for negatives and for NaN (all NaN comparisons are False).
    if not (value >= 0.0):
        log.warning("Invalid LILBEE_VISION_TIMEOUT=%r, ignoring", raw)
        return default
    return value
# Module-level singleton, built once at import time from the environment and settings file.
cfg: Config = Config.from_env()