Coverage for src / lilbee / config.py: 100%

88 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-16 08:27 +0000

1"""Application configuration for lilbee. 

2 

3All settings can be overridden via environment variables prefixed with LILBEE_. 

4""" 

5 

6import logging 

7import os 

8from pathlib import Path 

9from typing import Any 

10 

11from pydantic import BaseModel, ConfigDict, Field 

12 

13from lilbee import settings 

14from lilbee.platform import default_data_dir, env, env_float, env_int, env_int_optional 

15 

log = logging.getLogger(__name__)

# Baseline directory names for ``Config.ignore_dirs``; ``Config.from_env``
# unions these with any comma-separated names from the IGNORE setting.
DEFAULT_IGNORE_DIRS = frozenset(
    (
        "node_modules",
        "__pycache__",
        "venv",
        "build",
        "dist",
        "target",
        "vendor",
        "_build",
        "coverage",
        "htmlcov",
    )
)

# Table-name constants; they are not used inside this module.
# NOTE(review): presumably the LanceDB table names — confirm against callers.
CHUNKS_TABLE = "chunks"
SOURCES_TABLE = "_sources"

35 

36 

class Config(BaseModel):
    """Runtime configuration — one singleton instance, mutated by CLI overrides.

    ``validate_assignment`` is enabled, so the field constraints declared
    below are enforced on attribute assignment as well as at construction.
    """

    model_config = ConfigDict(validate_assignment=True, arbitrary_types_allowed=True)

    # Filesystem layout (``from_env`` derives the last three from ``data_root``).
    data_root: Path
    documents_dir: Path
    data_dir: Path
    lancedb_dir: Path

    # Model names and embedding / chunking / retrieval parameters.
    chat_model: str
    embedding_model: str
    embedding_dim: int = Field(ge=1)
    chunk_size: int = Field(ge=1)
    chunk_overlap: int = Field(ge=0)
    max_embed_chars: int = Field(ge=1)
    top_k: int = Field(ge=1)
    max_distance: float = Field(ge=0.0)
    system_prompt: str
    ignore_dirs: frozenset[str]

    # Vision model (empty string by default) and its timeout.
    vision_model: str = ""
    vision_timeout: float = Field(default=120.0, ge=0.0)

    # Server bind address and miscellaneous flags.
    server_host: str = "127.0.0.1"
    server_port: int = Field(default=7433, ge=1, le=65535)
    json_mode: bool = False

    # Optional generation parameters; ``None`` values are omitted by
    # ``generation_options``.
    temperature: float | None = Field(default=None, ge=0.0)
    top_p: float | None = Field(default=None, ge=0.0, le=1.0)
    top_k_sampling: int | None = Field(default=None, ge=1)
    repeat_penalty: float | None = Field(default=None, ge=0.0)
    num_ctx: int | None = Field(default=None, ge=1)
    seed: int | None = None

    def generation_options(self, **overrides: Any) -> dict[str, Any]:
        """Assemble the Ollama generation options for a request.

        Values come from this config's generation fields, with
        ``top_k_sampling`` exposed under Ollama's ``top_k`` key. Keyword
        ``overrides`` replace (or add to) those values. Any entry that ends up
        ``None`` is dropped so Ollama uses its model defaults.
        """
        # (option key, config attribute) pairs, in output order.
        option_sources = (
            ("temperature", "temperature"),
            ("top_p", "top_p"),
            ("top_k", "top_k_sampling"),
            ("repeat_penalty", "repeat_penalty"),
            ("num_ctx", "num_ctx"),
            ("seed", "seed"),
        )
        options: dict[str, Any] = {key: getattr(self, attr) for key, attr in option_sources}
        options |= overrides
        return {key: value for key, value in options.items() if value is not None}

    @classmethod
    def from_env(cls) -> "Config":
        """Construct a ``Config`` from environment variables and the settings file."""
        root = _resolve_data_root()
        chat = _load_chat_model(root)
        vision = _load_vision_model(root)
        timeout = _parse_vision_timeout()

        # Extra ignore names arrive comma-separated; blank entries are skipped.
        stripped_names = (part.strip() for part in env("IGNORE", "").split(","))
        ignored = DEFAULT_IGNORE_DIRS.union(name for name in stripped_names if name)

        default_prompt = (
            "You are a helpful technical assistant. Answer questions using "
            "the provided context. Be specific — prefer exact numbers, part numbers, "
            "and measurements over vague references. Cite facts directly from the context. "
            "Do not make up information."
        )
        data_dir = root / "data"

        values: dict[str, Any] = {
            "data_root": root,
            "documents_dir": root / "documents",
            "data_dir": data_dir,
            "lancedb_dir": data_dir / "lancedb",
            "chat_model": chat,
            "embedding_model": env("EMBEDDING_MODEL", "nomic-embed-text"),
            "embedding_dim": env_int("EMBEDDING_DIM", 768),
            "chunk_size": env_int("CHUNK_SIZE", 512),
            "chunk_overlap": env_int("CHUNK_OVERLAP", 100),
            "max_embed_chars": env_int("MAX_EMBED_CHARS", 2000),
            "top_k": env_int("TOP_K", 10),
            "max_distance": float(env("MAX_DISTANCE", "0.7")),
            "system_prompt": env("SYSTEM_PROMPT", default_prompt),
            "ignore_dirs": ignored,
            "vision_model": vision,
            "vision_timeout": timeout,
            "server_host": env("SERVER_HOST", "127.0.0.1"),
            "server_port": env_int("SERVER_PORT", 7433),
            "temperature": env_float("TEMPERATURE"),
            "top_p": env_float("TOP_P"),
            "top_k_sampling": env_int_optional("TOP_K_SAMPLING"),
            "repeat_penalty": env_float("REPEAT_PENALTY"),
            "num_ctx": env_int_optional("NUM_CTX"),
            "seed": env_int_optional("SEED"),
        }
        return cls(**values)

130 

131 

def _resolve_data_root() -> Path:
    """Pick the data root: LILBEE_DATA env > local .lilbee/ > platform default.

    Returns:
        The path from the DATA setting when it is non-empty; otherwise the
        result of ``find_local_root()`` when that is not ``None``; otherwise
        ``default_data_dir()``.
    """
    if explicit := env("DATA", ""):
        return Path(explicit)

    from lilbee.platform import find_local_root

    project_root = find_local_root()
    return default_data_dir() if project_root is None else project_root

145 

146 

def _load_chat_model(data_root: Path) -> str:
    """Pick the chat model: LILBEE_CHAT_MODEL env > persisted setting > default.

    Args:
        data_root: Data root passed to ``settings.get`` for the persisted lookup.

    Returns:
        The ``env("CHAT_MODEL", ...)`` value when ``LILBEE_CHAT_MODEL`` is
        present in the environment. Otherwise the persisted ``chat_model``
        setting when it is truthy, falling back to that same env/default value.
    """
    from_env_or_default = env("CHAT_MODEL", "qwen3:8b")
    if "LILBEE_CHAT_MODEL" in os.environ:
        return from_env_or_default

    try:
        persisted = settings.get(data_root, "chat_model")
    except (ValueError, OSError):
        # Settings lookup failed; fall back to the default.
        return from_env_or_default
    return persisted or from_env_or_default

158 

159 

def _load_vision_model(data_root: Path) -> str:
    """Pick the vision model: LILBEE_VISION_MODEL env > persisted setting > empty.

    Args:
        data_root: Data root passed to ``settings.get`` for the persisted lookup.

    Returns:
        The stripped ``LILBEE_VISION_MODEL`` value when it is non-empty.
        Otherwise the persisted ``vision_model`` setting, or ``""`` when that
        is falsy or the lookup raises ``ValueError`` / ``OSError``.
    """
    override = os.environ.get("LILBEE_VISION_MODEL", "").strip()
    if not override:
        try:
            persisted = settings.get(data_root, "vision_model")
        except (ValueError, OSError):
            return ""
        else:
            return persisted or ""
    return override

169 

170 

_DEFAULT_VISION_TIMEOUT = 120.0


def _parse_vision_timeout() -> float:
    """Parse LILBEE_VISION_TIMEOUT env var, returning default on invalid input.

    "Invalid" covers text that ``float()`` cannot parse, and also parsed
    values that are negative or NaN. The latter would violate the
    ``ge=0.0`` constraint on ``Config.vision_timeout`` and abort
    ``Config.from_env()`` with a validation error instead of falling back.

    Returns:
        The parsed timeout when it is a non-negative number; otherwise
        ``_DEFAULT_VISION_TIMEOUT`` (with a warning logged if a value was
        supplied but rejected).
    """
    raw = os.environ.get("LILBEE_VISION_TIMEOUT", "").strip()
    if not raw:
        return _DEFAULT_VISION_TIMEOUT

    try:
        value: float | None = float(raw)
    except ValueError:
        value = None

    # ``not value >= 0.0`` is deliberately not ``value < 0.0``: NaN compares
    # false both ways, and this form rejects it along with negatives.
    if value is None or not value >= 0.0:
        log.warning("Invalid LILBEE_VISION_TIMEOUT=%r, ignoring", raw)
        return _DEFAULT_VISION_TIMEOUT
    return value

181 

182 

183cfg = Config.from_env()