Coverage for src / lilbee / clustering.py: 100%
33 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
1"""Source clustering abstraction for wiki synthesis pages.
Defines the :class:`SourceCluster` value type, the :class:`SourceClusterer`
protocol, and the :class:`Clusterer` facade. Backends are identified by the
:class:`ClustererBackend` enum, which is imported from ``lilbee.config``. The
facade is the single class the services container constructs and it picks
the right backend from ``config.wiki_clusterer`` so callers never need to
know which implementation they got.
8"""
10from __future__ import annotations
12import logging
13from dataclasses import dataclass
14from typing import TYPE_CHECKING, Protocol, runtime_checkable
16from lilbee.config import ClustererBackend
18if TYPE_CHECKING:
19 from lilbee.config import Config
20 from lilbee.store import Store
22log = logging.getLogger(__name__)
@dataclass(frozen=True)
class SourceCluster:
    """A group of related documents identified by a clustering strategy.

    The dataclass is frozen: fields cannot be reassigned after construction,
    and instances compare (and hash) by field value.
    """

    cluster_id: str
    """Opaque stable identifier, used for filesystem slugs."""

    label: str
    """Human-readable topic label for the cluster."""

    sources: frozenset[str]
    """Set of source document filenames in the cluster."""
@runtime_checkable
class SourceClusterer(Protocol):
    """Finds clusters of related source documents for cross-source synthesis.

    This is a structural protocol: any object providing ``available`` and
    ``get_clusters`` satisfies it. Because it is ``runtime_checkable``,
    ``isinstance`` checks work, but they only verify that the methods exist,
    not their signatures.
    """

    def available(self) -> bool:
        """Return True if this clusterer can produce clusters in the current env."""
        ...

    def get_clusters(self, min_sources: int = 3) -> list[SourceCluster]:
        """Return clusters spanning at least ``min_sources`` distinct documents."""
        ...
def _select_backend(config: Config, store: Store) -> SourceClusterer:
    """Pick a backend based on ``config.wiki_clusterer`` with safe fallback.

    Concrete backends are imported inside the function to break a hard
    circular dependency: ``clustering_embedding`` re-exports
    :class:`SourceCluster` from this module, so importing it at module
    level here would fail during package initialization.

    Args:
        config: Application configuration; ``config.wiki_clusterer`` selects
            the backend.
        store: Store handed to whichever backend is constructed.

    Returns:
        The concept-graph clusterer when it is both requested and reports
        itself available; otherwise the embedding clusterer.
    """
    from lilbee.clustering_embedding import EmbeddingClusterer
    from lilbee.concepts import ConceptGraphClusterer

    if config.wiki_clusterer == ClustererBackend.CONCEPTS:
        graph_clusterer = ConceptGraphClusterer(config, store)
        if graph_clusterer.available():
            return graph_clusterer
        # The concepts backend was requested but cannot run: warn and fall
        # through to the embedding backend instead of failing.
        log.warning(
            "wiki_clusterer=concepts but the [graph] extra is not installed or "
            "the concept graph has not been built. Falling back to the "
            "embedding clusterer."
        )
    return EmbeddingClusterer(config, store)
class Clusterer:
    """Wiki synthesis clusterer facade with backend selection.

    The backend is chosen once, at construction time, by
    ``_select_backend``; every call afterwards is delegated to it.
    """

    def __init__(self, config: Config, store: Store) -> None:
        """Select and store the backend for ``config`` and ``store``."""
        self._backend: SourceClusterer = _select_backend(config, store)

    @property
    def backend(self) -> SourceClusterer:
        """Return the underlying backend (useful for tests and introspection)."""
        return self._backend

    def available(self) -> bool:
        """Return whether the selected backend reports itself available."""
        return self._backend.available()

    def get_clusters(self, min_sources: int = 3) -> list[SourceCluster]:
        """Return the selected backend's clusters for ``min_sources``."""
        return self._backend.get_clusters(min_sources=min_sources)