Coverage for src/lilbee/wiki/entity_extractor/base.py: 100%
18 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-04-29 19:16 +0000
1"""Protocol and record types for entity/concept extractors."""
3from __future__ import annotations
5from dataclasses import dataclass
6from enum import StrEnum
7from typing import TYPE_CHECKING, Protocol, runtime_checkable
9if TYPE_CHECKING:
10 from lilbee.store import SearchChunk
13class EntityKind(StrEnum):
14 """Whether an ``ExtractedEntity`` is a concept or a proper-noun entity."""
16 CONCEPT = "concept"
17 ENTITY = "entity"
@dataclass(frozen=True)
class ChunkRef:
    """Immutable pointer to a single chunk held in the store.

    Instances are frozen, so they are hashable and compare by value:
    two references with the same ``source`` and ``chunk_index`` are equal.
    """

    # Name of the source the chunk belongs to.
    source: str
    # NOTE(review): presumably the chunk's position within ``source``; confirm.
    chunk_index: int
@dataclass(frozen=True)
class ExtractedEntity:
    """A single concept or entity that an extractor found in the corpus.

    Every extractor strategy fills in all of the fields below. Because
    of that, the downstream steps (page generation, [[link]] rewriting,
    and index building) never need to branch on which extractor
    produced the record.
    """

    slug: str
    # Whether this record is a concept or a proper-noun entity.
    kind: EntityKind
    label: str
    type_hint: str
    # References to chunks; a tuple keeps the frozen record hashable.
    chunk_refs: tuple[ChunkRef, ...]
@runtime_checkable
class EntityExtractor(Protocol):
    """Structural interface for strategies that mine entities from chunks.

    Any object exposing a compatible ``extract`` method satisfies this
    protocol. It is runtime-checkable, so ``isinstance`` can be used to
    test for the method's presence.
    """

    def extract(self, chunks: list[SearchChunk]) -> list[ExtractedEntity]:
        """Produce the deduplicated concepts and entities found in *chunks*."""
        ...