Coverage for src/lilbee/wiki/browse.py: 100%

94 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

1"""Wiki browse — shared page listing, reading, and resolution logic.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass, field 

6from datetime import date, datetime 

7from pathlib import Path 

8from typing import Any 

9 

10from lilbee.security import validate_path_within 

11from lilbee.wiki.grammar import CODE_FENCE_RE, H1_RE 

12from lilbee.wiki.index import parse_source_count 

13from lilbee.wiki.shared import ( 

14 DRAFTS_SUBDIR, 

15 SUBDIR_TO_TYPE, 

16 WIKI_CONTENT_SUBDIRS, 

17 parse_frontmatter, 

18) 

19 

20 

@dataclass
class WikiPageInfo:
    """Lightweight summary record for one wiki page.

    Holds just enough metadata to render a page listing without
    re-reading the page body.
    """

    slug: str
    title: str
    page_type: str
    source_count: int
    created_at: str

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict for JSON responses."""
        # dict(**kwargs) preserves the same key insertion order as a literal.
        return dict(
            slug=self.slug,
            title=self.title,
            page_type=self.page_type,
            source_count=self.source_count,
            created_at=self.created_at,
        )

40 

41 

@dataclass
class WikiPageContent:
    """Full content of a wiki page with parsed frontmatter."""

    # URL slug that resolved to this page (root-relative, no extension).
    slug: str
    # Display title (frontmatter, body H1, or slug-derived fallback).
    title: str
    # Raw markdown text of the page, frontmatter included.
    content: str
    # Parsed YAML frontmatter; empty dict when the page has none.
    frontmatter: dict[str, Any] = field(default_factory=dict)

50 

51 

def list_md_files(directory: Path) -> list[Path]:
    """List the *.md files directly inside *directory*, sorted by path.

    A missing path, or one that is not a directory, yields an empty list.
    """
    if directory.is_dir():
        return sorted(directory.glob("*.md"))
    return []

57 

58 

def _page_type_from_path(path: Path, wiki_root: Path) -> str:
    """Classify a page by the wiki subdirectory it lives under."""
    try:
        parts = path.relative_to(wiki_root).parts
    except ValueError:
        # Path lies outside the wiki tree entirely.
        return "unknown"
    if len(parts) < 2:
        # A file sitting directly in the root has no type subdirectory.
        return "unknown"
    return SUBDIR_TO_TYPE.get(parts[0], "unknown")

69 

70 

71def _slug_from_path(path: Path, wiki_root: Path) -> str: 

72 """Build a URL slug from a wiki page path.""" 

73 relative = path.relative_to(wiki_root) 

74 return str(relative.with_suffix("")).replace("\\", "/") 

75 

76 

def _extract_h1_title(text: str) -> str | None:
    """Return the first top-level markdown heading, skipping fenced code blocks."""
    inside_code = False
    for raw_line in text.splitlines():
        if CODE_FENCE_RE.match(raw_line):
            # A fence marker toggles code-block state and is never a heading.
            inside_code = not inside_code
        elif not inside_code:
            match = H1_RE.match(raw_line)
            if match is not None:
                return match.group(1).strip()
    return None

89 

90 

def _resolve_page_title(fm: dict[str, Any], text: str, path: Path) -> str:
    """Choose a display title: frontmatter first, then body H1, then the slug.

    Wiki generation does not emit a frontmatter title today, so without the H1
    step every page would render as the slug (e.g. 'Cv Manual' for cv-manual.md).
    """
    fm_title = fm.get("title")
    if fm_title is not None:
        return str(fm_title)
    h1_title = _extract_h1_title(text)
    if h1_title is not None:
        return h1_title
    # Last resort: title-case the filename stem ("cv-manual" -> "Cv Manual").
    return path.stem.replace("-", " ").title()

102 

103 

def build_page_info(path: Path, wiki_root: Path) -> WikiPageInfo:
    """Read *path* from disk and assemble its WikiPageInfo summary."""
    body = path.read_text(encoding="utf-8")
    frontmatter = parse_frontmatter(body)
    generated = frontmatter.get("generated_at", "")
    # yaml.safe_load returns datetime/date objects for date-like strings
    if isinstance(generated, (datetime, date)):
        created = generated.isoformat()
    else:
        created = str(generated)
    return WikiPageInfo(
        slug=_slug_from_path(path, wiki_root),
        title=_resolve_page_title(frontmatter, body, path),
        page_type=_page_type_from_path(path, wiki_root),
        source_count=parse_source_count(body),
        created_at=created,
    )

122 

123 

def find_page(wiki_root: Path, slug: str) -> Path | None:
    """Resolve *slug* to an on-disk wiki page path, or None.

    The resolved candidate is validated to stay inside wiki_root, defeating
    path-traversal slugs.
    """
    page_path = wiki_root / f"{slug}.md"
    try:
        validate_path_within(page_path, wiki_root)
    except ValueError:
        # Slug escapes the wiki root (e.g. via '..') — treat as not found.
        return None
    if page_path.is_file():
        return page_path
    return None

135 

136 

137def _list_md_files_recursive(directory: Path) -> list[Path]: 

138 """Sorted markdown files under *directory* at any depth.""" 

139 if not directory.is_dir(): 

140 return [] 

141 return sorted(directory.rglob("*.md")) 

142 

143 

def list_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """List all wiki pages under summaries/ and synthesis/ at any nesting depth."""
    return [
        build_page_info(md_path, wiki_root)
        for subdir in WIKI_CONTENT_SUBDIRS
        for md_path in _list_md_files_recursive(wiki_root / subdir)
    ]

151 

152 

def list_draft_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """List draft pages that failed the quality gate (recurses into per-source dirs)."""
    drafts_dir = wiki_root / DRAFTS_SUBDIR
    pages: list[WikiPageInfo] = []
    for md_path in _list_md_files_recursive(drafts_dir):
        pages.append(build_page_info(md_path, wiki_root))
    return pages

159 

160 

def read_page(wiki_root: Path, slug: str) -> WikiPageContent | None:
    """Load a wiki page's raw text plus parsed frontmatter.

    Returns None when the slug does not resolve to a file inside wiki_root.
    """
    page_path = find_page(wiki_root, slug)
    if page_path is None:
        return None
    raw = page_path.read_text(encoding="utf-8")
    frontmatter = parse_frontmatter(raw)
    return WikiPageContent(
        slug=slug,
        title=_resolve_page_title(frontmatter, raw, page_path),
        content=raw,
        frontmatter=frontmatter,
    )