Coverage for src/lilbee/wiki/browse.py: 100%

94 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

1"""Wiki browse — shared page listing, reading, and resolution logic.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass, field 

6from datetime import date, datetime 

7from pathlib import Path 

8from typing import Any 

9 

10from lilbee.security import validate_path_within 

11from lilbee.wiki.grammar import CODE_FENCE_RE, H1_RE 

12from lilbee.wiki.index import parse_source_count 

13from lilbee.wiki.shared import ( 

14 DRAFTS_SUBDIR, 

15 SUBDIR_TO_TYPE, 

16 WIKI_CONTENT_SUBDIRS, 

17 parse_frontmatter, 

18) 

19 

20 

@dataclass
class WikiPageInfo:
    """Lightweight summary record for one wiki page.

    Holds just enough metadata to render a page listing without
    re-reading the page body.
    """

    slug: str
    title: str
    page_type: str
    source_count: int
    created_at: str

    def to_dict(self) -> dict[str, Any]:
        """Return the record as a plain dict for JSON responses."""
        # dict(**kwargs) preserves the same key insertion order as a literal.
        return dict(
            slug=self.slug,
            title=self.title,
            page_type=self.page_type,
            source_count=self.source_count,
            created_at=self.created_at,
        )

40 

41 

@dataclass
class WikiPageContent:
    """Full content of a wiki page with parsed frontmatter."""

    # URL slug that resolved to this page (root-relative, no extension).
    slug: str
    # Display title (frontmatter, body H1, or slug-derived fallback).
    title: str
    # Raw markdown text of the page, frontmatter included.
    content: str
    # Parsed YAML frontmatter; empty dict when the page has none.
    frontmatter: dict[str, Any] = field(default_factory=dict)

50 

51 

def list_md_files(directory: Path) -> list[Path]:
    """List the *.md files directly inside *directory*, sorted by path.

    A missing path, or one that is not a directory, yields an empty list.
    """
    if directory.is_dir():
        return sorted(directory.glob("*.md"))
    return []

57 

58 

def _page_type_from_path(path: Path, wiki_root: Path) -> str:
    """Classify a page by the wiki subdirectory it lives under."""
    try:
        parts = path.relative_to(wiki_root).parts
    except ValueError:
        # Path lies outside the wiki tree entirely.
        return "unknown"
    if len(parts) < 2:
        # A file sitting directly in the root has no type subdirectory.
        return "unknown"
    return SUBDIR_TO_TYPE.get(parts[0], "unknown")

69 

70 

71def _slug_from_path(path: Path, wiki_root: Path) -> str: 

72 """Build a URL slug from a wiki page path.""" 

73 relative = path.relative_to(wiki_root) 

74 return str(relative.with_suffix("")).replace("\\", "/") 

75 

76 

def _extract_h1_title(text: str) -> str | None:
    """Return the first top-level markdown heading, skipping fenced code blocks."""
    inside_code = False
    for raw_line in text.splitlines():
        if CODE_FENCE_RE.match(raw_line):
            # A fence marker toggles code-block state and is never a heading.
            inside_code = not inside_code
        elif not inside_code:
            match = H1_RE.match(raw_line)
            if match is not None:
                return match.group(1).strip()
    return None

89 

90 

def _resolve_page_title(fm: dict[str, Any], text: str, path: Path) -> str:
    """Choose a display title: frontmatter first, then body H1, then the slug.

    Wiki generation does not emit a frontmatter title today, so without the H1
    step every page would render as the slug (e.g. 'Cv Manual' for cv-manual.md).
    """
    fm_title = fm.get("title")
    if fm_title is not None:
        return str(fm_title)
    h1_title = _extract_h1_title(text)
    if h1_title is not None:
        return h1_title
    # Last resort: title-case the filename stem ("cv-manual" -> "Cv Manual").
    return path.stem.replace("-", " ").title()

102 

103 

def build_page_info(path: Path, wiki_root: Path) -> WikiPageInfo:
    """Read *path* from disk and assemble its WikiPageInfo summary."""
    body = path.read_text(encoding="utf-8")
    frontmatter = parse_frontmatter(body)
    generated = frontmatter.get("generated_at", "")
    # yaml.safe_load returns datetime/date objects for date-like strings
    if isinstance(generated, (datetime, date)):
        created = generated.isoformat()
    else:
        created = str(generated)
    return WikiPageInfo(
        slug=_slug_from_path(path, wiki_root),
        title=_resolve_page_title(frontmatter, body, path),
        page_type=_page_type_from_path(path, wiki_root),
        source_count=parse_source_count(body),
        created_at=created,
    )

122 

123 

def find_page(wiki_root: Path, slug: str) -> Path | None:
    """Resolve *slug* to an on-disk wiki page path, or None.

    The resolved candidate is validated to stay inside wiki_root, defeating
    path-traversal slugs.
    """
    page_path = wiki_root / f"{slug}.md"
    try:
        validate_path_within(page_path, wiki_root)
    except ValueError:
        # Slug escapes the wiki root (e.g. via '..') — treat as not found.
        return None
    if page_path.is_file():
        return page_path
    return None

135 

136 

137def _list_md_files_recursive(directory: Path) -> list[Path]: 

138 """Sorted markdown files under *directory* at any depth.""" 

139 if not directory.is_dir(): 

140 return [] 

141 return sorted(directory.rglob("*.md")) 

142 

143 

def list_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """List all wiki pages under summaries/ and synthesis/ at any nesting depth."""
    return [
        build_page_info(md_path, wiki_root)
        for subdir in WIKI_CONTENT_SUBDIRS
        for md_path in _list_md_files_recursive(wiki_root / subdir)
    ]

151 

152 

def list_draft_pages(wiki_root: Path) -> list[WikiPageInfo]:
    """List draft pages that failed the quality gate (recurses into per-source dirs)."""
    drafts_dir = wiki_root / DRAFTS_SUBDIR
    pages: list[WikiPageInfo] = []
    for md_path in _list_md_files_recursive(drafts_dir):
        pages.append(build_page_info(md_path, wiki_root))
    return pages

159 

160 

def read_page(wiki_root: Path, slug: str) -> WikiPageContent | None:
    """Load a wiki page's raw text plus parsed frontmatter.

    Returns None when the slug does not resolve to a file inside wiki_root.
    """
    page_path = find_page(wiki_root, slug)
    if page_path is None:
        return None
    raw = page_path.read_text(encoding="utf-8")
    frontmatter = parse_frontmatter(raw)
    return WikiPageContent(
        slug=slug,
        title=_resolve_page_title(frontmatter, raw, page_path),
        content=raw,
        frontmatter=frontmatter,
    )