Coverage for src/lilbee/providers/mtmd_backend.py: 100%

62 statements  

coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

"""Vision OCR loader that drives llama.cpp's mtmd pipeline with the GGUF's
own chat template, so there's no projector-type-to-handler lookup table.
"""

from __future__ import annotations

import logging
from pathlib import Path
from typing import Any

from gguf import GGUFReader

from lilbee.config import cfg
from lilbee.providers.llama_cpp_provider import (
    find_mmproj_for_model,
    install_llama_log_handler,
    read_gguf_metadata,
    suppress_native_stderr,
)

log = logging.getLogger(__name__)


# Image-placeholder tokens seen in GGUF chat templates. The upstream
# mtmd pipeline substitutes image URLs with mtmd's media marker, so
# these get rewritten to {{ content.image_url.url }} before rendering.
# Case matters: GGUF templates are machine-emitted and stable, so a
# case-insensitive replace would risk corrupting unrelated Jinja
# identifiers.
_GGUF_IMAGE_TOKENS: tuple[str, ...] = (
    "<|image_pad|>",
    "<image>",
    "<IMAGE>",
    "<__media__>",
    "<__image__>",
)
_IMAGE_URL_JINJA = "{{ content.image_url.url }}"

_TOKENIZER_CHAT_TEMPLATE_KEY = "tokenizer.chat_template"


def read_chat_template(model_path: Path) -> str | None:
    """Return the Jinja chat template embedded in a GGUF model, or None."""
    try:
        reader = GGUFReader(str(model_path))
        field = reader.get_field(_TOKENIZER_CHAT_TEMPLATE_KEY)
    except (OSError, ValueError, IndexError, KeyError):
        log.debug("Failed to read chat template from %s", model_path, exc_info=True)
        return None
    if field is None:
        return None
    return bytes(field.parts[field.data[0]]).decode("utf-8", errors="replace")


def adapt_gguf_template_for_mtmd(template: str) -> str:
    """Rewrite known image-placeholder tokens to ``{{ content.image_url.url }}``."""
    for token in _GGUF_IMAGE_TOKENS:
        if token in template:
            template = template.replace(token, _IMAGE_URL_JINJA)
    return template
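# A minimal illustration of the rewrite above, assuming a Qwen2-VL-style
# template fragment (the fragment itself is made up, not read from any GGUF):
#
#   adapt_gguf_template_for_mtmd("<|vision_start|><|image_pad|><|vision_end|>")
#   # -> "<|vision_start|>{{ content.image_url.url }}<|vision_end|>"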

def build_vision_chat_handler(model_path: Path, mmproj_path: Path) -> Any:
    """Return the mtmd chat handler configured with the GGUF's embedded template.

    ``DEFAULT_SYSTEM_MESSAGE`` is set to ``None`` so no stray system turn
    is injected. Falls back to the upstream default template when the
    GGUF has no ``tokenizer.chat_template``.
    """
    from llama_cpp.llama_chat_format import Llava15ChatHandler

    # Defined per call so each loaded model binds its own ``CHAT_FORMAT``
    # (set below) to a fresh class; hoisting this to module scope would
    # make the first loaded model's template leak into every subsequent
    # one.
    class _GgufTemplateChatHandler(Llava15ChatHandler):
        DEFAULT_SYSTEM_MESSAGE = None

    handler_cls: type[Llava15ChatHandler] = _GgufTemplateChatHandler

    template = read_chat_template(model_path)
    if template is not None:
        handler_cls.CHAT_FORMAT = adapt_gguf_template_for_mtmd(template)
        log.info(
            "Vision chat handler: using GGUF-embedded template (%d bytes) from %s",
            len(template),
            model_path.name,
        )
    else:
        log.info(
            "Vision chat handler: no GGUF-embedded chat template for %s; using upstream default",
            model_path.name,
        )

    return handler_cls(str(mmproj_path), verbose=False)


def load_vision_llama(model_path: Path, mmproj_path: Path | None = None) -> Any:
    """Load a vision-capable ``Llama`` using the GGUF-templated chat handler."""
    from llama_cpp import Llama  # heavy native lib; keep import lazy

    install_llama_log_handler()
    if mmproj_path is None:
        mmproj_path = find_mmproj_for_model(model_path)

    chat_handler = build_vision_chat_handler(model_path, mmproj_path)

    kwargs: dict[str, Any] = {
        "model_path": str(model_path),
        "chat_handler": chat_handler,
        "verbose": False,
        "n_gpu_layers": -1,
        "n_ctx": _resolve_vision_n_ctx(model_path),
    }

    llama = suppress_native_stderr(Llama, **kwargs)
    metadata = getattr(llama, "metadata", {}) or {}
    n_ctx_fn = getattr(llama, "n_ctx", None)
    n_ctx = n_ctx_fn() if callable(n_ctx_fn) else "?"
    log.info(
        "Vision model loaded: model=%s mmproj=%s n_ctx=%s arch=%s",
        model_path.name,
        mmproj_path.name,
        n_ctx,
        metadata.get("general.architecture", "?"),
    )
    return llama

96 

97 

98def load_vision_llama(model_path: Path, mmproj_path: Path | None = None) -> Any: 

99 """Load a vision-capable ``Llama`` using the GGUF-templated chat handler.""" 

100 from llama_cpp import Llama # heavy native lib; keep import lazy 

101 

102 install_llama_log_handler() 

103 if mmproj_path is None: 

104 mmproj_path = find_mmproj_for_model(model_path) 

105 

106 chat_handler = build_vision_chat_handler(model_path, mmproj_path) 

107 

108 kwargs: dict[str, Any] = { 

109 "model_path": str(model_path), 

110 "chat_handler": chat_handler, 

111 "verbose": False, 

112 "n_gpu_layers": -1, 

113 "n_ctx": _resolve_vision_n_ctx(model_path), 

114 } 

115 

116 llama = suppress_native_stderr(Llama, **kwargs) 

117 metadata = getattr(llama, "metadata", {}) or {} 

118 n_ctx_fn = getattr(llama, "n_ctx", None) 

119 n_ctx = n_ctx_fn() if callable(n_ctx_fn) else "?" 

120 log.info( 

121 "Vision model loaded: model=%s mmproj=%s n_ctx=%s arch=%s", 

122 model_path.name, 

123 mmproj_path.name, 

124 n_ctx, 

125 metadata.get("general.architecture", "?"), 

126 ) 

127 return llama 
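# Rough usage sketch (illustrative only; the model path and prompt are made up,
# and the message shape follows llama-cpp-python's multimodal chat-completion
# convention rather than anything defined in this module):
#
#   llama = load_vision_llama(Path("models/ocr-vlm-Q4_K_M.gguf"))
#   result = llama.create_chat_completion(
#       messages=[
#           {
#               "role": "user",
#               "content": [
#                   {"type": "image_url", "image_url": {"url": "file:///tmp/page.png"}},
#                   {"type": "text", "text": "Transcribe the text in this image."},
#               ],
#           }
#       ],
#   )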

def _resolve_vision_n_ctx(model_path: Path) -> int:
    """Pick n_ctx for a vision load, clamped to the model's training context."""
    try:
        meta = read_gguf_metadata(model_path)
    except Exception:
        log.debug("read_gguf_metadata failed for vision %s", model_path, exc_info=True)
        meta = None
    train_ctx = int((meta or {}).get("context_length", "0"))
    if cfg.num_ctx is None:
        return 0  # 0 -> llama.cpp uses the model's training context
    if train_ctx <= 0:
        return cfg.num_ctx
    return min(cfg.num_ctx, train_ctx)
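# Worked example of the clamping above (illustrative numbers, not from any real
# config): with cfg.num_ctx = 8192 and a GGUF that reports context_length = 4096,
# the vision load uses n_ctx = min(8192, 4096) = 4096; with cfg.num_ctx = None it
# returns 0 and llama.cpp falls back to the model's own training context.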