Coverage for src / lilbee / __init__.py: 100%

39 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-04-29 19:16 +0000

1"""lilbee — Local RAG knowledge base.""" 

2 

3from __future__ import annotations 

4 

5import os 

6import threading 

7 

# Disable huggingface_hub's xet transfer layer before any HF submodule loads.
# huggingface_hub.constants reads HF_HUB_DISABLE_XET at import time, so this
# must run before the first `import huggingface_hub` anywhere in the process.
# Workaround for HF issue #4058: xet-core reports progress in 3-4 coarse jumps
# instead of continuously, making download bars appear stuck on large files.
# Forcing the HTTP path restores smooth per-chunk tqdm updates. setdefault only
# fills in a missing value, so users can still opt back into xet by setting
# HF_HUB_DISABLE_XET=0 in their environment.
os.environ.setdefault("HF_HUB_DISABLE_XET", "1")

# Suppress HF-default tqdm bars (metadata probes, snapshot summaries) that
# leak cursor escapes into the TUI. Our custom tqdm_class is NOT a subclass
# of huggingface_hub.utils.tqdm, so huggingface_hub's `_create_progress_bar`
# instantiates it directly without honoring this flag. Download callbacks
# continue to fire. See lilbee/catalog.py::_CallbackProgressBar.
# As above, setdefault keeps any value already present in the environment.
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")

23 

24 

def _install_thread_only_tqdm_lock() -> None:
    """Give the tqdm base class a plain ``threading.RLock`` as its ``_lock``.

    With a lock already in place, tqdm skips its lazy multiprocessing-lock
    setup. Under Textual that setup fork_execs the MP resource tracker
    while ``sys.stderr.fileno() == -1`` and crashes with
    ``bad value(s) in fds_to_keep``. Same idea as huggingface_hub PR #4065,
    but installed on ``tqdm.std.tqdm`` itself so every tqdm instance in the
    process inherits the lock via MRO.

    Does nothing when tqdm cannot be imported or a lock is already set.
    """
    try:
        from tqdm.std import tqdm as tqdm_cls
    except ImportError:
        # tqdm could not be imported, so there is nothing to pin.
        return

    current_lock = getattr(tqdm_cls, "_lock", None)
    if current_lock is not None:
        # Keep whatever lock is already installed.
        return
    tqdm_cls._lock = threading.RLock()

40 

41 

def _prestart_mp_resource_tracker() -> None:
    """Bring up the multiprocessing resource tracker before Textual swaps stderr.

    Once the tracker is running, later ``Process.start()`` calls reuse its
    cached fd and never hit the fork_exec with a bad fd. Skipped on
    Windows, which doesn't use ``_posixsubprocess``, and under PyInstaller
    frozen bundles, where ``sys.executable`` is the lilbee exe itself and
    the tracker's spawn would re-enter typer with ``-B -s -E`` as CLI args.
    """
    import sys as _runtime

    skip = _runtime.platform == "win32" or getattr(_runtime, "frozen", False)
    if skip:
        return

    try:
        from multiprocessing import resource_tracker as _tracker

        _tracker.ensure_running()
    except (ImportError, OSError, RuntimeError, ValueError):
        # Best-effort: if the tracker already crashed or cannot be started
        # in the current env, leave the state alone. The worker's own
        # spawn will surface a real error at call time.
        pass

66 

67 

# Run both workarounds at import time. They sit at module top level so they
# take effect as soon as the package is imported.
_install_thread_only_tqdm_lock()
_prestart_mp_resource_tracker()

70 

71 

def _shrink_hf_download_chunk_size() -> None:
    """Lower huggingface_hub's download chunk size from 10MB to 200KB.

    With the default DOWNLOAD_CHUNK_SIZE=10MB the tqdm callback only fires
    every ~7 seconds on a 1.5MB/s connection, so downloads look stuck
    between jumps. At 200KB the callback fires several times per second at
    typical home-internet rates and the UI renders smooth real-time
    progress. This is a monkey-patch because HF exposes no env override.
    Runtime cost: tqdm call overhead is negligible (~µs) and HTTP
    iter_bytes accumulates into chunks of this size, so smaller chunks do
    not produce more network round-trips.

    Does nothing when huggingface_hub cannot be imported.
    """
    try:
        from huggingface_hub import constants as hf_constants
    except ImportError:
        return

    small_chunk_bytes = 200 * 1024
    hf_constants.DOWNLOAD_CHUNK_SIZE = small_chunk_bytes

90 

91 

# Patch the chunk-size constant once, at import time.
_shrink_hf_download_chunk_size()

93 

94from typing import TYPE_CHECKING # noqa: E402 — must follow HF environment / constants setup above 

95 

if TYPE_CHECKING:
    # Visible to static type checkers only; at runtime the name is resolved
    # lazily by the module-level __getattr__ below.
    from lilbee.api import Lilbee

# Names exported as the package's public API.
__all__ = ["Lilbee"]

100 

101 

def __getattr__(name: str) -> object:
    """Resolve ``Lilbee`` lazily on first attribute access.

    ``lilbee.api`` is imported only when ``Lilbee`` is requested, so
    importing the package does not import it up front.

    Raises:
        AttributeError: for any name other than ``"Lilbee"``.
    """
    if name != "Lilbee":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from lilbee.api import Lilbee

    return Lilbee