Coverage for src / marimushka / orchestrator.py: 100%
96 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-28 17:41 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-28 17:41 +0000
1"""Export orchestration and template rendering.
3This module handles the core export workflow including parallel/sequential export,
4template rendering, and index file generation.
5"""
7from concurrent.futures import ThreadPoolExecutor, as_completed
8from pathlib import Path
10import jinja2
11from jinja2.sandbox import SandboxedEnvironment
12from loguru import logger
13from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID, TaskProgressColumn, TextColumn
15from .audit import AuditLogger, get_audit_logger
16from .exceptions import (
17 BatchExportResult,
18 IndexWriteError,
19 NotebookExportResult,
20 ProgressCallback,
21 TemplateRenderError,
22)
23from .notebook import Notebook
24from .security import (
25 sanitize_error_message,
26 set_secure_file_permissions,
27 validate_max_workers,
28)
def export_notebook(
    notebook: Notebook,
    output_dir: Path,
    sandbox: bool,
    bin_path: Path | None,
    timeout: int = 300,
) -> NotebookExportResult:
    """Run the export of one notebook and hand back its result.

    A plain module-level wrapper around ``notebook.export`` that forwards
    every option by keyword.

    Args:
        notebook: The notebook to export.
        output_dir: Directory that receives the exported HTML.
        sandbox: Whether the export runs in sandbox mode.
        bin_path: Custom path to the uvx executable, or None.
        timeout: Maximum time in seconds for the export process. Defaults to 300.

    Returns:
        The NotebookExportResult produced by ``notebook.export``.

    """
    export_options = {
        "output_dir": output_dir,
        "sandbox": sandbox,
        "bin_path": bin_path,
        "timeout": timeout,
    }
    return notebook.export(**export_options)
def export_notebooks_parallel(
    notebooks: list[Notebook],
    output_dir: Path,
    sandbox: bool,
    bin_path: Path | None,
    max_workers: int = 4,
    progress: Progress | None = None,
    task_id: TaskID | None = None,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
) -> BatchExportResult:
    """Export a list of notebooks concurrently on a thread pool.

    Every notebook is submitted to the pool up front; results are then
    collected in the order the exports finish, which is not necessarily the
    order of ``notebooks``.

    Args:
        notebooks: Notebooks to export.
        output_dir: Directory that receives the exported HTML files.
        sandbox: Whether exports run in sandbox mode.
        bin_path: Custom path to the uvx executable, or None.
        max_workers: Requested number of worker threads. Defaults to 4. The
            value is passed through ``validate_max_workers`` before use.
        progress: Optional Rich Progress instance to advance per notebook.
        task_id: Task on ``progress`` to advance; ignored when None.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback invoked after each finished export.
            Signature: on_progress(completed, total, notebook_name)

    Returns:
        BatchExportResult holding one result per exported notebook.

    """
    # Validate and bound the worker count before anything is scheduled.
    max_workers = validate_max_workers(max_workers)

    summary = BatchExportResult()
    if not notebooks:
        return summary

    total = len(notebooks)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(export_notebook, nb, output_dir, sandbox, bin_path, timeout) for nb in notebooks]

        for finished, done in enumerate(as_completed(pending), start=1):
            outcome = done.result()
            summary.add(outcome)

            if not outcome.success:
                if outcome.error:
                    error_msg = sanitize_error_message(str(outcome.error))
                else:
                    error_msg = "Unknown error"
                logger.error(f"Failed to export {outcome.notebook_path.name}: {error_msg}")

            # Notify the caller-supplied callback, if any.
            if on_progress:
                on_progress(finished, total, outcome.notebook_path.name)

            if progress and task_id is not None:
                progress.advance(task_id)

    return summary
def export_notebooks_sequential(
    notebooks: list[Notebook],
    output_dir: Path,
    sandbox: bool,
    bin_path: Path | None,
    progress: Progress | None = None,
    task_id: TaskID | None = None,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
) -> BatchExportResult:
    """Export notebooks one after another, in list order.

    Each notebook goes through the shared ``export_notebook`` helper, and a
    failed export is logged at error level with a sanitized message, so that
    sequential runs report failures the same way parallel runs do.

    Args:
        notebooks: List of notebooks to export.
        output_dir: Output directory for exported HTML files.
        sandbox: Whether to use sandbox mode.
        bin_path: Custom path to uvx executable.
        progress: Optional Rich Progress instance for progress tracking.
        task_id: Optional task ID for progress updates.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback called after each notebook export.
            Signature: on_progress(completed, total, notebook_name)

    Returns:
        BatchExportResult containing individual results and summary statistics.

    """
    batch_result = BatchExportResult()
    total_notebooks = len(notebooks)

    for idx, nb in enumerate(notebooks, 1):
        result = export_notebook(nb, output_dir, sandbox, bin_path, timeout)
        batch_result.add(result)

        # Surface each failure as it happens; never log the raw error text.
        if not result.success:
            error_msg = sanitize_error_message(str(result.error)) if result.error else "Unknown error"
            logger.error(f"Failed to export {result.notebook_path.name}: {error_msg}")

        # Call user callback if provided
        if on_progress:
            on_progress(idx, total_notebooks, nb.path.name)

        if progress and task_id is not None:
            progress.advance(task_id)

    return batch_result
def _overall_progress(
    on_progress: ProgressCallback | None,
    already_done: int,
    overall_total: int,
) -> ProgressCallback | None:
    """Adapt a progress callback so a per-batch count is reported run-wide."""
    if not on_progress:
        return on_progress

    def _report(completed: int, _batch_total: int, notebook_name: str) -> None:
        # Shift the batch-local count by what earlier batches already finished.
        on_progress(already_done + completed, overall_total, notebook_name)

    return _report


def export_all_notebooks(
    output: Path,
    notebooks: list[Notebook],
    apps: list[Notebook],
    notebooks_wasm: list[Notebook],
    sandbox: bool,
    bin_path: Path | None,
    parallel: bool,
    max_workers: int,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
) -> BatchExportResult:
    """Export all notebooks with progress tracking.

    The three categories are exported one after another into the
    ``notebooks``, ``apps`` and ``notebooks_wasm`` sub-directories of
    ``output``. A single Rich progress bar spans the whole run, and
    ``on_progress`` likewise receives run-wide numbers: ``completed`` keeps
    counting across categories and ``total`` is the combined notebook count,
    instead of both restarting for every category.

    Args:
        output: Base output directory.
        notebooks: List of notebooks for static HTML export.
        apps: List of notebooks for app export.
        notebooks_wasm: List of notebooks for interactive WebAssembly export.
        sandbox: Whether to use sandbox mode.
        bin_path: Custom path to uvx executable.
        parallel: Whether to export notebooks in parallel.
        max_workers: Maximum number of parallel workers.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback called after each notebook export.
            Signature: on_progress(completed, total, notebook_name)

    Returns:
        BatchExportResult containing all export results.

    """
    total_notebooks = len(notebooks) + len(apps) + len(notebooks_wasm)
    combined_batch_result = BatchExportResult()

    if total_notebooks == 0:
        return combined_batch_result

    # Define notebook categories and their output directories
    notebook_categories = [
        (notebooks, output / "notebooks"),
        (apps, output / "apps"),
        (notebooks_wasm, output / "notebooks_wasm"),
    ]

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TextColumn("[cyan]{task.completed}/{task.total}"),
    ) as progress:
        task = progress.add_task("[green]Exporting notebooks...", total=total_notebooks)

        # Number of notebooks handled by the categories processed so far.
        already_done = 0

        for nb_list, out_dir in notebook_categories:
            if not nb_list:
                continue

            category_progress = _overall_progress(on_progress, already_done, total_notebooks)

            if parallel:
                batch_result = export_notebooks_parallel(
                    nb_list, out_dir, sandbox, bin_path, max_workers, progress, task, timeout, category_progress
                )
            else:
                batch_result = export_notebooks_sequential(
                    nb_list, out_dir, sandbox, bin_path, progress, task, timeout, category_progress
                )

            for result in batch_result.results:
                combined_batch_result.add(result)

            already_done += len(nb_list)

    if combined_batch_result.failed > 0:  # pragma: no cover
        logger.warning(
            f"Export completed: {combined_batch_result.succeeded} succeeded, {combined_batch_result.failed} failed"
        )
        for failure in combined_batch_result.failures:
            error_detail = sanitize_error_message(str(failure.error)) if failure.error else "Unknown error"
            logger.debug(f"  - {failure.notebook_path.name}: {error_detail}")

    return combined_batch_result
def render_template(
    template_file: Path,
    notebooks: list[Notebook],
    apps: list[Notebook],
    notebooks_wasm: list[Notebook],
    audit_logger: AuditLogger,
) -> str:
    """Render the index page from a Jinja2 template.

    The template is loaded from its own parent directory through a sandboxed
    Jinja2 environment with autoescaping selected for ``html`` and ``xml``
    templates. The outcome is recorded on the audit logger.

    Args:
        template_file: Path to the Jinja2 template file.
        notebooks: Notebooks exposed to the template as ``notebooks``.
        apps: Notebooks exposed to the template as ``apps``.
        notebooks_wasm: Notebooks exposed to the template as ``notebooks_wasm``.
        audit_logger: Logger for audit events.

    Returns:
        The rendered HTML content as a string.

    Raises:
        TemplateRenderError: If Jinja2 raises a TemplateError while loading
            or rendering the template.

    """
    try:
        # Sandboxed environment: restricts what template code may do.
        environment = SandboxedEnvironment(
            loader=jinja2.FileSystemLoader(template_file.parent),
            autoescape=jinja2.select_autoescape(["html", "xml"]),
        )
        html = environment.get_template(template_file.name).render(
            notebooks=notebooks,
            apps=apps,
            notebooks_wasm=notebooks_wasm,
        )
        audit_logger.log_template_render(template_file, True)
    except jinja2.exceptions.TemplateError as exc:
        # Only the sanitized message goes to the audit trail.
        audit_logger.log_template_render(template_file, False, sanitize_error_message(str(exc)))
        raise TemplateRenderError(template_file, exc) from exc

    return html
def write_index_file(index_path: Path, content: str, audit_logger: AuditLogger) -> None:
    """Write the rendered HTML content to the index file.

    The file is always written as UTF-8. Without an explicit encoding the
    text would be encoded with the locale's preferred encoding, which can
    differ between machines and can fail on characters it cannot represent.

    Args:
        index_path: Path where the index.html file will be written.
        content: The rendered HTML content to write.
        audit_logger: Logger for audit events.

    Raises:
        IndexWriteError: If the file cannot be written.

    """
    try:
        # Explicit encoding keeps the output independent of the host locale.
        with index_path.open("w", encoding="utf-8") as f:
            f.write(content)

        # Set secure file permissions
        set_secure_file_permissions(index_path, mode=0o644)

        logger.info(f"Successfully generated index file at {index_path}")
        audit_logger.log_file_access(index_path, "write", True)
    except OSError as e:
        # Only the sanitized message goes to the audit trail.
        sanitized_error = sanitize_error_message(str(e))
        audit_logger.log_file_access(index_path, "write", False, sanitized_error)
        raise IndexWriteError(index_path, e) from e
def generate_index(
    output: Path,
    template_file: Path,
    notebooks: list[Notebook] | None = None,
    apps: list[Notebook] | None = None,
    notebooks_wasm: list[Notebook] | None = None,
    sandbox: bool = True,
    bin_path: Path | None = None,
    parallel: bool = True,
    max_workers: int = 4,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
    audit_logger: AuditLogger | None = None,
) -> str:
    """Export the notebooks, then render and write ``index.html``.

    The steps run in a fixed order: export every notebook, make sure the
    output directory exists, render the template, and write the result to
    ``output / "index.html"``. The batch result of the export step is not
    inspected here, so the index is rendered even if some exports failed.

    Args:
        output: Directory where the index.html file will be saved.
        template_file: Path to the Jinja2 template file.
        notebooks: Notebooks for static HTML export. None means no notebooks.
        apps: Notebooks for app export. None means no notebooks.
        notebooks_wasm: Notebooks for interactive WebAssembly export. None
            means no notebooks.
        sandbox: Whether to run the notebook in a sandbox. Defaults to True.
        bin_path: Forwarded to the export step as ``bin_path``. Defaults to None.
        parallel: Whether to export notebooks in parallel. Defaults to True.
        max_workers: Maximum number of parallel workers. Defaults to 4.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback called after each notebook export.
            Signature: on_progress(completed, total, notebook_name).
        audit_logger: Logger for audit events. If None, the one returned by
            ``get_audit_logger()`` is used.

    Returns:
        The rendered HTML content as a string.

    Raises:
        TemplateRenderError: If the template fails to render.
        IndexWriteError: If the index file cannot be written.

    """
    audit = get_audit_logger() if audit_logger is None else audit_logger

    # Treat a missing (or empty) category as an empty list.
    static_list = notebooks or []
    app_list = apps or []
    wasm_list = notebooks_wasm or []

    # Step 1: export everything, with progress tracking.
    export_all_notebooks(
        output=output,
        notebooks=static_list,
        apps=app_list,
        notebooks_wasm=wasm_list,
        sandbox=sandbox,
        bin_path=bin_path,
        parallel=parallel,
        max_workers=max_workers,
        timeout=timeout,
        on_progress=on_progress,
    )

    # Step 2: the index lives directly in the output directory.
    output.mkdir(parents=True, exist_ok=True)
    index_path = output / "index.html"

    # Step 3: render the page and persist it.
    html = render_template(template_file, static_list, app_list, wasm_list, audit)
    write_index_file(index_path, html, audit)

    return html