Coverage for src/marimushka/orchestrator.py: 100%

1"""Export orchestration and template rendering.

3This module handles the core export workflow including parallel/sequential export,

4template rendering, and index file generation.

5"""

7from concurrent.futures import ThreadPoolExecutor, as_completed

8from pathlib import Path

10import jinja2

11from jinja2.sandbox import SandboxedEnvironment

12from loguru import logger

13from rich.progress import BarColumn, Progress, SpinnerColumn, TaskID, TaskProgressColumn, TextColumn

15from .audit import AuditLogger, get_audit_logger

16from .exceptions import (

17 BatchExportResult,

18 IndexWriteError,

19 NotebookExportResult,

20 ProgressCallback,

21 TemplateRenderError,

22)

23from .notebook import Notebook

24from .security import (

25 sanitize_error_message,

26 set_secure_file_permissions,

27 validate_max_workers,

28)

def export_notebook(
    notebook: Notebook,
    output_dir: Path,
    sandbox: bool,
    bin_path: Path | None,
    timeout: int = 300,
) -> NotebookExportResult:
    """Run the export of one notebook and hand back its outcome.

    This is a plain module-level function that forwards to ``notebook.export``,
    which makes it convenient to submit to an executor.

    Args:
        notebook: The notebook to export.
        output_dir: Directory that receives the exported HTML.
        sandbox: Whether the export should run in sandbox mode.
        bin_path: Custom path to the uvx executable, or None.
        timeout: Maximum time in seconds for the export process. Defaults to 300.

    Returns:
        The NotebookExportResult produced by ``notebook.export``.

    """
    outcome = notebook.export(
        output_dir=output_dir,
        sandbox=sandbox,
        bin_path=bin_path,
        timeout=timeout,
    )
    return outcome

def export_notebooks_parallel(
    notebooks: list[Notebook],
    output_dir: Path,
    sandbox: bool,
    bin_path: Path | None,
    max_workers: int = 4,
    progress: Progress | None = None,
    task_id: TaskID | None = None,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
) -> BatchExportResult:
    """Export a list of notebooks concurrently on a thread pool.

    Every notebook is submitted to the pool via ``export_notebook``; results are
    collected in completion order, so the order of entries in the returned batch
    is not necessarily the order of ``notebooks``.

    Args:
        notebooks: Notebooks to export.
        output_dir: Directory that receives the exported HTML files.
        sandbox: Whether the exports should run in sandbox mode.
        bin_path: Custom path to the uvx executable, or None.
        max_workers: Requested number of worker threads. Defaults to 4. The value
            actually used is whatever ``validate_max_workers`` returns.
        progress: Optional Rich Progress instance to advance after each export.
        task_id: Task on ``progress`` to advance; the bar is only advanced when
            both ``progress`` and ``task_id`` are supplied.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback invoked after each finished export as
            on_progress(completed, total, notebook_name).

    Returns:
        BatchExportResult holding one result per notebook (empty when
        ``notebooks`` is empty).

    """
    # The worker count is validated first, even when there is nothing to export.
    max_workers = validate_max_workers(max_workers)

    summary = BatchExportResult()
    if not notebooks:
        return summary

    total = len(notebooks)

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [pool.submit(export_notebook, nb, output_dir, sandbox, bin_path, timeout) for nb in notebooks]

        # ``finished_so_far`` counts completed exports, starting at 1.
        for finished_so_far, done_future in enumerate(as_completed(pending), start=1):
            outcome = done_future.result()
            summary.add(outcome)

            if not outcome.success:
                if outcome.error:
                    error_msg = sanitize_error_message(str(outcome.error))
                else:
                    error_msg = "Unknown error"
                logger.error(f"Failed to export {outcome.notebook_path.name}: {error_msg}")

            # Notify the caller-supplied hook, if any.
            if on_progress:
                on_progress(finished_so_far, total, outcome.notebook_path.name)

            if progress and task_id is not None:
                progress.advance(task_id)

    return summary

114

115

def export_notebooks_sequential(
    notebooks: list[Notebook],
    output_dir: Path,
    sandbox: bool,
    bin_path: Path | None,
    progress: Progress | None = None,
    task_id: TaskID | None = None,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
) -> BatchExportResult:
    """Export a list of notebooks one after another, in list order.

    Args:
        notebooks: Notebooks to export.
        output_dir: Directory that receives the exported HTML files.
        sandbox: Whether the exports should run in sandbox mode.
        bin_path: Custom path to the uvx executable, or None.
        progress: Optional Rich Progress instance to advance after each export.
        task_id: Task on ``progress`` to advance; the bar is only advanced when
            both ``progress`` and ``task_id`` are supplied.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback invoked after each export as
            on_progress(completed, total, notebook_name).

    Returns:
        BatchExportResult holding one result per notebook, in input order.

    """
    summary = BatchExportResult()
    total = len(notebooks)

    # ``position`` is the 1-based count of notebooks handled so far.
    for position, current in enumerate(notebooks, start=1):
        outcome = current.export(
            output_dir=output_dir,
            sandbox=sandbox,
            bin_path=bin_path,
            timeout=timeout,
        )
        summary.add(outcome)

        # Notify the caller-supplied hook, if any.
        if on_progress:
            on_progress(position, total, current.path.name)

        if progress and task_id is not None:
            progress.advance(task_id)

    return summary

158

159

def export_all_notebooks(
    output: Path,
    notebooks: list[Notebook],
    apps: list[Notebook],
    notebooks_wasm: list[Notebook],
    sandbox: bool,
    bin_path: Path | None,
    parallel: bool,
    max_workers: int,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
) -> BatchExportResult:
    """Export every notebook category under one shared progress bar.

    Each category is exported into its own subdirectory of ``output``
    (``notebooks``, ``apps``, ``notebooks_wasm``), either in parallel or
    sequentially, and the per-category results are merged into a single batch.

    Args:
        output: Base output directory.
        notebooks: Notebooks for static HTML export.
        apps: Notebooks for app export.
        notebooks_wasm: Notebooks for interactive WebAssembly export.
        sandbox: Whether the exports should run in sandbox mode.
        bin_path: Custom path to the uvx executable, or None.
        parallel: Export each category with the parallel exporter when True,
            otherwise with the sequential one.
        max_workers: Maximum number of parallel workers (parallel mode only).
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback invoked after each export as
            on_progress(completed, total, notebook_name). The exporters are
            called once per category, so the counts it receives are relative
            to that category.

    Returns:
        BatchExportResult containing the results of all categories.

    """
    total = sum(len(group) for group in (notebooks, apps, notebooks_wasm))
    combined = BatchExportResult()

    # Nothing to do: skip creating a progress display altogether.
    if total == 0:
        return combined

    # Pair every category with the subdirectory it is exported into.
    categories = [
        (group, output / subdir)
        for group, subdir in (
            (notebooks, "notebooks"),
            (apps, "apps"),
            (notebooks_wasm, "notebooks_wasm"),
        )
    ]

    columns = (
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        TextColumn("[cyan]{task.completed}/{task.total}"),
    )

    with Progress(*columns) as bar:
        # One task spans all categories so the bar reflects overall progress.
        task = bar.add_task("[green]Exporting notebooks...", total=total)

        for group, target_dir in categories:
            if not group:
                continue

            if parallel:
                partial = export_notebooks_parallel(
                    group, target_dir, sandbox, bin_path, max_workers, bar, task, timeout, on_progress
                )
            else:
                partial = export_notebooks_sequential(
                    group, target_dir, sandbox, bin_path, bar, task, timeout, on_progress
                )

            for item in partial.results:
                combined.add(item)

    if combined.failed > 0:  # pragma: no cover
        logger.warning(f"Export completed: {combined.succeeded} succeeded, {combined.failed} failed")
        for failure in combined.failures:
            if failure.error:
                error_detail = sanitize_error_message(str(failure.error))
            else:
                error_detail = "Unknown error"
            logger.debug(f" - {failure.notebook_path.name}: {error_detail}")

    return combined

238

239

def render_template(
    template_file: Path,
    notebooks: list[Notebook],
    apps: list[Notebook],
    notebooks_wasm: list[Notebook],
    audit_logger: AuditLogger,
) -> str:
    """Render the index page from a Jinja2 template file.

    The template is loaded from its parent directory through a sandboxed Jinja2
    environment and rendered with the three notebook lists exposed as
    ``notebooks``, ``apps`` and ``notebooks_wasm``. The outcome (success or
    failure) is recorded on ``audit_logger``.

    Args:
        template_file: Path to the Jinja2 template file.
        notebooks: Notebooks for static HTML export.
        apps: Notebooks for app export.
        notebooks_wasm: Notebooks for interactive WebAssembly export.
        audit_logger: Logger that receives the template-render audit event.

    Returns:
        The rendered HTML content as a string.

    Raises:
        TemplateRenderError: If Jinja2 raises a TemplateError while loading or
            rendering the template.

    """
    try:
        # Sandboxed environment; autoescaping is selected for html/xml templates.
        loader = jinja2.FileSystemLoader(template_file.parent)
        env = SandboxedEnvironment(
            loader=loader,
            autoescape=jinja2.select_autoescape(["html", "xml"]),
        )

        context = {
            "notebooks": notebooks,
            "apps": apps,
            "notebooks_wasm": notebooks_wasm,
        }
        html = env.get_template(template_file.name).render(**context)
        audit_logger.log_template_render(template_file, True)
    except jinja2.exceptions.TemplateError as exc:
        # Only the sanitized message goes to the audit log; the original
        # exception is still chained onto the raised error.
        audit_logger.log_template_render(template_file, False, sanitize_error_message(str(exc)))
        raise TemplateRenderError(template_file, exc) from exc

    return html

285

286

def write_index_file(index_path: Path, content: str, audit_logger: AuditLogger) -> None:
    """Write the rendered HTML content to the index file.

    The file is written as UTF-8, then ``set_secure_file_permissions`` is called
    on it with mode ``0o644``. The outcome of the write is recorded on
    ``audit_logger``.

    Args:
        index_path: Path where the index.html file will be written.
        content: The rendered HTML content to write.
        audit_logger: Logger that receives the file-access audit event.

    Raises:
        IndexWriteError: If an OSError occurs while writing the file or while
            setting its permissions.

    """
    try:
        # Pin the encoding explicitly. Without it the platform's locale encoding
        # is used, which can mis-encode non-ASCII characters or raise
        # UnicodeEncodeError (not an OSError, so it would escape the handler
        # below) on systems whose default is not UTF-8.
        with index_path.open("w", encoding="utf-8") as f:
            f.write(content)

        # Apply the permission helper only after the content is fully written.
        set_secure_file_permissions(index_path, mode=0o644)

        logger.info(f"Successfully generated index file at {index_path}")
        audit_logger.log_file_access(index_path, "write", True)
    except OSError as e:
        # Only the sanitized message goes to the audit log; the original
        # exception is still chained onto the raised error.
        sanitized_error = sanitize_error_message(str(e))
        audit_logger.log_file_access(index_path, "write", False, sanitized_error)
        raise IndexWriteError(index_path, e) from e

313

314

def generate_index(
    output: Path,
    template_file: Path,
    notebooks: list[Notebook] | None = None,
    apps: list[Notebook] | None = None,
    notebooks_wasm: list[Notebook] | None = None,
    sandbox: bool = True,
    bin_path: Path | None = None,
    parallel: bool = True,
    max_workers: int = 4,
    timeout: int = 300,
    on_progress: ProgressCallback | None = None,
    audit_logger: AuditLogger | None = None,
) -> str:
    """Export all notebooks and generate the ``index.html`` that lists them.

    The steps run in this order: export every notebook category, make sure the
    output directory exists, render the index template with the notebook lists,
    and write the result to ``output / "index.html"``.

    Args:
        output: Directory where the index.html file will be saved.
        template_file: Path to the Jinja2 template file.
        notebooks: Notebooks for static HTML export. None is treated as empty.
        apps: Notebooks for app export. None is treated as empty.
        notebooks_wasm: Notebooks for interactive WebAssembly export. None is
            treated as empty.
        sandbox: Whether to run the exports in sandbox mode. Defaults to True.
        bin_path: Custom path forwarded to the exports. Defaults to None.
        parallel: Whether to export notebooks in parallel. Defaults to True.
        max_workers: Maximum number of parallel workers. Defaults to 4.
        timeout: Maximum time in seconds for each export. Defaults to 300.
        on_progress: Optional callback invoked after each notebook export as
            on_progress(completed, total, notebook_name).
        audit_logger: Logger for audit events. When None, the one returned by
            ``get_audit_logger()`` is used.

    Returns:
        The rendered HTML content as a string.

    Raises:
        TemplateRenderError: If the template fails to render.
        IndexWriteError: If the index file cannot be written.

    """
    audit = get_audit_logger() if audit_logger is None else audit_logger

    # Normalise missing (or empty) categories to fresh empty lists.
    static_list = notebooks if notebooks else []
    app_list = apps if apps else []
    wasm_list = notebooks_wasm if notebooks_wasm else []

    # Run the exports first; the returned batch result is not used here.
    export_all_notebooks(
        output=output,
        notebooks=static_list,
        apps=app_list,
        notebooks_wasm=wasm_list,
        sandbox=sandbox,
        bin_path=bin_path,
        parallel=parallel,
        max_workers=max_workers,
        timeout=timeout,
        on_progress=on_progress,
    )

    # The index is written directly into ``output``, so it must exist by now.
    output.mkdir(parents=True, exist_ok=True)

    index_html = render_template(template_file, static_list, app_list, wasm_list, audit)
    index_target = output / "index.html"
    write_index_file(index_target, index_html, audit)

    return index_html

Coverage for src / marimushka / orchestrator.py: 100%

96 statements