# Coverage for src/rhiza/commands/materialize.py: 100%
# 256 statements
# coverage.py v7.13.4, created at 2026-02-12 20:13 +0000

"""Command for materializing Rhiza template files into a repository.

This module implements the `materialize` command. It performs a sparse
checkout of the configured template repository, copies the selected files
into the target Git repository, and records managed files in
`.rhiza/history`. Use this to take a one-shot snapshot of template files.
"""

8 

import os
import shutil
import subprocess  # nosec B404
import sys
import tempfile
from pathlib import Path

from loguru import logger

from rhiza.bundle_resolver import load_bundles_from_clone, resolve_include_paths
from rhiza.commands.validate import validate
from rhiza.models import RhizaTemplate
from rhiza.subprocess_utils import get_git_executable

22 

23 

24def _log_git_stderr_errors(stderr: str | None) -> None: 

25 """Extract and log only relevant error messages from git stderr. 

26 

27 Args: 

28 stderr: Git command stderr output. 

29 """ 

30 if stderr: 

31 # Extract relevant error message from git stderr 

32 stderr_lines = stderr.strip().split("\n") 

33 # Show only the most relevant error lines, skip verbose git output 

34 for line in stderr_lines: 

35 line = line.strip() 

36 if line and (line.startswith("fatal:") or line.startswith("error:")): 

37 logger.error(line) 

38 

39 

40def _handle_target_branch( 

41 target: Path, target_branch: str | None, git_executable: str, git_env: dict[str, str] 

42) -> None: 

43 """Handle target branch creation or checkout if specified. 

44 

45 Args: 

46 target: Path to the target repository. 

47 target_branch: Optional branch name to create/checkout. 

48 git_executable: Path to git executable. 

49 git_env: Environment variables for git commands. 

50 """ 

51 if not target_branch: 

52 return 

53 

54 logger.info(f"Creating/checking out target branch: {target_branch}") 

55 try: 

56 # Check if branch already exists using git rev-parse 

57 result = subprocess.run( # nosec B603 

58 [git_executable, "rev-parse", "--verify", target_branch], 

59 cwd=target, 

60 capture_output=True, 

61 text=True, 

62 env=git_env, 

63 ) 

64 

65 if result.returncode == 0: 

66 # Branch exists, switch to it 

67 logger.info(f"Branch '{target_branch}' exists, checking out...") 

68 subprocess.run( # nosec B603 

69 [git_executable, "checkout", target_branch], 

70 cwd=target, 

71 check=True, 

72 capture_output=True, 

73 text=True, 

74 env=git_env, 

75 ) 

76 else: 

77 # Branch doesn't exist, create it from current HEAD 

78 logger.info(f"Creating new branch '{target_branch}'...") 

79 subprocess.run( # nosec B603 

80 [git_executable, "checkout", "-b", target_branch], 

81 cwd=target, 

82 check=True, 

83 capture_output=True, 

84 text=True, 

85 env=git_env, 

86 ) 

87 except subprocess.CalledProcessError as e: 

88 logger.error(f"Failed to create/checkout branch '{target_branch}'") 

89 _log_git_stderr_errors(e.stderr) 

90 logger.error("Please ensure you have no uncommitted changes or conflicts") 

91 sys.exit(1) 

92 

93 

def _validate_and_load_template(target: Path, branch: str) -> tuple[RhizaTemplate, str, str, list[str], list[str]]:
    """Validate configuration and load template settings.

    Runs ``validate`` on the target, loads ``.rhiza/template.yml`` and extracts
    the repository, branch, include and exclude settings. The resulting
    selection is logged at info level.

    Args:
        target: Path to the target repository.
        branch: The Rhiza template branch to use (CLI argument). Only used
            when the template file does not set a branch of its own.

    Returns:
        Tuple of (template, rhiza_repo, rhiza_branch, include_paths, excluded_paths).

    Raises:
        SystemExit: With code 1 when validation fails.
        RuntimeError: When no template repository is configured, or when
            neither templates nor include paths are configured.
    """
    # Validate Rhiza configuration
    valid = validate(target)
    if not valid:
        logger.error(f"Rhiza template is invalid in: {target}")
        logger.error("Please fix validation errors and try again")
        sys.exit(1)

    # Load the template configuration
    template_file = target / ".rhiza" / "template.yml"
    template = RhizaTemplate.from_yaml(template_file)

    # Extract template configuration settings
    rhiza_repo = template.template_repository
    if not rhiza_repo:
        logger.error("template-repository is not configured in template.yml")
        raise RuntimeError("template-repository is required")  # noqa: TRY003
    rhiza_branch = template.template_branch or branch
    excluded_paths = template.exclude

    # Note: We'll resolve templates to paths after cloning the template repo,
    # since we need access to template-bundles.yml from the template
    include_paths = template.include

    # Validate that we have either templates or include paths
    if not template.templates and not include_paths:
        logger.error("No templates or include paths found in template.yml")
        logger.error("Add either 'templates' or 'include' list in template.yml")
        raise RuntimeError("No templates or include paths found in template.yml")  # noqa: TRY003

    # Log what we'll be using
    if template.templates:
        logger.info("Templates:")
        for t in template.templates:
            logger.info(f" - {t}")

    if include_paths:
        logger.info("Include paths:")
        for p in include_paths:
            logger.info(f" - {p}")

    if excluded_paths:
        logger.info("Exclude paths:")
        for p in excluded_paths:
            logger.info(f" - {p}")

    return template, rhiza_repo, rhiza_branch, include_paths, excluded_paths

150 

151 

def _construct_git_url(rhiza_repo: str, rhiza_host: str) -> str:
    """Construct git clone URL based on host.

    Args:
        rhiza_repo: Repository name in 'owner/repo' format.
        rhiza_host: Git hosting platform ('github' or 'gitlab').

    Returns:
        HTTPS git URL for cloning, ending in ``.git``.

    Raises:
        ValueError: If rhiza_host is not supported.
    """
    if rhiza_host == "gitlab":
        git_url = f"https://gitlab.com/{rhiza_repo}.git"
        logger.debug(f"Using GitLab repository: {git_url}")
    elif rhiza_host == "github":
        git_url = f"https://github.com/{rhiza_repo}.git"
        logger.debug(f"Using GitHub repository: {git_url}")
    else:
        logger.error(f"Unsupported template-host: {rhiza_host}")
        logger.error("template-host must be 'github' or 'gitlab'")
        raise ValueError(f"Unsupported template-host: {rhiza_host}. Must be 'github' or 'gitlab'.")  # noqa: TRY003
    return git_url

176 

177 

def _update_sparse_checkout(
    tmp_dir: Path,
    include_paths: list[str],
    git_executable: str,
    git_env: dict[str, str],
) -> None:
    """Update sparse checkout paths in an already-cloned repository.

    Runs ``git sparse-checkout set --skip-checks`` with the given paths inside
    ``tmp_dir``.

    Args:
        tmp_dir: Temporary directory with cloned repository.
        include_paths: Paths to include in sparse checkout.
        git_executable: Path to git executable.
        git_env: Environment variables for git commands.

    Raises:
        SystemExit: With code 1 when the git command fails.
    """
    try:
        logger.debug(f"Updating sparse checkout paths: {include_paths}")
        subprocess.run(  # nosec B603
            [git_executable, "sparse-checkout", "set", "--skip-checks", *include_paths],
            cwd=tmp_dir,
            check=True,
            capture_output=True,
            text=True,
            env=git_env,
        )
        logger.debug("Sparse checkout paths updated")
    except subprocess.CalledProcessError as e:
        logger.error("Failed to update sparse checkout paths")
        _log_git_stderr_errors(e.stderr)
        sys.exit(1)

207 

208 

def _clone_template_repository(
    tmp_dir: Path,
    git_url: str,
    rhiza_branch: str,
    include_paths: list[str],
    git_executable: str,
    git_env: dict[str, str],
) -> None:
    """Clone template repository with sparse checkout.

    Three git commands run in sequence: a ``--depth 1 --filter=blob:none
    --sparse`` clone of ``rhiza_branch`` into ``tmp_dir``, then
    ``sparse-checkout init --cone``, then ``sparse-checkout set --skip-checks``
    with ``include_paths``. Each step logs its own failure message.

    Args:
        tmp_dir: Temporary directory for cloning.
        git_url: Git repository URL.
        rhiza_branch: Branch to clone.
        include_paths: Initial paths to include in sparse checkout.
        git_executable: Path to git executable.
        git_env: Environment variables for git commands.

    Raises:
        SystemExit: With code 1 when any of the three git commands fails.
    """
    # Keyword arguments shared by every git invocation below.
    run_options = {"check": True, "capture_output": True, "text": True, "env": git_env}

    # Clone the repository using sparse checkout
    try:
        logger.debug("Executing git clone with sparse checkout")
        subprocess.run(  # nosec B603
            [
                git_executable,
                "clone",
                "--depth",
                "1",
                "--filter=blob:none",
                "--sparse",
                "--branch",
                rhiza_branch,
                git_url,
                str(tmp_dir),
            ],
            **run_options,
        )
        logger.debug("Git clone completed successfully")
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to clone repository from {git_url}")
        _log_git_stderr_errors(e.stderr)
        logger.error("Please check that:")
        logger.error(" - The repository exists and is accessible")
        logger.error(f" - Branch '{rhiza_branch}' exists in the repository")
        logger.error(" - You have network access to the git hosting service")
        sys.exit(1)

    # Initialize sparse checkout in cone mode
    try:
        logger.debug("Initializing sparse checkout")
        subprocess.run(  # nosec B603
            [git_executable, "sparse-checkout", "init", "--cone"],
            cwd=tmp_dir,
            **run_options,
        )
        logger.debug("Sparse checkout initialized")
    except subprocess.CalledProcessError as e:
        logger.error("Failed to initialize sparse checkout")
        _log_git_stderr_errors(e.stderr)
        sys.exit(1)

    # Set sparse checkout paths
    try:
        logger.debug(f"Setting sparse checkout paths: {include_paths}")
        subprocess.run(  # nosec B603
            [git_executable, "sparse-checkout", "set", "--skip-checks", *include_paths],
            cwd=tmp_dir,
            **run_options,
        )
        logger.debug("Sparse checkout paths configured")
    except subprocess.CalledProcessError as e:
        logger.error("Failed to configure sparse checkout paths")
        _log_git_stderr_errors(e.stderr)
        sys.exit(1)

291 

292 

def _copy_files_to_target(
    tmp_dir: Path,
    target: Path,
    include_paths: list[str],
    excluded_paths: list[str],
    force: bool,
) -> list[Path]:
    """Copy files from temporary clone to target repository.

    Included and excluded paths are expanded to individual files inside
    ``tmp_dir``. The clone's ``.rhiza/template.yml`` and ``.rhiza/history``
    are always excluded when present. Every remaining file is recorded in the
    result, including files that are skipped because they already exist in the
    target and ``force`` is false.

    Args:
        tmp_dir: Temporary directory with cloned files.
        target: Target repository path.
        include_paths: Paths to include.
        excluded_paths: Paths to exclude.
        force: Whether to overwrite existing files.

    Returns:
        List of materialized file paths (relative to target).
    """
    # Expand paths to individual files
    logger.debug("Expanding included paths to individual files")
    all_files = __expand_paths(tmp_dir, include_paths)
    logger.info(f"Found {len(all_files)} file(s) in included paths")

    # Create set of excluded files (resolved so comparisons are on canonical paths)
    logger.debug("Expanding excluded paths to individual files")
    excluded_files = {f.resolve() for f in __expand_paths(tmp_dir, excluded_paths)}

    # Always exclude .rhiza/template.yml to prevent overwriting local configuration
    # Also exclude .rhiza/history to prevent overwriting local history with template history
    rhiza_dir = tmp_dir / ".rhiza"
    template_config = (rhiza_dir / "template.yml").resolve()
    upstream_history = (rhiza_dir / "history").resolve()

    if template_config.is_file():
        excluded_files.add(template_config)

    if upstream_history.is_file():
        excluded_files.add(upstream_history)

    if excluded_files:
        logger.info(f"Excluding {len(excluded_files)} file(s) based on exclude patterns")

    # Filter out excluded files
    files_to_copy = [f for f in all_files if f.resolve() not in excluded_files]
    logger.info(f"Will materialize {len(files_to_copy)} file(s) to target repository")

    # Copy files to target repository
    logger.info("Copying files to target repository...")
    materialized_files: list[Path] = []

    for src_file in files_to_copy:
        # The path relative to the clone root is also the path relative to target
        relative_path = src_file.relative_to(tmp_dir)
        dst_file = target / relative_path

        # Track this file for history (done before the skip check below, so
        # skipped files are still recorded)
        materialized_files.append(relative_path)

        # Check if file exists and handle based on force flag
        if dst_file.exists() and not force:
            logger.warning(f"{relative_path} already exists — use --force to overwrite")
            continue

        # Create parent directories if needed
        dst_file.parent.mkdir(parents=True, exist_ok=True)

        # Copy file with metadata preservation
        shutil.copy2(src_file, dst_file)
        logger.success(f"[ADD] {relative_path}")

    return materialized_files

365 

366 

def _warn_about_workflow_files(materialized_files: list[Path]) -> None:
    """Warn if workflow files were materialized.

    A file counts as a workflow file when its first two path components are
    ``.github`` and ``workflows``. If none match, nothing is logged.

    Args:
        materialized_files: List of materialized file paths.
    """
    workflow_files = [p for p in materialized_files if p.parts[:2] == (".github", "workflows")]
    if not workflow_files:
        return

    logger.warning(
        "Workflow files were materialized. Updating these files requires "
        "a token with the 'workflow' permission in GitHub Actions."
    )
    logger.info(f"Workflow files affected: {len(workflow_files)}")

381 

382 

def _clean_orphaned_files(target: Path, materialized_files: list[Path]) -> None:
    """Clean up files that are no longer maintained by template.

    Reads the previous history file (``.rhiza/history``, falling back to the
    legacy ``.rhiza.history``) and deletes every file listed there that is not
    in ``materialized_files``. ``.rhiza/template.yml`` is never deleted. A
    failed deletion is logged as a warning and does not stop the cleanup.

    Args:
        target: Target repository path.
        materialized_files: List of currently materialized files.
    """
    # Read old history file
    new_history_file = target / ".rhiza" / "history"
    old_history_file = target / ".rhiza.history"

    # Prefer new location, check old for migration
    if new_history_file.exists():
        history_file = new_history_file
        logger.debug(f"Reading existing history file from new location: {history_file.relative_to(target)}")
    elif old_history_file.exists():
        history_file = old_history_file
        logger.debug(f"Reading existing history file from old location: {history_file.relative_to(target)}")
    else:
        logger.debug("No existing history file found")
        return

    # Non-empty lines that are not '#' comments are tracked paths.
    previously_tracked_files: set[Path] = set()
    with history_file.open("r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line and not line.startswith("#"):
                previously_tracked_files.add(Path(line))

    logger.debug(f"Found {len(previously_tracked_files)} file(s) in previous history")

    # Find orphaned files
    orphaned_files = previously_tracked_files - set(materialized_files)
    if not orphaned_files:
        logger.debug("No orphaned files to clean up")
        return

    # Protected files that should never be deleted automatically
    # even if they are orphaned (e.g. user chose to stop tracking them)
    protected_files = {Path(".rhiza/template.yml")}

    logger.info(f"Found {len(orphaned_files)} orphaned file(s) no longer maintained by template")
    for file_path in sorted(orphaned_files):
        if file_path in protected_files:
            logger.info(f"Skipping protected file: {file_path}")
            continue

        full_path = target / file_path
        if not full_path.exists():
            logger.debug(f"Skipping {file_path} (already deleted)")
            continue

        try:
            full_path.unlink()
            logger.success(f"[DEL] {file_path}")
        except Exception as e:
            # Best effort: report and carry on with the remaining files.
            logger.warning(f"Failed to delete {file_path}: {e}")

440 

441 

def _write_history_file(target: Path, materialized_files: list[Path], rhiza_repo: str, rhiza_branch: str) -> None:
    """Write history file tracking materialized files.

    Overwrites ``.rhiza/history`` with a comment header followed by one
    tracked path per line, sorted and without duplicates. If the legacy
    ``.rhiza.history`` file exists it is then removed; failure to remove it
    is logged as a warning only.

    Args:
        target: Target repository path.
        materialized_files: List of materialized files.
        rhiza_repo: Template repository name.
        rhiza_branch: Template branch name.
    """
    # Always write to new location
    history_file = target / ".rhiza" / "history"
    history_file.parent.mkdir(parents=True, exist_ok=True)

    # A file can be listed more than once when include paths overlap; record it once.
    tracked_files = sorted(set(materialized_files))

    logger.debug(f"Writing history file: {history_file.relative_to(target)}")
    with history_file.open("w", encoding="utf-8") as f:
        f.write("# Rhiza Template History\n")
        f.write("# This file lists all files managed by the Rhiza template.\n")
        f.write(f"# Template repository: {rhiza_repo}\n")
        f.write(f"# Template branch: {rhiza_branch}\n")
        f.write("#\n")
        f.write("# Files under template control:\n")
        f.writelines(f"{file_path}\n" for file_path in tracked_files)

    logger.info(f"Updated {history_file.relative_to(target)} with {len(tracked_files)} file(s)")

    # Clean up old history file if it exists (migration)
    old_history_file = target / ".rhiza.history"
    if old_history_file.exists():
        try:
            old_history_file.unlink()
            logger.debug(f"Removed old history file: {old_history_file.relative_to(target)}")
        except Exception as e:
            logger.warning(f"Could not remove old history file: {e}")

476 

477 

def __expand_paths(base_dir: Path, paths: list[str]) -> list[Path]:
    """Expand files/directories relative to base_dir into a flat list of files.

    Given a list of paths relative to ``base_dir``, return a flat list of all
    individual files. Directories are walked recursively. Each file appears at
    most once, in first-seen order, even when the inputs overlap (for example
    a directory and a file inside it). Paths that do not exist under
    ``base_dir`` are skipped with a debug log entry.

    Args:
        base_dir: The base directory to resolve paths against.
        paths: List of relative path strings (files or directories).

    Returns:
        A flat list of Path objects representing all individual files found.
    """
    # A dict keeps insertion order and gives O(1) duplicate detection.
    found: dict[Path, None] = {}
    for p in paths:
        full_path = base_dir / p
        if full_path.is_file():
            # The path is a regular file
            found.setdefault(full_path, None)
        elif full_path.is_dir():
            # It's a directory: recursively collect all files within it
            for candidate in full_path.rglob("*"):
                if candidate.is_file():
                    found.setdefault(candidate, None)
        else:
            # Path does not exist in the cloned repository - skip it
            # This can happen if the template repo doesn't have certain paths
            logger.debug(f"Path not found in template repository: {p}")
    return list(found)

506 

507 

def materialize(target: Path, branch: str, target_branch: str | None, force: bool) -> None:
    """Materialize Rhiza templates into the target repository.

    This performs a sparse checkout of the template repository and copies the
    selected files into the target repository, recording all files under
    template control in `.rhiza/history`.

    The steps run in this order: optionally switch the target to
    ``target_branch``, validate and load the template configuration, clone the
    template into a temporary directory, resolve template bundles to paths
    when templates are configured, copy the files, then delete orphaned files
    and rewrite the history file. The temporary directory is removed even when
    cloning, resolving or copying fails.

    Args:
        target (Path): Path to the target repository.
        branch (str): The Rhiza template branch to use.
        target_branch (str | None): Optional branch name to create/checkout in
            the target repository.
        force (bool): Whether to overwrite existing files.

    Raises:
        SystemExit: With code 1 when resolving templates raises ``ValueError``.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")
    logger.info(f"Rhiza branch: {branch}")

    # Setup git environment
    git_executable = get_git_executable()
    logger.debug(f"Using git executable: {git_executable}")
    git_env = os.environ.copy()
    git_env["GIT_TERMINAL_PROMPT"] = "0"

    # Handle target branch if specified
    _handle_target_branch(target, target_branch, git_executable, git_env)

    # Validate and load template configuration
    template, rhiza_repo, rhiza_branch, include_paths, excluded_paths = _validate_and_load_template(target, branch)
    rhiza_host = template.template_host or "github"

    # Construct git URL
    git_url = _construct_git_url(rhiza_repo, rhiza_host)

    # Clone template repository
    tmp_dir = Path(tempfile.mkdtemp())
    logger.info(f"Cloning {rhiza_repo}@{rhiza_branch} from {rhiza_host} into temporary directory")
    logger.debug(f"Temporary directory: {tmp_dir}")

    try:
        # Clone with initial minimal checkout to load template-bundles.yml if needed
        initial_paths = [".rhiza"] if template.templates else include_paths
        _clone_template_repository(tmp_dir, git_url, rhiza_branch, initial_paths, git_executable, git_env)

        # Load template-bundles.yml and resolve templates to paths if using template mode
        if template.templates:
            logger.info("Resolving templates to file paths...")
            try:
                bundles_config = load_bundles_from_clone(tmp_dir)
                resolved_paths = resolve_include_paths(template, bundles_config)
                logger.info(f"Resolved {len(template.templates)} template(s) to {len(resolved_paths)} path(s)")
                logger.debug(f"Resolved paths: {resolved_paths}")
                # Update sparse checkout with resolved paths
                _update_sparse_checkout(tmp_dir, resolved_paths, git_executable, git_env)
                include_paths = resolved_paths
            except ValueError as e:
                logger.error(f"Failed to resolve templates: {e}")
                sys.exit(1)

        materialized_files = _copy_files_to_target(tmp_dir, target, include_paths, excluded_paths, force)
    finally:
        logger.debug(f"Cleaning up temporary directory: {tmp_dir}")
        shutil.rmtree(tmp_dir)

    # Post-processing
    _warn_about_workflow_files(materialized_files)
    _clean_orphaned_files(target, materialized_files)
    _write_history_file(target, materialized_files, rhiza_repo, rhiza_branch)

    logger.success("Rhiza templates materialized successfully")
    logger.info(
        "Next steps:\n"
        " 1. Review changes:\n"
        " git status\n"
        " git diff\n\n"
        " 2. Commit:\n"
        " git add .\n"
        ' git commit -m "chore: import rhiza templates"\n\n'
        "This is a one-shot snapshot.\n"
        "Re-run this command to update templates explicitly."
    )