Coverage for src / rhiza / commands / materialize.py: 100%
183 statements
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-29 01:59 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2025-12-29 01:59 +0000
"""Command for materializing Rhiza template files into a repository.

This module implements the `materialize` command. It performs a sparse
checkout of the configured template repository, copies the selected files
into the target Git repository, and records managed files in
`.rhiza/history`. Use this to take a one-shot snapshot of template files.
"""
9import os
10import shutil
11import subprocess
12import sys
13import tempfile
14from pathlib import Path
16from loguru import logger
18from rhiza.commands.validate import validate
19from rhiza.models import RhizaTemplate
def __expand_paths(base_dir: Path, paths: list[str]) -> list[Path]:
    """Expand files/directories relative to base_dir into a flat list of files.

    Given a list of paths relative to ``base_dir``, return a flat list of all
    individual files. Directories are walked recursively. Entries that are
    neither a file nor a directory under ``base_dir`` are skipped with a
    debug-level log message.

    Each file appears at most once in the result, in first-seen order, even
    when the entries in ``paths`` overlap (for example a directory together
    with a file inside it, or the same entry listed twice).

    Args:
        base_dir: The base directory to resolve paths against.
        paths: List of relative path strings (files or directories).

    Returns:
        A flat, duplicate-free list of Path objects representing all
        individual files found.
    """
    # A dict is used as an insertion-ordered set: keys keep first-seen order
    # and repeated insertions of an equal Path are no-ops.
    unique_files: dict[Path, None] = {}
    for p in paths:
        full_path = base_dir / p
        if full_path.is_file():
            # Regular file: take it as-is
            unique_files[full_path] = None
        elif full_path.is_dir():
            # Directory: collect every file below it, at any depth
            for f in full_path.rglob("*"):
                if f.is_file():
                    unique_files[f] = None
        else:
            # Path does not exist in the cloned repository - skip it.
            # This can happen if the template repo doesn't have certain paths.
            logger.debug(f"Path not found in template repository: {p}")
    return list(unique_files)
def materialize(target: Path, branch: str, target_branch: str | None, force: bool) -> None:
    """Materialize Rhiza templates into the target repository.

    This performs a sparse checkout of the template repository and copies the
    selected files into the target repository, recording all files under
    template control in `.rhiza/history`.

    The steps are, in order:

    1. Optionally check out (or create) ``target_branch`` in the target repo.
    2. Validate and load ``.rhiza/template.yml``.
    3. Sparse-clone the template repository into a temporary directory.
    4. Copy the included (minus excluded) files into the target repository.
    5. Delete files listed in the previous history that are no longer
       provided by the template.
    6. Rewrite ``.rhiza/history`` and remove a legacy ``.rhiza.history``.

    Args:
        target (Path): Path to the target repository.
        branch (str): The Rhiza template branch to use when template.yml does
            not specify one.
        target_branch (str | None): Optional branch name to create/checkout in
            the target repository.
        force (bool): Whether to overwrite existing files.

    Raises:
        SystemExit: If the target branch cannot be created/checked out, or if
            the Rhiza configuration fails validation (exit code 1).
        RuntimeError: If template.yml defines no include paths.
        ValueError: If the template host is neither 'github' nor 'gitlab'.
        subprocess.CalledProcessError: If cloning the template repository or
            configuring its sparse checkout fails.
    """
    # Resolve to absolute path to avoid any ambiguity
    target = target.resolve()

    logger.info(f"Target repository: {target}")
    logger.info(f"Rhiza branch: {branch}")

    # Set environment to prevent git from prompting for credentials
    # This ensures non-interactive behavior during git operations
    git_env = os.environ.copy()
    git_env["GIT_TERMINAL_PROMPT"] = "0"

    # -----------------------
    # Handle target branch creation/checkout if specified
    # -----------------------
    # When a target branch is specified, we either checkout an existing branch
    # or create a new one. This allows users to materialize templates onto a
    # separate branch for review before merging to main.
    if target_branch:
        logger.info(f"Creating/checking out target branch: {target_branch}")
        try:
            # Check if the name already resolves using git rev-parse
            # Returns 0 if it resolves, non-zero otherwise
            # NOTE(review): `rev-parse --verify <name>` also succeeds for tags
            # and commit ids, not only local branches - confirm that is intended.
            result = subprocess.run(
                ["git", "rev-parse", "--verify", target_branch],
                cwd=target,
                capture_output=True,
                text=True,
                env=git_env,
            )

            if result.returncode == 0:
                # Branch exists, switch to it
                logger.info(f"Branch '{target_branch}' exists, checking out...")
                subprocess.run(
                    ["git", "checkout", target_branch],
                    cwd=target,
                    check=True,
                    env=git_env,
                )
            else:
                # Branch doesn't exist, create it from current HEAD
                logger.info(f"Creating new branch '{target_branch}'...")
                subprocess.run(
                    ["git", "checkout", "-b", target_branch],
                    cwd=target,
                    check=True,
                    env=git_env,
                )
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to create/checkout branch '{target_branch}': {e}")
            sys.exit(1)

    # -----------------------
    # Validate Rhiza configuration
    # -----------------------
    # The validate function checks if template.yml exists and is valid
    # Returns True if valid, False otherwise
    valid = validate(target)

    if not valid:
        logger.error(f"Rhiza template is invalid in: {target}")
        logger.error("Please fix validation errors and try again")
        sys.exit(1)

    # Load the template configuration from the validated file
    # Validation ensures the file exists at .rhiza/template.yml
    template_file = target / ".rhiza" / "template.yml"
    template = RhizaTemplate.from_yaml(template_file)

    # Extract template configuration settings
    # These define where to clone from and what to materialize
    rhiza_repo = template.template_repository
    # Use CLI arg if template doesn't specify a branch
    rhiza_branch = template.template_branch or branch
    # Default to GitHub if not specified
    rhiza_host = template.template_host or "github"
    include_paths = template.include
    excluded_paths = template.exclude

    # Validate that we have paths to include
    if not include_paths:
        logger.error("No include paths found in template.yml")
        logger.error("Add at least one path to the 'include' list in template.yml")
        raise RuntimeError("No include paths found in template.yml")

    # Log the paths we'll be including for transparency
    logger.info("Include paths:")
    for p in include_paths:
        logger.info(f" - {p}")

    # Log excluded paths if any are defined
    if excluded_paths:
        logger.info("Exclude paths:")
        for p in excluded_paths:
            logger.info(f" - {p}")

    # -----------------------
    # Construct git clone URL based on host
    # -----------------------
    # Support both GitHub and GitLab template repositories
    if rhiza_host == "gitlab":
        git_url = f"https://gitlab.com/{rhiza_repo}.git"
        logger.debug(f"Using GitLab repository: {git_url}")
    elif rhiza_host == "github":
        git_url = f"https://github.com/{rhiza_repo}.git"
        logger.debug(f"Using GitHub repository: {git_url}")
    else:
        logger.error(f"Unsupported template-host: {rhiza_host}")
        logger.error("template-host must be 'github' or 'gitlab'")
        raise ValueError(f"Unsupported template-host: {rhiza_host}. Must be 'github' or 'gitlab'.")

    # -----------------------
    # Sparse clone template repo
    # -----------------------
    # Create a temporary directory for the sparse clone
    # This will be cleaned up in the finally block
    tmp_dir = Path(tempfile.mkdtemp())
    materialized_files: list[Path] = []

    logger.info(f"Cloning {rhiza_repo}@{rhiza_branch} from {rhiza_host} into temporary directory")
    logger.debug(f"Temporary directory: {tmp_dir}")

    try:
        # Clone the repository using sparse checkout for efficiency
        # --depth 1: Only fetch the latest commit (shallow clone)
        # --filter=blob:none: Don't download file contents initially
        # --sparse: Enable sparse checkout mode
        # This combination allows us to clone only the paths we need
        try:
            logger.debug("Executing git clone with sparse checkout")
            subprocess.run(
                [
                    "git",
                    "clone",
                    "--depth",
                    "1",
                    "--filter=blob:none",
                    "--sparse",
                    "--branch",
                    rhiza_branch,
                    git_url,
                    str(tmp_dir),
                ],
                check=True,
                capture_output=True,
                text=True,
                env=git_env,
            )
            logger.debug("Git clone completed successfully")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to clone repository: {e}")
            if e.stderr:
                logger.error(f"Git error: {e.stderr.strip()}")
            logger.error(f"Check that the repository '{rhiza_repo}' exists and branch '{rhiza_branch}' is valid")
            raise

        # Initialize sparse checkout in cone mode
        # Cone mode is more efficient and uses pattern matching
        try:
            logger.debug("Initializing sparse checkout")
            subprocess.run(
                ["git", "sparse-checkout", "init", "--cone"],
                cwd=tmp_dir,
                check=True,
                capture_output=True,
                text=True,
                env=git_env,
            )
            logger.debug("Sparse checkout initialized")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to initialize sparse checkout: {e}")
            if e.stderr:
                logger.error(f"Git error: {e.stderr.strip()}")
            raise

        # Set sparse checkout paths to only checkout the files/directories we need
        # --skip-checks: Don't validate that patterns match existing files
        try:
            logger.debug(f"Setting sparse checkout paths: {include_paths}")
            subprocess.run(
                ["git", "sparse-checkout", "set", "--skip-checks", *include_paths],
                cwd=tmp_dir,
                check=True,
                capture_output=True,
                text=True,
                env=git_env,
            )
            logger.debug("Sparse checkout paths configured")
        except subprocess.CalledProcessError as e:
            logger.error(f"Failed to set sparse checkout paths: {e}")
            if e.stderr:
                logger.error(f"Git error: {e.stderr.strip()}")
            raise

        # -----------------------
        # Expand include/exclude paths
        # -----------------------
        # Convert directory paths to individual file paths for precise control.
        # Overlapping include entries (a directory plus a file inside it) would
        # otherwise list the same file twice, which leads to a spurious
        # "already exists" warning on the second pass and duplicate history
        # lines - so collapse duplicates while keeping first-seen order.
        logger.debug("Expanding included paths to individual files")
        all_files = list(dict.fromkeys(__expand_paths(tmp_dir, include_paths)))
        logger.info(f"Found {len(all_files)} file(s) in included paths")

        # Create a set of excluded files for fast lookup
        logger.debug("Expanding excluded paths to individual files")
        excluded_files = {f.resolve() for f in __expand_paths(tmp_dir, excluded_paths)}
        if excluded_files:
            logger.info(f"Excluding {len(excluded_files)} file(s) based on exclude patterns")

        # Filter out excluded files from the list of files to copy
        files_to_copy = [f for f in all_files if f.resolve() not in excluded_files]
        logger.info(f"Will materialize {len(files_to_copy)} file(s) to target repository")

        # -----------------------
        # Copy files into target repo
        # -----------------------
        # Copy each file from the temporary clone to the target repository
        # Preserve file metadata (timestamps, permissions) with copy2
        logger.info("Copying files to target repository...")
        for src_file in files_to_copy:
            # Calculate destination path maintaining relative structure
            relative_path = src_file.relative_to(tmp_dir)
            dst_file = target / relative_path

            # Track this file for .rhiza/history. Files that are skipped
            # below because they already exist are still recorded.
            materialized_files.append(relative_path)

            # Check if file already exists and handle based on force flag
            if dst_file.exists() and not force:
                logger.warning(f"{relative_path} already exists — use --force to overwrite")
                continue

            # Create parent directories if they don't exist
            dst_file.parent.mkdir(parents=True, exist_ok=True)

            # Copy file with metadata preservation
            shutil.copy2(src_file, dst_file)
            logger.success(f"[ADD] {relative_path}")

    finally:
        # Clean up the temporary directory
        logger.debug(f"Cleaning up temporary directory: {tmp_dir}")
        shutil.rmtree(tmp_dir)

    # -----------------------
    # Warn about workflow files
    # -----------------------
    # GitHub Actions workflow files require special permissions to modify
    # Check if any of the materialized files are workflow files
    workflow_files = [p for p in materialized_files if p.parts[:2] == (".github", "workflows")]

    if workflow_files:
        logger.warning(
            "Workflow files were materialized. Updating these files requires "
            "a token with the 'workflow' permission in GitHub Actions."
        )
        logger.info(f"Workflow files affected: {len(workflow_files)}")

    # -----------------------
    # Clean up orphaned files
    # -----------------------
    # Read the old history file to find files that are no longer
    # part of the current materialization and should be deleted
    # Check both new and old locations for backward compatibility
    new_history_file = target / ".rhiza" / "history"
    old_history_file = target / ".rhiza.history"

    # Prefer new location, but check old location for migration
    if new_history_file.exists():
        history_file = new_history_file
        logger.debug(f"Reading existing history file from new location: {history_file.relative_to(target)}")
    elif old_history_file.exists():
        history_file = old_history_file
        logger.debug(f"Reading existing history file from old location: {history_file.relative_to(target)}")
    else:
        history_file = new_history_file  # Default to new location for creation
        logger.debug("No existing history file found, will create new one")

    previously_tracked_files: set[Path] = set()

    if history_file.exists():
        with history_file.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip comments and empty lines
                if line and not line.startswith("#"):
                    previously_tracked_files.add(Path(line))

        logger.debug(f"Found {len(previously_tracked_files)} file(s) in previous history")

    # Convert materialized_files list to a set for comparison
    currently_materialized_files = set(materialized_files)

    # Find orphaned files (in old history but not in new materialization)
    orphaned_files = previously_tracked_files - currently_materialized_files

    if orphaned_files:
        logger.info(f"Found {len(orphaned_files)} orphaned file(s) no longer maintained by template")
        for file_path in sorted(orphaned_files):
            # The history file is plain text inside the repository. An entry
            # with a root/drive anchor would replace `target` when joined, and
            # an entry containing ".." can climb out of it - never delete
            # anything based on such an entry.
            if file_path.anchor or ".." in file_path.parts:
                logger.warning(f"Skipping {file_path}: history entry points outside the target repository")
                continue

            full_path = target / file_path
            if full_path.exists():
                try:
                    full_path.unlink()
                    logger.success(f"[DEL] {file_path}")
                except Exception as e:
                    # Best effort: a file that cannot be removed must not
                    # abort the rest of the materialization.
                    logger.warning(f"Failed to delete {file_path}: {e}")
            else:
                logger.debug(f"Skipping {file_path} (already deleted)")
    else:
        logger.debug("No orphaned files to clean up")

    # -----------------------
    # Write history file
    # -----------------------
    # This file tracks which files were materialized by Rhiza
    # Useful for understanding which files came from the template
    # Always write to new location (.rhiza/history)
    history_file = target / ".rhiza" / "history"

    # Ensure .rhiza directory exists
    history_file.parent.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Writing history file: {history_file.relative_to(target)}")
    with history_file.open("w", encoding="utf-8") as f:
        f.write("# Rhiza Template History\n")
        f.write("# This file lists all files managed by the Rhiza template.\n")
        f.write(f"# Template repository: {rhiza_repo}\n")
        f.write(f"# Template branch: {rhiza_branch}\n")
        f.write("#\n")
        f.write("# Files under template control:\n")
        # Sort files for consistent ordering
        for file_path in sorted(materialized_files):
            f.write(f"{file_path}\n")

    logger.info(f"Updated {history_file.relative_to(target)} with {len(materialized_files)} file(s)")

    # Clean up old history file if it exists (migration)
    old_history_file = target / ".rhiza.history"
    if old_history_file.exists() and old_history_file != history_file:
        try:
            old_history_file.unlink()
            logger.debug(f"Removed old history file: {old_history_file.relative_to(target)}")
        except Exception as e:
            logger.warning(f"Could not remove old history file: {e}")

    logger.success("Rhiza templates materialized successfully")

    logger.info(
        "Next steps:\n"
        " 1. Review changes:\n"
        " git status\n"
        " git diff\n\n"
        " 2. Commit:\n"
        " git add .\n"
        ' git commit -m "chore: import rhiza templates"\n\n'
        "This is a one-shot snapshot.\n"
        "Re-run this command to update templates explicitly."
    )