Coverage for src / rhiza / commands / summarise.py: 93%
276 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-27 15:33 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-27 15:33 +0000
1"""Command for generating PR descriptions from staged changes.
3This module provides functionality to analyze staged git changes and generate
4structured PR descriptions for rhiza sync operations.
5"""
7import json as _json
8import subprocess # nosec B404
9from collections import defaultdict
10from dataclasses import dataclass, field
11from datetime import UTC, datetime
12from pathlib import Path
13from typing import NamedTuple
15import jinja2
16import yaml
17from loguru import logger
19from rhiza.models.lock import TemplateLock
20from rhiza.models.template import RhizaTemplate
23@dataclass(kw_only=True)
24class SummariseOptions:
25 """Options controlling the output of :func:`generate_pr_description`.
27 All fields are keyword-only and default to the standard behaviour so
28 callers only need to set the fields they want to override.
29 """
31 include_header: bool = True
32 """Whether to include the header section (markdown / plain formats)."""
34 include_footer: bool = True
35 """Whether to include the footer section (markdown / plain formats)."""
37 include_categories: bool = True
38 """Whether to group changes by category; when ``False`` a flat list is shown."""
40 output_format: str = "markdown"
41 """Output format: ``"markdown"`` (default), ``"plain"``, or ``"json"``."""
43 title: str | None = None
44 """Override the section heading; ``None`` uses the built-in default."""
46 compare_ref: str | None = None
47 """Compare against this git ref instead of the staged index."""
49 jinja2_template: Path | None = field(default=None)
50 """Path to a Jinja2 template file for fully custom output."""
53class _TemplateInfo(NamedTuple):
54 """Lightweight container for template metadata used during rendering."""
56 repo: str
57 branch: str
58 last_sync: str | None
def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Execute ``git`` with *args* and return its trimmed standard output.

    Args:
        args: Arguments handed to git (the leading ``git`` is added here).
        cwd: Directory in which the command is run.

    Returns:
        Standard output with surrounding whitespace stripped, or an empty
        string when git exits with a non-zero status (the error is logged).
    """
    try:
        completed = subprocess.run(  # nosec B603 B607  # noqa: S603
            ["git", *args],  # noqa: S607
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        logger.error(f"Error running git {' '.join(args)}: {exc.stderr}")
        return ""
    return completed.stdout.strip()
def get_staged_changes(repo_path: Path, compare_ref: str | None = None) -> dict[str, list[str]]:
    """Collect changed file paths grouped by change type.

    Runs ``git diff --name-status`` (against the staged index, or against
    *compare_ref* when given) and sorts each reported path into a bucket.

    Args:
        repo_path: Path to the repository.
        compare_ref: Optional git ref to compare against. When provided the
            working tree is diffed against this ref instead of the staged index.

    Returns:
        Dictionary with keys ``'added'``, ``'modified'`` and ``'deleted'``, each
        holding a list of file paths. Renamed files are listed under
        ``'modified'`` using their new path. Lines with any other status are
        ignored.
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }
    bucket_for_status = {"A": "added", "M": "modified", "D": "deleted"}

    # Compare against a specific ref, or fall back to staged changes
    if compare_ref:
        diff_args = ["diff", compare_ref, "--name-status"]
    else:
        diff_args = ["diff", "--cached", "--name-status"]

    output = run_git_command(diff_args, cwd=repo_path)

    for line in output.split("\n"):
        status, tab, paths = line.partition("\t")
        if not tab:
            # Blank line or a line without a status/path separator
            continue

        if status.startswith("R"):
            # Rename lines read "R<score>\t<old path>\t<new path>"; keep only
            # the destination so the entry is a single, existing file path.
            changes["modified"].append(paths.rsplit("\t", 1)[-1])
            continue

        bucket = bucket_for_status.get(status)
        if bucket is not None:
            changes[bucket].append(paths)

    return changes
128_CONFIG_FILES: frozenset[str] = frozenset(
129 {
130 "Makefile",
131 "ruff.toml",
132 "pytest.ini",
133 ".editorconfig",
134 ".gitignore",
135 ".pre-commit-config.yaml",
136 "renovate.json",
137 ".python-version",
138 }
139)
142_DIR_CATEGORIES: dict[str, str] = {
143 "tests": "Tests",
144 "src": "Source Code",
145}
146_DOC_DIRS: frozenset[str] = frozenset({"book", "docs"})
149def _categorize_by_directory(first_dir: str, filepath: str) -> str | None:
150 """Categorize file based on its first directory.
152 Args:
153 first_dir: First directory in the path
154 filepath: Full file path
156 Returns:
157 Category name or None if no match
158 """
159 if first_dir == ".github":
160 path_parts = Path(filepath).parts
161 if len(path_parts) > 1 and path_parts[1] == "workflows":
162 return "GitHub Actions Workflows"
163 return "GitHub Configuration"
165 if first_dir == ".rhiza":
166 if "script" in filepath.lower():
167 return "Rhiza Scripts"
168 if "Makefile" in filepath:
169 return "Makefiles"
170 return "Rhiza Configuration"
172 if first_dir in _DIR_CATEGORIES:
173 return _DIR_CATEGORIES[first_dir]
175 if first_dir in _DOC_DIRS:
176 return "Documentation"
178 return None
181def _categorize_single_file(filepath: str) -> str:
182 """Categorize a single file path.
184 Args:
185 filepath: File path to categorize
187 Returns:
188 Category name
189 """
190 path_parts = Path(filepath).parts
192 if not path_parts:
193 return "Other"
195 # Try directory-based categorization first
196 category = _categorize_by_directory(path_parts[0], filepath)
197 if category:
198 return category
200 # Check file-based categories
201 if filepath.endswith(".md"):
202 return "Documentation"
204 if filepath in _CONFIG_FILES:
205 return "Configuration Files"
207 return "Other"
def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under their category label.

    Args:
        files: List of file paths.

    Returns:
        Plain dictionary mapping each category name to the paths assigned to
        it, in the order the paths were given.
    """
    grouped: defaultdict[str, list[str]] = defaultdict(list)
    for path in files:
        grouped[_categorize_single_file(path)].append(path)
    return dict(grouped)
def get_template_info(repo_path: Path) -> tuple[str, str]:
    """Look up the template repository and branch for a project.

    ``.rhiza/template.lock`` is consulted first; if it is missing, unreadable
    or names no repository, ``.rhiza/template.yml`` is used instead. When
    neither yields a result, two empty strings are returned — no repository
    name is ever assumed.

    Args:
        repo_path: Path to the repository.

    Returns:
        Tuple of (template_repo, template_branch).
    """
    rhiza_dir = repo_path / ".rhiza"

    # The lock file records what was actually synced, so it wins when usable
    lock_path = rhiza_dir / "template.lock"
    if lock_path.exists():
        try:
            lock = TemplateLock.from_yaml(lock_path)
            if lock.repo:
                return lock.repo, lock.ref
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.lock; falling back to template.yml")

    # Otherwise read template.yml through its model class
    yml_path = rhiza_dir / "template.yml"
    if yml_path.exists():
        try:
            template = RhizaTemplate.from_yaml(yml_path)
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.yml")
        else:
            return template.template_repository, template.template_branch

    return ("", "")
def get_last_sync_date(repo_path: Path, template_repo: str = "") -> str | None:
    """Get the date of the last sync.

    Sources are tried in this order:

    1. ``synced_at`` from ``.rhiza/template.lock``.
    2. The committer date of the most recent commit whose message matches,
       case-insensitively, ``Sync``, ``template`` or (when *template_repo* is
       given) the last path segment of the template repository name. Git
       treats multiple ``--grep`` patterns as alternatives.
    3. The modification time of ``.rhiza/history``.

    Args:
        repo_path: Path to the repository.
        template_repo: Template repository name (e.g. ``"my-org/my-template"``);
            its short name is added as an extra git-log grep pattern.

    Returns:
        ISO format date string or None if not found.
    """
    # Prefer template.lock synced_at - it is the most reliable source
    lock_file = repo_path / ".rhiza" / "template.lock"
    if lock_file.exists():
        try:
            lock = TemplateLock.from_yaml(lock_file)
            if lock.synced_at:
                return lock.synced_at
        except (yaml.YAMLError, ValueError, TypeError, KeyError) as exc:
            # Best effort: an unreadable lock file must not abort the summary,
            # but leave a trace instead of swallowing the error silently.
            logger.debug(f"Could not read synced_at from template.lock ({exc}); falling back to git log")

    # Derive the short name from the template repo for targeted grepping
    template_short_name = template_repo.rsplit("/", 1)[-1] if template_repo else ""

    grep_args = ["log", "--format=%cI", "-1"]
    if template_short_name:
        grep_args.extend(["--grep", template_short_name])
    grep_args.extend(["--grep=Sync", "--grep=template", "-i"])

    output = run_git_command(grep_args, cwd=repo_path)
    if output:
        return output

    # Fallback: use the modification time of the history file if it exists
    history_file = repo_path / ".rhiza" / "history"
    if history_file.exists():
        stat = history_file.stat()
        return datetime.fromtimestamp(stat.st_mtime, tz=UTC).isoformat()

    return None
315def _format_file_list(files: list[str], status_emoji: str) -> list[str]:
316 """Format a list of files with the given status emoji.
318 Args:
319 files: List of file paths
320 status_emoji: Emoji to use (✅ for added, 📝 for modified, ❌ for deleted)
322 Returns:
323 List of formatted lines
324 """
325 lines = []
326 for f in sorted(files):
327 lines.append(f"- {status_emoji} `{f}`")
328 return lines
331def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None:
332 """Add a collapsible section for a category and change type.
334 Args:
335 lines: List to append lines to
336 title: Section title (e.g., "Added", "Modified")
337 count: Number of files
338 files: List of file paths
339 emoji: Status emoji
340 """
341 if not files:
342 return
344 lines.append("<details>")
345 lines.append(f"<summary>{title} ({count})</summary>")
346 lines.append("")
347 lines.extend(_format_file_list(files, emoji))
348 lines.append("")
349 lines.append("</details>")
350 lines.append("")
353def _build_header(template_repo: str, title: str | None = None) -> list[str]:
354 """Build the PR description header.
356 Args:
357 template_repo: Template repository name
358 title: Optional override for the section heading
360 Returns:
361 List of header lines
362 """
363 header_title = title if title else "## 🔄 Template Synchronization"
364 lines = [header_title, ""]
365 if template_repo:
366 url = f"https://github.com/{template_repo}"
367 repo_link = f"[{template_repo}]({url})"
368 sync_line = f"This PR synchronizes the repository with the {repo_link} template."
369 lines.append(sync_line)
370 else:
371 lines.append("This PR synchronizes the repository with the upstream template.")
372 lines.append("")
373 return lines
376def _build_summary(changes: dict[str, list[str]]) -> list[str]:
377 """Build the change summary section.
379 Args:
380 changes: Dictionary of changes by type
382 Returns:
383 List of summary lines
384 """
385 return [
386 "### 📊 Change Summary",
387 "",
388 f"- **{len(changes['added'])}** files added",
389 f"- **{len(changes['modified'])}** files modified",
390 f"- **{len(changes['deleted'])}** files deleted",
391 "",
392 ]
def _build_footer(tmpl: _TemplateInfo) -> list[str]:
    """Produce the closing metadata lines of the PR description.

    Args:
        tmpl: Template metadata container.

    Returns:
        Separator and attribution lines, then the template reference and last
        sync (each only when set), then the current local timestamp.
    """
    footer = [
        "---",
        "",
        "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**",
        "",
    ]

    if tmpl.repo:
        footer.append(f"- Template: `{tmpl.repo}@{tmpl.branch}`")
    if tmpl.last_sync:
        footer.append(f"- Last sync: {tmpl.last_sync}")

    # Timezone-aware local time of this run
    generated_at = datetime.now().astimezone()
    footer.append(f"- Sync date: {generated_at.isoformat()}")
    return footer
def _generate_json_output(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
) -> str:
    """Serialise the change data and template metadata as JSON.

    Args:
        changes: Dictionary of changes by type.
        categories: Files grouped by category.
        tmpl: Template metadata container.

    Returns:
        JSON document indented by two spaces; ``sync_date`` holds the current
        local timestamp.
    """
    generated_at = datetime.now().astimezone().isoformat()
    payload = dict(
        template_repo=tmpl.repo,
        template_branch=tmpl.branch,
        last_sync=tmpl.last_sync,
        sync_date=generated_at,
        changes=changes,
        categories=categories,
    )
    return _json.dumps(payload, indent=2)
444def _plain_file_section(lines: list[str], label: str, files: list[str]) -> None:
445 """Append a labelled block of files to *lines* in plain-text format.
447 Args:
448 lines: List to append lines to
449 label: Section label (e.g. "Added")
450 files: List of file paths
451 """
452 if not files:
453 return
454 lines.append(f"{label}:")
455 lines.extend(f" {f}" for f in sorted(files))
456 lines.append("")
def _generate_plain_output(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Render the change data as plain text.

    Args:
        changes: Dictionary of changes by type.
        categories: Files grouped by category.
        tmpl: Template metadata container.
        options: Output customisation options.

    Returns:
        Plain-text formatted string. When there are no changes the text ends
        after "No changes detected." and no footer is written.
    """
    out: list[str] = []

    if options.include_header:
        heading = options.title or "Template Synchronization"
        underline = "=" * len(heading)
        out += [heading, underline, ""]
        if tmpl.repo:
            out += [f"Template: {tmpl.repo}@{tmpl.branch}", ""]

    if sum(map(len, changes.values())) == 0:
        out.append("No changes detected.")
        return "\n".join(out)

    n_added = len(changes["added"])
    n_modified = len(changes["modified"])
    n_deleted = len(changes["deleted"])
    out += [f"Changes: {n_added} added, {n_modified} modified, {n_deleted} deleted", ""]

    if options.include_categories:
        # One block per category, categories and their files both sorted
        for category in sorted(categories):
            out.append(f"{category}:")
            out.extend(f" {path}" for path in sorted(categories[category]))
            out.append("")
    else:
        # Flat listing: one block per change type
        _plain_file_section(out, "Added", changes["added"])
        _plain_file_section(out, "Modified", changes["modified"])
        _plain_file_section(out, "Deleted", changes["deleted"])

    if options.include_footer:
        if tmpl.last_sync:
            out.append(f"Last sync: {tmpl.last_sync}")
        out.append(f"Sync date: {datetime.now().astimezone().isoformat()}")

    return "\n".join(out)
def _generate_jinja2_output(template_path: Path, context: dict) -> str:
    """Render a user-supplied Jinja2 template file with *context*.

    The entries of *context* become the template's variables. Callers are
    expected to provide at least ``template_repo``, ``template_branch``,
    ``last_sync``, ``sync_date``, ``changes``, ``categories`` and ``title``.

    Note:
        Autoescaping is switched off because the result is plain text or
        Markdown rather than HTML. Escape the rendered output before placing
        it in a web page; nothing here sanitises it for HTML.

    Args:
        template_path: Path to the Jinja2 template file (read as UTF-8).
        context: Template context variables.

    Returns:
        Rendered template string.
    """
    env = jinja2.Environment(  # nosec B701
        autoescape=False,  # noqa: S701
        loader=jinja2.BaseLoader(),
    )
    source = template_path.read_text(encoding="utf-8")
    template = env.from_string(source)
    return template.render(**context)
def _markdown_body(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Build the markdown PR description body.

    Layout: optional header, change summary, then either one sub-section per
    category (with collapsible Added / Modified / Deleted lists) or a flat
    "Changed Files" section, and finally the optional footer. When there are
    no changes only "No changes detected." is emitted between header and footer.

    Args:
        changes: Dictionary of changes by type (keys ``'added'``,
            ``'modified'``, ``'deleted'``).
        categories: Files grouped by category.
        tmpl: Template metadata container.
        options: Output customisation options.

    Returns:
        Markdown-formatted string.
    """
    lines: list[str] = []

    if options.include_header:
        lines.extend(_build_header(tmpl.repo, title=options.title))

    total_changes = sum(len(files) for files in changes.values())
    if not total_changes:
        lines.append("No changes detected.")
        if options.include_footer:
            lines.append("")
            lines.extend(_build_footer(tmpl))
        return "\n".join(lines)

    lines.extend(_build_summary(changes))

    # (section title, paths with that status, emoji) in display order
    status_sections = (
        ("Added", changes["added"], "✅"),
        ("Modified", changes["modified"], "📝"),
        ("Deleted", changes["deleted"], "❌"),
    )

    if options.include_categories and categories:
        lines.append("### 📁 Changes by Category")
        lines.append("")

        # Build the membership sets once so each per-file status check is a
        # constant-time lookup instead of a scan of the whole change list.
        status_lookups = [(label, set(paths), emoji) for label, paths, emoji in status_sections]

        for category, files in sorted(categories.items()):
            lines.append(f"#### {category}")
            lines.append("")

            for label, members, emoji in status_lookups:
                matching = [f for f in files if f in members]
                _add_category_section(lines, label, len(matching), matching, emoji)

    elif not options.include_categories:
        lines.append("### 📁 Changed Files")
        lines.append("")
        for label, paths, emoji in status_sections:
            _add_category_section(lines, label, len(paths), paths, emoji)

    if options.include_footer:
        lines.extend(_build_footer(tmpl))

    return "\n".join(lines)
606def generate_pr_description(repo_path: Path, options: SummariseOptions | None = None) -> str:
607 """Generate PR description based on staged changes.
609 Args:
610 repo_path: Path to the repository
611 options: Output customisation options. Defaults to :class:`SummariseOptions`
612 with all fields at their defaults (markdown format, with header / footer /
613 categories, no custom title, staged-index diff).
615 Returns:
616 Formatted PR description
617 """
618 opts = options or SummariseOptions()
620 changes = get_staged_changes(repo_path, compare_ref=opts.compare_ref)
621 template_repo, template_branch = get_template_info(repo_path)
622 last_sync = get_last_sync_date(repo_path, template_repo=template_repo)
624 all_changed_files = changes["added"] + changes["modified"] + changes["deleted"]
625 categories = categorize_files(all_changed_files) if all_changed_files else {}
627 tmpl = _TemplateInfo(repo=template_repo, branch=template_branch, last_sync=last_sync)
629 # Custom Jinja2 template takes full precedence over all other options
630 if opts.jinja2_template:
631 context = {
632 "template_repo": tmpl.repo,
633 "template_branch": tmpl.branch,
634 "last_sync": tmpl.last_sync,
635 "sync_date": datetime.now().astimezone().isoformat(),
636 "changes": changes,
637 "categories": categories,
638 "title": opts.title,
639 }
640 return _generate_jinja2_output(opts.jinja2_template, context)
642 if opts.output_format == "json":
643 return _generate_json_output(changes, categories, tmpl)
645 if opts.output_format == "plain":
646 return _generate_plain_output(changes, categories, tmpl, opts)
648 return _markdown_body(changes, categories, tmpl, opts)
def summarise(
    target: Path,
    output: Path | None = None,
    *,
    options: SummariseOptions | None = None,
) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.
        options: Output customisation options. Defaults to :class:`SummariseOptions`
            with all fields at their defaults.

    Raises:
        RuntimeError: If *target* contains no ``.git`` entry.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # ``.git`` is a directory in a regular clone but a plain file (holding a
    # "gitdir: <path>" pointer) in linked worktrees and submodules, so test
    # for existence rather than for a directory.
    if not (target / ".git").exists():
        err_msg = f"Target directory is not a git repository: {target}"
        logger.error(err_msg)
        logger.error("Initialize a git repository with 'git init' first")
        raise RuntimeError(err_msg)

    description = generate_pr_description(target, options)

    if output:
        output_path = output.resolve()
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        print(description)

    logger.success("Summary generated successfully")