Coverage for src / rhiza / commands / summarise.py: 100%
276 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-06-15 18:22 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-06-15 18:22 +0000
1"""Command for generating PR descriptions from staged changes.
3This module provides functionality to analyze staged git changes and generate
4structured PR descriptions for rhiza sync operations.
5"""
7import json as _json
8import subprocess # nosec B404
9from collections import defaultdict
10from dataclasses import dataclass, field
11from datetime import UTC, datetime
12from pathlib import Path
13from typing import Any, NamedTuple
15import jinja2
16import yaml
17from loguru import logger
19from rhiza.models.lock import TemplateLock
20from rhiza.models.template import RhizaTemplate
23@dataclass(kw_only=True)
24class SummariseOptions:
25 """Options controlling the output of :func:`generate_pr_description`.
27 All fields are keyword-only and default to the standard behaviour so
28 callers only need to set the fields they want to override.
29 """
31 include_header: bool = True
32 """Whether to include the header section (markdown / plain formats)."""
34 include_footer: bool = True
35 """Whether to include the footer section (markdown / plain formats)."""
37 include_categories: bool = True
38 """Whether to group changes by category; when ``False`` a flat list is shown."""
40 output_format: str = "markdown"
41 """Output format: ``"markdown"`` (default), ``"plain"``, or ``"json"``."""
43 title: str | None = None
44 """Override the section heading; ``None`` uses the built-in default."""
46 compare_ref: str | None = None
47 """Compare against this git ref instead of the staged index."""
49 jinja2_template: Path | None = field(default=None)
50 """Path to a Jinja2 template file for fully custom output."""
53class _TemplateInfo(NamedTuple):
54 """Lightweight container for template metadata used during rendering."""
56 repo: str
57 branch: str
58 last_sync: str | None
def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Invoke ``git`` with *args* and hand back its stripped standard output.

    Args:
        args: Arguments placed after the ``git`` executable name.
        cwd: Working directory forwarded to ``subprocess.run``.

    Returns:
        Captured stdout with surrounding whitespace removed, or an empty
        string when git exits with a non-zero status (the error is logged).
    """
    command = ["git", *args]  # noqa: S607
    try:
        completed = subprocess.run(  # nosec B603 B607 # noqa: S603
            command,
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running git {' '.join(args)}: {e.stderr}")
        return ""
    else:
        return completed.stdout.strip()
def get_staged_changes(repo_path: Path, compare_ref: str | None = None) -> dict[str, list[str]]:
    """Get list of changes categorized by type.

    Parses the output of ``git diff --name-status``. Each row is a status
    field followed by one or two tab-separated paths; rename and copy rows
    (``R<score>`` / ``C<score>``) carry the old path first and the new path
    last, so the last field is always the path that exists after the change.

    Args:
        repo_path: Path to the repository
        compare_ref: Optional git ref to compare against. When provided the
            working tree is diffed against this ref instead of the staged index.

    Returns:
        Dictionary with keys 'added', 'modified', 'deleted' containing file lists.
        Renames and type changes are reported under 'modified' (using the new
        path for renames); copies are reported under 'added'.
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }

    # First letter of the status field -> bucket in ``changes``.
    bucket_for_status = {
        "A": "added",
        "C": "added",  # copy: the destination is a new file
        "M": "modified",
        "T": "modified",  # type change (e.g. file <-> symlink)
        "R": "modified",  # rename: treated as a modification of the new path
        "D": "deleted",
    }

    # Compare against a specific ref, or fall back to staged changes
    diff_args = ["diff", compare_ref, "--name-status"] if compare_ref else ["diff", "--cached", "--name-status"]

    output = run_git_command(diff_args, cwd=repo_path)

    for line in output.split("\n"):
        fields = line.split("\t")
        if len(fields) < 2:
            # Blank line or a row without a path column.
            continue

        # Rename/copy rows have three fields; the last one is the new path.
        status, filepath = fields[0], fields[-1]
        bucket = bucket_for_status.get(status[:1])
        if bucket is not None:
            changes[bucket].append(filepath)

    return changes
128_CONFIG_FILES: frozenset[str] = frozenset(
129 {
130 "Makefile",
131 "ruff.toml",
132 "pytest.ini",
133 ".editorconfig",
134 ".gitignore",
135 ".pre-commit-config.yaml",
136 "renovate.json",
137 ".python-version",
138 }
139)
142_DIR_CATEGORIES: dict[str, str] = {
143 "tests": "Tests",
144 "src": "Source Code",
145}
146_DOC_DIRS: frozenset[str] = frozenset({"book", "docs"})
149def _categorize_by_directory(first_dir: str, filepath: str) -> str | None:
150 """Categorize file based on its first directory.
152 Args:
153 first_dir: First directory in the path
154 filepath: Full file path
156 Returns:
157 Category name or None if no match
158 """
159 if first_dir == ".github":
160 path_parts = Path(filepath).parts
161 if len(path_parts) > 1 and path_parts[1] == "workflows":
162 return "GitHub Actions Workflows"
163 return "GitHub Configuration"
165 if first_dir == ".rhiza":
166 if "script" in filepath.lower():
167 return "Rhiza Scripts"
168 if "Makefile" in filepath:
169 return "Makefiles"
170 return "Rhiza Configuration"
172 if first_dir in _DIR_CATEGORIES:
173 return _DIR_CATEGORIES[first_dir]
175 if first_dir in _DOC_DIRS:
176 return "Documentation"
178 return None
181def _categorize_single_file(filepath: str) -> str:
182 """Categorize a single file path.
184 Args:
185 filepath: File path to categorize
187 Returns:
188 Category name
189 """
190 path_parts = Path(filepath).parts
192 if not path_parts:
193 return "Other"
195 # Try directory-based categorization first
196 category = _categorize_by_directory(path_parts[0], filepath)
197 if category:
198 return category
200 # Check file-based categories
201 if filepath.endswith(".md"):
202 return "Documentation"
204 if filepath in _CONFIG_FILES:
205 return "Configuration Files"
207 return "Other"
def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under their category label.

    Args:
        files: List of file paths

    Returns:
        Dictionary mapping each category name to the paths that fell into it,
        in the order the paths were supplied
    """
    grouped: dict[str, list[str]] = {}
    for path in files:
        grouped.setdefault(_categorize_single_file(path), []).append(path)
    return grouped
def get_template_info(repo_path: Path) -> tuple[str, str]:
    """Look up the template repository and branch for a project.

    ``.rhiza/template.lock`` is consulted first because it records what was
    actually synced. If it is missing, unreadable, or names no repository,
    ``.rhiza/template.yml`` is used instead. When neither yields a result the
    function returns empty strings rather than a hardcoded repository name.

    Args:
        repo_path: Path to the repository

    Returns:
        Tuple of (template_repo, template_branch)
    """
    rhiza_dir = repo_path / ".rhiza"

    lock_path = rhiza_dir / "template.lock"
    if lock_path.exists():
        try:
            locked = TemplateLock.from_yaml(lock_path)
            if locked.repo:
                return locked.repo, locked.ref
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.lock; falling back to template.yml")

    # The RhizaTemplate model copes with both the 'template-repository'/'repository'
    # and the 'template-branch'/'ref' spellings of the keys.
    config_path = rhiza_dir / "template.yml"
    if config_path.exists():
        try:
            parsed = RhizaTemplate.from_yaml(config_path)
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.yml")
        else:
            return parsed.template_repository, parsed.template_branch

    return ("", "")
def get_last_sync_date(repo_path: Path, template_repo: str = "") -> str | None:
    """Get the date of the last sync.

    Sources are tried in this order:

    1. ``synced_at`` recorded in ``.rhiza/template.lock``.
    2. The committer date of the most recent commit returned by a
       case-insensitive ``git log --grep`` search for "Sync", "template" and,
       when *template_repo* is given, its short name.
    3. The modification time of ``.rhiza/history``.

    Args:
        repo_path: Path to the repository
        template_repo: Template repository name (e.g. ``"my-org/my-template"``)
            used to derive the short name for git-log grep patterns.

    Returns:
        ISO format date string or None if not found
    """
    # Prefer template.lock synced_at - it is the most reliable source
    lock_file = repo_path / ".rhiza" / "template.lock"
    if lock_file.exists():
        try:
            lock = TemplateLock.from_yaml(lock_file)
            if lock.synced_at:
                return lock.synced_at
        except (yaml.YAMLError, ValueError, TypeError, KeyError) as exc:
            # Best effort: an unreadable lock file must not abort the summary,
            # but leave a trace instead of swallowing the failure silently.
            logger.debug(f"Could not read synced_at from template.lock ({exc}); falling back to git log")

    # Derive the short name from the template repo for targeted grepping
    template_short_name = template_repo.rsplit("/", 1)[-1] if template_repo else ""

    log_args = ["log", "--format=%cI", "-1"]
    if template_short_name:
        log_args.extend(["--grep", template_short_name])
    log_args.extend(["--grep=Sync", "--grep=template", "-i"])

    output = run_git_command(log_args, cwd=repo_path)
    if output:
        return output

    # Fallback: use the modification time of the history file if it exists
    history_file = repo_path / ".rhiza" / "history"
    if history_file.exists():
        modified_at = history_file.stat().st_mtime
        return datetime.fromtimestamp(modified_at, tz=UTC).isoformat()

    return None
315def _format_file_list(files: list[str], status_emoji: str) -> list[str]:
316 """Format a list of files with the given status emoji.
318 Args:
319 files: List of file paths
320 status_emoji: Emoji to use (✅ for added, 📝 for modified, ❌ for deleted)
322 Returns:
323 List of formatted lines
324 """
325 lines = []
326 for f in sorted(files):
327 lines.append(f"- {status_emoji} `{f}`")
328 return lines
331def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None:
332 """Add a collapsible section for a category and change type.
334 Args:
335 lines: List to append lines to
336 title: Section title (e.g., "Added", "Modified")
337 count: Number of files
338 files: List of file paths
339 emoji: Status emoji
340 """
341 if not files:
342 return
344 lines.append("<details>")
345 lines.append(f"<summary>{title} ({count})</summary>")
346 lines.append("")
347 lines.extend(_format_file_list(files, emoji))
348 lines.append("")
349 lines.append("</details>")
350 lines.append("")
353def _build_header(template_repo: str, title: str | None = None) -> list[str]:
354 """Build the PR description header.
356 Args:
357 template_repo: Template repository name
358 title: Optional override for the section heading
360 Returns:
361 List of header lines
362 """
363 header_title = title if title else "## 🔄 Template Synchronization"
364 lines = [header_title, ""]
365 if template_repo:
366 url = f"https://github.com/{template_repo}"
367 repo_link = f"[{template_repo}]({url})"
368 sync_line = f"This PR synchronizes the repository with the {repo_link} template."
369 lines.append(sync_line)
370 else:
371 lines.append("This PR synchronizes the repository with the upstream template.")
372 lines.append("")
373 return lines
376def _build_summary(changes: dict[str, list[str]]) -> list[str]:
377 """Build the change summary section.
379 Args:
380 changes: Dictionary of changes by type
382 Returns:
383 List of summary lines
384 """
385 return [
386 "### 📊 Change Summary",
387 "",
388 f"- **{len(changes['added'])}** files added",
389 f"- **{len(changes['modified'])}** files modified",
390 f"- **{len(changes['deleted'])}** files deleted",
391 "",
392 ]
395def _build_footer(tmpl: _TemplateInfo) -> list[str]:
396 """Build the PR description footer with metadata.
398 Args:
399 tmpl: Template metadata container
401 Returns:
402 List of footer lines
403 """
404 lines = [
405 "---",
406 "",
407 "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**",
408 "",
409 ]
410 if tmpl.repo:
411 lines.append(f"- Template: `{tmpl.repo}@{tmpl.branch}`")
412 if tmpl.last_sync:
413 lines.append(f"- Last sync: {tmpl.last_sync}")
414 lines.append(f"- Sync date: {datetime.now().astimezone().isoformat()}")
415 return lines
418def _generate_json_output(
419 changes: dict[str, list[str]],
420 categories: dict[str, list[str]],
421 tmpl: _TemplateInfo,
422) -> str:
423 """Generate a JSON representation of the change data.
425 Args:
426 changes: Dictionary of changes by type
427 categories: Files grouped by category
428 tmpl: Template metadata container
430 Returns:
431 JSON-formatted string
432 """
433 data = {
434 "template_repo": tmpl.repo,
435 "template_branch": tmpl.branch,
436 "last_sync": tmpl.last_sync,
437 "sync_date": datetime.now().astimezone().isoformat(),
438 "changes": changes,
439 "categories": categories,
440 }
441 return _json.dumps(data, indent=2)
444def _plain_file_section(lines: list[str], label: str, files: list[str]) -> None:
445 """Append a labelled block of files to *lines* in plain-text format.
447 Args:
448 lines: List to append lines to
449 label: Section label (e.g. "Added")
450 files: List of file paths
451 """
452 if not files:
453 return
454 lines.append(f"{label}:")
455 lines.extend(f" {f}" for f in sorted(files))
456 lines.append("")
459def _generate_plain_output(
460 changes: dict[str, list[str]],
461 categories: dict[str, list[str]],
462 tmpl: _TemplateInfo,
463 options: SummariseOptions,
464) -> str:
465 """Generate plain-text output from change data.
467 Args:
468 changes: Dictionary of changes by type
469 categories: Files grouped by category
470 tmpl: Template metadata container
471 options: Output customisation options
473 Returns:
474 Plain-text formatted string
475 """
476 lines: list[str] = []
478 if options.include_header:
479 heading = options.title or "Template Synchronization"
480 lines.extend([heading, "=" * len(heading), ""])
481 if tmpl.repo:
482 lines.append(f"Template: {tmpl.repo}@{tmpl.branch}")
483 lines.append("")
485 total = sum(len(v) for v in changes.values())
486 if not total:
487 lines.append("No changes detected.")
488 return "\n".join(lines)
490 lines.append(
491 f"Changes: {len(changes['added'])} added, "
492 f"{len(changes['modified'])} modified, "
493 f"{len(changes['deleted'])} deleted",
494 )
495 lines.append("")
497 if options.include_categories:
498 for category, files in sorted(categories.items()):
499 lines.append(f"{category}:")
500 lines.extend(f" {f}" for f in sorted(files))
501 lines.append("")
502 else:
503 for label, files in [
504 ("Added", changes["added"]),
505 ("Modified", changes["modified"]),
506 ("Deleted", changes["deleted"]),
507 ]:
508 _plain_file_section(lines, label, files)
510 if options.include_footer:
511 if tmpl.last_sync:
512 lines.append(f"Last sync: {tmpl.last_sync}")
513 lines.append(f"Sync date: {datetime.now().astimezone().isoformat()}")
515 return "\n".join(lines)
def _generate_jinja2_output(template_path: Path, context: dict[str, Any]) -> str:
    """Render a user-supplied Jinja2 template file with *context*.

    The *context* dict is unpacked into the template's variables. Callers are
    expected to provide at least ``template_repo``, ``template_branch``,
    ``last_sync``, ``sync_date``, ``changes``, ``categories`` and ``title``.

    Note:
        Autoescape is switched off because the result is plain text / Markdown,
        not HTML. Do **not** place the rendered output in a web page without
        escaping it first; nothing here sanitises it for HTML.

    Args:
        template_path: Path to the Jinja2 template file (read as UTF-8)
        context: Template context variables

    Returns:
        Rendered template string
    """
    source = template_path.read_text(encoding="utf-8")
    environment = jinja2.Environment(  # nosec B701
        autoescape=False,  # noqa: S701
        loader=jinja2.BaseLoader(),
        keep_trailing_newline=True,
    )
    template = environment.from_string(source)
    return template.render(**context)
546def _markdown_body(
547 changes: dict[str, list[str]],
548 categories: dict[str, list[str]],
549 tmpl: _TemplateInfo,
550 options: SummariseOptions,
551) -> str:
552 """Build the markdown PR description body.
554 Args:
555 changes: Dictionary of changes by type
556 categories: Files grouped by category
557 tmpl: Template metadata container
558 options: Output customisation options
560 Returns:
561 Markdown-formatted string
562 """
563 lines: list[str] = []
565 if options.include_header:
566 lines.extend(_build_header(tmpl.repo, title=options.title))
568 total_changes = sum(len(files) for files in changes.values())
569 if not total_changes:
570 lines.append("No changes detected.")
571 if options.include_footer:
572 lines.append("")
573 lines.extend(_build_footer(tmpl))
574 return "\n".join(lines)
576 lines.extend(_build_summary(changes))
578 if options.include_categories and categories:
579 lines.append("### 📁 Changes by Category")
580 lines.append("")
582 for category, files in sorted(categories.items()):
583 lines.append(f"#### {category}")
584 lines.append("")
586 category_added = [f for f in files if f in changes["added"]]
587 category_modified = [f for f in files if f in changes["modified"]]
588 category_deleted = [f for f in files if f in changes["deleted"]]
590 _add_category_section(lines, "Added", len(category_added), category_added, "✅")
591 _add_category_section(lines, "Modified", len(category_modified), category_modified, "📝")
592 _add_category_section(lines, "Deleted", len(category_deleted), category_deleted, "❌")
594 elif not options.include_categories:
595 lines.append("### 📁 Changed Files")
596 lines.append("")
597 _add_category_section(lines, "Added", len(changes["added"]), changes["added"], "✅")
598 _add_category_section(lines, "Modified", len(changes["modified"]), changes["modified"], "📝")
599 _add_category_section(lines, "Deleted", len(changes["deleted"]), changes["deleted"], "❌")
601 if options.include_footer:
602 lines.extend(_build_footer(tmpl))
604 return "\n".join(lines)
607def generate_pr_description(repo_path: Path, options: SummariseOptions | None = None) -> str:
608 """Generate PR description based on staged changes.
610 Args:
611 repo_path: Path to the repository
612 options: Output customisation options. Defaults to :class:`SummariseOptions`
613 with all fields at their defaults (markdown format, with header / footer /
614 categories, no custom title, staged-index diff).
616 Returns:
617 Formatted PR description
618 """
619 opts = options or SummariseOptions()
621 changes = get_staged_changes(repo_path, compare_ref=opts.compare_ref)
622 template_repo, template_branch = get_template_info(repo_path)
623 last_sync = get_last_sync_date(repo_path, template_repo=template_repo)
625 all_changed_files = changes["added"] + changes["modified"] + changes["deleted"]
626 categories = categorize_files(all_changed_files) if all_changed_files else {}
628 tmpl = _TemplateInfo(repo=template_repo, branch=template_branch, last_sync=last_sync)
630 # Custom Jinja2 template takes full precedence over all other options
631 if opts.jinja2_template:
632 context = {
633 "template_repo": tmpl.repo,
634 "template_branch": tmpl.branch,
635 "last_sync": tmpl.last_sync,
636 "sync_date": datetime.now().astimezone().isoformat(),
637 "changes": changes,
638 "categories": categories,
639 "title": opts.title,
640 }
641 return _generate_jinja2_output(opts.jinja2_template, context)
643 if opts.output_format == "json":
644 return _generate_json_output(changes, categories, tmpl)
646 if opts.output_format == "plain":
647 return _generate_plain_output(changes, categories, tmpl, opts)
649 return _markdown_body(changes, categories, tmpl, opts)
def summarise(
    target: Path,
    output: Path | None = None,
    *,
    options: SummariseOptions | None = None,
) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.
        options: Output customisation options. Defaults to :class:`SummariseOptions`
            with all fields at their defaults.

    Raises:
        RuntimeError: If *target* contains no ``.git`` entry.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # Check if target is a git repository. ``.git`` is a directory in a normal
    # clone but a regular file in linked worktrees and submodules, so test for
    # existence rather than for a directory.
    if not (target / ".git").exists():
        err_msg = f"Target directory is not a git repository: {target}"
        logger.error(err_msg)
        logger.error("Initialize a git repository with 'git init' first")
        raise RuntimeError(err_msg)

    description = generate_pr_description(target, options)

    if output:
        output_path = output.resolve()
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        print(description)

    logger.success("Summary generated successfully")