Coverage for src / rhiza / commands / summarise.py: 100%

276 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-06-15 18:22 +0000

1"""Command for generating PR descriptions from staged changes. 

2 

3This module provides functionality to analyze staged git changes and generate 

4structured PR descriptions for rhiza sync operations. 

5""" 

6 

7import json as _json 

8import subprocess # nosec B404 

9from collections import defaultdict 

10from dataclasses import dataclass, field 

11from datetime import UTC, datetime 

12from pathlib import Path 

13from typing import Any, NamedTuple 

14 

15import jinja2 

16import yaml 

17from loguru import logger 

18 

19from rhiza.models.lock import TemplateLock 

20from rhiza.models.template import RhizaTemplate 

21 

22 

23@dataclass(kw_only=True) 

24class SummariseOptions: 

25 """Options controlling the output of :func:`generate_pr_description`. 

26 

27 All fields are keyword-only and default to the standard behaviour so 

28 callers only need to set the fields they want to override. 

29 """ 

30 

31 include_header: bool = True 

32 """Whether to include the header section (markdown / plain formats).""" 

33 

34 include_footer: bool = True 

35 """Whether to include the footer section (markdown / plain formats).""" 

36 

37 include_categories: bool = True 

38 """Whether to group changes by category; when ``False`` a flat list is shown.""" 

39 

40 output_format: str = "markdown" 

41 """Output format: ``"markdown"`` (default), ``"plain"``, or ``"json"``.""" 

42 

43 title: str | None = None 

44 """Override the section heading; ``None`` uses the built-in default.""" 

45 

46 compare_ref: str | None = None 

47 """Compare against this git ref instead of the staged index.""" 

48 

49 jinja2_template: Path | None = field(default=None) 

50 """Path to a Jinja2 template file for fully custom output.""" 

51 

52 

53class _TemplateInfo(NamedTuple): 

54 """Lightweight container for template metadata used during rendering.""" 

55 

56 repo: str 

57 branch: str 

58 last_sync: str | None 

59 

60 

def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Execute ``git`` with the given arguments and hand back its stdout.

    Args:
        args: Arguments for git, excluding the leading ``git`` itself.
        cwd: Directory in which the command is executed.

    Returns:
        The captured standard output with surrounding whitespace stripped,
        or an empty string when git exits with a non-zero status (the error
        is logged in that case).
    """
    try:
        completed = subprocess.run(  # nosec B603 B607  # noqa: S603
            ["git", *args],  # noqa: S607
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # check=True turns a non-zero exit status into this exception.
        logger.error(f"Error running git {' '.join(args)}: {e.stderr}")
        return ""
    return completed.stdout.strip()

83 

84 

def get_staged_changes(repo_path: Path, compare_ref: str | None = None) -> dict[str, list[str]]:
    """Get list of changes categorized by type.

    Parses the output of ``git diff --name-status``. Each line starts with a
    status letter, optionally followed by a similarity score, then a tab and
    the path. Rename and copy entries carry two tab-separated paths (source
    and destination); the destination is the one reported.

    Args:
        repo_path: Path to the repository
        compare_ref: Optional git ref to compare against. When provided the
            working tree is diffed against this ref instead of the staged index.

    Returns:
        Dictionary with keys 'added', 'modified', 'deleted' containing file lists.
        Renames and type changes are reported as 'modified', copies as 'added'.
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }

    # First letter of the git status field -> bucket in ``changes``.
    bucket_for_status = {
        "A": "added",
        "C": "added",  # copy: the destination is a new file
        "M": "modified",
        "R": "modified",  # rename: treated as a modification of the new path
        "T": "modified",  # type change (e.g. regular file <-> symlink)
        "D": "deleted",
    }

    # Compare against a specific ref, or fall back to staged changes
    diff_args = ["diff", compare_ref, "--name-status"] if compare_ref else ["diff", "--cached", "--name-status"]

    output = run_git_command(diff_args, cwd=repo_path)

    for line in output.split("\n"):
        status, separator, paths = line.partition("\t")
        if not separator:
            # Blank or malformed line: there is no path to record.
            continue

        bucket = bucket_for_status.get(status[:1])
        if bucket is None:
            continue

        # "R100\told\tnew" / "C75\tsrc\tdst": keep only the destination path.
        # Single-path entries are returned unchanged by the rsplit.
        changes[bucket].append(paths.rsplit("\t", 1)[-1])

    return changes

126 

127 

# File names that count as "Configuration Files" when a changed path is
# exactly equal to one of them.
_CONFIG_FILES: frozenset[str] = frozenset(
    (
        "Makefile",
        "ruff.toml",
        "pytest.ini",
        ".editorconfig",
        ".gitignore",
        ".pre-commit-config.yaml",
        "renovate.json",
        ".python-version",
    )
)

140 

141 

# Top-level directory name -> category label.
_DIR_CATEGORIES: dict[str, str] = dict(tests="Tests", src="Source Code")
# Top-level directories whose contents are categorized as documentation.
_DOC_DIRS: frozenset[str] = frozenset(("book", "docs"))

147 

148 

149def _categorize_by_directory(first_dir: str, filepath: str) -> str | None: 

150 """Categorize file based on its first directory. 

151 

152 Args: 

153 first_dir: First directory in the path 

154 filepath: Full file path 

155 

156 Returns: 

157 Category name or None if no match 

158 """ 

159 if first_dir == ".github": 

160 path_parts = Path(filepath).parts 

161 if len(path_parts) > 1 and path_parts[1] == "workflows": 

162 return "GitHub Actions Workflows" 

163 return "GitHub Configuration" 

164 

165 if first_dir == ".rhiza": 

166 if "script" in filepath.lower(): 

167 return "Rhiza Scripts" 

168 if "Makefile" in filepath: 

169 return "Makefiles" 

170 return "Rhiza Configuration" 

171 

172 if first_dir in _DIR_CATEGORIES: 

173 return _DIR_CATEGORIES[first_dir] 

174 

175 if first_dir in _DOC_DIRS: 

176 return "Documentation" 

177 

178 return None 

179 

180 

def _categorize_single_file(filepath: str) -> str:
    """Pick the category for one file path.

    The leading directory is consulted first; only when it yields nothing are
    the ``.md`` suffix and the known config-file names checked.

    Args:
        filepath: The path to classify

    Returns:
        The category label ("Other" when nothing matches)
    """
    components = Path(filepath).parts

    if components:
        from_directory = _categorize_by_directory(components[0], filepath)
        if from_directory:
            return from_directory

        if filepath.endswith(".md"):
            return "Documentation"

        if filepath in _CONFIG_FILES:
            return "Configuration Files"

    return "Other"

208 

209 

def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under their category label.

    Args:
        files: The file paths to group

    Returns:
        Mapping of category label to the paths in that category, with paths
        kept in the order they were supplied
    """
    grouped: dict[str, list[str]] = {}

    for path in files:
        grouped.setdefault(_categorize_single_file(path), []).append(path)

    return grouped

226 

227 

228def get_template_info(repo_path: Path) -> tuple[str, str]: 

229 """Get template repository and branch from template.lock or template.yml. 

230 

231 Prefers ``template.lock`` as the authoritative record of the last sync. 

232 Falls back to ``template.yml`` if the lock file is absent or incomplete. 

233 Returns empty strings when no configuration is found, rather than 

234 defaulting to any hardcoded repository name. 

235 

236 Args: 

237 repo_path: Path to the repository 

238 

239 Returns: 

240 Tuple of (template_repo, template_branch) 

241 """ 

242 # Prefer template.lock - it is the authoritative record of what was synced 

243 lock_file = repo_path / ".rhiza" / "template.lock" 

244 if lock_file.exists(): 

245 try: 

246 lock = TemplateLock.from_yaml(lock_file) 

247 if lock.repo: 

248 return lock.repo, lock.ref 

249 except (yaml.YAMLError, ValueError, TypeError, KeyError): 

250 logger.warning("Failed to read template.lock; falling back to template.yml") 

251 

252 # Fall back to template.yml, using the proper model which handles both 

253 # 'template-repository'/'repository' and 'template-branch'/'ref' key variants 

254 template_file = repo_path / ".rhiza" / "template.yml" 

255 if not template_file.exists(): 

256 return ("", "") 

257 

258 try: 

259 template = RhizaTemplate.from_yaml(template_file) 

260 except (yaml.YAMLError, ValueError, TypeError, KeyError): 

261 logger.warning("Failed to read template.yml") 

262 return ("", "") 

263 

264 return template.template_repository, template.template_branch 

265 

266 

def get_last_sync_date(repo_path: Path, template_repo: str = "") -> str | None:
    """Get the date of the last sync.

    Sources are tried in this order:

    1. ``synced_at`` recorded in ``.rhiza/template.lock``.
    2. The committer date of the most recent commit whose message matches one
       of the grep patterns (case-insensitive): the template's short name
       (when known), ``Sync`` or ``template``. ``git log`` treats multiple
       ``--grep`` patterns as alternatives unless ``--all-match`` is given.
    3. The modification time of ``.rhiza/history``, if that file exists.

    Args:
        repo_path: Path to the repository
        template_repo: Template repository name (e.g. ``"my-org/my-template"``)
            used to derive the short name for git-log grep patterns.

    Returns:
        ISO format date string or None if not found
    """
    # Prefer template.lock synced_at - it is the most reliable source
    lock_file = repo_path / ".rhiza" / "template.lock"
    if lock_file.exists():
        try:
            lock = TemplateLock.from_yaml(lock_file)
            if lock.synced_at:
                return lock.synced_at
        except (yaml.YAMLError, ValueError, TypeError, KeyError) as exc:
            # Best effort: leave a trace of the unreadable lock file rather
            # than swallowing it silently, then use the fallbacks below.
            logger.debug(f"Could not read synced_at from template.lock: {exc}")

    # Derive the short name from the template repo for targeted grepping.
    # Trailing slashes are dropped so "org/name/" still yields "name".
    template_short_name = template_repo.rstrip("/").rsplit("/", 1)[-1] if template_repo else ""

    grep_args = ["log", "--format=%cI", "-1"]
    if template_short_name:
        grep_args.extend(["--grep", template_short_name])
    grep_args.extend(["--grep=Sync", "--grep=template", "-i"])

    output = run_git_command(grep_args, cwd=repo_path)
    if output:
        return output

    # Fallback: try to get date from history file if it exists
    history_file = repo_path / ".rhiza" / "history"
    if history_file.exists():
        # Use the file modification time, expressed in UTC
        stat = history_file.stat()
        return datetime.fromtimestamp(stat.st_mtime, tz=UTC).isoformat()

    return None

313 

314 

def _format_file_list(files: list[str], status_emoji: str) -> list[str]:
    """Render file paths as markdown bullet items prefixed by an emoji.

    Args:
        files: The file paths to render
        status_emoji: Marker emoji (✅ for added, 📝 for modified, ❌ for deleted)

    Returns:
        One bullet line per path, in sorted path order
    """
    return [f"- {status_emoji} `{path}`" for path in sorted(files)]

329 

330 

def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None:
    """Append a collapsible ``<details>`` block listing *files* to *lines*.

    Nothing is appended when *files* is empty.

    Args:
        lines: Output list that receives the new lines
        title: Heading shown in the summary (e.g., "Added", "Modified")
        count: File count shown next to the heading
        files: The file paths to list
        emoji: Marker emoji placed before each path
    """
    if files:
        bullets = [f"- {emoji} `{path}`" for path in sorted(files)]
        lines += [
            "<details>",
            f"<summary>{title} ({count})</summary>",
            "",
            *bullets,
            "",
            "</details>",
            "",
        ]

351 

352 

353def _build_header(template_repo: str, title: str | None = None) -> list[str]: 

354 """Build the PR description header. 

355 

356 Args: 

357 template_repo: Template repository name 

358 title: Optional override for the section heading 

359 

360 Returns: 

361 List of header lines 

362 """ 

363 header_title = title if title else "## 🔄 Template Synchronization" 

364 lines = [header_title, ""] 

365 if template_repo: 

366 url = f"https://github.com/{template_repo}" 

367 repo_link = f"[{template_repo}]({url})" 

368 sync_line = f"This PR synchronizes the repository with the {repo_link} template." 

369 lines.append(sync_line) 

370 else: 

371 lines.append("This PR synchronizes the repository with the upstream template.") 

372 lines.append("") 

373 return lines 

374 

375 

def _build_summary(changes: dict[str, list[str]]) -> list[str]:
    """Assemble the section reporting how many files changed per type.

    Args:
        changes: Changed files keyed by 'added', 'modified' and 'deleted'

    Returns:
        The summary lines, ending with a blank line
    """
    counts = [f"- **{len(changes[kind])}** files {kind}" for kind in ("added", "modified", "deleted")]
    return ["### 📊 Change Summary", "", *counts, ""]

393 

394 

def _build_footer(tmpl: _TemplateInfo) -> list[str]:
    """Assemble the closing metadata lines of the PR description.

    The template and last-sync entries appear only when the corresponding
    value is set; the current local time is always added as the sync date.

    Args:
        tmpl: Template metadata container

    Returns:
        The footer lines
    """
    footer = [
        "---",
        "",
        "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**",
        "",
    ]
    if tmpl.repo:
        footer += [f"- Template: `{tmpl.repo}@{tmpl.branch}`"]
    if tmpl.last_sync:
        footer += [f"- Last sync: {tmpl.last_sync}"]
    sync_date = datetime.now().astimezone().isoformat()
    footer += [f"- Sync date: {sync_date}"]
    return footer

416 

417 

def _generate_json_output(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
) -> str:
    """Serialise the change data and template metadata as JSON.

    Args:
        changes: Changed files keyed by change type
        categories: Changed files keyed by category
        tmpl: Template metadata container

    Returns:
        JSON document indented by two spaces
    """
    sync_date = datetime.now().astimezone().isoformat()
    payload: dict[str, Any] = {}
    payload["template_repo"] = tmpl.repo
    payload["template_branch"] = tmpl.branch
    payload["last_sync"] = tmpl.last_sync
    payload["sync_date"] = sync_date
    payload["changes"] = changes
    payload["categories"] = categories
    return _json.dumps(payload, indent=2)

442 

443 

def _plain_file_section(lines: list[str], label: str, files: list[str]) -> None:
    """Add a plain-text block of sorted file paths under *label* to *lines*.

    Nothing is added when *files* is empty.

    Args:
        lines: Output list that receives the new lines
        label: Heading for the block (e.g. "Added")
        files: The file paths to list
    """
    if files:
        lines += [f"{label}:", *(f" {path}" for path in sorted(files)), ""]

457 

458 

def _generate_plain_output(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Render the change data as plain text.

    Args:
        changes: Changed files keyed by change type
        categories: Changed files keyed by category
        tmpl: Template metadata container
        options: Output customisation options

    Returns:
        The plain-text report
    """
    out: list[str] = []

    if options.include_header:
        heading = options.title or "Template Synchronization"
        out += [heading, "=" * len(heading), ""]
        if tmpl.repo:
            out += [f"Template: {tmpl.repo}@{tmpl.branch}", ""]

    # With nothing changed the report stops here, without a footer.
    if sum(map(len, changes.values())) == 0:
        out.append("No changes detected.")
        return "\n".join(out)

    n_added = len(changes["added"])
    n_modified = len(changes["modified"])
    n_deleted = len(changes["deleted"])
    out += [f"Changes: {n_added} added, {n_modified} modified, {n_deleted} deleted", ""]

    if options.include_categories:
        for category in sorted(categories):
            out.append(f"{category}:")
            out.extend(f" {path}" for path in sorted(categories[category]))
            out.append("")
    else:
        for label, kind in (("Added", "added"), ("Modified", "modified"), ("Deleted", "deleted")):
            _plain_file_section(out, label, changes[kind])

    if options.include_footer:
        if tmpl.last_sync:
            out.append(f"Last sync: {tmpl.last_sync}")
        out.append(f"Sync date: {datetime.now().astimezone().isoformat()}")

    return "\n".join(out)

516 

517 

def _generate_jinja2_output(template_path: Path, context: dict[str, Any]) -> str:
    """Render a user-supplied Jinja2 template file with *context*.

    The *context* mapping is expanded into template variables as-is. Callers
    are expected to supply at least ``template_repo``, ``template_branch``,
    ``last_sync``, ``sync_date``, ``changes``, ``categories`` and ``title``.

    Note:
        Autoescaping is switched off because the result is plain text or
        Markdown rather than HTML. The rendered text is **not** sanitised for
        HTML, so escape it before using it in a web context.

    Args:
        template_path: Location of the Jinja2 template file
        context: Variables made available to the template

    Returns:
        The rendered text
    """
    environment = jinja2.Environment(  # nosec B701
        autoescape=False,  # noqa: S701
        loader=jinja2.BaseLoader(),
        keep_trailing_newline=True,
    )
    source = template_path.read_text(encoding="utf-8")
    template = environment.from_string(source)
    return template.render(**context)

544 

545 

def _markdown_body(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Build the markdown PR description body.

    The body consists of an optional header, a change summary, the changed
    files (grouped per category or as a flat list, depending on
    ``options.include_categories``) and an optional footer. When there are no
    changes at all, only the header, a "No changes detected." line and the
    footer are produced.

    Args:
        changes: Dictionary of changes by type
        categories: Files grouped by category
        tmpl: Template metadata container
        options: Output customisation options

    Returns:
        Markdown-formatted string
    """
    lines: list[str] = []

    if options.include_header:
        lines.extend(_build_header(tmpl.repo, title=options.title))

    total_changes = sum(len(files) for files in changes.values())
    if not total_changes:
        lines.append("No changes detected.")
        if options.include_footer:
            lines.append("")
            lines.extend(_build_footer(tmpl))
        return "\n".join(lines)

    lines.extend(_build_summary(changes))

    # (section title, key in ``changes``, status emoji), in display order.
    sections = (
        ("Added", "added", "✅"),
        ("Modified", "modified", "📝"),
        ("Deleted", "deleted", "❌"),
    )

    if options.include_categories and categories:
        lines.append("### 📁 Changes by Category")
        lines.append("")

        # Build the membership sets once so that splitting each category by
        # change type does not rescan the full change lists for every file.
        members = {key: set(changes[key]) for _, key, _ in sections}

        for category, files in sorted(categories.items()):
            lines.append(f"#### {category}")
            lines.append("")

            for title, key, emoji in sections:
                subset = [f for f in files if f in members[key]]
                _add_category_section(lines, title, len(subset), subset, emoji)

    elif not options.include_categories:
        lines.append("### 📁 Changed Files")
        lines.append("")
        for title, key, emoji in sections:
            _add_category_section(lines, title, len(changes[key]), changes[key], emoji)

    if options.include_footer:
        lines.extend(_build_footer(tmpl))

    return "\n".join(lines)

605 

606 

607def generate_pr_description(repo_path: Path, options: SummariseOptions | None = None) -> str: 

608 """Generate PR description based on staged changes. 

609 

610 Args: 

611 repo_path: Path to the repository 

612 options: Output customisation options. Defaults to :class:`SummariseOptions` 

613 with all fields at their defaults (markdown format, with header / footer / 

614 categories, no custom title, staged-index diff). 

615 

616 Returns: 

617 Formatted PR description 

618 """ 

619 opts = options or SummariseOptions() 

620 

621 changes = get_staged_changes(repo_path, compare_ref=opts.compare_ref) 

622 template_repo, template_branch = get_template_info(repo_path) 

623 last_sync = get_last_sync_date(repo_path, template_repo=template_repo) 

624 

625 all_changed_files = changes["added"] + changes["modified"] + changes["deleted"] 

626 categories = categorize_files(all_changed_files) if all_changed_files else {} 

627 

628 tmpl = _TemplateInfo(repo=template_repo, branch=template_branch, last_sync=last_sync) 

629 

630 # Custom Jinja2 template takes full precedence over all other options 

631 if opts.jinja2_template: 

632 context = { 

633 "template_repo": tmpl.repo, 

634 "template_branch": tmpl.branch, 

635 "last_sync": tmpl.last_sync, 

636 "sync_date": datetime.now().astimezone().isoformat(), 

637 "changes": changes, 

638 "categories": categories, 

639 "title": opts.title, 

640 } 

641 return _generate_jinja2_output(opts.jinja2_template, context) 

642 

643 if opts.output_format == "json": 

644 return _generate_json_output(changes, categories, tmpl) 

645 

646 if opts.output_format == "plain": 

647 return _generate_plain_output(changes, categories, tmpl, opts) 

648 

649 return _markdown_body(changes, categories, tmpl, opts) 

650 

651 

def summarise(
    target: Path,
    output: Path | None = None,
    *,
    options: SummariseOptions | None = None,
) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.
        options: Output customisation options. Defaults to :class:`SummariseOptions`
            with all fields at their defaults.

    Raises:
        RuntimeError: If ``target`` contains no ``.git`` entry.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # Check if target is a git repository. ``.git`` is a directory in a normal
    # clone but a regular file ("gitdir: ...") in linked worktrees and
    # submodules, so test for existence rather than for a directory.
    if not (target / ".git").exists():
        err_msg = f"Target directory is not a git repository: {target}"
        logger.error(err_msg)
        logger.error("Initialize a git repository with 'git init' first")
        raise RuntimeError(err_msg)

    description = generate_pr_description(target, options)

    if output:
        output_path = output.resolve()
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        print(description)

    logger.success("Summary generated successfully")