Coverage for src / rhiza / commands / summarise.py: 93%

276 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-27 15:33 +0000

1"""Command for generating PR descriptions from staged changes. 

2 

3This module provides functionality to analyze staged git changes and generate 

4structured PR descriptions for rhiza sync operations. 

5""" 

6 

7import json as _json 

8import subprocess # nosec B404 

9from collections import defaultdict 

10from dataclasses import dataclass, field 

11from datetime import UTC, datetime 

12from pathlib import Path 

13from typing import NamedTuple 

14 

15import jinja2 

16import yaml 

17from loguru import logger 

18 

19from rhiza.models.lock import TemplateLock 

20from rhiza.models.template import RhizaTemplate 

21 

22 

23@dataclass(kw_only=True) 

24class SummariseOptions: 

25 """Options controlling the output of :func:`generate_pr_description`. 

26 

27 All fields are keyword-only and default to the standard behaviour so 

28 callers only need to set the fields they want to override. 

29 """ 

30 

31 include_header: bool = True 

32 """Whether to include the header section (markdown / plain formats).""" 

33 

34 include_footer: bool = True 

35 """Whether to include the footer section (markdown / plain formats).""" 

36 

37 include_categories: bool = True 

38 """Whether to group changes by category; when ``False`` a flat list is shown.""" 

39 

40 output_format: str = "markdown" 

41 """Output format: ``"markdown"`` (default), ``"plain"``, or ``"json"``.""" 

42 

43 title: str | None = None 

44 """Override the section heading; ``None`` uses the built-in default.""" 

45 

46 compare_ref: str | None = None 

47 """Compare against this git ref instead of the staged index.""" 

48 

49 jinja2_template: Path | None = field(default=None) 

50 """Path to a Jinja2 template file for fully custom output.""" 

51 

52 

53class _TemplateInfo(NamedTuple): 

54 """Lightweight container for template metadata used during rendering.""" 

55 

56 repo: str 

57 branch: str 

58 last_sync: str | None 

59 

60 

def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Run a git command and return its standard output.

    Failures are handled on a best-effort basis: whether git exits with a
    non-zero status or cannot be started at all, the problem is logged and
    an empty string is returned instead of raising.

    Args:
        args: Git command arguments (without 'git' prefix)
        cwd: Working directory for the command

    Returns:
        The command's stdout with surrounding whitespace stripped, or ``""``
        when the command failed or could not be launched.
    """
    try:
        result = subprocess.run(  # nosec B603 B607  # noqa: S603
            ["git", *args],  # noqa: S607
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running git {' '.join(args)}: {e.stderr}")
        return ""
    except OSError as e:
        # Raised before git even runs, e.g. the executable is not on PATH or
        # ``cwd`` is not a usable directory; treat it like any other git failure.
        logger.error(f"Error running git {' '.join(args)}: {e}")
        return ""
    return result.stdout.strip()

83 

84 

def get_staged_changes(repo_path: Path, compare_ref: str | None = None) -> dict[str, list[str]]:
    """Collect changed files from ``git diff --name-status``, grouped by change type.

    Rename (``R<score>``) and copy (``C<score>``) lines carry two
    tab-separated paths (source, then destination); only the destination
    path is reported. Renames are listed under ``'modified'`` and copies
    under ``'added'``. Type changes (``T``) are listed under ``'modified'``.
    Lines with any other status, or without a tab separator, are ignored.

    Args:
        repo_path: Path to the repository
        compare_ref: Optional git ref to compare against. When provided the
            working tree is diffed against this ref instead of the staged index.

    Returns:
        Dictionary with keys 'added', 'modified', 'deleted' containing file lists
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }

    # Compare against a specific ref, or fall back to staged changes
    diff_args = ["diff", compare_ref, "--name-status"] if compare_ref else ["diff", "--cached", "--name-status"]

    output = run_git_command(diff_args, cwd=repo_path)

    # Statuses whose line holds exactly one path
    single_path_buckets = {"A": "added", "M": "modified", "D": "deleted", "T": "modified"}

    for line in output.split("\n"):
        status, separator, paths = line.partition("\t")
        if not separator:
            # Blank line or a line without a status/path separator
            continue

        if status in single_path_buckets:
            changes[single_path_buckets[status]].append(paths)
        elif status.startswith("R"):
            # "R<score>\told\tnew": report the new path, treated as modified
            changes["modified"].append(paths.rsplit("\t", 1)[-1])
        elif status.startswith("C"):
            # "C<score>\tsource\tcopy": the copy is a newly created file
            changes["added"].append(paths.rsplit("\t", 1)[-1])

    return changes

126 

127 

128_CONFIG_FILES: frozenset[str] = frozenset( 

129 { 

130 "Makefile", 

131 "ruff.toml", 

132 "pytest.ini", 

133 ".editorconfig", 

134 ".gitignore", 

135 ".pre-commit-config.yaml", 

136 "renovate.json", 

137 ".python-version", 

138 } 

139) 

140 

141 

142_DIR_CATEGORIES: dict[str, str] = { 

143 "tests": "Tests", 

144 "src": "Source Code", 

145} 

146_DOC_DIRS: frozenset[str] = frozenset({"book", "docs"}) 

147 

148 

149def _categorize_by_directory(first_dir: str, filepath: str) -> str | None: 

150 """Categorize file based on its first directory. 

151 

152 Args: 

153 first_dir: First directory in the path 

154 filepath: Full file path 

155 

156 Returns: 

157 Category name or None if no match 

158 """ 

159 if first_dir == ".github": 

160 path_parts = Path(filepath).parts 

161 if len(path_parts) > 1 and path_parts[1] == "workflows": 

162 return "GitHub Actions Workflows" 

163 return "GitHub Configuration" 

164 

165 if first_dir == ".rhiza": 

166 if "script" in filepath.lower(): 

167 return "Rhiza Scripts" 

168 if "Makefile" in filepath: 

169 return "Makefiles" 

170 return "Rhiza Configuration" 

171 

172 if first_dir in _DIR_CATEGORIES: 

173 return _DIR_CATEGORIES[first_dir] 

174 

175 if first_dir in _DOC_DIRS: 

176 return "Documentation" 

177 

178 return None 

179 

180 

def _categorize_single_file(filepath: str) -> str:
    """Return the category label for one file path.

    The first path component is tried first; if it yields no category, the
    ``.md`` suffix and the known configuration file names are checked.

    Args:
        filepath: File path to categorize

    Returns:
        Category name (``"Other"`` when nothing matches)
    """
    segments = Path(filepath).parts

    if segments:
        by_directory = _categorize_by_directory(segments[0], filepath)
        if by_directory:
            return by_directory

        # No directory match: fall back to file-based rules
        if filepath.endswith(".md"):
            return "Documentation"
        if filepath in _CONFIG_FILES:
            return "Configuration Files"

    return "Other"

208 

209 

def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under their category labels.

    Args:
        files: List of file paths

    Returns:
        Dictionary mapping each category name to the files in it, in input order
    """
    grouped: dict[str, list[str]] = {}

    for path in files:
        grouped.setdefault(_categorize_single_file(path), []).append(path)

    return grouped

226 

227 

def get_template_info(repo_path: Path) -> tuple[str, str]:
    """Look up the template repository and branch for a project.

    ``.rhiza/template.lock`` is consulted first; its values are used when it
    parses and names a repository. Otherwise ``.rhiza/template.yml`` is read.
    When neither source yields a result, a pair of empty strings is returned
    rather than any hardcoded repository name.

    Args:
        repo_path: Path to the repository

    Returns:
        Tuple of (template_repo, template_branch)
    """
    # The lock file records what was actually synced, so it wins when usable
    lock_path = repo_path / ".rhiza" / "template.lock"
    if lock_path.exists():
        try:
            lock = TemplateLock.from_yaml(lock_path)
            if lock.repo:
                return lock.repo, lock.ref
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.lock; falling back to template.yml")

    # RhizaTemplate handles both the 'template-repository'/'repository' and
    # 'template-branch'/'ref' key variants
    config_path = repo_path / ".rhiza" / "template.yml"
    if config_path.exists():
        try:
            template = RhizaTemplate.from_yaml(config_path)
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.yml")
        else:
            return template.template_repository, template.template_branch

    return ("", "")

265 

266 

def get_last_sync_date(repo_path: Path, template_repo: str = "") -> str | None:
    """Work out when the project was last synced with its template.

    Sources are tried in order: the ``synced_at`` value in
    ``.rhiza/template.lock``, then the most recent matching commit in the git
    log, then the modification time of ``.rhiza/history``. The git-log search
    greps case-insensitively for "Sync", "template" and, when *template_repo*
    is given, the part of the repository name after the last ``/``.

    Args:
        repo_path: Path to the repository
        template_repo: Template repository name (e.g. ``"my-org/my-template"``)
            used to derive the short name for git-log grep patterns.

    Returns:
        ISO format date string or None if not found
    """
    rhiza_dir = repo_path / ".rhiza"

    # 1) template.lock: an unreadable lock file is ignored and the search continues
    lock_path = rhiza_dir / "template.lock"
    if lock_path.exists():
        try:
            synced_at = TemplateLock.from_yaml(lock_path).synced_at
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            synced_at = None
        if synced_at:
            return synced_at

    # 2) git log: committer date of the newest commit matching the grep patterns
    short_name = template_repo.rpartition("/")[2] if template_repo else ""
    name_pattern = ["--grep", short_name] if short_name else []
    log_args = ["log", "--format=%cI", "-1", *name_pattern, "--grep=Sync", "--grep=template", "-i"]

    logged_date = run_git_command(log_args, cwd=repo_path)
    if logged_date:
        return logged_date

    # 3) history file: use its modification time, expressed in UTC
    history_path = rhiza_dir / "history"
    if not history_path.exists():
        return None
    return datetime.fromtimestamp(history_path.stat().st_mtime, tz=UTC).isoformat()

313 

314 

315def _format_file_list(files: list[str], status_emoji: str) -> list[str]: 

316 """Format a list of files with the given status emoji. 

317 

318 Args: 

319 files: List of file paths 

320 status_emoji: Emoji to use (✅ for added, 📝 for modified, ❌ for deleted) 

321 

322 Returns: 

323 List of formatted lines 

324 """ 

325 lines = [] 

326 for f in sorted(files): 

327 lines.append(f"- {status_emoji} `{f}`") 

328 return lines 

329 

330 

def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None:
    """Append a collapsible ``<details>`` section listing *files* to *lines*.

    Nothing is appended when *files* is empty.

    Args:
        lines: List to append lines to
        title: Section title (e.g., "Added", "Modified")
        count: Number of files, shown in the summary line
        files: List of file paths
        emoji: Status emoji
    """
    if files:
        lines.extend(
            [
                "<details>",
                f"<summary>{title} ({count})</summary>",
                "",
                *_format_file_list(files, emoji),
                "",
                "</details>",
                "",
            ]
        )

351 

352 

353def _build_header(template_repo: str, title: str | None = None) -> list[str]: 

354 """Build the PR description header. 

355 

356 Args: 

357 template_repo: Template repository name 

358 title: Optional override for the section heading 

359 

360 Returns: 

361 List of header lines 

362 """ 

363 header_title = title if title else "## 🔄 Template Synchronization" 

364 lines = [header_title, ""] 

365 if template_repo: 

366 url = f"https://github.com/{template_repo}" 

367 repo_link = f"[{template_repo}]({url})" 

368 sync_line = f"This PR synchronizes the repository with the {repo_link} template." 

369 lines.append(sync_line) 

370 else: 

371 lines.append("This PR synchronizes the repository with the upstream template.") 

372 lines.append("") 

373 return lines 

374 

375 

376def _build_summary(changes: dict[str, list[str]]) -> list[str]: 

377 """Build the change summary section. 

378 

379 Args: 

380 changes: Dictionary of changes by type 

381 

382 Returns: 

383 List of summary lines 

384 """ 

385 return [ 

386 "### 📊 Change Summary", 

387 "", 

388 f"- **{len(changes['added'])}** files added", 

389 f"- **{len(changes['modified'])}** files modified", 

390 f"- **{len(changes['deleted'])}** files deleted", 

391 "", 

392 ] 

393 

394 

395def _build_footer(tmpl: _TemplateInfo) -> list[str]: 

396 """Build the PR description footer with metadata. 

397 

398 Args: 

399 tmpl: Template metadata container 

400 

401 Returns: 

402 List of footer lines 

403 """ 

404 lines = [ 

405 "---", 

406 "", 

407 "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**", 

408 "", 

409 ] 

410 if tmpl.repo: 

411 lines.append(f"- Template: `{tmpl.repo}@{tmpl.branch}`") 

412 if tmpl.last_sync: 

413 lines.append(f"- Last sync: {tmpl.last_sync}") 

414 lines.append(f"- Sync date: {datetime.now().astimezone().isoformat()}") 

415 return lines 

416 

417 

418def _generate_json_output( 

419 changes: dict[str, list[str]], 

420 categories: dict[str, list[str]], 

421 tmpl: _TemplateInfo, 

422) -> str: 

423 """Generate a JSON representation of the change data. 

424 

425 Args: 

426 changes: Dictionary of changes by type 

427 categories: Files grouped by category 

428 tmpl: Template metadata container 

429 

430 Returns: 

431 JSON-formatted string 

432 """ 

433 data = { 

434 "template_repo": tmpl.repo, 

435 "template_branch": tmpl.branch, 

436 "last_sync": tmpl.last_sync, 

437 "sync_date": datetime.now().astimezone().isoformat(), 

438 "changes": changes, 

439 "categories": categories, 

440 } 

441 return _json.dumps(data, indent=2) 

442 

443 

444def _plain_file_section(lines: list[str], label: str, files: list[str]) -> None: 

445 """Append a labelled block of files to *lines* in plain-text format. 

446 

447 Args: 

448 lines: List to append lines to 

449 label: Section label (e.g. "Added") 

450 files: List of file paths 

451 """ 

452 if not files: 

453 return 

454 lines.append(f"{label}:") 

455 lines.extend(f" {f}" for f in sorted(files)) 

456 lines.append("") 

457 

458 

459def _generate_plain_output( 

460 changes: dict[str, list[str]], 

461 categories: dict[str, list[str]], 

462 tmpl: _TemplateInfo, 

463 options: SummariseOptions, 

464) -> str: 

465 """Generate plain-text output from change data. 

466 

467 Args: 

468 changes: Dictionary of changes by type 

469 categories: Files grouped by category 

470 tmpl: Template metadata container 

471 options: Output customisation options 

472 

473 Returns: 

474 Plain-text formatted string 

475 """ 

476 lines: list[str] = [] 

477 

478 if options.include_header: 

479 heading = options.title or "Template Synchronization" 

480 lines.extend([heading, "=" * len(heading), ""]) 

481 if tmpl.repo: 

482 lines.append(f"Template: {tmpl.repo}@{tmpl.branch}") 

483 lines.append("") 

484 

485 total = sum(len(v) for v in changes.values()) 

486 if not total: 

487 lines.append("No changes detected.") 

488 return "\n".join(lines) 

489 

490 lines.append( 

491 f"Changes: {len(changes['added'])} added, " 

492 f"{len(changes['modified'])} modified, " 

493 f"{len(changes['deleted'])} deleted", 

494 ) 

495 lines.append("") 

496 

497 if options.include_categories: 

498 for category, files in sorted(categories.items()): 

499 lines.append(f"{category}:") 

500 lines.extend(f" {f}" for f in sorted(files)) 

501 lines.append("") 

502 else: 

503 for label, files in [ 

504 ("Added", changes["added"]), 

505 ("Modified", changes["modified"]), 

506 ("Deleted", changes["deleted"]), 

507 ]: 

508 _plain_file_section(lines, label, files) 

509 

510 if options.include_footer: 

511 if tmpl.last_sync: 

512 lines.append(f"Last sync: {tmpl.last_sync}") 

513 lines.append(f"Sync date: {datetime.now().astimezone().isoformat()}") 

514 

515 return "\n".join(lines) 

516 

517 

def _generate_jinja2_output(template_path: Path, context: dict) -> str:
    """Render a user-supplied Jinja2 template file with the given context.

    The entries of *context* become the template's variables. It should
    contain at minimum: ``template_repo``, ``template_branch``, ``last_sync``,
    ``sync_date``, ``changes``, ``categories``, and ``title``.

    Note:
        Autoescape is disabled because the output is plain text / Markdown,
        not HTML. Do **not** use the rendered output directly in a web
        context without escaping it first.

    Args:
        template_path: Path to the Jinja2 template file (read as UTF-8)
        context: Template context variables

    Returns:
        Rendered template string
    """
    source = template_path.read_text(encoding="utf-8")
    environment = jinja2.Environment(  # nosec B701
        autoescape=False,  # noqa: S701
        loader=jinja2.BaseLoader(),
    )
    compiled = environment.from_string(source)
    return compiled.render(**context)

543 

544 

def _markdown_body(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Assemble the markdown PR description.

    Args:
        changes: Dictionary of changes by type
        categories: Files grouped by category
        tmpl: Template metadata container
        options: Output customisation options

    Returns:
        Markdown-formatted string
    """
    body: list[str] = []

    if options.include_header:
        body += _build_header(tmpl.repo, title=options.title)

    if sum(map(len, changes.values())) == 0:
        body.append("No changes detected.")
        if options.include_footer:
            # Blank line separates the message from the footer rule
            body.append("")
            body += _build_footer(tmpl)
        return "\n".join(body)

    body += _build_summary(changes)

    # (section title, key in `changes`, status emoji), in display order
    sections = (
        ("Added", "added", "✅"),
        ("Modified", "modified", "📝"),
        ("Deleted", "deleted", "❌"),
    )

    if not options.include_categories:
        body += ["### 📁 Changed Files", ""]
        for label, kind, emoji in sections:
            _add_category_section(body, label, len(changes[kind]), changes[kind], emoji)
    elif categories:
        body += ["### 📁 Changes by Category", ""]
        for category in sorted(categories):
            body += [f"#### {category}", ""]
            for label, kind, emoji in sections:
                # Files of this category that have this change type
                matching = [path for path in categories[category] if path in changes[kind]]
                _add_category_section(body, label, len(matching), matching, emoji)

    if options.include_footer:
        body += _build_footer(tmpl)

    return "\n".join(body)

604 

605 

606def generate_pr_description(repo_path: Path, options: SummariseOptions | None = None) -> str: 

607 """Generate PR description based on staged changes. 

608 

609 Args: 

610 repo_path: Path to the repository 

611 options: Output customisation options. Defaults to :class:`SummariseOptions` 

612 with all fields at their defaults (markdown format, with header / footer / 

613 categories, no custom title, staged-index diff). 

614 

615 Returns: 

616 Formatted PR description 

617 """ 

618 opts = options or SummariseOptions() 

619 

620 changes = get_staged_changes(repo_path, compare_ref=opts.compare_ref) 

621 template_repo, template_branch = get_template_info(repo_path) 

622 last_sync = get_last_sync_date(repo_path, template_repo=template_repo) 

623 

624 all_changed_files = changes["added"] + changes["modified"] + changes["deleted"] 

625 categories = categorize_files(all_changed_files) if all_changed_files else {} 

626 

627 tmpl = _TemplateInfo(repo=template_repo, branch=template_branch, last_sync=last_sync) 

628 

629 # Custom Jinja2 template takes full precedence over all other options 

630 if opts.jinja2_template: 

631 context = { 

632 "template_repo": tmpl.repo, 

633 "template_branch": tmpl.branch, 

634 "last_sync": tmpl.last_sync, 

635 "sync_date": datetime.now().astimezone().isoformat(), 

636 "changes": changes, 

637 "categories": categories, 

638 "title": opts.title, 

639 } 

640 return _generate_jinja2_output(opts.jinja2_template, context) 

641 

642 if opts.output_format == "json": 

643 return _generate_json_output(changes, categories, tmpl) 

644 

645 if opts.output_format == "plain": 

646 return _generate_plain_output(changes, categories, tmpl, opts) 

647 

648 return _markdown_body(changes, categories, tmpl, opts) 

649 

650 

def summarise(
    target: Path,
    output: Path | None = None,
    *,
    options: SummariseOptions | None = None,
) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.
        options: Output customisation options. Defaults to :class:`SummariseOptions`
            with all fields at their defaults.

    Raises:
        RuntimeError: If *target* has no ``.git`` entry.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # `.git` is a directory in an ordinary checkout but a regular "gitdir: ..."
    # file in linked worktrees and submodules, so accept either form.
    if not (target / ".git").exists():
        err_msg = f"Target directory is not a git repository: {target}"
        logger.error(err_msg)
        logger.error("Initialize a git repository with 'git init' first")
        raise RuntimeError(err_msg)

    description = generate_pr_description(target, options)

    if output:
        output_path = output.resolve()
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        print(description)

    logger.success("Summary generated successfully")