Coverage for src/rhiza/commands/summarise.py: 93%

1"""Command for generating PR descriptions from staged changes.

3This module provides functionality to analyze staged git changes and generate

4structured PR descriptions for rhiza sync operations.

5"""

7import json as _json

8import subprocess # nosec B404

9from collections import defaultdict

10from dataclasses import dataclass, field

11from datetime import UTC, datetime

12from pathlib import Path

13from typing import NamedTuple

15import jinja2

16import yaml

17from loguru import logger

19from rhiza.models.lock import TemplateLock

20from rhiza.models.template import RhizaTemplate

23@dataclass(kw_only=True)

24class SummariseOptions:

25 """Options controlling the output of :func:`generate_pr_description`.

27 All fields are keyword-only and default to the standard behaviour so

28 callers only need to set the fields they want to override.

29 """

31 include_header: bool = True

32 """Whether to include the header section (markdown / plain formats)."""

34 include_footer: bool = True

35 """Whether to include the footer section (markdown / plain formats)."""

37 include_categories: bool = True

38 """Whether to group changes by category; when ``False`` a flat list is shown."""

40 output_format: str = "markdown"

41 """Output format: ``"markdown"`` (default), ``"plain"``, or ``"json"``."""

43 title: str | None = None

44 """Override the section heading; ``None`` uses the built-in default."""

46 compare_ref: str | None = None

47 """Compare against this git ref instead of the staged index."""

49 jinja2_template: Path | None = field(default=None)

50 """Path to a Jinja2 template file for fully custom output."""

53class _TemplateInfo(NamedTuple):

54 """Lightweight container for template metadata used during rendering."""

56 repo: str

57 branch: str

58 last_sync: str | None

def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Execute ``git`` with the given arguments and hand back its stdout.

    Args:
        args: Arguments passed to git (the leading ``git`` is added here)
        cwd: Directory in which the command is run

    Returns:
        Standard output with surrounding whitespace stripped, or an empty
        string when git exits with a non-zero status (the failure is logged)
    """
    try:
        completed = subprocess.run(  # nosec B603 B607 # noqa: S603
            ["git", *args],  # noqa: S607
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        # check=True turns a non-zero exit status into this exception
        logger.error(f"Error running git {' '.join(args)}: {e.stderr}")
        return ""
    return completed.stdout.strip()

def get_staged_changes(repo_path: Path, compare_ref: str | None = None) -> dict[str, list[str]]:
    """Get list of changes categorized by type.

    Parses the output of ``git diff --name-status``. Renamed and copied
    entries carry two paths (``R<score>\\t<old>\\t<new>``); only the
    destination path is recorded so the result never contains a
    tab-joined pair of names.

    Args:
        repo_path: Path to the repository
        compare_ref: Optional git ref to compare against. When provided the
            working tree is diffed against this ref instead of the staged index.

    Returns:
        Dictionary with keys 'added', 'modified', 'deleted' containing file lists.
        Renames and type changes are reported as 'modified', copies as 'added'.
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }

    # Status letter -> result bucket. Letters not listed here (e.g. unmerged "U") are ignored.
    buckets = {
        "A": "added",
        "C": "added",
        "M": "modified",
        "R": "modified",
        "T": "modified",
        "D": "deleted",
    }

    # Compare against a specific ref, or fall back to staged changes
    diff_args = ["diff", compare_ref, "--name-status"] if compare_ref else ["diff", "--cached", "--name-status"]

    output = run_git_command(diff_args, cwd=repo_path)

    for line in output.split("\n"):
        status, _, filepath = line.partition("\t")

        if status[:1] in ("R", "C"):
            # Rename / copy: the status has a similarity score appended and the
            # line lists "<old>\t<new>"; keep the path that exists after the change.
            key = status[0]
            filepath = filepath.rpartition("\t")[2]
        else:
            key = status

        bucket = buckets.get(key)
        if bucket is None or not filepath:
            continue
        changes[bucket].append(filepath)

    return changes

126

127

# File paths that are reported under the "Configuration Files" category.
_CONFIG_FILES: frozenset[str] = frozenset(
    (
        ".editorconfig",
        ".gitignore",
        ".pre-commit-config.yaml",
        ".python-version",
        "Makefile",
        "pytest.ini",
        "renovate.json",
        "ruff.toml",
    )
)

# Top-level directory name -> category label.
_DIR_CATEGORIES: dict[str, str] = {"tests": "Tests", "src": "Source Code"}

# Top-level directories whose files are categorised as documentation.
_DOC_DIRS: frozenset[str] = frozenset(("book", "docs"))

147

148

149def _categorize_by_directory(first_dir: str, filepath: str) -> str | None:

150 """Categorize file based on its first directory.

151

152 Args:

153 first_dir: First directory in the path

154 filepath: Full file path

155

156 Returns:

157 Category name or None if no match

158 """

159 if first_dir == ".github":

160 path_parts = Path(filepath).parts

161 if len(path_parts) > 1 and path_parts[1] == "workflows":

162 return "GitHub Actions Workflows"

163 return "GitHub Configuration"

164

165 if first_dir == ".rhiza":

166 if "script" in filepath.lower():

167 return "Rhiza Scripts"

168 if "Makefile" in filepath:

169 return "Makefiles"

170 return "Rhiza Configuration"

171

172 if first_dir in _DIR_CATEGORIES:

173 return _DIR_CATEGORIES[first_dir]

174

175 if first_dir in _DOC_DIRS:

176 return "Documentation"

177

178 return None

179

180

def _categorize_single_file(filepath: str) -> str:
    """Pick the category label for one file path.

    Directory-based rules are tried first; ``.md`` files and known
    configuration files are handled next; everything else is "Other".

    Args:
        filepath: File path to categorize

    Returns:
        Category name
    """
    components = Path(filepath).parts

    # An empty path has no components and falls straight through to "Other"
    if components:
        by_directory = _categorize_by_directory(components[0], filepath)
        if by_directory:
            return by_directory

        if filepath.endswith(".md"):
            return "Documentation"

        # Compared against the full path, so only exact matches qualify
        if filepath in _CONFIG_FILES:
            return "Configuration Files"

    return "Other"

208

209

def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under their category label.

    Args:
        files: List of file paths

    Returns:
        Plain dictionary mapping each category name to the paths in it,
        with categories and paths kept in first-seen order
    """
    grouped: dict[str, list[str]] = {}

    for path in files:
        grouped.setdefault(_categorize_single_file(path), []).append(path)

    return grouped

226

227

def get_template_info(repo_path: Path) -> tuple[str, str]:
    """Look up the template repository and branch for a project.

    ``.rhiza/template.lock`` is consulted first and wins whenever it names a
    repository. Otherwise ``.rhiza/template.yml`` is read. When neither file
    yields a result, a pair of empty strings is returned; no repository name
    is hardcoded as a default.

    Args:
        repo_path: Path to the repository

    Returns:
        Tuple of (template_repo, template_branch)
    """
    rhiza_dir = repo_path / ".rhiza"

    # The lock file records what was actually synced, so it takes priority
    lock_path = rhiza_dir / "template.lock"
    if lock_path.exists():
        try:
            locked = TemplateLock.from_yaml(lock_path)
            if locked.repo:
                return locked.repo, locked.ref
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.lock; falling back to template.yml")

    # Second choice: template.yml, parsed through the RhizaTemplate model
    config_path = rhiza_dir / "template.yml"
    if config_path.exists():
        try:
            config = RhizaTemplate.from_yaml(config_path)
        except (yaml.YAMLError, ValueError, TypeError, KeyError):
            logger.warning("Failed to read template.yml")
        else:
            return config.template_repository, config.template_branch

    return ("", "")

265

266

def get_last_sync_date(repo_path: Path, template_repo: str = "") -> str | None:
    """Get the date of the last sync.

    Sources are tried in order:

    1. ``synced_at`` from ``.rhiza/template.lock``.
    2. The committer date of the most recent commit whose message matches
       one of the grep patterns. Git treats multiple ``--grep`` patterns as
       alternatives, so a commit matching the template's short name,
       "Sync" or "template" (case-insensitively) qualifies.
    3. The modification time of ``.rhiza/history``.

    Args:
        repo_path: Path to the repository
        template_repo: Template repository name (e.g. ``"my-org/my-template"``);
            the part after the last ``/`` is added as an extra grep pattern.

    Returns:
        The lock file's ``synced_at`` value, an ISO format date string from
        git or the history file, or None if nothing was found
    """
    # Prefer template.lock synced_at - it is the most reliable source
    lock_file = repo_path / ".rhiza" / "template.lock"
    if lock_file.exists():
        try:
            lock = TemplateLock.from_yaml(lock_file)
            if lock.synced_at:
                return lock.synced_at
        except (yaml.YAMLError, ValueError, TypeError, KeyError) as exc:
            # Best effort: an unreadable lock must not abort the summary, but
            # leave a trace instead of swallowing the failure silently.
            logger.debug(f"Could not read synced_at from template.lock ({exc}); falling back to git log")

    # Derive the short name from the template repo for targeted grepping
    template_short_name = template_repo.rsplit("/", 1)[-1] if template_repo else ""

    grep_args = ["log", "--format=%cI", "-1"]
    if template_short_name:
        grep_args.extend(["--grep", template_short_name])
    grep_args.extend(["--grep=Sync", "--grep=template", "-i"])

    output = run_git_command(grep_args, cwd=repo_path)
    if output:
        return output

    # Fallback: use the history file's modification time if the file exists
    history_file = repo_path / ".rhiza" / "history"
    if history_file.exists():
        stat = history_file.stat()
        return datetime.fromtimestamp(stat.st_mtime, tz=UTC).isoformat()

    return None

313

314

def _format_file_list(files: list[str], status_emoji: str) -> list[str]:
    """Render file paths as sorted markdown bullet items.

    Args:
        files: List of file paths
        status_emoji: Marker placed before each path (✅ for added, 📝 for modified, ❌ for deleted)

    Returns:
        One bullet line per path, in sorted order
    """
    return [f"- {status_emoji} `{path}`" for path in sorted(files)]

329

330

def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None:
    """Append a collapsible ``<details>`` block listing files to *lines*.

    Nothing is appended when ``files`` is empty.

    Args:
        lines: List to append lines to (mutated in place)
        title: Section title (e.g., "Added", "Modified")
        count: Number shown in parentheses after the title
        files: List of file paths
        emoji: Status emoji placed before each path
    """
    if files:
        lines.extend(
            [
                "<details>",
                f"<summary>{title} ({count})</summary>",
                "",
                *_format_file_list(files, emoji),
                "",
                "</details>",
                "",
            ]
        )

351

352

353def _build_header(template_repo: str, title: str | None = None) -> list[str]:

354 """Build the PR description header.

355

356 Args:

357 template_repo: Template repository name

358 title: Optional override for the section heading

359

360 Returns:

361 List of header lines

362 """

363 header_title = title if title else "## 🔄 Template Synchronization"

364 lines = [header_title, ""]

365 if template_repo:

366 url = f"https://github.com/{template_repo}"

367 repo_link = f"[{template_repo}]({url})"

368 sync_line = f"This PR synchronizes the repository with the {repo_link} template."

369 lines.append(sync_line)

370 else:

371 lines.append("This PR synchronizes the repository with the upstream template.")

372 lines.append("")

373 return lines

374

375

def _build_summary(changes: dict[str, list[str]]) -> list[str]:
    """Assemble the per-type file counts section.

    Args:
        changes: Dictionary of changes by type; must contain the keys
            'added', 'modified' and 'deleted'

    Returns:
        List of summary lines, ending with a blank line
    """
    counts = [f"- **{len(changes[kind])}** files {kind}" for kind in ("added", "modified", "deleted")]
    return ["### 📊 Change Summary", "", *counts, ""]

393

394

def _build_footer(tmpl: _TemplateInfo) -> list[str]:
    """Assemble the closing metadata lines of the PR description.

    Args:
        tmpl: Template metadata container

    Returns:
        List of footer lines: a rule, the generator credit, then the
        template and last-sync entries when available, and always the
        current local time as the sync date
    """
    footer = [
        "---",
        "",
        "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**",
        "",
    ]

    # Optional entries are emitted only when the underlying value is set
    if tmpl.repo:
        footer.append(f"- Template: `{tmpl.repo}@{tmpl.branch}`")
    if tmpl.last_sync:
        footer.append(f"- Last sync: {tmpl.last_sync}")

    generated_at = datetime.now().astimezone().isoformat()
    footer.append(f"- Sync date: {generated_at}")
    return footer

416

417

def _generate_json_output(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
) -> str:
    """Serialise the change data as a JSON document.

    Args:
        changes: Dictionary of changes by type
        categories: Files grouped by category
        tmpl: Template metadata container

    Returns:
        JSON-formatted string indented by two spaces; ``sync_date`` holds
        the current local time
    """
    return _json.dumps(
        {
            "template_repo": tmpl.repo,
            "template_branch": tmpl.branch,
            "last_sync": tmpl.last_sync,
            "sync_date": datetime.now().astimezone().isoformat(),
            "changes": changes,
            "categories": categories,
        },
        indent=2,
    )

442

443

def _plain_file_section(lines: list[str], label: str, files: list[str]) -> None:
    """Append a plain-text block (label, sorted paths, blank line) to *lines*.

    Nothing is appended when ``files`` is empty.

    Args:
        lines: List to append lines to (mutated in place)
        label: Section label (e.g. "Added")
        files: List of file paths
    """
    if files:
        lines.extend([f"{label}:", *(f" {f}" for f in sorted(files)), ""])

457

458

def _generate_plain_output(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Render the change data as plain text.

    When there are no changes the result ends after the "No changes
    detected." line; the footer is not emitted in that case.

    Args:
        changes: Dictionary of changes by type
        categories: Files grouped by category
        tmpl: Template metadata container
        options: Output customisation options

    Returns:
        Plain-text formatted string
    """
    out: list[str] = []

    if options.include_header:
        heading = options.title or "Template Synchronization"
        # Underline the heading with "=" characters of matching length
        out.extend([heading, "=" * len(heading), ""])
        if tmpl.repo:
            out.extend([f"Template: {tmpl.repo}@{tmpl.branch}", ""])

    # Every change list being empty means there is nothing to report
    if not any(changes.values()):
        out.append("No changes detected.")
        return "\n".join(out)

    n_added = len(changes["added"])
    n_modified = len(changes["modified"])
    n_deleted = len(changes["deleted"])
    out.extend([f"Changes: {n_added} added, {n_modified} modified, {n_deleted} deleted", ""])

    if options.include_categories:
        for category in sorted(categories):
            out.append(f"{category}:")
            out.extend(f" {f}" for f in sorted(categories[category]))
            out.append("")
    else:
        flat_sections = (
            ("Added", changes["added"]),
            ("Modified", changes["modified"]),
            ("Deleted", changes["deleted"]),
        )
        for label, files in flat_sections:
            _plain_file_section(out, label, files)

    if options.include_footer:
        if tmpl.last_sync:
            out.append(f"Last sync: {tmpl.last_sync}")
        out.append(f"Sync date: {datetime.now().astimezone().isoformat()}")

    return "\n".join(out)

516

517

def _generate_jinja2_output(template_path: Path, context: dict) -> str:
    """Render a user-supplied Jinja2 template file with the given context.

    The file is read as UTF-8 and *context* is expanded into the template's
    variables. Callers are expected to provide at least ``template_repo``,
    ``template_branch``, ``last_sync``, ``sync_date``, ``changes``,
    ``categories``, and ``title``.

    Note:
        Autoescape is switched off because the output is plain text / Markdown,
        not HTML. The rendered text is therefore **not** HTML-safe and must be
        escaped before being placed in a web page.

    Args:
        template_path: Path to the Jinja2 template file
        context: Template context variables

    Returns:
        Rendered template string
    """
    source = template_path.read_text(encoding="utf-8")
    environment = jinja2.Environment(  # nosec B701
        autoescape=False,  # noqa: S701
        loader=jinja2.BaseLoader(),
    )
    compiled = environment.from_string(source)
    return compiled.render(**context)

543

544

def _markdown_body(
    changes: dict[str, list[str]],
    categories: dict[str, list[str]],
    tmpl: _TemplateInfo,
    options: SummariseOptions,
) -> str:
    """Build the markdown PR description body.

    Layout: optional header, change summary, then either per-category
    sections (each split into Added / Modified / Deleted) or a flat
    Added / Modified / Deleted listing, and finally the optional footer.
    When there are no changes only the header, a "No changes detected."
    line and the footer are produced.

    Args:
        changes: Dictionary of changes by type
        categories: Files grouped by category
        tmpl: Template metadata container
        options: Output customisation options

    Returns:
        Markdown-formatted string
    """
    lines: list[str] = []

    if options.include_header:
        lines.extend(_build_header(tmpl.repo, title=options.title))

    total_changes = sum(len(files) for files in changes.values())
    if not total_changes:
        lines.append("No changes detected.")
        if options.include_footer:
            lines.append("")
            lines.extend(_build_footer(tmpl))
        return "\n".join(lines)

    lines.extend(_build_summary(changes))

    # (section title, paths, emoji) for each change type, in display order
    change_types = [
        ("Added", changes["added"], "✅"),
        ("Modified", changes["modified"], "📝"),
        ("Deleted", changes["deleted"], "❌"),
    ]

    if options.include_categories and categories:
        lines.append("### 📁 Changes by Category")
        lines.append("")

        # Build the lookup sets once so each membership test is O(1) instead
        # of scanning the change lists again for every file of every category.
        lookups = [(title, set(paths), emoji) for title, paths, emoji in change_types]

        for category, files in sorted(categories.items()):
            lines.append(f"#### {category}")
            lines.append("")

            for title, members, emoji in lookups:
                matching = [f for f in files if f in members]
                _add_category_section(lines, title, len(matching), matching, emoji)

    elif not options.include_categories:
        lines.append("### 📁 Changed Files")
        lines.append("")
        for title, paths, emoji in change_types:
            _add_category_section(lines, title, len(paths), paths, emoji)

    if options.include_footer:
        lines.extend(_build_footer(tmpl))

    return "\n".join(lines)

604

605

606def generate_pr_description(repo_path: Path, options: SummariseOptions | None = None) -> str:

607 """Generate PR description based on staged changes.

608

609 Args:

610 repo_path: Path to the repository

611 options: Output customisation options. Defaults to :class:`SummariseOptions`

612 with all fields at their defaults (markdown format, with header / footer /

613 categories, no custom title, staged-index diff).

614

615 Returns:

616 Formatted PR description

617 """

618 opts = options or SummariseOptions()

619

620 changes = get_staged_changes(repo_path, compare_ref=opts.compare_ref)

621 template_repo, template_branch = get_template_info(repo_path)

622 last_sync = get_last_sync_date(repo_path, template_repo=template_repo)

623

624 all_changed_files = changes["added"] + changes["modified"] + changes["deleted"]

625 categories = categorize_files(all_changed_files) if all_changed_files else {}

626

627 tmpl = _TemplateInfo(repo=template_repo, branch=template_branch, last_sync=last_sync)

628

629 # Custom Jinja2 template takes full precedence over all other options

630 if opts.jinja2_template:

631 context = {

632 "template_repo": tmpl.repo,

633 "template_branch": tmpl.branch,

634 "last_sync": tmpl.last_sync,

635 "sync_date": datetime.now().astimezone().isoformat(),

636 "changes": changes,

637 "categories": categories,

638 "title": opts.title,

639 }

640 return _generate_jinja2_output(opts.jinja2_template, context)

641

642 if opts.output_format == "json":

643 return _generate_json_output(changes, categories, tmpl)

644

645 if opts.output_format == "plain":

646 return _generate_plain_output(changes, categories, tmpl, opts)

647

648 return _markdown_body(changes, categories, tmpl, opts)

649

650

def summarise(
    target: Path,
    output: Path | None = None,
    *,
    options: SummariseOptions | None = None,
) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.
        options: Output customisation options. Defaults to :class:`SummariseOptions`
            with all fields at their defaults.

    Raises:
        RuntimeError: If ``target`` does not contain a ``.git`` entry.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # Check if target is a git repository. In linked worktrees and submodules
    # ".git" is a regular file pointing at the real git directory, so test for
    # existence rather than requiring a directory.
    if not (target / ".git").exists():
        err_msg = f"Target directory is not a git repository: {target}"
        logger.error(err_msg)
        logger.error("Initialize a git repository with 'git init' first")
        raise RuntimeError(err_msg)

    description = generate_pr_description(target, options)

    if output:
        output_path = output.resolve()
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        print(description)

    logger.success("Summary generated successfully")

Coverage for src / rhiza / commands / summarise.py: 93%

276 statements