Coverage for src / rhiza / commands / summarise.py: 100%

155 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 07:04 +0000

1"""Command for generating PR descriptions from staged changes. 

2 

3This module provides functionality to analyze staged git changes and generate 

4structured PR descriptions for rhiza sync operations. 

5""" 

6 

7import subprocess # nosec B404 

8from collections import defaultdict 

9from datetime import datetime 

10from pathlib import Path 

11 

12from loguru import logger 

13 

14 

def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Execute ``git`` with the given arguments and return its trimmed stdout.

    Args:
        args: Arguments handed to git; the leading ``git`` is added here.
        cwd: Directory in which to run the command (``None`` keeps the current one).

    Returns:
        Standard output with surrounding whitespace stripped. When git exits
        with a non-zero status the error is logged and an empty string is
        returned instead.
    """
    command = ["git", *args]  # noqa: S607
    try:
        completed = subprocess.run(  # nosec B603 B607 # noqa: S603
            command,
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        # check=True turns a non-zero exit status into this exception.
        logger.error(f"Error running git {' '.join(args)}: {exc.stderr}")
        return ""
    else:
        return completed.stdout.strip()

37 

38 

def get_staged_changes(repo_path: Path) -> dict[str, list[str]]:
    """Get list of staged changes categorized by type.

    Parses the output of ``git diff --cached --name-status``. Each line is a
    status field and a path separated by a TAB. Rename and copy lines carry
    two TAB-separated paths (source, then destination), and the status
    letter may be followed by a similarity score (e.g. ``R100``).

    Args:
        repo_path: Path to the repository

    Returns:
        Dictionary with keys 'added', 'modified', 'deleted' containing file lists.
        Renamed files are listed under 'modified' and copied files under
        'added', in both cases by their destination path. Type changes are
        listed under 'modified'. Lines with any other status are ignored.
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }

    # Keyed on the first status letter only, because git may append a score.
    bucket_by_status = {
        "A": "added",
        "C": "added",
        "M": "modified",
        "R": "modified",
        "T": "modified",
        "D": "deleted",
    }

    # Get staged changes
    output = run_git_command(["diff", "--cached", "--name-status"], cwd=repo_path)

    for line in output.split("\n"):
        if not line:
            continue
        status, separator, paths = line.partition("\t")
        if not separator or not status:
            continue

        bucket = bucket_by_status.get(status[0])
        if bucket is None:
            continue

        if status[0] in ("R", "C"):
            # "<source>\t<destination>": report the path that exists after the change.
            filepath = paths.rsplit("\t", 1)[-1]
        else:
            filepath = paths

        changes[bucket].append(filepath)

    return changes

76 

77 

# Paths reported as "Configuration Files". Membership is tested against the
# full relative path, so only these exact names match.
_CONFIG_FILES: frozenset[str] = frozenset(
    (
        ".editorconfig",
        ".gitignore",
        ".pre-commit-config.yaml",
        ".python-version",
        "Makefile",
        "pytest.ini",
        "renovate.json",
        "ruff.toml",
    )
)

90 

91 

92def _categorize_by_directory(first_dir: str, filepath: str) -> str | None: 

93 """Categorize file based on its first directory. 

94 

95 Args: 

96 first_dir: First directory in the path 

97 filepath: Full file path 

98 

99 Returns: 

100 Category name or None if no match 

101 """ 

102 if first_dir == ".github": 

103 path_parts = Path(filepath).parts 

104 if len(path_parts) > 1 and path_parts[1] == "workflows": 

105 return "GitHub Actions Workflows" 

106 return "GitHub Configuration" 

107 

108 if first_dir == ".rhiza": 

109 if "script" in filepath.lower(): 

110 return "Rhiza Scripts" 

111 if "Makefile" in filepath: 

112 return "Makefiles" 

113 return "Rhiza Configuration" 

114 

115 if first_dir == "tests": 

116 return "Tests" 

117 

118 if first_dir == "book": 

119 return "Documentation" 

120 

121 return None 

122 

123 

def _categorize_single_file(filepath: str) -> str:
    """Return the category name for one file path.

    Directory-based rules are consulted first, then the ``.md`` suffix, then
    the set of known configuration file names.

    Args:
        filepath: File path to categorize

    Returns:
        Category name ("Other" when no rule applies)
    """
    segments = Path(filepath).parts

    if segments:
        by_directory = _categorize_by_directory(segments[0], filepath)
        if by_directory:
            return by_directory

        if filepath.endswith(".md"):
            return "Documentation"

        if filepath in _CONFIG_FILES:
            return "Configuration Files"

    # Empty path, or nothing above matched.
    return "Other"

151 

152 

def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under their category names.

    Args:
        files: List of file paths

    Returns:
        Dictionary mapping each category name to the paths assigned to it,
        with paths kept in their input order
    """
    grouped: dict[str, list[str]] = {}

    for path in files:
        grouped.setdefault(_categorize_single_file(path), []).append(path)

    return grouped

169 

170 

def get_template_info(repo_path: Path) -> tuple[str, str]:
    """Get template repository and branch from template.yml.

    The file ``.rhiza/template.yml`` is scanned line by line for the
    ``template-repository:`` and ``template-branch:`` keys; it is not parsed
    as full YAML. Surrounding single or double quotes are removed from the
    values. A key that is missing or has an empty value keeps its default.

    Args:
        repo_path: Path to the repository

    Returns:
        Tuple of (template_repo, template_branch). Defaults to
        ("jebel-quant/rhiza", "main") when the file is absent.
    """
    template_repo = "jebel-quant/rhiza"
    template_branch = "main"

    template_file = repo_path / ".rhiza" / "template.yml"
    if not template_file.is_file():
        return template_repo, template_branch

    # Explicit encoding so the result does not depend on the platform locale.
    with template_file.open(encoding="utf-8") as f:
        for raw_line in f:
            key, separator, raw_value = raw_line.strip().partition(":")
            if not separator:
                continue

            value = raw_value.strip().strip("\"'")
            if not value:
                # An empty value would yield a broken "repo@branch" reference.
                continue

            if key == "template-repository":
                template_repo = value
            elif key == "template-branch":
                template_branch = value

    return template_repo, template_branch

197 

198 

def get_last_sync_date(repo_path: Path) -> str | None:
    """Get the date of the last sync commit.

    Args:
        repo_path: Path to the repository

    Returns:
        ISO format date string (with UTC offset) or None if not found
    """
    # Most recent commit whose message matches any of the patterns
    # (case-insensitive); several --grep options are OR-ed by git.
    output = run_git_command(
        ["log", "--grep=rhiza", "--grep=Sync", "--grep=template", "-i", "--format=%cI", "-1"], cwd=repo_path
    )

    if output:
        return output

    # Fallback: use the modification time of the history file if it exists
    history_file = repo_path / ".rhiza" / "history"
    if history_file.exists():
        modified = datetime.fromtimestamp(history_file.stat().st_mtime)
        # Attach the local UTC offset so this value has the same shape as the
        # offset-aware date produced by git's %cI above.
        return modified.astimezone().isoformat()

    return None

224 

225 

def _format_file_list(files: list[str], status_emoji: str) -> list[str]:
    """Render file paths as markdown bullet items, sorted by path.

    Args:
        files: List of file paths
        status_emoji: Emoji placed before each path (✅ for added, 📝 for modified, ❌ for deleted)

    Returns:
        One formatted bullet line per file
    """
    return [f"- {status_emoji} `{path}`" for path in sorted(files)]

240 

241 

def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None:
    """Append a collapsible ``<details>`` section listing the given files.

    Nothing is appended when ``files`` is empty.

    Args:
        lines: Output list, extended in place
        title: Section title (e.g., "Added", "Modified")
        count: Number shown next to the title
        files: List of file paths
        emoji: Status emoji placed before each path
    """
    if files:
        lines.extend(["<details>", f"<summary>{title} ({count})</summary>", ""])
        lines.extend(_format_file_list(files, emoji))
        lines.extend(["", "</details>", ""])

262 

263 

def _build_header(template_repo: str) -> list[str]:
    """Create the opening lines of the PR description.

    Args:
        template_repo: Template repository name, used as link text and as the GitHub URL path

    Returns:
        List of header lines
    """
    repo_url = f"https://github.com/{template_repo}"
    intro = f"This PR synchronizes the repository with the [{template_repo}]({repo_url}) template."
    return ["## 🔄 Template Synchronization", "", intro, ""]

279 

280 

def _build_summary(changes: dict[str, list[str]]) -> list[str]:
    """Create the section that counts files per change type.

    Args:
        changes: Dictionary of changes with keys 'added', 'modified' and 'deleted'

    Returns:
        List of summary lines
    """
    summary = ["### 📊 Change Summary", ""]
    for kind in ("added", "modified", "deleted"):
        summary.append(f"- **{len(changes[kind])}** files {kind}")
    summary.append("")
    return summary

298 

299 

300def _build_footer(template_repo: str, template_branch: str, last_sync: str | None) -> list[str]: 

301 """Build the PR description footer with metadata. 

302 

303 Args: 

304 template_repo: Template repository name 

305 template_branch: Template branch name 

306 last_sync: Last sync date string or None 

307 

308 Returns: 

309 List of footer lines 

310 """ 

311 lines = [ 

312 "---", 

313 "", 

314 "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**", 

315 "", 

316 f"- Template: `{template_repo}@{template_branch}`", 

317 ] 

318 if last_sync: 

319 lines.append(f"- Last sync: {last_sync}") 

320 lines.append(f"- Sync date: {datetime.now().astimezone().isoformat()}") 

321 return lines 

322 

323 

def generate_pr_description(repo_path: Path) -> str:
    """Generate PR description based on staged changes.

    The description consists of a header, a per-type change summary, the
    changed files grouped by category (each split into collapsible Added /
    Modified / Deleted sections) and a metadata footer. When nothing is
    staged, only the header and a "No changes detected." line are returned.

    Args:
        repo_path: Path to the repository

    Returns:
        Formatted PR description
    """
    changes = get_staged_changes(repo_path)
    template_repo, template_branch = get_template_info(repo_path)
    last_sync = get_last_sync_date(repo_path)

    # Build header
    lines = _build_header(template_repo)

    # Check if there are any changes
    if not any(changes.values()):
        lines.append("No changes detected.")
        return "\n".join(lines)

    # Add summary
    lines.extend(_build_summary(changes))

    # Add detailed changes by category
    all_changed_files = changes["added"] + changes["modified"] + changes["deleted"]
    categories = categorize_files(all_changed_files)

    # Sets built once, so the per-category filtering below does constant-time
    # membership tests instead of rescanning the change lists for every file.
    sections = (
        ("Added", "✅", frozenset(changes["added"])),
        ("Modified", "📝", frozenset(changes["modified"])),
        ("Deleted", "❌", frozenset(changes["deleted"])),
    )

    if categories:
        lines.append("### 📁 Changes by Category")
        lines.append("")

        for category, files in sorted(categories.items()):
            lines.append(f"#### {category}")
            lines.append("")

            # Group files by change type
            for title, emoji, members in sections:
                selected = [f for f in files if f in members]
                _add_category_section(lines, title, len(selected), selected, emoji)

    # Add footer
    lines.extend(_build_footer(template_repo, template_branch, last_sync))

    return "\n".join(lines)

374 

375 

def summarise(target: Path, output: Path | None = None) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.

    Raises:
        RuntimeError: If ``target`` does not contain a ``.git`` entry.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # Check if target is a git repository. In linked worktrees and submodules
    # ".git" is a regular file pointing at the real git directory, so test for
    # existence rather than for a directory.
    if not (target / ".git").exists():
        logger.error(f"Target directory is not a git repository: {target}")
        logger.error("Initialize a git repository with 'git init' first")
        raise RuntimeError(f"Target directory is not a git repository: {target}")  # noqa: TRY003

    # Generate the PR description
    description = generate_pr_description(target)

    # Output the description
    if output is not None:
        output_path = output.resolve()
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        # Print to stdout
        print(description)

    logger.success("Summary generated successfully")