Coverage for src / rhiza / commands / summarise.py: 100%

157 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-12 20:13 +0000

1"""Command for generating PR descriptions from staged changes. 

2 

3This module provides functionality to analyze staged git changes and generate 

4structured PR descriptions for rhiza sync operations. 

5""" 

6 

7import subprocess # nosec B404 

8import sys 

9from collections import defaultdict 

10from datetime import datetime 

11from pathlib import Path 

12 

13from loguru import logger 

14 

15 

def run_git_command(args: list[str], cwd: Path | None = None) -> str:
    """Execute ``git`` with the given arguments and hand back its stdout.

    Args:
        args: Arguments passed to git (the leading ``git`` is added here).
        cwd: Directory to run the command in; ``None`` leaves the choice to subprocess.

    Returns:
        The command's standard output with surrounding whitespace stripped, or an
        empty string when git exits with a non-zero status (the failure is logged).
    """
    command = ["git", *args]
    try:
        completed = subprocess.run(  # nosec B603 B607
            command,
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        # Best effort: report the failure and let callers treat it as "no output".
        logger.error(f"Error running git {' '.join(args)}: {exc.stderr}")
        return ""
    return completed.stdout.strip()

38 

39 

def get_staged_changes(repo_path: Path) -> dict[str, list[str]]:
    """Get list of staged changes categorized by type.

    Parses the output of ``git diff --cached --name-status``. Ordinary lines have
    the form ``<status>TAB<path>``. Rename lines have the form
    ``R<score>TAB<old path>TAB<new path>``; only the new path is recorded, so the
    entry names the file as it exists in the index.

    Args:
        repo_path: Path to the repository

    Returns:
        Dictionary with keys 'added', 'modified', 'deleted' containing file lists.
        Renames are reported under 'modified'. Lines with any other status are
        ignored. All lists are empty when git produced no output.
    """
    changes: dict[str, list[str]] = {
        "added": [],
        "modified": [],
        "deleted": [],
    }

    # Get staged changes
    output = run_git_command(["diff", "--cached", "--name-status"], cwd=repo_path)

    for line in output.split("\n"):
        if not line:
            continue
        parts = line.split("\t", 1)
        if len(parts) != 2:
            continue
        status, filepath = parts

        if status == "A":
            changes["added"].append(filepath)
        elif status == "M":
            changes["modified"].append(filepath)
        elif status == "D":
            changes["deleted"].append(filepath)
        elif status.startswith("R"):
            # Renamed file - treat as modified. The remainder of the line is
            # "<old path>TAB<new path>"; keep only the destination path instead
            # of the raw tab-joined pair.
            changes["modified"].append(filepath.split("\t")[-1])

    return changes

77 

78 

79def _get_config_files() -> set[str]: 

80 """Get set of known configuration files. 

81 

82 Returns: 

83 Set of configuration file names 

84 """ 

85 return { 

86 "Makefile", 

87 "ruff.toml", 

88 "pytest.ini", 

89 ".editorconfig", 

90 ".gitignore", 

91 ".pre-commit-config.yaml", 

92 "renovate.json", 

93 ".python-version", 

94 } 

95 

96 

97def _categorize_by_directory(first_dir: str, filepath: str) -> str | None: 

98 """Categorize file based on its first directory. 

99 

100 Args: 

101 first_dir: First directory in the path 

102 filepath: Full file path 

103 

104 Returns: 

105 Category name or None if no match 

106 """ 

107 if first_dir == ".github": 

108 path_parts = Path(filepath).parts 

109 if len(path_parts) > 1 and path_parts[1] == "workflows": 

110 return "GitHub Actions Workflows" 

111 return "GitHub Configuration" 

112 

113 if first_dir == ".rhiza": 

114 if "script" in filepath.lower(): 

115 return "Rhiza Scripts" 

116 if "Makefile" in filepath: 

117 return "Makefiles" 

118 return "Rhiza Configuration" 

119 

120 if first_dir == "tests": 

121 return "Tests" 

122 

123 if first_dir == "book": 

124 return "Documentation" 

125 

126 return None 

127 

128 

def _categorize_single_file(filepath: str) -> str:
    """Decide which category a single file path belongs to.

    Directory-based rules are consulted first; the ``.md`` suffix and the set of
    known configuration file names are only checked when no directory rule matched.

    Args:
        filepath: File path to categorize

    Returns:
        Category name ("Other" when nothing matches or the path has no components)
    """
    segments = Path(filepath).parts
    if segments:
        by_directory = _categorize_by_directory(segments[0], filepath)
        if by_directory:
            return by_directory

        if filepath.endswith(".md"):
            return "Documentation"

        # Compared against the full path, so only exact matches of the listed names count.
        if filepath in _get_config_files():
            return "Configuration Files"

    return "Other"

156 

157 

def categorize_files(files: list[str]) -> dict[str, list[str]]:
    """Group file paths under the category each one is assigned to.

    Args:
        files: List of file paths

    Returns:
        Plain dictionary mapping each category name to its files, with categories
        and files kept in the order they were first encountered
    """
    grouped: dict[str, list[str]] = {}
    for path in files:
        grouped.setdefault(_categorize_single_file(path), []).append(path)
    return grouped

174 

175 

def get_template_info(repo_path: Path) -> tuple[str, str]:
    """Get template repository and branch from template.yml.

    The file ``<repo_path>/.rhiza/template.yml`` is scanned line by line for the
    ``template-repository:`` and ``template-branch:`` keys; it is not parsed as
    YAML. Values may be bare, double-quoted or single-quoted. A key that is
    missing or has an empty value keeps its default.

    Args:
        repo_path: Path to the repository

    Returns:
        Tuple of (template_repo, template_branch). Defaults are
        ("jebel-quant/rhiza", "main"), also used when the file does not exist.
    """
    template_repo = "jebel-quant/rhiza"
    template_branch = "main"

    template_file = repo_path / ".rhiza" / "template.yml"
    if not template_file.exists():
        return template_repo, template_branch

    # Explicit encoding: the platform default is locale dependent.
    with open(template_file, encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line.startswith(("template-repository:", "template-branch:")):
                continue

            key, _, raw_value = line.partition(":")
            # Drop surrounding whitespace and either style of YAML quoting.
            value = raw_value.strip().strip("\"'")
            if not value:
                # An empty value would produce a broken link/label; keep the default.
                continue

            if key == "template-repository":
                template_repo = value
            else:
                template_branch = value

    return template_repo, template_branch

202 

203 

def get_last_sync_date(repo_path: Path) -> str | None:
    """Get the date of the last sync commit.

    The primary source is the committer date of the newest commit whose message
    matches any of the patterns "rhiza", "Sync" or "template" (case-insensitive;
    multiple ``--grep`` options are combined with OR). If no such commit is found,
    the modification time of ``.rhiza/history`` is used instead.

    Args:
        repo_path: Path to the repository

    Returns:
        ISO 8601 date string including a UTC offset, or None if not found
    """
    output = run_git_command(
        ["log", "--grep=rhiza", "--grep=Sync", "--grep=template", "-i", "--format=%cI", "-1"], cwd=repo_path
    )

    if output:
        return output

    # Fallback: try to get date from history file if it exists
    history_file = repo_path / ".rhiza" / "history"
    if history_file.exists():
        modified_at = history_file.stat().st_mtime
        # Attach the local timezone so the value carries an offset, like the
        # git "%cI" output above, instead of being a naive timestamp.
        return datetime.fromtimestamp(modified_at).astimezone().isoformat()

    return None

229 

230 

231def _format_file_list(files: list[str], status_emoji: str) -> list[str]: 

232 """Format a list of files with the given status emoji. 

233 

234 Args: 

235 files: List of file paths 

236 status_emoji: Emoji to use (✅ for added, 📝 for modified, ❌ for deleted) 

237 

238 Returns: 

239 List of formatted lines 

240 """ 

241 lines = [] 

242 for f in sorted(files): 

243 lines.append(f"- {status_emoji} `{f}`") 

244 return lines 

245 

246 

247def _add_category_section(lines: list[str], title: str, count: int, files: list[str], emoji: str) -> None: 

248 """Add a collapsible section for a category and change type. 

249 

250 Args: 

251 lines: List to append lines to 

252 title: Section title (e.g., "Added", "Modified") 

253 count: Number of files 

254 files: List of file paths 

255 emoji: Status emoji 

256 """ 

257 if not files: 

258 return 

259 

260 lines.append("<details>") 

261 lines.append(f"<summary>{title} ({count})</summary>") 

262 lines.append("") 

263 lines.extend(_format_file_list(files, emoji)) 

264 lines.append("") 

265 lines.append("</details>") 

266 lines.append("") 

267 

268 

269def _build_header(template_repo: str) -> list[str]: 

270 """Build the PR description header. 

271 

272 Args: 

273 template_repo: Template repository name 

274 

275 Returns: 

276 List of header lines 

277 """ 

278 return [ 

279 "## 🔄 Template Synchronization", 

280 "", 

281 f"This PR synchronizes the repository with the [{template_repo}](https://github.com/{template_repo}) template.", 

282 "", 

283 ] 

284 

285 

286def _build_summary(changes: dict[str, list[str]]) -> list[str]: 

287 """Build the change summary section. 

288 

289 Args: 

290 changes: Dictionary of changes by type 

291 

292 Returns: 

293 List of summary lines 

294 """ 

295 return [ 

296 "### 📊 Change Summary", 

297 "", 

298 f"- **{len(changes['added'])}** files added", 

299 f"- **{len(changes['modified'])}** files modified", 

300 f"- **{len(changes['deleted'])}** files deleted", 

301 "", 

302 ] 

303 

304 

305def _build_footer(template_repo: str, template_branch: str, last_sync: str | None) -> list[str]: 

306 """Build the PR description footer with metadata. 

307 

308 Args: 

309 template_repo: Template repository name 

310 template_branch: Template branch name 

311 last_sync: Last sync date string or None 

312 

313 Returns: 

314 List of footer lines 

315 """ 

316 lines = [ 

317 "---", 

318 "", 

319 "**🤖 Generated by [rhiza](https://github.com/jebel-quant/rhiza-cli)**", 

320 "", 

321 f"- Template: `{template_repo}@{template_branch}`", 

322 ] 

323 if last_sync: 

324 lines.append(f"- Last sync: {last_sync}") 

325 lines.append(f"- Sync date: {datetime.now().astimezone().isoformat()}") 

326 return lines 

327 

328 

def generate_pr_description(repo_path: Path) -> str:
    """Generate PR description based on staged changes.

    The description consists of a header, a change summary, one section per file
    category (categories in alphabetical order, each with collapsible
    Added/Modified/Deleted lists) and a metadata footer. When nothing is staged,
    only the header followed by "No changes detected." is returned.

    Args:
        repo_path: Path to the repository

    Returns:
        Formatted PR description (markdown, lines joined with newlines)
    """
    changes = get_staged_changes(repo_path)
    template_repo, template_branch = get_template_info(repo_path)
    last_sync = get_last_sync_date(repo_path)

    # Build header
    lines = _build_header(template_repo)

    # Check if there are any changes
    total_changes = sum(len(files) for files in changes.values())
    if total_changes == 0:
        lines.append("No changes detected.")
        return "\n".join(lines)

    # Add summary
    lines.extend(_build_summary(changes))

    # Add detailed changes by category
    all_changed_files = changes["added"] + changes["modified"] + changes["deleted"]
    categories = categorize_files(all_changed_files)

    # Build the membership sets once; testing against the lists inside the
    # per-category comprehensions would rescan them for every file.
    change_groups = (
        ("Added", set(changes["added"]), "✅"),
        ("Modified", set(changes["modified"]), "📝"),
        ("Deleted", set(changes["deleted"]), "❌"),
    )

    if categories:
        lines.append("### 📁 Changes by Category")
        lines.append("")

        for category, files in sorted(categories.items()):
            lines.append(f"#### {category}")
            lines.append("")

            # Group files by change type
            for title, members, emoji in change_groups:
                matching = [f for f in files if f in members]
                _add_category_section(lines, title, len(matching), matching, emoji)

    # Add footer
    lines.extend(_build_footer(template_repo, template_branch, last_sync))

    return "\n".join(lines)

379 

380 

def summarise(target: Path, output: Path | None = None) -> None:
    """Generate a summary of staged changes for rhiza sync operations.

    This command analyzes staged git changes and generates a structured
    PR description with:
    - Summary statistics (files added/modified/deleted)
    - Changes categorized by type (workflows, configs, docs, tests, etc.)
    - Template repository information
    - Last sync date

    The process exits with status 1 when ``target`` has no ``.git`` entry.

    Args:
        target: Path to the target repository.
        output: Optional output file path. If not provided, prints to stdout.
            The file is written as UTF-8.
    """
    target = target.resolve()
    logger.info(f"Target repository: {target}")

    # Check if target is a git repository. ".git" is a directory in a regular
    # clone but a plain file in linked worktrees and submodules, so test for
    # existence rather than requiring a directory.
    if not (target / ".git").exists():
        logger.error(f"Target directory is not a git repository: {target}")
        logger.error("Initialize a git repository with 'git init' first")
        sys.exit(1)

    # Generate the PR description
    description = generate_pr_description(target)

    # Output the description
    if output:
        output_path = output.resolve()
        # The description contains emoji; the locale's default encoding may be
        # unable to encode them, so write UTF-8 explicitly.
        output_path.write_text(description, encoding="utf-8")
        logger.success(f"PR description written to {output_path}")
    else:
        # Print to stdout
        print(description)

    logger.success("Summary generated successfully")