#!/usr/bin/env python3
"""
Fix broken cross-references in API documentation.
"""

import os
import re
from pathlib import Path

SCRIPT_DIR = Path(__file__).resolve().parent
# When this script lives in a `_tools` subdirectory, the docs root is its parent;
# otherwise the script sits directly in the docs root.
API_DOCS_ROOT = SCRIPT_DIR.parent if SCRIPT_DIR.name == "_tools" else SCRIPT_DIR


def normalize_path(path_str):
    """Normalize a link path: convert backslashes to forward slashes and
    drop '.' and empty segments (e.g. './module/.//page.md' -> 'module/page.md').

    Note: '..' segments are preserved as-is; only no-op segments are removed.
    """
    parts = [
        part
        for part in path_str.replace("\\", "/").split("/")
        if part not in (".", "")
    ]
    return "/".join(parts)


def resolve_reference(current_file, ref_path):
    """Resolve *ref_path* relative to *current_file*'s directory.

    Returns a tuple of two candidate absolute paths:
    (resolved from the normalized ref, resolved from the raw slash-fixed ref).
    NOTE(review): not called elsewhere in this module; kept for API stability.
    """
    current_file = Path(current_file)
    parent_dir = current_file.parent
    normalized_ref = normalize_path(ref_path)
    resolved = (parent_dir / normalized_ref).resolve()
    resolved2 = (parent_dir / ref_path.replace("\\", "/")).resolve()
    return resolved, resolved2


def get_relative_path(from_file, to_file):
    """Return the relative path (as a '/'-joined string) from *from_file* to
    *to_file*.

    Both arguments should be in the same form (e.g. both absolute) so their
    ``parts`` share a common prefix; otherwise the result degrades to a long
    '..'-free path.
    """
    from_parts = Path(from_file).parts
    to_parts = Path(to_file).parts

    # Length of the shared leading path prefix.
    common_len = 0
    for i in range(min(len(from_parts), len(to_parts))):
        if from_parts[i] != to_parts[i]:
            break
        common_len = i + 1

    # Climb out of from_file's directory (-1 excludes the filename itself),
    # then descend into to_file's remaining components.
    up_count = len(from_parts) - common_len - 1
    parts = [".."] * up_count + list(to_parts[common_len:])
    return "/".join(parts)


def find_file_by_name(target_name, base_dir):
    """Find a file named *target_name* anywhere under *base_dir*.

    Returns the first exact-filename match as a Path, or None.

    FIX: the original converted *base_dir* to a Path and then ignored it,
    always searching API_DOCS_ROOT; the parameter is now honored. (The only
    caller in this module passes API_DOCS_ROOT, so its behavior is unchanged.)
    """
    base_dir = Path(base_dir)
    for match in base_dir.glob(f"**/{target_name}"):
        # glob can match directories too; require an exact filename match.
        if match.name == target_name:
            return match
    return None


def fix_links_in_file(file_path, verbose=True):
    """Analyze and fix markdown links in a single file.

    Scans for links of the form ``[text](path.md)``, repairs paths with
    redundant segments, and repoints links whose target exists elsewhere
    under API_DOCS_ROOT. Rewrites the file in place when anything changed.

    Parameters:
        file_path: path to the markdown file to process.
        verbose: accepted for interface compatibility; currently unused.

    Returns:
        A list of fix-record dicts. Records containing an "error" key
        describe references whose target could not be located at all.
    """
    file_path = Path(file_path)
    if not file_path.exists():
        return []

    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()
    original_content = content
    fixes = []

    # Markdown links [text](target.md); anchors and external URLs are
    # filtered inside the replacer rather than by the pattern.
    link_pattern = r"\[([^\]]*)\]\(([^)]+\.md)\)"

    def line_of(match):
        # 1-based line number of the match within the original content.
        return content[: match.start()].count("\n") + 1

    def replace_link(match):
        link_text = match.group(1)
        link_path = match.group(2)

        # Leave external links and in-page anchors untouched.
        if link_path.startswith("http://") or link_path.startswith("https://"):
            return match.group(0)
        if link_path.startswith("#"):
            return match.group(0)

        normalized = normalize_path(link_path)
        parent_dir = file_path.parent

        # 1) Does the link resolve exactly as written?
        target_path = parent_dir / link_path.replace("\\", "/")
        target_exists = target_path.exists()

        # 2) Does it resolve after normalization?
        if not target_exists:
            target_path = parent_dir / normalized
            target_exists = target_path.exists()

        if not target_exists:
            # 3) Last resort: search the docs tree for a file with the same
            # basename and repoint the link at it.
            filename = Path(normalized).name
            found = find_file_by_name(filename, API_DOCS_ROOT)
            if found:
                correct_rel = get_relative_path(file_path, found)
                # Record a fix only when the rewrite actually differs from
                # the normalized form of what was already there.
                if correct_rel != normalized:
                    fixes.append(
                        {
                            "file": file_path,
                            "line": line_of(match),
                            "old": link_path,
                            "new": correct_rel,
                            "target": found,
                        }
                    )
                return f"[{link_text}]({correct_rel})"
            # Target is nowhere under the docs root: report, keep link as-is.
            fixes.append(
                {
                    "file": file_path,
                    "line": line_of(match),
                    "old": link_path,
                    "new": None,
                    "target": None,
                    "error": "Target file not found",
                }
            )
            return match.group(0)

        if normalized != link_path:
            # Target exists but the path carries redundant segments — clean it.
            fixes.append(
                {
                    "file": file_path,
                    "line": line_of(match),
                    "old": link_path,
                    "new": normalized,
                    "target": target_path,
                }
            )
            return f"[{link_text}]({normalized})"

        return match.group(0)

    new_content = re.sub(link_pattern, replace_link, content)

    if new_content != original_content:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(new_content)

    return fixes


def main():
    """Run the fixer over every markdown file under API_DOCS_ROOT and print
    a report of fixes applied and references that remain broken."""
    print("=" * 70)
    print("API Documentation Cross-Reference Fixer")
    print("=" * 70)

    md_files = list(API_DOCS_ROOT.glob("**/*.md"))
    print(f"\nFound {len(md_files)} markdown files in {API_DOCS_ROOT}")

    all_fixes = []
    broken_refs = []
    for md_file in md_files:
        fixes = fix_links_in_file(md_file, verbose=False)
        for fix in fixes:
            if fix.get("error"):
                broken_refs.append(fix)
            else:
                all_fixes.append(fix)

    print(f"\n{'=' * 70}")
    print("FIXES APPLIED:")
    print("=" * 70)
    if all_fixes:
        for fix in all_fixes:
            rel_file = fix["file"].relative_to(API_DOCS_ROOT)
            print(f"\n File: {rel_file}")
            print(f" Line: {fix['line']}")
            print(f" Old: {fix['old']}")
            print(f" New: {fix['new']}")
    else:
        print("\n No fixes needed.")

    print(f"\n{'=' * 70}")
    print("BROKEN REFERENCES (target files don't exist):")
    print("=" * 70)
    if broken_refs:
        for ref in broken_refs:
            rel_file = ref["file"].relative_to(API_DOCS_ROOT)
            print(f"\n File: {rel_file}")
            print(f" Line: {ref['line']}")
            print(f" Broken ref: {ref['old']}")
    else:
        print("\n No broken references found.")

    print(f"\n{'=' * 70}")
    print("SUMMARY:")
    print(f" Total fixes applied: {len(all_fixes)}")
    print(f" Broken references: {len(broken_refs)}")
    print("=" * 70)

    return len(all_fixes), len(broken_refs)


if __name__ == "__main__":
    main()