Files
XCEngine/docs/api/_tools/audit_api_docs.py

403 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import argparse
import re
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
# Directory layout: this script lives in <repo>/docs/api/_tools/.  DOC_ROOT is
# the docs/api directory (falls back to the script dir when not under _tools),
# and REPO_ROOT is two levels above DOC_ROOT (docs/api -> docs -> repo).
SCRIPT_DIR = Path(__file__).resolve().parent
DOC_ROOT = SCRIPT_DIR.parent if SCRIPT_DIR.name == "_tools" else SCRIPT_DIR
REPO_ROOT = DOC_ROOT.parents[1]
# C++ public headers live under engine/include/XCEngine; the docs mirror that
# tree under docs/api/XCEngine ("canonical"/parallel tree).
INCLUDE_ROOT = REPO_ROOT / "engine" / "include"
PUBLIC_INCLUDE_ROOT = INCLUDE_ROOT / "XCEngine"
PARALLEL_ROOT = DOC_ROOT / "XCEngine"
META_ROOT = DOC_ROOT / "_meta"
DEFAULT_REPORT = META_ROOT / "rebuild-status.md"
# Metadata fields extracted from doc pages (labels are Chinese):
# HEADER_RE      -> "**头文件**: `...h`"   (header file; group 1 = header path)
# NAMESPACE_RE   -> "**命名空间**: `...`"  (namespace)
# TYPE_RE        -> "**类型**: `...`"      (type)
# DESCRIPTION_RE -> "**描述**: ..."        (description)
HEADER_RE = re.compile(r"^\*\*头文件\*\*:\s*`([^`]+\.h)`", re.MULTILINE)
NAMESPACE_RE = re.compile(r"^\*\*命名空间\*\*:\s*`[^`]+`", re.MULTILINE)
TYPE_RE = re.compile(r"^\*\*类型\*\*:\s*`[^`]+`", re.MULTILINE)
DESCRIPTION_RE = re.compile(r"^\*\*描述\*\*:\s*.+$", re.MULTILINE)
# Standard markdown inline link: [text](target).
MD_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
# Section headings used by the old page template; their presence marks a page
# as not yet migrated.
LEGACY_SECTION_RE = re.compile(r"^## (Syntax|Remarks|See Also|Examples)$", re.MULTILINE)
# Triple-backtick fenced code blocks (non-greedy, may span lines).
FENCED_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL)
@dataclass
class ModuleCoverage:
    """Documentation coverage statistics for one engine module."""

    # Module name (second path component under XCEngine/).
    module: str
    # Total public headers the module ships.
    public_headers: int
    # Headers that have a valid canonical doc page referencing them.
    documented_headers: int

    @property
    def missing_headers(self) -> int:
        """Count of public headers still lacking a documentation page."""
        return self.public_headers - self.documented_headers
def normalize_rel_path(path: str) -> str:
    """Canonicalize a relative path string to forward-slash separators."""
    return "/".join(path.split("\\"))
def iter_markdown_files() -> list[Path]:
    """Return every markdown page under DOC_ROOT, excluding the generated report file."""
    pages = [
        page
        for page in DOC_ROOT.rglob("*.md")
        if page.name != DEFAULT_REPORT.name
    ]
    return sorted(pages)
def iter_canonical_markdown_files() -> list[Path]:
    """Return every markdown page inside the canonical (parallel) doc tree, sorted."""
    return sorted(PARALLEL_ROOT.rglob("*.md"))
def iter_public_headers() -> list[str]:
    """Return all public header paths relative to INCLUDE_ROOT, '/'-separated and sorted."""
    headers: list[str] = []
    for header in PUBLIC_INCLUDE_ROOT.rglob("*.h"):
        rel = header.relative_to(INCLUDE_ROOT).as_posix()
        headers.append(normalize_rel_path(rel))
    return sorted(headers)
def iter_public_include_dirs() -> list[str]:
    """Return the public include tree as relative dir names, root node first."""
    nodes = ["XCEngine"]
    for entry in sorted(PUBLIC_INCLUDE_ROOT.rglob("*")):
        if not entry.is_dir():
            continue
        nodes.append(f"XCEngine/{entry.relative_to(PUBLIC_INCLUDE_ROOT).as_posix()}")
    return nodes
def dir_index_name(relative: str) -> str:
    """Index-page filename for a directory node, e.g. 'XCEngine/Core' -> 'Core.md'."""
    return Path(relative).name + ".md"
def dir_index_doc_path(relative: str) -> Path:
    """Absolute path of the directory-overview page for a source dir node."""
    index_file = dir_index_name(relative)
    return DOC_ROOT.joinpath(relative, index_file)
def resolve_md_target(source: Path, target: str) -> Path:
    """Resolve a relative markdown link target against the page that contains it.

    Strips any '#fragment' suffix and normalizes backslashes first.
    """
    without_fragment = target.split("#", 1)[0]
    normalized = without_fragment.replace("\\", "/")
    return (source.parent / normalized).resolve()
def strip_fenced_code_blocks(content: str) -> str:
    """Drop ```-fenced code blocks so links inside examples are not link-checked."""
    return re.sub(FENCED_CODE_BLOCK_RE, "", content)
def is_dir_index_page(page: Path) -> bool:
    """True when *page* is the overview page of a mirrored source directory.

    Such a page lives in the canonical tree, is named after its parent
    directory, and that directory actually exists under the include root.
    """
    if not page.is_relative_to(PARALLEL_ROOT):
        return False
    if page.stem != page.parent.name:
        return False
    mirrored_dir = PUBLIC_INCLUDE_ROOT / page.parent.relative_to(PARALLEL_ROOT)
    # is_dir() already implies existence for real paths.
    return mirrored_dir.exists() and mirrored_dir.is_dir()
def is_flat_header_page(page: Path, rel_page: str) -> bool:
    """True for a canonical page whose path maps 1:1 onto a header file.

    Directory-index pages are excluded; the remaining pages are "flat" header
    mirrors (e.g. XCEngine/Core/Foo.md <-> XCEngine/Core/Foo.h).
    """
    if not rel_page.startswith("XCEngine/") or is_dir_index_page(page):
        return False
    header_candidate = INCLUDE_ROOT / Path(rel_page).with_suffix(".h")
    return header_candidate.exists()
def collect_doc_state(report_path: Path) -> dict[str, object]:
    """Scan the doc tree and the public headers; return all audit metrics.

    *report_path* is the output report, excluded from broken-link checks so
    links pointing at the not-yet-written report are not flagged.
    """
    markdown_files = iter_markdown_files()
    canonical_markdown_files = iter_canonical_markdown_files()
    public_headers = iter_public_headers()
    public_include_dirs = iter_public_include_dirs()
    declared_header_refs: set[str] = set()
    valid_header_refs: set[str] = set()
    canonical_valid_header_refs: set[str] = set()
    invalid_header_refs: list[tuple[str, str]] = []
    broken_md_links: list[tuple[str, str]] = []
    non_md_relative_links: list[tuple[str, str]] = []
    old_template_pages: list[str] = []
    flat_header_pages: list[str] = []
    metadata_counts = {
        "namespace": 0,
        "type": 0,
        "description": 0,
        "header": 0,
    }
    for page in markdown_files:
        rel_page = normalize_rel_path(page.relative_to(DOC_ROOT).as_posix())
        content = page.read_text(encoding="utf-8")
        is_canonical_page = rel_page.startswith("XCEngine/")
        # Metadata fields and legacy-template markers only count on canonical pages.
        if is_canonical_page and NAMESPACE_RE.search(content):
            metadata_counts["namespace"] += 1
        if is_canonical_page and TYPE_RE.search(content):
            metadata_counts["type"] += 1
        if is_canonical_page and DESCRIPTION_RE.search(content):
            metadata_counts["description"] += 1
        if is_canonical_page and HEADER_RE.search(content):
            metadata_counts["header"] += 1
        if is_canonical_page and LEGACY_SECTION_RE.search(content):
            old_template_pages.append(rel_page)
        if is_flat_header_page(page, rel_page):
            flat_header_pages.append(rel_page)
        # Validate every declared header reference against the include tree.
        for match in HEADER_RE.finditer(content):
            header = normalize_rel_path(match.group(1))
            declared_header_refs.add(header)
            if (INCLUDE_ROOT / header).exists():
                valid_header_refs.add(header)
                if is_canonical_page:
                    canonical_valid_header_refs.add(header)
            elif is_canonical_page:
                invalid_header_refs.append((rel_page, header))
        # Link check runs on canonical pages plus the _meta/_tools support dirs.
        if is_canonical_page or rel_page.startswith(("_meta/", "_tools/")):
            link_scan_content = strip_fenced_code_blocks(content)
            for _, target in MD_LINK_RE.findall(link_scan_content):
                if target.startswith(("http://", "https://", "mailto:", "#")):
                    continue
                normalized = target.replace("\\", "/")
                if normalized.endswith(".md") or ".md#" in normalized:
                    resolved = resolve_md_target(page, normalized)
                    if not resolved.exists() and resolved != report_path.resolve():
                        broken_md_links.append((rel_page, target))
                    continue
                non_md_relative_links.append((rel_page, target))
    # Per-module coverage: module name is the second path component
    # ("XCEngine/<module>/...").
    public_by_module: dict[str, list[str]] = defaultdict(list)
    documented_by_module: dict[str, set[str]] = defaultdict(set)
    for header in public_headers:
        public_by_module[header.split("/", 2)[1]].append(header)
    for header in canonical_valid_header_refs:
        if not header.startswith("XCEngine/"):
            continue
        documented_by_module[header.split("/", 2)[1]].add(header)
    module_coverages = [
        ModuleCoverage(
            module=module,
            public_headers=len(headers),
            documented_headers=len(documented_by_module.get(module, set())),
        )
        for module, headers in sorted(public_by_module.items())
    ]
    missing_headers = [
        header for header in public_headers if header not in canonical_valid_header_refs
    ]
    missing_parallel_indexes = [
        relative
        for relative in public_include_dirs
        if not dir_index_doc_path(relative).exists()
    ]
    support_top_dirs = sorted(
        entry.name
        for entry in DOC_ROOT.iterdir()
        if entry.is_dir() and entry.name in {"_meta", "_tools"}
    )
    return {
        "generated_at": datetime.now(),
        "markdown_files": markdown_files,
        "canonical_markdown_files": canonical_markdown_files,
        "public_headers": public_headers,
        "public_include_dirs": public_include_dirs,
        "declared_header_refs": sorted(declared_header_refs),
        "valid_header_refs": sorted(valid_header_refs),
        "canonical_valid_header_refs": sorted(canonical_valid_header_refs),
        "invalid_header_refs": invalid_header_refs,
        "broken_md_links": broken_md_links,
        "non_md_relative_links": non_md_relative_links,
        "old_template_pages": sorted(old_template_pages),
        "flat_header_pages": sorted(flat_header_pages),
        "missing_headers": missing_headers,
        "module_coverages": module_coverages,
        "metadata_counts": metadata_counts,
        "support_top_dirs": support_top_dirs,
        "missing_parallel_indexes": missing_parallel_indexes,
    }
def format_pairs_table(headers: tuple[str, str], rows: list[tuple[str, str]]) -> list[str]:
    """Render two-column rows as markdown table lines; cell values get backticks."""
    left_title, right_title = headers
    table = [
        f"| {left_title} | {right_title} |",
        "|------|------|",
    ]
    table.extend(f"| `{left}` | `{right}` |" for left, right in rows)
    return table
def build_report(state: dict[str, object]) -> str:
    """Format the audit *state* as the markdown status report (Chinese labels)."""
    generated_at: datetime = state["generated_at"]  # type: ignore[assignment]
    markdown_files: list[Path] = state["markdown_files"]  # type: ignore[assignment]
    canonical_markdown_files: list[Path] = state["canonical_markdown_files"]  # type: ignore[assignment]
    public_headers: list[str] = state["public_headers"]  # type: ignore[assignment]
    public_include_dirs: list[str] = state["public_include_dirs"]  # type: ignore[assignment]
    valid_header_refs: list[str] = state["valid_header_refs"]  # type: ignore[assignment]
    canonical_valid_header_refs: list[str] = state["canonical_valid_header_refs"]  # type: ignore[assignment]
    invalid_header_refs: list[tuple[str, str]] = state["invalid_header_refs"]  # type: ignore[assignment]
    broken_md_links: list[tuple[str, str]] = state["broken_md_links"]  # type: ignore[assignment]
    non_md_relative_links: list[tuple[str, str]] = state["non_md_relative_links"]  # type: ignore[assignment]
    old_template_pages: list[str] = state["old_template_pages"]  # type: ignore[assignment]
    flat_header_pages: list[str] = state["flat_header_pages"]  # type: ignore[assignment]
    missing_headers: list[str] = state["missing_headers"]  # type: ignore[assignment]
    module_coverages: list[ModuleCoverage] = state["module_coverages"]  # type: ignore[assignment]
    metadata_counts: dict[str, int] = state["metadata_counts"]  # type: ignore[assignment]
    support_top_dirs: list[str] = state["support_top_dirs"]  # type: ignore[assignment]
    missing_parallel_indexes: list[str] = state["missing_parallel_indexes"]  # type: ignore[assignment]
    lines: list[str] = []
    # Title, provenance, and headline counts.
    lines.extend(
        [
            "# API 文档重构状态",
            "",
            f"**生成时间**: `{generated_at.strftime('%Y-%m-%d %H:%M:%S')}`",
            "",
            "**来源**: `docs/api/_tools/audit_api_docs.py`",
            "",
            "## 摘要",
            "",
            f"- Markdown 页面数(全部): `{len(markdown_files)}`",
            f"- Markdown 页面数canonical: `{len(canonical_markdown_files)}`",
            f"- Public headers 数: `{len(public_headers)}`",
            f"- 有效头文件引用数(全部): `{len(valid_header_refs)}`",
            f"- 有效头文件引用数canonical: `{len(canonical_valid_header_refs)}`",
            f"- 无效头文件引用数: `{len(invalid_header_refs)}`",
            f"- 失效 `.md` 链接数: `{len(broken_md_links)}`",
            f"- 非 `.md` 相对链接数: `{len(non_md_relative_links)}`",
            f"- 旧模板页面数: `{len(old_template_pages)}`",
            f"- 扁平 header 页面数: `{len(flat_header_pages)}`",
            "",
            "## 平行目录",
            "",
            f"- Canonical 根目录: `{PARALLEL_ROOT.relative_to(DOC_ROOT).as_posix()}`",
            f"- 源码目录节点数: `{len(public_include_dirs)}`",
            f"- 已生成目录总览页节点数: `{len(public_include_dirs) - len(missing_parallel_indexes)}`",
            f"- 缺失目录总览页节点数: `{len(missing_parallel_indexes)}`",
        ]
    )
    if support_top_dirs:
        lines.append(f"- 支撑目录: `{', '.join(support_top_dirs)}`")
    # Per-module coverage table.
    lines.extend(
        [
            "",
            "## 模块覆盖",
            "",
            "| 模块 | Public headers | 已覆盖 | 未覆盖 |",
            "|------|----------------|--------|--------|",
        ]
    )
    for coverage in module_coverages:
        lines.append(
            f"| `{coverage.module}` | `{coverage.public_headers}` | "
            f"`{coverage.documented_headers}` | `{coverage.missing_headers}` |"
        )
    # Metadata-field coverage table.
    lines.extend(
        [
            "",
            "## 元信息覆盖",
            "",
            "| 字段 | 页面数 |",
            "|------|--------|",
            f"| `命名空间` | `{metadata_counts['namespace']}` |",
            f"| `类型` | `{metadata_counts['type']}` |",
            f"| `描述` | `{metadata_counts['description']}` |",
            f"| `头文件` | `{metadata_counts['header']}` |",
            "",
        ]
    )
    # Optional problem sections; long lists are truncated to keep the report readable.
    if missing_parallel_indexes:
        lines.append("## 缺失的平行目录总览页")
        lines.append("")
        lines.extend(f"- `{relative}`" for relative in missing_parallel_indexes)
        lines.append("")
    if invalid_header_refs:
        lines.append("## 无效头文件引用")
        lines.append("")
        lines.extend(format_pairs_table(("文档", "头文件"), invalid_header_refs[:50]))
        lines.append("")
    if broken_md_links:
        lines.append("## 失效 Markdown 链接")
        lines.append("")
        lines.extend(format_pairs_table(("文档", "目标"), broken_md_links[:50]))
        lines.append("")
    if non_md_relative_links:
        lines.append("## 非 `.md` 相对链接")
        lines.append("")
        lines.extend(format_pairs_table(("文档", "目标"), non_md_relative_links[:50]))
        lines.append("")
    if old_template_pages:
        lines.append("## 旧模板页面")
        lines.append("")
        lines.extend(f"- `{page}`" for page in old_template_pages[:80])
        if len(old_template_pages) > 80:
            lines.append(f"- 其余 `{len(old_template_pages) - 80}` 个页面请直接运行脚本查看。")
        lines.append("")
    if flat_header_pages:
        lines.append("## 扁平 header 页面")
        lines.append("")
        lines.extend(f"- `{page}`" for page in flat_header_pages[:120])
        if len(flat_header_pages) > 120:
            lines.append(f"- 其余 `{len(flat_header_pages) - 120}` 个页面请直接运行脚本查看。")
        lines.append("")
    if missing_headers:
        lines.append("## 未覆盖的 public headers")
        lines.append("")
        lines.extend(f"- `{header}`" for header in missing_headers[:120])
        if len(missing_headers) > 120:
            lines.append(f"- 其余 `{len(missing_headers) - 120}` 个 header 请直接运行脚本查看。")
        lines.append("")
    # Single trailing newline, no trailing blank lines.
    return "\n".join(lines).rstrip() + "\n"
def main() -> int:
    """CLI entry point: run the audit, write the report, echo the key counts.

    Returns 0 (process exit status).
    """
    parser = argparse.ArgumentParser(description="Audit XCEngine API documentation.")
    parser.add_argument(
        "--report",
        default=str(DEFAULT_REPORT),
        help="Markdown report output path.",
    )
    args = parser.parse_args()
    report_path = Path(args.report)
    report_path.parent.mkdir(parents=True, exist_ok=True)
    state = collect_doc_state(report_path)
    report_path.write_text(build_report(state), encoding="utf-8")
    # Console summary mirrors the report's headline numbers.
    summary_fields = [
        ("Markdown pages (all)", "markdown_files"),
        ("Markdown pages (canonical)", "canonical_markdown_files"),
        ("Public headers", "public_headers"),
        ("Valid header refs (all)", "valid_header_refs"),
        ("Valid header refs (canonical)", "canonical_valid_header_refs"),
        ("Invalid header refs", "invalid_header_refs"),
        ("Broken .md links", "broken_md_links"),
        ("Non-.md relative links", "non_md_relative_links"),
        ("Old template pages", "old_template_pages"),
        ("Flat header pages", "flat_header_pages"),
        ("Missing directory index pages", "missing_parallel_indexes"),
    ]
    for label, key in summary_fields:
        print(f"{label}: {len(state[key])}")
    print(f"Report written to: {report_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())