refactor api documentation structure
This commit is contained in:
402
docs/api/_tools/audit_api_docs.py
Normal file
402
docs/api/_tools/audit_api_docs.py
Normal file
@@ -0,0 +1,402 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Path anchors. The script lives in docs/api/_tools/, so DOC_ROOT is docs/api/
# (falling back to the script dir when run from elsewhere) and REPO_ROOT is
# two levels above the doc root.
SCRIPT_DIR = Path(__file__).resolve().parent
DOC_ROOT = SCRIPT_DIR.parent if SCRIPT_DIR.name == "_tools" else SCRIPT_DIR
REPO_ROOT = DOC_ROOT.parents[1]
# Public C/C++ headers under engine/include/XCEngine are the coverage target.
INCLUDE_ROOT = REPO_ROOT / "engine" / "include"
PUBLIC_INCLUDE_ROOT = INCLUDE_ROOT / "XCEngine"
# Canonical docs mirror the include tree under docs/api/XCEngine.
PARALLEL_ROOT = DOC_ROOT / "XCEngine"
META_ROOT = DOC_ROOT / "_meta"
DEFAULT_REPORT = META_ROOT / "rebuild-status.md"

# Page-metadata lines, e.g. `**头文件**: \`XCEngine/Core/Foo.h\`` ("header
# file"); the others are namespace / type / description fields.
HEADER_RE = re.compile(r"^\*\*头文件\*\*:\s*`([^`]+\.h)`", re.MULTILINE)
NAMESPACE_RE = re.compile(r"^\*\*命名空间\*\*:\s*`[^`]+`", re.MULTILINE)
TYPE_RE = re.compile(r"^\*\*类型\*\*:\s*`[^`]+`", re.MULTILINE)
DESCRIPTION_RE = re.compile(r"^\*\*描述\*\*:\s*.+$", re.MULTILINE)
# Inline Markdown links: [text](target).
MD_LINK_RE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
# Section headings used only by the old (pre-refactor) page template.
LEGACY_SECTION_RE = re.compile(r"^## (Syntax|Remarks|See Also|Examples)$", re.MULTILINE)
# ``` ... ``` fenced blocks, matched non-greedily across lines.
FENCED_CODE_BLOCK_RE = re.compile(r"```.*?```", re.DOTALL)
||||
@dataclass
class ModuleCoverage:
    """Documentation coverage counters for one module under XCEngine/."""

    module: str  # module directory name, e.g. "Core"
    public_headers: int  # number of public .h files in the module
    documented_headers: int  # headers referenced by a valid canonical page

    @property
    def missing_headers(self) -> int:
        """Headers that still lack a valid canonical documentation page."""
        return self.public_headers - self.documented_headers
|
||||
|
||||
|
||||
def normalize_rel_path(path: str) -> str:
    """Return *path* with backslash separators converted to forward slashes."""
    return "/".join(path.split("\\"))
|
||||
|
||||
|
||||
def iter_markdown_files() -> list[Path]:
    """All Markdown pages under the doc root, excluding the generated report."""
    pages = [
        page
        for page in DOC_ROOT.rglob("*.md")
        if page.name != DEFAULT_REPORT.name
    ]
    pages.sort()
    return pages
|
||||
|
||||
|
||||
def iter_canonical_markdown_files() -> list[Path]:
    """All Markdown pages inside the canonical XCEngine/ doc tree, sorted."""
    # sorted() consumes the rglob() iterator directly; the previous
    # ``path for path in ...`` pass-through generator added nothing.
    return sorted(PARALLEL_ROOT.rglob("*.md"))
|
||||
|
||||
|
||||
def iter_public_headers() -> list[str]:
    """Sorted include-root-relative POSIX paths of all public headers."""
    # Path.as_posix() already guarantees forward-slash separators, so the
    # previous normalize_rel_path() wrapper was redundant.
    return sorted(
        path.relative_to(INCLUDE_ROOT).as_posix()
        for path in PUBLIC_INCLUDE_ROOT.rglob("*.h")
    )
|
||||
|
||||
|
||||
def iter_public_include_dirs() -> list[str]:
    """Directory nodes of the public include tree, rooted at "XCEngine"."""
    subdirs = [
        f"XCEngine/{node.relative_to(PUBLIC_INCLUDE_ROOT).as_posix()}"
        for node in sorted(PUBLIC_INCLUDE_ROOT.rglob("*"))
        if node.is_dir()
    ]
    return ["XCEngine", *subdirs]
|
||||
|
||||
|
||||
def dir_index_name(relative: str) -> str:
    """Index-page filename for a directory: "XCEngine/Core" -> "Core.md"."""
    return Path(relative).name + ".md"
|
||||
|
||||
|
||||
def dir_index_doc_path(relative: str) -> Path:
    """Expected on-disk location of the index page for directory *relative*."""
    doc_dir = DOC_ROOT / relative
    return doc_dir / dir_index_name(relative)
|
||||
|
||||
|
||||
def resolve_md_target(source: Path, target: str) -> Path:
    """Resolve relative Markdown link *target* against the *source* page."""
    path_part, _, _ = target.partition("#")  # drop any #fragment suffix
    normalized = path_part.replace("\\", "/")
    return (source.parent / normalized).resolve()
|
||||
|
||||
|
||||
def strip_fenced_code_blocks(content: str) -> str:
    """Remove fenced ``` code blocks so their contents are not link-scanned."""
    return re.sub(FENCED_CODE_BLOCK_RE, "", content)
|
||||
|
||||
|
||||
def is_dir_index_page(page: Path) -> bool:
    """Return True if *page* is the index page of a mirrored source directory.

    An index page lives inside the canonical tree and is named after its
    parent directory (e.g. ``XCEngine/Core/Core.md``), and that directory
    must actually exist under the public include root.
    """
    if not page.is_relative_to(PARALLEL_ROOT):
        return False
    if page.stem != page.parent.name:
        return False

    relative_dir = page.parent.relative_to(PARALLEL_ROOT)
    source_dir = PUBLIC_INCLUDE_ROOT / relative_dir
    # Path.is_dir() is already False for nonexistent paths, so the previous
    # ``exists() and is_dir()`` pair stat'ed the filesystem twice for nothing.
    return source_dir.is_dir()
|
||||
|
||||
|
||||
def is_flat_header_page(page: Path, rel_page: str) -> bool:
    """Return True for a canonical page that maps 1:1 onto a real header file."""
    if not rel_page.startswith("XCEngine/") or is_dir_index_page(page):
        return False
    header_path = INCLUDE_ROOT / Path(rel_page).with_suffix(".h")
    return header_path.exists()
|
||||
|
||||
|
||||
def collect_doc_state(report_path: Path) -> dict[str, object]:
    """Scan the doc tree and public headers, returning all audit metrics.

    *report_path* is the report output location; it is excluded from
    broken-link detection because the report may not exist yet.
    """
    markdown_files = iter_markdown_files()
    canonical_markdown_files = iter_canonical_markdown_files()
    public_headers = iter_public_headers()
    public_include_dirs = iter_public_include_dirs()

    # Header paths declared via the **头文件** metadata line, split into
    # all declared / existing on disk / existing and on a canonical page.
    declared_header_refs: set[str] = set()
    valid_header_refs: set[str] = set()
    canonical_valid_header_refs: set[str] = set()
    invalid_header_refs: list[tuple[str, str]] = []
    broken_md_links: list[tuple[str, str]] = []
    non_md_relative_links: list[tuple[str, str]] = []
    old_template_pages: list[str] = []
    flat_header_pages: list[str] = []

    # How many canonical pages carry each metadata field.
    metadata_counts = {
        "namespace": 0,
        "type": 0,
        "description": 0,
        "header": 0,
    }

    for page in markdown_files:
        rel_page = normalize_rel_path(page.relative_to(DOC_ROOT).as_posix())
        content = page.read_text(encoding="utf-8")
        is_canonical_page = rel_page.startswith("XCEngine/")

        # Metadata coverage is only tracked for canonical pages.
        if is_canonical_page and NAMESPACE_RE.search(content):
            metadata_counts["namespace"] += 1
        if is_canonical_page and TYPE_RE.search(content):
            metadata_counts["type"] += 1
        if is_canonical_page and DESCRIPTION_RE.search(content):
            metadata_counts["description"] += 1
        if is_canonical_page and HEADER_RE.search(content):
            metadata_counts["header"] += 1

        # Pages still using headings from the legacy template.
        if is_canonical_page and LEGACY_SECTION_RE.search(content):
            old_template_pages.append(rel_page)

        if is_flat_header_page(page, rel_page):
            flat_header_pages.append(rel_page)

        for match in HEADER_RE.finditer(content):
            header = normalize_rel_path(match.group(1))
            declared_header_refs.add(header)
            if (INCLUDE_ROOT / header).exists():
                valid_header_refs.add(header)
                if is_canonical_page:
                    canonical_valid_header_refs.add(header)
            elif is_canonical_page:
                # Only canonical pages are reported for dangling header refs.
                invalid_header_refs.append((rel_page, header))

        # Relative-link validation covers canonical pages plus support dirs.
        if is_canonical_page or rel_page.startswith(("_meta/", "_tools/")):
            # Strip fenced code first so example links are not validated.
            link_scan_content = strip_fenced_code_blocks(content)
            for _, target in MD_LINK_RE.findall(link_scan_content):
                if target.startswith(("http://", "https://", "mailto:", "#")):
                    continue  # external URLs and in-page anchors are skipped

                normalized = target.replace("\\", "/")
                if normalized.endswith(".md") or ".md#" in normalized:
                    resolved = resolve_md_target(page, normalized)
                    # The report itself may not be generated yet; don't flag it.
                    if not resolved.exists() and resolved != report_path.resolve():
                        broken_md_links.append((rel_page, target))
                    continue

                non_md_relative_links.append((rel_page, target))

    # Per-module coverage; the module is the first path segment after
    # "XCEngine/" (split("/", 2)[1]).
    public_by_module: dict[str, list[str]] = defaultdict(list)
    documented_by_module: dict[str, set[str]] = defaultdict(set)

    for header in public_headers:
        module = header.split("/", 2)[1]
        public_by_module[module].append(header)

    for header in canonical_valid_header_refs:
        if not header.startswith("XCEngine/"):
            continue
        module = header.split("/", 2)[1]
        documented_by_module[module].add(header)

    module_coverages = [
        ModuleCoverage(
            module=module,
            public_headers=len(headers),
            documented_headers=len(documented_by_module.get(module, set())),
        )
        for module, headers in sorted(public_by_module.items())
    ]

    missing_headers = [
        header for header in public_headers if header not in canonical_valid_header_refs
    ]
    missing_parallel_indexes = [
        relative
        for relative in public_include_dirs
        if not dir_index_doc_path(relative).exists()
    ]
    # Support directories actually present at the doc root.
    support_top_dirs = sorted(
        path.name
        for path in DOC_ROOT.iterdir()
        if path.is_dir() and path.name in {"_meta", "_tools"}
    )

    # NOTE: datetime.now() is naive local time — presumably intentional for a
    # human-readable report timestamp; confirm if UTC is wanted.
    return {
        "generated_at": datetime.now(),
        "markdown_files": markdown_files,
        "canonical_markdown_files": canonical_markdown_files,
        "public_headers": public_headers,
        "public_include_dirs": public_include_dirs,
        "declared_header_refs": sorted(declared_header_refs),
        "valid_header_refs": sorted(valid_header_refs),
        "canonical_valid_header_refs": sorted(canonical_valid_header_refs),
        "invalid_header_refs": invalid_header_refs,
        "broken_md_links": broken_md_links,
        "non_md_relative_links": non_md_relative_links,
        "old_template_pages": sorted(old_template_pages),
        "flat_header_pages": sorted(flat_header_pages),
        "missing_headers": missing_headers,
        "module_coverages": module_coverages,
        "metadata_counts": metadata_counts,
        "support_top_dirs": support_top_dirs,
        "missing_parallel_indexes": missing_parallel_indexes,
    }
|
||||
|
||||
|
||||
def format_pairs_table(headers: tuple[str, str], rows: list[tuple[str, str]]) -> list[str]:
    """Render (left, right) pairs as a two-column Markdown table."""
    left_header, right_header = headers
    table = [
        f"| {left_header} | {right_header} |",
        "|------|------|",
    ]
    table.extend(f"| `{left}` | `{right}` |" for left, right in rows)
    return table
|
||||
|
||||
|
||||
def build_report(state: dict[str, object]) -> str:
    """Render the collected audit state as a Markdown report (Chinese text)."""
    # Unpack the state dict into typed locals; the ignore comments silence
    # the object -> concrete-type narrowing the dict[str, object] forces.
    generated_at: datetime = state["generated_at"]  # type: ignore[assignment]
    markdown_files: list[Path] = state["markdown_files"]  # type: ignore[assignment]
    canonical_markdown_files: list[Path] = state["canonical_markdown_files"]  # type: ignore[assignment]
    public_headers: list[str] = state["public_headers"]  # type: ignore[assignment]
    public_include_dirs: list[str] = state["public_include_dirs"]  # type: ignore[assignment]
    valid_header_refs: list[str] = state["valid_header_refs"]  # type: ignore[assignment]
    canonical_valid_header_refs: list[str] = state["canonical_valid_header_refs"]  # type: ignore[assignment]
    invalid_header_refs: list[tuple[str, str]] = state["invalid_header_refs"]  # type: ignore[assignment]
    broken_md_links: list[tuple[str, str]] = state["broken_md_links"]  # type: ignore[assignment]
    non_md_relative_links: list[tuple[str, str]] = state["non_md_relative_links"]  # type: ignore[assignment]
    old_template_pages: list[str] = state["old_template_pages"]  # type: ignore[assignment]
    flat_header_pages: list[str] = state["flat_header_pages"]  # type: ignore[assignment]
    missing_headers: list[str] = state["missing_headers"]  # type: ignore[assignment]
    module_coverages: list[ModuleCoverage] = state["module_coverages"]  # type: ignore[assignment]
    metadata_counts: dict[str, int] = state["metadata_counts"]  # type: ignore[assignment]
    support_top_dirs: list[str] = state["support_top_dirs"]  # type: ignore[assignment]
    missing_parallel_indexes: list[str] = state["missing_parallel_indexes"]  # type: ignore[assignment]

    # Title and provenance.
    lines: list[str] = []
    lines.append("# API 文档重构状态")
    lines.append("")
    lines.append(f"**生成时间**: `{generated_at.strftime('%Y-%m-%d %H:%M:%S')}`")
    lines.append("")
    lines.append("**来源**: `docs/api/_tools/audit_api_docs.py`")
    lines.append("")
    # Summary counters ("摘要" = summary).
    lines.append("## 摘要")
    lines.append("")
    lines.append(f"- Markdown 页面数(全部): `{len(markdown_files)}`")
    lines.append(f"- Markdown 页面数(canonical): `{len(canonical_markdown_files)}`")
    lines.append(f"- Public headers 数: `{len(public_headers)}`")
    lines.append(f"- 有效头文件引用数(全部): `{len(valid_header_refs)}`")
    lines.append(f"- 有效头文件引用数(canonical): `{len(canonical_valid_header_refs)}`")
    lines.append(f"- 无效头文件引用数: `{len(invalid_header_refs)}`")
    lines.append(f"- 失效 `.md` 链接数: `{len(broken_md_links)}`")
    lines.append(f"- 非 `.md` 相对链接数: `{len(non_md_relative_links)}`")
    lines.append(f"- 旧模板页面数: `{len(old_template_pages)}`")
    lines.append(f"- 扁平 header 页面数: `{len(flat_header_pages)}`")
    lines.append("")
    # Parallel-directory stats ("平行目录" = parallel directory tree).
    lines.append("## 平行目录")
    lines.append("")
    lines.append(f"- Canonical 根目录: `{PARALLEL_ROOT.relative_to(DOC_ROOT).as_posix()}`")
    lines.append(f"- 源码目录节点数: `{len(public_include_dirs)}`")
    lines.append(
        f"- 已生成目录总览页节点数: `{len(public_include_dirs) - len(missing_parallel_indexes)}`"
    )
    lines.append(f"- 缺失目录总览页节点数: `{len(missing_parallel_indexes)}`")
    if support_top_dirs:
        lines.append(f"- 支撑目录: `{', '.join(support_top_dirs)}`")
    lines.append("")
    # Per-module coverage table ("模块覆盖" = module coverage).
    lines.append("## 模块覆盖")
    lines.append("")
    lines.append("| 模块 | Public headers | 已覆盖 | 未覆盖 |")
    lines.append("|------|----------------|--------|--------|")
    for coverage in module_coverages:
        lines.append(
            f"| `{coverage.module}` | `{coverage.public_headers}` | "
            f"`{coverage.documented_headers}` | `{coverage.missing_headers}` |"
        )
    lines.append("")
    # Metadata-field coverage table ("元信息覆盖" = metadata coverage).
    lines.append("## 元信息覆盖")
    lines.append("")
    lines.append("| 字段 | 页面数 |")
    lines.append("|------|--------|")
    lines.append(f"| `命名空间` | `{metadata_counts['namespace']}` |")
    lines.append(f"| `类型` | `{metadata_counts['type']}` |")
    lines.append(f"| `描述` | `{metadata_counts['description']}` |")
    lines.append(f"| `头文件` | `{metadata_counts['header']}` |")
    lines.append("")

    # Optional problem sections follow; each is emitted only when non-empty,
    # and long lists are truncated (50/80/120 entries) to keep the report short.
    if missing_parallel_indexes:
        lines.append("## 缺失的平行目录总览页")
        lines.append("")
        for relative in missing_parallel_indexes:
            lines.append(f"- `{relative}`")
        lines.append("")

    if invalid_header_refs:
        lines.append("## 无效头文件引用")
        lines.append("")
        lines.extend(format_pairs_table(("文档", "头文件"), invalid_header_refs[:50]))
        lines.append("")

    if broken_md_links:
        lines.append("## 失效 Markdown 链接")
        lines.append("")
        lines.extend(format_pairs_table(("文档", "目标"), broken_md_links[:50]))
        lines.append("")

    if non_md_relative_links:
        lines.append("## 非 `.md` 相对链接")
        lines.append("")
        lines.extend(format_pairs_table(("文档", "目标"), non_md_relative_links[:50]))
        lines.append("")

    if old_template_pages:
        lines.append("## 旧模板页面")
        lines.append("")
        for page in old_template_pages[:80]:
            lines.append(f"- `{page}`")
        if len(old_template_pages) > 80:
            lines.append(f"- 其余 `{len(old_template_pages) - 80}` 个页面请直接运行脚本查看。")
        lines.append("")

    if flat_header_pages:
        lines.append("## 扁平 header 页面")
        lines.append("")
        for page in flat_header_pages[:120]:
            lines.append(f"- `{page}`")
        if len(flat_header_pages) > 120:
            lines.append(f"- 其余 `{len(flat_header_pages) - 120}` 个页面请直接运行脚本查看。")
        lines.append("")

    if missing_headers:
        lines.append("## 未覆盖的 public headers")
        lines.append("")
        for header in missing_headers[:120]:
            lines.append(f"- `{header}`")
        if len(missing_headers) > 120:
            lines.append(f"- 其余 `{len(missing_headers) - 120}` 个 header 请直接运行脚本查看。")
        lines.append("")

    # Normalize the tail: strip trailing blank lines, end with exactly one \n.
    return "\n".join(lines).rstrip() + "\n"
|
||||
|
||||
|
||||
def main() -> int:
    """Generate the audit report, print a console summary, return exit code."""
    parser = argparse.ArgumentParser(description="Audit XCEngine API documentation.")
    parser.add_argument(
        "--report",
        default=str(DEFAULT_REPORT),
        help="Markdown report output path.",
    )
    args = parser.parse_args()

    report_path = Path(args.report)
    report_path.parent.mkdir(parents=True, exist_ok=True)
    state = collect_doc_state(report_path)
    report_path.write_text(build_report(state), encoding="utf-8")

    # Console summary mirrors the report's headline counters.
    summary_fields = [
        ("Markdown pages (all)", "markdown_files"),
        ("Markdown pages (canonical)", "canonical_markdown_files"),
        ("Public headers", "public_headers"),
        ("Valid header refs (all)", "valid_header_refs"),
        ("Valid header refs (canonical)", "canonical_valid_header_refs"),
        ("Invalid header refs", "invalid_header_refs"),
        ("Broken .md links", "broken_md_links"),
        ("Non-.md relative links", "non_md_relative_links"),
        ("Old template pages", "old_template_pages"),
        ("Flat header pages", "flat_header_pages"),
        ("Missing directory index pages", "missing_parallel_indexes"),
    ]
    for label, key in summary_fields:
        print(f"{label}: {len(state[key])}")
    print(f"Report written to: {report_path}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit code.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user