Files
XCEngine/docs/api/_tools/cleanup_template_api_docs.py

642 lines
22 KiB
Python
Raw Normal View History

2026-04-08 16:07:03 +08:00
#!/usr/bin/env python3
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
from generate_core_resources_canonical_pages import (
DOC_ROOT,
INCLUDE_ROOT,
REPO_ROOT,
build_namespace_map,
find_declarations,
group_methods,
select_primary,
)
# Placeholder sentences that mark a row of an overview method table as
# template output. main() rebuilds the table of any overview page that
# contains one of these substrings.
TEMPLATE_TABLE_TOKENS = (
    "获取相关状态或对象。",
    "设置相关状态或配置。",
    "加载资源或数据。",
    "公开方法,详见头文件声明。",
)

# Placeholder sentences that mark a per-method page as template output.
# This is the table set plus three phrases that only occur on method pages;
# main() regenerates any method page that contains one of these substrings.
TEMPLATE_METHOD_TOKENS = (
    "当前页面用于固定",
    *TEMPLATE_TABLE_TOKENS,
    "参数语义详见头文件声明。",
    "返回值语义详见头文件声明。",
)
# Matches the generic, name-derived sentences this script itself falls back to
# when no implementation-based summary is available. Pages containing one of
# them are treated as still needing a rewrite.
GENERIC_FALLBACK_RE = re.compile(
    r"执行该公开方法对应的当前实现。|"
    r"返回 `[^`]+` 相关结果。|"
    r"更新 `[^`]+` 相关状态。|"
    r"判断 `[^`]+` 条件是否成立。|"
    r"判断是否具备 `[^`]+`。|"
    r"判断当前是否可以执行 `[^`]+`。"
)
# The "## 公共方法" section: from its heading up to (not including) the newline
# that precedes the next "## " heading, or to the end of the text.
METHOD_SECTION_RE = re.compile(r"(?ms)^## 公共方法\n.*?(?=\n## |\Z)")
# C/C++ comments.
BLOCK_COMMENT_RE = re.compile(r"/\*.*?\*/", re.DOTALL)
LINE_COMMENT_RE = re.compile(r"//.*")
# Single-statement shapes recognised by analyze_simple_statement(). Each one
# is anchored to the whole (whitespace-normalised) statement and tolerates an
# optional trailing semicolon.
RETURN_MEMBER_RE = re.compile(r"^return\s+(m_[A-Za-z_]\w*)\s*;?$")
RETURN_MEMBER_METHOD_RE = re.compile(
    r"^return\s+(m_[A-Za-z_]\w*)\.(Data|data|Size|size|CStr|c_str|Get)\s*\((.*?)\)\s*;?$"
)
RETURN_SIMPLE_CALL_RE = re.compile(r"^return\s+([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*\((.*)\)\s*;?$")
RETURN_CONST_RE = re.compile(r"^return\s+([^;]+?)\s*;?$")
ASSIGN_MEMBER_RE = re.compile(r"^(m_[A-Za-z_]\w*)\s*=\s*([^;]+?)\s*;?$")
# An `m_` member followed by an assignment, compound assignment, or postfix
# increment/decrement. The plain "=" alternative carries a negative lookahead
# so the comparison `m_x == y` is not mistaken for a write, and the bitwise
# compound assignments (&=, |=, ^=) are recognised as writes.
MEMBER_WRITE_RE = re.compile(
    r"\b(m_[A-Za-z_]\w*)\b\s*(?:=(?!=)|\+=|-=|\*=|/=|%=|&=|\|=|\^=|>>=|<<=|\+\+|--)"
)
# Call-site patterns: `m_field.Method(`, `Name(` / `Ns::Name(`, `->Name(`, `.Name(`.
MEMBER_CALL_RE = re.compile(r"\b(m_[A-Za-z_]\w*)\.(\w+)\s*\(")
DIRECT_CALL_RE = re.compile(r"\b([A-Za-z_]\w*(?:::[A-Za-z_]\w*)*)\s*\(")
ARROW_CALL_RE = re.compile(r"->\s*([A-Za-z_]\w*)\s*\(")
DOT_CALL_RE = re.compile(r"\.\s*([A-Za-z_]\w*)\s*\(")
# A bare `m_`-prefixed identifier.
FIELD_NAME_RE = re.compile(r"\bm_[A-Za-z_]\w*\b")
# C++ keywords and operators that may be directly followed by "(" without
# being a function call; collect_calls() drops matches whose name is in here.
KEYWORDS = {
    "if", "for", "while", "switch", "return", "sizeof",
    "static_cast", "reinterpret_cast", "const_cast", "dynamic_cast",
    "catch", "new", "delete",
}

# Method names that collect_member_writes() treats as modifying the `m_`
# member they are invoked on.
MUTATING_MEMBER_CALLS = {
    "Append", "Assign", "Clear", "ClearDirty",
    "Emplace", "Erase", "Insert", "PopBack",
    "PushBack", "Release", "Remove", "Reset",
    "Resize", "Set", "SetInvalid", "Shrink",
}
@dataclass
class ImplementationFact:
    """Description of a method derived from its implementation or its name."""

    # One-sentence description; None when the analysis produced no summary.
    summary: str | None
    # Bullet points listed under the page's implementation section.
    details: list[str]
@dataclass
class ImplementationBlock:
    """One definition of a method located in C++ header or source text."""

    # "body" for a braced definition, "default" or "delete" for `= default` /
    # `= delete` definitions.
    kind: str
    # Text between the outer braces for "body" blocks; empty otherwise.
    body: str
def has_any_token(content: str, tokens: tuple[str, ...]) -> bool:
    """Return True when at least one of *tokens* is a substring of *content*."""
    for candidate in tokens:
        if candidate in content:
            return True
    return False
def has_generic_fallback(content: str) -> bool:
    """Return True when *content* contains a sentence matched by GENERIC_FALLBACK_RE."""
    found = GENERIC_FALLBACK_RE.search(content)
    # A match object is always truthy, so bool() is equivalent to "is not None".
    return bool(found)
def strip_comments(text: str) -> str:
    """Remove C/C++ comments from *text*.

    Block comments (`/* ... */`) are removed first, then everything from `//`
    to the end of each line. The patterns are purely textual, so comment
    markers inside string literals are removed as well.
    """
    without_blocks = BLOCK_COMMENT_RE.sub("", text)
    without_lines = LINE_COMMENT_RE.sub("", without_blocks)
    return without_lines
def normalize_whitespace(text: str) -> str:
    """Collapse every whitespace run in *text* to one space and trim both ends."""
    # str.split() with no argument already discards leading/trailing whitespace.
    words = text.split()
    return " ".join(words)
def split_statements(body: str) -> list[str]:
    """Split C++ code into top-level statements.

    A semicolon ends a statement only when it is outside every pair of
    parentheses and braces, so `for (...; ...; ...)` headers and nested
    `{ ... }` blocks stay inside the statement that contains them.

    Angle brackets are deliberately not tracked: `<` and `>` are also the
    comparison and shift operators, and counting them left the depth stuck
    above zero after e.g. `a < b`, which suppressed every later split. A
    semicolon cannot occur inside a template argument list, so nothing is
    lost by ignoring them.

    Args:
        body: C++ source text, normally a function body with comments removed.

    Returns:
        The non-empty statements, stripped of surrounding whitespace and
        without their terminating semicolon. Trailing text that has no
        top-level terminator is returned as a final statement.
    """
    statements: list[str] = []
    pending: list[str] = []
    paren_depth = 0
    brace_depth = 0

    def flush() -> None:
        statement = "".join(pending).strip()
        if statement:
            statements.append(statement)
        pending.clear()

    for char in body:
        if char == ";" and paren_depth == 0 and brace_depth == 0:
            flush()
            continue
        if char == "(":
            paren_depth += 1
        elif char == ")":
            # Clamp at zero so a stray closer cannot hide later terminators.
            paren_depth = max(0, paren_depth - 1)
        elif char == "{":
            brace_depth += 1
        elif char == "}":
            brace_depth = max(0, brace_depth - 1)
        pending.append(char)

    flush()
    return statements
def camel_tail(name: str, prefix_len: int) -> str:
    """Return *name* without its first *prefix_len* characters.

    When nothing remains after the prefix, the full *name* is returned instead.
    """
    remainder = name[prefix_len:]
    if remainder:
        return remainder
    return name
def format_identifier_list(items: list[str]) -> str:
    """Render identifiers as Markdown inline-code spans separated by "、".

    Duplicates are dropped and the first occurrence of each item keeps its
    position. A separator is required: directly adjacent spans such as
    "`a``b`" are not parsed by Markdown as two separate code spans.

    Args:
        items: Identifier names, possibly with repeats.

    Returns:
        The joined code spans, or an empty string when *items* is empty.
    """
    # dict.fromkeys keeps insertion order, giving an order-preserving dedupe.
    unique = dict.fromkeys(items)
    return "、".join(f"`{item}`" for item in unique)
def extract_balanced(text: str, start: int, open_char: str, close_char: str) -> tuple[str, int] | None:
    """Scan from *start* until an *open_char* / *close_char* pair balances.

    Returns:
        `(segment, end)` where `segment` is `text[start:end]` and `end` is the
        index just past the closing character that brought the nesting level
        back to zero; None when the text ends first.
    """
    nesting = 0
    position = start
    limit = len(text)
    while position < limit:
        symbol = text[position]
        if symbol == open_char:
            nesting += 1
        elif symbol == close_char:
            nesting -= 1
            # Balance is only tested on a closer, so characters before the
            # first opener are carried along in the returned segment.
            if nesting == 0:
                stop = position + 1
                return text[start:stop], stop
        position += 1
    return None
# Specifiers that may follow a parameter list. "constexpr" is listed before
# "const" so the longer keyword is tried first, and the trailing \b stops a
# keyword from matching the start of a longer identifier.
_SUFFIX_KEYWORD_RE = re.compile(r"(constexpr|const|override|final|noexcept)\b")
# `= default` / `= delete`, with any amount of whitespace after the "=".
_SUFFIX_SPECIAL_RE = re.compile(r"=\s*(default|delete)\b")


def scan_definition_suffix(text: str, start: int) -> tuple[str, int]:
    """Classify what follows a C++ parameter list.

    Starting at *start* (normally the index just past the closing
    parenthesis), whitespace, the specifiers `const`, `override`, `final`,
    `constexpr` and `noexcept` (with an optional parenthesised argument), and
    any `&` / `*` characters are skipped. Whatever comes next decides the
    result.

    Args:
        text: Source text being scanned.
        start: Index at which scanning begins.

    Returns:
        `(kind, index)` where `kind` is:
          * "body"    - `index` is the position of the opening "{";
          * "default" - `= default` was found, `index` is just past it;
          * "delete"  - `= delete` was found, `index` is just past it;
          * ""        - anything else (for example a plain declaration).
    """
    index = start
    length = len(text)
    while True:
        while index < length and text[index].isspace():
            index += 1
        keyword = _SUFFIX_KEYWORD_RE.match(text, index)
        if keyword:
            index = keyword.end()
            if keyword.group(1) == "noexcept":
                # Skip an optional `(expression)` argument of noexcept.
                while index < length and text[index].isspace():
                    index += 1
                if index < length and text[index] == "(":
                    extracted = extract_balanced(text, index, "(", ")")
                    if extracted:
                        _, index = extracted
            continue
        if index < length and text[index] in {"&", "*"}:
            index += 1
            continue
        break
    special = _SUFFIX_SPECIAL_RE.match(text, index)
    if special:
        return special.group(1), special.end()
    if index < length and text[index] == "{":
        return "body", index
    return "", index
def _collect_definition_blocks(text: str, matches) -> list[ImplementationBlock]:
    """Turn regex matches that end on a parameter-list "(" into ImplementationBlocks.

    For each match the parameter list is skipped, the suffix is classified
    with scan_definition_suffix(), and a block is recorded for braced bodies
    and for `= default` / `= delete` definitions. Matches that turn out to be
    plain declarations or call sites produce nothing.
    """
    blocks: list[ImplementationBlock] = []
    for match in matches:
        # Every pattern passed in ends with r"\(", so the parameter list opens
        # at match.end() - 1. Searching for the first "(" instead would stop
        # inside names that contain parentheses, such as `operator()`.
        params_start = match.end() - 1
        extracted = extract_balanced(text, params_start, "(", ")")
        if not extracted:
            continue
        _, cursor = extracted
        kind, suffix_pos = scan_definition_suffix(text, cursor)
        if kind == "body":
            body_block = extract_balanced(text, suffix_pos, "{", "}")
            if body_block:
                body, _ = body_block
                # Drop the enclosing braces.
                blocks.append(ImplementationBlock(kind="body", body=body[1:-1]))
        elif kind in {"default", "delete"}:
            blocks.append(ImplementationBlock(kind=kind, body=""))
    return blocks


def extract_qualified_blocks(text: str, class_name: str, method_name: str) -> list[ImplementationBlock]:
    """Find out-of-class definitions written as `Class::method(...)` in *text*.

    Args:
        text: C++ source text.
        class_name: Name of the class that owns the method.
        method_name: Method name as it appears after `::`.

    Returns:
        One ImplementationBlock per definition found, in source order.
    """
    # The lookbehind keeps `Foo` from matching the tail of `MyFoo::method`.
    pattern = re.compile(
        rf"(?<!\w){re.escape(class_name)}\s*::\s*{re.escape(method_name)}\s*\("
    )
    return _collect_definition_blocks(text, pattern.finditer(text))


def extract_inline_blocks(text: str, method_name: str) -> list[ImplementationBlock]:
    """Find in-class (unqualified) definitions of *method_name* in *text*.

    Occurrences that are part of a longer identifier (`my_Foo(`), a destructor
    (`~Foo(`), a qualified name (`X::Foo(`) or a member access (`a.Foo(`,
    `p->Foo(`) are skipped.

    Args:
        text: C++ header text.
        method_name: Unqualified method name.

    Returns:
        One ImplementationBlock per definition found, in source order.
    """
    pattern = re.compile(rf"{re.escape(method_name)}\s*\(")
    rejected_prefixes = {":", ".", ">", "_", "~"}

    def is_standalone(match) -> bool:
        if match.start() == 0:
            return True
        previous = text[match.start() - 1]
        return not (previous.isalnum() or previous in rejected_prefixes)

    candidates = (match for match in pattern.finditer(text) if is_standalone(match))
    return _collect_definition_blocks(text, candidates)
def summarize_return_value(expr: str) -> str:
    """Describe a returned C++ expression as one sentence.

    Four wordings are used, checked in this order: the literals `true`,
    `false` and `nullptr`; a bare `m_` member; an expression ending in `()`;
    anything else.
    """
    value = normalize_whitespace(expr)
    if value in ("true", "false", "nullptr"):
        return f"固定返回 `{value}`。"
    if FIELD_NAME_RE.fullmatch(value) is not None:
        return f"返回 `{value}` 当前值。"
    suffix = " 的结果" if value.endswith("()") else ""
    return f"返回 `{value}`{suffix}。"
def analyze_simple_statement(statement: str) -> ImplementationFact | None:
    """Describe a function body that consists of one recognisable statement.

    The statement is whitespace-normalised and tried against these shapes in
    order: `return m_field`, `return m_field.Accessor(...)`,
    `m_field = value`, `return Call(...)`, and finally any other
    `return expr`.

    Returns:
        An ImplementationFact for the first shape that matches, or None when
        the statement is empty or matches none of them.
    """
    text = normalize_whitespace(statement)
    if not text:
        return None

    member_return = RETURN_MEMBER_RE.match(text)
    if member_return is not None:
        member = member_return.group(1)
        return ImplementationFact(
            summary=f"返回 `{member}` 当前值。",
            details=[f"内联返回 `{member}`。"],
        )

    accessor = RETURN_MEMBER_METHOD_RE.match(text)
    if accessor is not None:
        field, method, args = accessor.groups()
        lowered = method.lower()
        if lowered == "data":
            headline = f"返回 `{field}` 暴露的首地址。"
        elif lowered == "size":
            headline = f"返回 `{field}` 当前大小。"
        elif method in ("CStr", "c_str"):
            headline = f"返回 `{field}` 的 C 风格字符串视图。"
        else:
            headline = f"返回 `{field}.{method}()` 的结果。"
        note = f"当前实现直接调用 `{field}.{method}({args})`。"
        return ImplementationFact(summary=headline, details=[note])

    assignment = ASSIGN_MEMBER_RE.match(text)
    if assignment is not None:
        target, value = assignment.groups()
        return ImplementationFact(
            summary=f"写入 `{target}`。",
            details=[f"当前实现把 `{value}` 写入 `{target}`。"],
        )

    forwarded = RETURN_SIMPLE_CALL_RE.match(text)
    if forwarded is not None:
        callee = forwarded.group(1)
        return ImplementationFact(
            summary=f"返回 `{callee}(...)` 的结果。",
            details=[f"当前实现直接转发到 `{callee}(...)`。"],
        )

    plain_return = RETURN_CONST_RE.match(text)
    if plain_return is not None:
        sentence = summarize_return_value(plain_return.group(1))
        return ImplementationFact(summary=sentence, details=[sentence])

    return None
def collect_calls(body: str) -> list[str]:
    """List the distinct names of functions called in *body*.

    Names come from three patterns: direct calls (`Foo(`, `Ns::Foo(`),
    arrow calls (`->Foo(`) and dot calls (`.Foo(`). Qualified names are
    reduced to their last `::` component and entries in KEYWORDS are dropped.

    The direct pattern also matches the name in `obj.Foo(` and `ptr->Foo(`,
    so a member call is seen by two patterns. The result is therefore
    de-duplicated (first occurrence wins); otherwise a body containing one
    member call would be reported as making two calls.

    Returns:
        Unique short call names in order of first appearance.
    """
    names: list[str] = []
    for regex in (DIRECT_CALL_RE, ARROW_CALL_RE, DOT_CALL_RE):
        for match in regex.finditer(body):
            short_name = match.group(1).split("::")[-1]
            if short_name not in KEYWORDS:
                names.append(short_name)
    # dict.fromkeys keeps insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(names))
def collect_member_writes(body: str) -> list[str]:
    """List the `m_` members that *body* appears to modify.

    A member counts as modified when MEMBER_WRITE_RE matches it, or when a
    method is called on it whose name is in MUTATING_MEMBER_CALLS or starts
    with "Set".

    Returns:
        Unique member names: pattern matches first, then call-based hits,
        each in order of first appearance.
    """
    touched = list(MEMBER_WRITE_RE.findall(body))
    touched.extend(
        field
        for field, method in MEMBER_CALL_RE.findall(body)
        if method in MUTATING_MEMBER_CALLS or method.startswith("Set")
    )
    # dict.fromkeys keeps insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(touched))
def analyze_complex_body(body: str) -> ImplementationFact:
    """Derive a summary and detail bullets from a C++ function body.

    Comments are stripped first. An empty body and a body consisting of one
    recognisable statement are handled directly. Otherwise the description
    is assembled from heuristics over the text: members written, functions
    called, `return` statements, and mentions of `nullptr` or
    "not implemented".

    The summary comes from the first source that provides one: a single
    written member, the called functions, the first returned expression, and
    finally a generic sentence.
    """
    code = strip_comments(body)
    if not normalize_whitespace(code):
        return ImplementationFact(
            summary="当前实现为空。",
            details=["当前函数体为空。"],
        )

    statements = [normalize_whitespace(part) for part in split_statements(code)]
    if len(statements) == 1:
        single = analyze_simple_statement(statements[0])
        if single is not None:
            return single

    notes: list[str] = []
    headline: str | None = None

    written_fields = collect_member_writes(code)
    if written_fields:
        notes.append(f"会更新 {format_identifier_list(written_fields[:4])}")
        if len(written_fields) == 1:
            headline = f"更新 `{written_fields[0]}`。"

    # Trivial accessor names carry no information about what the body does.
    ignored_calls = {"Get", "Set", "Data", "Size"}
    call_names = [name for name in collect_calls(code) if name not in ignored_calls]
    if call_names:
        notes.append(f"当前实现会调用 {format_identifier_list(call_names[:5])}")
        if headline is None:
            if len(call_names) == 1:
                headline = f"执行 `{call_names[0]}(...)` 相关流程。"
            else:
                headline = f"执行 {format_identifier_list(call_names[:3])} 协同流程。"

    returned = re.findall(r"\breturn\s+([^;]+);", code)
    if len(returned) > 1 or (returned and "if (" in code):
        notes.append("包含条件分支,并可能提前返回。")
    elif returned and headline is None:
        headline = summarize_return_value(returned[0])

    if "nullptr" in code:
        notes.append("包含 `nullptr` 相关分支。")
    if "not implemented" in code or "未实现" in code:
        notes.append("当前实现仍带有未完成分支。")

    if headline is None:
        headline = "执行该公开方法对应的当前实现。"
    return ImplementationFact(summary=headline, details=notes if notes else [headline])
def dedupe_lines(lines: list[str]) -> list[str]:
    """Return *lines* without repeats, keeping each line's first occurrence in order."""
    # dict keys are unique and keep insertion order.
    return list(dict.fromkeys(lines))
def _fallback_method_description(class_name: str, method_name: str) -> str:
    """Build a description from the method name alone."""
    if method_name == class_name:
        return f"构造 `{class_name}` 实例。"
    if method_name == f"~{class_name}":
        return f"销毁 `{class_name}` 实例。"
    if method_name.startswith("Get"):
        return f"返回 `{camel_tail(method_name, 3)}` 相关结果。"
    if method_name.startswith("Set"):
        return f"更新 `{camel_tail(method_name, 3)}` 相关状态。"
    if method_name.startswith("Is"):
        return f"判断 `{camel_tail(method_name, 2)}` 条件是否成立。"
    if method_name.startswith("Has"):
        return f"判断是否具备 `{camel_tail(method_name, 3)}`。"
    if method_name.startswith("Can"):
        return f"判断当前是否可以执行 `{camel_tail(method_name, 3)}`。"
    if method_name.startswith("Load"):
        return f"执行 `{method_name}` 加载流程。"
    if method_name.startswith("Create"):
        return f"执行 `{method_name}` 创建流程。"
    if method_name.startswith("Update"):
        return f"执行一次 `{method_name}` 更新。"
    return f"执行 `{method_name}` 对应的公开操作。"


def analyze_method_group(
    class_name: str,
    method_name: str,
    overloads: list[dict[str, object]],
    header_text: str,
    source_text: str,
    source_rel: str | None,
) -> ImplementationFact:
    """Describe one method (all of its overloads) of a class.

    Definitions are looked up as `Class::method` in *source_text*; only when
    none are found there are inline definitions searched in *header_text*.
    Pure-virtual overloads (suffix containing "= 0"), `= default` /
    `= delete` definitions and braced bodies each contribute summary
    candidates and detail lines.

    Args:
        class_name: Name of the owning class.
        method_name: Method name; equal to *class_name* for constructors and
            to "~" + *class_name* for destructors.
        overloads: Overload records; only their optional "suffix" entry is read.
        header_text: Full header text.
        source_text: Full source text, or "" when there is no source file.
        source_rel: Path of the source file, mentioned in the details when
            nothing else could be derived; None when there is no source file.

    Returns:
        An ImplementationFact whose summary is the first non-empty candidate
        (or a name-based fallback) and whose details are de-duplicated.
    """
    blocks = extract_qualified_blocks(source_text, class_name, method_name)
    if not blocks:
        blocks = extract_inline_blocks(header_text, method_name)

    notes: list[str] = []
    candidates: list[str] = []
    is_constructor = method_name == class_name
    is_destructor = method_name == f"~{class_name}"

    for overload in overloads:
        if "= 0" in str(overload.get("suffix", "")).strip():
            candidates.append("纯虚接口。")
            notes.append("该声明是纯虚接口,基类不提供实现。")

    for block in blocks:
        if block.kind == "default":
            if is_constructor:
                candidates.append(f"构造 `{class_name}` 实例。")
                notes.append("当前为默认构造实现。")
            elif is_destructor:
                candidates.append(f"销毁 `{class_name}` 实例。")
                notes.append("当前为默认析构实现。")
            else:
                notes.append("当前为 `= default` 实现。")
        elif block.kind == "delete":
            notes.append("当前声明为 `= delete`。")
        else:
            fact = analyze_complex_body(block.body)
            if fact.summary:
                candidates.append(fact.summary)
            notes.extend(fact.details)

    if source_rel and not notes:
        notes.append(f"具体定义位于 `{source_rel}`。")

    short_desc = next((candidate for candidate in candidates if candidate), None)
    if short_desc is None:
        short_desc = _fallback_method_description(class_name, method_name)

    if not notes:
        notes.append(short_desc)
    return ImplementationFact(summary=short_desc, details=dedupe_lines(notes))
def build_method_page(
    class_name: str,
    namespace: str,
    relative_header: str,
    group: dict[str, object],
    analysis: ImplementationFact,
) -> str:
    """Render the Markdown page for one method group.

    The page holds a title, namespace / type / header lines, a `cpp` code
    fence with one signature per overload, the summary, the detail bullets,
    and a related-documents list. That list always links the class page; for
    a `GetX` method it also links `SetX` (and vice versa) when *group*'s
    "siblings" contain an entry with that file name.

    Args:
        class_name: Name of the owning class.
        namespace: Namespace shown on the page.
        relative_header: Header path shown on the page.
        group: Method group with "label", "method_name", "overloads" and,
            optionally, "siblings".
        analysis: Summary and details to print.

    Returns:
        The page text, ending with exactly one newline.
    """
    label = str(group["label"])
    method_name = str(group["method_name"])
    overloads: list[dict[str, object]] = group["overloads"]  # type: ignore[assignment]

    page: list[str] = [
        f"# {class_name}::{label}",
        "",
        f"**命名空间**: `{namespace}`",
        "",
        "**类型**: `method`",
        "",
        f"**头文件**: `{relative_header}`",
        "",
        "## 签名",
        "",
        "```cpp",
    ]
    page.extend(f"{overload['signature']};" for overload in overloads)
    page += ["```", "", "## 作用", "", analysis.summary, "", "## 当前实现", ""]
    page.extend(f"- {detail}" for detail in analysis.details)
    page += ["", "## 相关文档", "", f"- [{class_name}]({class_name}.md)"]

    # Cross-link a getter with its setter, and the other way round.
    if method_name.startswith("Get"):
        counterpart = f"Set{method_name[3:]}"
    elif method_name.startswith("Set"):
        counterpart = f"Get{method_name[3:]}"
    else:
        counterpart = None
    if counterpart is not None and any(
        str(item["file_name"]) == counterpart
        for item in group.get("siblings", [])  # type: ignore[operator]
    ):
        page.append(f"- [{counterpart}]({counterpart}.md)")

    return "\n".join(page).rstrip() + "\n"
def rebuild_method_table(
    content: str,
    method_groups: list[dict[str, object]],
    analyses: dict[str, ImplementationFact],
) -> str:
    """Replace the "## 公共方法" section of an overview page with a fresh table.

    Args:
        content: Current text of the overview page.
        method_groups: Method groups, each providing "file_name" and "label".
        analyses: Analysis results keyed by the group's file name; the
            summary becomes the description column.

    Returns:
        *content* with every match of METHOD_SECTION_RE replaced by the new
        table, or *content* unchanged when it has no "## 公共方法" heading.
    """
    if "## 公共方法" not in content:
        return content
    rows = ["## 公共方法", "", "| 方法 | 描述 |", "|------|------|"]
    for group in method_groups:
        file_name = str(group["file_name"])
        label = str(group["label"])
        description = analyses[file_name].summary
        rows.append(f"| [{label}]({file_name}.md) | {description} |")
    section = "\n".join(rows) + "\n"
    # Pass the replacement as a callable: a replacement *string* is treated
    # as a template, so a backslash in a description (summaries can quote C++
    # expressions) would be read as an escape or group reference and either
    # corrupt the output or raise re.error.
    return METHOD_SECTION_RE.sub(lambda _match: section, content)
def main() -> int:
    """Rewrite template-generated API doc pages from the actual C++ sources.

    For every `*.h` file under INCLUDE_ROOT (in sorted order) the matching
    `.cpp` is looked up under `REPO_ROOT/engine/src`, the header's primary
    declaration is selected, and each of its method groups is analysed.
    Headers without a primary declaration, without methods, or without an
    existing doc directory are skipped.

    An overview page gets its method table rebuilt, and a method page is
    regenerated, only when the page still contains a template token or a
    generic fallback sentence. Files are written only when their text
    actually changes. Two counters are printed at the end.

    Returns:
        Always 0.
    """
    overview_count = 0
    method_page_count = 0

    for header_path in sorted(INCLUDE_ROOT.rglob("*.h")):
        relative_header = header_path.relative_to(INCLUDE_ROOT.parent).as_posix()
        source_path = (
            REPO_ROOT / "engine" / "src" / header_path.relative_to(INCLUDE_ROOT).with_suffix(".cpp")
        )
        if source_path.exists():
            source_text = source_path.read_text(encoding="utf-8", errors="ignore")
            source_rel = source_path.relative_to(REPO_ROOT).as_posix()
        else:
            source_text = ""
            source_rel = None

        header_text = header_path.read_text(encoding="utf-8", errors="ignore")
        header_lines = header_text.splitlines()
        declarations = find_declarations(header_lines, build_namespace_map(header_lines))
        primary = select_primary(header_path.stem, declarations)
        if primary is None or not primary.methods:
            continue

        doc_dir = (
            DOC_ROOT / "XCEngine" / header_path.parent.relative_to(INCLUDE_ROOT) / header_path.stem
        )
        if not doc_dir.exists():
            continue

        method_groups = group_methods(primary.methods, primary.name)
        analyses: dict[str, ImplementationFact] = {}
        for group in method_groups:
            # build_method_page() consults the siblings for Get/Set cross-links.
            group["siblings"] = method_groups
            analyses[str(group["file_name"])] = analyze_method_group(
                primary.name,
                str(group["method_name"]),
                group["overloads"],  # type: ignore[arg-type]
                header_text,
                source_text,
                source_rel,
            )

        overview_path = doc_dir / f"{header_path.stem}.md"
        if overview_path.exists():
            overview_content = overview_path.read_text(encoding="utf-8")
            needs_rebuild = has_any_token(
                overview_content, TEMPLATE_TABLE_TOKENS
            ) or has_generic_fallback(overview_content)
            if needs_rebuild:
                rebuilt = rebuild_method_table(overview_content, method_groups, analyses)
                if rebuilt != overview_content:
                    overview_path.write_text(rebuilt, encoding="utf-8")
                    overview_count += 1

        for group in method_groups:
            file_name = str(group["file_name"])
            page_path = doc_dir / f"{file_name}.md"
            if not page_path.exists():
                continue
            current = page_path.read_text(encoding="utf-8")
            is_template = has_any_token(current, TEMPLATE_METHOD_TOKENS) or has_generic_fallback(current)
            if not is_template:
                continue
            regenerated = build_method_page(
                primary.name,
                primary.namespace,
                relative_header,
                group,
                analyses[file_name],
            )
            if regenerated != current:
                page_path.write_text(regenerated, encoding="utf-8")
                method_page_count += 1

    print(f"Rewritten overview pages: {overview_count}")
    print(f"Rewritten method pages: {method_page_count}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())