Files
XCSDD/src/lib/parser.ts

237 lines
6.6 KiB
TypeScript
Raw Normal View History

import type { ParsedDoc, DocMetadata, DocSection, DocTable, DocCodeBlock } from './types'
// Whole-line `**Key**: value` pair. Group 1 is the key (no asterisks), group 2 the value.
const METADATA_REGEX = /^\*\*([^\*]+)\*\*:\s*(.+)$/
// A line that both starts and ends with a pipe, with at least one character in between.
const TABLE_REGEX = /^\|(.+)\|$/
// A triple-backtick fence optionally followed by word characters (group 1, the language).
// Note that a bare ``` line matches as well, with an empty language.
const CODE_BLOCK_REGEX = /^```(\w*)$/
// One to six `#` characters (group 1), whitespace, then the heading text (group 2).
const HEADING_REGEX = /^(#{1,6})\s+(.+)$/
// A line made of three or more dashes and nothing else.
const HR_REGEX = /^---+$/
// `@see <target>`; group 1 is the run of non-whitespace after `@see`.
// The `g` flag makes this regex stateful (`lastIndex`) when used with `exec`/`test`.
const REFERENCE_REGEX = /@see\s+([^\s]+)/g
/**
 * Parses a Markdown documentation page into a structured `ParsedDoc`.
 *
 * The input is processed line by line with these rules, in order:
 * - `@see <target>` occurrences on any line (including lines inside a code
 *   fence) are collected into `references`.
 * - While inside a code fence, lines are accumulated verbatim until a line
 *   that trims to three backticks closes the fence.
 * - A fence-opening line finalizes the section in progress; the resulting
 *   code block (and any content that follows it) is stored in an untitled
 *   level-2 section until the next heading.
 * - Horizontal rules (`---`) are skipped.
 * - The first line starting with `# ` becomes the document title.
 * - Any other heading starts a new section.
 * - `**Key**: value` lines are consumed as metadata; only `namespace`,
 *   `description`, `type`, `inherits` and `package` are stored, other keys
 *   are dropped.
 * - Consecutive `| ... |` rows are buffered and converted with `parseTable`
 *   as soon as a non-table line, a heading, a fence or the end of input is
 *   reached.
 * - Every other non-blank line is stored as trimmed text.
 *
 * Content that appears before the first heading (and not after a code block)
 * is not attached to any section and is therefore not returned.
 *
 * @param content Raw Markdown source; both LF and CRLF line endings are accepted.
 * @returns The title, metadata, sections and references found in `content`.
 */
export function parseMarkdown(content: string): ParsedDoc {
  type ContentItem = { type: 'text' | 'table' | 'code'; data: string | DocTable | DocCodeBlock }

  // With the pipes removed, a delimiter row holds only dashes, alignment
  // colons, asterisks and whitespace.
  const separatorRow = /^[\s:*-]+$/
  const pipes = /\|/g

  // Splitting on /\r?\n/ keeps a trailing "\r" from defeating the
  // `$`-anchored heading, table and fence patterns on CRLF input.
  const lines = content.split(/\r?\n/)

  let title = ''
  const references: string[] = []
  const metadata: DocMetadata = {}
  const sections: DocSection[] = []
  let currentSection: DocSection | null = null
  let currentContent: ContentItem[] = []
  let inCodeBlock = false
  let codeBlockLanguage = ''
  let codeBlockLines: string[] = []
  let tableBuffer: string[] = []

  // Converts the buffered table rows (if any) into a table content item.
  const flushTable = (): void => {
    if (tableBuffer.length === 0) return
    const table = parseTable(tableBuffer)
    if (table) {
      currentContent.push({ type: 'table', data: table })
    }
    tableBuffer = []
  }

  // Stores the section in progress (if any) and resets the content accumulator.
  const finishSection = (): void => {
    if (currentSection) {
      currentSection.content = [...currentContent]
      sections.push(currentSection)
    }
    currentSection = null
    currentContent = []
  }

  // Emits the accumulated fence lines as a code item and leaves code mode.
  const emitCodeBlock = (): void => {
    currentContent.push({
      type: 'code',
      data: { language: codeBlockLanguage, code: codeBlockLines.join('\n') },
    } as { type: 'code'; data: DocCodeBlock })
    codeBlockLines = []
    codeBlockLanguage = ''
    inCodeBlock = false
    // A code block always lives in a section; create an untitled one if needed.
    if (!currentSection) {
      currentSection = { title: '', level: 2, content: [] }
    }
  }

  for (const line of lines) {
    const trimmedLine = line.trim()

    // REFERENCE_REGEX is global (stateful), so start every scan from index 0.
    REFERENCE_REGEX.lastIndex = 0
    let refMatch: RegExpExecArray | null
    while ((refMatch = REFERENCE_REGEX.exec(trimmedLine)) !== null) {
      references.push(refMatch[1])
    }

    // This check must come before the fence-open test: CODE_BLOCK_REGEX also
    // matches a bare closing fence, which would otherwise re-open a block.
    if (inCodeBlock) {
      if (trimmedLine === '```') {
        emitCodeBlock()
      } else {
        codeBlockLines.push(line)
      }
      continue
    }

    const fenceMatch = line.match(CODE_BLOCK_REGEX)
    if (fenceMatch) {
      // Keep a table that directly precedes the fence in the section it belongs to.
      flushTable()
      finishSection()
      inCodeBlock = true
      codeBlockLanguage = fenceMatch[1] || ''
      continue
    }

    // Any line that is not a table row ends the table being buffered.
    const isTableRow = TABLE_REGEX.test(line)
    if (!isTableRow) {
      flushTable()
    }

    if (HR_REGEX.test(trimmedLine)) {
      continue
    }

    if (!title && line.startsWith('# ')) {
      title = line.slice(2).trim()
      continue
    }

    const headingMatch = line.match(HEADING_REGEX)
    if (headingMatch) {
      finishSection()
      currentSection = {
        title: headingMatch[2].trim(),
        level: headingMatch[1].length,
        content: [],
      }
      continue
    }

    const metaMatch = trimmedLine.match(METADATA_REGEX)
    if (metaMatch) {
      const value = metaMatch[2].trim()
      switch (metaMatch[1].toLowerCase()) {
        case 'namespace':
          metadata.namespace = value
          break
        case 'description':
          metadata.description = value
          break
        case 'type':
          metadata.type = value
          break
        case 'inherits':
          metadata.inherits = value
          break
        case 'package':
          metadata.package = value
          break
      }
      // Lines with an unrecognised key are consumed as well.
      continue
    }

    if (isTableRow) {
      // Delimiter rows carry no data; only header and body rows are buffered.
      if (!separatorRow.test(trimmedLine.replace(pipes, '').trim())) {
        tableBuffer.push(trimmedLine)
      }
      continue
    }

    if (trimmedLine) {
      currentContent.push({ type: 'text', data: trimmedLine })
    }
  }

  // An unterminated fence runs to the end of the document.
  if (inCodeBlock) {
    emitCodeBlock()
  }
  // A table on the last lines has no following line to trigger its flush.
  flushTable()
  finishSection()

  return { title, metadata, sections, references }
}
/**
 * Converts the raw lines of a pipe-delimited Markdown table into headers and rows.
 *
 * The first line is the header row. Every later line is a body row, except
 * delimiter rows (only dashes, alignment colons, asterisks and whitespace once
 * the pipes are removed) and rows whose cells are all empty, which are skipped.
 *
 * Empty cells are kept as `''` so the remaining cells stay in their columns.
 *
 * @param tableLines Table lines in document order, header first.
 * @returns The parsed table, or `null` when `tableLines` is empty.
 */
function parseTable(tableLines: string[]): DocTable | null {
  if (tableLines.length < 1) return null

  // Strip one optional outer pipe on each side, then split on the inner pipes.
  // Cells are trimmed but never dropped, so columns cannot shift.
  const splitCells = (line: string): string[] =>
    line
      .trim()
      .replace(/^\|/, '')
      .replace(/\|$/, '')
      .split('|')
      .map(cell => cell.trim())

  // The colon covers GFM alignment markers such as `:---`, `:---:` and `---:`.
  const isSeparator = (line: string): boolean =>
    /^[\s:*-]+$/.test(line.replace(/\|/g, '').trim())

  const headers = splitCells(tableLines[0])
  const rows: string[][] = []
  for (const line of tableLines.slice(1)) {
    if (isSeparator(line)) continue
    const cells = splitCells(line)
    if (cells.some(cell => cell !== '')) {
      rows.push(cells)
    }
  }
  return { headers, rows }
}
/**
 * Returns the name to display for a documentation file.
 *
 * A trailing `.md` extension (lower-case, at the very end of the string) is
 * removed; any other input is returned unchanged.
 *
 * @param filename File name, with or without the `.md` extension.
 * @returns `filename` without its trailing `.md`.
 */
export function getDisplayName(filename: string): string {
  return filename.replace(/\.md$/, '')
}
/**
 * Builds a nested, sorted file tree from a flat list of slash-separated paths.
 *
 * Each path is split on `/` (empty segments are ignored). Every prefix becomes
 * a directory node and the final segment becomes a file node. A node's `path`
 * is `basePath + '/' + relativePath`.
 *
 * Within each level, directories come before files and entries are ordered by
 * `name` using `localeCompare`.
 *
 * Paths with no segments (`''`, `'/'`) are skipped. If a path first seen as a
 * file is later used as a directory prefix (e.g. `'docs'` then `'docs/a.md'`),
 * the node is promoted to a directory so its descendants are not lost.
 *
 * @param files Relative file paths using `/` as separator.
 * @param basePath Prefix prepended (with a `/`) to every node's `path`.
 * @returns The root-level entries of the tree.
 */
export function buildFileTree(files: string[], basePath: string): import('./types').DocFile[] {
  type DocFile = import('./types').DocFile

  const nodesByPath = new Map<string, DocFile>()
  const roots: DocFile[] = []

  for (const file of files) {
    const parts = file.split('/').filter(Boolean)
    let parent: DocFile | undefined
    let currentPath = ''

    for (const [index, part] of parts.entries()) {
      const isLast = index === parts.length - 1
      currentPath = currentPath ? `${currentPath}/${part}` : part

      let node = nodesByPath.get(currentPath)
      if (!node) {
        node = {
          name: part,
          path: `${basePath}/${currentPath}`,
          relativePath: currentPath,
          isDir: !isLast,
          children: isLast ? undefined : [],
        }
        nodesByPath.set(currentPath, node)
        // Attach exactly once, at creation time, so no duplicate scan of the
        // parent's children is needed on later traversals.
        const siblings = parent ? parent.children : roots
        if (siblings) {
          siblings.push(node)
        }
      } else if (!isLast && !node.children) {
        // Previously recorded as a file but now used as a directory prefix.
        node.isDir = true
        node.children = []
      }
      parent = node
    }
  }

  // Directories first, then alphabetical by name; applied recursively in place.
  const sortFiles = (entries: DocFile[]): DocFile[] => {
    entries.sort((a, b) => {
      if (a.isDir !== b.isDir) return a.isDir ? -1 : 1
      return a.name.localeCompare(b.name)
    })
    for (const entry of entries) {
      if (entry.children) {
        sortFiles(entry.children)
      }
    }
    return entries
  }

  return sortFiles(roots)
}