import os
import sys
import argparse
import time
from openai import OpenAI

# 任务定义
TASKS = {
    "fix_markdown": {
        "system": "你是一个 Markdown 格式专家。请修复以下 Markdown 文档片段的格式问题。不要修改文档的原始内容，只调整格式（如标题、列表、代码块等的规范化）。直接输出修复后的 Markdown 内容，不要包含任何解释或 ```markdown 标记。注意：这是长文档的一部分，请保持上下文连贯。",
    }
}

# 最大块大小（字符数）
MAX_CHUNK_SIZE = 3000

def split_markdown(text, max_length=MAX_CHUNK_SIZE):
    """
    将 Markdown 文本分割成较小的块，尽量保持段落和代码块完整。
    """
    lines = text.split('\n')
    chunks = []
    current_chunk = []
    current_length = 0
    in_code_block = False
    
    for line in lines:
        # 检测代码块状态
        if line.strip().startswith('```'):
            in_code_block = not in_code_block
            
        line_len = len(line) + 1 # +1 for newline
        
        # 决定是否需要切分：
        # 1. 当前长度超过最大限制
        # 2. 且不在代码块内 (in_code_block == False)
        if current_length + line_len > max_length and not in_code_block:
            # 如果当前块不为空，则保存当前块
            if current_chunk:
                chunks.append('\n'.join(current_chunk))
                current_chunk = []
                current_length = 0
            
            # 如果单行本身就超过了最大长度（极少见情况），也只能强行放入
            current_chunk.append(line)
            current_length += line_len
        else:
            current_chunk.append(line)
            current_length += line_len
            
    if current_chunk:
        chunks.append('\n'.join(current_chunk))
        
    return chunks

def process_chunk(client, content, task_config, model="doubao-seed-1-8-251228"):
    """
    处理单个文本块
    """
    try:
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": task_config["system"]},
                {"role": "user", "content": content},
            ],
            max_tokens=4096, # 保持较大的输出 token 限制
        )
        return completion.choices[0].message.content
    except Exception as e:
        # 如果出错，打印错误到 stderr 但不中断整个流程（或者选择中断）
        # 这里选择抛出异常以便外层捕获
        raise e

def main():
    parser = argparse.ArgumentParser(description="Doubao AI Task Executor")
    parser.add_argument("--task", required=True, help="Task name", choices=TASKS.keys())
    args = parser.parse_args()

    # 优先从环境变量读取，如果没有则使用硬编码的 Key (仅供演示，实际应走环境变量)
    api_key = os.getenv('ARK_API_KEY') or "a5ab502d-c9a9-49f3-a80b-9c80c6b5378b"
    
    if not api_key:
        print("Error: ARK_API_KEY environment variable is not set.", file=sys.stderr)
        sys.exit(1)

    client = OpenAI(
        base_url="https://ark.cn-beijing.volces.com/api/v3",
        api_key=api_key,
    )

    # Windows UTF-8 处理
    if sys.platform == 'win32':
        import io
        sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8', errors='ignore')
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='ignore')

    # 读取全部内容
    content = sys.stdin.read()
    
    # 清洗非法字符
    content = content.encode('utf-8', 'ignore').decode('utf-8')

    if not content:
        print("Error: No input content provided via stdin.", file=sys.stderr)
        sys.exit(1)

    task_config = TASKS[args.task]
    
    # 1. 分割文本
    chunks = split_markdown(content)
    
    # 2. 依次处理
    results = []
    total_chunks = len(chunks)
    
    # 打印进度信息到 stderr (前端看不到，但方便调试)
    print(f"Processing {total_chunks} chunks...", file=sys.stderr)

    for i, chunk in enumerate(chunks):
        try:
            # 简单的重试机制
            retry_count = 0
            max_retries = 3
            result = None
            
            while retry_count < max_retries:
                try:
                    result = process_chunk(client, chunk, task_config)
                    break
                except Exception as e:
                    retry_count += 1
                    print(f"Chunk {i+1}/{total_chunks} failed (attempt {retry_count}): {e}", file=sys.stderr)
                    time.sleep(2) # 等待后重试
            
            if result is None:
                print(f"Error: Failed to process chunk {i+1} after {max_retries} attempts.", file=sys.stderr)
                # 失败时保留原始内容，避免数据丢失
                results.append(chunk) 
            else:
                results.append(result)
                
            # 避免触发速率限制
            if i < total_chunks - 1:
                time.sleep(0.5)
                
        except Exception as e:
            print(f"Critical error on chunk {i+1}: {e}", file=sys.stderr)
            results.append(chunk)

    # 3. 合并输出
    final_output = '\n'.join(results)
    
    # 4. 打印最终结果
    print(final_output)

if __name__ == "__main__":
    main()