feat: 添加语音模块支持，优化服务启动方式

2026-03-17 04:03:39 +08:00
parent 308df54a15
commit 90517f2289
18 changed files with 221 additions and 43 deletions
--- a/api/.env.example
+++ b/api/.env.example
@@ -0,0 +1,2 @@
+MINIMAX_API_KEY=your_api_key_here
+MINIMAX_GROUP_ID=your_group_id_here
--- a/api/app.ts
+++ b/api/app.ts
@@ -23,8 +23,12 @@ import { apiModules } from './modules/index.js'
 import { validateModuleConsistency } from './infra/moduleValidator.js'
 import path from 'path'
 import fs from 'fs'
+import { fileURLToPath } from 'url'

-dotenv.config()
+const __filename = fileURLToPath(import.meta.url)
+const __dirname = path.dirname(__filename)
+// Load the .env file that sits next to this module (directory derived from import.meta.url).
+dotenv.config({ path: path.resolve(__dirname, './.env') })

 const app: express.Application = express()
 export const container = new ServiceContainer()
--- a/api/config/index.ts
+++ b/api/config/index.ts
@@ -38,6 +38,18 @@ export const config = {
  get isDev(): boolean {
    return !this.isElectron && !this.isVercel
  },
+  /** MiniMax API key taken from MINIMAX_API_KEY; undefined when the variable is unset. */
+  get minimaxApiKey(): string | undefined {
+    return process.env['MINIMAX_API_KEY']
+  },
+  /** MiniMax group id taken from MINIMAX_GROUP_ID; undefined when the variable is unset. */
+  get minimaxGroupId(): string | undefined {
+    return process.env['MINIMAX_GROUP_ID']
+  },
+  /** OpenAI API key taken from OPENAI_API_KEY; undefined when the variable is unset. */
+  get openaiApiKey(): string | undefined {
+    return process.env['OPENAI_API_KEY']
+  },
 }

 export const PATHS = {
--- a/api/modules/index.ts
+++ b/api/modules/index.ts
@@ -63,6 +63,13 @@ async function getStaticModules(): Promise<ApiModule[]> {
    console.warn('[ModuleLoader] Failed to load opencode module:', e)
  }

+  try {
+    const { createVoiceModule } = await import('./voice/index.js')
+    modules.push(createVoiceModule())
+  } catch (e) {
+    console.warn('[ModuleLoader] Failed to load voice module:', e)
+  }
+
  modules.sort((a, b) => {
    const orderA = a.metadata.order ?? 0
    const orderB = b.metadata.order ?? 0
@@ -82,3 +89,4 @@ export * from './document-parser/index.js'
 export * from './ai/index.js'
 export * from './remote/index.js'
 export * from './opencode/index.js'
+export * from './voice/index.js'
--- a/api/modules/voice/index.ts
+++ b/api/modules/voice/index.ts
@@ -0,0 +1,25 @@
+import type { Router } from 'express'
+import type { ServiceContainer } from '../../infra/container.js'
+import { createApiModule } from '../../infra/createModule.js'
+import { voiceModule, VOICE_MODULE } from '../../../shared/modules/voice/index.js'
+import { createVoiceRoutes } from './routes.js'
+
+export * from './routes.js'
+
+/**
+ * Builds the voice API module: the shared voice metadata extended with the
+ * '/voice' base path, version '1.0.0' and sort order 100.
+ *
+ * @returns the value produced by createApiModule for that metadata and a
+ *   route factory that delegates to createVoiceRoutes().
+ */
+export const createVoiceModule = () =>
+  createApiModule(
+    { ...voiceModule, basePath: '/voice', version: '1.0.0', order: 100 },
+    {
+      // The container argument is accepted but not used by the voice routes.
+      routes: (_container: ServiceContainer): Router => createVoiceRoutes(),
+    }
+  )
+
+export default createVoiceModule
--- a/api/modules/voice/routes.ts
+++ b/api/modules/voice/routes.ts
@@ -0,0 +1,117 @@
+import { Router } from 'express'
+import { config } from '../../config/index.js'
+
+interface AIMLAPISTTCreateResponse { // JSON body read from the STT job-creation request
+  generation_id?: string // used by the route as the job id when polling
+  status?: string // declared, but not read by the code in this file
+  error?: { // message is forwarded to the client when job creation fails
+    message: string
+  }
+}
+
+interface AIMLAPISTTQueryResponse { // JSON body read from each STT job status poll
+  status?: string // the route stops on 'succeeded' or 'failed'; any other value keeps polling
+  result?: {
+    results?: {
+      channels?: Array<{
+        alternatives?: Array<{
+          transcript?: string // the route returns channels[0].alternatives[0].transcript
+        }>
+      }>
+    }
+  }
+  error?: { // message is forwarded to the client when status is 'failed'
+    message: string
+  }
+}
+
+/** Builds the voice router: POST /stt takes base64 audio in `body.audio` and answers `{ text }` or `{ error }`. */
+export function createVoiceRoutes(): Router {
+  const router = Router()
+  router.post('/stt', async (req, res) => {
+    console.log('[Voice] Request received, body keys:', Object.keys(req.body || {}))
+    try {
+      const apiKey = config.minimaxApiKey
+      console.log('[Voice] API Key exists:', !!apiKey)
+
+      if (!apiKey) {
+        res.status(500).json({ error: 'MiniMax API key not configured' })
+        return
+      }
+
+      // req.body may be undefined (the log above already guards for that) and only a string can be
+      // base64-decoded, so answer 400 for both instead of falling through to the generic 500.
+      const audio: unknown = req.body?.audio
+      if (typeof audio !== 'string' || audio.length === 0) {
+        res.status(400).json({ error: 'No audio data provided' })
+        return
+      }
+
+      const audioBuffer = Buffer.from(audio, 'base64')
+      console.log('[Voice] Audio buffer size:', audioBuffer.length)
+
+      const formData = new FormData()
+      const blob = new Blob([audioBuffer], { type: 'audio/webm' })
+      formData.append('file', blob, 'audio.webm')
+      formData.append('model', '#g1_whisper-large')
+      // NOTE(review): the response types are named AIMLAPI* while the host is api.minimax.chat — confirm the provider.
+      console.log('[Voice] Creating STT job via MiniMax...')
+      const createResponse = await fetch('https://api.minimax.chat/v1/stt/create', {
+        method: 'POST',
+        headers: { 'Authorization': `Bearer ${apiKey}` },
+        body: formData,
+      })
+
+      console.log('[Voice] Create response status:', createResponse.status)
+      const createData: AIMLAPISTTCreateResponse = await createResponse.json()
+      console.log('[Voice] Create response:', createData)
+
+      if (!createResponse.ok || !createData.generation_id) {
+        console.error('[Voice] Failed to create STT job:', createData.error?.message)
+        res.status(500).json({ error: createData.error?.message || 'Failed to create STT job' })
+        return
+      }
+
+      const jobId = createData.generation_id
+      console.log('[Voice] Job ID:', jobId)
+      // The id comes from the upstream response, so escape it before placing it in a URL path.
+      const jobUrl = `https://api.minimax.chat/v1/stt/${encodeURIComponent(jobId)}`
+
+      console.log('[Voice] Polling for result...')
+      // undefined = no 'succeeded' status seen yet; '' is a valid transcript of a finished job.
+      let resultText: string | undefined
+      // Poll once per second, at most 30 times.
+      for (let i = 0; i < 30; i++) {
+        await new Promise(resolve => setTimeout(resolve, 1000))
+        const queryResponse = await fetch(jobUrl, {
+          headers: { 'Authorization': `Bearer ${apiKey}` },
+        })
+        const queryData: AIMLAPISTTQueryResponse = await queryResponse.json()
+        console.log('[Voice] Query response:', queryData)
+
+        if (queryData.status === 'succeeded') {
+          resultText = queryData.result?.results?.channels?.[0]?.alternatives?.[0]?.transcript ?? ''
+          break
+        } else if (queryData.status === 'failed') {
+          console.error('[Voice] STT job failed:', queryData.error?.message)
+          res.status(500).json({ error: queryData.error?.message || 'STT processing failed' })
+          return
+        }
+      }
+
+      // Report a timeout only when no poll ever returned 'succeeded'; an empty transcript is not a timeout.
+      if (resultText === undefined) {
+        res.status(500).json({ error: 'STT processing timeout' })
+        return
+      }
+
+      console.log('[Voice] Final result:', resultText)
+      res.json({ text: resultText })
+    } catch (error) {
+      console.error('[Voice] STT error:', error)
+      res.status(500).json({ error: 'Failed to process audio' })
+    }
+  })
+
+  return router
+}