perplexity_hackathon/src/services/ttsService.ts · perplexity-hackathon-LawMitra

import config from '../config';
import { store } from '../utils/tempStore';
import fs from 'fs';
import path from 'path';
import { promisify } from 'util';
import axios from 'axios';

// Promise-returning wrapper around the callback-style fs.writeFile, so it can be awaited.
const writeFile = promisify(fs.writeFile);

/**
 * Input accepted by `TTSService.generateSpeech`.
 *
 * Only `text` is required; every other field falls back to a default chosen
 * inside `generateSpeech` when it is omitted.
 */
export interface TTSOptions {
  /** Text to synthesize; sent as the `text` field of the request body. */
  text: string;
  /** Voice identifier appended to the request URL path. Defaults to 'pNInz6obpgDQGcFmaJgB'. */
  voice_id?: string;
  /** Model identifier sent as `model_id`. Defaults to 'eleven_multilingual_v2'. */
  model_id?: string;
  /** Forwarded as `voice_settings.stability`. Defaults to 0.5 when omitted. */
  stability?: number;
  /** Forwarded as `voice_settings.similarity_boost`. Defaults to 0.75 when omitted. */
  similarity_boost?: number;
}

/** Result returned by `TTSService.generateSpeech`. */
export interface TTSResponse {
  /** Raw bytes of the HTTP response body (the request asks for `audio/mpeg`). */
  audioContent: Buffer;
}

/**
 * Thin client for the ElevenLabs text-to-speech HTTP endpoint.
 *
 * The API key is read from the `ELEVENLABS_API_KEY` environment variable at
 * construction time. Every successful `generateSpeech` call also writes the
 * received audio to an `.mp3` file under `outputDir`.
 */
export class TTSService {
  private readonly apiKey: string;
  private readonly apiUrl: string;
  private readonly outputDir: string = path.join(__dirname, '../../tmp/tts');

  /**
   * Reads the API key from the environment and makes sure the output
   * directory exists.
   *
   * @throws Error when `ELEVENLABS_API_KEY` is unset or empty.
   */
  constructor() {
    this.apiKey = process.env.ELEVENLABS_API_KEY || '';
    this.apiUrl = 'https://api.elevenlabs.io/v1/text-to-speech';

    if (!this.apiKey) {
      throw new Error('ELEVENLABS_API_KEY environment variable is not set');
    }

    // Ensure output directory exists (no-op when it is already there).
    fs.mkdirSync(this.outputDir, { recursive: true });
  }

  /**
   * Sends `options.text` to the text-to-speech endpoint and returns the audio
   * bytes from the response.
   *
   * Side effect: the audio is also written to a uniquely named `.mp3` file in
   * `outputDir`.
   * NOTE(review): the file path is not part of the returned value, so callers
   * cannot hand it to `cleanup()` from this result alone — confirm whether the
   * on-disk copy is still needed.
   *
   * @param options Text plus optional voice/model/voice-setting overrides.
   * @returns The audio bytes wrapped in a `TTSResponse`.
   * @throws Error with message 'Failed to generate speech' when the HTTP
   *   request or the file write fails.
   */
  async generateSpeech(options: TTSOptions): Promise<TTSResponse> {
    try {
      // `||` is intentional for the identifiers: an empty string is not a usable id.
      const voiceId = options.voice_id || 'pNInz6obpgDQGcFmaJgB'; // Default ElevenLabs voice
      const response = await axios.post(
        // Encode the id so characters such as '/' or '?' cannot alter the request path.
        `${this.apiUrl}/${encodeURIComponent(voiceId)}`,
        {
          text: options.text,
          model_id: options.model_id || 'eleven_multilingual_v2',
          voice_settings: {
            // `??` rather than `||`: an explicit 0 must be forwarded, not replaced by the default.
            stability: options.stability ?? 0.5,
            similarity_boost: options.similarity_boost ?? 0.75
          }
        },
        {
          headers: {
            'xi-api-key': this.apiKey,
            'Content-Type': 'application/json',
            'Accept': 'audio/mpeg'
          },
          responseType: 'arraybuffer'
        }
      );

      // Build the Buffer once and reuse it for both the file and the return value.
      const audioContent = Buffer.from(response.data);
      const outputFileName = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}.mp3`;
      const outputPath = path.join(this.outputDir, outputFileName);

      await writeFile(outputPath, audioContent);

      return { audioContent };
    } catch (error) {
      // Log only the message: the full error object of a failed HTTP request may
      // carry the request configuration, including the API key header.
      console.error(
        'Error generating speech:',
        error instanceof Error ? error.message : error
      );
      throw new Error('Failed to generate speech');
    }
  }

  /**
   * Best-effort removal of a previously written audio file.
   *
   * Failures are logged and swallowed, so this method never rejects.
   *
   * @param filePath Path of the file to delete.
   */
  async cleanup(filePath: string): Promise<void> {
    try {
      await fs.promises.unlink(filePath);
    } catch (error) {
      console.error('Error cleaning up TTS file:', error);
    }
  }
}

/**
 * Shared TTSService instance, constructed when this module is first loaded.
 * Because the constructor throws when ELEVENLABS_API_KEY is unset, importing
 * this module fails in that case.
 */
export const ttsService = new TTSService();