import * as sdk from 'microsoft-cognitiveservices-speech-sdk';
import { SpeechToTextProvider } from './types';
import { defaultPhrases } from './utils';
// Generic, user-facing message reported through `onError` whenever this provider fails.
const errorMessage = 'Speech-to-text conversion failed';
// TODO:
// Add an env variable for the STT
/**
 * Speech-to-text provider built on the Microsoft Cognitive Services Speech SDK.
 *
 * Feeds a `MediaStream` into a continuous `SpeechRecognizer` and forwards
 * recognition results through the public callbacks. Typical lifecycle:
 * construct -> assign callbacks -> `initialize()` -> `startRecognition()` ->
 * `stopRecognition()` -> `clean()`.
 *
 * Failures during initialization or recognition are reported through
 * `onError` with a generic message rather than thrown.
 */
export class MicrosoftSpeechToTextProvider implements SpeechToTextProvider {
  /** Recognition locales this provider accepts; the first entry is the fallback. */
  private languages: string[] = ['en-US', 'de-DE', 'fr-FR', 'it-IT'];
  private recognizer: sdk.SpeechRecognizer | null = null;
  private audioConfig: sdk.AudioConfig | null = null;
  private speechConfig: sdk.SpeechConfig | null = null;

  // Callbacks
  /** Called with the text of every final (`RecognizedSpeech`) result. */
  public onTranscript: (text: string) => void;
  /** Called with the newly added part of an interim hypothesis. */
  public onInterimTranscript?: (text: string) => void;
  /** Called with a generic error message when configuration or recognition fails. */
  public onError?: (error: unknown) => void;

  /**
   * @param accessToken Authorization token for the speech service.
   * @param region Speech service region the token belongs to.
   * @param language Preferred recognition locale (e.g. `de-CH`); mapped to the
   *   closest entry of `availableLanguages` when not supported verbatim.
   * @param stream Audio source to transcribe. Required by `initialize()`.
   * @param grammarList Extra phrases added to the recognizer's phrase list.
   */
  public constructor(
    private accessToken: string,
    private region: string,
    private language?: string,
    private stream?: MediaStream,
    private grammarList?: string[],
  ) {}

  /**
   * Builds the SDK configuration and the recognizer.
   *
   * Never rejects: any failure is reported once through `onError`. When the
   * configuration is invalid the recognizer is not created at all, so a
   * subsequent `startRecognition()` resolves to `false`.
   */
  public async initialize(): Promise<void> {
    if (!this.initConfig()) {
      return;
    }
    this.initializeRecognizer();
  }

  /** Locales supported by this provider. */
  get availableLanguages(): string[] {
    return this.languages;
  }

  /**
   * Validates the constructor arguments and creates the audio and speech configs.
   *
   * @returns `true` when both configs were created, `false` after reporting
   *   the failure through `onError`.
   */
  private initConfig(): boolean {
    try {
      // Plain falsy checks so that `undefined`/`null`/'' are all rejected.
      if (!this.accessToken || !this.region || !this.stream) {
        throw new Error('Invalid config');
      }

      this.createAudioConfig();
      this.setupSpeechConfig();
      return true;
    } catch (_err) {
      this.onError?.(errorMessage);
      return false;
    }
  }

  /** Wraps the provided `MediaStream` in an SDK audio config. */
  private createAudioConfig(): void {
    this.audioConfig = sdk.AudioConfig.fromStreamInput(this.stream);
  }

  /**
   * Creates the speech config from the authorization token and applies the
   * output format, profanity and language settings.
   *
   * Synchronous on purpose: exceptions must reach the `try/catch` in
   * `initConfig` instead of becoming unhandled promise rejections.
   */
  private setupSpeechConfig(): void {
    // TODO: This needs to be adjusted to work on single tenants since it's using the public api url
    // needs to be changed to use fromEndpoint() instead of fromAuthorizationToken when available
    // we are waiting for Microsoft reply: https://unique-ch.atlassian.net/browse/UN-9427
    const config = sdk.SpeechConfig.fromAuthorizationToken(this.accessToken, this.region);
    // -- END
    config.outputFormat = sdk.OutputFormat.Detailed;
    config.setProfanity(sdk.ProfanityOption.Raw);
    this.speechConfig = config;
    this.setupLanguage();
  }

  /** Applies the resolved recognition language to the speech config. */
  private setupLanguage(): void {
    if (!this.speechConfig) {
      return;
    }
    this.speechConfig.speechRecognitionLanguage = this.resolveLanguage(this.language);
  }

  /**
   * Maps a requested locale onto a supported one.
   *
   * Resolution order: exact match, then the first supported locale sharing
   * the primary subtag (e.g. `de-CH` -> `de-DE`), then the first supported
   * locale. A missing or empty request resolves to the fallback directly.
   */
  private resolveLanguage(requested?: string): string {
    const fallback = this.languages[0];
    if (!requested) {
      return fallback;
    }
    if (this.languages.includes(requested)) {
      return requested;
    }

    // Compare primary subtags case-insensitively (e.g. "DE-ch" => "de").
    const primary = requested.split('-')[0].toLowerCase();
    const closest = this.languages.find((l) => l.split('-')[0].toLowerCase() === primary);
    return closest ?? fallback;
  }

  /**
   * Registers the default phrases plus the caller-supplied grammar list on
   * the current recognizer.
   *
   * Best effort: phrase hints only bias recognition, so a failure here must
   * not prevent the recognizer from being wired up.
   */
  private setupGrammar(): void {
    if (!this.recognizer) {
      return;
    }
    try {
      const phraseList = sdk.PhraseListGrammar.fromRecognizer(this.recognizer);
      phraseList.addPhrases([...defaultPhrases, ...(this.grammarList ?? [])]);
    } catch (_err) {
      // Deliberately ignored, see method comment.
    }
  }

  /**
   * Creates the recognizer and attaches the event handlers that feed the
   * public callbacks. Failures are reported through `onError`.
   */
  private initializeRecognizer(): void {
    try {
      const recognizer = new sdk.SpeechRecognizer(this.speechConfig, this.audioConfig);
      this.recognizer = recognizer;
      // Sets up the grammar for the current recognition
      this.setupGrammar();

      // Last interim hypothesis of the utterance currently being recognized.
      let previousTranscript = '';

      recognizer.recognizing = (_sender, event) => {
        const text: string = event.result.text ?? '';
        // Emit only what was appended since the last hypothesis. When the
        // service revised earlier words the old text is no longer a prefix,
        // so the whole hypothesis is emitted instead of a garbled remainder.
        const delta = text.startsWith(previousTranscript)
          ? text.slice(previousTranscript.length)
          : text;
        this.onInterimTranscript?.(delta);
        previousTranscript = text;
      };

      recognizer.canceled = () => {
        this.onError?.(errorMessage);
      };

      recognizer.recognized = (_sender, event) => {
        // A final result closes the utterance; the next interim hypothesis
        // starts from scratch and must not be diffed against the old one.
        previousTranscript = '';
        if (event.result.reason === sdk.ResultReason.RecognizedSpeech) {
          this.onTranscript?.(event.result.text);
        }
      };
    } catch (_err) {
      this.onError?.(errorMessage);
    }
  }

  /**
   * Starts continuous recognition.
   *
   * @returns `true` once the SDK reports that recognition started, `false`
   *   when there is no recognizer (not initialized, initialization failed, or
   *   already cleaned). Rejects with the SDK error when starting fails.
   */
  public async startRecognition(): Promise<boolean> {
    const recognizer = this.recognizer;
    if (!recognizer) {
      // Without this guard the promise below would never settle.
      return false;
    }

    return new Promise<boolean>((resolve, reject) => {
      recognizer.startContinuousRecognitionAsync(
        () => resolve(true),
        (err) => reject(err),
      );
    });
  }

  /**
   * Stops continuous recognition.
   *
   * @returns `true` once the SDK reports that recognition stopped, `false`
   *   when there is no recognizer. Rejects with the SDK error when stopping fails.
   */
  public async stopRecognition(): Promise<boolean> {
    const recognizer = this.recognizer;
    if (!recognizer) {
      return false;
    }

    return new Promise<boolean>((resolve, reject) => {
      recognizer.stopContinuousRecognitionAsync(
        () => resolve(true),
        (err) => reject(err),
      );
    });
  }

  /**
   * Releases the SDK objects and clears the credentials.
   *
   * The recognizer and both configs are closed (best effort) instead of
   * merely dropping the references. The provider must be re-created before
   * it can be used again.
   */
  public async clean(): Promise<void> {
    const closeQuietly = (resource: { close(): void } | null): void => {
      try {
        resource?.close();
      } catch (_err) {
        // Cleanup is best effort; an already-closed object must not break it.
      }
    };

    const recognizer = this.recognizer;
    const audioConfig = this.audioConfig;
    const speechConfig = this.speechConfig;

    // Drop the references first so start/stop calls made from now on see a
    // cleaned provider.
    this.recognizer = null;
    this.audioConfig = null;
    this.speechConfig = null;
    this.accessToken = null;
    this.region = null;

    closeQuietly(recognizer);
    closeQuietly(audioConfig);
    closeQuietly(speechConfig);
  }

  /**
   * Requests speech-service credentials from the backend.
   *
   * Sends `POST {backendUrl}/speech/access-token` with the user's bearer token.
   *
   * @param backendUrl Base URL of the backend (without trailing slash).
   * @param userAccessToken Token sent in the `Authorization` header.
   * @returns The parsed JSON response body; it is not validated here.
   * @throws Error when the response status is not OK.
   */
  static async getAccessCredentials(
    backendUrl: string,
    userAccessToken: string,
  ): Promise<{
    accessToken: string;
    region: string;
  }> {
    const response = await fetch(`${backendUrl}/speech/access-token`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${userAccessToken}`,
      },
    });

    if (!response.ok) {
      throw new Error('Failed to fetch access token');
    }

    return response.json();
  }
}
