/* eslint-disable @typescript-eslint/no-non-null-assertion */
import {
  MutableRefObject,
  useCallback,
  useEffect,
  useMemo,
  useRef,
  useState,
  useContext,
} from 'react';
import { AUTO_DETECT } from 'src/constants/languages';
import {
  AudioConfig,
  AutoDetectSourceLanguageConfig,
  Recognizer,
  ResultReason,
  SpeechConfig,
  SpeechRecognitionEventArgs,
  SpeechRecognizer,
} from 'microsoft-cognitiveservices-speech-sdk';
import AudioContext from 'src/contexts/AudioContext';
import { translateText } from 'src/utils';
import { logAction } from 'src/utils/analytics';
import { useLazyGetSpeechTokenQuery } from 'src/store/services';
import { useSession } from 'src/hooks';
import log from 'src/utils/logger';
import logger from 'src/utils/logger';

/**
 * How long, in milliseconds, to wait after the user stops speaking before the
 * request is sent. Used as the value of the
 * `Speech_SegmentationSilenceTimeoutMs` speech config property.
 */
const AUTO_SEND_TIMEOUT = 1000;

/**
 * Values and controls exposed by `useSpeechRecognizer`.
 */
type useSpeechRecognizerReturnValue = {
  // Ref holding the "recording in progress" flag; it is updated synchronously
  // so async callbacks can read the current value.
  recordInProgressRef: MutableRefObject<boolean> | undefined;
  // Set to true when an interim (recognizing) result with text arrives; reset to
  // false when a final result is recognized or recognition is stopped.
  voiceDetected: boolean;
  // Reflects the last call to muteMicrophone / unMuteMicrophone.
  microphoneIsMuted: boolean;
  // Stops the microphone tracks and continuous recognition.
  // NOTE(review): the implementation returns a promise even though this is typed as void.
  stopSpeechRecognizing: () => void;
  // Initializes a recognizer for the current source language and starts continuous recognition.
  startSpeechRecognizing: () => void;
  // Disables the audio tracks of the current microphone stream.
  muteMicrophone: () => void;
  // Re-enables the audio tracks of the current microphone stream.
  unMuteMicrophone: () => void;
};

/**
 * Options accepted by `useSpeechRecognizer`.
 */
type Props = {
  // Called with the interim text while speech is still being recognized.
  onRecognizing: (value: string) => void;
  // Called with the final text of an utterance and the language reported on the result.
  // When that language is not 'en-US' the text is translated to English first.
  onRecognized: (value: string, audioLocale?: string) => void;
  // Recognition locale, or AUTO_DETECT to enable language auto-detection.
  // The hook defaults it to 'en-US'.
  sourceLanguage?: string;
};

/**
 * Candidate locales passed to `AutoDetectSourceLanguageConfig.fromLanguages`
 * when the source language is AUTO_DETECT.
 */
const autoDetectLanguages = [
  'en-US',
  'ar-SA',
  'es-MX',
  'fr-FR',
  'hi-IN',
  'ja-JP',
  'zh-CN',
  'de-DE',
  'tr-TR',
  'uk-UA',
];

/**
 * Translates recognized text into English.
 *
 * Resolves to `undefined` (after logging) when the translation request fails,
 * so a failure never escapes as an unhandled rejection from an SDK event handler.
 */
const translateToEnglish = async (text: string) => {
  try {
    const translationsMap = await translateText(text, ['en-US']);
    return translationsMap.get(text)?.get('en');
  } catch (error) {
    log.error('Failed to translate recognized speech: ', error);
    return undefined;
  }
};

/**
 * React hook that runs continuous speech recognition on the user's microphone
 * through the Microsoft Speech SDK.
 *
 * @param onRecognizing - called with interim text while an utterance is in progress.
 * @param onRecognized - called with the final text (translated to English when the
 *   result language is not 'en-US') and the language reported on the result.
 * @param sourceLanguage - recognition locale, or AUTO_DETECT for language
 *   auto-detection. Defaults to 'en-US'. Changing it while this hook is recording
 *   restarts recognition with the new language.
 * @returns recording state plus start/stop and mute/unmute controls.
 */
export const useSpeechRecognizer = ({
  onRecognizing,
  onRecognized,
  sourceLanguage = 'en-US',
}: Props): useSpeechRecognizerReturnValue => {
  // recordInProgressRef is needed for sync state update so that it can be used in async callback.
  const recordInProgressRef = useRef(false);
  const recognizer = useRef<SpeechRecognizer>();
  const audioConfig = useRef<AudioConfig>();
  const mediaStream = useRef<MediaStream>();
  // Mirrors `microphoneIsMuted` so a newly acquired stream can inherit the mute state.
  const microphoneIsMutedRef = useRef(false);

  const [voiceDetected, setVoiceDetected] = useState<boolean>(false);
  const [microphoneIsMuted, setMicrophoneIsMuted] = useState<boolean>(false);

  const { recordInProgress, setDetectedLanguage, setRecordInProgress } =
    useContext(AudioContext);
  const [fetchSpeechToken] = useLazyGetSpeechTokenQuery();
  const { appUser } = useSession();

  // Restart recognition when the source language changes mid-recording.
  useEffect(() => {
    if (recordInProgress || recordInProgressRef.current) {
      // Only restart when this hook instance actually owns a recognizer;
      // otherwise just reset the recording state.
      const ownsRecognizer = recognizer.current !== undefined;

      stopSpeechRecognizing()
        .then(() => (ownsRecognizer ? startSpeechRecognizing() : undefined))
        .catch((error: unknown) => {
          logger.warn('Failed to restart speech recognizing', error);
        });
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sourceLanguage]);

  /**
   * Fetches a speech token, opens the microphone and builds a recognizer for
   * the current source language.
   *
   * @returns the new recognizer, or `undefined` when no token could be fetched.
   */
  const initializeRecognizer = useCallback(async () => {
    const result = await fetchSpeechToken(appUser.user_id);

    if (!result.data) {
      return undefined;
    }
    const speechConfig = SpeechConfig.fromAuthorizationToken(
      result.data.token,
      result.data.region,
    );

    const stream = await navigator.mediaDevices.getUserMedia({
      audio: true,
    });

    // A fresh stream always starts enabled; keep it consistent with the mute state.
    if (microphoneIsMutedRef.current) {
      stream.getAudioTracks().forEach((track) => {
        track.enabled = false;
      });
    }
    mediaStream.current = stream;

    // A single AudioConfig is shared by both recognizer variants below.
    const audioConfiguration = AudioConfig.fromStreamInput(stream);
    audioConfig.current = audioConfiguration;

    speechConfig.setProperty(
      'Speech_SegmentationSilenceTimeoutMs',
      `${AUTO_SEND_TIMEOUT}`,
    );
    speechConfig.setProperty(
      'SpeechServiceConnection_LanguageIdMode',
      `Continuous`,
    );

    let createdRecognizer: SpeechRecognizer;

    if (sourceLanguage === AUTO_DETECT) {
      const autoDetectSourceLanguageConfig =
        AutoDetectSourceLanguageConfig.fromLanguages(autoDetectLanguages);

      createdRecognizer = SpeechRecognizer.FromConfig(
        speechConfig,
        autoDetectSourceLanguageConfig,
        audioConfiguration,
      );
    } else {
      speechConfig.speechRecognitionLanguage = sourceLanguage;
      createdRecognizer = new SpeechRecognizer(
        speechConfig,
        audioConfiguration,
      );
    }

    recognizer.current = createdRecognizer;

    return createdRecognizer;
  }, [sourceLanguage, fetchSpeechToken, appUser.user_id]);

  /**
   * Stops the microphone tracks and continuous recognition, then closes the
   * recognizer so its resources are released.
   *
   * The returned promise always settles: immediately when there is no active
   * recognizer, otherwise once the SDK reports success or failure of the stop.
   */
  const stopSpeechRecognizing = useCallback(async () => {
    mediaStream.current?.getAudioTracks().forEach((track) => track.stop());

    setVoiceDetected(false);
    setRecordInProgress(false);
    recordInProgressRef.current = false;

    // Detach the recognizer first so a repeated stop cannot touch a closed instance.
    const activeRecognizer = recognizer.current;
    recognizer.current = undefined;
    audioConfig.current = undefined;

    if (!activeRecognizer) {
      return true;
    }

    return new Promise<unknown>((resolve) => {
      const release = () => {
        try {
          activeRecognizer.close();
        } catch (error) {
          logger.warn('Failed to close speech recognizer', error);
        }
        resolve(true);
      };

      activeRecognizer.stopContinuousRecognitionAsync(
        release,
        (error: string) => {
          logger.warn('Failed to stop speech recognizing', error);
          release();
        },
      );
    });
  }, [setRecordInProgress]);

  /**
   * Initializes a recognizer, wires its event handlers and starts continuous
   * recognition. Logs and returns without starting when initialization fails.
   */
  const startSpeechRecognizing = useCallback(async () => {
    let activeRecognizer: SpeechRecognizer | undefined;

    try {
      activeRecognizer = await initializeRecognizer();
    } catch (error) {
      logger.warn('Failed to initialize speech recognizing', error);
      return;
    }

    if (!activeRecognizer) {
      // No token was returned, so there is nothing to start.
      logger.warn(
        'Failed to initialize speech recognizing',
        new Error('Speech token is unavailable'),
      );
      return;
    }

    activeRecognizer.recognizing = (
      _: Recognizer,
      event: SpeechRecognitionEventArgs,
    ) => {
      if (!event.result?.text) {
        return;
      }
      setVoiceDetected(true);
      switch (event.result.reason) {
        case ResultReason.RecognizingSpeech:
          onRecognizing(event.result.text);
          break;
        default:
          log.error(
            'Unexpected Microsoft TTS recognizing result: ',
            event.result.reason,
          );
          break;
      }
    };

    activeRecognizer.recognized = async (
      _: Recognizer,
      event: SpeechRecognitionEventArgs,
    ) => {
      if (!event.result?.text) {
        return;
      }

      setDetectedLanguage(event.result.language);

      logAction('speech_recognized', {
        wordsCount: event.result.text?.split(' ').length,
        language: event.result.language,
      });
      setVoiceDetected(false);
      switch (event.result.reason) {
        case ResultReason.RecognizedSpeech:
          /**
           * CE requires text to be in English.
           * When the user speaks another language the recognized text is in
           * that language, so it is translated to English before being sent to CE.
           *
           * NOTE(review): this relies on `result.language` being populated; confirm
           * the SDK sets it when a fixed (non auto-detect) source language is used,
           * otherwise English input would also go through translation.
           */
          if (event.result.language !== 'en-US') {
            const translatedText = await translateToEnglish(event.result.text);

            if (translatedText) {
              onRecognized(`${translatedText}`, event.result.language);
            }
          } else {
            onRecognized(event.result.text, event.result.language);
          }

          break;
        default:
          log.error(
            'Unexpected Microsoft TTS recognized result: ',
            event.result.reason,
          );
          break;
      }
    };

    setRecordInProgress(true);
    recordInProgressRef.current = true;

    activeRecognizer.startContinuousRecognitionAsync(
      undefined,
      (error: string) => {
        logger.warn('Failed to start speech recognizing', error);
        // Roll back the "recording" state unless a newer recognizer took over.
        if (recognizer.current === activeRecognizer) {
          void stopSpeechRecognizing();
        }
      },
    );
  }, [
    initializeRecognizer,
    onRecognized,
    onRecognizing,
    setDetectedLanguage,
    setRecordInProgress,
    stopSpeechRecognizing,
  ]);

  /** Disables the microphone tracks without stopping recognition. */
  const muteMicrophone = useCallback(() => {
    microphoneIsMutedRef.current = true;
    setMicrophoneIsMuted(true);
    mediaStream.current?.getAudioTracks().forEach((track) => {
      track.enabled = false;
    });
  }, []);

  /** Re-enables the microphone tracks. */
  const unMuteMicrophone = useCallback(() => {
    microphoneIsMutedRef.current = false;
    setMicrophoneIsMuted(false);
    mediaStream.current?.getAudioTracks().forEach((track) => {
      track.enabled = true;
    });
  }, []);

  return useMemo(
    () => ({
      recordInProgressRef,
      voiceDetected,
      microphoneIsMuted,
      startSpeechRecognizing,
      stopSpeechRecognizing,
      muteMicrophone,
      unMuteMicrophone,
    }),
    [
      voiceDetected,
      microphoneIsMuted,
      startSpeechRecognizing,
      stopSpeechRecognizing,
      muteMicrophone,
      unMuteMicrophone,
    ],
  );
};
