import { useRef, useState } from "react";
import { createContainer } from "unstated-next";
import * as Vosk from "vosk-browser";
import useStateRef from "./useStateRef";
// Bundled English Vosk model archive; it is handed to Vosk.createModel when listening starts.
// NOTE(review): what `require` yields for a .tar.gz (URL vs. path) depends on the bundler's loader config — confirm.
const localModel = require("../ml-models/en/model.tar.gz");

// Threshold, in seconds, that AudioContext.outputLatency is compared against to decide whether
// the hook reports `isLatencyHigh`.
const maxLatencySeconds = 0.75; // TODO: test with this value - this is 0.75 seconds of delay, should this be lower? higher?

/** Callback that receives a piece of recognized text. */
type TextListener = (text: string) => void;

/**
 * Resources allocated by one listening session. Every member is optional because
 * startup can fail, or be cancelled, before all of them exist.
 */
interface VoiceSessionResources {
  model?: Vosk.Model | null;
  recognizer?: Vosk.KaldiRecognizer | null;
  audioContext?: AudioContext | null;
  recognizerProcessor?: AudioWorkletNode | null;
  source?: MediaStreamAudioSourceNode | null;
  mediaStream?: MediaStream | null;
  channel?: MessageChannel | null;
}

/**
 * Tears down whatever subset of session resources is present: audio graph first,
 * then the microphone stream, the message channel, the recognizer, the model and
 * finally the audio context.
 */
function releaseVoiceSessionResources(resources: VoiceSessionResources): void {
  resources.recognizerProcessor?.disconnect();
  resources.source?.disconnect();

  const { mediaStream } = resources;
  if (mediaStream) {
    console.log("Stopping media stream...");
    // getTracks() returns a fresh array, so removing tracks while iterating is safe.
    for (const track of mediaStream.getTracks()) {
      track.stop();
      mediaStream.removeTrack(track);
    }
  }

  resources.channel?.port1.close();
  resources.recognizer?.remove();
  resources.model?.terminate();

  const { audioContext } = resources;
  if (audioContext && audioContext.state !== "closed") {
    // close() is asynchronous; teardown is best-effort, so a rejection is only logged.
    audioContext.close().catch((error: unknown) => {
      console.warn("Failed to close audio context", error);
    });
  }
}

/** Converts an arbitrary thrown value into a message suitable for the `errors` list. */
function describeStartupError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Hook backing the voice-commands container.
 *
 * It owns a Vosk model/recognizer plus the Web Audio graph that feeds microphone
 * samples to it, and fans recognition results out to three listener lists:
 * partial results, final results, and (while dictation mode is on) dictation results.
 *
 * @returns The listening/paused/dictation flags, the listener setters, and the
 *   start/stop/pause/resume/reset controls.
 */
function useVoiceCommandsContainer() {
  const [isListening, setIsListening, isListeningRef] = useStateRef(false);
  const [isDictationMode, setIsDictationMode, isDictationModeRef] = useStateRef(false);
  // Exposed for consumers; this hook itself never toggles it.
  const [isLoading] = useState(false);
  const [errors, setErrors] = useState<string[]>([]);
  const modelRef = useRef<Vosk.Model | null>(null);
  const recognizerRef = useRef<Vosk.KaldiRecognizer | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const recognizerProcessorRef = useRef<AudioWorkletNode | null>(null);
  const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const channelRef = useRef<MessageChannel | null>(null);
  const [isPaused, setIsPaused, pausedRef] = useStateRef(false);
  const partialListenersRef = useRef<TextListener[]>([]);
  const finalListenersRef = useRef<TextListener[]>([]);
  const dictationListenersRef = useRef<TextListener[]>([]);
  const [detectedCommands, setDetectedCommands] = useState<string[]>([]);
  const [isLatencyHigh, setIsLatencyHigh] = useState(false);
  // Mirrors `isLatencyHigh` so the long-lived recognizer callback can see the current value.
  const isLatencyHighRef = useRef(false);
  // Bumped by every start and stop; lets an in-flight start detect that it was superseded.
  const startGenerationRef = useRef(0);

  /** Replaces the callbacks invoked with partial (in-progress) recognition text. */
  const setPartialListeners = (listeners: TextListener[]) => {
    partialListenersRef.current = listeners;
  };
  /** Replaces the callbacks invoked with final recognition text outside dictation mode. */
  const setFinalListeners = (listeners: TextListener[]) => {
    finalListenersRef.current = listeners;
  };
  /** Replaces the callbacks invoked with final recognition text while in dictation mode. */
  const setDictationListeners = (listeners: TextListener[]) => {
    dictationListenersRef.current = listeners;
  };

  /** Switches dictation mode on or off. */
  const updateDictationMode = (isDictationMode: boolean) => {
    setIsDictationMode(isDictationMode);
  };

  /** Suppresses listener callbacks; the audio pipeline itself keeps running. */
  const pauseListening = () => {
    setIsPaused(true);
  };
  /** Re-enables listener callbacks after `pauseListening`. */
  const resumeListening = async () => {
    if (pausedRef.current) {
      setIsPaused(false);
    }
  };

  /**
   * Loads the model, opens the microphone and wires the audio graph to a recognizer.
   *
   * @param phrases Optional grammar; when given, it is passed to the recognizer as a JSON string.
   * @returns `null` when a session is already active, otherwise `undefined`.
   * @throws Re-throws any startup failure (model load, microphone access, worklet load)
   *   after releasing partially created resources and clearing `isListening`.
   */
  const startListening = async (phrases?: string[]) => {
    if (isListeningRef.current) {
      return null;
    }
    setIsListening(true);
    const generation = ++startGenerationRef.current;
    // True once stopListening (or a newer start) has run while this call was awaiting.
    const wasSuperseded = () => startGenerationRef.current !== generation;
    console.log('Starting listening...');

    const resources: VoiceSessionResources = {};
    let committed = false;
    try {
      //const model = await Vosk.createModel('https://enabledfirmwarereleases.blob.core.windows.net/speech-models/en-us-small.tar.gz');
      const model = await Vosk.createModel(localModel);
      resources.model = model;
      if (wasSuperseded()) {
        return undefined;
      }

      const channel = new MessageChannel();
      resources.channel = channel;
      const sampleRate = 48000;
      const recognizer = phrases
        ? new model.KaldiRecognizer(sampleRate, JSON.stringify(phrases))
        : new model.KaldiRecognizer(sampleRate);
      resources.recognizer = recognizer;

      model.registerPort(channel.port1);
      recognizer.setWords(true);

      // These callbacks outlive the render that created them, so they must read
      // refs; plain state variables would stay frozen at their registration-time values.
      recognizer.on("result", (message: any) => {
        const text = message.result.text;
        if (text.length === 0 || pausedRef.current) {
          return;
        }
        const listeners = isDictationModeRef.current
          ? dictationListenersRef.current
          : finalListenersRef.current;
        for (const listener of listeners) {
          listener(text);
        }
      });
      recognizer.on("partialresult", (message: any) => {
        // outputLatency is undefined on browsers that do not implement it; treat that as "no latency".
        const latencySeconds = resources.audioContext?.outputLatency ?? 0;
        const latencyIsHigh = latencySeconds > maxLatencySeconds;
        if (latencyIsHigh !== isLatencyHighRef.current) {
          isLatencyHighRef.current = latencyIsHigh;
          setIsLatencyHigh(latencyIsHigh);
        }
        if (isDictationModeRef.current) {
          return;
          // TODO: check for commands for toggling voice controls and dictation mode here though.
        }
        const partial = message.result.partial;
        if (partial.length === 0 || pausedRef.current) {
          return;
        }
        for (const listener of partialListenersRef.current) {
          listener(partial);
        }
      });

      const mediaStream = await navigator.mediaDevices.getUserMedia({
        video: false,
        audio: {
          echoCancellation: true,
          noiseSuppression: true,
          channelCount: 1,
          sampleRate,
        },
      });
      resources.mediaStream = mediaStream;
      if (wasSuperseded()) {
        return undefined;
      }

      const audioContext = new AudioContext();
      resources.audioContext = audioContext;

      // we use a blob of the raw javascript instead a remote url in order to ensure consistency across dev/prod environments and platforms
      const processorBlob = new Blob([audioProcessorWorklet], {
        type: "text/javascript",
      });
      const processorURL = URL.createObjectURL(processorBlob);
      try {
        await audioContext.audioWorklet.addModule(processorURL);
      } finally {
        // The module has been fetched (or has failed) by now; the blob URL is no longer needed.
        URL.revokeObjectURL(processorURL);
      }
      if (wasSuperseded()) {
        return undefined;
      }

      const recognizerProcessor = new AudioWorkletNode(
        audioContext,
        "recognizer-processor",
        {
          channelCount: 1,
          numberOfInputs: 1,
          numberOfOutputs: 1,
        }
      );
      resources.recognizerProcessor = recognizerProcessor;
      recognizerProcessor.port.postMessage(
        { action: "init", recognizerId: recognizer.id },
        [channel.port2]
      );
      recognizerProcessor.connect(audioContext.destination);

      const source = audioContext.createMediaStreamSource(mediaStream);
      resources.source = source;
      source.connect(recognizerProcessor);

      modelRef.current = model;
      recognizerRef.current = recognizer;
      audioContextRef.current = audioContext;
      recognizerProcessorRef.current = recognizerProcessor;
      sourceRef.current = source;
      mediaStreamRef.current = mediaStream;
      channelRef.current = channel;
      committed = true;
      return undefined;
    } catch (error) {
      // Only clear the flag if nobody else has taken over since this start began.
      if (!wasSuperseded()) {
        setIsListening(false);
      }
      setErrors((previous) => [...previous, describeStartupError(error)]);
      throw error;
    } finally {
      if (!committed) {
        // Failed or superseded: nothing was published to the refs, so release it here.
        try {
          releaseVoiceSessionResources(resources);
        } catch (cleanupError) {
          // Do not let a teardown problem mask the original startup outcome.
          console.warn("Failed to release voice session resources", cleanupError);
        }
      }
    }
  };

  /**
   * Stops the active session, releasing the microphone, audio graph, recognizer and model.
   * Also cancels a `startListening` call that is still in flight.
   */
  const stopListening = () => {
    startGenerationRef.current += 1;

    const resources: VoiceSessionResources = {
      model: modelRef.current,
      recognizer: recognizerRef.current,
      audioContext: audioContextRef.current,
      recognizerProcessor: recognizerProcessorRef.current,
      source: sourceRef.current,
      mediaStream: mediaStreamRef.current,
      channel: channelRef.current,
    };
    modelRef.current = null;
    recognizerRef.current = null;
    audioContextRef.current = null;
    recognizerProcessorRef.current = null;
    sourceRef.current = null;
    mediaStreamRef.current = null;
    channelRef.current = null;

    try {
      releaseVoiceSessionResources(resources);
    } finally {
      setIsListening(false);
    }
  };

  /** Stops and restarts listening. The restart is made without a phrase list. */
  const resetListening = async () => {
    stopListening();
    await startListening();
  };

  /** Empties all three listener lists. */
  const clearListeners = () => {
    setPartialListeners([]);
    setFinalListeners([]);
    setDictationListeners([]);
  };

  return {
    isListening,
    partialListenersRef,
    finalListenersRef,
    startListening,
    stopListening,
    setFinalListeners,
    setPartialListeners,
    setDictationListeners,
    clearListeners,
    pauseListening,
    resumeListening,
    isPaused,
    isLoading,
    isDictationMode,
    updateDictationMode,
    resetListening,
    detectedCommands,
    setDetectedCommands,
    isLatencyHigh,
    errors,
  };
}

// Source text of the AudioWorklet module, kept inline as a string (a local copy of
// 'https://enabledfirmwarereleases.blob.core.windows.net/speech-models/recognizer-processor.js')
// so it can be wrapped in a Blob and passed to audioWorklet.addModule on every platform.
// It registers the 'recognizer-processor' processor, which receives a recognizer id and a
// MessagePort through an "init" message and then posts each input block, scaled by 0x8000,
// to that port as an "audioChunk" message.
// NOTE: everything between the backticks is runtime data, not comments — edit with care.
const audioProcessorWorklet = `
class RecognizerAudioProcessor extends AudioWorkletProcessor {
    constructor(options) {
        super(options);
        
        this.port.onmessage = this._processMessage.bind(this);
    }
    
    _processMessage(event) {
        if (event.data.action === "init") {
            this._recognizerId = event.data.recognizerId;
            this._recognizerPort = event.ports[0];
        }
    }
    
    process(inputs, outputs, parameters) {
        const data = inputs[0][0];
        if (this._recognizerPort && data) {
            // AudioBuffer samples are represented as floating point numbers between -1.0 and 1.0 whilst
            // Kaldi expects them to be between -32768 and 32767 (the range of a signed int16)
            const audioArray = data.map((value) => value * 0x8000);
        
            this._recognizerPort.postMessage(
                {
                    action: "audioChunk",
                    data: audioArray,
                    recognizerId: this._recognizerId,
                    sampleRate, // Part of AudioWorkletGlobalScope
                },
                {
                    transfer: [audioArray.buffer],
                }
            );
        }
        return true;
    }
}

registerProcessor('recognizer-processor', RecognizerAudioProcessor)`;

/**
 * unstated-next container built from `useVoiceCommandsContainer`; it is this
 * module's default export.
 */
const VoiceCommandsContainer = createContainer(useVoiceCommandsContainer);

export { VoiceCommandsContainer as default };
