import RecordRTC, { StereoAudioRecorder } from 'recordrtc'
import { blobToBase64, stringify } from 'vocode/dist/utils'
import type { AudioMessage } from 'vocode'

// Length, in milliseconds, of each audio snippet that triggers the recorder's
// `ondataavailable` callback (passed to RecordRTC as `timeSlice`).
const TIME_SLICE = 500

// Sampling rate requested from the recorder (passed to RecordRTC as
// `desiredSampRate`). Transcription will not work if it doesn't get audio
// with this sampling rate.
export const SAMPLING_RATE = 48000

/**
 * Records audio from a `MediaStream` with RecordRTC and hands every recorded
 * chunk to `dataHandler` together with the currently configured socket.
 */
export default class Recorder {
  // Always created in the constructor, so it is never undefined.
  private readonly recordRTC: RecordRTC
  private socket: WebSocket | undefined
  public audioStream: MediaStream

  /**
   * @param audioStream - Stream to record from; its tracks are stopped by `stop()`.
   * @param socket - Socket handed to `dataHandler` for each recorded chunk.
   */
  constructor(audioStream: MediaStream, socket: WebSocket) {
    this.setSocket(socket)
    this.audioStream = audioStream
    this.recordRTC = new RecordRTC(audioStream, {
      type: 'audio',
      mimeType: 'audio/webm;codecs=pcm', // endpoint requires 16bit PCM audio
      recorderType: StereoAudioRecorder,
      timeSlice: TIME_SLICE,
      desiredSampRate: SAMPLING_RATE,
      numberOfAudioChannels: 1, // real-time requires only one channel
      bufferSize: 16384,
      audioBitsPerSecond: 128000,
      ondataavailable: (data: Blob) => {
        // `this.socket` is read on every chunk so that a socket replaced via
        // `setSocket` is used for subsequent chunks. The handler is async and
        // intentionally not awaited; a failure is logged here instead of
        // surfacing as an unhandled promise rejection.
        dataHandler(data, this.socket).catch((error: unknown) => {
          console.error('Failed to forward recorded audio chunk', error)
        })
      },
    })
  }

  /** Starts recording. */
  start() {
    return this.recordRTC.startRecording()
  }

  /** Stops every track of the audio stream, then stops the recorder. */
  stop() {
    this.audioStream.getTracks().forEach(track => track.stop())
    return this.recordRTC.stopRecording()
  }

  /** Pauses recording. */
  pause() {
    return this.recordRTC.pauseRecording()
  }

  /** Resumes a paused recording. */
  resume() {
    return this.recordRTC.resumeRecording()
  }

  /** Replaces the socket that subsequent audio chunks are sent to. */
  setSocket(socket: WebSocket) {
    this.socket = socket
  }

  /** Current state as reported by the underlying RecordRTC instance. */
  get state() {
    return this.recordRTC.state
  }
}

/**
 * Base64-encodes a recorded audio blob and sends it over the socket as a
 * `websocket_audio` message.
 *
 * Nothing is sent when the encoding yields a falsy value, when no socket is
 * given, or when the socket is not in the OPEN state.
 *
 * @param data - Recorded audio chunk.
 * @param socket - Destination socket, if any.
 */
const dataHandler = async (data: Blob, socket?: WebSocket) => {
  const encodedAudio = await blobToBase64(data)
  if (!encodedAudio) {
    return
  }

  // Skip the send entirely unless there is a socket that is ready for data.
  if (!socket || socket.readyState !== WebSocket.OPEN) {
    return
  }

  const message: AudioMessage = { type: 'websocket_audio', data: encodedAudio }
  socket.send(stringify(message))
}
