import serial
import numpy as np
import soundfile as sf
import time
import wave
import pyaudio
import speech_recognition as sr

def _read_sample_window(ser, pending=b"", duration=1.0):
    """Collect raw sample bytes from *ser* for roughly *duration* seconds.

    Args:
        ser: Open serial port exposing ``read`` and ``in_waiting``.
        pending: Unpaired trailing byte left over from the previous window.
        duration: Wall-clock length of the collection window, in seconds.

    Returns:
        A ``(frames, leftover)`` tuple. ``frames`` always holds a whole
        number of 2-byte samples; ``leftover`` is the unpaired trailing byte
        (or ``b""``) to pass back in as *pending* on the next call.
    """
    buffer = bytearray(pending)
    deadline = time.monotonic() + duration
    while time.monotonic() < deadline:
        # Drain everything already queued in one call; when nothing is
        # queued, block (up to the port timeout) waiting for one sample.
        buffer += ser.read(ser.in_waiting or 2)

    # Keep an odd trailing byte for the next window instead of dropping it,
    # otherwise every following sample would be assembled from the wrong
    # pair of bytes.
    usable = len(buffer) - len(buffer) % 2
    return bytes(buffer[:usable]), bytes(buffer[usable:])


def read_and_transcribe():
    """Stream audio samples from the serial device and print transcriptions.

    Opens ``/dev/cu.usbmodem101`` at 115200 baud, then loops forever:
    collects about one second of big-endian unsigned 16-bit samples,
    re-centres them around zero, runs ``apply_noise_reduction``, writes the
    result to ``temp_audio.wav`` and sends that file to
    ``Recognizer.recognize_google``. Results and errors are printed.

    The loop ends on Ctrl-C or a serial error; the port is closed either way.
    """
    # NOTE(review): the WAV is stamped as 16 kHz regardless of how many
    # samples actually arrive per window - confirm the device's real rate.
    RATE = 16000
    temp_filename = "temp_audio.wav"

    # Bound before the ``try`` so the ``finally`` clause can tell whether
    # the port was ever opened.
    ser = None
    try:
        # Initialize serial connection
        ser = serial.Serial(
            port='/dev/cu.usbmodem101',
            baudrate=115200,
            timeout=1
        )

        # Initialize recognizer
        r = sr.Recognizer()

        print("Connected to device. Starting transcription...")

        pending = b""
        while True:
            # Collect approximately 1 second of data
            frames, pending = _read_sample_window(ser, pending)
            if not frames:
                continue

            # Samples arrive as unsigned 16-bit big-endian values; shift
            # them by half the range to obtain signed PCM. The subtraction
            # is done in int32 so it cannot wrap.
            unsigned = np.frombuffer(frames, dtype='>u2')
            data_array = (unsigned.astype(np.int32) - 32768).astype(np.int16)

            # Apply noise reduction
            data_array = apply_noise_reduction(data_array)

            # Save temporary WAV file
            sf.write(temp_filename, data_array, RATE)

            # Try to recognize speech
            try:
                with sr.AudioFile(temp_filename) as source:
                    audio = r.record(source)
                text = r.recognize_google(audio)
                print(f"Transcription: {text}")
            except sr.UnknownValueError:
                print("No speech detected")
            except sr.RequestError as e:
                print(f"Could not request results; {e}")

    except serial.SerialException as e:
        print(f"Error connecting to device: {e}")
    except KeyboardInterrupt:
        print("\nStopping transcription...")
    finally:
        if ser is not None:
            ser.close()

def apply_noise_reduction(data, window_size=5):
    """
    Apply simple noise reduction techniques.

    The signal is smoothed with a moving-average filter, its DC offset is
    removed, and it is then rescaled so the peak sits at full int16 scale.

    Args:
        data: 1-D sequence or array of audio samples.
        window_size: Number of samples in the moving-average window.

    Returns:
        A ``np.int16`` array with the same number of samples as *data*.
        Empty input yields an empty array.

    Raises:
        ValueError: If *window_size* is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    samples = np.asarray(data)
    if samples.size == 0:
        # Nothing to filter; convolve/max would raise on empty input.
        return np.zeros(0, dtype=np.int16)

    # 1. Moving average filter to reduce high-frequency noise
    kernel = np.ones(window_size) / window_size
    if samples.size >= window_size:
        smoothed_data = np.convolve(samples, kernel, mode='same')
    else:
        # With fewer samples than the window, mode='same' would return
        # ``window_size`` values. Take the centred slice of the full
        # convolution so the output length always matches the input.
        start = (window_size - 1) // 2
        full = np.convolve(samples, kernel, mode='full')
        smoothed_data = full[start:start + samples.size]

    # 2. Remove DC offset
    smoothed_data = smoothed_data - np.mean(smoothed_data)

    # 3. Normalize amplitude (skipped for an all-zero signal to avoid
    #    dividing by zero)
    max_amplitude = np.max(np.abs(smoothed_data))
    if max_amplitude > 0:
        smoothed_data = smoothed_data * (32767 / max_amplitude)

    return smoothed_data.astype(np.int16)

if __name__ == "__main__":
    # Start the transcription loop only when this file is run as a script.
    # The imports at the top of this file require these packages:
    #   pip install pyserial numpy soundfile SpeechRecognition pyaudio
    read_and_transcribe()