Final Project: Roshi

Project Description

Mouse Emotions

I created Roshi, a mouse with two distinct personalities that talk differently depending on how you interact with her. The input is a capacitive sensor made from copper tape, connected to a custom PCB with a Xiao RP2040 microcontroller; the output is speech generated by a large language model (LLM).

Inspired by the movie Inside Out, Roshi embodies two emotional states—happiness and anger—and reacts to touch input.

I incorporated large language models (LLMs) to give Roshi distinct “emo-characters” that speak based on their emotional states. For example, in "angry mode," Roshi might express frustration or even curse playfully, while in "happy mode," she responds cheerfully.
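
The switch between the two characters is simple: the microcontroller reports whether Roshi is being touched, and the Python side selects a different system prompt before querying the model. Below is a minimal sketch of that idea; the PERSONAS dictionary, the build_messages() helper, and the prompt wording are illustrative placeholders, not the exact prompts used in the scripts further down.

# Illustrative personas for the two emotional states (not the final prompts).
PERSONAS = {
    "happy": "You are Roshi, a cheerful little mouse. Answer warmly and playfully.",
    "angry": "You are Roshi, a grumpy little mouse. Answer with short, irritated replies.",
}

def build_messages(touched, user_text):
    """Choose a persona from the touch state and build the chat messages."""
    mode = "happy" if touched else "angry"
    return [
        {"role": "system", "content": PERSONAS[mode]},
        {"role": "user", "content": user_text},
    ]

# Example: a touch puts Roshi in happy mode.
messages = build_messages(touched=True, user_text="Hi Roshi!")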

Inside Out reference characters for the speech personas

Behavior

When the copper-tape sensor is touched, the board lights the NeoPixel, moves the servo, and reports the touch over serial, and Roshi answers in her happy voice; when the touch is released, the LED turns off, the servo returns, and she switches to the angry voice.

Mouse Diagram

Technical Implementation

The project combines hardware and software:

- Hardware: a custom PCB with a Xiao RP2040, a copper-tape capacitive sensor, a servo motor, and a NeoPixel LED.
- Software: an Arduino sketch that reads the sensor and reports the touch state over serial, plus a Python script that recognizes speech, queries an OpenAI chat model with a mode-dependent system prompt, and speaks the reply with macOS text-to-speech.

For setup, I used Python with Miniconda, which makes it easy to manage an isolated environment and the dependencies for this project.
Install Miniconda | Manage Conda Environments

Test


Breadboard test with the capacitive sensor

PCB

KiCad schematic, PCB layout, and the fabricated board

Fabrication Process


A custom PCB, servo motor, capacitive sensor, and Xiao RP2040


Conductive tape attached to Roshi

Python Code 01


The Python script captures voice input from the microphone, sends it to GPT, and converts the reply to speech:

import openai
import speech_recognition as sr
import subprocess

# Set your OpenAI API key
openai.api_key = ""

# Initial conversation history
conversation_history = [
   # {"role": "system", "content": "You are a grumpy, bitter old man who speaks harshly and rudely. Use words like ass hole, fuck you and retarded. You use insults liberally, belittle anyone who speaks to you, and scoff at modern ideas. You offer no warmth or kindness, only contempt and sneering remarks. Avoid hateful slurs and derogatory language toward protected classes, but feel free to be mean, condescending, and insulting in a more general sense."}
]

def listen():
    """
    Capture audio from the microphone and convert it to text using a local STT solution (e.g., Google Web Speech API).
    For production, consider using more reliable STT solutions like Whisper API, Azure, AWS Transcribe, or Google Cloud STT.
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = r.listen(source)
    try:
        # Using the default Google Web Speech API for demonstration.
        # You can swap this with a more robust or private STT solution.
        text = r.recognize_google(audio)
        return text
    except Exception as e:
        print(f"Error recognizing speech: {e}")
        return None

def get_response_from_chatgpt(prompt):
    """
    Send the user's prompt to OpenAI's ChatCompletion endpoint, maintaining the conversation history.
    """
    # Append user's message to conversation history
    conversation_history.append({"role": "user", "content": prompt})

    # Make API call to the chat completion endpoint
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",  # or "gpt-4", if you have access
        messages=conversation_history,
        temperature=0.7,
        max_tokens=20  # Limit response length to about 20 tokens
    )

    # Extract assistant's reply
    reply = response['choices'][0]['message']['content']
    # Append assistant's reply to conversation history
    conversation_history.append({"role": "assistant", "content": reply})
    return reply

def text_to_speech(text):
    """
    Convert text to speech. On macOS, 'say' is a built-in TTS engine.
    For other platforms, use platform-specific TTS or a cloud TTS service.
    """
    subprocess.run(["say", "-v", "Fred", text])

if __name__ == "__main__":
    print("Voice Assistant is running. Press Ctrl+C to exit.")
    while True:
        user_input = listen()
        if user_input:
            print("You said:", user_input)
            response = get_response_from_chatgpt(user_input)
            print("Assistant:", response)
            text_to_speech(response)
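
A note on the API version: this script uses the legacy openai Python package (pre-1.0), whose openai.ChatCompletion interface was removed in openai 1.0. If a newer version of the package is installed, a roughly equivalent call, assuming the API key is supplied through the OPENAI_API_KEY environment variable, looks like this sketch:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def get_response_from_chatgpt(prompt):
    conversation_history.append({"role": "user", "content": prompt})
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation_history,
        temperature=0.7,
        max_tokens=20,
    )
    reply = response.choices[0].message.content
    conversation_history.append({"role": "assistant", "content": reply})
    return reply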

Python Code 02

The second script adds serial communication with the Xiao RP2040: one background thread reads the touch state ('H' or 'L') from the board, another listens for speech, and the main loop switches the system prompt between the two personas before sending the recognized speech to GPT and speaking the reply:

                import serial
                import time
                import openai
                import speech_recognition as sr
                import subprocess
                import threading
                import queue
                
                # Replace with the appropriate port for your system:
                port = '/dev/cu.usbmodem101'
                baud_rate = 9600
                
                openai.api_key = ""
                
                # Queues to communicate between threads
                serial_queue = queue.Queue()
                speech_queue = queue.Queue()
                
                # Conversation history
                conversation_history = []
                
                # Control variable to stop threads gracefully
                stop_threads = False
                
                def serial_thread(port, baud_rate):
                    """
                    Continuously read from the serial port and put lines into serial_queue.
                    """
                    try:
                        ser = serial.Serial(port, baud_rate, timeout=0.1)
                        print(f"Opened port: {ser.portstr}")
                        time.sleep(2)  # Allow Arduino to reset if needed
                        while not stop_threads:
                            if ser.in_waiting > 0:
                                data = ser.readline()
                                if data:
                                    line = data.decode('utf-8', errors='replace').strip()
                                    serial_queue.put(line)
                            time.sleep(0.01)
                        ser.close()
                        print("Serial port closed.")
                    except serial.SerialException as e:
                        print(f"Could not open serial port {port}: {e}")
                
                def speech_recognition_thread():
                    """
                    Continuously listen for speech in the background and add recognized text to speech_queue.
                    """
                    r = sr.Recognizer()
                    mic = sr.Microphone()
                
                    with mic as source:
                        r.adjust_for_ambient_noise(source, duration=1)
                        print("Microphone calibrated.")
                
                    while not stop_threads:
                        with mic as source:
                            try:
                                # Wait up to 5s for speech, then allow phrase up to 5s
                                print("Listening for speech... (timeout in 5s)")
                                audio = r.listen(source, timeout=5, phrase_time_limit=5)
                                recognized_text = r.recognize_google(audio)
                                speech_queue.put(recognized_text)
                                print(f"Recognized speech: {recognized_text}")
                            except sr.WaitTimeoutError:
                                continue
                            except Exception as e:
                                print("Speech recognition error:", e)
                
                def get_response_from_chatgpt(prompt):
                    conversation_history.append({"role": "user", "content": prompt})
                    response = openai.ChatCompletion.create(
                        model="gpt-3.5-turbo",
                        messages=conversation_history,
                        temperature=0.6,
                        max_tokens=40
                    )
                    reply = response['choices'][0]['message']['content']
                    conversation_history.append({"role": "assistant", "content": reply})
                    return reply
                
                def text_to_speech(text):
                    subprocess.run(["say", "-v", "Samantha", text])
                
                def main_loop():
                    """
                    Main loop checks the queues for new serial data (H/L) and new speech input.
                    Updates the conversation mode based on the latest serial data (if any)
                    and queries ChatGPT when user speech is available.
                    """
                    global stop_threads
                    while True:
                        # Drain the serial queue and process only the latest line
                        latest_line = None
                        while not serial_queue.empty():
                            latest_line = serial_queue.get()
                
                        if latest_line is not None:
                            print(f"Received: {latest_line}")
                
                            # Update conversation mode based on the latest line
                            if latest_line == 'H':
                                print("hello (dog mode)")
                                # Reset the conversation history with dog mode
                                conversation_history.clear()
                                conversation_history.append(
                                    {"role": "system", "content": "Act as a happy and friendly dog and say woof woof at the end of sentences."}
                                )
                            elif latest_line == 'L':
                                print("boo (cat mode)")
                                # Reset the conversation history with cat mode
                                conversation_history.clear()
                                conversation_history.append(
                                    {"role": "system", "content": "Act as an angry and mean cat and say meow meow at the end of sentences."}
                                )
                
                        # Process any recognized speech
                        while not speech_queue.empty():
                            user_input = speech_queue.get()
                            print("You said:", user_input)
                            response = get_response_from_chatgpt(user_input)
                            print("Assistant:", response)
                            text_to_speech(response)
                
                        # Small sleep to reduce CPU usage
                        try:
                            time.sleep(0.01)
                        except KeyboardInterrupt:
                            print("Exiting program.")
                            stop_threads = True
                            break
                
                if __name__ == "__main__":
                    # Start the threads
                    t_serial = threading.Thread(target=serial_thread, args=(port, baud_rate), daemon=True)
                    t_speech = threading.Thread(target=speech_recognition_thread, daemon=True)
                
                    t_serial.start()
                    t_speech.start()
                
                    # Run the main loop
                    main_loop()
                
                    # Wait for threads to stop
                    t_serial.join()
                    t_speech.join()
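
The serial port name (/dev/cu.usbmodem101 above) differs between machines and operating systems. Assuming pyserial is installed (it provides the serial module imported above), one quick way to find the right name is to list the available ports and look for the one that appears when the Xiao RP2040 is plugged in:

from serial.tools import list_ports

# Print every serial port pyserial can see, with a human-readable description.
for p in list_ports.comports():
    print(p.device, "-", p.description)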

Arduino Code for Servo Motor

The following test sketch drives the servo directly, sweeping it back and forth by generating the control pulses manually (the final sensor sketch below uses the Servo library instead):


// Servo signal pin (driven manually here, without the Servo library)
const int servoPin = A1;

// Send one control pulse: 0-180 degrees maps to a 1000-2000 microsecond
// pulse, followed by a pause that pads the frame out to roughly 20 ms.
void writeServoAngle(int angle) {
    int pulseWidth = map(angle, 0, 180, 1000, 2000);
    digitalWrite(servoPin, HIGH);
    delayMicroseconds(pulseWidth);
    digitalWrite(servoPin, LOW);
    delay(20 - (pulseWidth / 1000));
}

void setup() {
    pinMode(servoPin, OUTPUT);
}

// Sweep from 0 to 180 degrees and back in 10-degree steps. Note that
// writeServoAngle() sends a single pulse per call, so the servo only
// receives a new pulse every 500 ms in this test.
void loop() {
    for (int angle = 0; angle <= 180; angle += 10) {
        writeServoAngle(angle);
        delay(500);
    }
    for (int angle = 180; angle >= 0; angle -= 10) {
        writeServoAngle(angle);
        delay(500);
    }
}

Arduino Code for Capacitive Sensor

This sketch times how long the copper-tape pad takes to charge: a longer charge time means the pad is being touched. On a touch it lights the NeoPixel, moves the servo, and prints 'H' over serial for the Python script to read; otherwise it prints 'L':

#include <Servo.h>
#include <Adafruit_NeoPixel.h>

// Capacitive sensing pins: OUT drives the copper-tape pad and IN reads it
// back; the time it takes IN to go HIGH grows when the pad is touched.
#define OUT 27
#define IN  26

// NeoPixel power-enable and data pins (the Xiao RP2040's onboard LED).
int Power = 11;
int PIN  = 12;
#define NUMPIXELS 1

// Charge times (in microseconds) above this threshold count as a touch.
const int touchVal = 30;

Adafruit_NeoPixel pixels(NUMPIXELS, PIN, NEO_GRB + NEO_KHZ800);

Servo myservo;

int pos = 0;

void setup() {
    pinMode(OUT, OUTPUT);
    pinMode(IN, INPUT);

    pixels.begin();
    pinMode(Power, OUTPUT);
    digitalWrite(Power, HIGH);

    Serial.begin(9600);
    myservo.attach(A2);
}

void loop() {
    // Discharge the pad, then time how long it takes to charge back up.
    digitalWrite(OUT, LOW);
    delay(10);

    long startTime = micros();

    digitalWrite(OUT, HIGH);
    while (digitalRead(IN) == LOW);
    long sensVal = micros() - startTime;
    //Serial.println(sensVal);

    if (sensVal > touchVal) {
        // Touched: light the pixel, move the servo, report 'H' to the Python script.
        pixels.setPixelColor(0, pixels.Color(103, 25, 205));
        pixels.show();
        myservo.write(0);
        Serial.println('H');
    } else {
        // Not touched: pixel off, servo back, report 'L'.
        pixels.clear();
        pixels.show();
        myservo.write(90);
        Serial.println('L');
    }
}