# -*- coding: utf-8 -*-
"""Final Project Prelim Data Analysis

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1P60Xtf6BAPLtjpwnxrB_6QUqan4xHNu2

# ES20r Physics of Sports 🏋
#### Fall 2024


### Notebook 7: Mini-Lab

# Objectives

</left><font color="black">
At the end of this notebook, students should be able to:
- Process and analyze the data collected from an experiment

# Copy to Drive

To begin working in this notebook, click on the *Copy to Drive* tab, located at the top of the notebook.
- This will copy the notebook to your own Google drive and allow you to edit/work on this notebook.

# Prerequisite

Last class, we had the data collection portion of the Mini-Lab!

This time, we are going to process the data from the three experiments we completed (the vertical jump, horizontal jump and the walk) and see what we can find.

# Disclaimer!
This is going to be a collosal notebook - there will be a lot of code, some math and you will likely run into bugs - not everyone's data will look the same and we may have to do some adjusting.

But don't worry! We are here to help and guide you along. Let us know if the notebook ever gets too confusing. However, once you work through this notebook, you will be well prepared to compelte your own experiment from start to finish.

# Set Up
As per usual, let's get our codespace set up with the relevant libraries first.
"""

pip install detecta

# Download ES20r.py Python File:
!wget https://raw.githubusercontent.com/JMartinezCEE/ES20r/main/ES20r.py
# Import ES20r library functions:
from ES20r import data_prep # Import custom functions from ES20r.py

# Import Libraries:
import pandas as pd # Python library for data manipulation and analysis
import matplotlib.pyplot as plt # Python library for data visualization
import numpy as np # Numerical Python
import glob                         # Return all file paths that match a specific pattern
from detecta import detect_peaks

fs = 14 #set_fontsize

"""
Our next step is to load in and process all our files: force plate, tracker and sensor data. The code below prepares all this for you!

Note that this processing code also normalizes the force palte data for you."""

# Read in all your .csv files
file_path = '/content'
data_list = {}

# read data from all .csv files
for datafile in glob.glob(file_path + "/*.csv"):
    data_name = datafile.split("/")[-1].split(".")[0] # extract the file name
    data_list[data_name] = pd.read_csv(datafile,header=None)

# Prep the libraries for the sensor data
data_a_list = {}
data_g_list = {}
data_l_list = {}
data_m_list = {}
data_r_list = {}

# Process all the data given its designation as sensor, tracker or force plate data
for key in data_list.keys():
    # if 'force' in key:
    #     data_list[key].columns = data_list[key].iloc[0]
    #     data_list[key] = data_list[key].loc[1:].copy()  # Use .loc and .copy() to avoid the warning
    #     data_list[key].loc[:, 'Time'] = data_list[key].Seq.astype(float) * 0.001
    #     data_list[key].loc[:, 'Fx'] = data_list[key].Fx.astype(float)
    #     data_list[key].loc[:, 'Fy'] = data_list[key].Fy.astype(float)
    #     data_list[key].loc[:, 'Fz'] = data_list[key].Fz.astype(float)
    #     data_list[key].loc[:, 'Fx_norm'] = data_list[key].Fx.astype(float) / mass
    #     data_list[key].loc[:, 'Fy_norm'] = data_list[key].Fy.astype(float) / mass
    #     data_list[key].loc[:, 'Fz_norm'] = data_list[key].Fz.astype(float) / mass
    #     print(f'Data file:   {key}     processed')
    if 'foot' in key:
        aa, gg, ll, mm, rr = data_prep(data_list[key])
        data_a_list[key] = aa
        data_g_list[key] = gg
        data_l_list[key] = ll
        data_m_list[key] = mm
        data_r_list[key] = rr
        data_l_list[key]['a_mag'] = np.sqrt(data_l_list[key].lx**2 + data_l_list[key].ly**2 + data_l_list[key].lz**2)
        print(f'Data file:   {key}    processed')
    if 'back' in key:
        aa, gg, ll, mm, rr = data_prep(data_list[key])
        data_a_list[key] = aa
        data_g_list[key] = gg
        data_l_list[key] = ll
        data_m_list[key] = mm
        data_r_list[key] = rr
        data_l_list[key]['a_mag'] = np.sqrt(data_l_list[key].lx**2 + data_l_list[key].ly**2 + data_l_list[key].lz**2)
        print(f'Data file:   {key}    processed')
    # if 'tracker' in key:
    #     data_list[key] = data_list[key]
    #     #removed this: .drop(3, axis=1)
    #     data_list[key].columns = data_list[key].iloc[0]
    #     data_list[key] = data_list[key][1:]
    #     data_list[key].t = data_list[key].t.astype(float)
    #     data_list[key].x = data_list[key].x.astype(float)
    #     data_list[key].y = data_list[key].y.astype(float)
    #     data_list[key] = data_list[key].dropna(subset=['x', 'y'])

        print(f'Data file:   {key}   processed')

"""## Sensor Data:

Last but not least, we have the sensor data for the vertical jumps. BUt guess what, we've already gotten all the values we wanted using just tracker and the force plate data. Even so, let's look at the sensor data to see how the sensor's acceleration compares to the force plate normalized force.

First, let's take a look at the pure sensor data
"""

# experiment name
trial = 'AB_Trial1_back' # change to view different runs

# Plot acc vs. time
plt.figure(figsize=(10, 5))           # Set figure size in inches (default units)

plt.plot(data_l_list[trial].time, data_l_list[trial].lx, '-', label=f"{trial} lx")   # Plots time vs. lx
plt.plot(data_l_list[trial].time, data_l_list[trial].ly, '-', label=f"{trial} ly")   # Plots time vs. ly
plt.plot(data_l_list[trial].time, data_l_list[trial].lz, '-', label=f"{trial} lz")   # Plots time vs. lz
plt.plot(data_l_list[trial].time, data_l_list[trial].a_mag, '-', label=f"{trial} a_mag")   # Plots time vs. a_mag
plt.axhline(y=9.8, label="9.8  ${m}/{s^2}$", color="purple", linestyle="dashed")

plt.xlabel('Time [s]')                          # Set x-label
plt.ylabel(r'Acceleration $[\mathrm{m}/\mathrm{s}^2]$')              # set y-label
# plt.xlim(4,7)
plt.legend(ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc='best') # Shows legend
plt.tick_params(which='major', width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid

trial = 'AB_Trial2_back' # change to view different runs

# Plot acc vs. time
plt.figure(figsize=(10, 5))           # Set figure size in inches (default units)

plt.plot(data_l_list[trial].time, data_l_list[trial].lx, '-', label=f"{trial} lx")   # Plots time vs. lx
plt.plot(data_l_list[trial].time, data_l_list[trial].ly, '-', label=f"{trial} ly")   # Plots time vs. ly
plt.plot(data_l_list[trial].time, data_l_list[trial].lz, '-', label=f"{trial} lz")   # Plots time vs. lz
plt.plot(data_l_list[trial].time, data_l_list[trial].a_mag, '-', label=f"{trial} a_mag")   # Plots time vs. a_mag
plt.axhline(y=9.8, label="9.8  ${m}/{s^2}$", color="purple", linestyle="dashed")

plt.xlabel('Time [s]')                          # Set x-label
plt.ylabel(r'Acceleration $[\mathrm{m}/\mathrm{s}^2]$')              # set y-label
# plt.xlim(4,7)
plt.legend(ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc='best') # Shows legend
plt.tick_params(which='major', width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid

"""We can align all our data in the time domain using our typical approach:"""

for key in data_list.keys():
        jump_detection = data_l_list[key].a_mag > 30

        # find index when sensor first jumps
        jump_index = data_l_list[key].loc[jump_detection,:].index.tolist()[0]

        # time at beginning of the jump
        t_jump = data_l_list[key].time[jump_index]

        # translate the data in time so that the fall begins at t = 0
        data_l_list[key].time = data_l_list[key].time - t_jump

"""Now let's plot the magnitudes of all our sensor data trials."""

# Plot acc vs. time
plt.figure(figsize=(10, 5))           # Set figure size in inches (default units)
for key in data_list.keys():
    if ('v' in key or 'V' in key) and 'sensor' in key:
        plt.plot(data_l_list[key].time, data_l_list[key].a_mag, '-', label=f"{key} a_mag")   # Plots time vs. a_mag
plt.axhline(y=9.8, label="9.8  ${m}/{s^2}$", color="purple", linestyle="dashed")

plt.xlabel('Time [s]')                          # Set x-label
plt.ylabel(r'Acceleration $[\mathrm{m}/\mathrm{s}^2]$')              # set y-label
plt.xlim(-1,2)
plt.legend(ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc='best') # Shows legend
plt.tick_params(which='major', width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid

"""For now, we only care about the magnitudes when comparing to the force plate data. But why?

This is becasue the axes of the sensor are not aligned with the fixed axes of the force plate. I will argue that since most of the force on the force plate was concentrated on the z-direction, comaring the magnitude of the acceleration to the force plate data in the z-direction should be a decent proxy.
"""

# experiment name
trial = 's5v1_sensor' # change to view different runs

# Plot acc vs. time
plt.figure(figsize=(14, 5))           # Set figure size in inches (default units)
for key in data_list.keys():
    if ('v' in key or 'V' in key) and 'sensor' in key:
        plt.plot(data_l_list[key].time, data_l_list[key].a_mag, '-', label=f"{key} a_mag")   # Plots time vs. a_mag
    if ('v' in key or 'V' in key) and 'force' in key:
        plt.plot(data_list[key].Time, data_list[key].Fz_norm, '-', label=f"{key} Fz Norm")
plt.axhline(y=9.8, label="9.8  ${m}/{s^2}$", color="purple", linestyle="dashed")

plt.xlabel('Time [s]', fontsize=fs)                          # Set x-label
plt.ylabel(r'Acceleration $[\mathrm{m}/\mathrm{s}^2]$' , fontsize=fs)              # set y-label
plt.xlim(-1,1)
plt.legend(fontsize=fs, ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc=[1,0]) # Shows legend
plt.tick_params(which='major', labelsize=fs, width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', labelsize=fs, width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid

"""How does the data align?

## Sensor Data

This time, we're going to skip the sensor data (we already have the variables we need).

If you're curious, this is some code to plot all the magnitudes of acceleration for the horizontal jumps. Do you see any patterns we could exploit?
"""

# Plot acc vs. time
plt.figure(figsize=(10, 5))           # Set figure size in inches (default units)
for key in data_list.keys():
    if 'foot' in key:
        plt.plot(data_l_list[key].time, data_l_list[key].a_mag, '-', label=f"{key} a_mag")   # Plots time vs. a_mag
plt.axhline(y=9.8, label="9.8  ${m}/{s^2}$", color="purple", linestyle="dashed")

plt.xlabel('Time [s]')                          # Set x-label
plt.ylabel(r'Acceleration $[\mathrm{m}/\mathrm{s}^2]$')              # set y-label
plt.legend(ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc='best') # Shows legend
plt.tick_params(which='major', width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid

"""# Walking

Now onto our last experiment. Don't worry the end is in sight, all we need to do is.... peak detection.

Peak detection will likely be very useful for many of your projects, so it's an important skill to introduce.

Here is what the data looks like for all our walks. We can look at the data in both acceleration and angle measurements.
"""

# Plot height vs. time
plt.figure(figsize=(10, 5))           # Set figure size in inches (default units)
for key in data_list.keys():
    #if ('w' in key or 'W' in key) and 'sensor' in key:
    plt.plot(data_l_list[key].time, data_l_list[key].lz, '-', label=key)

plt.xlabel('Time [s]')                          # Set x-label
plt.ylabel('Height [m]')              # set y-label
plt.xlim(-1,)
plt.legend(ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc='best') # Shows legend
plt.tick_params(which='major', width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid
plt.show()

# Plot height vs. time
plt.figure(figsize=(10, 5))           # Set figure size in inches (default units)
for key in data_list.keys():
    if 'JK' in key and "foot" in key:
      plt.plot(data_r_list[key].time, data_r_list[key].roll_x, '-', label=key)

plt.xlabel('Time [s]', fontsize=fs)                          # Set x-label
plt.ylabel('Height [m]' , fontsize=fs)              # set y-label
plt.xlim(-1,)
plt.legend(fontsize=fs, ncol=1, framealpha=0.5, fancybox=False, edgecolor='black', loc='best') # Shows legend
plt.tick_params(which='major', labelsize=fs, width=1, length=7,  direction='in', bottom = True, top= True, left= True, right= True) # set major thicks (optional)
plt.tick_params(which='minor', labelsize=fs, width=1, length=3,  direction='in', bottom = True, top= True, left= True, right= True) # set minor thicks (optional)
plt.minorticks_on()        # Turns minor thicks on (optional)
plt.grid()                 # Shows grid
plt.show()

"""Note that for the acceleration plots, each step seems to have a double peak. However, for the angle measurements, each step seems to have a single peak (or in this case valley).

For the purposes of peak detection, it is likely best to use the angle measurements. This way we can be confident that we will only identify one peak per step.

This next bit of code is important. Here we use peak detection to collect cadence, the number of steps and the time of the walk.

We will go over the `detect_peaks()` function in class, but it may take some tuning in order to have it align with your data correctly.

Once the peaks are detected for each walk, the number of peaks is equal to the number of steps, we will define the time between the first and last peak as the total walk time, and cadence is determined using the frequency of the steps in time.
"""

# Cadence
cadence_dict = {}
cadence_avg_dict = {}
num_steps_dict = {}
walk_time_dict = {}

for key in data_list.keys():
    if "foot" in key:
        peak_indexes = detect_peaks(data_r_list[key]['roll_x'], mph=-35, mpd=50, valley=True, show=True)  #mph = minimum peak height     mpd=minimum peak distance (separation)
        peaks = data_r_list[key]['time'].iloc[peak_indexes]

        num_steps_dict[key] = len(peaks)

        peaks = pd.DataFrame(peaks)
        peaks['period'] = peaks["time"].diff(-1).abs()
        peaks['cadence'] = 1/peaks['period'].div(60)

        cadence_dict[key] = peaks
        cadence_avg_dict[key] = peaks['cadence'].mean()
        walk_time_dict[key] = peaks['time'].iloc[-1] - peaks['time'].iloc[0] if len(peaks) > 1 else 0

"""## Walk Data

Here is a plot showing our walk data!
"""

walk_data = {
    'step count': num_steps_dict,
    'walk time': walk_time_dict,
    'cadence': cadence_avg_dict
}

walk_dataframe = pd.DataFrame(walk_data)

walk_dataframe

"""Yay!!!!!!!!!!!!!!!!!!!!!

We're finally done with the notebook. If you've survived this far, a round of applause for you. I know this was a monster of a notebook and you may not understand all the code, but that's ok.

Hopefully this will be a good resource for you when you create your own experiment. Copy and paste is always a great option!

If you have any questions about the notebook, especially if you want to look deeper into the code, please reach out to me:

Max Christopher

maxchristopher@fas.harvard.edu
"""