setwd("/home/perovich/Desktop/week12/danceStats") #Read in the files: created through python script w/Rpy2 salsa<-read.csv("data/salsa.csv") chacha<-read.csv("data/chacha.csv") arumba<-read.csv("data/amRumba.csv") irumba<-read.csv("data/intRumba.csv") swing<-read.csv("data/swing.csv") #Make sure there are no dummy rows around--for now that means all feet are L (since right has yet to make it into the code) salsa<-subset(salsa, salsa$foot=="L") chacha<-subset(chacha, chacha$foot=="L") arumba<-subset(arumba, arumba$foot=="L") irumba<-subset(irumba, irumba$foot=="L") swing<-subset(swing, swing$foot=="L") dances<-list(salsa, chacha, arumba, irumba, swing) names(dances)<-c("salsa", "chacha", "arumba", "irumba", "swing") #Truncate a bit to drop out the start/end junk #(this shouldn't matter much because there's a lot of data to wash it out--but just in case) cutter<-function(x,y){ #x is the dataset #y is the total percentage of the data you want to drop (half from each end) (as a decimal) rowcount<-nrow(x) rowcutends<-floor(rowcount*y/2) x2<-x[rowcutends:(rowcount-rowcutends),] return(x2) } #Function that makes the pressure data yes/no binaried<-function(x, y){ #x is the dataset with column "press", y is the numeric cutpoint for 0 vs 1 binning x$press<-ifelse(x$press10) # Arbitrary threshold chosen by trial and error. { period <- round(1/spec$freq[which.max(spec$spec)]) if(period==Inf) # Find next local maximum { j <- which(diff(spec$spec)>0) if(length(j)>0) { nextmax <- j[1] + which.max(spec$spec[j[1]:500]) period <- round(1/spec$freq[nextmax]) } else period <- 1 } } else period <- 1 return(period) } #Try it on the regular looking time series data dancesFreq<-dancesT dancesFreq2<-data.frame(names(dancesFreq), c(1:length(dancesFreq))) colnames(dancesFreq2)<-c("dance", "Freq") for (i in 1:length(dancesFreq)){ dancesFreq[[i]]<-find.freq(dancesT[[i]]) dancesFreq2$Freq[[i]]<-dancesFreq[i][[1]] } #Try it on the binary version of the time series data dancesFreqB<-dancesBT for (i in 1:length(dancesFreqB)) dancesFreqB[[i]]<-find.freq(dancesBT[[i]]) #DOESN"T WORK ON THE BINARY VERSION. #ANOTHER PLAN: Find the change points and do something with them....(diffeq) ########################################################################### ####################### REAL DATA ######################################### ########################################################################### #See if I can match new data to the existing dance data. #Read in the files: created through python script w/Rpy2 salsaT1<-read.csv("data/salsaTest1.csv") chachaT1<-read.csv("data/chachaTest1.csv") arumbaT1<-read.csv("data/AmRumbaTest1.csv") irumbaT1<-read.csv("data/intRumbaTest1.csv") swingT1<-read.csv("data/swingTest1.csv") salsaT2<-read.csv("data/salsaTest2.csv") chachaT2<-read.csv("data/chachaTest2.csv") arumbaT2<-read.csv("data/AmRumbaTest2.csv") irumbaT2<-read.csv("data/intRumbaTest2.csv") swingT2<-read.csv("data/swingTest2.csv") tests<-list(salsaT1,salsaT2, chachaT1, chachaT2, arumbaT1, arumbaT2, irumbaT1, irumbaT2, swingT1, swingT2) names(tests)<-c("salsaT1","salsaT2", "chachaT1", "chachaT2", "arumbaT1", "arumbaT2", "irumbaT1", "irumbaT2", "swingT1", "swingT2") #Cleaning, binary versions, summary stats etc testsB<-tests testsSum<-tests for (i in 1:length(tests)){ tests[[i]]<-subset(tests[[i]], tests[[i]]$foot=="L") tests[[i]]<-cutter(tests[[i]], .02) testsB[[i]]<-binaried(tests[[i]], 20) testsSum[[i]]<-summary(tests[[i]]$press, options(scipen=2)) } dancesSum #Reformat the results so it can be used to match more easily #could us plyr instead probably better. sumTable<-function(dancesSum){ meanss<-c(1:length(dancesSum)) medianss<-c(1:length(dancesSum)) for (i in 1:length(dancesSum)){ meanss[[i]]<-dancesSum[[i]]["Mean"][[1]] medianss[[i]]<-dancesSum[[i]]["Median"][[1]] } MeanTable<-data.frame(names(dancesSum), meanss, medianss) colnames(MeanTable)<-c("dance", "Mean", "Median") return(MeanTable) } danceTable<-sumTable(dancesSum) testTable<-sumTable(testsSum) danceTable$dance<-as.character(danceTable$dance) for (i in 1:nrow(testTable)){ testTable$GuessMean[[i]]<-danceTable$dance[[which.min(abs(testTable$Mean[[i]]-danceTable$Mean))]] testTable$GuessMedian[[i]]<-danceTable$dance[[which.min(abs(testTable$Median[[i]]-danceTable$Median))]] } testTable #FAILURE! Everything was higher this time around it seems #SUMMARY STATS ARE NOT THE WAY TO IDENTIFY THESE! #See how predictive the frequency estimates are #(not super optimistic here--speed should change this) #(maybe estimates can be used to pick out chunks to try to match though) #Try it on the regular looking time series data #Try time series: testsT<-tests for (i in 1:length(testsT)) testsT[[i]]<-ts(testsT[[i]]$press) #estimates of the autocovariance or autocorrelation function (???) for (i in 1:length(testsT)){ jpeg(paste("plots/acf", names(testsT)[[i]], ".jpg", sep='')) print(acf(testsT[[i]], main=names(testsT)[[i]])) dev.off() } #These are maybe useful for matching....not totally consistent though.... #Note: blue lines are 95th confidence cut-offs--so basically all our data are significant #Try this other numerical thing testsTmod<-testsT for (i in 1:length(testsTmod)) testsTmod[[i]]<-ar.yw(testsT[[i]]) #Pull orders testsTorders<-testsTmod testsTorders<-data.frame(names(testsTmod), c(1:length(testsTmod))) colnames(testsTorders)<-c("dancetest", "order") for (i in 1:nrow(testsTorders)){ testsTorders$order[[i]]<-testsTmod[[i]]$order } #Not great. Maybe order isn't the thing to look at...... #Time series plots for (i in 1:length(tests)){ jpeg(paste("plots/TS", names(testsT)[[i]], ".jpg", sep='')) plot.ts(tests[[i]]$press, xlim=c(50,250), main=names(tests)[[i]]) dev.off() } #Try it on the regular looking time series data testsFreq<-testsT testsFreq2<-data.frame(names(testsFreq), c(1:length(testsFreq))) colnames(testsFreq2)<-c("dancetest", "Freq") for (i in 1:length(testsFreq)){ testsFreq[[i]]<-find.freq(testsT[[i]]) testsFreq2$Freq[[i]]<-testsFreq[i][[1]] } #Clean up to get predictions testsFreq2$dancetest<-as.character(testsFreq2$dancetest) testsFreq2$dance<-as.character(testsFreq2$dancetest) for (i in 1:nrow(testsFreq2)){ testsFreq2$Guess[[i]]<-as.character(dancesFreq2$dance)[[which.min(abs(testsFreq2$Freq[[i]]-dancesFreq2$Freq))]] testsFreq2$dance[[i]]<-substring(testsFreq2$dancetest[[i]], 1, nchar(testsFreq2$dancetest[[i]])-2) } DF2<-dancesFreq2 colnames(DF2)<-c("dance", "officialFreq") testsFreq2<-merge(testsFreq2, DF2, by="dance", all=TRUE) #NOT TOO BAD!!!! #Everything but salsa seemed to match up okay.... #definitely depeds on speed of motion though because irumba with a song got messed up too. #mmmmmm, not robust. I want it based on shape of graph. #More to do here.... #