
#
## function for plot
#
plotData <- function(data, day=NULL, start=NULL, end=NULL){#
day	findMidnight <- function(data){#
	day	n <- length(data[,1])#
		mm <- 0#
		for(i in 1:(n-1)){#
			mm <- c(mm, ifelse(data$days[i]==data$days[i+1], 0, 1))#
			}#
		data.midnight <- data[mm==1,]#
		first.rowname <- as.numeric(row.names(data[1,]))#
		midnightStart <- as.numeric(row.names(data.midnight) ) - first.rowname + 1#
		return(midnightStart)#
	}#
	if(is.null(start)==1){
#
		start <- 1
#
		}
#
	if(is.null(end)==1){
#
		end <- length(data[,1])
#
		}
#
	if(is.null(day)!=1){
#
		dd <- data[data$days==day,]
#
		midnightMark <- findMidnight(dd)
#
		} else{
#
			dd <- data[start:end,]
#
			midnightMark <- findMidnight(dd)
#
			}
#
	plot(dd$counts, type="l", xlab="Time (min)", ylab="Counts")
#
	abline(v=midnightMark, lty=2, lwd=1.5, col=4)
#
	text(midnightMark, 0, pos=1,"0 AM", cex=0.8, col=4)
#
	return(midnightMark)
#
	}

#
## function for plot
#
plotData <- function(data, day=NULL, start=NULL, end=NULL){#
	findMidnight <- function(data){#
	n <- length(data[,1])#
		mm <- 0#
		for(i in 1:(n-1)){#
			mm <- c(mm, ifelse(data$days[i]==data$days[i+1], 0, 1))#
			}#
		data.midnight <- data[mm==1,]#
		first.rowname <- as.numeric(row.names(data[1,]))#
		midnightStart <- as.numeric(row.names(data.midnight) ) - first.rowname + 1#
		return(midnightStart)#
	}#
	if(is.null(start)==1){
#
		start <- 1
#
		}
#
	if(is.null(end)==1){
#
		end <- length(data[,1])
#
		}
#
	if(is.null(day)!=1){
#
		dd <- data[data$days==day,]
#
		midnightMark <- findMidnight(dd)
#
		} else{
#
			dd <- data[start:end,]
#
			midnightMark <- findMidnight(dd)
#
			}
#
	plot(dd$counts, type="l", xlab="Time (min)", ylab="Counts")
#
	abline(v=midnightMark, lty=2, lwd=1.5, col=4)
#
	text(midnightMark, 0, pos=1,"0 AM", cex=0.8, col=4)
#
	return(midnightMark)
#
	}

#
summaryData <- function(data, validCut=600, perMinuteCts=1, markingString = "w"){
#
	if(perMinuteCts==1){
#
		unit = "1 min"
#
		} else if(perMinuteCts==60){
#
			unit = "1 sec"			
#
			} else{
#
				epoch <- 60/perMinuteCts
#
				unit = paste(epoch, "sec")
#
			}
#
	validCut <- validCut*perMinuteCts
#
	data$weekend <- ifelse(data$weekday=="Saturday" | data$weekday=="Sunday", 1, 0)
#
	data$weekend <- factor(data$weekend, levels=c(0,1), labels=c("weekday", "weekend") )
#
	# total number of week and weekend days
#
	totalNumWeekWeekend <- tapply(data$weekend, data$weekend, length)/(1440*perMinuteCts)
#
	totalNumWeekWeekend[is.na(totalNumWeekWeekend)==1] <- 0
#
	# total number of days
#
	totalNumDays <- (length(data[,1])/(1440*perMinuteCts))
#
	totalNumDays[is.na(totalNumDays)==1] <- 0
#
	wearTime <- sumVct(data, markingString = markingString)
#
	wearTime$weekend <- ifelse(wearTime$weekday=="Saturday" | wearTime$weekday=="Sunday", 1, 0)
#
	wearTime$weekend <- factor(wearTime$weekend, levels=c(0,1), labels=c("weekday", "weekend") )
#
	wearTimeByDay <- tapply(wearTime$duration, wearTime$days, sum, na.rm=T)
#
	wearTimeByDay
#
	validWearTimeByDay <- wearTimeByDay[ifelse(wearTimeByDay > validCut, 1, 0)==1]
#
	validWearTimeByDay
#
	valid.days <- as.numeric(names(validWearTimeByDay))
#
	valid.days
#
	valid.wearTime <- wearTime[wearTime$days%in%valid.days,]
#
	valid.wearTime
#
	valid.data <- data[data$days%in%valid.days,]
#
	valid.data$weekend <- ifelse(valid.data$weekday=="Saturday" | valid.data$weekday=="Sunday", 1, 0)
#
	valid.data$weekend <- factor(valid.data$weekend, levels=c(0,1), labels=c("weekday", "weekend") )
#
	# total number of week and weekend days for valid days
#
	totalValidNumWeekWeekend <- tapply(valid.data$weekend, valid.data$weekend, length)/(1440*perMinuteCts)
#
	totalValidNumWeekWeekend[is.na(totalValidNumWeekWeekend)==1] <- 0
#
	# total number of days for valid days
#
	totalValidNumDays <- (length(valid.data[,1])/(1440*perMinuteCts))
#
	totalValidNumDays[is.na(totalValidNumDays)==1] <- 0
#
	meanWeartimeValidDays <- tapply(valid.wearTime$duration, valid.wearTime$weekend, sum, na.rm=T)/totalValidNumWeekWeekend
#
	meanWeartimeValidDays
#
	meanWeartimeValidDays[is.na(meanWeartimeValidDays)==1] <- 0
#
	meanWeartimeOverallValidDays <- sum(tapply(valid.wearTime$duration, valid.wearTime$weekend, sum, na.rm=T), na.rm=T)/totalValidNumDays
#
	meanWeartimeOverallValidDays
#
	meanWeartimeOverallValidDays[is.na(meanWeartimeOverallValidDays)==1] <- 0
#
	return(list(unit=unit, totalNumDays=totalNumDays,totalNumWeekWeekend=totalNumWeekWeekend, validCut=validCut, totalValidNumDays=totalValidNumDays, totalValidNumWeekWeekend=totalValidNumWeekWeekend, wearTimeByDay=wearTimeByDay, validWearTimeByDay=validWearTimeByDay, meanWeartimeValidDays=meanWeartimeValidDays, meanWeartimeOverallValidDays=meanWeartimeOverallValidDays) )
#
	}
workName = "Rpackage"
#

#
if(.Platform$OS.type == "windows") Sys.setenv(HOME = substr(R.home(), 1, 2))
#
projectDir <- file.path(Sys.getenv("HOME"), "Projects/Mac")
#
workDir <- file.path(projectDir, workName)
#

#
setwd(workDir)
#

#
#call in the program
#
source("wearingMarking.R")
mydata1s = readCountsData(paste(workDir,"/PhysicalActivity/data/sampleSecData.dat", sep=""))
#

#
## 10-second epoch data
#
mydata10s = dataCollapser(mydata1s, TS = "TimeStamp", col = "counts", by = 10)
plotData(data=data1m)
data1m = wearingMarking(dataset = mydata1m,
#
                       frame = 90, 
#
                       perMinuteCts = 1,
#
                       TS = "TimeStamp",
#
                       cts = "counts", 
#
                       streamFrame = NULL, 
#
                       allowanceFrame= 2, 
#
                       newcolname = "wearing")
mydata1m = dataCollapser(mydata1s, TS = "TimeStamp", col = "counts", by = 60)
data1m = wearingMarking(dataset = mydata1m,
#
                       frame = 90, 
#
                       perMinuteCts = 1,
#
                       TS = "TimeStamp",
#
                       cts = "counts", 
#
                       streamFrame = NULL, 
#
                       allowanceFrame= 2, 
#
                       newcolname = "wearing")
data10s = wearingMarking(dataset = mydata10s,
#
                       frame = 90, 
#
                       perMinuteCts = 6,
#
                       TS = "TimeStamp",
#
                       cts = "counts", 
#
                       streamFrame = NULL, 
#
                       allowanceFrame= 2, 
#
                       newcolname = "wearing")
plotData(data=data1m)
plotData(data=data10s)
466*6
7666*6
6226*6
plotData(data=data1m)
pdf("plot.pdf")#
plotData(data=data1m)#
dev.off()
#
pdf("plot2.pdf")#
plotData(data=data10s)#
dev.off()
plotData <- function(data, day=NULL, start=NULL, end=NULL){#
	findMidnight <- function(data){#
	n <- length(data[,1])#
		mm <- 0#
		for(i in 1:(n-1)){#
			mm <- c(mm, ifelse(data$days[i]==data$days[i+1], 0, 1))#
			}#
		data.midnight <- data[mm==1,]#
		first.rowname <- as.numeric(row.names(data[1,]))#
		midnightStart <- as.numeric(row.names(data.midnight) ) - first.rowname + 1#
		return(midnightStart)#
	}#
	if(is.null(start)==1){
#
		start <- 1
#
		}
#
	if(is.null(end)==1){
#
		end <- length(data[,1])
#
		}
#
	if(is.null(day)!=1){
#
		dd <- data[data$days==day,]
#
		midnightMark <- findMidnight(dd)
#
		} else{
#
			dd <- data[start:end,]
#
			midnightMark <- findMidnight(dd)
#
			}
#
	plot(dd$counts, type="l", xlab="Time (min)", ylab="Counts")
#
	abline(v=midnightMark, lty=2, lwd=1.5, col=4)
#
	text(midnightMark, 0, pos=1,"0 AM", cex=0.8, col=4)
#
	}
plotData(data=data1m)
plotData(data=data10s)

#
## function for plot
#
plotData <- function(data, day=NULL, start=NULL, end=NULL){#
	findMidnight <- function(data){#
	n <- length(data[,1])#
		mm <- 0#
		for(i in 1:(n-1)){#
			mm <- c(mm, ifelse(data$days[i]==data$days[i+1], 0, 1))#
			}#
		data.midnight <- data[mm==1,]#
		first.rowname <- as.numeric(row.names(data[1,]))#
		midnightStart <- as.numeric(row.names(data.midnight) ) - first.rowname + 1#
		return(midnightStart)#
	}#
	if(is.null(start)==1){
#
		start <- 1
#
		}
#
	if(is.null(end)==1){
#
		end <- length(data[,1])
#
		}
#
	if(is.null(day)!=1){
#
		dd <- data[data$days==day,]
#
		midnightMark <- findMidnight(dd)
#
		} else{
#
			dd <- data[start:end,]
#
			midnightMark <- findMidnight(dd)
#
			}
#
	plot(dd$counts, type="l", xlab="Time", ylab="Counts")
#
	abline(v=midnightMark, lty=2, lwd=1.5, col=4)
#
	text(midnightMark, 0, pos=1,"0 AM", cex=0.8, col=4)
#
	}
plotData(data=data1m)
plotData(data=data10s)
plotData(data=data1m, days=2)
plotData(data=data1m, day=2)
plotData(data=data1m, start=400, end=3000)
plotData(data=data10s, day=2)
plotData(data=data10s, start=400, end=3000)
plotData(data=data1m, start=400, end=3000)
plotData(data=data1m, day=2)
plotData(data=data1m, start=400, end=3000)
summaryData(data=data1m, validCut=600, perMinuteCts=1, markingString = "w")
workName = "Rpackage"
#

#
if(.Platform$OS.type == "windows") Sys.setenv(HOME = substr(R.home(), 1, 2))
#
projectDir <- file.path(Sys.getenv("HOME"), "Projects/Mac")
#
workDir <- file.path(projectDir, workName)
workDir
file.path(Sys.getenv("HOME"), "Projects/Mac")
R.home()
###########################################################################
#
#					Wearing marking program
#
#						Zhouwen Liu
#
#						May 04, 2010
#
#
#
#	This is a program used to mark wearing status of a monitor data.  The candidate
#
#data set must be in continuous time sequence.
#
#
#
#	There four functions in this program: nthOccurance, dataCollapser, marking and 
#
#wearingMarking.  The first three functions are helper functions of wearingMarking 
#
#function.
#
############################################################################
#

#

#
############################################################################
#
#Name:		nthOccurance
#
#Author:		Zhouwen Liu
#
#			zhouwen.liu@vanderbilt.edu
#
#
#
#Description:
#
#	Looking for the nth occurance of a value, then return the row number
#
#return 0, if nothing found
#
#nth is the numther th of the value to find
#
#nth = c(1,3,5): to find the first, third and fifth occurce of this value
#
#nth default to NA, which tells the program to find all the occurance of the value
#
#
#
#Input.
#
#dataVct:	A vector of data to be searched
#
#value:	The value to be searched for in the dataVct
#
#nth:		the numther of the value to be found
#
#reverse:	If true, searching start from the back-end, otherwise starting from
#
#		the beginning.  Default "FALSE"
#
#
#
#Output:	A numeric vector contains all the locations of the found value
#
#############################################################################
#
nthOccurance = function(dataVct, value, nth = NA, reverse = FALSE)
#
{
#
    loc = c()
#
    if(reverse){
#
        dataVct = rev(dataVct)
#
    }
#

#
    if(is.na(value)){
#
        value = "NA"
#
        dataVct[is.na(dataVct)] = "NA"
#
    }
#

#
    temp = 1:length(dataVct)
#
    if(length(nth)==1){
#
        if( is.na(nth)){
#
            loc = temp[match(dataVct, value, nomatch = 0)==1]
#
        }else{
#
            loc = temp[match(dataVct, value, nomatch = 0)==1][nth]
#
        }
#
    }else{
#
        loc = temp[match(dataVct, value, nomatch = 0)==1][nth]
#
    }
#

#
    if(reverse){ 
#
        loc = length(dataVct) - loc +1
#
    }
#

#
    if(sum(is.na(loc)) == length(loc)){
#
        loc = 0
#
    }
#

#
    return(loc)
#
}
#
#end of function definition
#

#
############################################################################
#
#Name:		dataCollapser
#
#Author:		Zhouwen Liu
#
#			zhouwen.liu@vanderbilt.edu
#
#
#
#Description:
#
#	Collapses data from larger time units to small time units.
#
#
#
#Input.
#
#dataset:	A dataframe of data to be combined
#
#TS:		The value to be searched for in the dataVct
#
#by:		collapsing units in second.  
#
#           i.e., 
#
#           collapse data into 10-second data, by = 10
#
#           collapse data into 1-minute data, by = 60
#
#col:		which column to be collapsed                  
#
#func:	function to collpse the data, default = sum()
#
#...:		additional parameter settings used by func
#
#
#
#Output:	A collapsed data frame
#
#############################################################################
#
dataCollapser = function(dataset, TS, by, col, func = sum, ...)
#
{
#
    ts = as.vector(dataset[,TS])
#
    ct = as.numeric(dataset[,col])
#

#
    timeRange = range(as.vector(ts))
#
    epoch = as.numeric(as.POSIXlt(ts[2]) - as.POSIXlt(ts[1]))
#
    ratio = by/epoch
#

#
    newrange = c(0: (ceiling(length(ts)/ratio)-1))*by
#
    step1 = rep(as.POSIXlt(timeRange[1], tz = "GMT"), length(newrange))
#
    newts = gsub(" GMT", "", step1+ newrange)
#
    newct = rep(NA, length(newrange))
#

#
    i = 1
#
    prevts = 
#
    while(i <= length(newts))
#
    {
#
        start = (i-1)*ratio +1
#
        end = i*ratio
#
        if(end > length(ct))
#
        {    end = length(ct)} 
#

#
        newct[i] = func(ct[start:end], ...)
#
        
#
        i = i+1
#
    }
#

#
    tf = data.frame(timestamp = newts, counts = newct)
#
    names(tf) = c(TS, col)
#
    return(tf)
#
}
#

#
############################################################################
#
#Name:		markingTime
#
#Author:		Zhouwen Liu
#
#			zhouwen.liu@vanderbilt.edu
#
#
#
#Description:
#
#	Marking days in sequential order.
#
#
#
#Input.
#
#dataset:	A dataframe of data to be marked
#
#timestamp:	Timestamp column name
#
#startTime:	Day starting time
#
#endTime:	Day ending time
#
#
#
#Output:	Same dataset dataset with extra "days" column
#
#############################################################################
#
markingTime = function(dataset, timestamp, startTime = "00:00:00", endTime = "23:59:59")
#
{
#
    if(is.numeric(timestamp)){
#
        cadval = as.vector(dataset[,timestamp])
#
    }else {
#
        cadval = as.vector(dataset[,c(names(dataset)== timestamp)])
#
    }
#
    size = length(cadval)
#

#
    daystart = paste(substring(as.POSIXlt(cadval[1], tz = "GMT"),1,10), startTime)
#
    dayend = paste(substring(as.POSIXlt(cadval[1], tz = "GMT"),1,10), endTime)
#

#
    days = 1;
#
    dayMarking = rep(NA, size)
#
    while(as.POSIXlt(dayend, tz = "GMT")  < 60*60*24 + as.POSIXlt(cadval[size], tz = "GMT"))
#
    {
#
        dayMarking[as.POSIXlt(cadval, tz = "GMT")>= as.POSIXlt(daystart, tz = "GMT") & 
#
                   as.POSIXlt(cadval, tz = "GMT")<= as.POSIXlt(dayend, tz = "GMT")] = days
#
        
#
        days = days+1
#
        daystart = as.POSIXlt(dayend, tz = "GMT")+ 1 
#
        dayend = as.POSIXlt(dayend, tz = "GMT")+ 60*60*24
#
    }
#
    temp = cbind(dataset, days = dayMarking) 
#
    return(temp)
#
}
#

#
#######################################################
#
#Name:          sumVct
#
#Author:        Zhouwen Liu
#
#               zhouwen.liu@vanderbilt.edu
#
#
#
#Description:
#
#        Marking wearing or non-wearing.
#
#
#
#Inputs:
#
#datavct:		The candidate data set that needs to be summerized.   
#
#wearing:		The name of the column that has the wearing marking
#
#TS:			the name of the time stamp column
#
#markingString:	The string that needs to be searched
#
#by:			The name of the column that used to break down the summerization
#
#id:			Id name that will be added to the result dataset
#
#Output
#
#	A data frame
#
########################################################
#
sumVct = function(datavct, wearing = "wearing", TS = "TimeStamp", markingString = "w", by = "days", id = NULL)
#
{
#
    len = as.numeric(table(datavct[, c(by)]))
#
    if(length(len) > 1){
#
        for( j in 2:length(len)){
#
            len[j] = len[j]+ len[j-1]
#
        }
#
    }
#
    len=c(0, len)
#

#
    d = unique(datavct[, c(by)])
#
    allrst = NULL
#
    for(i in 1:length(d)){
#
        smalldatavct = datavct[datavct[,c(by)] == d[i],]
#
        temp = as.vector(smalldatavct[,c(wearing)])
#
         
#
        loc = 1:length(temp)
#
        loc = loc[temp == markingString]
#
        if(length(loc)>0){
#
            loc1= loc[1:(length(loc)-1)]
#
            loc2= loc[2:length(loc)]
#
 
#
            tempdf = data.frame(diff = (loc1 == loc2-1), loc1, loc2)
#
            pos = sort(c(loc[1], tempdf[tempdf$diff == FALSE,]$loc1, tempdf[tempdf$diff == FALSE,]$loc2, loc[length(loc)]))
#
            start = pos[1:(length(pos)/2)*2-1] 
#
            end = pos[1:(length(pos)/2)*2]
#
            size = end-start+1
#
            rst = data.frame(start, end, duration = size)
#
            rst$startTimeStamp = smalldatavct[rst$start, TS]
#
            rst$endTimeStamp = smalldatavct[rst$end, TS]
#
            rst$days = d[i]
#
            if(!is.null(id)){
#
                rst$id = id
#
            }
#
            rst$start = rst$start +len[i]
#
            rst$end = rst$end +len[i]
#
            rst$weekday = weekdays(as.Date(rst$startTimeStamp))
#
            allrst = rbind(allrst,rst)
#
        }
#
    }
#
    collist = c("startTimeStamp", "endTimeStamp", "days", "weekday", "start", "end", "duration")
#
    if(!is.null(id)){
#
        collist = c("id", collist)
#
    }
#

#
    allrst = allrst[collist]
#
    return(allrst)
#
}
#

#

#
#######################################################
#
#Name:          marking
#
#Author:        Zhouwen Liu
#
#               zhouwen.liu@vanderbilt.edu
#
#
#
#Description:
#
#        Marking wearing or non-wearing.
#
#
#
#Inputs:
#
#dataset:		The candidate data set that needs to be marked.   
#
#frame:		the size of time interval to be considered
#
#cts:			the name of the counts column
#
#streamFrame:	Default half of frame
#
#allowanceFrame:	the size of time interval that zero counts allowed.  Default is 5.
#
#newcolname:	the wearing marking column name.  Default is "wearing"
#
#
#
#Output
#
#	A marked data frame
#
########################################################
#
marking = function(dataset, 
#
                   frame, 
#
                   cts = "counts", 
#
                   streamFrame = NULL, 
#
                   allowanceFrame= 2, 
#
                   newcolname = "wearing")
#
{
#
    ct = as.vector(dataset[,names(dataset) == cts])
#

#
    if(is.null(streamFrame)){
#
        streamFrame = round(0.5*frame)
#
    }
#

#
    cat("frame is ", frame, "\n")
#
    cat("streamFrame is ", streamFrame, "\n")
#
    cat("allowanceFrame is ", allowanceFrame, "\n")
#

#
    #all the NA's in the original counts data will be treated as 0 counts
#
    ct1 = ct
#
    ct[is.na(ct)] = 0
#

#
    size = dim(dataset)[1]
#
    wearing = rep("nw", size)
#

#
    ct_bool = ct > 0
#
    rowPos = nthOccurance (dataVct = ct_bool, value= TRUE)
#

#
    #getting section start and end positions
#
    startpos = rowPos[1]
#
    endpos = c()
#
    prev = TRUE
#
    for(q  in 2: (length(rowPos)))
#
    {
#
        if(prev)
#
        {
#
            if( rowPos[q] - rowPos[q-1]>1 )
#
            {
#
                endpos = c(endpos, rowPos[q-1])
#
                startpos = c(startpos, rowPos[q])
#
            }
#
        }
#
        else
#
        {
#
            startpos = c(startpos, rowPos[q])
#
            prev = TRUE
#
        }
#
        if(q == length(rowPos))
#
        {endpos = c(endpos, rowPos[q])}
#
    }#end of q
#

#
    #ele3 should be handled here on startpos/endpos level
#
    allowancewin = endpos-startpos
#
    for(r in 1:length(allowancewin))
#
    {
#
        if(allowancewin[r] < allowanceFrame)
#
        {
#
            #upstream
#
            usStart = startpos[r] - streamFrame
#
            usEnd = startpos[r] - 1
#
            if(usStart <=0)
#
            {usStart = 1}
#
            if(usEnd <= 0)
#
            {usStart = 1}
#
            if(usEnd-usStart == 0){
#
                usSignal = "nowearing"
#
            }else {
#
                if(sum(ct_bool[usStart:usEnd]) >0){
#
                    usSignal = "wearing"
#
                }else {
#
                    usSignal = "nowearing"    
#
                }
#
            }
#

#
            #downstream
#
            dsEnd = endpos[r] + streamFrame
#
            dsStart = endpos[r] + 1
#
            if(dsEnd >size)
#
            {dsEnd = size}
#
            if(dsStart > size)
#
            {dsStart = size}
#
            if(dsEnd-dsStart == 0){
#
                dsSignal = "nowearing"
#
            }else {
#
                if(sum(ct_bool[dsStart:dsEnd]) >0){
#
                    dsSignal = "wearing"
#
                }else {
#
                    dsSignal = "nowearing"    
#
                }
#
            }  
#

#
            if(usSignal == "nowearing" & dsSignal == "nowearing")
#
            {
#
                startpos[r] = -1
#
                endpos[r] = -1
#
            }      
#
        }#end of if/allowancewin
#
    }#end of for/r
#

#
    startpos = startpos[startpos != -1]
#
    endpos = endpos[endpos!=-1]
#
    #end of ele3
#

#
    #now get the non-wearing gap
#
    #frame is the gap allowed between time section.  ie if 90 minutes allowed
#
    #between two wearing sections, them frame = 90
#
    gap = startpos[-1] - endpos[1:length(endpos)-1]
#
    endgap = endpos[1:length(gap)]
#
    startgap = startpos[-1]
#
    endgap[gap<= frame] = NA
#
    startgap [gap <= frame] = NA
#
    startgap = c(startpos[1], startgap)
#
    endgap = c(endgap, endpos[length(gap)+1])
#

#
    newstartpos = startgap[!is.na(startgap)]
#
    newendpos = endgap[!is.na(endgap)]
#

#
    for(w in 1: length(newendpos)){
#
        wearing[newstartpos[w]:newendpos[w]] = "w"
#
    }
#

#
    tlen= length(wearing)
#
    wearing[tlen] = wearing[tlen-1]
#

#
    wearing[is.na(ct1)] = NA
#

#
    oldnames = names(dataset)
#
    rst = cbind(dataset, wearing = wearing)
#
    names(rst) = c(oldnames, newcolname)
#
    return(rst)
#
}
#

#
#######################################################
#
#Name:          wearingMarking
#
#Author:        Zhouwen Liu
#
#               zhouwen.liu@vanderbilt.edu
#
#
#
#Description:
#
#        Marking wearing or non-wearing.
#
#
#
#Inputs:
#
#dataset:		The candidate data set that needs to be marked.   
#
#frame:		the size of time interval to be considered
#
#perMinuteCts:	the epoch of the input dataset.  default count 60 times per minute,
#
#TS:			The name of the timestamp column.  Default: "TimeStamp"
#
#cts:			the name of the counts column
#
#streamFrame:	Default half of frame
#
#allowanceFrame:	the size of time interval that zero counts allowed.  Default is 5.
#
#newcolname:	the wearing marking column name.  Default is "wearing"
#
#getMinuteMarking:If the program output marked minute data.  Default is FALSE
#
#...:			parameter settings that will be used dataCollapser function
#
#
#
#Output
#
#	A marked data frame
#
###########################################################
#
wearingMarking = function(dataset, 
#
                          frame, 
#
                          perMinuteCts = 60,
#
                          TS = "TimeStamp",
#
                          cts = "counts", 
#
                          streamFrame = NULL, 
#
                          allowanceFrame= 2, 
#
                          newcolname = "wearing",
#
                          getMinuteMarking = FALSE,
#
                          dayStart = "00:00:00",
#
                          dayEnd = "23:59:59",
#
                          ...)
#
{
#
    if(perMinuteCts != 1){
#
        #not a minute data run collapse
#
        data2 = dataCollapser(dataset, TS=TS, by = 60, col = cts, ...)
#
    }else{
#
        data2 = dataset
#
    }
#
    data3 = marking(data2, frame = frame, cts = cts, streamFrame = streamFrame, 
#
                          allowanceFrame = allowanceFrame, newcolname = newcolname)
#

#
    colName = names(data3)
#
    if(!getMinuteMarking){
#
        dataset$key = substring(dataset$TimeStamp, 1, 16)
#
        data3$key = substring(data3$TimeStamp, 1, 16)
#
        data4 = merge(dataset, data3[c(newcolname, "key")], all.x = TRUE, by = "key")[c(colName)]
#
    }else{
#
        data4 = data3[c(colName)]
#
    }
#

#
    data4$weekday = weekdays(as.POSIXlt(as.vector(data4[,TS]),format = "%Y-%m-%d %H:%M:%S", tz = "GMT"))
#
    markingTime(data4, TS, dayStart, dayEnd)
#
}
#

#
#############################################################################
#
#file read function for chamber/field data
#
#this is a better version than previous one, replace previous one with this.
#
#
#
#read data in and stretch it in columns then add time stamp
#
##############################################################################
#
getEpoch = function(filename, unit ="min")
#
{
#
    #reading raw data
#
    Tfile <- file(filename, "r")
#
    if(isOpen(Tfile, "r")) #  TRUE
#
    {
#
        seek(Tfile, 0, rw="r") # reset to beginning
#
        lines = readLines(Tfile, n = 20)
#
        close(Tfile)
#
    }
#

#
    #get epochtime
#
    epochTPos = grep("Epoch Period", lines)  #start time
#
    epochTime = gsub("Epoch Period \\(hh:mm:ss\\) ", "", lines[epochTPos])
#
    epochTime = gsub("[[:blank:]]", "", epochTime)
#
    #end of getting epoch time
#

#
    specing = as.numeric(substring(epochTime,1,2))*60*60 + as.numeric(substring(epochTime,4,5))*60 + 
#
              as.numeric(substring(epochTime,7,8))
#

#
    if(unit == "min"){
#
        rst = 60/specing
#
        cat("This dataset counts", rst, "times per minute:\n")
#
    }
#

#
    if(unit == "hr"){
#
        rst = 3600/specing
#
        cat("This dataset counts", rst, "times per hour:\n")
#
    }
#

#
    if(unit == "sec"){
#
        rst = 1/specing
#
        cat("This dataset counts", rst, "times per second:\n")
#
    }
#
    
#
    rst
#
}
#

#
#############################################################################
#
#file read function for chamber/field data
#
#this is a better version than previous one, replace previous one with this.
#
#
#
#read data in and stretch it in columns then add time stamp
#
##############################################################################
#
readCountsData = function(filename, ctPerSec = NULL)
#
{
#
    print("Please wait while I am reading your source data ...")
#
    #reading raw data
#
    Tfile <- file(filename, "r")
#
    if(isOpen(Tfile, "r")) #  TRUE
#
    {
#
        seek(Tfile, 0, rw="r") # reset to beginning
#
        lines = readLines(Tfile)
#
        close(Tfile)
#
    }
#

#
    skipPos = grep("-----", lines)[2]  #number of skip lines
#
    startTPos = grep("Start Time", lines)  #start time
#

#
    #get start date
#
    startTime = gsub("Start Time ", "", lines[startTPos])
#
    startTime = gsub("[[:blank:]]", "", startTime)
#
    startDatePos = grep("Start Date ", lines)  #startdate
#
    startDate = gsub("Start Date ", "", lines[startDatePos])
#
    startDate = gsub("[[:blank:]]", "", startDate)
#
    startDate = strsplit(startDate, "/")[[1]]
#
    if(nchar(startDate[1]) == 1){
#
        startDate[1] = paste("0", startDate[1], sep ="")
#
    }
#
    if(nchar(startDate[2]) == 1){
#
        startDate[2] = paste("0", startDate[2], sep ="")
#
    }
#
    startDate = paste(startDate[3], startDate[1], startDate[2], sep = "-")
#
    #end of getting startdate
#
    rawTimeStamp1  = paste(startDate, startTime, sep = " ")
#
 
#
    #get epochtime
#
    if(is.null(ctPerSec)){
#
        ctPerSec = getEpoch(filename, unit = "sec")
#
    }
#
    #end of getting epoch time
#

#
    startline = skipPos+1
#
    endline = length(lines)
#

#
    rawdata = c()
#
    timeline = c()
#
    for(i in startline: endline){
#
        temp0 = gsub("[[:blank:]]+", " ",lines[i])
#
        temp = strsplit(temp0, " ")[[1]]
#
        temp = temp[temp != ""]
#
        rawdata = c(rawdata, temp)
#
    }
#

#
    if(ctPerSec >1){
#
        timeline = rep(0:as.integer(length(rawdata)/ctPerSec), each = ctPerSec)[1:length(rawdata)]
#
    }else{  
#
        timeline = (0:as.integer(length(rawdata)-1)/ctPerSec)
#
    }
#

#
    rawTimeStamp = rep(rawTimeStamp1, length(rawdata))
#
    rst = gsub(" GMT", "", as.POSIXlt(rawTimeStamp, tz = "GMT")+ timeline)
#
    data.frame(TimeStamp = as.vector(rst), counts = as.numeric(as.vector(rawdata)))
#
}#end of readCountsData
#

#

#
#############################################################################
#
#file read function for chamber/field data
#
#this is a better version than previous one, replace previous one with this.
#
#This program must use perl
#
#read data in and stretch it in columns then add time stamp
#
##############################################################################
#
readCountsDataWithPerl = function(filename, ctPerSec, perlLoc = "L:/zhouwenProgram/programs/markActivity", tempf = "tempfile.R")
#
{
#
    if(file.exists("c:/strawberry/perl/bin/perl.exe")){
#
        perlfile = paste(perlLoc, "fileReader.pl", sep = "/")
#
        print("This program will call a Perl program, so you must have Perl complier installed.")
#
        print("Please wait while I am reading your source data ...")
#
        #reading raw data
#
        Tfile <- file(filename, "r")
#
        if(isOpen(Tfile, "r")) #  TRUE
#
        {
#
            seek(Tfile, 0, rw="r") # reset to beginning
#
            lines = readLines(Tfile, n=30)
#
            close(Tfile)
#
        }
#

#
        skipPos = grep("-----", lines)[2]  #number of skip lines
#
        startTPos = grep("Start Time", lines)  #start time
#

#
        #get start date
#
        startTime = gsub("Start Time ", "", lines[startTPos])
#
        startTime = gsub("[[:blank:]]", "", startTime)
#
        startDatePos = grep("Start Date ", lines)  #startdate
#
        startDate = gsub("Start Date ", "", lines[startDatePos])
#
        startDate = gsub("[[:blank:]]", "", startDate)
#
        startDate = strsplit(startDate, "/")[[1]]
#
        if(nchar(startDate[1]) == 1)
#
        {
#
            startDate[1] = paste("0", startDate[1], sep ="")
#
        }
#
        if(nchar(startDate[2]) == 1)
#
        {
#
            startDate[2] = paste("0", startDate[2], sep ="")
#
        }
#
        startDate = paste(startDate[3], startDate[1], startDate[2], sep = "-")
#
        #end of getting startdate
#
        rawTimeStamp1  = paste(startDate, startTime, sep = " ")
#
 
#
        #get epochtime
#
        epochTPos = grep("Epoch Period", lines)  #start time
#
        epochTime = gsub("Epoch Period \\(hh:mm:ss\\) ", "", lines[epochTPos])
#
        epochTime = gsub("[[:blank:]]", "", epochTime)
#
        #end of getting epoch time
#

#
        #data reading start from here
#
        cat("starting Perl...\n")
#
        system(paste("perl ",  perlfile, " \"", filename, "\" ", tempf, sep = ""))
#
        cat("reading finished.\n")
#
        source(tempf)
#
        rawdata =  dataObj
#
        unlink(tempf)
#
        #end of data reading
#

#
        if(ctPerSec >1)
#
        {
#
            timeline = rep(0:as.integer(length(rawdata)/ctPerSec), each = ctPerSec)[1:length(rawdata)]
#
        }else{  
#
            timeline = (0:as.integer(length(rawdata)-1)/ctPerSec)
#
        }
#

#
        rawTimeStamp = rep(rawTimeStamp1, length(rawdata))
#
        rst = gsub(" GMT", "", as.POSIXlt(rawTimeStamp, tz = "GMT")+ timeline)
#
        finalrst = data.frame(TimeStamp = as.vector(rst), counts = as.numeric(as.vector(rawdata)))
#
    }else{
#
        print("Perl compilor is not found.  Running old reading program...")
#
        finalrst = readCountsData2(filename = filename, ctPerSec = ctPerSec)
#
    }
#

#
    finalrst
#
}#end of readCountsData
paste(R.home(),"/library","/PhysicalActivity/data/sampleSecData.dat", sep="")
mydata1s = readCountsData(paste(R.home(),"/library","/zz/data/sampleSecData.dat", sep=""))
mydata1m = dataCollapser(mydata1s, TS = "TimeStamp", col = "counts", by = 60)
data1m = wearingMarking(dataset = mydata1m,
#
                       frame = 90, 
#
                       perMinuteCts = 1,
#
                       TS = "TimeStamp",
#
                       cts = "counts", 
#
                       streamFrame = NULL, 
#
                       allowanceFrame= 2, 
#
                       newcolname = "wearing")
#

#
sumVct(data1m, id="sdata1m")
plotData(data=data1m, start=400, end=3000)
rm(list=ls(all=TRUE))
#

#
workName = "Rpackage"
#

#
if(.Platform$OS.type == "windows") Sys.setenv(HOME = substr(R.home(), 1, 2))
#
projectDir <- file.path(Sys.getenv("HOME"), "Projects/Mac")
#
workDir <- file.path(projectDir, workName)
#

#
setwd(workDir)
#

#
#call in the program
#
source("wearingMarking.R")
#

#
#######################################################################
#
## Example of wearing/non-wearing marking 
#
#######################################################################
#
# 1 second epoch data
#
load(paste(workDir,"/PhysicalActivity/data/dataSec.RDA", sep=""))
#
mydata1s = dataSec
mydata1m = dataCollapser(mydata1s, TS = "TimeStamp", col = "counts", by = 60)
mydata1m = read.csv("output/sample1m.csv")
#
data1m = wearingMarking(dataset = mydata1m,
#
                       frame = 90, 
#
                       perMinuteCts = 1,
#
                       TS = "TimeStamp",
#
                       cts = "counts", 
#
                       streamFrame = NULL, 
#
                       allowanceFrame= 2, 
#
                       newcolname = "wearing")
#

#
sumVct(data1m, id="sdata1m")
data1m[1,]
library(PhysicalActivity)
help(PhysicalActivity)
data(dataSec)#
#
mydata1m = dataCollapser(dataSec, TS = "TimeStamp", col = "counts", by = 60)#
data1m = wearingMarking(dataset = mydata1m,#
                       frame = 90, #
                       perMinuteCts = 1,#
                       TS = "TimeStamp",#
                       cts = "counts", #
                       streamFrame = NULL, #
                       allowanceFrame= 2, #
                       newcolname = "wearing")
#
sumVct(data1m, id="sdata1m")
#
plotData(data=data1m)
#
summaryData(data=data1m, validCut=600, perMinuteCts=1, markingString = "w")
mydata10s = dataCollapser(dataSec, TS = "TimeStamp", col = "counts", by = 10)
mydata10s[1:4,]
dataSec[1:4,]
mydata1m[1:4,]
setwd("~/Projects/Mac/Rpackage/check/PhysicalActivity/R")
source("wearingMarking.R")
dataSec = read.csv("sdata.csv")
dataSec = read.csv("~/Projects/Mac/Rpackage/check/PhysicalActivity/data/sdata.csv")
dataSec = read.csv("~/Projects/Mac/Rpackage/check/PhysicalActivity/testProgram/2010-05-01_publish/sdata.csv")
dataSec = read.csv("~/Projects/Mac/Rpackage/testProgram/2010-05-01_publish/sdata.csv")
dataSec[1,]
mydata1m = dataCollapser(dataSec, TS = "TimeStamp", col = "counts", by = 60)
rm(list=ls(all=TRUE))#

#
setwd("~/Projects/Mac/Rpackage/check/PhysicalActivity/R")#
source("wearingMarking.R")#
source("dataCollapser.R")#
source("marking.R")#
source("markingTime.R")#
source("nthOccurance.R")#
source("plotData.R")#
source("readCountsData.R")#
source("summaryData.R")#
source("sumVct.R")
dataSec = read.csv("~/Projects/Mac/Rpackage/testProgram/2010-05-01_publish/sdata.csv")
xx <- dataSec
xx[1,]
xx$t <- x$TimeStamp
xx <- dataSec
xx$t <- xx$TimeStamp
xx[1,]
xx$ct <- xx$counts
xx$ct <- xx$counts
xx[1,]
mydata1m = dataCollapser(dataSec, TS = "t", col = "ct", by = 60)
mydata1m = dataCollapser(dataSec, TS = "TimeStamp", col = "counts", by = 60)
mydata1m = dataCollapser(dataSec, TS = "TimeStamp", col = "counts", by = 60)
mydata1m = dataCollapser(dataSec, TS = "t", col = "ct", by = 60)
#
data1s = wearingMarking(dataset = dataSec,#
                       frame = 90, #
                       perMinuteCts = 60,#
                       TS = "TimeStamp",#
                       cts = "counts", #
                       streamFrame = NULL, #
                       allowanceFrame= 2, #
                       newcolname = "wearing",#
                       getMinuteMarking = FALSE)
dataSec[1,]
xx[1:4,]
setwd("~/Projects/Mac/Rpackage/check/PhysicalActivity/data")
data(dataSec)
load(dataSec)
load(dataSec.rda)
data(dataSec.rda)
data(dataSec)
dataSec = read.csv("~/Projects/Mac/Rpackage/testProgram/2010-05-01_publish/sdata.csv")
dataSec[1,]
dataSec <- dataSec[1:2000,]
data1s = wearingMarking(dataset = dataSec,#
                       frame = 90, #
                       perMinuteCts = 60,#
                       TS = "TimeStamp",#
                       cts = "counts", #
                       streamFrame = NULL, #
                       allowanceFrame= 2, #
                       newcolname = "wearing",#
                       getMinuteMarking = FALSE)
dataSec[1:30,]
library(PhysicalActivity)
data(dataSec)
dataSec[1:20,]
data1s = wearingMarking(dataset = dataSec,#
                       frame = 90, #
                       perMinuteCts = 60,#
                       TS = "TimeStamp",#
                       cts = "counts", #
                       streamFrame = NULL, #
                       allowanceFrame= 2, #
                       newcolname = "wearing",#
                       getMinuteMarking = FALSE)
rm(list=ls(all=TRUE))
