Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up and improve sleeplog handling #1244

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# CHANGES IN GGIR VERSION 3.1-10

- Part 4:

- Speed up the loading of advanced format sleeplog

- Improve automated recognition of date format in sleeplog

# CHANGES IN GGIR VERSION 3.1-9

- Part 3:
Expand Down
77 changes: 58 additions & 19 deletions R/g.loadlog.R
Original file line number Diff line number Diff line change
Expand Up @@ -204,35 +204,48 @@
}
napcnt = nwcnt = iccnt = 1
IDcouldNotBeMatched = TRUE
dateformat_found = FALSE
for (i in 1:nrow(S)) { # loop through rows in sleeplog
ID = S[i,colid]
if (ID %in% startdates$ID == TRUE) { # matching ID in acc data, if not ignore ID
IDcouldNotBeMatched = FALSE
startdate_acc = as.Date(startdates$startdate[which(startdates$ID == ID)], tz = desiredtz)
startdate_sleeplog = S[i, datecols[1]]
startdate_sleeplog = as.character(S[i, datecols[1:pmin(length(datecols), 5)]])
Sdates_correct = c()
dateformats_to_consider = c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m",
"%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m",
"%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m",
"%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m")
if (dateformat_found == TRUE && dateformats_to_consider[1] != dateformat_correct) {
# If found then first try that before trying anything else
dateformats_to_consider = unique(c(dateformat_correct, dateformats_to_consider))

Check warning on line 221 in R/g.loadlog.R

View check run for this annotation

Codecov / codecov/patch

R/g.loadlog.R#L221

Added line #L221 was not covered by tests
}
# Detect date format in sleeplog:
for (dateformat in c("%Y-%m-%d", "%d-%m-%Y", "%m-%d-%Y", "%Y-%d-%m",
"%y-%m-%d", "%d-%m-%y", "%m-%d-%y", "%y-%d-%m",
"%Y/%m/%d", "%d/%m/%Y", "%m/%d/%Y", "%Y/%d/%m",
"%y/%m/%d", "%d/%m/%y", "%m/%d/%y", "%y/%d/%m")) {
for (dateformat in dateformats_to_consider) {
startdate_sleeplog_tmp = as.Date(startdate_sleeplog, format = dateformat, tz = desiredtz)
Sdates = as.Date(as.character(S[i,datecols]), format = dateformat, tz = desiredtz)
if (length(which(diff(which(is.na(Sdates))) > 1)) > 0) {
stop(paste0("\nSleeplog for ID: ", ID, " has missing date(s)"), call. = FALSE)
}
if (is.na(startdate_sleeplog_tmp) == FALSE) {
if (all(is.na(startdate_sleeplog_tmp) == FALSE)) {
deltadate = as.numeric(startdate_sleeplog_tmp - startdate_acc)
if (is.na(deltadate) == FALSE) {
if (abs(deltadate) < 30) {
startdate_sleeplog = startdate_sleeplog_tmp
if (all(is.na(deltadate) == FALSE)) {
if (all(abs(deltadate) < 30)) {
startdate_sleeplog = startdate_sleeplog_tmp[1]
Sdates_correct = Sdates
dateformat_correct = dateformat
deltadate = deltadate[1]
dateformat_found = TRUE
break
}
}
}
}
if (deltadate > 300) {
warning(paste0("For ID ", ID, " the sleeplog start date is more than 300 days separated ",
"from the dates in the accelerometer recording, this may indicate a ",
"problem with date formats or their recognition, please check."), call. = FALSE)

Check warning on line 247 in R/g.loadlog.R

View check run for this annotation

Codecov / codecov/patch

R/g.loadlog.R#L245-L247

Added lines #L245 - L247 were not covered by tests
}
if (startdates$startAtMidnight[which(startdates$ID == ID)] == TRUE) {
# If the first day in the advanced sleeplog is 28/11
# and the recording starts at midnight 27/11 00:00:00
Expand All @@ -251,6 +264,10 @@
} else {
# handle missing dates
ndates = as.numeric(diff(range(Sdates_correct[!is.na(Sdates_correct)]))) + 1
if (ndates > 300) warning(paste0("For ID ", ID, " the sleeplog has has ",
"more than 300 missing dates, this may ",
"indicate a problem with date format ",
"recognition. Please check."), call. = FALSE)

Check warning on line 270 in R/g.loadlog.R

View check run for this annotation

Codecov / codecov/patch

R/g.loadlog.R#L267-L270

Added lines #L267 - L270 were not covered by tests
if (ndates > nnights) {
extraColumns = matrix("", nrow(newsleeplog), max(c((ndates - nnights)*2, 100)) + 1)
newsleeplog = cbind(newsleeplog, extraColumns)
Expand All @@ -270,7 +287,8 @@
}
newsleeplog[count ,1] = ID
newbedlog[count ,1] = ID
newsleeplog_times = newbedlog_times = c()
newsleeplog_times = newbedlog_times = rep("time", 100)
newCounter = 1
expected_dates = seq(startdate_sleeplog - deltadate, startdate_sleeplog + nnights, by = 1)
# loop over the expected dates, given the start date of the sleeplog
for (ni in 1:(length(expected_dates) - 1)) {
Expand All @@ -283,7 +301,11 @@
}
curdatecol = datecols[ind]
nextdatecol = datecols[which(datecols > curdatecol)[1]]
if (is.na(nextdatecol)) nextdatecol = ncol(S) + 1
lastday = FALSE
if (is.na(nextdatecol)) {
nextdatecol = ncol(S) + 1
lastday = TRUE
}
# Handle mixed reporting of time in bed and SPT
if (length(bedendcols) == 0 & length(bedstartcols) != 0 &
length(onsetcols) == 0 & length(wakecols) != 0) {
Expand All @@ -301,19 +323,29 @@
}
# Sleeplog:
onseti = onsetcols[which(onsetcols > curdatecol & onsetcols < nextdatecol)]
wakeupi = wakecols[which(wakecols > nextdatecol)[1]]
if (lastday == FALSE) {
wakeupi = wakecols[which(wakecols > nextdatecol)[1]]
wakeuptime = S[i,wakeupi]
} else {
wakeuptime = ""
}
if (length(onseti) == 1 & length(wakeupi) == 1) {
newsleeplog_times = c(newsleeplog_times, S[i,onseti], S[i,wakeupi])
newsleeplog_times[newCounter:(newCounter + 1)] = c(S[i,onseti], wakeuptime)
} else {
newsleeplog_times = c(newsleeplog_times, "", "")
newsleeplog_times[newCounter:(newCounter + 1)] = c("", "")
}
# time in bed
bedstarti = bedstartcols[which(bedstartcols > curdatecol & bedstartcols < nextdatecol)]
bedendi = bedendcols[which(bedendcols > nextdatecol)[1]]
if (lastday == FALSE) {
bedendi = bedendcols[which(bedendcols > nextdatecol)[1]]
bedendtime = S[i,bedendi]
} else {
bedendtime = ""
}
if (length(bedstarti) == 1 & length(bedendi) == 1) {
newbedlog_times = c(newbedlog_times, S[i,bedstarti], S[i,bedendi])
newbedlog_times[newCounter:(newCounter + 1)] = c(S[i,bedstarti], bedendtime)
} else {
newbedlog_times = c(newbedlog_times, "", "")
newbedlog_times[newCounter:(newCounter + 1)] = c("", "")
}
# Also grab nap, non-wear, and imputation code info and put those in separate matrices:
naps = napcols[which(napcols > curdatecol & napcols < nextdatecol)]
Expand All @@ -338,10 +370,17 @@
iccnt = iccnt + 1
}
} else {
newsleeplog_times = c(newsleeplog_times, "", "")
newbedlog_times = c(newbedlog_times, "", "")
newsleeplog_times[newCounter:(newCounter + 1)] = c("", "")
newbedlog_times[newCounter:(newCounter + 1)] = c("", "")
}
newCounter = newCounter + 2
if (newCounter > length(newbedlog_times) - 5) {
newbedlog_times = c(newbedlog_times, rep("time", 100))
newsleeplog_times = c(newsleeplog_times, rep("time", 100))

Check warning on line 379 in R/g.loadlog.R

View check run for this annotation

Codecov / codecov/patch

R/g.loadlog.R#L378-L379

Added lines #L378 - L379 were not covered by tests
}
}
newsleeplog_times = newsleeplog_times[which(newsleeplog_times != "time")]
newbedlog_times = newbedlog_times[which(newbedlog_times != "time")]
# add columns to sleeplog
extracols = (length(newsleeplog_times) + 2) - ncol(newsleeplog)
if (extracols > 0) {
Expand Down
20 changes: 16 additions & 4 deletions R/g.part4.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,27 @@
dolog = FALSE
}
if (dolog == TRUE) {
logs_diaries = g.loadlog(params_sleep[["loglocation"]], coln1 = params_sleep[["coln1"]], colid = params_sleep[["colid"]],
meta.sleep.folder = meta.sleep.folder,
desiredtz = params_general[["desiredtz"]])
sleeplogRDataFile = paste0(metadatadir,"/meta/sleeplog.RData")
# only re-process sleeplog if sleeplog.RData does not exist or if sleeplog
# is from a date equal to or after sleeplog.RData
if (!file.exists(sleeplogRDataFile) ||
as.Date(file.info(params_sleep[["loglocation"]])$ctime) >= as.Date(file.info(sleeplogRDataFile)$ctime)) {
logs_diaries = g.loadlog(params_sleep[["loglocation"]],
coln1 = params_sleep[["coln1"]],
colid = params_sleep[["colid"]],
meta.sleep.folder = meta.sleep.folder,
desiredtz = params_general[["desiredtz"]])
save(logs_diaries, file = sleeplogRDataFile)
} else {
load(file = sleeplogRDataFile)

Check warning on line 47 in R/g.part4.R

View check run for this annotation

Codecov / codecov/patch

R/g.part4.R#L47

Added line #L47 was not covered by tests
}
if (params_sleep[["sleepwindowType"]] == "TimeInBed" && length(logs_diaries$bedlog) > 0) {
sleeplog = logs_diaries$bedlog
} else {
sleeplog = logs_diaries$sleeplog
}
save(logs_diaries, file = paste0(metadatadir,"/meta/sleeplog.RData"))
sleeplog$night = as.numeric(sleeplog$night)
sleeplog$duration = as.numeric(sleeplog$duration)
}
#------------------------------------------------
# get list of accelerometer milestone data files from sleep (produced by g.part3)
Expand Down
Loading