diff --git a/R/ARMA.R b/R/ARMA.R index ae6819778..d688f6db6 100644 --- a/R/ARMA.R +++ b/R/ARMA.R @@ -21,7 +21,7 @@ #' @param shrink logical; \code{FALSE} (default) Ensembles forecasts with \code{method = "means"}. #' @param plot logical; \code{TRUE} (default) Returns the plot of all periods exhibiting seasonality and the \code{variable} level reference in upper panel. Lower panel returns original data and forecast. #' @param seasonal.plot logical; \code{TRUE} (default) Adds the seasonality plot above the forecast. Will be set to \code{FALSE} if no seasonality is detected or \code{seasonal.factor} is set to an integer value. -#' @param pred.int numeric [0, 1]; \code{NULL} (default) Plots and returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{meboot} on the final estimates. +#' @param pred.int numeric [0, 1]; \code{NULL} (default) Plots and returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{NNS.meboot} on the final estimates. #' @return Returns a vector of forecasts of length \code{(h)} if no \code{pred.int} specified. Else, returns a \link{data.table} with the forecasts as well as lower and upper prediction intervals per forecast point. #' @note #' For monthly data series, increased accuracy may be realized from forcing seasonal factors to multiples of 12. For example, if the best periods reported are: \{37, 47, 71, 73\} use diff --git a/R/ARMA_optim.R b/R/ARMA_optim.R index 9be306a16..a5226bbf3 100644 --- a/R/ARMA_optim.R +++ b/R/ARMA_optim.R @@ -12,7 +12,7 @@ #' \code{expression(cor(predicted, actual, method = "spearman") / sum((predicted - actual)^2))} (default) Rank correlation / sum of squared errors is the default objective function. Any \code{expression(...)} using the specific terms \code{predicted} and \code{actual} can be used. #' @param objective options: ("min", "max") \code{"max"} (default) Select whether to minimize or maximize the objective function \code{obj.fn}. #' @param linear.approximation logical; \code{TRUE} (default) Uses the best linear output from \code{NNS.reg} to generate a nonlinear and mixture regression for comparison. \code{FALSE} is a more exhaustive search over the objective space. -#' @param pred.int numeric [0, 1]; 0.95 (default) Returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{meboot} on the final estimates. +#' @param pred.int numeric [0, 1]; 0.95 (default) Returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{NNS.meboot} on the final estimates. #' @param print.trace logical; \code{TRUE} (default) Prints current iteration information. Suggested as backup in case of error, best parameters to that point still known and copyable! #' @param plot logical; \code{FALSE} (default) #' diff --git a/R/Boost.R b/R/Boost.R index a27100f92..cfdc5559f 100644 --- a/R/Boost.R +++ b/R/Boost.R @@ -10,7 +10,7 @@ #' @param learner.trials integer; 100 (default) Sets the number of trials to obtain an accuracy \code{threshold} level. If the number of all possible feature combinations is less than selected value, the minimum of the two values will be used. #' @param epochs integer; \code{2*length(DV.train)} (default) Total number of feature combinations to run. #' @param CV.size numeric [0, 1]; \code{NULL} (default) Sets the cross-validation size. Defaults to a random value between 0.2 and 0.33 for a random sampling of the training set. -#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling from \code{caret} to balance the classes. \code{type="CLASS"} required. +#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling to balance the classes. \code{type="CLASS"} required. #' @param ts.test integer; NULL (default) Sets the length of the test set for time-series data; typically \code{2*h} parameter value from \link{NNS.ARMA} or double known periods to forecast. #' @param folds integer; 5 (default) Sets the number of \code{folds} in the \link{NNS.stack} procedure for optimal \code{n.best} parameter. #' @param threshold numeric; \code{NULL} (default) Sets the \code{obj.fn} threshold to keep feature combinations. @@ -123,8 +123,8 @@ NNS.boost <- function(IVs.train, y_train <- as.factor(as.character(DV.train)) - training_1 <- do.call(cbind, caret::downSample(IVs.train, y_train, list = TRUE)) - training_2 <- do.call(cbind, caret::upSample(IVs.train, y_train, list = TRUE)) + training_1 <- do.call(cbind, downSample(IVs.train, y_train, list = TRUE)) + training_2 <- do.call(cbind, upSample(IVs.train, y_train, list = TRUE)) training <- rbind.data.frame(training_1, training_2) diff --git a/R/Internal_Functions.R b/R/Internal_Functions.R index 3d97ce1e9..0fb27a63a 100644 --- a/R/Internal_Functions.R +++ b/R/Internal_Functions.R @@ -207,3 +207,76 @@ is.fcl <- function(x) is.factor(x) || is.character(x) || is.logical(x) is.discrete <- function(x) sum(as.numeric(x)%%1)==0 + +### upSample / downSample to avoid dependencies +downSample <- function(x, y, list = FALSE, yname = "Class") { + if (!is.data.frame(x)) { + x <- as.data.frame(x, stringsAsFactors = TRUE) + } + if (!is.factor(y)) { + warning( + "Down-sampling requires a factor variable as the response. The original data was returned." + ) + return(list(x = x, y = y)) + } + + minClass <- min(table(y)) + x$.outcome <- y + + x <- plyr::ddply(x, .(y), + function(dat, n) + dat[sample(seq(along = dat$.outcome), n), , drop = FALSE], + n = minClass) + y <- x$.outcome + x <- x[, !(colnames(x) %in% c("y", ".outcome")), drop = FALSE] + if (list) { + if (inherits(x, "matrix")) { + x <- as.matrix(x) + } + out <- list(x = x, y = y) + } else { + out <- cbind(x, y) + colnames(out)[ncol(out)] <- yname + } + out +} + + +upSample <- function(x, y, list = FALSE, yname = "Class") { + if (!is.data.frame(x)) { + x <- as.data.frame(x, stringsAsFactors = TRUE) + } + if (!is.factor(y)) { + warning( + "Up-sampling requires a factor variable as the response. The original data was returned." + ) + return(list(x = x, y = y)) + } + + maxClass <- max(table(y)) + x$.outcome <- y + + x <- plyr::ddply(x, .(y), + function(x, top = maxClass) { + if (nrow(x) < top) { + ind <- sample(1:nrow(x), + size = top - nrow(x), + replace = TRUE) + ind <- c(1:nrow(x), ind) + x <- x[ind, , drop = FALSE] + } + x + }) + y <- x$.outcome + x <- x[,!(colnames(x) %in% c("y", ".outcome")), drop = FALSE] + if (list) { + if (inherits(x, "matrix")) { + x <- as.matrix(x) + } + out <- list(x = x, y = y) + } else { + out <- cbind(x, y) + colnames(out)[ncol(out)] <- yname + } + out +} diff --git a/R/NNS_term_matrix.R b/R/NNS_term_matrix.R index 7c1de4adb..cc18f529e 100644 --- a/R/NNS_term_matrix.R +++ b/R/NNS_term_matrix.R @@ -77,12 +77,12 @@ NNS.term.matrix <- function(x, oos = NULL){ } } - NNS.TM <- t(sapply(1 : length(x[ , 1]), function(i) as.integer(tryCatch(stringr::str_count(x[i, 1], unique.vocab), error = function (e) 0)))) + NNS.TM <- t(sapply(1 : length(x[ , 1]), function(i) as.integer(tryCatch(as.numeric(unique.vocab%in%x[i,1]), error = function (e) 0)))) colnames(NNS.TM) <- c(unique.vocab) if(!is.null(oos)){ - OOS.TM <- t(sapply(1 : length(oos), function(i) stringr::str_count(oos[i], unique.vocab))) + OOS.TM <- t(sapply(1 : length(oos), function(i) as.numeric(unique.vocab%in%oos[i]))) colnames(OOS.TM) <- c(unique.vocab) diff --git a/R/Nowcast.R b/R/Nowcast.R index ce7b90693..0c40f9e29 100644 --- a/R/Nowcast.R +++ b/R/Nowcast.R @@ -46,6 +46,7 @@ #' \item \code{PALLFNFINDEXM} -- Global Price Index of All Commodities #' \item \code{FEDFUNDS} -- Federal Funds Effective Rate #' \item \code{PPIACO} -- Producer Price Index All Commodities +#' \item \code{CIVPART} -- Labor Force Participation Rate #' } #' #' @return Returns the following matrices of forecasted variables: diff --git a/R/Stack.R b/R/Stack.R index 5b95cca94..3c59d33d2 100644 --- a/R/Stack.R +++ b/R/Stack.R @@ -11,7 +11,7 @@ #' @param optimize.threshold logical; \code{TRUE} (default) Will optimize the probability threshold value for rounding in classification problems. If \code{FALSE}, returns 0.5. #' @param dist options:("L1", "L2", "DTW", "FACTOR") the method of distance calculation; Selects the distance calculation used. \code{dist = "L2"} (default) selects the Euclidean distance and \code{(dist = "L1")} selects the Manhattan distance; \code{(dist = "DTW")} selects the dynamic time warping distance; \code{(dist = "FACTOR")} uses a frequency. #' @param CV.size numeric [0, 1]; \code{NULL} (default) Sets the cross-validation size if \code{(IVs.test = NULL)}. Defaults to a random value between 0.2 and 0.33 for a random sampling of the training set. -#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling from \code{caret} to balance the classes. \code{type="CLASS"} required. +#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling to balance the classes. \code{type="CLASS"} required. #' @param ts.test integer; NULL (default) Sets the length of the test set for time-series data; typically \code{2*h} parameter value from \link{NNS.ARMA} or double known periods to forecast. #' @param folds integer; \code{folds = 5} (default) Select the number of cross-validation folds. #' @param order options: (integer, "max", NULL); \code{NULL} (default) Sets the order for \link{NNS.reg}, where \code{(order = "max")} is the k-nearest neighbors equivalent, which is suggested for mixed continuous and discrete (unordered, ordered) data. @@ -194,8 +194,8 @@ NNS.stack <- function(IVs.train, CV.DV.test <- DV.train[c(test.set)] y_train <- as.factor(CV.DV.train) - training_1 <- do.call(cbind, caret::downSample(CV.IVs.train, y_train, list = TRUE)) - training_2 <- do.call(cbind, caret::upSample(CV.IVs.train, y_train, list = TRUE)) + training_1 <- do.call(cbind, downSample(CV.IVs.train, y_train, list = TRUE)) + training_2 <- do.call(cbind, upSample(CV.IVs.train, y_train, list = TRUE)) training <- rbind.data.frame(training_1, training_2) diff --git a/R/gvload.R b/R/gvload.R index 396416c34..3b562093c 100644 --- a/R/gvload.R +++ b/R/gvload.R @@ -1,19 +1,17 @@ # Import calls and globalvariable calls -#' @importFrom caret upSample downSample #' @importFrom grDevices adjustcolor rainbow rgb #' @importFrom graphics abline boxplot legend lines par plot points segments text matplot title axis mtext barplot hist strwidth polygon +#' @importFrom plyr ddply #' @importFrom quantmod getSymbols #' @importFrom Rfast colmeans rowmeans rowsums comb_n #' @importFrom stats coef cor lm na.omit sd median complete.cases resid uniroot aggregate density hat qnorm model.matrix fivenum acf qt ecdf time approx embed frequency is.ts runif start ts optim quantile optimize dnorm dlnorm dexp dt t.test wilcox.test .preformat.ts var poly -#' @importFrom stringr str_count #' @importFrom utils globalVariables head tail combn flush.console #' @importFrom xts to.monthly #' @importFrom zoo as.yearmon index #' @import data.table #' @import doParallel #' @import foreach -#' @import meboot #' @rawNamespace import(Rcpp, except = LdFlags) #' @import RcppParallel #' @import rgl @@ -42,7 +40,6 @@ requireNamespace("data.table") requireNamespace("doParallel") requireNamespace("foreach") - requireNamespace("meboot") requireNamespace("Rcpp") requireNamespace("RcppParallel") requireNamespace("rgl")