Skip to content

Commit

Permalink
NNS 10.8 Beta
Browse files Browse the repository at this point in the history
  • Loading branch information
OVVO-Financial authored Apr 18, 2024
1 parent dbce625 commit 94a8a78
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 14 deletions.
2 changes: 1 addition & 1 deletion R/ARMA.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#' @param shrink logical; \code{FALSE} (default) Ensembles forecasts with \code{method = "means"}.
#' @param plot logical; \code{TRUE} (default) Returns the plot of all periods exhibiting seasonality and the \code{variable} level reference in upper panel. Lower panel returns original data and forecast.
#' @param seasonal.plot logical; \code{TRUE} (default) Adds the seasonality plot above the forecast. Will be set to \code{FALSE} if no seasonality is detected or \code{seasonal.factor} is set to an integer value.
#' @param pred.int numeric [0, 1]; \code{NULL} (default) Plots and returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{meboot} on the final estimates.
#' @param pred.int numeric [0, 1]; \code{NULL} (default) Plots and returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{NNS.meboot} on the final estimates.
#' @return Returns a vector of forecasts of length \code{(h)} if no \code{pred.int} specified. Else, returns a \link{data.table} with the forecasts as well as lower and upper prediction intervals per forecast point.
#' @note
#' For monthly data series, increased accuracy may be realized from forcing seasonal factors to multiples of 12. For example, if the best periods reported are: \{37, 47, 71, 73\} use
Expand Down
2 changes: 1 addition & 1 deletion R/ARMA_optim.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' \code{expression(cor(predicted, actual, method = "spearman") / sum((predicted - actual)^2))} (default) Rank correlation / sum of squared errors is the default objective function. Any \code{expression(...)} using the specific terms \code{predicted} and \code{actual} can be used.
#' @param objective options: ("min", "max") \code{"max"} (default) Select whether to minimize or maximize the objective function \code{obj.fn}.
#' @param linear.approximation logical; \code{TRUE} (default) Uses the best linear output from \code{NNS.reg} to generate a nonlinear and mixture regression for comparison. \code{FALSE} is a more exhaustive search over the objective space.
#' @param pred.int numeric [0, 1]; 0.95 (default) Returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{meboot} on the final estimates.
#' @param pred.int numeric [0, 1]; 0.95 (default) Returns the associated prediction intervals for the final estimate. Constructed using the maximum entropy bootstrap \link{NNS.meboot} on the final estimates.
#' @param print.trace logical; \code{TRUE} (default) Prints current iteration information. Suggested as backup in case of error, best parameters to that point still known and copyable!
#' @param plot logical; \code{FALSE} (default)
#'
Expand Down
6 changes: 3 additions & 3 deletions R/Boost.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' @param learner.trials integer; 100 (default) Sets the number of trials to obtain an accuracy \code{threshold} level. If the number of all possible feature combinations is less than selected value, the minimum of the two values will be used.
#' @param epochs integer; \code{2*length(DV.train)} (default) Total number of feature combinations to run.
#' @param CV.size numeric [0, 1]; \code{NULL} (default) Sets the cross-validation size. Defaults to a random value between 0.2 and 0.33 for a random sampling of the training set.
#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling from \code{caret} to balance the classes. \code{type="CLASS"} required.
#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling to balance the classes. \code{type="CLASS"} required.
#' @param ts.test integer; NULL (default) Sets the length of the test set for time-series data; typically \code{2*h} parameter value from \link{NNS.ARMA} or double known periods to forecast.
#' @param folds integer; 5 (default) Sets the number of \code{folds} in the \link{NNS.stack} procedure for optimal \code{n.best} parameter.
#' @param threshold numeric; \code{NULL} (default) Sets the \code{obj.fn} threshold to keep feature combinations.
Expand Down Expand Up @@ -123,8 +123,8 @@ NNS.boost <- function(IVs.train,

y_train <- as.factor(as.character(DV.train))

training_1 <- do.call(cbind, caret::downSample(IVs.train, y_train, list = TRUE))
training_2 <- do.call(cbind, caret::upSample(IVs.train, y_train, list = TRUE))
training_1 <- do.call(cbind, downSample(IVs.train, y_train, list = TRUE))
training_2 <- do.call(cbind, upSample(IVs.train, y_train, list = TRUE))

training <- rbind.data.frame(training_1, training_2)

Expand Down
73 changes: 73 additions & 0 deletions R/Internal_Functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,76 @@ is.fcl <- function(x) is.factor(x) || is.character(x) || is.logical(x)

is.discrete <- function(x) sum(as.numeric(x)%%1)==0


### upSample / downSample to avoid dependencies
downSample <- function(x, y, list = FALSE, yname = "Class") {
if (!is.data.frame(x)) {
x <- as.data.frame(x, stringsAsFactors = TRUE)
}
if (!is.factor(y)) {
warning(
"Down-sampling requires a factor variable as the response. The original data was returned."
)
return(list(x = x, y = y))
}

minClass <- min(table(y))
x$.outcome <- y

x <- plyr::ddply(x, .(y),
function(dat, n)
dat[sample(seq(along = dat$.outcome), n), , drop = FALSE],
n = minClass)
y <- x$.outcome
x <- x[, !(colnames(x) %in% c("y", ".outcome")), drop = FALSE]
if (list) {
if (inherits(x, "matrix")) {
x <- as.matrix(x)
}
out <- list(x = x, y = y)
} else {
out <- cbind(x, y)
colnames(out)[ncol(out)] <- yname
}
out
}


upSample <- function(x, y, list = FALSE, yname = "Class") {
if (!is.data.frame(x)) {
x <- as.data.frame(x, stringsAsFactors = TRUE)
}
if (!is.factor(y)) {
warning(
"Up-sampling requires a factor variable as the response. The original data was returned."
)
return(list(x = x, y = y))
}

maxClass <- max(table(y))
x$.outcome <- y

x <- plyr::ddply(x, .(y),
function(x, top = maxClass) {
if (nrow(x) < top) {
ind <- sample(1:nrow(x),
size = top - nrow(x),
replace = TRUE)
ind <- c(1:nrow(x), ind)
x <- x[ind, , drop = FALSE]
}
x
})
y <- x$.outcome
x <- x[,!(colnames(x) %in% c("y", ".outcome")), drop = FALSE]
if (list) {
if (inherits(x, "matrix")) {
x <- as.matrix(x)
}
out <- list(x = x, y = y)
} else {
out <- cbind(x, y)
colnames(out)[ncol(out)] <- yname
}
out
}
4 changes: 2 additions & 2 deletions R/NNS_term_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ NNS.term.matrix <- function(x, oos = NULL){
}
}

NNS.TM <- t(sapply(1 : length(x[ , 1]), function(i) as.integer(tryCatch(stringr::str_count(x[i, 1], unique.vocab), error = function (e) 0))))
NNS.TM <- t(sapply(1 : length(x[ , 1]), function(i) as.integer(tryCatch(as.numeric(unique.vocab%in%x[i,1]), error = function (e) 0))))

colnames(NNS.TM) <- c(unique.vocab)

if(!is.null(oos)){
OOS.TM <- t(sapply(1 : length(oos), function(i) stringr::str_count(oos[i], unique.vocab)))
OOS.TM <- t(sapply(1 : length(oos), function(i) as.numeric(unique.vocab%in%oos[i])))

colnames(OOS.TM) <- c(unique.vocab)

Expand Down
1 change: 1 addition & 0 deletions R/Nowcast.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#' \item \code{PALLFNFINDEXM} -- Global Price Index of All Commodities
#' \item \code{FEDFUNDS} -- Federal Funds Effective Rate
#' \item \code{PPIACO} -- Producer Price Index All Commodities
#' \item \code{CIVPART} -- Labor Force Participation Rate
#' }
#'
#' @return Returns the following matrices of forecasted variables:
Expand Down
6 changes: 3 additions & 3 deletions R/Stack.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @param optimize.threshold logical; \code{TRUE} (default) Will optimize the probability threshold value for rounding in classification problems. If \code{FALSE}, returns 0.5.
#' @param dist options:("L1", "L2", "DTW", "FACTOR") the method of distance calculation; Selects the distance calculation used. \code{dist = "L2"} (default) selects the Euclidean distance and \code{(dist = "L1")} selects the Manhattan distance; \code{(dist = "DTW")} selects the dynamic time warping distance; \code{(dist = "FACTOR")} uses a frequency.
#' @param CV.size numeric [0, 1]; \code{NULL} (default) Sets the cross-validation size if \code{(IVs.test = NULL)}. Defaults to a random value between 0.2 and 0.33 for a random sampling of the training set.
#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling from \code{caret} to balance the classes. \code{type="CLASS"} required.
#' @param balance logical; \code{FALSE} (default) Uses both up and down sampling to balance the classes. \code{type="CLASS"} required.
#' @param ts.test integer; NULL (default) Sets the length of the test set for time-series data; typically \code{2*h} parameter value from \link{NNS.ARMA} or double known periods to forecast.
#' @param folds integer; \code{folds = 5} (default) Select the number of cross-validation folds.
#' @param order options: (integer, "max", NULL); \code{NULL} (default) Sets the order for \link{NNS.reg}, where \code{(order = "max")} is the k-nearest neighbors equivalent, which is suggested for mixed continuous and discrete (unordered, ordered) data.
Expand Down Expand Up @@ -194,8 +194,8 @@ NNS.stack <- function(IVs.train,
CV.DV.test <- DV.train[c(test.set)]

y_train <- as.factor(CV.DV.train)
training_1 <- do.call(cbind, caret::downSample(CV.IVs.train, y_train, list = TRUE))
training_2 <- do.call(cbind, caret::upSample(CV.IVs.train, y_train, list = TRUE))
training_1 <- do.call(cbind, downSample(CV.IVs.train, y_train, list = TRUE))
training_2 <- do.call(cbind, upSample(CV.IVs.train, y_train, list = TRUE))

training <- rbind.data.frame(training_1, training_2)

Expand Down
5 changes: 1 addition & 4 deletions R/gvload.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
# Import calls and globalvariable calls

#' @importFrom caret upSample downSample
#' @importFrom grDevices adjustcolor rainbow rgb
#' @importFrom graphics abline boxplot legend lines par plot points segments text matplot title axis mtext barplot hist strwidth polygon
#' @importFrom plyr ddply
#' @importFrom quantmod getSymbols
#' @importFrom Rfast colmeans rowmeans rowsums comb_n
#' @importFrom stats coef cor lm na.omit sd median complete.cases resid uniroot aggregate density hat qnorm model.matrix fivenum acf qt ecdf time approx embed frequency is.ts runif start ts optim quantile optimize dnorm dlnorm dexp dt t.test wilcox.test .preformat.ts var poly
#' @importFrom stringr str_count
#' @importFrom utils globalVariables head tail combn flush.console
#' @importFrom xts to.monthly
#' @importFrom zoo as.yearmon index
#' @import data.table
#' @import doParallel
#' @import foreach
#' @import meboot
#' @rawNamespace import(Rcpp, except = LdFlags)
#' @import RcppParallel
#' @import rgl
Expand Down Expand Up @@ -42,7 +40,6 @@
requireNamespace("data.table")
requireNamespace("doParallel")
requireNamespace("foreach")
requireNamespace("meboot")
requireNamespace("Rcpp")
requireNamespace("RcppParallel")
requireNamespace("rgl")
Expand Down

0 comments on commit 94a8a78

Please sign in to comment.