diff --git a/.Rbuildignore b/.Rbuildignore index 5719857..655e8af 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,4 +2,6 @@ ^\.git$ ^\.gitignore$ ^\.lintr$ -^\.travis.yml$ \ No newline at end of file +^\.travis.yml$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 07e3a03..5336c35 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ *.swp *.so *.o -*.rds \ No newline at end of file +*.rds +.Rproj.user diff --git a/DESCRIPTION b/DESCRIPTION index 68e9b47..1d27c0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,4 +39,4 @@ Description: MetNet contains functionality to infer metabolic network topologies the two matrices are combined to form a adjacency matrix inferred from both quantitative and structure information. License: GPL (>= 3) -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.1 diff --git a/MetNet.Rproj b/MetNet.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/MetNet.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/NAMESPACE b/NAMESPACE index 58fd65e..abacebb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,7 @@ # Generated by roxygen2: do not edit by hand +export(adjacency_list) +export(annotaionNames) export(aracne) export(bayes) export(clr) @@ -11,7 +13,9 @@ export(randomForest) export(rtCorrection) export(statistical) export(structural) +export(summary_mz) export(threshold) +export(threshold_p) importFrom(BiocParallel,bplapply) importFrom(GENIE3,GENIE3) importFrom(bnlearn,arcs) diff --git a/R/combine.R b/R/combine.R index 8856329..f44036e 100644 --- a/R/combine.R +++ b/R/combine.R @@ -11,6 +11,7 @@ #' in the returned when the sum exceeds the `threshold`. #' \code{combine} returns this consensus matrix supported #' by the structural and statistical adjacency matrices. +#' #' #' @param structural list containing `numeric` structural adjacency matrix in #' the first entry and `character` structural adjanceny matrix in the second @@ -20,6 +21,11 @@ #' #' @param threshold numeric, threshold value to be applied to define a #' connection as present +#' +#' @param model character, defines model that is used for building the consensus +#' matrix. The model is part of `statistical`. The default is "Consensus" which creates +#' the consensus matrix by using a combination of models present in `statistical`. +#' Besides, "pearson" and "spearman" models were implemented. #' #' @details The matrices will be added and a unweighted connection will #' be reported when the value exceeds a certain value. @@ -31,6 +37,7 @@ #' `character` the corresonding type/putative link at this position. #' #' @author Thomas Naake, \email{thomasnaake@@googlemail.com} +#' Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} #' #' @examples #' data("x_test", package = "MetNet") @@ -50,191 +57,95 @@ #' combine(struct_adj, stat_adj) #' #' @export -#' #' Changes to MetNet: New attributes added, if model = "combined" (default) the result will be the same -#' as in MetNet, except the output list items were named to "combined" and "Character" -#' if model = "pearson" or "spearman" than also the corresponding weighted statistical -#' adjacency matrix is required as attribute (weighted_statistical = XY) -#' The output in this case will be a list containing 4 listitems, where combination relies on the -#' unweighted adjacency matrix of either Pearson or Spearman. -#' Moreover corresponding Correlation and p-values will be displayes as listitems -combine <- function(structural, statistical, threshold = 1, model = "combined", weighted_statistical) { - - ## Is changed since structural list is now lenght 3 - if (!is.list(structural) | length(structural) != 3) - stop("structural is not a list of length 3") - - if (!is.matrix(structural[[1]]) | !is.numeric(structural[[1]])) - stop("structural[[1]] is not a numeric matrix") - - if (!is.matrix(structural[[2]]) | !is.character(structural[[2]])) - stop("structural[[2]] is not a character matrix") - - ## Additional to MetNet: - if (!is.matrix(statistical[[3]]) | !is.numeric(statistical[[3]])) - stop("statistical is not a numeric matrix") - - if (!all(rownames(structural[[1]]) == rownames(structural[[2]]))) - stop(c("rownames of structural[[1]] are not identical to rownames of ", - "structural[[2]]")) - - if (!all(colnames(structural[[1]]) == colnames(structural[[2]]))) - stop(c("colnames of structural[[1]] are not identical to colnames of ", - "structural[[2]]")) - - if (!all(rownames(structural[[1]]) == rownames(statistical))) - stop("rownames are not identical") - - if (!all(colnames(structural[[1]]) == colnames(statistical))) - stop("colnames are not identical") - - if (!is.numeric(threshold)) stop("threshold is not numeric") - - ## create list to store results - res <- list() - - - ## Changes to MetNet: Distinguish between default (model == "combined") and model = "pearson" or "spearman" - if(model == "pearson"){ - - ## create the first entry of the list - ## sum the matrices structural and statistical, if the value is above - ## threshold then assign 1, otherwise 0 - cons_num <- structural[[1]] + statistical[["pearson"]] - cons_num <- ifelse(cons_num > threshold, 1, 0) - - ## if p-values have previously been calculated - if("Correlation Value" %in% names(weighted_statistical[["pearson"]][1])){ - cons_corr <- ifelse(cons_num == 1, weighted_statistical[["pearson"]][["Correlation Value"]], "") - cons_p <- ifelse(cons_num == 1, weighted_statistical[["pearson"]][["p-Value"]], "")} - else { - cons_corr <- ifelse(cons_num == 1, weighted_statistical[["pearson"]], "") - cons_p <- NaN - }} - - if(model == "spearman"){ - ## create the first entry of the list - ## sum the matrices structural and statistical, if the value is above - ## threshold then assign 1, otherwise 0 - cons_num <- structural[[1]] + statistical[["spearman"]] - cons_num <- ifelse(cons_num > threshold, 1, 0) - - ## if p-values have previously been calculated - if("Correlation Value" %in% names(weighted_statistical[["spearman"]][1])){ - cons_corr <- ifelse(cons_num == 1, weighted_statistical[["spearman"]][["Correlation Value"]], "") - cons_p <- ifelse(cons_num == 1, weighted_statistical[["spearman"]][["p-Value"]], "")} - else { - cons_corr <- ifelse(cons_num == 1, weighted_statistical[["spearman"]], "") - cons_p <- NaN - }} - - - if(model == "combined"){ - ## create the first entry of the list - ## sum the matrices structural and statistical, if the value is above - ## threshold then assign 1, otherwise 0 - cons_num <- structural[[1]] + statistical[[3]] - cons_num <- ifelse(cons_num > threshold, 1, 0) - cons_corr <- NaN - cons_p <- NaN - } - - ## create the second entry of the list - ## if element in cons_num is equal to 1, take the element in structural[[2]] - ## (the type of link), otherwise "" - cons_char <- ifelse(cons_num == 1, structural[[2]], "") - - ## assign to list - ## Compared to MetNet names were assigned - res[[model]] <- cons_num - res[["Character"]] <- cons_char - - ## assign Correlation and p-values to list if model is "pearson" or "spearman" - if(model == "pearson" || model == "spearman"){ - res[["Correlation Value"]] <- cons_corr - res[["p-Value"]] <- cons_p - } - - - return(res) -} - +combine <- function(structural, statistical, threshold = 1, model = "Consensus") { + + + if (!is.matrix(structural[[1]]) | !is.numeric(structural[[1]])) + stop("structural[[1]] is not a numeric matrix") + + + if (!is.matrix(statistical[["Consensus"]]) | !is.numeric(statistical[["Consensus"]])) + stop("statistical is not a numeric matrix") + + if (!all(rownames(structural[[1]]) == rownames(structural[[2]]))) + stop(c("rownames of structural[[1]] are not identical to rownames of ", + "structural[[2]]")) + + if (!all(colnames(structural[[1]]) == colnames(structural[[2]]))) + stop(c("colnames of structural[[1]] are not identical to colnames of ", + "structural[[2]]")) + + if (!all(rownames(structural[[1]]) == rownames(statistical))) + stop("rownames are not identical") + + if (!all(colnames(structural[[1]]) == colnames(statistical))) + stop("colnames are not identical") + + if (!is.numeric(threshold)) stop("threshold is not numeric") + + ## create list to store results + res <- list() + + -#' exportNet2gml is a function that exports adjacency matrices to gml using igraph -#' Needs following attributes: -#' x: adjacency matrix that needs to be exported -#' from: originated from wich function, possible values are -#' - "structural" Produces a gml file with edge attributes containing mass difference values, data saved as "structural_type.gml" -#' - "statistical+p" produces a gml file with edge attributes containing correlation values and p-values, saved as "statistical.'model'.gml" -#' (TO DO: ADD "statistical") -#' - "combine" produces a gml file with edge attributes containing correlation and p-values for pearson/ spearman correlations, -#' saved as "combined.gml" -exportNet2gml <- function (x, from, adjacency_list, ...) { - #Check arguments - if (!from %in% c("structural", "statistical+p", "combine")) - stop("type not in 'structural', 'statistical+p', 'combine'") - - if ("structural" %in% from) { - - mat <- x[[1]] - net <- - graph_from_adjacency_matrix(mat, mode = "undirected") - E(net)$sourceID <- as.character(adjacency_list$`Var1`) - E(net)$targetID <- as.character(adjacency_list$`Var2`) - E(net)$`mass difference` <- adjacency_list$`mass difference` - - write_graph(net, "structural.gml", format = c("gml")) - } - else if ("statistical+p" %in% from) { - - for (i in 1:length(x)) { - cor_list <- x[[i]] - ##Plot structural adjacency matrix and export to gml - net_cor <- - igraph::graph_from_adjacency_matrix(cor_list[[1]], mode = "undirected", weighted = T) - net_p <- - igraph::graph_from_adjacency_matrix(cor_list[[2]], mode = "undirected", weighted = T) - net_comb <- union(net_cor, net_p) - names(edge_attr(net_comb))[1] <- "correlation" - names(edge_attr(net_comb))[2] <- "p" - # #net_plot <- plot(net_type, edge.width = 5, vertex.label.cex = 0.5, edge.color = "grey") - q <- names(x[i]) - #E(net_cor)$`correlation_tst` <- adjacency_list[,"pearson"] - - # rownames(cor_list[[1]]) - write_graph(net_comb, file = sprintf('statistical.%s.gml', q), format = c("gml")) - - } - - - net <- graph_from_data_frame(adjacency_list, directed = TRUE, vertices = NULL) - E(net)$sourceID <- as.character(adjacency_list$`Feature1`) - E(net)$targetID <- as.character(adjacency_list$`Feature2`) - write_graph(net, file = "statistical.gml", format = c("gml")) - - - } - - else if ("combine" %in% from) { - if ("pearson" %in% names(x[1]) | "spearman" %in% names(x[1]) ){ - net <- graph_from_adjacency_matrix(x[[1]], mode = "undirected") - E(net)$sourceID <- as.character(adjacency_list$`Var1`) - E(net)$targetID <- as.character(adjacency_list$`Var2`) - E(net)$massdifference <- adjacency_list$`value` - E(net)$correlation <- adjacency_list$`Correlation Value` - } - else { #if "combined" or other model - net <- - graph_from_adjacency_matrix(x[[1]], mode = "undirected", weighted = T) - - } - write_graph(net, "combined.gml", format = c("gml")) - - - } + ## create the first entry of the list + ## sum the matrices structural and statistical, if the value is above + ## threshold then assign 1, otherwise 0 + cons_num <- structural[[1]] + statistical[[model]] + cons_num <- ifelse(cons_num > threshold, 1, 0) + + ## create the second entry of the list + ## if element in cons_num is equal to 1, take the element in structural[[2]] + ## (the type of link), otherwise "" + cons_char <- ifelse(cons_num == 1, structural[[2]], "") + + ## assign to list + res[[1]] <- cons_num + res[[2]] <- cons_char + + return(res) } -#' adjacency_list is a function that creates a list of an adjacency matrix x -#' from: originated from which function, possiblie attributes are "structural", "statistical", "combine" + +#' @name adjacency_list +#' +#' @aliases adjacency_list +#' +#' @title Create a list from adjacency matrices +#' +#' @description +#' The function `adjacency_list` creates a dataframe from the adjacency matrix. +#' Unique values are displayed by the dataframe. Missing values and the upper triangle of +#' the adjacency matrix are left out. +#' The function returns a dataframe containing different column outputs depending +#' on the source of the adjacency matrix, which is defined by `from`. +#' +#' @param x +#' `data.frame`, adjacency matrix, that has previously been generated +#' by the functions `structural`, `statistical`, or `combine` +#' +#' @param from +#' `character` defines the function from which the adjacency matrix originated. +#' This can be either "structural", "statistical", or "combine". +#' +#' @details +#' The function `adjacency_list` includes functionality to generate a dataframe from +#' adjacency matrices. The output depends on the original function from which the adjacency +#' matrix was generated. +#' +#' @return `data.frame` containing the listed values each pair of features from +#' adjacency matrix `x` specified by `from`. +#' +#' @author Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} +#' +#' @examples +#' data("x_test", package = "MetNet") +#' x <- x_test[1:10, 3:ncol(x_test)] +#' x <- as.matrix(x) +#' stat_adj <- statistical(x = x, model = c("pearson", "spearman")) +#' adjacency_list(x = stat_adj, from = "statistical") +#' +#' @export adjacency_list <- function(x, from){ if (!(all(from %in% c("structural", "statistical", "combine")))) @@ -242,31 +153,46 @@ adjacency_list <- function(x, from){ if ("structural" %in% from) { + # Checks if upper triangle of the adjacency matrix is the same as the lower one + # If yes, the upper triangle will be cleared + if (all(x[[2]][upper.tri(x[[2]])] == x[[2]][t(lower.tri(x[[2]]))]) == TRUE) { x[[2]][upper.tri(x[[2]])] <- '' x[[3]][upper.tri(x[[3]])] <- '' - - list_type <- melt(x[[2]]) %>% filter(Var1 != Var2) %>% filter(value != '') - list_mass <- melt(x[[3]]) %>% filter(Var1 != Var2) %>% filter(value != '') - combine <- add_column(list_type, `mass difference`= list_mass$value) %>% as.data.frame() + } + + + list_type <- reshape2::melt(x[[2]]) %>% filter(Var1 != Var2) %>% filter(value != '') + list_mass <- reshape2::melt(x[[3]]) %>% filter(Var1 != Var2) %>% filter(value != '') + combine <- merge(list_type, list_mass, by = c("Var1", "Var2")) + colnames(combine) <- c("Var1", "Var2", "value", "mass-difference") return(combine) + } + else if ("statistical" %in% from) { for (i in seq_along(x)) { if (i == 1) { + if (all(x[[i]][upper.tri(x[[i]])] == x[[i]][t(lower.tri(x[[i]]))]) == TRUE) { + x[[i]][upper.tri(x[[i]])] <- '' - list_corr <- melt(x[[i]]) %>% filter(Var1 != Var2) %>% filter(value != '') %>% + } + + list_corr <- reshape2::melt(x[[i]]) %>% filter(Var1 != Var2) %>% filter(value != '') %>% select(Var1, Var2, value) - colnames(list_corr) <- c("Feature1", "Feature2", names(x[i])) - #return(list_corr) + colnames(list_corr) <- c("Var1", "Var2", names(x[i])) } if (i != 1){ model = names(x[i]) - x[[i]][upper.tri(x[[i]])] <- '' - list_corr2 <- melt(x[[i]]) %>% filter(Var1 != Var2) %>% filter(value != '') - list_comb <- add_column(list_corr, list_corr2$value) - list_comb <- as.data.frame(list_comb) + + if (all(x[[i]][upper.tri(x[[i]])] == x[[i]][t(lower.tri(x[[i]]))]) == TRUE) { + + x[[i]][upper.tri(x[[i]])] <- '' + } + + list_corr2 <- reshape2::melt(x[[i]]) %>% filter(Var1 != Var2) %>% filter(value != '') + list_comb <- merge(list_corr, list_corr2, by = c("Var1", "Var2") ) colnames(list_comb)[i+2] <- c(names(x[i])) list_corr <- list_comb } @@ -274,25 +200,73 @@ adjacency_list <- function(x, from){ return(list_corr) } else if ("combine" %in% from){ + if (all(x[[2]][upper.tri(x[[2]])] == x[[2]][t(lower.tri(x[[2]]))]) == TRUE) { + x[[2]][upper.tri(x[[2]])] <- '' - x[[3]][upper.tri(x[[3]])] <- '' - x[[4]][upper.tri(x[[4]])] <- '' + } + - list_mass <- melt(x[[2]]) %>% filter(Var1 != Var2) %>% filter(value != '') - list_corr <- melt(x[[3]]) %>% filter(Var1 != Var2) %>% filter(value != '') - list_p <- melt(x[[4]]) %>% filter(Var1 != Var2) %>% filter(value != '') - listed <- add_column(list_mass, `Correlation Value` = list_corr$value) - listed <- add_column(listed, `p-Value` = list_p$value) - return(listed) + list_mass <- reshape2::melt(x[[2]]) %>% filter(Var1 != Var2) %>% filter(value != '') + colnames(list_mass) <- c("Var1", "Var2", "value") + + return(list_mass) } } -#' sum_mass summarises the adjacency list containing mass difference values, -#' i.e. either adjacency list from structural or combine may be used -sum_mass <- function(adjacency_list){ - + +#' @name summary_mz +#' +#' @aliases summary_mz +#' +#' @title Create a summary from adjacency list containing mass-differences with possible filter +#' +#' @description +#' The function `summary_mz` creates a summary from the adjacency list. +#' Individual mass differences are count over all features. The input may be +#' adjacency lists originating from the function `structural`, +#' or `combine`. The parameter `filtered` will define if data will be +#' filtered above a certain threshold or not. +#' +#' @param adjacency_list +#' `data.frame`, a list of the mass-difference adjacency mateix, that has previously +#' been generated by the function `adjacency_list(x, from = "statistical")`or +#' `adjacency_list(x, from = "combine")` +#' +#' @param filter +#' `number`/`FALSE`, leave empty or set to `FALSE` if unfiltered data are +#' required. Select a `number` as a threshold where mz differences were filtered. +#' May be usefull at visualization (plotting) for big data. +#' +#' +#' @details +#' Summarises the adjacency list containing mass difference values, +#' i.e. either adjacency list from structural or combine may be used. +#' Also the plots will be displayed. +#' The default is filter = F, so the unfiltered summary will be returned. +#' If filter is set to a `number`, e.g. 1000 only mz differences above +#' this threshold will be displayed. +#' The function can be applied for adjacency lists from `structural` and `combine` +#' +#' @return +#' `data.frame` containing the numbers of present mz differences and +#' corresponding name. Also a plot will be displayed. +#' +#' @author Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} +#' +#' @examples +#' data("x_test", package = "MetNet") +#' x <- x_test[1:10, 3:ncol(x_test)] +#' x <- as.matrix(x) +#' stat_adj <- statistical(x = x, model = c("pearson", "spearman")) +#' adj_l <- adjacency_list(x = stat_adj, from = "statistical") +#' summary_mz(adjacency_list = adj_l) +#' summary_mz(adjacency_list = adj_l, filter = 100) # filtered +#' +#' +#'@export +summary_mz <- function(adjacency_list, filter = F, ...){ if("mass difference" %in% names(adjacency_list)){ sum_mass <- adjacency_list %>% group_by(`mass difference`) %>% summarise(count=n()) %>% as.data.frame() @@ -307,10 +281,98 @@ sum_mass <- function(adjacency_list){ } - plot_list <- ggplot(sum_comb, aes(x=Type, y=Counts, fill=Type)) + geom_bar(stat = "identity") + theme_minimal() + - labs(title = "Numbers of destinct type of a biochemical reaction")+ scale_fill_brewer(palette = "Blues") + theme(legend.position = "right") - plot(plot_list) - return(sum_comb) + if (filter == F) { + plot_list <- ggplot(sum_comb, aes(x=Type, y=Counts)) + + geom_bar(stat = "identity") + + theme_minimal() + + coord_flip() + + labs(title = "Numbers of determined mass differences") + + plot(plot_list) + + return(sum_comb) + } + + else if (filter != F) { + sum_f <- filter(sum_comb,sum_comb$Counts >= filter) + plot_list_f <- ggplot(sum_f, aes(x=Type, y=Counts)) + + geom_bar(stat = "identity") + + theme_minimal() + + coord_flip() + + labs(title = "Numbers of determined mass differences", + subtitle = "(filtered)") + + plot(plot_list_f) + + return(sum_f) + + } + + } + + + + + +#' @name annotaionNames +#' +#' @aliases annotaionNames +#' +#' @title Add annotations to adjacency list +#' +#' @description +#' The function `annotationNames` adds available annotation values for all features +#' to the adjacency list. +#' +#' @param +#' `list` is the adjacency list, and `names` is a dataframe containing the feature names as rows, +#' mz values, RT values and annotations in columns +#' +#' +#' @details +#' annotaionNames adds annotation to an adjacency list as additional column. +#' +#' @return `data.frame` containing the the `list`-values and additional column with annotations +#' +#' @author Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} +#' +#' @examples +#' data("x_test", package = "MetNet") +#' x <- x_test[1:10, 3:ncol(x_test)] +#' x <- as.matrix(x) +#' stat_adj <- statistical(x = x, model = c("pearson", "spearman")) +#' adj_l <- adjacency_list(x = stat_adj, from = "statistical") +#' annotationNames(list = adj_l, names = annotationFile) +#' +#' @export +annotaionNames <- function (list, names) { + Var1_names <- c() + + for (i in 1:length(list$Var1)) { + + + value <- names[row.names(names) == list$Var1[i], "manualAnnotation"] + value <- as.character(value) + Var1_names <- c(Var1_names, value) + + } + Var2_names <- c() + + for (i in 1:length(list$Var2)) { + + + value <- names[row.names(names) == list$Var2[i], "manualAnnotation"] + value <- as.character(value) + Var2_names <- c(Var2_names, value) + + } + + + list$Var1_annotation <- Var1_names + list$Var2_annotation <- Var2_names + + return(list) +} \ No newline at end of file diff --git a/R/statistical.R b/R/statistical.R index 641b87a..afcc464 100644 --- a/R/statistical.R +++ b/R/statistical.R @@ -355,15 +355,23 @@ correlation <- function(x, type = "pearson", use = "pairwise.complete.obs") { #' correlation_p is an additional function (complementary to correlation()), and needed for positive/negative pearson/spearman #' correlation value and p-value calculation +#' @details dependencies: Hmisc package correlation_p <- function(x, type = "pearson", use = "pairwise.complete.obs") { ## for pearson/spearman correlation - if (type %in% c("pearson", "spearman")) { - cor_list <- rcorr(x = t(x), type = type) + if (type %in% c("spearman")) { + cor_list <- Hmisc::rcorr(x = t(x), type = type) + names(cor_list) <- c("spearman_corr", "spearman_n", "spearman_p") + + } + ## for pearson/spearman correlation + if (type %in% c("pearson")) { + cor_list <- Hmisc::rcorr(x = t(x), type = type) + names(cor_list) <- c("pearson_corr", "pearson_n", "pearson_p") + } - names(cor_list) <- c("Correlation Value", "n", "p-Value") ## exclude "n" column cor_list <- cor_list[-2] @@ -531,6 +539,10 @@ addToList <- function(l, name, object) { #' semipartial), Spearman correlation (also partial and semipartial) #' and score-based structure learning (Bayes). The function returns a #' list of adjacency matrices that are defined by `model`. +#' If `p` = TRUE and `model` == "pearson" or "spearman", than the output +#' contains pearson/spearman positive and negative correlation values and +#' an additional list entry with their corresponding p-values +#' (saved as "pearson_p" or "spearman_p") #' #' @param #' x `matrix` that contains intensity values of @@ -541,6 +553,10 @@ addToList <- function(l, name, object) { #' (`"lasso"`, `"randomForest"`, `"clr"`, `"aracne"`, `"pearson"`, #' `"pearson_partial"`, `"pearson_semipartial"`, `"spearman"`, #' `"spearman_partial"`, `"spearman_semipartial"`, `"bayes"`) +#' +#' @param +#' p `logical` by default set to FALSE, if set to TRUE p-values will +#' be calculated, works only for `model` "pearson" or "spearman" #' #' @param #' ... parameters passed to the functions `lasso`, `randomForest`, @@ -567,9 +583,10 @@ addToList <- function(l, name, object) { #' will be used in `lasso`, `clr` and/or `aracne`. #' #' @return `list` containing the respective adjacency matrices specified by -#' `model` +#' `model` and `p` #' #' @author Thomas Naake, \email{thomasnaake@@googlemail.com} +#' Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} #' #' @examples #' data("x_test", package = "MetNet") @@ -578,10 +595,6 @@ addToList <- function(l, name, object) { #' statistical(x = x, model = c("pearson", "spearman")) #' #' @export -#'Changes to MetNet: -#' additional attribute in funtion: p; default is FALSE (so works like MetNet), if p is TRUE and model is spearman/pearson -#' than the output will contain lists of pearson/spearman containing the corresponding correlation values (INCLUDING positive -#' and negative values) and p-values statistical <- function(x, model, p = FALSE, ...) { ## check if model complies with the implemented model and return error @@ -594,6 +607,14 @@ statistical <- function(x, model, p = FALSE, ...) { ## check if x is numeric matrix and return error if not so if (mode(x) != "numeric") stop("x is not a numerical matrix") + if (TRUE %in% p){ + #if (model != "spearman" || model != "pearson") + # stop("'model' not implemented in statistical(p = TRUE)") + + if (!(all(model %in% c("pearson", "spearman")))) + stop("'model' not implemented in statistical") + } + ## z-scale x and transpose x_z <- apply(x, 1, function(y) { (y - mean(y, na.rm = TRUE)) / sd(y, na.rm = TRUE) @@ -702,7 +723,8 @@ statistical <- function(x, model, p = FALSE, ...) { #diag(pearson) <- NaN diag(pearson[[1]]) <- NaN diag(pearson[[2]]) <- NaN - l <- addToList(l, "pearson", pearson) + l <- addToList(l, object = pearson[[1]], name = "pearson") + l <- addToList(l, object = pearson[[2]], name = "pearson_p") print("pearson finished.") } @@ -713,7 +735,9 @@ statistical <- function(x, model, p = FALSE, ...) { #diag(spearman) <- NaN diag(spearman[[1]]) <- NaN diag(spearman[[2]]) <- NaN - l <- addToList(l, "spearman", spearman) + + l <- addToList(l, object = spearman[[1]], name = "spearman") + l <- addToList(l, object = spearman[[2]], name = "spearman_p") print("spearman finished.") } @@ -850,11 +874,14 @@ getLinks <- function(mat, exclude = "== 1") { #' `statistical` is taken. #' For `type = "mean"`, the average rank in `statistical` is taken. #' Subsequently the first `n` unique ranks are returned. +#' #' #' @return `matrix`, binary adjacency matrix given the links supported by the #' `type` and the `args` #' -#' @author Thomas Naake, \email{thomasnaake@@googlemail.com} +#' @author Thomas Naake, \email{thomasnaake@@googlemail.com}, +#' Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} +#' #' @examples #' data("x_test", package = "MetNet") #' x <- x_test[1:10, 3:ncol(x_test)] @@ -877,211 +904,279 @@ getLinks <- function(mat, exclude = "== 1") { #' ## type = "mean" #' threshold(statistical = l, type = "mean", args = args) #' @export -#'Changes to MetNet: new attribute for type: "threshold_p" -#'A list is created instead of a single matrix as output containing 1/0 assigned values of -#'model matrices (e.g. pearson and spearman) and consensus matrix. -#' -#'If "treshold_p" is selected in 'type' all values are assigned to 1 if their p-value -#'is BELOW a defined threshold (defined in'args') -#'If "treshold" is selected in 'type' all values are assigned to 1 if their Correlation-value -#'is ABOVE a defined threshold (defined in'args') threshold <- function(statistical, type, args, values = c("all", "min", "max"), ...) { + + ## checks if p values are included in list and deletes them + if ( TRUE == any(endsWith(names(statistical), "_p"))) { + l <- statistical[!endsWith(names(statistical), "_p")] + } + + else {l <- statistical} + + + ## args, either N for tops + ## args, either N for tops + ## or a list of threshold + if (any(duplicated(names(args)))) { + stop("names(args) contain duplicated entries") + } + + if (!type %in% c("top1", "top2", "mean", "threshold")) + stop("type not in 'top1', 'top2', 'mean', 'threshold'") + + ## check args + if (type %in% c("threshold")) { + if (!(all(names(l) %in% names(args)))) { + stop("'args' does not contain entries for all 'model's in ", + "'statistical'") + } - l <- statistical - ## args, either N for tops - ## or a list of threshold - if (any(duplicated(names(args)))) { - stop("names(args) contain duplicated entries") + if (!"threshold" %in% names(args) && length(args$threshold) != 1) { + stop("'args' does not contain entry 'threshold' of length 1") } + } + + ## check match.arg for values + values <- match.arg(values) + + + if (type %in% c("top1", "top2", "mean")) { + if (!("n" %in% names(args) && length(args$n) == 1 && + is.numeric(args$n))) + stop("args does not contain the numeric entry `n` of length 1") + } + + if (type == "threshold") { + ## iterate through the list and remove the links below or above the + ## threshold and write to list + l <- lapply(seq_along(l), function(x) { + + + + ## find corresponding model in l + name_x <- names(l)[x] + + ## get corresponding threshold in args + threshold_x <- args[[names(l)[x]]] + + + ## get corresponding adjacency matrix in l + ## corresponds to MetNet function + l_x <- l[[name_x]] + ## for pearson/spearman correlation models (incl. partial and + ## semi-partial), lasso, randomForest, clr, aracne and bayes higher + ## values corresond to higher confidence + ## only assign 1 to values that are above the threshold + ifelse(abs(l_x) > threshold_x, 1, 0) - ##Changes to MetNet: new attribute for type: "threshold_p" - if (!type %in% c("top1", "top2", "mean", "threshold", "threshold_p")) - stop("type not in 'top1', 'top2', 'mean', 'threshold', 'threshold_p'") + + }) - ## check args - if (type %in% c("threshold")) { - if (!(all(names(l) %in% names(args)))) { - stop("'args' does not contain entries for all 'model's in ", - "'statistical'") - } - - if (!"threshold" %in% names(args) && length(args$threshold) != 1) { - stop("'args' does not contain entry 'threshold' of length 1") - } - } + ## allow for compatibility of arguments + ## calculate consenses from the binary matrices + cons <- threeDotsCall(sna::consensus, dat = l, ...) - ## check args - if (type %in% c("threshold")) { - if (!(all(names(l) %in% names(args)))) { - stop("'args' does not contain entries for all 'model's in ", - "'statistical'") - } + ## threshold consensus that it is a binary matrix + cons <- ifelse(cons >= args$threshold, 1, 0) + + rownames(cons) <- colnames(cons) <- colnames(l[[1]]) + + } + + else { ## if type is in "top1", "top2" or "mean" + l_df <- lapply(seq_along(l), function(x) { + + ## find corresponding model in l + name_x <- names(l)[x] + + ## get corresponding adjacency matrix in l + l_x <- l[[name_x]] + + ## take the respective minimum or maximum depending on `values`, + ## do not do anything if `values` is equal to `all` + if (values %in% c("min", "max")) { - if (!"threshold" %in% names(args) && length(args$threshold) != 1) { - stop("'args' does not contain entry 'threshold' of length 1") - } - } - ## complementary to "threshold": - if (type %in% c("threshold_p")) { - if (!(all(names(l) %in% names(args)))) { - stop("'args' does not contain entries for all 'model's in ", - "'statistical'") - } + ## get values from the lower triangle + lower_tri <- l_x[lower.tri(l_x)] + + ## get values from the upper triangle (requires transposing) + l_x_t <- t(l_x) + upper_tri <- l_x_t[lower.tri(l_x_t)] - if (!"threshold_p" %in% names(args) && length(args$threshold) != 1) { - stop("'args' does not contain entry 'threshold' of length 1") + ## get min of lower_tri and upper_tri + if (values == "min") { + values <- apply(rbind(lower_tri, upper_tri), 2, min) + } else { + values <- apply(rbind(lower_tri, upper_tri), 2, max) } - } + + ## write back to the matrix + l_x[lower.tri(l_x)] <- values + l_x <- t(l_x) + l_x[lower.tri(l_x)] <- values + } + + ## for pearson/spearman correlation (incl. partial and + ## semi-partial), lasso, randomForest, clr, aracne and bayes + ## higher values corresond to higher confidence + if (grepl(name_x, pattern = "lasso|randomForest|bayes")) { + ## set values that are equal to 0 to NaN (values that are 0) + ## do not explain the variability + res <- getLinks(l_x, exclude = "== 0") + } + if (grepl(name_x, pattern = "pearson|spearman|clr|aracne")) { + res <- getLinks(l_x, exclude = NULL) + } + + res + }) - ## check match.arg for values - values <- match.arg(values) + names(l_df) <- names(l) - if (type %in% c("top1", "top2", "mean")) { - if (!("n" %in% names(args) && length(args$n) == 1 && - is.numeric(args$n))) - stop("args does not contain the numeric entry `n` of length 1") - } + ## bind together the ranks of the models, stored in l_df + ranks <- lapply(l_df, function(x) x$rank) + ranks <- do.call("cbind", ranks) + colnames(ranks) <- names(l_df) + ## calculate the consensus information, i.e. either get the first or + ## second top rank per row or calculate the average across rows + ## depending on the type argument + cons_val <- topKnet(ranks, type) + ## bind row and col information with cons information + row_col <- l_df[[1]][, c("row", "col")] + ranks <- cbind(row_col, cons_val) + ## get the top N features + n <- args$n + top_n <- sort(unique(cons_val))[1:n] + ranks_top <- ranks[cons_val %in% top_n, ] - if (type == "threshold" || type == "threshold_p") { - ## iterate through the list and remove the links below or above the - ## threshold and write to list - l <- lapply(seq_along(l), function(x) { - - ## find corresponding model in l - name_x <- names(l)[x] - - ## get corresponding threshold in args - threshold_x <- args[[names(l)[x]]] - - ## Changed to MetNet - if ("threshold" %in% type) { - - if("Correlation Value" %in% names(l[[name_x]][1])) { - ## get corresponding adjacency matrix of Correlation Values in l - ## is used when statistical was calculated with p=TRUE - l_x <- l[[name_x]]$`Correlation Value` - - ## only assign 1 to values that are above the threshold - ifelse(l_x > threshold_x, 1, 0) - } - else{ - ## get corresponding adjacency matrix in l - ## corresponds to MetNet function - l_x <- l[[name_x]] - ## for pearson/spearman correlation models (incl. partial and - ## semi-partial), lasso, randomForest, clr, aracne and bayes higher - ## values corresond to higher confidence - ## only assign 1 to values that are above the threshold - ifelse(l_x > threshold_x, 1, 0) - } - } - else if("threshold_p" %in% type){ - ## get corresponding adjacency matrix of p-Values in l - ## is used when statistical was calculated with p=TRUE - l_x <- l[[name_x]]$`p-Value` - ## only assign 1 to values that are below the threshold - ifelse(l_x < threshold_x, 1, 0) - } - - }) - - ## allow for compatibility of arguments - ## calculate consenses from the binary matrices - cons <- threeDotsCall(sna::consensus, dat = l, ...) - - ## threshold consensus that it is a binary matrix - cons <- ifelse(cons >= args$threshold, 1, 0) - - rownames(cons) <- colnames(cons) <- colnames(l[[1]]) - } - else { ## if type is in "top1", "top2" or "mean" - l_df <- lapply(seq_along(l), function(x) { - - ## find corresponding model in l - name_x <- names(l)[x] - - ## get corresponding adjacency matrix in l - l_x <- l[[name_x]] - - ## take the respective minimum or maximum depending on `values`, - ## do not do anything if `values` is equal to `all` - if (values %in% c("min", "max")) { - - ## get values from the lower triangle - lower_tri <- l_x[lower.tri(l_x)] - - ## get values from the upper triangle (requires transposing) - l_x_t <- t(l_x) - upper_tri <- l_x_t[lower.tri(l_x_t)] - - ## get min of lower_tri and upper_tri - if (values == "min") { - values <- apply(rbind(lower_tri, upper_tri), 2, min) - } else { - values <- apply(rbind(lower_tri, upper_tri), 2, max) - } - - ## write back to the matrix - l_x[lower.tri(l_x)] <- values - l_x <- t(l_x) - l_x[lower.tri(l_x)] <- values - } - - ## for pearson/spearman correlation (incl. partial and - ## semi-partial), lasso, randomForest, clr, aracne and bayes - ## higher values corresond to higher confidence - if (grepl(name_x, pattern = "lasso|randomForest|bayes")) { - ## set values that are equal to 0 to NaN (values that are 0) - ## do not explain the variability - res <- getLinks(l_x, exclude = "== 0") - } - if (grepl(name_x, pattern = "pearson|spearman|clr|aracne")) { - res <- getLinks(l_x, exclude = NULL) - } - - res - }) - - names(l_df) <- names(l) - - ## bind together the ranks of the models, stored in l_df - ranks <- lapply(l_df, function(x) x$rank) - ranks <- do.call("cbind", ranks) - colnames(ranks) <- names(l_df) - - ## calculate the consensus information, i.e. either get the first or - ## second top rank per row or calculate the average across rows - ## depending on the type argument - cons_val <- MetNet:::topKnet(ranks, type) - - ## bind row and col information with cons information - row_col <- l_df[[1]][, c("row", "col")] - ranks <- cbind(row_col, cons_val) - - ## get the top N features - n <- args$n - top_n <- sort(unique(cons_val))[1:n] - ranks_top <- ranks[cons_val %in% top_n, ] - - ## write links in ranks_top to binary adjacency matrix cons - cons <- matrix(0, nrow = ncol(l[[1]]), ncol = ncol(l[[1]])) - rownames(cons) <- colnames(cons) <- colnames(l[[1]]) - cons[as.numeric(rownames(ranks_top))] <- 1 - - } + ## write links in ranks_top to binary adjacency matrix cons + cons <- matrix(0, nrow = ncol(l[[1]]), ncol = ncol(l[[1]])) + rownames(cons) <- colnames(cons) <- colnames(l[[1]]) + cons[as.numeric(rownames(ranks_top))] <- 1 - ## Changes to MetNet: A list is created as output containing 1/0 assigned values of - ## model matrices (e.g. pearson and spearman) and consensus matrix - names(l) <- names(statistical) - l[["Consensus"]] <- cons - class(l[[3]]) <- "numeric" - return(l) + } + + names(l) <- names(statistical[!endsWith(names(statistical), "_p")]) + l[["Consensus"]] <- cons + #class(l[[3]]) <- "numeric" + return(l) + } + +#' @name threshold_p +#' +#' @aliases threshold_p +#' +#' @title Threshold the statistical adjacency matrices (using p-values) +#' +#' @description +#' The function `threshold_p` takes as input a list of adjacency matrices +#' as returned from the function `statistical`. 'threshold_p` will identify the +#' strongest link that are lower then a certain p-value threshold +#' +#' @param statistical `list` containing adjacency matrices that contains p-values +#' +#' @param args `list` of arguments, has to contain thresholds for weighted +#' adjacency matrices depending on the statistical model +#' (a named list, where names are identical to `model`s in `statistical`) +#' For example "pearson_p" is the corresponding name for pearson p-values +#' +#' @param ... parameters passed to the function `consensus` in the +#' `sna` package +#' +#' @details +#' `args` has to contain numeric vector of +#' length 1 with names equal to `names(statistical)` for each `model` +#' (`names(statistical)`) and the entry `threshold`, a numerical `vector(1)` +#' to threshold the consensus +#' matrix after using the `consensus` function from the `sna` package. +#' +#' +#' When combining the adjacency matrices the `threshold` value defines if an +#' edge is reported or not. The value should be carefully defined by the user. +#' +#' +#' @return `matrix`, binary adjacency matrix given the links supported by the `args` +#' +#' @author Liesa Salzer, \email{liesa.salzer@@helmholtz-muenchen.de} +#' +#' @examples +#' data("x_test", package = "MetNet") +#' x <- x_test[1:10, 3:ncol(x_test)] +#' x <- as.matrix(x) +#' model <- c("pearson", "spearman") +#' l <- statistical(x, model = model, p = TRUE) +#' +#' args <- list("pearson_p" = 0.05, "spearman" = 0.05, threshold = 1) +#' threshold_p(statistical = l, type = "threshold", args = args) +#' +#' +#' @export +threshold_p <- function(statistical, args, ...) { + + ## checks if p values are included + if ( TRUE != any(endsWith(names(statistical), "_p"))) { + stop("p-values missing") + } + + else { l <- statistical[endsWith(names(statistical), "_p")] +} + if ( TRUE != any(endsWith(names(args), "_p"))) { + stop("Please use `_p` arguments") + } + + ## or a list of threshold + if (any(duplicated(names(args)))) { + stop("names(args) contain duplicated entries") + } + + + ## iterate through the list and remove the links below or above the + ## threshold and write to list + l <- lapply(seq_along(l), function(x) { + + + + ## find corresponding model in l + name_x <- names(l)[x] + + ## get corresponding threshold in args + threshold_x <- args[[name_x]] + + + ## get corresponding adjacency matrix in l + ## corresponds to MetNet function + l_x <- l[[name_x]] + ## for pearson/spearman correlation models (incl. partial and + ## semi-partial), lasso, randomForest, clr, aracne and bayes higher + ## values corresond to higher confidence + ## only assign 1 to values that are above the threshold + ifelse(l_x < threshold_x, 1, 0) + + + }) + ## allow for compatibility of arguments + ## calculate consenses from the binary matrices + cons <- threeDotsCall(sna::consensus, dat = l, ...) + + ## threshold consensus that it is a binary matrix + cons <- ifelse(cons >= args$threshold, 1, 0) + + rownames(cons) <- colnames(cons) <- colnames(l[[1]]) + + + + names(l) <- names(statistical[!endsWith(names(statistical), "_p")]) + l[["Consensus"]] <- cons + #class(l[[3]]) <- "numeric" + return(l) + +} #' @name topKnet #' @aliases topKnet #' @title Return consensus ranks from a matrix containing ranks @@ -1215,4 +1310,4 @@ threeDotsCall <- function(fun, ...) { ## call the function res <- do.call(fun, input) return(res) -} \ No newline at end of file +} diff --git a/R/structural.R b/R/structural.R index e4e939a..b4f0d1c 100644 --- a/R/structural.R +++ b/R/structural.R @@ -11,6 +11,8 @@ #' loss/addition of functional groups. `structural` returns #' the unweighted `numeric` `matrix` together with a `character` `matrix` with #' the type of loss/addition as a list at the specific positions. +#' Changes to MetNet: +#' structural() has additional list entry of matrix containing mass values of respective matches #' #' @param #' x `matrix`, where columns are the samples and the rows are features @@ -62,9 +64,6 @@ #' struct_adj <- structural(x_test, transformation, ppm = 5, directed = TRUE) #' #' @export -#' #' Changes to MetNet: -#' structural() has additional list entry of matrix containing mass values of respective matches -#' structural <- function(x, transformation, ppm = 5, directed = FALSE) { if (!is.data.frame(transformation)) @@ -224,7 +223,7 @@ rtCorrection <- function(structural, x, transformation) { ## check arguments if (!is.list(structural)) stop("structural is not a list") - if (!length(structural) == 2) stop("structural is not a list of length 2") + #if (!length(structural) == 2) stop("structural is not a list of length 2") ## allocate structural[[1]] and structural[[2]] to adj and group adj <- structural[[1]] group <- structural[[2]] diff --git a/data/.peaklist_example.RData.icloud b/data/.peaklist_example.RData.icloud new file mode 100644 index 0000000..5bee4e4 Binary files /dev/null and b/data/.peaklist_example.RData.icloud differ diff --git a/data/peaklist_example.RData b/data/peaklist_example.RData deleted file mode 100644 index 157fade..0000000 Binary files a/data/peaklist_example.RData and /dev/null differ diff --git a/man/addToList.Rd b/man/addToList.Rd index ee4334c..f373045 100644 --- a/man/addToList.Rd +++ b/man/addToList.Rd @@ -33,6 +33,7 @@ cor_pearson <- correlation(x, type = "pearson") cor_spearman <- correlation(x, type = "spearman") l <- list(pearson = cor_pearson) MetNet:::addToList(l, "spearman", cor_spearman) +Changes to MetNet: Not tested anymore if object is a matrix (since object is a list) } \author{ Thomas Naake, \email{thomasnaake@googlemail.com} diff --git a/man/adjacency_list.Rd b/man/adjacency_list.Rd new file mode 100644 index 0000000..87de249 --- /dev/null +++ b/man/adjacency_list.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine.R +\name{adjacency_list} +\alias{adjacency_list} +\title{Create a list from adjacency matrices} +\usage{ +adjacency_list(x, from) +} +\arguments{ +\item{x}{`data.frame`, adjacency matrix, that has previously been generated +by the functions `structural`, `statistical`, or `combine`} + +\item{from}{`character` defines the function from which the adjacency matrix originated. +This can be either "structural", "statistical", or "combine".} +} +\value{ +`data.frame` containing the listed values each pair of features from +adjacency matrix `x` specified by `from`. +} +\description{ +The function `adjacency_list` creates a dataframe from the adjacency matrix. +Unique values are displayed by the dataframe. Missing values and the upper triangle of +the adjacency matrix are left out. +The function returns a dataframe containing different column outputs depending +on the source of the adjacency matrix, which is defined by `from`. +} +\details{ +The function `adjacency_list` includes functionality to generate a dataframe from +adjacency matrices. The output depends on the original function from which the adjacency +matrix was generated. +} +\examples{ +data("x_test", package = "MetNet") +x <- x_test[1:10, 3:ncol(x_test)] +x <- as.matrix(x) +stat_adj <- statistical(x = x, model = c("pearson", "spearman")) +adjacency_list(x = stat_adj, from = "statistical") + +} +\author{ +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} +} diff --git a/man/annotaionNames.Rd b/man/annotaionNames.Rd new file mode 100644 index 0000000..b3adedc --- /dev/null +++ b/man/annotaionNames.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine.R +\name{annotaionNames} +\alias{annotaionNames} +\title{Add annotations to adjacency list} +\usage{ +annotaionNames(list, names) +} +\arguments{ +\item{`list`}{is the adjacency list, and `names` is a dataframe containing the feature names as rows, +mz values, RT values and annotations in columns} +} +\value{ +`data.frame` containing the the `list`-values and additional column with annotations +} +\description{ +The function `annotationNames` adds available annotation values for all features +to the adjacency list. +} +\details{ +annotaionNames adds annotation to an adjacency list as additional column. +} +\examples{ +data("x_test", package = "MetNet") +x <- x_test[1:10, 3:ncol(x_test)] +x <- as.matrix(x) +stat_adj <- statistical(x = x, model = c("pearson", "spearman")) +adj_l <- adjacency_list(x = stat_adj, from = "statistical") +annotationNames(list = adj_l, names = annotationFile) + +} +\author{ +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} +} diff --git a/man/combine.Rd b/man/combine.Rd index d7baade..075ce7a 100644 --- a/man/combine.Rd +++ b/man/combine.Rd @@ -4,7 +4,7 @@ \alias{combine} \title{Combine structural and statistical adjacency matrix} \usage{ -combine(structural, statistical, threshold = 1) +combine(structural, statistical, threshold = 1, model = "Consensus") } \arguments{ \item{structural}{list containing `numeric` structural adjacency matrix in @@ -15,6 +15,11 @@ entry} \item{threshold}{numeric, threshold value to be applied to define a connection as present} + +\item{model}{character, defines model that is used for building the consensus +matrix. The model is part of `statistical`. The default is "Consensus" which creates +the consensus matrix by using a combination of models present in `statistical`. +Besides, "pearson" and "spearman" models were implemented.} } \value{ `list`, in the first entry `matrix` of type `numeric`containing the @@ -55,4 +60,5 @@ combine(struct_adj, stat_adj) } \author{ Thomas Naake, \email{thomasnaake@googlemail.com} +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} } diff --git a/man/correlation_p.Rd b/man/correlation_p.Rd new file mode 100644 index 0000000..1adc38d --- /dev/null +++ b/man/correlation_p.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statistical.R +\name{correlation_p} +\alias{correlation_p} +\title{correlation_p is an additional function (complementary to correlation()), and needed for positive/negative pearson/spearman +correlation value and p-value calculation} +\usage{ +correlation_p(x, type = "pearson", use = "pairwise.complete.obs") +} +\description{ +correlation_p is an additional function (complementary to correlation()), and needed for positive/negative pearson/spearman +correlation value and p-value calculation +} +\details{ +dependencies: Hmisc package +} diff --git a/man/mat_test.Rd b/man/mat_test.Rd index 05993b6..410ae92 100644 --- a/man/mat_test.Rd +++ b/man/mat_test.Rd @@ -4,7 +4,9 @@ \name{mat_test} \alias{mat_test} \title{Example data for \code{MetNet}: unit tests} -\format{\code{matrix}} +\format{ +\code{matrix} +} \source{ set.seed(1) random_numbers <- rnorm(140, mean = 10, sd = 2) diff --git a/man/mat_test_z.Rd b/man/mat_test_z.Rd index 14ded0f..412caf4 100644 --- a/man/mat_test_z.Rd +++ b/man/mat_test_z.Rd @@ -4,7 +4,9 @@ \name{mat_test_z} \alias{mat_test_z} \title{Example data for \code{MetNet}: unit tests} -\format{\code{matrix}} +\format{ +\code{matrix} +} \source{ set.seed(1) random_numbers <- rnorm(140, mean = 10, sd = 2) diff --git a/man/peaklist.Rd b/man/peaklist.Rd index 0041663..1468c6a 100644 --- a/man/peaklist.Rd +++ b/man/peaklist.Rd @@ -4,7 +4,9 @@ \name{peaklist} \alias{peaklist} \title{Example data for \code{MetNet}: data input} -\format{\code{data.frame}} +\format{ +\code{data.frame} +} \source{ Internal peaklist from metabolite profiling of Nicotiana species after W+OS and MeJA treatment. The data was processed by \code{xcms} and diff --git a/man/statistical.Rd b/man/statistical.Rd index a42f877..d911df9 100644 --- a/man/statistical.Rd +++ b/man/statistical.Rd @@ -4,7 +4,7 @@ \alias{statistical} \title{Create a list of adjacency matrices from statistical methods} \usage{ -statistical(x, model, ...) +statistical(x, model, p = FALSE, ...) } \arguments{ \item{x}{`matrix` that contains intensity values of @@ -15,12 +15,15 @@ features/metabolites (rows) per sample (columns).} `"pearson_partial"`, `"pearson_semipartial"`, `"spearman"`, `"spearman_partial"`, `"spearman_semipartial"`, `"bayes"`)} +\item{p}{`logical` by default set to FALSE, if set to TRUE p-values will +be calculated, works only for `model` "pearson" or "spearman"} + \item{...}{parameters passed to the functions `lasso`, `randomForest`, `clr`, `aracne`, `correlation` and/or `bayes`} } \value{ `list` containing the respective adjacency matrices specified by -`model` +`model` and `p` } \description{ The function `statitical` infers adjacency matrix topologies from @@ -32,6 +35,10 @@ cellular networks (ARACNE), Pearson correlation (also partial and semipartial), Spearman correlation (also partial and semipartial) and score-based structure learning (Bayes). The function returns a list of adjacency matrices that are defined by `model`. +If `p` = TRUE and `model` == "pearson" or "spearman", than the output +contains pearson/spearman positive and negative correlation values and +an additional list entry with their corresponding p-values +(saved as "pearson_p" or "spearman_p") } \details{ The function `statistical` includes functionality to calculate adjacency @@ -62,4 +69,5 @@ statistical(x = x, model = c("pearson", "spearman")) } \author{ Thomas Naake, \email{thomasnaake@googlemail.com} +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} } diff --git a/man/structural.Rd b/man/structural.Rd index abd4851..2e222d5 100644 --- a/man/structural.Rd +++ b/man/structural.Rd @@ -37,6 +37,8 @@ adjacency matrix using differences in m/z values that are matched against a loss/addition of functional groups. `structural` returns the unweighted `numeric` `matrix` together with a `character` `matrix` with the type of loss/addition as a list at the specific positions. +Changes to MetNet: +structural() has additional list entry of matrix containing mass values of respective matches } \details{ `structural` accesses the column `"mz"` of diff --git a/man/summary_mz.Rd b/man/summary_mz.Rd new file mode 100644 index 0000000..cdd8d7e --- /dev/null +++ b/man/summary_mz.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine.R +\name{summary_mz} +\alias{summary_mz} +\title{Create a summary from adjacency list containing mass-differences with possible filter} +\usage{ +summary_mz(adjacency_list, filter = F, ...) +} +\arguments{ +\item{adjacency_list}{`data.frame`, a list of the mass-difference adjacency mateix, that has previously +been generated by the function `adjacency_list(x, from = "statistical")`or +`adjacency_list(x, from = "combine")`} + +\item{filter}{`number`/`FALSE`, leave empty or set to `FALSE` if unfiltered data are +required. Select a `number` as a threshold where mz differences were filtered. +May be usefull at visualization (plotting) for big data.} +} +\value{ +`data.frame` containing the numbers of present mz differences and +corresponding name. Also a plot will be displayed. +} +\description{ +The function `summary_mz` creates a summary from the adjacency list. +Individual mass differences are count over all features. The input may be +adjacency lists originating from the function `structural`, +or `combine`. The parameter `filtered` will define if data will be +filtered above a certain threshold or not. +} +\details{ +Summarises the adjacency list containing mass difference values, +i.e. either adjacency list from structural or combine may be used. +Also the plots will be displayed. +The default is filter = F, so the unfiltered summary will be returned. +If filter is set to a `number`, e.g. 1000 only mz differences above +this threshold will be displayed. +The function can be applied for adjacency lists from `structural` and `combine` +} +\examples{ +data("x_test", package = "MetNet") +x <- x_test[1:10, 3:ncol(x_test)] +x <- as.matrix(x) +stat_adj <- statistical(x = x, model = c("pearson", "spearman")) +adj_l <- adjacency_list(x = stat_adj, from = "statistical") +summary_mz(adjacency_list = adj_l) +summary_mz(adjacency_list = adj_l, filter = 100) # filtered + + +} +\author{ +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} +} diff --git a/man/threshold.Rd b/man/threshold.Rd index eb56e01..17656d3 100644 --- a/man/threshold.Rd +++ b/man/threshold.Rd @@ -92,5 +92,6 @@ threshold(statistical = l, type = "top2", args = args) threshold(statistical = l, type = "mean", args = args) } \author{ -Thomas Naake, \email{thomasnaake@googlemail.com} +Thomas Naake, \email{thomasnaake@googlemail.com}, +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} } diff --git a/man/threshold_p.Rd b/man/threshold_p.Rd new file mode 100644 index 0000000..6ad1f72 --- /dev/null +++ b/man/threshold_p.Rd @@ -0,0 +1,53 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/statistical.R +\name{threshold_p} +\alias{threshold_p} +\title{Threshold the statistical adjacency matrices (using p-values)} +\usage{ +threshold_p(statistical, args, ...) +} +\arguments{ +\item{statistical}{`list` containing adjacency matrices that contains p-values} + +\item{args}{`list` of arguments, has to contain thresholds for weighted +adjacency matrices depending on the statistical model +(a named list, where names are identical to `model`s in `statistical`) +For example "pearson_p" is the corresponding name for pearson p-values} + +\item{...}{parameters passed to the function `consensus` in the +`sna` package} +} +\value{ +`matrix`, binary adjacency matrix given the links supported by the `args` +} +\description{ +The function `threshold_p` takes as input a list of adjacency matrices +as returned from the function `statistical`. 'threshold_p` will identify the +strongest link that are lower then a certain p-value threshold +} +\details{ +`args` has to contain numeric vector of +length 1 with names equal to `names(statistical)` for each `model` +(`names(statistical)`) and the entry `threshold`, a numerical `vector(1)` +to threshold the consensus +matrix after using the `consensus` function from the `sna` package. + + +When combining the adjacency matrices the `threshold` value defines if an +edge is reported or not. The value should be carefully defined by the user. +} +\examples{ +data("x_test", package = "MetNet") +x <- x_test[1:10, 3:ncol(x_test)] +x <- as.matrix(x) +model <- c("pearson", "spearman") +l <- statistical(x, model = model, p = TRUE) + +args <- list("pearson_p" = 0.05, "spearman" = 0.05, threshold = 1) +threshold_p(statistical = l, type = "threshold", args = args) + + +} +\author{ +Liesa Salzer, \email{liesa.salzer@helmholtz-muenchen.de} +} diff --git a/man/x_test.Rd b/man/x_test.Rd index 63e27df..c4de6d1 100644 --- a/man/x_test.Rd +++ b/man/x_test.Rd @@ -4,7 +4,9 @@ \name{x_test} \alias{x_test} \title{Example data for \code{MetNet}: data input} -\format{\code{matrix}} +\format{ +\code{matrix} +} \source{ data("peaklist_example", package = "MetNet") peaklist[, 3:dim(peaklist)[2]] <- apply(peaklist[, 3:dim(peaklist)[2]], 2, diff --git a/vignettes/MetNet.Rmd b/vignettes/MetNet.Rmd index 9b1799d..4b9aaa2 100644 --- a/vignettes/MetNet.Rmd +++ b/vignettes/MetNet.Rmd @@ -114,8 +114,8 @@ send a mail to the developer. To install `MetNet` enter the following to the `R` console ```{r install, eval=FALSE} -install.packages("BiocManager") -BiocManager::install("MetNet") +#install.packages("BiocManager") +#BiocManager::install("MetNet") ``` Before starting with the analysis, load the `MetNet` package. This @@ -125,6 +125,7 @@ will also load the required packages `glmnet`, `stabs`, for functions in the statistical adjacency matrix inference. ```{r load_MetNet,eval=TRUE} library(MetNet) +library(tidyverse) ``` The data format that is compatible with the `MetNet` framework is @@ -238,9 +239,32 @@ from these structural information we enter ```{r structure,eval=TRUE,echo=TRUE} struct_adj <- structural(x = x_test, transformation = transformations, ppm = 10) + +struct_list <- adjacency_list(struct_adj, from = "structural") + ``` in the `R` console. +The function `adjacency_list` will then create a list from the corresponding adjacency matrix. The origin of the matrix is defined by `from`, which is in this case the previously used function `"structural"`. + +```{r,eval=TRUE,echo=TRUE} +struct_list <- adjacency_list(struct_adj, from = "structural") +``` + +Some overview on the mass-difference distribution of the data can be observed using the `summary_mz` function. For larger data-sets, also a `filter` can be applied to visualize only mass-difference above a certain threshold. + +```{r,eval=TRUE,echo=TRUE} +summary_mz(struct_list, filter = 0) +``` + +We can also add putative annotations to the adjacency list using `annotationNames`. Therefore we need to provide a data frame containing the feature names as rownames and the annotations as additional column called `"manualAnnotation"`. + +```{r,eval=TRUE,echo=TRUE} +x_names <- cbind(x_test, manualAnnotation = c("test_annotation", rep("", 35))) + +struct_annotation <- annotaionNames(list = struct_list, names = x_names) +``` + ## Refining the structural adjacency matrix (optional) {-} Depending on the chemical group added the retention time will differ depending on the chemical group added, e.g. an addition of a glycosyl group will @@ -340,13 +364,23 @@ as links in the unweighted consensus adjacency matrix. In the following example, we will create a list of unweighted adjacency matrices using Pearson and Spearman correlation using the intensity values as input data. +After that, again a list is created from the adjacency matrices. ```{r statistical,eval=TRUE,echo=TRUE} x_int <- x_test[, 3:dim(x_test)[2]] x_int <- as.matrix(x_int) stat_adj_l <- statistical(x_int, model = c("pearson", "spearman")) + +stat_adj_list <- adjacency_list(stat_adj_l, from = "statistical") ``` +For pearson and spearman correlation also negative correlations can be calculated including their corresponding p-values using the attribute `p = TRUE` in the `statistical` function. + +```{r,eval=TRUE,echo=TRUE} +stat_adj_p <- statistical(x_int, model = c("pearson", "spearman"), p = TRUE) + +stat_adj_list_p <- adjacency_list(stat_adj_l, from = "statistical") +``` `threshold` implements four types to obtain an unweighted adjacency matrix. We will create for all types the unweighted consensus adjacency matrices. @@ -357,6 +391,10 @@ args_thr <- list("pearson" = 0.95, "spearman" = 0.95, threshold = 1) stat_adj_thr <- threshold(statistical = stat_adj_l, type = "threshold", args = args_thr) +## thresholding using p-values +args_thr_p <- list("pearson_p" = 0.05, "spearman_p" = 0.05, threshold = 1) +stat_adj_thr_p <- threshold_p(statistical = stat_adj_p, args = args_thr_p) + ## type = "top1" args_top <- list(n = 40) stat_adj_top1 <- threshold(statistical = stat_adj_l, type = "top2",