Skip to content

Commit

Permalink
Adding MassBank (#77)
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed Sep 6, 2023
1 parent 2c71f31 commit 06a0ff4
Show file tree
Hide file tree
Showing 12 changed files with 240 additions and 69 deletions.
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ BugReports: https://github.com/taxonomicallyinformedannotation/tima-r/issues
Depends:
R (>= 4.3.0)
Imports:
AnnotationHub (>= 3.8.0),
crayon (>= 1.5.2),
docopt (>= 0.7.1),
dplyr (>= 1.1.3),
Expand Down Expand Up @@ -98,6 +99,7 @@ Collate:
'get_example_sirius.R'
'get_gnps_tables.R'
'get_last_version_from_zenodo.R'
'get_massbank_spectra.R'
'get_organism_taxonomy_ott.R'
'parse_yaml_paths.R'
'parse_yaml_params.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export(get_example_sirius)
export(get_file)
export(get_gnps_tables)
export(get_last_version_from_zenodo)
export(get_massbank_spectra)
export(get_organism_taxonomy_ott)
export(get_params)
export(harmonize_names_sirius)
Expand Down
11 changes: 6 additions & 5 deletions R/annotate_spectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ annotate_spectra <- function(input = params$files$spectral$raw,

params <<- parameters
if (length(library) > 1) {
library <- library[[polarity]]
library <- library[grepl(polarity, library)]
}

log_debug("Loading spectra...")
Expand All @@ -64,6 +64,7 @@ annotate_spectra <- function(input = params$files$spectral$raw,

df_empty <- data.frame(
feature_id = NA,
candidate_library = NA,
candidate_error_mz = NA,
candidate_structure_name = NA,
candidate_structure_inchikey_no_stereo = NA,
Expand Down Expand Up @@ -146,7 +147,8 @@ annotate_spectra <- function(input = params$files$spectral$raw,
lib_precursors * (1 + (10^-6 * ppm))
)

lib_id <- spectral_library@backend@spectraData$spectrum_id
lib_id <- seq_along(1:length(spectral_library))
spectral_library$spectrum_id <- lib_id
lib_spectra <- spectral_library@backend@peaksData

calculate_entropy_score <-
Expand Down Expand Up @@ -259,15 +261,15 @@ annotate_spectra <- function(input = params$files$spectral$raw,
lib_inchikey <- rep(NA_character_, length(spectral_library))
}
lib_inchikey2D <-
spectral_library@backend@spectraData$inchikey_no_stereo
spectral_library@backend@spectraData$inchikey_2D
if (is.null(lib_inchikey2D)) {
lib_inchikey2D <- rep(NA_character_, length(spectral_library))
}
lib_smiles <- spectral_library@backend@spectraData$smiles
if (is.null(lib_smiles)) {
lib_smiles <- rep(NA_character_, length(spectral_library))
}
lib_smiles2D <- spectral_library@backend@spectraData$smiles_no_stereo
lib_smiles2D <- spectral_library@backend@spectraData$smiles_2D
if (is.null(lib_smiles2D)) {
lib_smiles2D <- rep(NA_character_, length(spectral_library))
}
Expand Down Expand Up @@ -310,7 +312,6 @@ annotate_spectra <- function(input = params$files$spectral$raw,
tidytable::select(-target_id)

df_final <- df_final |>
tidytable::rowwise() |>
tidytable::mutate(
candidate_structure_error_mz = target_precursorMz - precursorMz,
candidate_structure_inchikey_no_stereo = ifelse(
Expand Down
48 changes: 48 additions & 0 deletions R/get_massbank_spectra.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#' @title Get MassBank spectra
#'
#' @description This function queries AnnotationHub for MassBank records,
#'    takes the last record returned and, unless a file named after that
#'    record already exists in `output_dir`, downloads it and exports it
#'    as an MGF file. Spectra variables whose name contains "synonym" are
#'    dropped before the export.
#'
#' @param output_dir Directory where to store the spectra. It is created
#'    (recursively) if it does not exist yet and a download is needed.
#'
#' @return The path to the MGF file
#'    (`<output_dir>/<AnnotationHub id>.mgf`), whether it was just
#'    exported or was already present.
#'
#' @export
#'
#' @examples NULL
get_massbank_spectra <-
  function(output_dir = "data/source/libraries/spectra/exp/") {
    log_debug("Loading last AnnotationHub version ...")
    ah <- AnnotationHub::AnnotationHub()

    log_debug("Querying for most recent MassBank version available ...")
    ahmb <- AnnotationHub::query(ah, "MassBank")
    # NOTE(review): assumes the last id returned by the query is the most
    # recent MassBank release -- confirm against AnnotationHub ordering.
    mb_last <- utils::tail(ahmb$ah_id, 1)
    if (length(mb_last) == 0) {
      # Fail early: otherwise the export path would silently become
      # "<output_dir>/.mgf" and the download below would error obscurely.
      stop("No MassBank record found in AnnotationHub", call. = FALSE)
    }

    export_path <- file.path(output_dir, paste0(mb_last, ".mgf"))

    log_debug("Checking if a previous MassBank version already exists")
    if (!file.exists(export_path)) {
      # Make sure the target directory exists before the (long) download,
      # so the export cannot fail on a missing directory afterwards.
      if (!dir.exists(output_dir)) {
        dir.create(output_dir, recursive = TRUE)
      }

      log_debug("Downloading most recent MassBank version available ...")
      # Reuse the hub loaded above instead of instantiating a second one.
      mb_sp <- ah[[mb_last]] |>
        Spectra::Spectra()

      log_debug("Removing faulty columns")
      sp_vars <- Spectra::spectraVariables(mb_sp)
      kept_vars <- sp_vars[!grepl(pattern = "synonym", x = sp_vars)]

      mb_sp |>
        Spectra::selectSpectraVariables(kept_vars) |>
        log_pipe("Exporting") |>
        Spectra::export(
          backend = MsBackendMgf::MsBackendMgf(),
          file = export_path
        )
    } else {
      log_debug(
        "It appears you already have",
        "the most recent MassBank version available!"
      )
    }

    export_path
  }
47 changes: 30 additions & 17 deletions codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,19 @@
"version": ">= 4.3.0"
},
"2": {
"@type": "SoftwareApplication",
"identifier": "AnnotationHub",
"name": "AnnotationHub",
"version": ">= 3.8.0",
"provider": {
"@id": "https://www.bioconductor.org",
"@type": "Organization",
"name": "Bioconductor",
"url": "https://www.bioconductor.org"
},
"sameAs": "https://bioconductor.org/packages/release/bioc/html/AnnotationHub.html"
},
"3": {
"@type": "SoftwareApplication",
"identifier": "crayon",
"name": "crayon",
Expand All @@ -316,7 +329,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=crayon"
},
"3": {
"4": {
"@type": "SoftwareApplication",
"identifier": "docopt",
"name": "docopt",
Expand All @@ -329,7 +342,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=docopt"
},
"4": {
"5": {
"@type": "SoftwareApplication",
"identifier": "dplyr",
"name": "dplyr",
Expand All @@ -342,7 +355,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=dplyr"
},
"5": {
"6": {
"@type": "SoftwareApplication",
"identifier": "httr2",
"name": "httr2",
Expand All @@ -355,7 +368,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=httr2"
},
"6": {
"7": {
"@type": "SoftwareApplication",
"identifier": "igraph",
"name": "igraph",
Expand All @@ -368,7 +381,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=igraph"
},
"7": {
"8": {
"@type": "SoftwareApplication",
"identifier": "jsonlite",
"name": "jsonlite",
Expand All @@ -381,7 +394,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=jsonlite"
},
"8": {
"9": {
"@type": "SoftwareApplication",
"identifier": "MsBackendMgf",
"name": "MsBackendMgf",
Expand All @@ -394,7 +407,7 @@
},
"sameAs": "https://bioconductor.org/packages/release/bioc/html/MsBackendMgf.html"
},
"9": {
"10": {
"@type": "SoftwareApplication",
"identifier": "msentropy",
"name": "msentropy",
Expand All @@ -407,7 +420,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=msentropy"
},
"10": {
"11": {
"@type": "SoftwareApplication",
"identifier": "pbapply",
"name": "pbapply",
Expand All @@ -420,7 +433,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=pbapply"
},
"11": {
"12": {
"@type": "SoftwareApplication",
"identifier": "R.utils",
"name": "R.utils",
Expand All @@ -433,7 +446,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=R.utils"
},
"12": {
"13": {
"@type": "SoftwareApplication",
"identifier": "rotl",
"name": "rotl",
Expand All @@ -446,7 +459,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=rotl"
},
"13": {
"14": {
"@type": "SoftwareApplication",
"identifier": "Spectra",
"name": "Spectra",
Expand All @@ -459,12 +472,12 @@
},
"sameAs": "https://bioconductor.org/packages/release/bioc/html/Spectra.html"
},
"14": {
"15": {
"@type": "SoftwareApplication",
"identifier": "stats",
"name": "stats"
},
"15": {
"16": {
"@type": "SoftwareApplication",
"identifier": "stringi",
"name": "stringi",
Expand All @@ -477,7 +490,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=stringi"
},
"16": {
"17": {
"@type": "SoftwareApplication",
"identifier": "tidytable",
"name": "tidytable",
Expand All @@ -490,7 +503,7 @@
},
"sameAs": "https://CRAN.R-project.org/package=tidytable"
},
"17": {
"18": {
"@type": "SoftwareApplication",
"identifier": "tidyfst",
"name": "tidyfst",
Expand All @@ -503,12 +516,12 @@
},
"sameAs": "https://CRAN.R-project.org/package=tidyfst"
},
"18": {
"19": {
"@type": "SoftwareApplication",
"identifier": "utils",
"name": "utils"
},
"19": {
"20": {
"@type": "SoftwareApplication",
"identifier": "yaml",
"name": "yaml",
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ InChI
InChIKey
Lifecycle
MatchForwardReverseParam
MassBank
MGF
MegaLinter
MsCoreUtils
Expand Down
10 changes: 6 additions & 4 deletions inst/app/server.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,11 @@ server <- function(input, output, session) {
"benchmark_ann_pre_ms2_b_c_pos",
"benchmark_ann_pre_ms2_b_neg",
"benchmark_ann_pre_ms2_b_pos",
"benchmark_ann_spe_is_lot_neg",
"benchmark_ann_spe_is_lot_pos",
"benchmark_ann_spe_is_pre_neg",
"benchmark_ann_spe_is_pre_pos",
"benchmark_ann_sir_pre",
"benchmark_ann_spe_neg",
"benchmark_ann_spe_pos",
"benchmark_ann_spe_pre_neg",
"benchmark_ann_spe_pre_pos",
"benchmark_com_neg",
"benchmark_com_pos",
"benchmark_com_pre_neg",
Expand All @@ -132,6 +133,7 @@ server <- function(input, output, session) {
"benchmark_def_cre_edg_com",
"benchmark_def_cre_edg_spe",
"benchmark_def_fil_ann",
"benchmark_def_pre_ann_sir",
"benchmark_def_pre_ann_spe",
"benchmark_def_pre_fea_com",
"benchmark_def_pre_fea_edg",
Expand Down
6 changes: 4 additions & 2 deletions inst/params/default/annotate_spectra.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ files:
exp:
#' List of negative experimental spectral libraries. STRING
neg:
- data/interim/libraries/spectra/exp/internal_neg.rds
- data/interim/libraries/spectra/exp/massbank_neg.rds
# - data/interim/libraries/spectra/exp/internal_neg.rds

#' List of positive experimental spectral libraries. STRING
pos:
- data/interim/libraries/spectra/exp/internal_pos.rds
- data/interim/libraries/spectra/exp/massbank_pos.rds
# - data/interim/libraries/spectra/exp/internal_pos.rds

#' In silico libraries
is:
Expand Down
6 changes: 4 additions & 2 deletions inst/params/default/params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,13 @@ files:
exp:
#' List of negative experimental spectral libraries. STRING
neg:
- data/interim/libraries/spectra/exp/internal_neg.rds
- data/interim/libraries/spectra/exp/massbank_neg.rds
# - data/interim/libraries/spectra/exp/internal_neg.rds

#' List of positive experimental spectral libraries. STRING
pos:
- data/interim/libraries/spectra/exp/internal_pos.rds
- data/interim/libraries/spectra/exp/massbank_pos.rds
# - data/interim/libraries/spectra/exp/internal_pos.rds

#' In silico libraries
is:
Expand Down
Loading

0 comments on commit 06a0ff4

Please sign in to comment.