Skip to content

Commit

Permalink
feat: add dataset info to output json [wip]
Browse files Browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov committed Nov 3, 2023
1 parent 2621a31 commit b5f4cd2
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 6 deletions.
1 change: 1 addition & 0 deletions packages_rs/nextclade-cli/src/cli/nextclade_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
} = nextclade.get_initial_data();

let mut output_writer = NextcladeOrderedWriter::new(
&inputs.dataset_info,
&nextclade.gene_map,
clade_node_attr_key_descs,
phenotype_attr_descs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use log::{info, warn};
use nextclade::alphabet::nuc::from_nuc_seq;
use nextclade::analyze::virus_properties::PhenotypeAttrDesc;
use nextclade::gene::gene_map::GeneMap;
use nextclade::io::dataset::DatasetInfoShort;
use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
use nextclade::io::ndjson::NdjsonFileWriter;
use nextclade::io::nextclade_csv::{CsvColumnConfig, NextcladeResultsCsvFileWriter};
Expand Down Expand Up @@ -35,6 +36,7 @@ pub struct NextcladeOrderedWriter {

impl NextcladeOrderedWriter {
pub fn new(
dataset_info: &DatasetInfoShort,
gene_map: &GeneMap,
clade_node_attr_key_descs: &[CladeNodeAttrKeyDesc],
phenotype_attr_key_desc: &[PhenotypeAttrDesc],
Expand All @@ -50,7 +52,12 @@ impl NextcladeOrderedWriter {
.map_ref_fallible(|output_translations| FastaPeptideWriter::new(gene_map, output_translations))?;

let output_json_writer = output_params.output_json.map_ref_fallible(|output_json| {
ResultsJsonWriter::new(output_json, clade_node_attr_key_descs, phenotype_attr_key_desc)
ResultsJsonWriter::new(
dataset_info,
output_json,
clade_node_attr_key_descs,
phenotype_attr_key_desc,
)
})?;

let output_ndjson_writer = output_params.output_ndjson.map_ref_fallible(NdjsonFileWriter::new)?;
Expand Down
4 changes: 3 additions & 1 deletion packages_rs/nextclade-cli/src/dataset/dataset_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use itertools::Itertools;
use log::LevelFilter;
use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties};
use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
use nextclade::io::dataset::{Dataset, DatasetFiles, DatasetMeta, DatasetsIndexJson};
use nextclade::io::dataset::{
Dataset, DatasetAttributeValue, DatasetAttributes, DatasetFiles, DatasetInfoShort, DatasetsIndexJson, DatasetMeta,
};
use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
use nextclade::io::file::create_file_or_stdout;
use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
Expand Down
30 changes: 30 additions & 0 deletions packages_rs/nextclade/src/io/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ pub struct Dataset {
}

impl Dataset {
/// Builds a condensed [`DatasetInfoShort`] descriptor from this dataset.
///
/// `path` and `version` are cloned from `self`; `server` is left unset and
/// `other` is JSON `null`.
pub fn short_info(&self) -> DatasetInfoShort {
    let path = self.path.clone();
    let version = self.version.clone();
    DatasetInfoShort {
        path,
        version,
        server: None,
        // `Value::Null` is what `serde_json::Value::default()` yields.
        other: serde_json::Value::Null,
    }
}

pub fn name(&self) -> Option<&str> {
self.attributes.get("name").and_then(AnyType::as_str_maybe)
}
Expand Down Expand Up @@ -331,3 +340,24 @@ pub struct MinimizerIndexVersion {
#[serde(flatten)]
pub other: serde_json::Value,
}

// Condensed dataset descriptor: a path, a version, an optional server and a
// catch-all for any further fields. Field names are (de)serialized in camelCase.
// NOTE(review): plain `//` comments are used here on purpose; `///` doc comments
// would presumably be picked up by the `JsonSchema` derive as schema
// descriptions and change the generated schema. Confirm before converting.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DatasetInfoShort {
// Dataset path (`Dataset::short_info()` fills this from `Dataset::path`).
pub path: String,

// Dataset version. Falls back to `DatasetVersion::default()` when missing from
// the input, and is omitted from the output when `DatasetVersion::is_empty`
// returns true.
#[serde(default, skip_serializing_if = "DatasetVersion::is_empty")]
pub version: DatasetVersion,

// Optional server string. Defaults to `None` when missing from the input and is
// omitted from the output when `None`.
// NOTE(review): presumably identifies the dataset server the dataset came
// from; `Dataset::short_info()` currently always sets it to `None`. Confirm.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub server: Option<String>,

// Flattened into the parent object rather than nested under an `other` key;
// presumably collects any input fields not matched by the named fields above.
#[serde(flatten)]
pub other: serde_json::Value,
}

impl DatasetInfoShort {
/// Parses a `DatasetInfoShort` from a string by delegating to `json_parse`.
///
/// Accepts anything that can be viewed as `&str`.
///
/// # Errors
///
/// Propagates the `Report` returned by `json_parse` (presumably raised for
/// malformed JSON or JSON that does not match this struct; confirm against
/// `json_parse`).
pub fn from_str(s: impl AsRef<str>) -> Result<Self, Report> {
json_parse(s)
}
}
20 changes: 16 additions & 4 deletions packages_rs/nextclade/src/io/results_json.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use crate::analyze::virus_properties::PhenotypeAttrDesc;
use crate::io::dataset::DatasetInfoShort;
use crate::io::json::{json_stringify, json_write, JsonPretty};
use crate::io::ndjson::NdjsonWriter;
use crate::tree::tree::CladeNodeAttrKeyDesc;
use crate::types::outputs::{
combine_outputs_and_errors_sorted, NextcladeErrorOutputs, NextcladeOutputOrError, NextcladeOutputs,
};
use crate::utils::datetime::date_iso_now;
use crate::utils::info::this_package_version_str;
use eyre::Report;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use crate::utils::info::this_package_version_str;

#[derive(Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "camelCase")]
Expand All @@ -21,6 +22,8 @@ pub struct ResultsJson {
#[serde(skip_serializing_if = "Option::is_none")]
pub nextclade_web_version: Option<String>,

pub dataset_info: DatasetInfoShort,

pub created_at: String,

pub clade_node_attr_keys: Vec<CladeNodeAttrKeyDesc>,
Expand All @@ -33,11 +36,16 @@ pub struct ResultsJson {
}

impl ResultsJson {
pub fn new(clade_node_attrs: &[CladeNodeAttrKeyDesc], phenotype_attr_keys: &[PhenotypeAttrDesc]) -> Self {
pub fn new(
dataset_info: &DatasetInfoShort,
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
) -> Self {
Self {
schema_version: "3.0.0".to_owned(),
nextclade_algo_version: this_package_version_str().to_owned(),
nextclade_web_version: None,
dataset_info: dataset_info.clone(),
created_at: date_iso_now(),
clade_node_attr_keys: clade_node_attrs.to_vec(),
phenotype_attr_keys: phenotype_attr_keys.to_vec(),
Expand All @@ -47,13 +55,14 @@ impl ResultsJson {
}

pub fn from_outputs(
dataset_info: &DatasetInfoShort,
outputs: &[NextcladeOutputs],
errors: &[NextcladeErrorOutputs],
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
nextclade_web_version: &Option<String>,
) -> Self {
let mut this = Self::new(clade_node_attrs, phenotype_attr_keys);
let mut this = Self::new(dataset_info, clade_node_attrs, phenotype_attr_keys);
this.results = outputs.to_vec();
this.errors = errors.to_vec();
this.nextclade_web_version = nextclade_web_version.clone();
Expand All @@ -68,13 +77,14 @@ pub struct ResultsJsonWriter {

impl ResultsJsonWriter {
pub fn new(
dataset_info: &DatasetInfoShort,
filepath: impl AsRef<Path>,
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
) -> Result<Self, Report> {
Ok(Self {
filepath: filepath.as_ref().to_owned(),
result: ResultsJson::new(clade_node_attrs, phenotype_attr_keys),
result: ResultsJson::new(dataset_info, clade_node_attrs, phenotype_attr_keys),
})
}

Expand Down Expand Up @@ -103,13 +113,15 @@ impl Drop for ResultsJsonWriter {
}

pub fn results_to_json_string(
dataset_info: &DatasetInfoShort,
outputs: &[NextcladeOutputs],
errors: &[NextcladeErrorOutputs],
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
nextclade_web_version: &Option<String>,
) -> Result<String, Report> {
let results_json = ResultsJson::from_outputs(
dataset_info,
outputs,
errors,
clade_node_attrs,
Expand Down
8 changes: 8 additions & 0 deletions packages_rs/nextclade/src/run/nextclade_wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::analyze::phenotype::get_phenotype_attr_descs;
use crate::analyze::virus_properties::{AaMotifsDesc, PhenotypeAttrDesc, VirusProperties};
use crate::gene::gene_map::GeneMap;
use crate::graph::graph::{convert_auspice_tree_to_graph, convert_graph_to_auspice_tree};
use crate::io::dataset::DatasetInfoShort;
use crate::io::fasta::{read_one_fasta_str, FastaRecord};
use crate::io::nextclade_csv::CsvColumnConfig;
use crate::io::nwk_writer::convert_graph_to_nwk_string;
Expand All @@ -22,6 +23,7 @@ use crate::types::outputs::NextcladeOutputs;
use eyre::{Report, WrapErr};
use itertools::Itertools;
use schemars::JsonSchema;
use serde::__private::de::IdentifierDeserializer;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::str::FromStr;
Expand All @@ -34,6 +36,7 @@ pub struct NextcladeParams {
pub gene_map: GeneMap,
pub tree: Option<AuspiceTree>,
pub virus_properties: VirusProperties,
pub dataset_info: DatasetInfoShort,
}

impl NextcladeParams {
Expand All @@ -52,11 +55,14 @@ impl NextcladeParams {
|gene_map| GeneMap::from_str(gene_map).wrap_err("When parsing genome annotation"),
)?;

let dataset_info = DatasetInfoShort::from_str(&raw.dataset_info).wrap_err("When parsing dataset short info")?;

Ok(Self {
ref_record,
gene_map,
tree,
virus_properties,
dataset_info,
})
}
}
Expand All @@ -69,6 +75,7 @@ pub struct NextcladeParamsRaw {
pub gene_map: Option<String>,
pub tree: Option<String>,
pub virus_properties: String,
pub dataset_info: String,
}

#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
Expand Down Expand Up @@ -158,6 +165,7 @@ impl Nextclade {
gene_map,
tree,
virus_properties,
dataset_info,
} = inputs;

let params = NextcladeInputParams::from_optional(params, &virus_properties)?;
Expand Down

0 comments on commit b5f4cd2

Please sign in to comment.