Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

feat: add dataset info to output json [wip] #1311

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages_rs/nextclade-cli/src/cli/nextclade_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ pub fn nextclade_run(run_args: NextcladeRunArgs) -> Result<(), Report> {
} = nextclade.get_initial_data();

let mut output_writer = NextcladeOrderedWriter::new(
&inputs.dataset_info,
&nextclade.gene_map,
clade_node_attr_key_descs,
phenotype_attr_descs,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use log::{info, warn};
use nextclade::alphabet::nuc::from_nuc_seq;
use nextclade::analyze::virus_properties::PhenotypeAttrDesc;
use nextclade::gene::gene_map::GeneMap;
use nextclade::io::dataset::DatasetInfoShort;
use nextclade::io::fasta::{FastaPeptideWriter, FastaRecord, FastaWriter};
use nextclade::io::ndjson::NdjsonFileWriter;
use nextclade::io::nextclade_csv::{CsvColumnConfig, NextcladeResultsCsvFileWriter};
Expand Down Expand Up @@ -35,6 +36,7 @@ pub struct NextcladeOrderedWriter {

impl NextcladeOrderedWriter {
pub fn new(
dataset_info: &DatasetInfoShort,
gene_map: &GeneMap,
clade_node_attr_key_descs: &[CladeNodeAttrKeyDesc],
phenotype_attr_key_desc: &[PhenotypeAttrDesc],
Expand All @@ -50,7 +52,12 @@ impl NextcladeOrderedWriter {
.map_ref_fallible(|output_translations| FastaPeptideWriter::new(gene_map, output_translations))?;

let output_json_writer = output_params.output_json.map_ref_fallible(|output_json| {
ResultsJsonWriter::new(output_json, clade_node_attr_key_descs, phenotype_attr_key_desc)
ResultsJsonWriter::new(
dataset_info,
output_json,
clade_node_attr_key_descs,
phenotype_attr_key_desc,
)
})?;

let output_ndjson_writer = output_params.output_ndjson.map_ref_fallible(NdjsonFileWriter::new)?;
Expand Down
4 changes: 3 additions & 1 deletion packages_rs/nextclade-cli/src/dataset/dataset_download.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use itertools::Itertools;
use log::LevelFilter;
use nextclade::analyze::virus_properties::{LabelledMutationsConfig, VirusProperties};
use nextclade::gene::gene_map::{filter_gene_map, GeneMap};
use nextclade::io::dataset::{Dataset, DatasetFiles, DatasetMeta, DatasetsIndexJson};
use nextclade::io::dataset::{
Dataset, DatasetAttributeValue, DatasetAttributes, DatasetFiles, DatasetInfoShort, DatasetsIndexJson, DatasetMeta,
};
use nextclade::io::fasta::{read_one_fasta, read_one_fasta_str};
use nextclade::io::file::create_file_or_stdout;
use nextclade::io::fs::{ensure_dir, has_extension, read_file_to_string};
Expand Down
30 changes: 30 additions & 0 deletions packages_rs/nextclade/src/io/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ pub struct Dataset {
}

impl Dataset {
/// Builds a condensed [`DatasetInfoShort`] description of this dataset.
///
/// `path` and `version` are cloned from `self`; `server` is left unset (`None`) and
/// `other` is filled with the default `serde_json::Value`.
pub fn short_info(&self) -> DatasetInfoShort {
  let path = self.path.clone();
  let version = self.version.clone();
  DatasetInfoShort {
    path,
    version,
    server: None,
    other: serde_json::Value::default(),
  }
}

/// Returns the `"name"` entry of `attributes` as a string slice.
///
/// Yields `None` when the entry is missing or when `AnyType::as_str_maybe` returns `None` for it.
pub fn name(&self) -> Option<&str> {
  let name_attr = self.attributes.get("name")?;
  AnyType::as_str_maybe(name_attr)
}
Expand Down Expand Up @@ -331,3 +340,24 @@ pub struct MinimizerIndexVersion {
#[serde(flatten)]
pub other: serde_json::Value,
}

// Condensed description of a dataset (path, version, optional server) that is embedded into
// other outputs, e.g. as the `dataset_info` field of `ResultsJson`.
// Field names are converted to camelCase when (de)serialized.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct DatasetInfoShort {
// Dataset path; `Dataset::short_info()` copies it from `Dataset::path`.
pub path: String,

// Dataset version. Takes its `Default` value when missing from the input, and is left out of
// the serialized output when `DatasetVersion::is_empty` returns true.
#[serde(default, skip_serializing_if = "DatasetVersion::is_empty")]
pub version: DatasetVersion,

// Optional server the dataset is associated with (presumably a URL or identifier — TODO confirm).
// Defaults to `None` when missing from the input and is omitted from the output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub server: Option<String>,

// Flattened JSON value: (de)serialized at the same level as the named fields above rather than
// under an `other` key.
#[serde(flatten)]
pub other: serde_json::Value,
}

impl DatasetInfoShort {
  /// Parses a [`DatasetInfoShort`] from a string by delegating to `json_parse`.
  ///
  /// # Errors
  ///
  /// Returns the error reported by `json_parse` when the input cannot be parsed.
  pub fn from_str(s: impl AsRef<str>) -> Result<Self, Report> {
    let parsed: Self = json_parse(s)?;
    Ok(parsed)
  }
}
20 changes: 16 additions & 4 deletions packages_rs/nextclade/src/io/results_json.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use crate::analyze::virus_properties::PhenotypeAttrDesc;
use crate::io::dataset::DatasetInfoShort;
use crate::io::json::{json_stringify, json_write, JsonPretty};
use crate::io::ndjson::NdjsonWriter;
use crate::tree::tree::CladeNodeAttrKeyDesc;
use crate::types::outputs::{
combine_outputs_and_errors_sorted, NextcladeErrorOutputs, NextcladeOutputOrError, NextcladeOutputs,
};
use crate::utils::datetime::date_iso_now;
use crate::utils::info::this_package_version_str;
use eyre::Report;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use crate::utils::info::this_package_version_str;

#[derive(Serialize, Deserialize, schemars::JsonSchema)]
#[serde(rename_all = "camelCase")]
Expand All @@ -21,6 +22,8 @@ pub struct ResultsJson {
#[serde(skip_serializing_if = "Option::is_none")]
pub nextclade_web_version: Option<String>,

pub dataset_info: DatasetInfoShort,

pub created_at: String,

pub clade_node_attr_keys: Vec<CladeNodeAttrKeyDesc>,
Expand All @@ -33,11 +36,16 @@ pub struct ResultsJson {
}

impl ResultsJson {
pub fn new(clade_node_attrs: &[CladeNodeAttrKeyDesc], phenotype_attr_keys: &[PhenotypeAttrDesc]) -> Self {
pub fn new(
dataset_info: &DatasetInfoShort,
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
) -> Self {
Self {
schema_version: "3.0.0".to_owned(),
nextclade_algo_version: this_package_version_str().to_owned(),
nextclade_web_version: None,
dataset_info: dataset_info.clone(),
created_at: date_iso_now(),
clade_node_attr_keys: clade_node_attrs.to_vec(),
phenotype_attr_keys: phenotype_attr_keys.to_vec(),
Expand All @@ -47,13 +55,14 @@ impl ResultsJson {
}

pub fn from_outputs(
dataset_info: &DatasetInfoShort,
outputs: &[NextcladeOutputs],
errors: &[NextcladeErrorOutputs],
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
nextclade_web_version: &Option<String>,
) -> Self {
let mut this = Self::new(clade_node_attrs, phenotype_attr_keys);
let mut this = Self::new(dataset_info, clade_node_attrs, phenotype_attr_keys);
this.results = outputs.to_vec();
this.errors = errors.to_vec();
this.nextclade_web_version = nextclade_web_version.clone();
Expand All @@ -68,13 +77,14 @@ pub struct ResultsJsonWriter {

impl ResultsJsonWriter {
/// Creates a writer that keeps an owned copy of `filepath` together with an initial
/// `ResultsJson` built by `ResultsJson::new` from the given dataset info and attribute descriptors.
///
/// In the code shown here this constructor always returns `Ok`.
pub fn new(
dataset_info: &DatasetInfoShort,
filepath: impl AsRef<Path>,
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
) -> Result<Self, Report> {
Ok(Self {
filepath: filepath.as_ref().to_owned(),
// NOTE(review): the two `result:` lines below look like the removed and added sides of one
// diff line (without and with `dataset_info`) — confirm that only the second one is kept.
result: ResultsJson::new(clade_node_attrs, phenotype_attr_keys),
result: ResultsJson::new(dataset_info, clade_node_attrs, phenotype_attr_keys),
})
}

Expand Down Expand Up @@ -103,13 +113,15 @@ impl Drop for ResultsJsonWriter {
}

pub fn results_to_json_string(
dataset_info: &DatasetInfoShort,
outputs: &[NextcladeOutputs],
errors: &[NextcladeErrorOutputs],
clade_node_attrs: &[CladeNodeAttrKeyDesc],
phenotype_attr_keys: &[PhenotypeAttrDesc],
nextclade_web_version: &Option<String>,
) -> Result<String, Report> {
let results_json = ResultsJson::from_outputs(
dataset_info,
outputs,
errors,
clade_node_attrs,
Expand Down
8 changes: 8 additions & 0 deletions packages_rs/nextclade/src/run/nextclade_wasm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::analyze::phenotype::get_phenotype_attr_descs;
use crate::analyze::virus_properties::{AaMotifsDesc, PhenotypeAttrDesc, VirusProperties};
use crate::gene::gene_map::GeneMap;
use crate::graph::graph::{convert_auspice_tree_to_graph, convert_graph_to_auspice_tree};
use crate::io::dataset::DatasetInfoShort;
use crate::io::fasta::{read_one_fasta_str, FastaRecord};
use crate::io::nextclade_csv::CsvColumnConfig;
use crate::io::nwk_writer::convert_graph_to_nwk_string;
Expand All @@ -22,6 +23,7 @@ use crate::types::outputs::NextcladeOutputs;
use eyre::{Report, WrapErr};
use itertools::Itertools;
use schemars::JsonSchema;
use serde::__private::de::IdentifierDeserializer;
use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::str::FromStr;
Expand All @@ -34,6 +36,7 @@ pub struct NextcladeParams {
pub gene_map: GeneMap,
pub tree: Option<AuspiceTree>,
pub virus_properties: VirusProperties,
pub dataset_info: DatasetInfoShort,
}

impl NextcladeParams {
Expand All @@ -52,11 +55,14 @@ impl NextcladeParams {
|gene_map| GeneMap::from_str(gene_map).wrap_err("When parsing genome annotation"),
)?;

let dataset_info = DatasetInfoShort::from_str(&raw.dataset_info).wrap_err("When parsing dataset short info")?;

Ok(Self {
ref_record,
gene_map,
tree,
virus_properties,
dataset_info,
})
}
}
Expand All @@ -69,6 +75,7 @@ pub struct NextcladeParamsRaw {
pub gene_map: Option<String>,
pub tree: Option<String>,
pub virus_properties: String,
pub dataset_info: String,
}

#[derive(Clone, Debug, Serialize, Deserialize, schemars::JsonSchema)]
Expand Down Expand Up @@ -158,6 +165,7 @@ impl Nextclade {
gene_map,
tree,
virus_properties,
dataset_info,
} = inputs;

let params = NextcladeInputParams::from_optional(params, &virus_properties)?;
Expand Down
Loading