Skip to content

Commit

Permalink
Merge pull request #111 from qbicsoftware/feature/nanopore-remove-hid…
Browse files Browse the repository at this point in the history
…den-files

delete hidden files if and only if nanopore validation is successful
  • Loading branch information
wow-such-code authored Oct 13, 2022
2 parents a7050e9 + 842261b commit 6da23b2
Show file tree
Hide file tree
Showing 61 changed files with 239 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@
<dependency>
<artifactId>data-model-lib</artifactId>
<groupId>life.qbic</groupId>
<version>2.20.0</version>
<version>2.21.0</version>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
Expand Down
53 changes: 44 additions & 9 deletions src/main/groovy/life/qbic/utils/NanoporeParser.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import com.fasterxml.jackson.databind.ObjectMapper
import groovy.json.JsonSlurper
import life.qbic.datamodel.instruments.OxfordNanoporeInstrumentOutput
import life.qbic.datamodel.instruments.OxfordNanoporeInstrumentOutputV2
import life.qbic.datamodel.instruments.OxfordNanoporeInstrumentOutputV3
import org.everit.json.schema.Schema
import org.everit.json.schema.ValidationException
import org.everit.json.schema.loader.SchemaLoader
Expand All @@ -16,10 +17,14 @@ import java.nio.file.Paths
import java.text.ParseException
import life.qbic.datamodel.datasets.OxfordNanoporeExperiment

import java.util.stream.Collectors

class NanoporeParser {

private static Set<File> hiddenFiles = new HashSet<>()
/**
* Generates a map representing the folder structure
* Generates a map representing the folder structure, if it is a correct structure
* Deletes any hidden files, if the structure fits one of the Nanopore models
* @param directory path of directory whose fileTree should be converted into map
*/
static OxfordNanoporeExperiment parseFileStructure(Path directory) {
Expand All @@ -35,6 +40,10 @@ class NanoporeParser {
def finalMap = parseMetaData(convertedDirectory, directory)
// Step5: Create the final OxfordNanoporeExperiment from the map
OxfordNanoporeExperiment convertedExperiment = OxfordNanoporeExperiment.create(finalMap)
// Step6: This is a valid experiment, we can now delete the hidden files
for (File hiddenFile : hiddenFiles) {
deleteFile(hiddenFile)
}
return convertedExperiment
} catch (ValidationException validationException) {
// we have to fetch all validation exceptions
Expand All @@ -43,6 +52,15 @@ class NanoporeParser {
}
}

/**
 * Recursively deletes the given file or directory on a best-effort basis.
 *
 * A symbolic link is removed as a link only; its target is never descended
 * into, so content outside the deleted tree is left untouched. The result of
 * {@link File#delete()} is intentionally not checked, matching the previous
 * best-effort behavior.
 *
 * @param file the file or directory to delete
 */
private static void deleteFile(File file) {
    // isDirectory() follows symlinks; without this guard a hidden link to a
    // directory would cause the link target's contents to be wiped.
    boolean isLink = java.nio.file.Files.isSymbolicLink(file.toPath())
    if (!isLink && file.isDirectory()) {
        // listFiles() yields null if the directory cannot be read
        File[] children = file.listFiles()
        if (children != null) {
            for (File child : children) {
                deleteFile(child)
            }
        }
    }
    file.delete()
}

/**
* The main metadata we need to provide for the OxfordNanoporeExperiment is in
* the report markdown file and final summary file.
Expand Down Expand Up @@ -163,7 +181,11 @@ class NanoporeParser {
// Step 2: validate against schema return if valid, throw exception if invalid
validateUsingSchema(OxfordNanoporeInstrumentOutput.getSchemaAsStream(), jsonObject)
} catch (ValidationException validationException) {
validateUsingSchema(OxfordNanoporeInstrumentOutputV2.getSchemaAsStream(), jsonObject)
try {
validateUsingSchema(OxfordNanoporeInstrumentOutputV2.getSchemaAsStream(), jsonObject)
} catch (ValidationException validationExceptionV2) {
validateUsingSchema(OxfordNanoporeInstrumentOutputV3.getSchemaAsStream(), jsonObject)
}
}
}

Expand Down Expand Up @@ -214,6 +236,8 @@ class NanoporeParser {

/**
* Convert a directory structure to a map, following the Nanopore schema.
* Ignores hidden files in the structure and adds them to a global set to be
* dealt with later.
* @param a path to the current location in recursion
* @return a map representing a directory with name, path and children as keys
*/
Expand All @@ -224,22 +248,33 @@ class NanoporeParser {
if (IGNORED_FOLDERNAMES.contains(name)) {
return null
}
List children = currentDirectory.listFiles().findAll { file ->
List<File> children = currentDirectory.listFiles()

List<File> visibleChildren = children.stream()
.filter(file -> !file.isHidden()).collect(Collectors.toList());

for (File file : children) {
if (!visibleChildren.contains(file)) {
hiddenFiles.add(file);
}
}

visibleChildren = visibleChildren.findAll { file ->
String currentFolderName = file.getName()
return !IGNORED_FOLDERNAMES.contains(currentFolderName)
}.collect {
file ->
if (file.isFile()) {
convertFile(file.toPath())
} else if (file.isDirectory()) {
convertDirectory(file.toPath())
}
if (file.isFile()) {
convertFile(file.toPath())
} else if (file.isDirectory()) {
convertDirectory(file.toPath())
}
}

def convertedDirectory = [
"name" : name,
"path" : path,
"children": children
"children": visibleChildren
]

return convertedDirectory
Expand Down
26 changes: 26 additions & 0 deletions src/test/groovy/life/qbic/utils/NanoporeParserSpec.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@ class NanoporeParserSpec extends Specification {
assert experiment.getMeasurements().get(0).getLibraryPreparationKit() == "SQK-LSK109"
}

// Feature: the example directory under "validates/" whose name ends in
// "_nanopore_html_report" must be parsed into an OxfordNanoporeExperiment
// with metadata available on its first measurement.
// NOTE(review): NanoporeParser.parseFileStructure deletes collected hidden files
// after a successful parse, so this feature may modify the fixture directory — confirm.
def "parsing a valid file structure with an html report returns an OxfordNanoporeExperiment Object"() {
given:
// exampleDirectoriesRoot is defined outside this method
def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates/QABCD001AB_E12A345a01_PAE12345_nanopore_html_report")
when:
def experiment = NanoporeParser.parseFileStructure(pathToDirectory)
then:
assert experiment instanceof OxfordNanoporeExperiment
// Check that the metadata from the report file has been retrieved
assert experiment.getMeasurements().get(0).getMachineHost() == "PCT0094"
// Check that the metadata from the summary file has been retrieved
assert experiment.getMeasurements().get(0).getLibraryPreparationKit() == "SQK-LSK109"
}

def "parsing the alternative valid file structure returns an OxfordNanoporeExperiment Object"() {
given:
def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates/QABCD001AB_E12A345a01_PAE12345_nanopore_new")
Expand All @@ -38,6 +51,19 @@ class NanoporeParserSpec extends Specification {
assert experiment.getMeasurements().get(0).getLibraryPreparationKit() == "SQK-LSK109-XL"
}

// Feature: the example directory under "validates/" whose name ends in
// "_nanopore_v3" must be parsed into an OxfordNanoporeExperiment with
// metadata available on its first measurement.
// NOTE(review): presumably this is the layout accepted by the
// OxfordNanoporeInstrumentOutputV3 schema — confirm against the fixture.
def "parsing the newest valid file structure returns an OxfordNanoporeExperiment Object"() {
given:
// exampleDirectoriesRoot is defined outside this method
def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates/QABCD001AB_E12A345a01_PAE12345_nanopore_v3")
when:
def experiment = NanoporeParser.parseFileStructure(pathToDirectory)
then:
assert experiment instanceof OxfordNanoporeExperiment
// Check that the metadata from the report file has been retrieved
assert experiment.getMeasurements().get(0).getMachineHost() == "PCT0094"
// Check that the metadata from the summary file has been retrieved
assert experiment.getMeasurements().get(0).getLibraryPreparationKit() == "SQK-LSK109-XL"
}

def "parsing the alternative valid file structure with metadata missing returns an OxfordNanoporeExperiment Object"() {
given:
def pathToDirectory = Paths.get(exampleDirectoriesRoot, "validates/QABCD001AB_E12A345a01_PAE12345_nanopore_new_minimal")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
instrument=PCT0094
position=1-A3-D3
flow_cell_id=PAE24142
sample_id=QNANO027AE_E19D023a01_200211
protocol_group_id=20200211_QNANO
protocol=sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109:True
protocol_run_id=5a7cfc2a-81b0-412d-baa0-51b939cd8e76
acquisition_run_id=c6028297dff19d01e7c5fba6487de807d1e99c05
started=2020-02-11T15:52:10.465982+01:00
acquisition_stopped=2020-02-14T08:39:54.688916+01:00
processing_stopped=2020-02-14T08:39:58.804639+01:00
basecalling_enabled=1
sequencing_summary_file=sequencing_summary_PAE24142_c6028297.txt
fast5_files_in_final_dest=2189
fast5_files_in_fallback=0
fastq_files_in_final_dest=2189
fastq_files_in_fallback=0
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Tracking ID
===========

{
"asic_id": "0004A30B0022C63E",
"asic_id_eeprom": "0004A30B0022C63E",
"asic_temp": "32.631687",
"asic_version": "Unknown",
"auto_update": "0",
"auto_update_source": "https://mirror.oxfordnanoportal.com/software/MinKNOW/",
"bream_is_standard": "0",
"configuration_version": "1.0.7",
"device_id": "1-E9-H9",
"device_type": "promethion",
"distribution_status": "stable",
"distribution_version": "19.12.5",
"exp_script_name": "N/A",
"exp_script_purpose": "sequencing_run",
"exp_start_time": "2020-01-28T15:17:38Z",
"flow_cell_id": "PAE26989",
"flow_cell_product_code": "FLO-PRO002",
"guppy_version": "3.2.8+bd67289",
"heatsink_temp": "36.179111",
"hostname": "PCT0094",
"hublett_board_id": "0132136faade2e15",
"hublett_firmware_version": "2.0.12",
"installation_type": "nc",
"ip_address": "",
"local_firmware_file": "1",
"mac_address": "",
"operating_system": "ubuntu 16.04",
"protocol_group_id": "20200128_QNANO",
"protocol_run_id": "",
"protocols_version": "4.3.16",
"run_id": "db9e9383d44d80bbe1e2600c7a7419056610d46d",
"sample_id": "QNANO036AD_E19D023b04",
"satellite_board_id": "0000000000000000",
"satellite_firmware_version": "2.0.12",
"usb_config": "firm_1.2.3_ware#rbt_4.5.6_rbt#ctrl#USB3",
"version": "3.6.1"
}

Duty Time
=========

ID: db9e9383d44d80bbe1e2600c7a7419056610d46d

Channel State,Experiment Time (minutes),State Time (samples),
strand,0,144832342
strand,1,158421270
strand,2,378095352
strand,3,472685319
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is some text
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
instrument=PCT0094
position=1-A3-D3
flow_cell_id=PAE24142
sample_id=QNANO027AE_E19D023a01_200211
protocol_group_id=20200211_QNANO
protocol=sequencing/sequencing_PRO002_DNA:FLO-PRO002:SQK-LSK109-XL
protocol_run_id=5a7cfc2a-81b0-412d-baa0-51b939cd8e76
acquisition_run_id=c6028297dff19d01e7c5fba6487de807d1e99c05
started=2020-02-11T15:52:10.465982+01:00
acquisition_stopped=2020-02-14T08:39:54.688916+01:00
processing_stopped=2020-02-14T08:39:58.804639+01:00
basecalling_enabled=1
sequencing_summary_file=sequencing_summary_PAE24142_c6028297.txt
fast5_files_in_final_dest=2189
fast5_files_in_fallback=0
fastq_files_in_final_dest=2189
fastq_files_in_fallback=0
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
Tracking ID
===========

{
"asic_id": "0004A30B0022C63E",
"asic_id_eeprom": "0004A30B0022C63E",
"asic_temp": "32.631687",
"asic_version": "Unknown",
"auto_update": "0",
"auto_update_source": "https://mirror.oxfordnanoportal.com/software/MinKNOW/",
"bream_is_standard": "0",
"configuration_version": "1.0.7",
"device_id": "1-E9-H9",
"device_type": "promethion",
"distribution_status": "stable",
"distribution_version": "19.12.5",
"exp_script_name": "N/A",
"exp_script_purpose": "sequencing_run",
"exp_start_time": "2020-01-28T15:17:38Z",
"flow_cell_id": "PAE26989",
"flow_cell_product_code": "FLO-PRO002",
"guppy_version": "3.2.8+bd67289",
"heatsink_temp": "36.179111",
"hostname": "PCT0094",
"hublett_board_id": "0132136faade2e15",
"hublett_firmware_version": "2.0.12",
"installation_type": "nc",
"ip_address": "",
"local_firmware_file": "1",
"mac_address": "",
"operating_system": "ubuntu 16.04",
"protocol_group_id": "20200128_QNANO",
"protocol_run_id": "",
"protocols_version": "4.3.16",
"run_id": "db9e9383d44d80bbe1e2600c7a7419056610d46d",
"sample_id": "QNANO036AD_E19D023b04",
"satellite_board_id": "0000000000000000",
"satellite_firmware_version": "2.0.12",
"usb_config": "firm_1.2.3_ware#rbt_4.5.6_rbt#ctrl#USB3",
"version": "3.6.1"
}

Duty Time
=========

ID: db9e9383d44d80bbe1e2600c7a7419056610d46d

Channel State,Experiment Time (minutes),State Time (samples),
strand,0,144832342
strand,1,158421270
strand,2,378095352
strand,3,472685319

0 comments on commit 6da23b2

Please sign in to comment.