Commit
locally working first pass at adding in the ploidy info. Still needs to have the arguments passed through so it works in the WDLs
koncheto-broad committed Jun 3, 2024
1 parent f73c0b4 commit 2fb59b7
Showing 2 changed files with 48 additions and 11 deletions.
ExtractCohortEngine.java
@@ -6,6 +6,8 @@
import htsjdk.variant.vcf.VCFHeader;
import org.apache.avro.generic.GenericRecord;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.LongRange;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.hellbender.engine.FeatureContext;
@@ -34,6 +36,12 @@
public class ExtractCohortEngine {
private final Logger logger;

private final List<LongRange> PARRegions = List.of(
new LongRange(23000000010001L, 23000002781479L), // PAR1, X
new LongRange(24000000010001L, 24000002781479L), // PAR1, Y
new LongRange(23000155701383L, 23000156030895L), // PAR2, X
new LongRange(24000056887903L, 24000057217415L)); // PAR2, Y

private final boolean printDebugInformation;
private final int localSortMaxRecordsInRam;
private final List<SimpleInterval> traversalIntervals;
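
For orientation, a minimal sketch, not part of this commit, of how the encoded locations used by the PARRegions list above appear to work. It assumes a location is packed as contigIndex * 10^12 + position, inferred from the literals (e.g. 23000000010001L would be chrX (23) at position 10,001, the GRCh38 PAR1 start); the class and helper names here are hypothetical.

// Hypothetical sketch of the encoded-location scheme the PAR ranges above rely on.
// Assumption: location = contigIndex * 1_000_000_000_000L + 1-based position.
import org.apache.commons.lang.math.LongRange;
import java.util.List;

public class ParLookupSketch {
    private static final long CHROM_MULTIPLIER = 1_000_000_000_000L; // assumed encoding factor

    static long encode(int contigIndex, long position) {
        return contigIndex * CHROM_MULTIPLIER + position;
    }

    static boolean isInPAR(long location, List<LongRange> parRegions) {
        return parRegions.stream().anyMatch(r -> r.containsLong(location));
    }

    public static void main(String[] args) {
        List<LongRange> par = List.of(new LongRange(23000000010001L, 23000002781479L)); // PAR1, X
        System.out.println(isInPAR(encode(23, 20_000L), par));    // true: inside PAR1
        System.out.println(isInPAR(encode(23, 5_000_000L), par)); // false: past PAR1
    }
}
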
@@ -303,7 +311,6 @@ public void traverse() {
String sampleId = queryRow.get(SchemaUtils.SAMPLE_ID).toString();
Integer ploidy = Integer.parseInt(queryRow.get(SchemaUtils.PLOIDY).toString());
// Flatten the compound (chromosome, sampleId) key into a single string; otherwise we'd need a map to another data structure just to look up the ploidy
String key = chromosome+"-"+sampleId;
samplePloidyMap.put(chromosome+"-"+sampleId, ploidy);
}
processBytesScanned(reader);
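
A small sketch, not from this commit, of the flattened compound key being built above: a single "<chromosome>-<sampleId>" string avoids nesting one map inside another just to reach a ploidy value. The names and values below are made up.

// Hypothetical illustration of the flattened (chromosome, sampleId) -> ploidy map.
import java.util.HashMap;
import java.util.Map;

public class PloidyKeySketch {
    public static void main(String[] args) {
        Map<String, Integer> samplePloidyMap = new HashMap<>();

        // Populate while scanning the ploidy query rows (values made up):
        long chromosomeAsPosition = 24_000_000_000_000L; // chrY under the assumed encoding
        String sampleId = "17";
        samplePloidyMap.put(chromosomeAsPosition + "-" + sampleId, 1);

        // Later, look up with the same flattened key; missing entries mean diploid:
        int ploidy = samplePloidyMap.getOrDefault(chromosomeAsPosition + "-" + sampleId, 2);
        System.out.println(ploidy); // 1
    }
}
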
@@ -511,22 +518,22 @@ protected VariantContext processSampleRecordsForLocation(final long location,
// Nothing to do here -- just needed to mark the sample as seen so it doesn't get put in the high confidence ref band
break;
case "1": // Non-Variant Block with 10 <= GQ < 20
refCalls.add(new ReferenceGenotypeInfo(sampleName, 10));
refCalls.add(new ReferenceGenotypeInfo(sampleName, 10, sampleRecord.getSampleId().intValue()));
break;
case "2": // Non-Variant Block with 20 <= GQ < 30
refCalls.add(new ReferenceGenotypeInfo(sampleName, 20));
refCalls.add(new ReferenceGenotypeInfo(sampleName, 20, sampleRecord.getSampleId().intValue()));
break;
case "3": // Non-Variant Block with 30 <= GQ < 40
refCalls.add(new ReferenceGenotypeInfo(sampleName, 30));
refCalls.add(new ReferenceGenotypeInfo(sampleName, 30, sampleRecord.getSampleId().intValue()));
break;
case "4": // Non-Variant Block with 40 <= GQ < 50
refCalls.add(new ReferenceGenotypeInfo(sampleName, 40));
refCalls.add(new ReferenceGenotypeInfo(sampleName, 40, sampleRecord.getSampleId().intValue()));
break;
case "5": // Non-Variant Block with 50 <= GQ < 60
refCalls.add(new ReferenceGenotypeInfo(sampleName, 50));
refCalls.add(new ReferenceGenotypeInfo(sampleName, 50, sampleRecord.getSampleId().intValue()));
break;
case "6": // Non-Variant Block with 60 <= GQ (usually omitted from tables)
refCalls.add(new ReferenceGenotypeInfo(sampleName, 60));
refCalls.add(new ReferenceGenotypeInfo(sampleName, 60, sampleRecord.getSampleId().intValue()));
break;
case "*": // Spanning Deletion - do nothing. just mark the sample as seen
break;
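
The numbered cases above follow one pattern: reference-block state code n corresponds to a GQ lower bound of n*10. A compact sketch of that mapping, assuming the codes stay in the "1"–"6" range the switch handles; the class and method names are hypothetical.

// Hypothetical compact form of the band-code -> GQ mapping spelled out case by case above.
public final class RefBandSketch {
    static int gqForBandCode(String state) {
        int band = Integer.parseInt(state);  // "1".."6"
        if (band < 1 || band > 6) {
            throw new IllegalArgumentException("Unexpected reference band code: " + state);
        }
        return band * 10;                    // lower bound of the GQ band, e.g. "4" -> 40
    }
}
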
@@ -587,9 +594,23 @@ VariantContext finalizeCurrentVariant(final List<VariantContext> unmergedVariant
for (final ReferenceGenotypeInfo info : referenceCalls) {
genotypeBuilder.reset(false);
genotypeBuilder.name(info.getSampleName());
// TODO: This is where we expand out "this is ref" to a "0/0" call. Reference ploidy data here
String key = "asdf";
genotypeBuilder.alleles(gtAlleles);

long chromAsPosition = location - SchemaUtils.decodePosition(location);
String key = chromAsPosition+"-"+info.getSampleId();

// Logic for determining the correct ploidy for reference data:
// If we have no entry in the table, the table explicitly says 2, or we are in a PAR, use a diploid reference.
// If the table says the ploidy is 1, use a haploid reference.
// Otherwise the ploidy is neither 1 nor 2, so throw a user exception because we haven't coded for that case.
int ploidy = (samplePloidyMap.containsKey(key) ? samplePloidyMap.get(key) : 2);
if (ploidy == 2 || isInPAR(location)) {
genotypeBuilder.alleles(gtAlleles);
} else if (ploidy == 1) {
genotypeBuilder.alleles(gtHaploidAlleles);
} else {
throw new UserException("GVS cannot currently handle extracting with a ploidy of "+ploidy+" as seen at "+SchemaUtils.decodeContig(location)+": "+SchemaUtils.decodePosition(location)+".");
}

genotypeBuilder.GQ(info.getGQ());
genotypes.add(genotypeBuilder.make());
}
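
For context, a minimal sketch, separate from this commit, of how htsjdk's GenotypeBuilder emits a diploid versus haploid reference genotype, which is what the branch above selects between via gtAlleles and gtHaploidAlleles. The sample names and allele below are made up.

// Hypothetical illustration of building "0/0" (diploid) vs "0" (haploid) reference genotypes.
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import java.util.Collections;
import java.util.List;

public class RefGenotypeSketch {
    public static void main(String[] args) {
        Allele ref = Allele.create("A", true);                     // reference allele
        List<Allele> diploidRef = Collections.nCopies(2, ref);     // renders as 0/0
        List<Allele> haploidRef = Collections.singletonList(ref);  // renders as 0

        Genotype diploid = new GenotypeBuilder().name("sampleA").alleles(diploidRef).GQ(60).make();
        Genotype haploid = new GenotypeBuilder().name("sampleB").alleles(haploidRef).GQ(60).make();

        System.out.println(diploid.getPloidy()); // 2
        System.out.println(haploid.getPloidy()); // 1
    }
}
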
@@ -649,6 +670,16 @@ VariantContext finalizeCurrentVariant(final List<VariantContext> unmergedVariant
return removeAnnotations(filteredVC);
}

boolean isInPAR(final long location) {
for (LongRange region : PARRegions) {
if (region.containsLong(location)) {
return true;
}
}
return false;
}


private VariantContext filterSiteByAlleleSpecificVQScore(VariantContext mergedVC,
Map<Allele, Map<Allele, Double>> scoreMap,
Map<Allele, Map<Allele, Double>> vqScoreMap,
ReferenceGenotypeInfo.java
@@ -2,11 +2,13 @@

public class ReferenceGenotypeInfo {
private final String sampleName;
private final int sampleId;
private final int GQ;

public ReferenceGenotypeInfo(String sampleName, int GQ) {
public ReferenceGenotypeInfo(String sampleName, int GQ, int sampleId) {
this.sampleName = sampleName;
this.GQ = GQ;
this.sampleId = sampleId;
}

public String getSampleName() {
@@ -16,4 +18,8 @@ public String getSampleName() {
public int getGQ() {
return GQ;
}

public int getSampleId() {
return sampleId;
}
}
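
A short usage sketch, not part of the diff, of the widened constructor: the numeric sample id now travels alongside the sample name and GQ. The name and id below are made up.

// Hypothetical usage of the updated ReferenceGenotypeInfo.
public class ReferenceGenotypeInfoUsage {
    public static void main(String[] args) {
        ReferenceGenotypeInfo info = new ReferenceGenotypeInfo("NA12878", 40, 17);
        System.out.println(info.getSampleName()); // NA12878
        System.out.println(info.getGQ());         // 40
        System.out.println(info.getSampleId());   // 17, via the new accessor
    }
}
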
