Skip to content

Commit

Permalink
many bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
patidarr committed Jun 6, 2017
1 parent 924cb19 commit caa5115
Show file tree
Hide file tree
Showing 11 changed files with 93 additions and 41 deletions.
Binary file modified Rulegraph.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions config/config_annotation.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"annot_tcc" : "hg19_targated_cancer_care_10262015.txt",
"annot_civic" : "hg19_civic_10262015.txt",
"blacklisted" : "hg19_blacklistedSites.txt",
"whitelisted" : "hg19_whitelistedSites.txt",
"geneList" : "geneLists/combinedList_01032017",
"cgc" : "geneLists/CancerGeneCensus.v78.txt",
"somaticActSites" : "hg19_SomaticActionableSites.txt",
Expand Down
2 changes: 1 addition & 1 deletion config/config_common_biowulf.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"nextera" : "/data/Clinomics/Ref/khanlab/design/nextera.target.refseq.ensembl.noUTR.notfound.bed",
"strexome" : "/data/Clinomics/Ref/khanlab/design/Strexome.target.bed",
"sureselect.exon.v5_utr": "/data/Clinomics/Ref/khanlab/design/NIAID.sureselect.exon.v5_utr.target.bed",
"sureselect.exon.v6" : "/data/Clinomics/Ref/khanlab/design/NIAID.sureselect.exon.v6.target.bed",
"sureselect.exon.v6" : "/data/Clinomics/Ref/khanlab/design/NIAID.sureselect.exon.v6.target.bed"
},
"hotspot_bed" :{
"wholegenome" : "/data/Clinomics/Ref/khanlab/hotspot/hg19_Hotspot.08.08.16_merged.bed",
Expand Down
Binary file modified dag.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
19 changes: 10 additions & 9 deletions ngs_pipeline.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,11 @@ ALL_QC += expand("{subject}/{TIME}/igv/session_{subject}.xml", TIME=TIME, su
for subject in config['subject']:
for library in config['subject'][subject]:
if config['sample_captures'][library] not in config['Panel_List']:
# any output which is desired on all libraries but Panel goes here, the list of panel captures should be maintained in the Panel_List in config file
ALL_QC += [subject+"/"+TIME+"/qc/"+subject+".circos.png"]
if config['sample_type'][library] == 'Normal':
# This might be temporary, if needed on all sample remove line above line.
ALL_QC += [subject+"/"+TIME+"/"+library+"/HLA/seq2HLA/"+library+"-ClassI.HLAgenotype4digits"]
ALL_QC += [subject+"/"+TIME+"/"+library+"/HLA/HLAminer/HLAminer_HPTASR.csv"]
ALL_QC += [subject+"/"+TIME+"/"+library+"/HLA/"+library+".Calls.txt"]
ALL_QC += [subject+"/"+TIME+"/"+library+"/HLA/seq2HLA/"+library+"-ClassI.HLAgenotype4digits"]
ALL_QC += [subject+"/"+TIME+"/"+library+"/HLA/HLAminer/HLAminer_HPTASR.csv"]
ALL_QC += [subject+"/"+TIME+"/"+library+"/HLA/"+library+".Calls.txt"]

if len(config['sample_references']) > 0:
for Tumor in config['sample_references']:
Expand All @@ -172,6 +171,7 @@ if len(config['sample_references']) > 0:
seq2HLA = "{subject}/{TIME}/{sample}/HLA/seq2HLA/{sample}-ClassI.HLAgenotype4digits".format(TIME=TIME, subject=SAMPLE_TO_SUBJECT[Normal], sample=Normal)
HLAminer = "{subject}/{TIME}/{sample}/HLA/HLAminer/HLAminer_HPTASR.csv".format(TIME=TIME, subject=SAMPLE_TO_SUBJECT[Normal], sample=Normal)
if config['sample_captures'][Tumor] not in config['Panel_List']:
# any output which is desired on all somatic libraries but Panel goes here, the list of panel captures should be maintained in the Panel_List in config file
HLA[Tumor] = [seq2HLA, HLAminer]
SequenzaPairs[Tumor] = ["{subject}/{TIME}/{sample}/{sample}.mpileup.gz".format(TIME=TIME, subject=SAMPLE_TO_SUBJECT[Normal], sample=Normal), "{subject}/{TIME}/{sample}/{sample}.mpileup.gz".format(TIME=TIME, subject=SAMPLE_TO_SUBJECT[Tumor], sample=Tumor) ]
###########################################################################
Expand Down Expand Up @@ -223,6 +223,7 @@ for sample in config['sample_references'].keys():
UNION_SOM_MUT[sample] = local
UNION_SOM_MUT_LIST +=[subject+"/"+TIME+ACT_DIR+sample+".unionSomaticVars.txt"]
if config['sample_captures'][sample] not in config['Panel_List']:
# any output which is desired on all libraries but Panel goes here, the list of panel captures should be maintained in the Panel_List in config file
UNION_SOM_MUT_LIST +=[subject+"/"+TIME+ACT_DIR+sample+".mutationalSignature.pdf"]

##########################################################################
Expand Down Expand Up @@ -374,15 +375,13 @@ rule Khanlab_Pipeline:
ActionableFiles,
UNION_SOM_MUT_LIST,
expand ("ngs_pipeline_{NOW}.rnaseq.done", NOW=NOW)
version: "1.0"
version: config["pipeline_version"]
wildcard_constraints:
NOW="\w+"
params:
rulename = "Final",
batch = config[config['host']]["job_default"],
group = config["group"],
wait4job = NGS_PIPELINE + "/scripts/block_for_jobid.pl",
sort = NGS_PIPELINE + "/scripts/awk_sort_withHeader.awk",
mail = NGS_PIPELINE + "/scripts/tsv2html.final.sh",
email = config["mail"],
host = config["host"],
Expand All @@ -400,7 +399,7 @@ rule Khanlab_Pipeline:
chmod g+rw {WORK_DIR}/${{sub}}/{TIME}/successful.txt
chgrp {params.group} {WORK_DIR}/${{sub}}/{TIME}/successful.txt
done
ssh {params.host} "{params.mail} --location {WORK_DIR} --host {params.host} --head {WORK_DIR}/ngs_pipeline_{NOW}.csv |mutt -e \\\"my_hdr Content-Type: text/html\\\" -s 'Khanlab ngs-pipeline Status' `whoami`@mail.nih.gov {params.email}"
ssh {params.host} "{params.mail} --location {WORK_DIR} --host {params.host} --head --version {version} {WORK_DIR}/ngs_pipeline_{NOW}.csv |mutt -e \\\"my_hdr Content-Type: text/html\\\" -s 'Khanlab ngs-pipeline Status' `whoami`@mail.nih.gov {params.email}"
rm -rf {WORK_DIR}/ngs_pipeline_{NOW}.csv
#######################
"""
Expand Down Expand Up @@ -685,6 +684,7 @@ rule CN_LRR:
#######################
module load R/{version}
module load bedtools/2.25.0
export LC_ALL=C
mkdir -p {wildcards.subject}/{TIME}/Actionable/
echo -e "#Chr\\tStart\\tEnd\\tNormalCoverage\\tTumorCoverage\\tRatio\\tLRR\\tGene(s)\\tStrand(s)" >{output.out}
paste {input.files} |cut -f 1-4,8 |awk '{{OFS="\\t"}};{{print $1,$2,$3,$4,$5,($5+1)/($4+1),log(($5+1)/($4+1))/log(2)}}' >{output.out}.temp1
Expand Down Expand Up @@ -1004,6 +1004,7 @@ rule FormatInput:
shell: """
#######################
module load annovar/{version}
export LC_ALL=C
cut -f 1-5 {input.txtFiles} |sort |uniq > {wildcards.subject}/{TIME}/annotation/AnnotationInput
perl {input.convertor} {wildcards.subject}/{TIME}/annotation/AnnotationInput
rm -rf "{wildcards.subject}/{TIME}/annotation/AnnotationInput.pph"
Expand Down
4 changes: 3 additions & 1 deletion ruleBook/NeoAntigen.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ rule MergeHLA:
batch = config[config['host']]["job_default"]
shell: """
#######################
export LC_ALL=C
perl {input.Tool} {input.B} {input.A} | sort > {output}
#######################
"""
Expand All @@ -79,7 +80,7 @@ rule VEP:
perl {input.tool} -vcf {wildcards.base}/{wildcards.TIME}/{wildcards.sample}/calls/{wildcards.sample}.strelka.indels.raw.vcf,{wildcards.base}/{wildcards.TIME}/{wildcards.sample}/calls/{wildcards.sample}.strelka.snvs.raw.vcf,{wildcards.base}/{wildcards.TIME}/{wildcards.sample}/calls/{wildcards.sample}.MuTect.raw.vcf -order {params.normal},{wildcards.sample} -filter REJECT |vcf-subset -u -c {wildcards.sample} >{output.vcf}.tmp
variant_effect_predictor.pl -i {output.vcf}.tmp --plugin Downstream --plugin Wildtype --terms SO --offline --cache --dir_cache $VEPCACHEDIR --assembly GRCh37 --output_file {output.vcf} --vcf --force_overwrite
rm -rf {output.vcf}.tmp
export LC_ALL=C
perl {input.merge} {input.HLA[0]} {input.HLA[1]} | sort > {wildcards.base}/{wildcards.TIME}/{params.normal}/HLA/{params.normal}.Calls.txt
#######################
"""
Expand All @@ -105,6 +106,7 @@ rule pVACseq:
module load pvacseq python/{params.python}
pvacseq run --iedb-install-directory {params.IEDB} -e 8,9,10,11 --fasta-size=200 {input} {wildcards.sample} ${{allele}} {{NNalign,NetMHC,NetMHCIIpan,NetMHCcons,NetMHCpan,PickPocket,SMM,SMMPMBEC,SMMalign}} {wildcards.base}/{wildcards.TIME}/{wildcards.sample}/NeoAntigen/
export LC_ALL=C
perl {params.tool} {output} |sort |uniq >{wildcards.base}/{wildcards.TIME}/{wildcards.sample}/NeoAntigen/{wildcards.sample}.final.txt
#######################
"""
6 changes: 3 additions & 3 deletions ruleBook/annot.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,9 @@ rule CombineAnnotation:
"{base}/AnnotationInput.sift.out",
convertor = NGS_PIPELINE + "/scripts/CombineAnnotations.pl",
geneanno = NGS_PIPELINE + "/scripts/GeneAnnotation.pl",
filter = NGS_PIPELINE + "/scripts/filterVariants.v1.pl",
coding = NGS_PIPELINE + "/scripts/ProteinCoding.pl",
coding = NGS_PIPELINE + "/scripts/ProteinCodingRare.pl",
blacklisted = config["annovar_data"]+config["blacklisted"],
whitelisted = config["annovar_data"]+config["whitelisted"],
ACMG = config["annovar_data"]+config['ACMG']
output:
filtered="{base}/AnnotationInput.coding.rare.txt",
Expand Down Expand Up @@ -286,7 +286,7 @@ echo "{wildcards.base}/AnnotationInput
perl {input.convertor} {wildcards.base}/list >{output.all}.tmp
perl {input.geneanno} {input.ACMG} {output.all}.tmp >{output.all}
perl {input.coding} {wildcards.base}/AnnotationInput.annotations.final.txt | perl {input.filter} - {input.blacklisted} 0.05 >{output.all}.tmp
perl {input.coding} {input.blacklisted} {input.whitelisted} {wildcards.base}/AnnotationInput.annotations.final.txt 0.05 >{output.all}.tmp
grep -P "Chr\\tStart\\tEnd\\tRef\\tAlt" {output.all}.tmp >{output.filtered}
grep -v -P "Chr\\tStart\\tEnd\\tRef\\tAlt" {output.all}.tmp >>{output.filtered}
rm -rf {output.all}.tmp {wildcards.base}/list
Expand Down
2 changes: 1 addition & 1 deletion ruleBook/bam2mpg.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rule Bam2MPG:
fi
module load bam2mpg/{version}
module load vcftools/{params.vcftools}
module load vcftools/{params.vcftools} {params.samtools}
for CHR in `seq 1 22` X Y;
do
echo "bam2mpg --shorten --vcf_spec --qual_filter 20 -bam_filter '-q31' --region chr${{CHR}} --only_nonref --snv_vcf ${{LOCAL}}/chr${{CHR}}{wildcards.sample}.snps.vcf --div_vcf ${{LOCAL}}/chr${{CHR}}{wildcards.sample}.indel.vcf {input.ref} {input.bam}"
Expand Down
9 changes: 5 additions & 4 deletions ruleBook/rnaseq_pipeline.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ for subject in config['RNASeq'].keys():
RNA_QC_ALL +=[subject+"/"+TIME+"/qc/"+subject+".RnaSeqQC.txt"]
ALL_QC +=[subject+"/"+TIME+"/qc/"+subject+".transcriptCoverage.png"]
for sample in config['RNASeq'][subject]:
ALL_QC += [subject+"/"+TIME+"/qc/"+subject+".circos.png"]
ALL_QC += [subject+"/"+TIME+"/"+sample+"/HLA/seq2HLA/"+sample+"-ClassI.HLAgenotype4digits"]
ALL_QC += [subject+"/"+TIME+"/"+sample+"/HLA/HLAminer/HLAminer_HPTASR.csv"]
ALL_QC += [subject+"/"+TIME+"/"+sample+"/HLA/"+sample+".Calls.txt"]
Expand All @@ -38,12 +39,12 @@ for subject in config['RNASeq'].keys():
ALL_QC += [subject+"/"+TIME+"/"+sample+"/qc/"+sample+".star.gt"]
ALL_QC += [subject+"/"+TIME+"/"+sample+"/fusion/"+sample+".actionable.fusion.txt"]
add_to_SUBJECT_ANNO(subject, "rnaseq", [subject+"/"+TIME+"/"+sample+"/calls/"+sample+".HC_RNASeq.annotated.txt"])
EXPRESSION += [subject+"/"+TIME+"/"+sample+"/exonExp_UCSC/"+sample+".exonExpression.UCSC.txt"]
EXPRESSION += [subject+"/"+TIME+"/"+sample+"/exonExp_ENS/"+sample+".exonExpression.ENS.txt"]
#EXPRESSION += [subject+"/"+TIME+"/"+sample+"/exonExp_UCSC/"+sample+".exonExpression.UCSC.txt"]
#EXPRESSION += [subject+"/"+TIME+"/"+sample+"/exonExp_ENS/"+sample+".exonExpression.ENS.txt"]
for gtf in config['GTF']:
EXPRESSION += [subject+"/"+TIME+"/"+sample+"/cufflinks_"+gtf+"/genes.fpkm_tracking_log2"]
#EXPRESSION += [subject+"/"+TIME+"/"+sample+"/cufflinks_"+gtf+"/genes.fpkm_tracking_log2"]
EXPRESSION += [subject+"/"+TIME+"/"+sample+"/TPM_"+gtf+"/"+sample+"_counts.Gene.txt"]
ALL_QC += [subject+"/"+TIME+"/"+subject+"/db/matrixInput_"+sample+"_"+gtf]
#ALL_QC += [subject+"/"+TIME+"/"+subject+"/db/matrixInput_"+sample+"_"+gtf]
RNA_CALLS += ["{subject}/{TIME}/{sample}/calls/{sample}.HC_RNASeq.raw.vcf".format(TIME=TIME, subject=SUB2RNA[s], sample=s) for s in config['RNASeq'][subject]]
SUB_FUSION[subject] = ["{subject}/{TIME}/{sample}/fusion/{sample}.actionable.fusion.txt".format(TIME=TIME, subject=SUB2RNA[s], sample=s) for s in config['RNASeq'][subject]]
SUB_QC[subject] = ["{subject}/{TIME}/{sample}/qc/{sample}.RnaSeqQC.txt".format(TIME=TIME, subject=SUB2RNA[s], sample=s) for s in config['RNASeq'][subject]]
Expand Down
62 changes: 62 additions & 0 deletions scripts/ProteinCodingRare.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#!/usr/bin/perl
use strict;
use warnings;
use 5.010;
local $SIG{__WARN__} = sub {my $message =shift; die $message;};

# Author: Rajesh Patidar rajbtpatidar@gmail.com
# This scripts take 4 inputs (in the order listed)
# 1 a list of variants (chr\tstart\tend\ref\talt\totherInfo) which should be filtered out from the
# 2 a bed file of regions to be excluded from filtering out
# 3 file to work on, must be annotated using khanlab annotation-pipeline/ngs-pipeline
# 4 MAF filter (0.05) This remove any variant if present in any manoj/minor 1000g/ES/ExAC population.
#
#
open(BLACK, $ARGV[0]);
open(WHITE, $ARGV[1]);
open(IN, $ARGV[2]);
my $freq = $ARGV[3];
my %REMOVE;
while(<BLACK>){
chomp;
my @t =split("\t", $_);
$REMOVE{"$t[0]\t$t[1]\t$t[2]\t$t[3]\t$t[4]"} ="$t[5]";
}
close BLACK;
my %KEEP;
while(<WHITE>){
chomp;
my @t =split("\t", $_);
$KEEP{"$t[0]\t$t[1]\t$t[2]"} ="$t[3]";
}
while(<IN>){
chomp;
if($_ =~ /^Chr/){
print "$_\n";
next;
}
my @a = split ("\t", $_);
if (exists $REMOVE{"$a[0]\t$a[1]\t$a[2]\t$a[3]\t$a[4]"}){
next;
}
foreach my $key (keys %KEEP){
my @region= split("\t", $key);
if($a[0] =~ /^$region[0]$/ and $a[1] >=$region[1] and $a[1] <= $region[2]){
print "$_\n";
next;
}
}
#if(exists $KEEP{"$a[0]\t$a[1]\t$a[2]\t$a[3]\t$a[4]"}){
# print "$_\n";
# next;
#}
if($a[5] =~ /^exonic/ and ($a[8] =~ /nonsynonymous/ or $a[8] =~ /stop/ or $a[8] =~ /frameshift/) or $a[5] =~ /^splicing/){
for (@a[12..28]){
s/^\.$/-2/;
}
if( $a[12] <=$freq and $a[13] <=$freq and $a[14] <=$freq and $a[15] <=$freq and $a[16] <=$freq and $a[17] <=$freq and $a[18] <=$freq and $a[19] <=$freq and $a[20] <=$freq and $a[21] <=$freq and $a[22] <=$freq and $a[23] <=$freq and $a[24] <=$freq and $a[25] <=$freq and $a[26] <=$freq and $a[27] <=$freq and $a[28] <=$freq){
print "$_\n";
}
}
}
close IN;
29 changes: 7 additions & 22 deletions scripts/tsv2html.final.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,11 @@ Options:
-d Specify delimiter to look for, instead of "\t".
--version Pipeline version.
--head Treat first line as header, enclosing in <thead> and <th> tags.
--foot Treat last line as footer, enclosing in <tfoot> and <th> tags.
--Name To include in Mail
--Diagnosis To include in the mail body
Examples:
1. $(baselocation $0) --Name NCI00001 input.csv
Above will parse file 'input.csv' with comma as the field separator and
output HTML tables to STDOUT.
2. $(baselocation $0) -d '|' < input.psv > output.htm
Above will parse file "input.psv", looking for the pipe character as the
delimiter, then output results to "output.htm".
3. $(baselocation $0) -d '\t' --head --foot < input.tsv > output.htm
Above will parse file "input.tsv", looking for tab as the delimiter, then
process first and last lines as header/footer (that contain data labels), then
write output to "output.htm".
EOF
}

Expand All @@ -46,6 +27,10 @@ while true; do
shift
d="$1"
;;
--version)
shift
version="$1"
;;
--foot)
foot="-v ftr=1"
;;
Expand Down Expand Up @@ -121,7 +106,7 @@ echo "<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www
<body>
<p>
Hello,<br><br>
ngs-pipeline finished successfully on <b>$host</b><br><br>
ngs-pipeline version <b>$version</b> finished successfully on <b>$host</b><br><br>
Subject(s) Processed:<br><br>
Expand Down

0 comments on commit caa5115

Please sign in to comment.