-
Notifications
You must be signed in to change notification settings - Fork 1
/
parameters_example.txt
196 lines (158 loc) · 13.5 KB
/
parameters_example.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#Last edited on 03/15/2017 by Eric Reed
#########################################################################################################
# Standard parameters
#########################################################################################################
#The parameters in this first section change the most, and will need to be adjusted for each run
#The second section includes the parameters for the cluster / parallelization, which only need to be set once
#The other sections mostly contain default values that can be adjusted, but work just fine as they are.
#####working directory where a results and report directory will be created
working_dir = /restricted/projectnb/montilab-p/projects/pipeline_dev/unit_tests/human_paired_end/
#####file that contains 2 (for single end) or 3 (for paired end) columns with fastq | [fastq2] | stub - see example in the repository
raw_filenames = /restricted/projectnb/montilab-p/projects/pipeline_dev/data/paired_end/raw_filenames_10K.txt
#####paired end reads?
paired = TRUE #are the reads paired end (TRUE) or single end (FALSE)
stranded = no #(yes/no/reverse) is strand information available for each read
#####Aligner and indices for alignment
aligner = star #right now only 'skip', 'star', 'bowtie2', and 'tophat' are allowed
tophat_index = /restricted/projectnb/montilab-p/CBMrepositoryData/annot/hg19_ensembl_GRCh37_tophat/Ensembl/GRCh37/Sequence/Bowtie2Index/genome
star_index = /restricted/projectnb/montilab-p/CBMrepositoryData/annot/hg19_ensembl_GRCh37_tophat/Ensembl/GRCh37/Sequence/star_index/
bowtie2_index = /restricted/projectnb/montilab-p/CBMrepositoryData/annot/hg19_ensembl_GRCh37_tophat/Ensembl/GRCh37/Sequence/Bowtie2Index/genome
#####genome and genome annotation
genome_annotation_gft = /restricted/projectnb/montilab-p/CBMrepositoryData/annot/hg19_ensembl_GRCh37_tophat/Ensembl/GRCh37/Annotation/Archives/archive-2014-05-23-16-03-55/Genes/genes.gtf
genome = /restricted/projectnb/montilab-p/CBMrepositoryData/annot/hg19_ensembl_GRCh37_tophat/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa
#####the adapters specified here work for Illumina TruSeq; if you need others, check https://github.com/marcelm/cutadapt/blob/master/README.md
skip_trimming = FALSE
cutadapt_first_adapter = AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC
cutadapt_second_adapter = AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
#####Option to run only the initial fastqc, then clip the standard Illumina adapters, and finally run another fastqc; used to check whether all adapters were trimmed before running the actual alignment
QC_and_trim_only = FALSE
#####choose which of the programs you want to run to quantify reads
run_cufflinks = TRUE
run_htseq = TRUE
run_featureCount = TRUE
####remove intermediate files? (This will remove all large intermediate files except for bam files)
remove_intermediate = TRUE
#########################################################################################################
# Cluster parameters
#########################################################################################################
#This section includes all cluster specific parameters. They need to be set once when installing the pipeline and can then be reused as is.
run_single_cpu = FALSE #this will circumvent the parallelization and run all samples on a single node
#qsub parameters
qsub_email = name@bu.edu
qsub_send_email = FALSE
qsub_memory = 4g
qsub_PROJECT = gsc-p #in some qsub systems you need to specify a project name (-P) for the purpose of accounting
qsub_MACHINE = scc
qsub_RUNTIME_LIMIT = 96:00:00
qsub_wait_time = 10
qsub_num_processors = 4-8
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#####################################END OF STANDARD PARAMETERS##########################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
#########################################################################################################
# General Hydra related parameters
#########################################################################################################
#should everything also be printed on the command line
verbose = TRUE
#different ways to run the pipeline
clean_run = TRUE #start pipeline from scratch or try to pick up at the last step that was performed successfully
ask_before_deleting = TRUE #pipeline will ask before deleting any file. If you submit the pipeline as a batch job you want to set this to FALSE
run_failed_from_scratch = FALSE #this option will rerun from scratch all samples that failed
#Does the raw_filenames file have a header
raw_file_header = TRUE
#Are the rawfiles provided as zips?
zipped_fastq = TRUE
############################################################################################################################################
# CUTADAPT
############################################################################################################################################
cutadapt_exec = cutadapt
cutadapt_python_version = python
cutadapt_m = 20 #(20)minimum length of read (if the read is smaller than that it will be removed)
cutadapt_q_end = 20 #(20)minimum quality threshold for the end of the read, if it's below the threshold the read is trimmed
cutadapt_q_start = 20 #(20)minimum quality threshold for the beginning of the read
cutadapt_quality = 33 #(default 33) - if the phred quality scores are base 64 (phred+64), adjust accordingly
cutadapt_u = 0 #force removal of the first X bases (or the last X bases when given as -X)
############################################################################################################################################
#FAST QC
############################################################################################################################################
fastqc_exec = fastqc
include_full_fastqc_report = TRUE #Flag that controls whether the full reports should be included. Each report is around 4mb, so for larger datasets this blows up the report considerably
remove_failed = TRUE #do not process samples that fail the per base sequence quality check after quality and adapter trimming
############################################################################################################################################
#TOPHAT
############################################################################################################################################
tophat_exec = tophat2
#The tophat index refers to the prefix of a bowtie2 index that must be built prior to running tophat2
tophat_qual = none # can be none, --solexa-quals or --solexa1.3-quals
tophat_N = 2 #--read-mismatches #Default is 2
tophat_gap_length = 2 #Default is 2
tophat_edit_dist = 2 #Default is 2
mate_inner_dist = 50
mate_std_dev = 20
######################################################################################
#STAR ALIGNER
######################################################################################
star_exec = STAR
#optional parameters (do not change unless you've read the documentation):
outFilterType = BySJout #default is BySJout
outFilterMultimapNmax = 20 #max number of multiple alignments allowed (def: 20)
alignSJoverhangMin = 8 #minimum overhang for unannotated junctions (def: 8)
alignSJDBoverhangMin = 1 #minimum overhang for annotated junctions (def: 1)
outFilterMismatchNmax = 999 #max number of mismatches per pair (def: 999 - inactive)
outFilterMismatchNoverLmax = 0.04 #max number of mismatches per pair relative to the read length(def: 0.04 - 8 for paired reads with length 100)
alignIntronMin = 20 #minimum intron length
alignIntronMax = 1000000 #maximum intron length
alignMatesGapMax = 1000000 #maximum genomic distance between mates
outputSAMtype = BAM_SortedByCoordinate #Can be BAM_SortedByCoordinate or BAM_unsorted
######################################################################################
##BOWTIE ALIGNER
#######################################################################################
bowtie2_exec = bowtie2
#optional parameters (do not change unless you've read the documentation):
bowtie2_type = --local #--end-to-end or --local
bowtie2_N = 0 #Number of mismatches allowed (0 or 1)
bowtie2_D = 15 #Give up extending after <int> failed extends in a row
bowtie2_R = 2 #For reads w/ repetitive seeds, try <int> sets of seeds
bowtie2_L = 20 #length of seed substrings; must be >3, <32
bowtie2_i = S,1,0.75 #interval between seed substrings w/r/t read len
bowtie2_rdg = 5,3
bowtie2_rfg = 5,3
############################################################################################################################################
#BAMQC
############################################################################################################################################
bamqc_exec = python
bamqc_script = run_bamqc.py
############################################################################################################################################
#CUFFLINKS
############################################################################################################################################
cufflinks_exec = cufflinks #on SCC we don't need to specify the location of the executable we just need to load the proper module
cufflinks_compatible_hits = inactive #Cufflinks counts only those fragments compatible with some reference transcript towards the number of mapped hits used in the FPKM denominator. (can be 'active' to turn it on)
#The following parameters all normalize the counts during the cufflinks run. However, if you intend to run edgeR afterwards you need raw counts, not FPKM
cufflinks_total_hits = active #create FPKM (can be 'active' for turning this option on or anything else to turn it off)
cufflinks_N = active #upper quartile normalization (can be 'active' for turning this option on or anything else to turn it off)
cufflinks_u = active #multi read correct (can be 'active' for turning this option on or anything else to turn it off)
############################################################################################################################################
#HT-Seq
############################################################################################################################################
HTSeq_exec = htseq-count
sam_exec = samtools #To run samtools view and pipe into htseq-count
HTSeq_t = exon
HTSeq_m = intersection-nonempty #union, intersection-strict and intersection-nonempty
HTSeq_id = gene_id #Default, for most gtf files: gene_id
HTSeq_r = pos #ordering of the alignment file, can be 'pos' or 'name'
Rscript_exec = Rscript
############################################################################################################################################
#featureCount
############################################################################################################################################
featureCount_exec = featureCounts
featureCount_t = exon #Feature level over which read summarization will be performed. 'exon' by default
featureCount_id = gene_id #Attribute type used to group features into meta-features (e.g. genes), when GTF file is provided. 'gene_id' by default (usually 'gene_id' for most annotation files)
featureCount_by_meta = TRUE #Set this to true if you want to assemble feature counts to their meta-feature specified using the 'featureCount_id' parameter. If FALSE, will generate counts per feature specified using the featureCount_t parameter
Rscript_exec = Rscript