-
Notifications
You must be signed in to change notification settings - Fork 4
/
VelikaVini
executable file
·185 lines (163 loc) · 6.11 KB
/
VelikaVini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
source $vini_dir/globals
> $WORKDIR/missing_genes
> $WORKDIR/failed_predictions
> $WORKDIR/SLEM_values #delete SLEM values from the previous run
rm -f $WORKDIR/slem*
rm -f $WORKDIR/dock*
rm -f $WORKDIR/postproc*
rm -f $WORKDIR/SLEM*
rm -f $vini_dir/genes/cosmic_ids.csv
nr_ligands=`wc -l < $vini_dir/ligands/ligands_list`
#nr_complexes=`cat $WORKDIR/nr_complexes`
data="_data"
mkdir -p $vini_dir/database/KEGG_cancer_pathways
mkdir -p $WORKDIR/${cancer_type}_data #folder with an intermediate data
grep $cancer_type $vini_dir/database/cross_references > tmp
line=`cat tmp`
tissue=`echo $line | awk '{print $2}'`
mkdir -p $vini_dir/database/KEGG_cancer_pathways/$CANCER_PATHWAY
sh download_KEGG_pathway
echo "Processing" $CANCER_PATHWAY "metabolic pathway"
start_date=`date`
echo "Analysis started at ${start_date} "
rm -f $WORKDIR/END
sh $vini_dir/create_relations_receptors_files #create KEGG receptor structures
sh $vini_dir/create_receptor_structures
if [ -s $WORKDIR/missing_genes ]
then
echo "Cannot continue as the following KEGG genes are missing entry in Uniprot:"
cat $WORKDIR/missing_genes
echo "Find their fasta sequences at https://www.ncbi.nlm.nih.gov/ and predict their structures."
echo "Then upload structures to $vini_dir/database/genes/pdb_files directory and start Vini again."
fi
if [ -s $WORKDIR/failed_pubchem_structures ]
then
echo "Cannot continue as Openbabel failed sdf to pdb conversion for the following KEGG substances:"
cat $WORKDIR/failed_pubchem_structures
echo "You may try to convert them with Cactus server: https://cactus.nci.nih.gov/translate/"
echo "Then upload these strucures to $vini_dir/database/genes/pdb_file directory and start Vini again."
echo "Exiting." ; exit
fi
if [ ! -e $vini_dir/genes/${cell_line} ]
then
mkdir $vini_dir/genes/${cell_line}
fi
rm -f $vini_dir/genes/cosmic_ids.csv
rm -f $vini_dir/genes/expressions/* $vini_dir/genes/mutations/* $vini_dir/genes/sequences/*
if [ ! -e $vini_dir/ligands/${cell_line} ]
then
echo -n "Performing cleanup..."
while read -r line
do
drug=`echo $line | awk -F',' '{print $1}'`
type=`echo $line | awk -F',' '{print $2}'`
if [ $type == P ]
then
echo -n "."
rm -f $WORKDIR/AlphaFold_${drug}
rm -f $WORKDIR/${drug}.err $WORKDIR/${drug}.out
rm -rf $WORKDIR/${drug}
fi
done < $vini_dir/ligands/ligands_list
mkdir -p $vini_dir/ligands/${cell_line}
echo "Predicting mAb drug structures with AlphaFold. May take up to 2 days, do not interrupt!."
sh $vini_dir/predict_protein_drug_structures
sh $vini_dir/wait_until_jobs_finish
fi
cp -r $vini_dir/ligands/${cell_line} $vini_dir/ligands/pdb_files
if [ $cosmic == y ]
then
echo "Downloading the data from Cosmic, do not interrupt!"
sh $vini_dir/download_Cosmic_data
rm -f $WORKDIR/*err $WORKDIR/*out
sh $vini_dir/predict_mutated_genes
fi
echo -n "Adding gene expressions to receptors and relations..."
sh add_cell_expressions_to_receptors
sh add_expressions_to_relations
echo "done."
if [ $kit == y ]
then
while read -r line
do
uniprotID=`echo $line | awk '{print $2}'`
expression=`echo $line | awk '{print $3}'`
echo $uniprotID
echo $expression
grep $uniprotID $WORKDIR/receptors_contracted > tmp
if [ -s tmp ]
then
echo "gene found"
else
echo "gene not found"
fi
done < $vini_dir/database/genes/custom_genes
fi
while read -r line #Cleanup
do
receptor=`echo $line | awk '{print $3}'`
state=`echo $line | awk '{print $4}'`
if [ $state == W ]
then
echo -n "."
rm -f $WORKDIR/AlphaFold_${receptor}
rm -f $WORKDIR/${receptor}.err $WORKDIR/${receptor}.out
rm -rf $WORKDIR/${receptor}
fi
done < $WORKDIR/receptors_contracted ; echo "done."
j=1
mkdir -p $WORKDIR/${cancer_type}_data #prepare receptors
cp $ROSETTA_TOOLS/amino_acids.py ./
cp $ROSETTA_PUB/clean_pdb_keep_ligand.py ./
source $INSTALL/miniconda2/bin/activate
echo "Copying receptor structures to $WORKDIR/${cancer_type}_data area."
while read -r line
do
printf -v i "%03d" $j
gene=`echo $line | awk '{print $3}'` #get receptor name
cp $vini_dir/genes/pdb_files/${gene}.pdb ./
grep COMPND ${gene}.pdb > tmp
if [ ! -s tmp ]
then
grep ATOM ${gene}.pdb > tmp.pdb
python clean_pdb_keep_ligand.py tmp.pdb -ignorechain
mv tmp.pdb_00.pdb ${gene}.pdb
chain=`head -1 ${gene}.pdb | awk '{print $5}'`
if [ $chain != A ]
then
java -jar $vini_dir/PDBChainNameSupstitutor-1.0-SNAPSHOT-shaded.jar ${gene}.pdb $chain A
mv ${gene}.pdb_${chain}_to_A.pdb ${gene}.pdb
fi
fi
grep -v TER ${gene}.pdb > tmp #adjust PDB format
echo "TER " >> tmp
mv tmp $WORKDIR/${cancer_type}_data/complex_${i}.pdb
rm ${gene}.pdb
let j++
done < $WORKDIR/receptors_contracted
conda deactivate
i=1
rm -f $WORKDIR/ligands_stage/*
while read -r line #Copy ligand files to the stage
do
drug=`echo $line | awk -F',' '{print $1}'`
type=`echo $line | awk -F',' '{print $2}'`
printf -v lig_index "%03d" $i
echo "Copying "${drug}" structure to the stagging area."
if [ $type == S ]
then
cp $vini_dir/ligands/pdb_files/"${drug}".pdb $WORKDIR/ligands_stage/ligand_${lig_index}.pdb
cp $vini_dir/ligands/pdbqt_files/"$drug".pdbqt $WORKDIR/ligands_stage/ligand_${lig_index}.pdbqt
else
cp $vini_dir/ligands/pdb_files/"${drug}".pdb $WORKDIR/ligands_stage/ligand_${lig_index}.pdb
fi
let i++
done < $vini_dir/ligands/ligands_list
nr_complexes=`wc -l $WORKDIR/receptors_contracted | awk '{ print $1 }'`
echo $nr_complexes > $WORKDIR/nr_complexes
for (( therapy_level=1; therapy_level<=max_therapy_level; therapy_level++ ))
do
echo $therapy_level > $vini_dir/therapy_level
sh $vini_dir/malavini
sh $vini_dir/wait_until_jobs_finish
done