-
Notifications
You must be signed in to change notification settings - Fork 0
/
orf.py
49 lines (39 loc) · 1.06 KB
/
orf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @File : orf.py
# @Date : 2019-02-17
# @Author : luyang(luyang@novogene.com)
import re
from Bio.Seq import Seq
from module.readfasta import readfasta
def find_orf(seq):
    """Record the protein encoded by every open reading frame in ``seq``.

    Each ``ATG`` reported by the global ``pattern`` is taken as a candidate
    start. From that position the sequence is read one codon at a time until
    the first codon contained in the global ``stop_codons``. The stretch from
    the start through that stop codon is translated, and the resulting
    protein string is stored as a key of the global ``prot`` dict, so a
    protein reachable from several places is kept only once. A start with no
    in-frame stop codon before the end of ``seq`` contributes nothing.

    ``pattern``, ``stop_codons`` and ``prot`` are not parameters: they must
    already exist as module globals (the script creates them under its
    ``__main__`` guard) before this function is called.

    Args:
        seq: Nucleotide sequence as a plain string.

    Returns:
        None. Results are accumulated in ``prot``.
    """
    for match in pattern.finditer(seq):
        start_position = match.start()
        # Walk the reading frame fixed by this ATG, whole codons only; a
        # trailing fragment shorter than three bases cannot be a stop codon.
        for j in range(start_position, len(seq) - 2, 3):
            if seq[j:j + 3] in stop_codons:
                # The slice includes the stop codon; stop_symbol='' makes it
                # translate to nothing instead of a trailing '*'.
                tmp = Seq(seq[start_position:j + 3]).translate(stop_symbol='')
                prot[str(tmp)] = ''
                break
def main():
    """Print every distinct ORF protein found in the input FASTA file.

    Each record returned by ``readfasta`` is scanned twice with
    ``find_orf``: once as read and once as its reverse complement. The
    proteins collected in the global ``prot`` dict are then printed, one
    per line.
    """
    fasta_path = 'input/rosalind_orf.txt'
    records = readfasta(fasta_path)
    for record_id in records:
        forward = records[record_id]
        find_orf(forward)
        # The opposite strand is scanned through its reverse complement.
        find_orf(str(Seq(forward).reverse_complement()))
    for protein in prot:
        print(protein)
if __name__ == "__main__":
    # Shared state read by find_orf() and main(); it exists only when the
    # file is run as a script.
    # Proteins found so far, stored as dict keys so duplicates collapse.
    prot = {}
    # Codons that terminate an open reading frame.
    stop_codons = ['TAG', 'TGA', 'TAA']
    # Zero-width lookahead, so overlapping ATG start codons are all reported.
    pattern = re.compile('(?=(ATG))')
    main()