Skip to content

Commit

Permalink
Merge branch 'gtr_substitution' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
priyappillai committed May 14, 2021
2 parents 7c8b01b + a5b226b commit 465e8be
Show file tree
Hide file tree
Showing 9 changed files with 514 additions and 35 deletions.
79 changes: 71 additions & 8 deletions adapt/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ def determine_representative_guides(self, start, guide_length,
representatives.add(gd)
return representatives

def compute_activity(self, start, gd_sequence, predictor):
def compute_activity(self, start, gd_sequence, predictor, mutator=None):
"""Compute activity between a guide sequence and every target sequence
in the alignment.
Expand All @@ -743,15 +743,45 @@ def compute_activity(self, start, gd_sequence, predictor):
start: start position in alignment at which to target
gd_sequence: str representing guide sequence
predictor: a adapt.utils.predict_activity.Predictor object
mutator: a adapt.utils.mutate.Mutator object
Returns:
numpy array x where x[i] gives the predicted activity between
gd_sequence and the sequence in the alignment at index i
gd_sequence and the sequence in the alignment at index i. If
mutator is not None, x[i] gives the predicted activity after the
mutations specified in the mutator.
"""
guide_length = len(gd_sequence)
assert start + guide_length <= self.seq_length

if isinstance(predictor, predict_activity.SimpleBinaryPredictor):
if mutator:
# Extract the target sequences, including context to use with
# prediction (i.e. the flanking sequences, if they exist)
left_context = 0
right_context = 0
if predictor:
if predictor.required_flanking_seqs[0]:
left_context = len(predictor.required_flanking_seqs[0])
if predictor.required_flanking_seqs[1]:
right_context = len(predictor.required_flanking_seqs[1])

aln_for_guide_with_context = self.extract_range(
start - left_context,
start + guide_length + right_context)
seq_rows_with_context = aln_for_guide_with_context.make_list_of_seqs()

# Start array of predicted activities; make this all 0s so that
# sequences for which activities are not computed (e.g., gap)
# have activity=0
activities = np.zeros(self.num_sequences)
for i, seq_with_context in enumerate(seq_rows_with_context):
activity = mutator.compute_mutated_activity(predictor,
seq_with_context,
gd_sequence,
start=start)
activities[i] = activity
return activities
# Do not use a model; just predict binary activity (1 or 0)
# based on distance between guide and targets
return predictor.compute_activity(start, gd_sequence, self)
Expand Down Expand Up @@ -785,12 +815,24 @@ def compute_activity(self, start, gd_sequence, predictor):
# is best to batch these
pairs_to_eval = []
pairs_to_eval_seq_idx = []
for seq_with_context, seq_idx in seq_rows_with_context:
pair = (seq_with_context, gd_sequence)
pairs_to_eval += [pair]
pairs_to_eval_seq_idx += [seq_idx]
# Evaluate activity
evals = predictor.compute_activity(start, pairs_to_eval)
evals = []

if mutator:
for seq_with_context, seq_idx in seq_rows_with_context:
activity = mutator.compute_mutated_activity(predictor,
seq_with_context,
gd_sequence,
start=start)
evals.append(activity)
pairs_to_eval_seq_idx.append(seq_idx)
else:
for seq_with_context, seq_idx in seq_rows_with_context:
pair = (seq_with_context, gd_sequence)
pairs_to_eval.append(pair)
pairs_to_eval_seq_idx.append(seq_idx)
# Evaluate activity
evals = predictor.compute_activity(start, pairs_to_eval)

for activity, seq_idx in zip(evals, pairs_to_eval_seq_idx):
# Fill in the activity for seq_idx
activities[seq_idx] = activity
Expand Down Expand Up @@ -860,6 +902,27 @@ def position_entropy(self):

return position_entropy

def base_percentages(self):
"""Determines the percentage of each base pair in the alignment.
Returns:
dictionary of base pair to its percentage in the alignment
"""
counts = {'A': 0, 'T': 0, 'C': 0, 'G': 0}
for i in range(self.seq_length):
for b in [self.seqs[i][j] for j in range(self.num_sequences)]:
if b in counts:
counts[b] += 1
elif b in guide.FASTA_CODES:
for c in guide.FASTA_CODES[b]:
counts[c] += 1.0 / len(guide.FASTA_CODES[b])
elif b != '-':
raise ValueError("Unknown base call %s" % b)
total = sum(counts.values())
for base in counts:
counts[base] /= total
return counts

@staticmethod
def from_list_of_seqs(seqs):
"""Construct a Alignment from aligned list of sequences.
Expand Down
2 changes: 1 addition & 1 deletion adapt/guide_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ def guide_set_activities_percentile(self, window_start, window_end,
"""Compute percentiles of activity across target sequences for
a guide set in a window.
For example, when percentiles is 0.5, this returns the median
For example, when percentiles is 50, this returns the median
activity across the target sequences that the guide set provides.
Args:
Expand Down
50 changes: 45 additions & 5 deletions adapt/target_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import logging
import math
import os
import numpy as np

from adapt import guide_search
from adapt import primer_search
Expand All @@ -28,7 +29,8 @@ def __init__(self, ps, gs, obj_type='min',
max_primers_at_site=None,
max_target_length=None, obj_weights=None,
only_account_for_amplified_seqs=False,
halt_early=False, obj_value_shift=None):
halt_early=False, obj_value_shift=None,
mutator=None):
"""
Args:
ps: PrimerSearcher object
Expand Down Expand Up @@ -62,6 +64,8 @@ def __init__(self, ps, gs, obj_type='min',
positive so that there is not confusion about differences
between positive/negative. If None (default), defaults are
set below based on obj_type.
mutator: a adapt.utils.mutate Mutator object. If None (default),
do not predict activity after mutations.
"""
self.ps = ps
self.gs = gs
Expand Down Expand Up @@ -99,6 +103,8 @@ def __init__(self, ps, gs, obj_type='min',
# negative, so shift all up so that most are positive
self.obj_value_shift = 4.0

self.mutator = mutator

def _find_primer_pairs(self):
"""Find suitable primer pairs using self.ps.
Expand All @@ -115,6 +121,33 @@ def _find_primer_pairs(self):
p2 = primers[j]
yield (p1, p2)

def _find_mutated_activity(self, targets):
"""Find the activities of the guides after mutation
For each target, find the activity of the best of the guide
set across each potential starting location after mutation,
averaged across the sequences.
Args:
targets: list of tuples (obj_value, target) where target is a tuple
((p1, p2), (guides_stats, guides))
Returns:
list of mutated activities, ordered by target's ordering.
"""
mutated_activities = [None] * len(targets)
for i, (obj_value, target) in enumerate(targets):
((p1, p2), (guides_stats, guides)) = target
mutated_activities[i] = max(max(np.average(
self.gs.aln.compute_activity(
start_pos,
guide,
self.gs.predictor,
self.mutator))
for start_pos in self.gs._selected_guide_positions[guide])
for guide in guides)
return mutated_activities

def find_targets(self, best_n=10, no_overlap='amplicon'):
"""Find targets across an alignment.
Expand Down Expand Up @@ -520,10 +553,12 @@ def find_and_write_targets(self, out_fn, best_n=10, no_overlap='amplicon'):
When 'none', many targets in the best_n may be very similar
"""
targets = self.find_targets(best_n=best_n, no_overlap=no_overlap)
if self.mutator:
mutated_activity = self._find_mutated_activity(targets)

with open(out_fn, 'w') as outf:
# Write a header
outf.write('\t'.join(['objective-value', 'target-start', 'target-end',
headers = ['objective-value', 'target-start', 'target-end',
'target-length',
'left-primer-start', 'left-primer-num-primers',
'left-primer-frac-bound', 'left-primer-target-sequences',
Expand All @@ -533,10 +568,13 @@ def find_and_write_targets(self, out_fn, best_n=10, no_overlap='amplicon'):
'guide-set-expected-activity',
'guide-set-median-activity', 'guide-set-5th-pctile-activity',
'guide-expected-activities',
'guide-target-sequences', 'guide-target-sequence-positions']) +
'\n')
'guide-target-sequences', 'guide-target-sequence-positions']
if self.mutator:
headers.append('guide-set-5th-pctile-mutated-activity')

outf.write('\t'.join(headers) + '\n')

for (obj_value, target) in targets:
for i, (obj_value, target) in enumerate(targets):
((p1, p2), (guides_stats, guides)) = target

# Break out guides_stats
Expand Down Expand Up @@ -585,6 +623,8 @@ def find_and_write_targets(self, out_fn, best_n=10, no_overlap='amplicon'):
guides_activity_median, guides_activity_5thpctile,
expected_activities_per_guide_str,
guides_seqs_str, guides_positions_str]
if self.mutator:
line.append(mutated_activity[i])

outf.write('\t'.join([str(x) for x in line]) + '\n')

Expand Down
50 changes: 38 additions & 12 deletions adapt/tests/test_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,33 +572,59 @@ def test_position_entropy_simple(self):
'AAAAA']
aln = alignment.Alignment.from_list_of_seqs(seqs)
all_ps = [[1],
[0.25, 0.75],
[0.25, 0.25, .5],
[0.25, 0.75],
[0.25, 0.25, .5],
[0.25, 0.25, 0.25, 0.25],
[0.5, 0.5]]

entropy = [sum([-p*log2(p) for p in ps]) for ps in all_ps]
self.assertEqual(aln.position_entropy(),
entropy)
self.assertEqual(aln.position_entropy(), entropy)

def test_position_entropy_with_ambiguity(self):
seqs = ['MRWSYKVHDBN']
seqs = ['MRWSYKVHDBN-']
aln = alignment.Alignment.from_list_of_seqs(seqs)
all_ps = [[0.5, 0.5],
[0.5, 0.5],
[0.5, 0.5],
[0.5, 0.5],
[0.5, 0.5],
[0.5, 0.5],
[1/3.0, 1/3.0, 1/3.0],
[1/3.0, 1/3.0, 1/3.0],
[1/3.0, 1/3.0, 1/3.0],
[1/3.0, 1/3.0, 1/3.0],
[0.25, 0.25, 0.25, 0.25]]
[1/3.0, 1/3.0, 1/3.0],
[1/3.0, 1/3.0, 1/3.0],
[1/3.0, 1/3.0, 1/3.0],
[1/3.0, 1/3.0, 1/3.0],
[0.25, 0.25, 0.25, 0.25],
[1]]

entropy = [sum([-p*log2(p) for p in ps]) for ps in all_ps]
self.assertEqual(aln.position_entropy(),
entropy)
self.assertEqual(aln.position_entropy(), entropy)

def test_base_percentages_simple(self):
seqs = ['ACCCC',
'AAGGC',
'AAATA',
'AAAAA']
aln = alignment.Alignment.from_list_of_seqs(seqs)
base_p = {
'A': 0.6,
'C': 0.25,
'G': 0.1,
'T': 0.05
}

self.assertEqual(aln.base_percentages(), base_p)

def test_base_percentages_with_ambiguity(self):
seqs = ['MRWSYKVHDBN-']
aln = alignment.Alignment.from_list_of_seqs(seqs)
base_p = {
'A': 0.25,
'C': 0.25,
'G': 0.25,
'T': 0.25
}

self.assertEqual(aln.base_percentages(), base_p)

def test_construct_from_0_seqs(self):
with self.assertRaises(Exception):
Expand Down
20 changes: 19 additions & 1 deletion adapt/tests/test_target_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from adapt import guide_search
from adapt import primer_search
from adapt import target_search
from adapt.utils import lsh
from adapt.utils import lsh, mutate, predict_activity

__author__ = 'Hayden Metsky <hayden@mit.edu>'

Expand Down Expand Up @@ -339,6 +339,24 @@ def test_find_targets_allowing_overlap_maximize_random_greedy(self):
'random-greedy', 1, 3, 100, check_for_one_guide=True,
check_for_no_targets=True)

def test_find_mutated_activity(self):
mutator = mutate.GTRSubstitutionMutator(self.a_aln, 1, 1, 1, 1, 1, 1,
1, 1, 1)
predictor = predict_activity.SimpleBinaryPredictor(
6, False, required_flanking_seqs=('A', None))
c_ps = primer_search.PrimerSearcher(self.a_aln, 4, 0, 1.0, (1, 1, 100))
c_gs = guide_search.GuideSearcherMaximizeActivity(
self.a_aln, 6, 1, 5, 0.05, (1, 1, 100), algorithm='greedy',
predictor=predictor)
c = target_search.TargetSearcher(c_ps, c_gs, max_primers_at_site=2,
obj_type='max', mutator=mutator)
for best_n in [1, 2, 3, 4, 5, 6]:
targets = c.find_targets(best_n=best_n, no_overlap='none')
mut_activities = c._find_mutated_activity(targets)
self.assertEqual(len(mut_activities), best_n)
for mut_activity in mut_activities:
self.assertIsInstance(mut_activity, float)

def tearDown(self):
# Re-enable logging
logging.disable(logging.NOTSET)
Loading

0 comments on commit 465e8be

Please sign in to comment.