Skip to content

Commit

Permalink
BUG: allow metadata to be a superset of the table
Browse files (browse the repository at this point in the history)
  • Loading branch information
wasade committed Jan 16, 2024
1 parent c9fe7b7 commit bba25a2
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 23 deletions.
23 changes: 14 additions & 9 deletions q2_katharoseq/_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,22 +109,30 @@ def read_count_threshold(
positive_control_column = positive_control_column.to_series()
cell_count_column = cell_count_column.to_series()

# # FILTER COLUMNS
if set(positive_control_column.index) != set(cell_count_column.index):
raise ValueError("Columns have different samples")

md_samples = set(positive_control_column.index)
tab_samples = set(table.index)
if not md_samples.issuperset(tab_samples):
raise ValueError("Table contains samples not in metadata")

# restrict both metadata columns to the samples present in the feature table
positive_control_column = positive_control_column.loc[list(tab_samples)]
cell_count_column = cell_count_column.loc[list(tab_samples)]

# FILTER COLUMNS
positive_controls = positive_control_column[
positive_control_column == positive_control_value]

if not positive_controls.shape[0]:
raise ValueError('No positive controls found in ' +
'positive control column.')
'positive control column which are in the table.')
positive_controls = pd.Series(positive_controls)

cell_counts = cell_count_column.loc[positive_controls.index]

# # CHECK SHAPES
inds = positive_controls.index.intersection(table.index)
print(inds)
if len(inds) == 0:
raise KeyError('No positive controls found in table.')
table_positive = table.loc[inds]

if threshold > 100 or threshold < 0:
Expand All @@ -135,9 +143,6 @@ def read_count_threshold(
# READS IN HIGHEST INPUT SAMPLE
max_cell_counts = cell_counts.idxmax()

if max_cell_counts not in df.index.values:
raise KeyError('No positive controls found in table.')

max_input = df.loc[max_cell_counts]
max_inputT = max_input.T
max_inputT = max_inputT.sort_values(ascending=False).head(10)
Expand Down
17 changes: 3 additions & 14 deletions q2_katharoseq/tests/test_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,23 +163,12 @@ def test_no_positive_controls_in_col(self):
self.control)

def test_no_positive_controls_in_table(self):

ind = pd.Index(
['s5', 's6', 's7', 's8'],
name='sampleid')
table = pd.DataFrame(
[[0, 1, 2, 3],
[0, 1, 2, 3],
[5, 4, 3, 2],
[7, 2, 3, 4]],
index=ind, # change index
columns=['f1', 'f2', 'f3', 'f4'])
table = self.table.loc[['s2', 's4']]

with tempfile.TemporaryDirectory() as output_dir, \
self.assertRaisesRegex(
KeyError,
'No positive controls found '
'in table.'):
ValueError,
'No positive controls found'):

read_count_threshold(
output_dir,
Expand Down

0 comments on commit bba25a2

Please sign in to comment.