Skip to content

Commit

Permalink
fixed postfix
Browse files (browse the repository at this point in the history)
  • Loading branch information
Akaud committed Aug 21, 2024
1 parent 332e61c commit b7daca3
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
6 changes: 3 additions & 3 deletions api/data/refactoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def add_g_position_to_gnomad(gnomad):
gnomAD dataframe. This function modifies it in-place.
"""
gnomad[['chromosome', 'position', 'ref', 'alt']] = gnomad['gnomAD ID'].str.split('-', expand=True)
gnomad['hg38_gnomAD'] = 'g.' + gnomad['position'] + gnomad['ref'] + '>' + gnomad['alt']
gnomad['hg38'] = 'g.' + gnomad['position'] + gnomad['ref'] + '>' + gnomad['alt']
gnomad.drop(columns=['chromosome', 'position', 'ref', 'alt'], inplace=True)


Expand All @@ -203,14 +203,14 @@ def merge_gnomad_lovd(lovd, gnomad):
"""

add_g_position_to_gnomad(gnomad)
gnomad.columns = [col + '_gnomad' if col != 'hg38_gnomAD' else col for col in gnomad.columns]
gnomad.columns = [col + '_gnomad' for col in gnomad.columns]

main_frame = pd.merge(
lovd,
gnomad,
how="outer",
left_on="VariantOnGenome/DNA/hg38",
right_on="hg38_gnomAD")
right_on="hg38_gnomad")

return main_frame

Expand Down
14 changes: 7 additions & 7 deletions tests/pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -3996,10 +3996,10 @@
},
{
"data": {
"text/plain": " id transcriptid effectid position_c_start \\\n0 170936 7329 90 -538 \n1 235579 7329 99 -332 \n2 235593 7329 99 1300 \n3 235595 7329 99 1300 \n4 235603 7329 99 6572 \n... ... ... ... ... \n13272 822052 7329 70 1767 \n13273 822775 7329 70 0 \n13274 822785 7329 70 0 \n13275 822816 7329 70 0 \n13276 867648 7329 70 0 \n\n position_c_start_intron position_c_end position_c_end_intron \\\n0 0 1599 1 \n1 -1 748 1 \n2 -1 1459 1 \n3 -1 1459 1 \n4 -1 6725 1 \n... ... ... ... \n13272 -1 2023 1 \n13273 0 0 0 \n13274 0 0 0 \n13275 0 0 0 \n13276 0 0 0 \n\n VariantOnTranscript/DNA VariantOnTranscript/RNA \\\n0 c.(?_-538)_(1599+1_1600-1)del r.0? \n1 c.(-333+1_-332-1)_(748+1_749-1)del r.? \n2 c.(1299+1_1300-1)_(1459+1_1460-1)del r.? \n3 c.(1299+1_1300-1)_(1459+1_1460-1)del r.(?) \n4 c.(6571+1_6572-1)_(6725+1_6726-1)del r.? \n... ... ... \n13272 c.(1766+1_1767-1)_(2023+1_2024-1)del r.spl \n13273 c.? r.(?) \n13274 c.? r.(?) \n13275 c.? r.(?) \n13276 c.? r.(?) \n\n VariantOnTranscript/Protein VariantOnTranscript/Exon \\\n0 p.0? _1_10i \n1 p.? 2i_4i \n2 p.? 8i_9i \n3 p.? 8i_9i \n4 p.(Ser2191Thrfs*14) 32i_33i \n... ... ... \n13272 p.(?) \n13273 p.(Tyr2555fs) \n13274 p.(Asp498fs) \n13275 p.(Gln3101fs) \n13276 p.? \n\n VariantOnGenome/DNA/hg38 gnomAD ID_gnomad hg38_gnomAD \n0 <NA> <NA> \n1 <NA> <NA> \n2 <NA> <NA> \n3 <NA> <NA> \n4 <NA> <NA> \n... ... ... ... \n13272 g.? <NA> <NA> \n13273 g.? <NA> <NA> \n13274 g.? <NA> <NA> \n13275 g.? <NA> <NA> \n13276 g.? <NA> <NA> \n\n[13277 rows x 14 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>transcriptid</th>\n <th>effectid</th>\n <th>position_c_start</th>\n <th>position_c_start_intron</th>\n <th>position_c_end</th>\n <th>position_c_end_intron</th>\n <th>VariantOnTranscript/DNA</th>\n <th>VariantOnTranscript/RNA</th>\n <th>VariantOnTranscript/Protein</th>\n <th>VariantOnTranscript/Exon</th>\n <th>VariantOnGenome/DNA/hg38</th>\n <th>gnomAD ID_gnomad</th>\n <th>hg38_gnomAD</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>170936</td>\n <td>7329</td>\n <td>90</td>\n <td>-538</td>\n <td>0</td>\n <td>1599</td>\n <td>1</td>\n <td>c.(?_-538)_(1599+1_1600-1)del</td>\n <td>r.0?</td>\n <td>p.0?</td>\n <td>_1_10i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>1</th>\n <td>235579</td>\n <td>7329</td>\n <td>99</td>\n <td>-332</td>\n <td>-1</td>\n <td>748</td>\n <td>1</td>\n <td>c.(-333+1_-332-1)_(748+1_749-1)del</td>\n <td>r.?</td>\n <td>p.?</td>\n <td>2i_4i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>2</th>\n <td>235593</td>\n <td>7329</td>\n <td>99</td>\n <td>1300</td>\n <td>-1</td>\n <td>1459</td>\n <td>1</td>\n <td>c.(1299+1_1300-1)_(1459+1_1460-1)del</td>\n <td>r.?</td>\n <td>p.?</td>\n <td>8i_9i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>3</th>\n <td>235595</td>\n <td>7329</td>\n <td>99</td>\n <td>1300</td>\n <td>-1</td>\n <td>1459</td>\n <td>1</td>\n <td>c.(1299+1_1300-1)_(1459+1_1460-1)del</td>\n <td>r.(?)</td>\n <td>p.?</td>\n <td>8i_9i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>4</th>\n <td>235603</td>\n <td>7329</td>\n <td>99</td>\n <td>6572</td>\n 
<td>-1</td>\n <td>6725</td>\n <td>1</td>\n <td>c.(6571+1_6572-1)_(6725+1_6726-1)del</td>\n <td>r.?</td>\n <td>p.(Ser2191Thrfs*14)</td>\n <td>32i_33i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>13272</th>\n <td>822052</td>\n <td>7329</td>\n <td>70</td>\n <td>1767</td>\n <td>-1</td>\n <td>2023</td>\n <td>1</td>\n <td>c.(1766+1_1767-1)_(2023+1_2024-1)del</td>\n <td>r.spl</td>\n <td>p.(?)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13273</th>\n <td>822775</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.(Tyr2555fs)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13274</th>\n <td>822785</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.(Asp498fs)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13275</th>\n <td>822816</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.(Gln3101fs)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13276</th>\n <td>867648</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.?</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n </tbody>\n</table>\n<p>13277 rows × 14 columns</p>\n</div>"
"text/plain": " id transcriptid effectid position_c_start \\\n0 170936 7329 90 -538 \n1 235579 7329 99 -332 \n2 235593 7329 99 1300 \n3 235595 7329 99 1300 \n4 235603 7329 99 6572 \n... ... ... ... ... \n13272 822052 7329 70 1767 \n13273 822775 7329 70 0 \n13274 822785 7329 70 0 \n13275 822816 7329 70 0 \n13276 867648 7329 70 0 \n\n position_c_start_intron position_c_end position_c_end_intron \\\n0 0 1599 1 \n1 -1 748 1 \n2 -1 1459 1 \n3 -1 1459 1 \n4 -1 6725 1 \n... ... ... ... \n13272 -1 2023 1 \n13273 0 0 0 \n13274 0 0 0 \n13275 0 0 0 \n13276 0 0 0 \n\n VariantOnTranscript/DNA VariantOnTranscript/RNA \\\n0 c.(?_-538)_(1599+1_1600-1)del r.0? \n1 c.(-333+1_-332-1)_(748+1_749-1)del r.? \n2 c.(1299+1_1300-1)_(1459+1_1460-1)del r.? \n3 c.(1299+1_1300-1)_(1459+1_1460-1)del r.(?) \n4 c.(6571+1_6572-1)_(6725+1_6726-1)del r.? \n... ... ... \n13272 c.(1766+1_1767-1)_(2023+1_2024-1)del r.spl \n13273 c.? r.(?) \n13274 c.? r.(?) \n13275 c.? r.(?) \n13276 c.? r.(?) \n\n VariantOnTranscript/Protein VariantOnTranscript/Exon \\\n0 p.0? _1_10i \n1 p.? 2i_4i \n2 p.? 8i_9i \n3 p.? 8i_9i \n4 p.(Ser2191Thrfs*14) 32i_33i \n... ... ... \n13272 p.(?) \n13273 p.(Tyr2555fs) \n13274 p.(Asp498fs) \n13275 p.(Gln3101fs) \n13276 p.? \n\n VariantOnGenome/DNA/hg38 gnomAD ID_gnomad hg38_gnomad \n0 <NA> <NA> \n1 <NA> <NA> \n2 <NA> <NA> \n3 <NA> <NA> \n4 <NA> <NA> \n... ... ... ... \n13272 g.? <NA> <NA> \n13273 g.? <NA> <NA> \n13274 g.? <NA> <NA> \n13275 g.? <NA> <NA> \n13276 g.? <NA> <NA> \n\n[13277 rows x 14 columns]",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>id</th>\n <th>transcriptid</th>\n <th>effectid</th>\n <th>position_c_start</th>\n <th>position_c_start_intron</th>\n <th>position_c_end</th>\n <th>position_c_end_intron</th>\n <th>VariantOnTranscript/DNA</th>\n <th>VariantOnTranscript/RNA</th>\n <th>VariantOnTranscript/Protein</th>\n <th>VariantOnTranscript/Exon</th>\n <th>VariantOnGenome/DNA/hg38</th>\n <th>gnomAD ID_gnomad</th>\n <th>hg38_gnomad</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>170936</td>\n <td>7329</td>\n <td>90</td>\n <td>-538</td>\n <td>0</td>\n <td>1599</td>\n <td>1</td>\n <td>c.(?_-538)_(1599+1_1600-1)del</td>\n <td>r.0?</td>\n <td>p.0?</td>\n <td>_1_10i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>1</th>\n <td>235579</td>\n <td>7329</td>\n <td>99</td>\n <td>-332</td>\n <td>-1</td>\n <td>748</td>\n <td>1</td>\n <td>c.(-333+1_-332-1)_(748+1_749-1)del</td>\n <td>r.?</td>\n <td>p.?</td>\n <td>2i_4i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>2</th>\n <td>235593</td>\n <td>7329</td>\n <td>99</td>\n <td>1300</td>\n <td>-1</td>\n <td>1459</td>\n <td>1</td>\n <td>c.(1299+1_1300-1)_(1459+1_1460-1)del</td>\n <td>r.?</td>\n <td>p.?</td>\n <td>8i_9i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>3</th>\n <td>235595</td>\n <td>7329</td>\n <td>99</td>\n <td>1300</td>\n <td>-1</td>\n <td>1459</td>\n <td>1</td>\n <td>c.(1299+1_1300-1)_(1459+1_1460-1)del</td>\n <td>r.(?)</td>\n <td>p.?</td>\n <td>8i_9i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>4</th>\n <td>235603</td>\n <td>7329</td>\n <td>99</td>\n <td>6572</td>\n 
<td>-1</td>\n <td>6725</td>\n <td>1</td>\n <td>c.(6571+1_6572-1)_(6725+1_6726-1)del</td>\n <td>r.?</td>\n <td>p.(Ser2191Thrfs*14)</td>\n <td>32i_33i</td>\n <td></td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>13272</th>\n <td>822052</td>\n <td>7329</td>\n <td>70</td>\n <td>1767</td>\n <td>-1</td>\n <td>2023</td>\n <td>1</td>\n <td>c.(1766+1_1767-1)_(2023+1_2024-1)del</td>\n <td>r.spl</td>\n <td>p.(?)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13273</th>\n <td>822775</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.(Tyr2555fs)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13274</th>\n <td>822785</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.(Asp498fs)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13275</th>\n <td>822816</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.(Gln3101fs)</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n <tr>\n <th>13276</th>\n <td>867648</td>\n <td>7329</td>\n <td>70</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>c.?</td>\n <td>r.(?)</td>\n <td>p.?</td>\n <td></td>\n <td>g.?</td>\n <td>&lt;NA&gt;</td>\n <td>&lt;NA&gt;</td>\n </tr>\n </tbody>\n</table>\n<p>13277 rows × 14 columns</p>\n</div>"
},
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -4029,19 +4029,19 @@
" on='id',\n",
" how='left')\n",
"\n",
"gnomad_data = gnomad_data.copy()[[\"gnomAD ID\"]]\n",
"gnomad_data = gnomad_data.copy()\n",
"final_data = merge_gnomad_lovd(lovd_data, gnomad_data)\n",
"final_data"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-08-21T18:22:09.068809Z",
"start_time": "2024-08-21T18:21:48.966115Z"
"end_time": "2024-08-21T18:35:42.249375Z",
"start_time": "2024-08-21T18:35:33.312752Z"
}
},
"id": "dd9b17623f26a07c",
"execution_count": 2
"execution_count": 1
},
{
"cell_type": "code",
Expand Down

0 comments on commit b7daca3

Please sign in to comment.