Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge main into JTE/PKFE-46 #59

Merged
merged 27 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
929d137
implemented a method request gene information by its id, forms it to …
KajusC Jul 31, 2024
ea192ee
dynamically adds variation properties
KajusC Aug 6, 2024
6dba6fe
Implemented a function which extracts variation ids from gene name, t…
KajusC Aug 7, 2024
35f43bf
Reformatted the parse
KajusC Aug 19, 2024
5243f8e
Created a function which collects EYS data from gnomAD api.
KajusC Aug 22, 2024
5c30ce1
Added ability to get any gene from API
KajusC Aug 22, 2024
197864c
Removed from last branch
KajusC Aug 22, 2024
5a23fea
Merge branch 'main' into KCE/GnomAD_api_requests
KajusC Aug 22, 2024
4d3575a
Downloaded data from API, formatted code
KajusC Aug 28, 2024
437954a
Refactored and resolved PR comments
KajusC Sep 2, 2024
6b68a98
MDE/PKFE-31 bug-fix
mantvydasdeltuva Sep 7, 2024
be9dc76
MDE/PKFE-31 updated workspace provider for file tree array
mantvydasdeltuva Sep 7, 2024
fc8d067
MDE/PKFE-31 toolbar resizing
mantvydasdeltuva Sep 7, 2024
c27b552
MDE/PKFE-31 implemented toolbar context provider
mantvydasdeltuva Sep 7, 2024
456a4f7
MDE/PKFE-31 overhauled toolbar buttons logic for hook support
mantvydasdeltuva Sep 7, 2024
26f842d
MDE/PKFE-31 styled core components for param fields
mantvydasdeltuva Sep 7, 2024
36af934
MDE/PKFE-31 implemented different groups params fields
mantvydasdeltuva Sep 7, 2024
523657e
MDE/PKFE-31 updated toolbar view
mantvydasdeltuva Sep 7, 2024
33d500c
MDE/PKFE-31 update button icon color on disabled
mantvydasdeltuva Sep 7, 2024
9a0b879
MDE/PKFE-31 removed commented code
mantvydasdeltuva Sep 7, 2024
f758a51
MDE/PKFE-31 select changes during file selection
mantvydasdeltuva Sep 9, 2024
6c8fad4
MDE/PKFE-31 params additional fixes and changes
mantvydasdeltuva Sep 9, 2024
ae02827
MDE/PKFE-31 visual bug-fix
mantvydasdeltuva Sep 9, 2024
522cf70
Extra PR refactoring
KajusC Sep 9, 2024
29974ef
Fixed visual issues
justinnas Sep 9, 2024
64779bb
Merge pull request #43 from Strexas/KCE/GnomAD_api_requests
KajusC Sep 10, 2024
1c7e137
Merge pull request #57 from Strexas/MDE/PKFE-31
mantvydasdeltuva Sep 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,6 @@
# Functions for refactoring data
set_lovd_dtypes,
parse_lovd,
from_clinvar_name_to_cdna_position
from_clinvar_name_to_cdna_position,
save_lovd_as_vcf,
)
65 changes: 65 additions & 0 deletions api/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
Package for data collection purposes; it provides both collection and refactoring functionality.

Data from the LOVD, ClinVar and gnomAD databases can be downloaded using this package. gnomAD and
ClinVar are limited to the EYS gene, but it is possible to download data for any gene in LOVD.

All necessary functionality can be imported directly from data without
specifying the module.

A data collection pipeline example is established for the project's specific usage.
"""

# CONSTANTS IMPORT
from .constants import (
# URLs for LOVD database
LOVD_URL, LOVD_URL_EYS, LOVD_FILE_URL, LOVD_FILE_URL_EYS,

# URLs for gnomAD database
GNOMAD_URL, GNOMAD_URL_EYS, GNOMAD_FILE_URL_EYS,

# URLs for ClinVar database
CLINVAR_URL, CLINVAR_URL_EYS, CLINVAR_FILE_URL_EYS,

# Paths for data storage
DATA_PATH, LOVD_PATH, GNOMAD_PATH, CLINVAR_PATH,

# Data types for tables
LOVD_TABLES_DATA_TYPES,

# Paths for database downloads
DATABASES_DOWNLOAD_PATHS,

GNOMAD_PATH,
)

# DATA COLLECTION IMPORT
from .collection import (
# Custom exceptions
BadResponseException,
DownloadError,

# Custom utility functions
get_file_from_url,

# Functions for downloading databases
download_lovd_database_for_eys_gene,
download_genes_lovd,
download_database_for_eys_gene,

# Functions for storing databases
store_database_for_eys_gene
)

# DATA REFACTORING IMPORT
from .refactoring import (
# Functions for refactoring data
set_lovd_dtypes,
parse_lovd,
from_clinvar_name_to_cdna_position,
save_lovd_as_vcf,
request_gnomad_api_data,
merge_gnomad_lovd,
parse_gnomad,
set_gnomad_dtypes,
)
147 changes: 147 additions & 0 deletions api/data/refactoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
import os
import logging

import requests

import pandas as pd
from pandas import DataFrame

from .constants import LOVD_TABLES_DATA_TYPES, LOVD_PATH, GNOMAD_TABLES_DATA_TYPES, GNOMAD_PATH



def set_lovd_dtypes(df_dict):
"""
Convert data from LOVD format table to desired data format based on specified data types.
Expand Down Expand Up @@ -242,3 +245,147 @@ def save_lovd_as_vcf(data, save_to="./lovd.vcf"):

f.write("\t".join(record))
f.write("\n")


def prepare_popmax_calculation(df, pop_data, name, pop_ids, index):
    """
    Writes per-population allele count (ac) and allele number (an) cells
    for a single row of the DataFrame, in place.

    Every population in ``pop_ids`` first gets ``{name}_ac_{id}`` and
    ``{name}_an_{id}`` set to 0 for the given row. If ``pop_data`` is a list,
    the ``ac`` and ``an`` of each of its entries are then written to the
    columns built from that entry's ``id`` (also for ids that are not listed
    in ``pop_ids``). Any other ``pop_data`` value leaves the zeros untouched.

    :param DataFrame df: DataFrame containing gnomAD data, modified in place
    :param pop_data: list of dicts with 'id', 'ac' and 'an' keys, or a non-list placeholder
    :param str name: column prefix, e.g. 'exome' or 'genome'
    :param list[str] pop_ids: list of population ids that are zero-filled
    :param int index: row label of the variant in df
    """

    def column(metric, population):
        return f'{name}_{metric}_{population}'

    # Start from zero so populations absent from pop_data still have a value.
    for population in pop_ids:
        for metric in ('ac', 'an'):
            df.loc[index, column(metric, population)] = 0

    if not isinstance(pop_data, list):
        return

    # Overwrite the defaults with the reported values.
    for entry in pop_data:
        population = entry['id']
        for metric in ('ac', 'an'):
            df.loc[index, column(metric, population)] = entry[metric]


def request_gnomad_api_data(gene_name):
    """
    Requests gnomAD API for data about a specific gene and returns a table
    with one row per variant and the columns:
    - gnomAD ID (the API's variant_id)
    - HGVS Consequence (cDNA change)
    - Protein Consequence (protein change)
    - Allele Frequency
    - Homozygote Count
    - Popmax
    - Popmax population

    Exome and genome counts are summed before any frequency is calculated.
    Popmax is the highest per-population allele frequency of a variant; when
    no population has a frequency above 0, Popmax is 0 and the population
    name is an empty string.

    :param str gene_name: name of gene
    :returns: DataFrame from gnomAD API
    :rtype: DataFrame
    :raises requests.exceptions.HTTPError: if the API answers with a status code other than 200
    :raises ValueError: if the response body carries no gene data
    """

    url = 'https://gnomad.broadinstitute.org/api'
    query = f"""
    query{{
      gene(gene_symbol: "{gene_name}", reference_genome: GRCh38) {{
        variants(dataset: gnomad_r4)
        {{
          variant_id
          chrom
          pos
          ref
          hgvsc
          hgvsp
          alt
          exome {{
            ac
            an
            ac_hom
            populations
            {{
              id
              ac
              an
            }}
          }}
          genome
          {{
            ac
            an
            ac_hom
            populations
            {{
              id
              ac
              an
            }}
          }}
        }}
      }}
    }}
    """

    response = requests.post(url, json={'query': query}, timeout=300)  # timeout set to 5 minutes

    # Fail loudly instead of printing and then crashing on a missing key below.
    if response.status_code != 200:
        raise requests.exceptions.HTTPError(
            f'gnomAD API request for gene "{gene_name}" failed '
            f'with status code {response.status_code}',
            response=response,
        )

    payload = response.json()
    gene = (payload.get('data') or {}).get('gene')
    if gene is None:
        raise ValueError(
            f'gnomAD API returned no data for gene "{gene_name}": {payload.get("errors")}'
        )
    variants = gene['variants']

    # Columns kept in the result; names must match the columns created below.
    output_columns = ['Popmax', 'Popmax population', 'Homozygote Count', 'Allele Frequency',
                      'variant_id', 'HGVS Consequence', 'Protein Consequence']
    renamed_columns = {'variant_id': 'gnomAD ID'}

    # Nothing to normalise: return an empty table that still has the expected columns.
    if not variants:
        return pd.DataFrame(columns=output_columns).rename(columns=renamed_columns)

    df = pd.json_normalize(variants)

    df['total_ac'] = df['exome.ac'].fillna(0) + df['genome.ac'].fillna(0)
    df['total_an'] = df['exome.an'].fillna(0) + df['genome.an'].fillna(0)

    df['HGVS Consequence'] = df['hgvsc'].fillna(0)  # cDNA change
    df['Protein Consequence'] = df['hgvsp'].fillna(0)  # Protein change

    df['Allele Frequency'] = df['total_ac'] / df['total_an']
    df['Homozygote Count'] = df['exome.ac_hom'].fillna(0) + df['genome.ac_hom'].fillna(0)

    population_mapping = {
        'afr': 'African/African American',
        'eas': 'East Asian',
        'asj': 'Ashkenazi Jew',
        'sas': 'South Asian',
        'nfe': 'European (non-Finnish)',
        'fin': 'European (Finnish)',
        'mid': 'Middle Eastern',
        'amr': 'Admixed American',
        'ami': "Amish",
        'remaining': 'Remaining',
    }
    population_ids = list(population_mapping)

    # Snapshot the nested lists first: the frame gains columns inside the loop.
    exome_populations = df['exome.populations'].tolist()
    genome_populations = df['genome.populations'].tolist()

    # json_normalize yields a 0..n-1 index, so the enumerate counter is the row label.
    for index, (exome_pop, genome_pop) in enumerate(zip(exome_populations, genome_populations)):
        prepare_popmax_calculation(df, exome_pop, 'exome', population_ids, index)
        prepare_popmax_calculation(df, genome_pop, 'genome', population_ids, index)

    for population_id in population_ids:
        allele_count = (df[f'exome_ac_{population_id}'].fillna(0)
                        + df[f'genome_ac_{population_id}'].fillna(0))
        allele_number = (df[f'exome_an_{population_id}'].fillna(0)
                         + df[f'genome_an_{population_id}'].fillna(0))
        df[f'Allele_Frequency_{population_id}'] = allele_count / allele_number

    # Undefined frequencies (0 / 0) count as 0 so they can never win the maximum.
    frequencies = pd.DataFrame(
        {population_id: df[f'Allele_Frequency_{population_id}']
         for population_id in population_ids}
    ).astype(float).fillna(0)

    df['Popmax'] = frequencies.max(axis=1)
    # idxmax returns the first population on ties; rows without any positive
    # frequency get an empty population name.
    df['Popmax population'] = (
        frequencies.idxmax(axis=1).map(population_mapping).where(df['Popmax'] > 0, '')
    )

    df = df.filter(output_columns, axis="columns")

    # rename returns a new frame, so its result has to be returned.
    return df.rename(columns=renamed_columns)
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ export const EditorView: React.FC = () => {
const { totalRows, header, rows } = fileContentResponse;

if (!header) {
fileStateUpdate(undefined, { columns: [], rows: [], aggregations: {} }, undefined);
fileStateUpdate(undefined, { columns: [], rows: [], aggregations: fileContent.aggregations }, undefined);
return;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { List, useTheme } from '@mui/material';
import { Box, List, useTheme } from '@mui/material';

export interface ToolbarGroupProps {
children: React.ReactNode;
params: React.ReactNode;
buttons: React.ReactNode;
}

/**
Expand All @@ -25,24 +26,34 @@ export interface ToolbarGroupProps {
* @param {React.ReactNode} children - The child elements to be displayed inside the list.
* @returns {JSX.Element} The rendered List component.
*/
export const ToolbarGroup: React.FC<ToolbarGroupProps> = ({ children }) => {
export const ToolbarGroup: React.FC<ToolbarGroupProps> = ({ params, buttons }) => {
const Theme = useTheme();

return (
<List
<Box
sx={{
height: '75%',
bgcolor: Theme.palette.background.paper,
px: '1rem',
display: 'flex',
flexDirection: 'row',
flexWrap: 'wrap',
gap: '1rem',
width: '100%',
height: '100%',
display: 'grid',
gridTemplateColumns: '30% 70%',
overflow: 'auto',
alignContent: 'flex-start',
bgcolor: Theme.palette.background.paper,
}}
>
{children}
</List>
<Box sx={{ borderRight: `solid 2px ${Theme.palette.action.selected}` }}>{params}</Box>
<List
sx={{
pl: '1rem',
display: 'flex',
flexDirection: 'row',
flexWrap: 'wrap',
gap: '1rem',
overflow: 'auto',
alignContent: 'flex-start',
}}
>
{buttons}
</List>
</Box>
);
};
Loading
Loading