Skip to content

Latest commit

 

History

History
122 lines (76 loc) · 2.46 KB

ReadmissionNotebook.md

File metadata and controls

122 lines (76 loc) · 2.46 KB
pip install pytrials
import pandas as pd
from pytrials.client import ClinicalTrials

# One client instance is reused for every ClinicalTrials.gov request below.
ct = ClinicalTrials()

# Alternative (disabled): pull up to 50 full study records for the same
# search term in JSON format instead of a selection of fields.
#   ct.get_full_studies(search_expr="readmission", max_studies=50)

# Study fields requested for each matching trial; the order is kept exactly
# as listed here when the request is made.
study_field_names = [
    "NCTId", "BriefTitle", "OverallStatus", "StartDate", "CompletionDate",
    "LastUpdatePostDate", "LeadSponsorName", "BriefSummary",
    "DetailedDescription", "StudyType", "EnrollmentType",
    "InterventionType", "InterventionName", "PrimaryOutcomeMeasure",
    "SecondaryOutcomeMeasure", "OtherOutcomeMeasure",
    "EligibilityCriteria", "Gender", "MinimumAge", "MaximumAge"
]

# Request the fields above for up to 1000 studies matching "readmission",
# in csv format. The code that consumes `readmission_fields` treats its first
# element as the header row and the remaining elements as data rows.
readmission_fields = ct.get_study_fields(
    search_expr="readmission",
    fields=study_field_names,
    max_studies=1000,
    fmt="csv",
)

# Ask for the number of studies matching the same search term.
# NOTE: the result is not assigned to a name, so it is only visible when this
# expression is evaluated interactively (e.g. at the end of a notebook cell).
ct.get_study_count(search_expr="readmission")
def _phrase_flag(text_column, phrase):
    """Return a 0/1 integer Series marking rows whose text contains *phrase*.

    The match is a case-insensitive, literal substring test (no regex).
    Every value is converted with ``str`` first, so missing values are
    compared as their string form (e.g. "nan") instead of raising.
    """
    lowered = text_column.astype(str).str.lower()
    return lowered.str.contains(phrase.lower(), regex=False).astype("int64")


# Create DataFrame from the fetched data: the first row holds the column
# names, every following row is one study.
df = pd.DataFrame.from_records(readmission_fields[1:], columns=readmission_fields[0])

# Convert 'StartDate' to datetime once; values that cannot be parsed become
# NaT (errors='coerce') instead of aborting the run.
df['StartDate'] = pd.to_datetime(df['StartDate'], errors='coerce')

# Sort newest first and keep the 1000 most recent studies. A stable sort keeps
# studies with the same start date in their fetched order, so the exported
# file is deterministic. Rows without a usable date are placed last.
df = df.sort_values(by='StartDate', ascending=False, kind='mergesort').head(1000)

# Flag studies whose detailed description mentions "cardiac" (1 = yes, 0 = no).
df['cardiac'] = _phrase_flag(df['DetailedDescription'], "cardiac")

# Flag studies whose detailed description mentions "learning health system".
df['LHS'] = _phrase_flag(df['DetailedDescription'], "learning health system")

# Export the flagged, date-sorted table; index=False leaves the row index out
# of the CSV.
df.to_csv('cardiac_readmission_fields_sorted.csv', index=False)