Skip to content

Commit

Permalink
Merge pull request #34 from UBC-MDS/makefile
Browse files Browse the repository at this point in the history
Add preliminary Makefile
  • Loading branch information
htang085 authored Dec 14, 2024
2 parents 19ceaed + eb8780f commit 7e19c54
Show file tree
Hide file tree
Showing 14 changed files with 452 additions and 973 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG_LONG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
IMPROVEMENTS
1. Fix the action workflow that contains a typo by changing the name of "Docker-compose.yml" to "docker-compose.yml". Commit message: "Rename Docker-compose.yml to docker-compose.yml"
2. Suppress warnings. Commit message: "Add docstrings and suppress warnings in script 4"
3. Remove pickle out of environment.yml and arrange Dockerfile Run commands. Commit message: "Remove pickle and arrange Dockerfile"
25 changes: 19 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,23 @@ FROM quay.io/jupyter/minimal-notebook:afe30f0c9ad8

COPY conda-linux-64.lock /tmp/conda-linux-64.lock

RUN mamba update --quiet --file /tmp/conda-linux-64.lock
RUN mamba clean --all -y -f
RUN pip install deepchecks==0.18.1 seaborn==0.13.2 altair-ally==0.1.1
RUN fix-permissions "${CONDA_DIR}"
RUN fix-permissions "/home/${NB_USER}"
USER root

# Install lmodern for Quarto PDF rendering.
# We are already root here, so `sudo` is unnecessary; use `apt-get`
# (stable CLI for scripts) and clean the apt lists to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends lmodern \
    && rm -rf /var/lib/apt/lists/*

# Drop back to the unprivileged notebook user for all package installs.
USER $NB_UID

# Sync the conda environment from the lock file, then restore permissions
# that mamba may have altered under CONDA_DIR and the user's home.
RUN mamba update --quiet --file /tmp/conda-linux-64.lock \
    && mamba clean --all -y -f \
    && fix-permissions "${CONDA_DIR}" \
    && fix-permissions "/home/${NB_USER}"

# Packages not available (or not pinned) in the conda lock file.
RUN pip install \
    deepchecks==0.18.1 \
    seaborn==0.13.2 \
    altair-ally==0.1.1

RUN echo "Done Building Container!!"
89 changes: 89 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Makefile — heart disease diagnostic analysis pipeline
# author: Long Nguyen
# date: 2024-12-13

# Delete a half-written target when its recipe fails, so a failed step
# never leaves a corrupt file that looks "up to date" on the next run.
.DELETE_ON_ERROR:

.PHONY: all clean

# Default goal: build the rendered reports (which pull in the whole pipeline).
all: reports/heart_diagnostic_analysis.html reports/heart_diagnostic_analysis.pdf

# 1. Download the raw dataset (UCI repository id 45) and decode it into data/raw
data/raw/pretransformed_heart_disease.csv: scripts/1_download_decode_data.py
	python scripts/1_download_decode_data.py --id=45 --write-to=data/raw

# 2. Read, validate, and split data.
# `&:` (GNU Make >= 4.3) declares a grouped target: ONE recipe invocation
# produces both CSVs. With a plain `:` header the two targets are two
# independent rules and the script would run twice under `make -j`.
data/processed/train_df.csv data/processed/test_df.csv &: scripts/2_data_split_validate.py \
data/raw/pretransformed_heart_disease.csv
	python scripts/2_data_split_validate.py \
		--split=0.1 \
		--raw-data=data/raw/pretransformed_heart_disease.csv \
		--write-to=data/processed

# 3. EDA — one script emits all four figures, so group the targets with `&:`
# (GNU Make >= 4.3) to run the recipe exactly once, even under `make -j`.
results/figures/numeric_distributions.png \
results/figures/categorical_distributions.png \
results/figures/correlation_matrix.png \
results/figures/pairwise_relationships.png &: scripts/3_eda.py \
data/processed/train_df.csv
	python scripts/3_eda.py \
		--train data/processed/train_df.csv \
		--write-to results

# 4. Train models — fixed seed for reproducibility. Grouped target (`&:`,
# GNU Make >= 4.3): one run produces both CV tables and the fitted pipeline.
results/tables/cross_val_std.csv results/tables/cross_val_score.csv results/models/disease_pipeline.pickle &: scripts/4_training_models.py \
data/processed/train_df.csv
	python scripts/4_training_models.py \
		--train data/processed/train_df.csv \
		--seed 123 \
		--write-to results


# 5. Evaluate the fitted pipeline on the held-out test split. Grouped target
# (`&:`, GNU Make >= 4.3): one run emits the confusion matrix and metrics table.
results/figures/confusion_matrix.png results/tables/model_metrics.csv &: scripts/5_evaluate.py \
data/processed/train_df.csv \
data/processed/test_df.csv \
results/models/disease_pipeline.pickle
	python scripts/5_evaluate.py \
		--train data/processed/train_df.csv \
		--test data/processed/test_df.csv \
		--pipeline results/models/disease_pipeline.pickle \
		--write-to results



# 6. Render the final report to HTML and PDF.
# BUG FIX: the previous rule was left over from a template project — it listed
# prerequisites (eda1..6.png, cm.png, results/table/test_score.csv) that no rule
# in this Makefile produces, and rendered report/adult_income_predictor_report.qmd
# instead of this project's reports/heart_diagnostic_analysis.qmd, so `make all`
# could never succeed. Prerequisites now match the artifacts built by rules 3–5.
# Grouped target (`&:`, GNU Make >= 4.3): one quarto invocation per format,
# both outputs tracked by a single recipe.
reports/heart_diagnostic_analysis.html reports/heart_diagnostic_analysis.pdf &: reports/heart_diagnostic_analysis.qmd \
reports/references.bib \
results/figures/numeric_distributions.png \
results/figures/categorical_distributions.png \
results/figures/correlation_matrix.png \
results/figures/pairwise_relationships.png \
results/tables/cross_val_std.csv \
results/tables/cross_val_score.csv \
results/models/disease_pipeline.pickle \
results/figures/confusion_matrix.png \
results/tables/model_metrics.csv
	quarto render reports/heart_diagnostic_analysis.qmd --to html
	quarto render reports/heart_diagnostic_analysis.qmd --to pdf

# Clean up all generated artifacts.
# BUG FIX: the previous clean recipe deleted template-project files
# (eda1..6.png, cm.png, model.pickle, report/adult_income_predictor_report.*)
# that this Makefile never creates, while leaving the real outputs behind.
# It now removes exactly the files produced by rules 1–6 above.
clean:
	rm -rf data/raw/*
	rm -f data/processed/train_df.csv \
		data/processed/test_df.csv
	rm -f results/figures/numeric_distributions.png \
		results/figures/categorical_distributions.png \
		results/figures/correlation_matrix.png \
		results/figures/pairwise_relationships.png \
		results/figures/confusion_matrix.png
	rm -f results/tables/cross_val_std.csv \
		results/tables/cross_val_score.csv \
		results/tables/model_metrics.csv \
		results/models/disease_pipeline.pickle
	rm -rf reports/heart_diagnostic_analysis.html \
		reports/heart_diagnostic_analysis.pdf \
		reports/heart_diagnostic_analysis_files
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,12 @@ python scripts/2_data_split_validate.py --split=0.1 --raw-data=data/raw/pretrans
python scripts/3_eda.py --train data/processed/train_df.csv --write-to results
python scripts/4_training_models.py --train data/processed/train_df.csv --write-to results
python scripts/4_training_models.py --train data/processed/train_df.csv --seed 123 --write-to results
python scripts/5_evaluate.py --train data/processed/train_df.csv --test data/processed/test_df.csv --write-to results
python scripts/5_evaluate.py --train data/processed/train_df.csv \
--test data/processed/test_df.csv \
--pipeline results/models/disease_pipeline.pickle \
--write-to results
quarto render reports/heart_diagnostic_analysis.qmd --to html
```
Expand Down
Loading

0 comments on commit 7e19c54

Please sign in to comment.