-
Notifications
You must be signed in to change notification settings - Fork 0
/
00-Setup.py
90 lines (59 loc) · 1.94 KB
/
00-Setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Databricks notebook source
# MAGIC %md
# MAGIC #### Data Setup
# MAGIC - Create an S3 folder
# MAGIC - Copy the `diabetes.csv` file
# MAGIC - Copy the `Postural_Tremor_DA_Raw.csv` file
# MAGIC - Create an IAM role that has access to the S3 path
# MAGIC - If Unity Catalog (UC) is enabled, create an external location and a storage credential
# MAGIC - If UC is not enabled, create a cluster whose instance profile uses the above IAM role
# MAGIC - Test access
# COMMAND ----------
# MAGIC %md
# MAGIC #### Delta Tables
# COMMAND ----------
# MAGIC %run ./init
# COMMAND ----------
# MAGIC %md
# MAGIC #### Create Database
# COMMAND ----------
# Create the database named by `database` unless it already exists.
create_database_sql = f"CREATE DATABASE IF NOT EXISTS {database}"
spark.sql(create_database_sql)
# COMMAND ----------
# MAGIC %md
# MAGIC ##### Cleanup Data Tables
# COMMAND ----------
# Drop each data table if it exists, in the same order as before:
# demographic, lab results, physicals results.
for table_to_drop in (
    demographic_table,
    lab_results_table,
    physicals_results_table,
):
    spark.sql(f"DROP TABLE IF EXISTS {table_to_drop}")
# COMMAND ----------
# MAGIC %md
# MAGIC ##### Cleanup Feature Tables
# COMMAND ----------
# Drop the Delta table behind the feature table, if it exists.
spark.sql(f"DROP TABLE IF EXISTS {feature_table_name}")

# Remove the matching Feature Store entry as well.
from databricks import feature_store

fs = feature_store.FeatureStoreClient()
try:
    # Best-effort delete: the entry may not exist (for example on a first run).
    fs.drop_table(name=feature_table_name)
except Exception as exc:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt/SystemExit still stop the notebook. Report the
    # underlying error too, because a failure here is not necessarily
    # "table not found" (it could be permissions, connectivity, ...).
    print(
        f"Feature table {feature_table_name} not found or could not be dropped "
        f"({type(exc).__name__}: {exc})"
    )
# COMMAND ----------
# MAGIC %md
# MAGIC ##### Cleanup Inference Tables
# COMMAND ----------
# Drop both inference tables if they exist: first the non-feature-store one,
# then the feature-store one.
for inference_table in (inference_data_table_nonfs, inference_data_table_fs):
    spark.sql(f"DROP TABLE IF EXISTS {inference_table}")
# COMMAND ----------
# MAGIC %md
# MAGIC #### Copy Images for Markdown
# COMMAND ----------
# Destination folder under /FileStore, and the source taken from
# `workspace_resource_path`.
tgt_folder = "/FileStore/tmp/merck_diabetes_prediction"
src_folder = workspace_resource_path

# Delete any previous copy, recreate the empty destination, then copy the
# source tree into it recursively.
dbutils.fs.rm(tgt_folder, recurse=True)
dbutils.fs.mkdirs(tgt_folder)
dbutils.fs.cp(src_folder, tgt_folder, recurse=True)
# COMMAND ----------
# List the entries now present in `tgt_folder`; presumably a quick visual
# check that the files were copied (the listing is this cell's result).
dbutils.fs.ls(tgt_folder)
# COMMAND ----------