Merge pull request #50 from databricks-industry-solutions/dev_rkm_marv1
Dev rkm marv1
arunpamulapati authored Mar 29, 2023
2 parents a739d41 + 1ac57f4 commit 81c121b
Showing 15 changed files with 758 additions and 74 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -57,7 +57,7 @@ The dashboard is broken into the five sections and each pillar is laid out in a

## Detection example

- Security Analysis Tool (SAT) analyzes 37 best practices, with more on the way. In the example below, the SAT scan highlights one finding that surfaces a potential risk, and one that meets Databricks' best practices. The Deprecated runtime versions check is red indicating that there are runtimes that are deprecated. Workloads on unsupported runtime versions may continue to run, but they receive no Databricks support or fixes. The Remediation column in the screenshot describes the risk and links to the documentation of the Databricks runtime versions that are currently supported.
+ Security Analysis Tool (SAT) analyzes 60 best practices, with more on the way. In the example below, the SAT scan highlights one finding that surfaces a potential risk, and one that meets Databricks' best practices. The Deprecated runtime versions check is red indicating that there are runtimes that are deprecated. Workloads on unsupported runtime versions may continue to run, but they receive no Databricks support or fixes. The Remediation column in the screenshot describes the risk and links to the documentation of the Databricks runtime versions that are currently supported.

On the other hand, the Log delivery check is green, confirming that the workspace follows Databricks security best practices. Run these checks regularly to comprehensively view Databricks account workspace security and ensure continuous improvement.
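
Under the hood every check reduces to the same (check_id, score, details) tuple, where score 0 renders green and 1 renders red on the dashboard. A minimal sketch of that contract, modeled on the check functions added in this commit — the check id and field names here are hypothetical, not the shipped implementation:

# Hypothetical sketch of a SAT check -- illustrative only.
# Score 0 = pass (green), 1 = fail (red); details feed the Remediation column.
check_id = '99'  # assumed id for illustration

def deprecated_runtime_check(df):
    if df is not None and not df.rdd.isEmpty():
        findings = {row.cluster_id: [row.spark_version] for row in df.collect()}
        return (check_id, 1, findings)  # deprecated runtimes found -> red
    else:
        return (check_id, 0, {})        # none found -> green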

116 changes: 63 additions & 53 deletions configs/security_best_practices.csv

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion notebooks/Includes/install_sat_sdk.py
@@ -1,5 +1,5 @@
# Databricks notebook source
- SDK_VERSION='0.1.23'
+ SDK_VERSION='0.1.25'

# COMMAND ----------

219 changes: 219 additions & 0 deletions notebooks/Includes/workspace_analysis.py
@@ -903,6 +903,7 @@ def log_check(df):
        df = df.rdd.map(lambda x: ( re.sub('[\"\'\\\\]', '_',x[0]), x[1])).toDF(['config_name', 'config_id'])
        logc = df.collect()
        logc_dict = {'audit_logs' : [[i.config_name, i.config_id] for i in logc]}

        print(logc_dict)
        return (check_id, 0, logc_dict)
    else:
@@ -995,6 +996,224 @@ def uc_check(df):

# COMMAND ----------

check_id='53' # GOV-16 Workspace Unity Catalog metastore assignment
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_metasore_assignment(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {i.metastore_id : [i.workspace_id] for i in uc_metasore}
        return (check_id, 0, uc_metasore_dict)
    else:
        return (check_id, 1, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogmsv2' + '_' + workspace_id
    sql = f'''
      SELECT metastore_id, workspace_id
      FROM {tbl_name}
      WHERE workspace_id = "{workspaceId}"
    '''
    sqlctrl(workspace_id, sql, uc_metasore_assignment)
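
Every check added in this hunk follows the same pattern: getSecurityBestPracticeRecord decides whether the check applies to this cloud, and sqlctrl runs a SQL statement against a bootstrapped global temp view and hands the resulting DataFrame to the check function. Neither helper appears in this diff; a minimal sketch of what sqlctrl plausibly does — the writeResult sink is an assumption, not the actual SAT helper:

# Hypothetical sketch of sqlctrl -- not the shipped implementation.
# Assumes a live `spark` session and a writeResult() sink for result tuples.
def sqlctrl(workspace_id, sql, check_fn):
    try:
        df = spark.sql(sql)  # query the global temp view bootstrapped earlier
    except Exception:
        df = None            # a missing view behaves like an empty result
    check_id, score, details = check_fn(df)
    writeResult(workspace_id, check_id, score, details)  # assumed sink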

# COMMAND ----------

check_id='54' # GOV-17 Lifetime of metastore Delta Sharing recipient token set to less than 90 days
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_metasore_token(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {num: [row.name, row.delta_sharing_recipient_token_lifetime_in_seconds] for num, row in enumerate(uc_metasore)}
        return (check_id, 1, uc_metasore_dict)
    else:
        return (check_id, 0, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogmsv1' + '_' + workspace_id
    sql = f'''
      SELECT name, delta_sharing_recipient_token_lifetime_in_seconds
      FROM {tbl_name}
      WHERE delta_sharing_scope = "INTERNAL_AND_EXTERNAL" AND delta_sharing_recipient_token_lifetime_in_seconds < 7776000
    '''
    sqlctrl(workspace_id, sql, uc_metasore_token)
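
The 7776000-second threshold in the WHERE clause above is exactly 90 days:

# The GOV-17 threshold expressed as arithmetic
print(90 * 24 * 60 * 60)  # 7776000 seconds = 90 days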


# COMMAND ----------

check_id='55' # GOV-18 Check for token-based sharing without an IP access list (ip_access_list)
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_delta_share_ip_accesslist(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {num: [row.name, row.owner] for num, row in enumerate(uc_metasore)}
        return (check_id, 1, uc_metasore_dict)
    else:
        return (check_id, 0, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogsharerecipients' + '_' + workspace_id
    sql = f'''
      SELECT name, owner
      FROM {tbl_name}
      WHERE authentication_type = 'TOKEN' AND ip_access_list IS NULL
    '''
    sqlctrl(workspace_id, sql, uc_delta_share_ip_accesslist)


# COMMAND ----------

check_id='56' # GOV-19 Check for Delta Sharing tokens without an expiration time
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_delta_share_expiration_time(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {num: [row.name, row.owner] for num, row in enumerate(uc_metasore)}
        return (check_id, 1, uc_metasore_dict)
    else:
        return (check_id, 0, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogsharerecipients' + '_' + workspace_id
    sql = f'''
      SELECT tokens.* FROM (SELECT explode(tokens) AS tokens, full_name, owner
      FROM {tbl_name}
      WHERE authentication_type = 'TOKEN') WHERE tokens.expiration_time IS NULL
    '''
    sqlctrl(workspace_id, sql, uc_delta_share_expiration_time)


# COMMAND ----------

check_id='57' # GOV-20 Check Use of Metastore
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_metastore(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {i.name : [i.owner] for i in uc_metasore}
        return (check_id, 0, uc_metasore_dict)
    else:
        return (check_id, 1, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogmsv1' + '_' + workspace_id
    sql = f'''
      SELECT name, owner
      FROM {tbl_name}
      WHERE securable_type = 'METASTORE'
    '''
    sqlctrl(workspace_id, sql, uc_metastore)


# COMMAND ----------

check_id='58' # GOV-21 Check Metastore Admin is also the creator
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_metastore_owner(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {i.name : [i.owner, i.created_by] for i in uc_metasore}
        return (check_id, 1, uc_metasore_dict)
    else:
        return (check_id, 0, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogmsv1' + '_' + workspace_id
    sql = f'''
      SELECT name, owner, created_by
      FROM {tbl_name}
      WHERE securable_type = 'METASTORE' AND owner == created_by
    '''
    sqlctrl(workspace_id, sql, uc_metastore_owner)


# COMMAND ----------

check_id='59' # GOV-22 Check Metastore Storage Credentials
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_metastore_storage_creds(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {num: [row.name, row.owner, row.created_by] for num, row in enumerate(uc_metasore)}
        return (check_id, 1, uc_metasore_dict)
    else:
        return (check_id, 0, {})

if enabled:
    tbl_name = 'global_temp.unitycatalogcredentials' + '_' + workspace_id
    sql = f'''
      SELECT name, owner, created_by
      FROM {tbl_name}
      WHERE securable_type = "STORAGE_CREDENTIAL"
    '''
    sqlctrl(workspace_id, sql, uc_metastore_storage_creds)


# COMMAND ----------

check_id='60' # GOV-23 Check that SQL warehouses are Unity Catalog enabled
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def uc_dws(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {i.name : [i.creator_name] for i in uc_metasore}
        return (check_id, 1, uc_metasore_dict)
    else:
        return (check_id, 0, {})

if enabled:
    tbl_name = 'global_temp.dbsql_warehouselistv2' + '_' + workspace_id
    sql = f'''
      SELECT warehouse.name AS name, warehouse.creator_name AS creator_name FROM (SELECT explode(warehouses) AS warehouse
      FROM {tbl_name})
      WHERE warehouse.disable_uc = true
    '''
    sqlctrl(workspace_id, sql, uc_dws)


# COMMAND ----------

check_id='61' # INFO-17 Check Serverless Compute enabled
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def dbsql_enable_serverless_compute(df):
    if df is not None and not df.rdd.isEmpty():
        return (check_id, 0, {'enable_serverless_compute':'Serverless Compute enabled'})
    else:
        return (check_id, 1, {'enable_serverless_compute':'Serverless Compute not enabled'})

if enabled:
    tbl_name = 'global_temp.dbsql_workspaceconfig' + '_' + workspace_id
    sql = f'''
      SELECT enable_serverless_compute
      FROM {tbl_name}
      WHERE enable_serverless_compute = true
    '''
    sqlctrl(workspace_id, sql, dbsql_enable_serverless_compute)


# COMMAND ----------

check_id='62' # INFO-18 Check Delta Sharing CREATE_RECIPIENT and CREATE_SHARE permissions
enabled, sbp_rec = getSecurityBestPracticeRecord(check_id, cloud_type)

def metastore_delta_sharing_permissions(df):
    if df is not None and not df.rdd.isEmpty():
        uc_metasore = df.collect()
        uc_metasore_dict = {num: [row.metastore_name, row.principal, row.privilege] for num, row in enumerate(uc_metasore)}
        return (check_id, 0, uc_metasore_dict)  # score intentionally 0: informational (FYI), not pass/fail
    else:
        return (check_id, 0, {})  # score intentionally 0: informational (FYI), not pass/fail

if enabled:
    tbl_name = 'global_temp.metastorepermissions' + '_' + workspace_id
    sql = f'''
      SELECT * FROM (SELECT metastore_name, principal, explode(privileges) AS privilege
      FROM {tbl_name})
      WHERE privilege = "CREATE_RECIPIENT" OR privilege = "CREATE_SHARE"
    '''
    sqlctrl(workspace_id, sql, metastore_delta_sharing_permissions)

# COMMAND ----------

tcomp = time.time() - start_time
print(f"Workspace Analysis - {tcomp} seconds to run")

Expand Down
4 changes: 2 additions & 2 deletions notebooks/Includes/workspace_settings.py
@@ -433,7 +433,7 @@ def enableNotebookGitVersioning(df):
    for row in df.rdd.collect():
        value = row.value
        defn = {'defn' : row.defn.replace("'", '')}
-       if(value == 'true'):
+       if(value == None or value == 'true'):
            return (id, 0, defn)
        else:
            return (id, 1, defn)
@@ -442,7 +442,7 @@ def enableNotebookGitVersioning(df):
    tbl_name = 'global_temp.workspacesettings' + '_' + workspace_id
    sql = f'''
      SELECT * FROM {tbl_name}
-     WHERE workspace_id = "{workspace_id}" AND name="enableNotebookGitVersioning"
+     WHERE name="enableNotebookGitVersioning"
    '''
    sqlctrl(workspace_id, sql, enableNotebookGitVersioning)
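
The widened condition treats a missing (None) value as a pass — plausibly because a setting that was never explicitly toggled comes back with no stored value. Illustrative only:

# Illustrative only: how the widened condition above scores the three
# possible stored values for enableNotebookGitVersioning.
for stored in (None, 'true', 'false'):
    score = 0 if (stored == None or stored == 'true') else 1
    print(stored, '->', score)  # None -> 0, 'true' -> 0, 'false' -> 1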

4 changes: 4 additions & 0 deletions notebooks/Utils/common.py
@@ -358,3 +358,7 @@ def notifyworkspaceCompleted(workspaceID, completed):
# COMMAND ----------

JSONLOCALTESTB = '{"account_id": "", "sql_warehouse_id": "4a936419ee9b9d68", "username_for_alerts": "sat@regemail", "verbosity": "info", "master_name_scope": "sat_scope", "master_name_key": "user", "master_pwd_scope": "sat_scope", "master_pwd_key": "pass", "workspace_pat_scope": "sat_scope", "workspace_pat_token_prefix": "sat_token", "dashboard_id": "317f4809-8d9d-4956-a79a-6eee51412217", "dashboard_folder": "../../dashboards/", "dashboard_tag": "SAT", "use_mastercreds": true, "subscription_id": "", "tenant_id": "", "client_id": "", "client_secret": "", "generate_pat_tokens": false, "url": "https://adb-83xxx7.17.azuredatabricks.net", "workspace_id": "83xxxx7", "clusterid": "0105-242242-ir40aiai", "sso": true, "scim": false, "object_storage_encryption": false, "vpc_peering": false, "table_access_control_enabled": false, "cloud_type":"azure"}'
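
The local-test configuration is a plain JSON string; a quick sanity check of the template, using only the standard library:

import json

cfg = json.loads(JSONLOCALTESTB)      # parses cleanly; secrets are placeholders
print(cfg["cloud_type"], cfg["sso"])  # azure True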

# COMMAND ----------


69 changes: 67 additions & 2 deletions notebooks/Utils/workspace_bootstrap.py
@@ -118,9 +118,9 @@

# COMMAND ----------

- from clientpkgs.db_sql_client import DBSqlClient
+ from clientpkgs.dbsql_client import DBSQLClient
  try:
-     db_sql_client = DBSqlClient(json_)
+     db_sql_client = DBSQLClient(json_)
  except Exception:
      loggr.exception("Exception encountered")

@@ -130,6 +130,22 @@

# COMMAND ----------

bootstrap('dbsql_alerts' + '_' + workspace_id, db_sql_client.get_alerts_list)

# COMMAND ----------

bootstrap('dbsql_warehouselist' + '_' + workspace_id, db_sql_client.get_sql_warehouse_list)

# COMMAND ----------

bootstrap('dbsql_warehouselistv2' + '_' + workspace_id, db_sql_client.get_sql_warehouse_listv2)

# COMMAND ----------

bootstrap('dbsql_workspaceconfig' + '_' + workspace_id, db_sql_client.get_sql_workspace_config)
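
bootstrap itself is not part of this diff; a plausible minimal sketch, assuming it calls the supplied client method and materializes the response as a global temp view for the analysis notebook to query:

# Hypothetical sketch of the bootstrap helper -- not the actual SAT code.
# Assumes `spark` is available and the fetcher returns a list of dicts.
def bootstrap(view_name, fetcher):
    try:
        rows = fetcher()                      # call the REST client method
        if rows:
            df = spark.createDataFrame(rows)  # infer schema from the payload
            df.createOrReplaceGlobalTempView(view_name)
    except Exception:
        loggr.exception(f"Could not bootstrap {view_name}")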

# COMMAND ----------

# MAGIC %md
# MAGIC ##### IPAccessList

@@ -395,6 +411,55 @@

# COMMAND ----------

# MAGIC %md
# MAGIC ##### Unity Catalog

# COMMAND ----------

from clientpkgs.unity_catalog_client import UnityCatalogClient
try:
    uc_client = UnityCatalogClient(json_)
except:
    loggr.exception("Exception encountered")

# COMMAND ----------

bootstrap('unitycatalogmsv1' + '_' + workspace_id, uc_client.get_metastore_list)

# COMMAND ----------

bootstrap('unitycatalogmsv2' + '_' + workspace_id, uc_client.get_workspace_metastore_assignments)

# COMMAND ----------

bootstrap('unitycatalogexternallocations' + '_' + workspace_id, uc_client.get_external_locations)

# COMMAND ----------

bootstrap('unitycatalogcredentials' + '_' + workspace_id, uc_client.get_credentials)

# COMMAND ----------

bootstrap('unitycatalogshares' + '_' + workspace_id, uc_client.get_list_shares)

# COMMAND ----------

bootstrap('unitycatalogshareproviders' + '_' + workspace_id, uc_client.get_sharing_providers_list)

# COMMAND ----------

bootstrap('unitycatalogsharerecipients' + '_' + workspace_id, uc_client.get_sharing_recepients_list)

# COMMAND ----------

bootstrap('unitycatalogcatlist' + '_' + workspace_id, uc_client.get_catalogs_list)

# COMMAND ----------

bootstrap('metastorepermissions' + '_' + workspace_id, uc_client.get_grants_effective_permissions_ext)
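
Each of these views backs one or more of the new GOV checks in workspace_analysis.py (for example, unitycatalogsharerecipients feeds both GOV-18 and GOV-19). A quick way to confirm a view landed before the analysis runs, assuming a notebook context:

# Sanity check: a bootstrapped view should exist and be queryable
spark.sql(f"SELECT COUNT(*) FROM global_temp.unitycatalogsharerecipients_{workspace_id}").show()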

# COMMAND ----------

# MAGIC %md
# MAGIC ##### Workspace

13 changes: 0 additions & 13 deletions src/securityanalysistoolproject/clientpkgs/db_sql_client.py

This file was deleted.

