# Forked from garystafford/dataproc-workflow-templates
# File: template-demo-4.yaml
# Dataproc workflow template: one PySpark job placed on a cluster chosen by
# label. The *_parameter scalars under `jobs` are placeholders; each is the
# target of a field path listed under `parameters` below.
jobs:
  - pysparkJob:
      # Positional script arguments. They are addressed as args[0]..args[2]
      # by the STORAGE_BUCKET, IBRD_DATA_FILE and RESULTS_DIRECTORY
      # parameters, so their order must not change.
      args:
        - storage_bucket_parameter
        - data_file_parameter
        - results_directory_parameter
      mainPythonFileUri: main_python_file_parameter
    # The parameter field paths refer to this job by its step id.
    stepId: ibrd-pyspark
placement:
  clusterSelector:
    clusterLabels:
      # NOTE(review): hard-coded UUID label pins this template to one
      # specific cluster; confirm the value before reusing the template.
      goog-dataproc-cluster-uuid: 577ab78d-30a3-487c-8f5b-63a3e455b759
parameters:
  - description: Python script to run
    fields:
      - jobs['ibrd-pyspark'].pysparkJob.mainPythonFileUri
    name: MAIN_PYTHON_FILE
  - description: Storage bucket location of data file and results
    fields:
      - jobs['ibrd-pyspark'].pysparkJob.args[0]
    name: STORAGE_BUCKET
    # Only this parameter is validated: the value must be a gs:// URI.
    validation:
      regex:
        regexes:
          - 'gs://.*'
  - description: IBRD data file
    fields:
      - jobs['ibrd-pyspark'].pysparkJob.args[1]
    name: IBRD_DATA_FILE
  - description: Result directory
    fields:
      - jobs['ibrd-pyspark'].pysparkJob.args[2]
    name: RESULTS_DIRECTORY