Commit 3ca6e69
1 parent: f50ad2f
Showing 6 changed files with 463 additions and 33 deletions.
spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/JobGroupExample.scala (44 additions, 0 deletions)
@@ -0,0 +1,44 @@
package io.dataflint.example

import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._

object JobGroupExample extends App {
  val spark = SparkSession
    .builder()
    .appName("JobGroupExample")
    .config("spark.plugins", "io.dataflint.spark.SparkDataflintPlugin")
    .config("spark.ui.port", "10000")
    .config("spark.dataflint.telemetry.enabled", value = false)
    .config("spark.eventLog.enabled", "true")
    .config("spark.sql.maxMetadataStringLength", "10000")
    .master("local[*]")
    .getOrCreate()

  import spark.implicits._

  val data = Seq(
    ("Alice", "Math", 85),
    ("Alice", "Physics", 95),
    ("Bob", "Math", 78),
    ("Bob", "Physics", 88),
    ("Charlie", "Math", 92),
    ("Charlie", "Physics", 80)
  ).toDF("name", "subject", "score")

  data.createOrReplaceTempView("student_scores")

  // Set up and run the first query with a specific group ID
  spark.sparkContext.setJobGroup("queryGroup1", "Group 1: Math Scores")
  val mathScores = spark.sql("SELECT name, score FROM student_scores WHERE subject = 'Math'")
  mathScores.count()

  // Set up and run the second query with a different group ID
  spark.sparkContext.setJobGroup("queryGroup2", "Group 2: Average Scores")
  val avgScores = spark.sql("SELECT name, AVG(score) as avg_score FROM student_scores GROUP BY name")
  avgScores.count()

  // Keep the application (and its UI on port 10000) alive until Enter is pressed
  scala.io.StdIn.readLine()

  spark.stop()
}
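Note: setJobGroup and clearJobGroup are standard SparkContext APIs rather than part of the DataFlint plugin; the group ID and description are thread-local and tag every job submitted from that thread until the group is changed or cleared. A minimal sketch of the pattern, reusing names from the example above:

  // Jobs submitted from this thread are tagged with the group until it is changed or cleared.
  spark.sparkContext.setJobGroup("queryGroup1", "Group 1: Math Scores")
  spark.sql("SELECT name, score FROM student_scores WHERE subject = 'Math'").count()
  spark.sparkContext.clearJobGroup() // later jobs from this thread are no longer grouped
  // spark.sparkContext.cancelJobGroup("queryGroup1") // would cancel any jobs still running under that group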
spark-plugin/example_3_5_1/src/main/scala/io/dataflint/example/JobGroupExportedLocal.scala (43 additions, 0 deletions)
@@ -0,0 +1,43 @@
package io.dataflint.example

import org.apache.spark.sql.SparkSession

object JobGroupExportedLocal extends App {
  val spark = SparkSession
    .builder()
    .appName("JobGroupExample")
    .config("spark.plugins", "io.dataflint.spark.SparkDataflintPlugin")
    .config("spark.ui.port", "10000")
    .config("spark.dataflint.telemetry.enabled", value = false)
    .config("spark.eventLog.enabled", "true")
    .config("spark.dataflint.mode", "local")
    .config("spark.dataflint.token", "AKIAZEUOHHYMKVUKYYZB-1234")
    .config("spark.sql.maxMetadataStringLength", "10000")
    .master("local[*]")
    .getOrCreate()

  import spark.implicits._

  val data = Seq(
    ("Alice", "Math", 85),
    ("Alice", "Physics", 95),
    ("Bob", "Math", 78),
    ("Bob", "Physics", 88),
    ("Charlie", "Math", 92),
    ("Charlie", "Physics", 80)
  ).toDF("name", "subject", "score")

  data.createOrReplaceTempView("student_scores")

  // Set up and run the first query with a specific group ID
  spark.sparkContext.setJobGroup("queryGroup1", "Group 1: Math Scores")
  val mathScores = spark.sql("SELECT name, score FROM student_scores WHERE subject = 'Math'")
  mathScores.count()

  // Set up and run the second query with a different group ID
  spark.sparkContext.setJobGroup("queryGroup2", "Group 2: Average Scores")
  val avgScores = spark.sql("SELECT name, AVG(score) as avg_score FROM student_scores GROUP BY name")
  avgScores.count()

  spark.stop()
}
...xample_3_5_1/src/main/scala/org/apache/spark/dataflint/jobgroup/tests/JobGroupTests.scala (56 additions, 0 deletions)
@@ -0,0 +1,56 @@
package org.apache.spark.dataflint.jobgroup.tests

import org.apache.spark.dataflint.jobgroup.JobGroupExtractor
import org.apache.spark.sql.SparkSession

class JobGroupTests extends org.scalatest.funsuite.AnyFunSuiteLike {
  test("test job group extractor with 2 groups") {
    val spark = SparkSession
      .builder()
      .appName("JobGroupExample")
      .config("spark.plugins", "io.dataflint.spark.SparkDataflintPlugin")
      .config("spark.ui.port", "10000")
      .config("spark.sql.maxMetadataStringLength", "10000")
      .master("local[*]")
      .getOrCreate()

    import spark.implicits._

    val data = Seq(
      ("Alice", "Math", 85),
      ("Alice", "Physics", 95),
      ("Bob", "Math", 78),
      ("Bob", "Physics", 88),
      ("Charlie", "Math", 92),
      ("Charlie", "Physics", 80)
    ).toDF("name", "subject", "score")

    data.createOrReplaceTempView("student_scores")

    // Set up and run the first query with a specific group ID
    spark.sparkContext.setJobGroup("queryGroup1", "Group 1: Math Scores")
    val mathScores = spark.sql("SELECT name, score FROM student_scores WHERE subject = 'Math'")
    mathScores.count()

    spark.sparkContext.clearJobGroup()

    // Set up and run the second query with a different group ID
    spark.sparkContext.setJobGroup("queryGroup2", "Group 2: Average Scores")
    val avgScores = spark.sql("SELECT name, AVG(score) as avg_score FROM student_scores GROUP BY name")
    avgScores.count()

    // Optionally, clear job group if needed
    spark.sparkContext.clearJobGroup()

    // Brief pause so in-flight listener events settle before extracting per-group stores
    Thread.sleep(1000)

    val extractor = new JobGroupExtractor(spark.sparkContext.ui.get.store, spark.sharedState.statusStore)
    val queryGroup1Store = extractor.extract("queryGroup1")
    val queryGroup2Store = extractor.extract("queryGroup2")

    assert(queryGroup1Store._2.executionsList().length == 1)
    assert(queryGroup2Store._2.executionsList().length == 1)
    spark.stop()
  }

}
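As the assertions suggest, extract(groupId) returns a pair whose second element exposes executionsList(), i.e. the SQL executions recorded for that job group. A small, hypothetical convenience wrapper under that assumption (not part of the committed code):

  // Hypothetical helper, assuming extract(groupId)._2 behaves like a SQL status store
  // scoped to the given job group, as the assertions above imply.
  def sqlExecutionCount(extractor: JobGroupExtractor, groupId: String): Int =
    extractor.extract(groupId)._2.executionsList().length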