Skip to content

Commit

Permalink
[SPARK-44442][MESOS] Remove Mesos support
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Remove Mesos resource manager support and docs, previously deprecated in Spark 3.2, for Spark 4.0.

### Why are the changes needed?

Mesos is no longer supported and has been deprecated for several versions.

### Does this PR introduce _any_ user-facing change?

Other than the lack of Mesos support, no

### How was this patch tested?

Existing tests.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #43135 from srowen/SPARK-44442.

Authored-by: Sean Owen <srowen@gmail.com>
Signed-off-by: Sean Owen <srowen@gmail.com>
  • Loading branch information
srowen committed Sep 28, 2023
1 parent 6a9d35f commit c596731
Show file tree
Hide file tree
Showing 110 changed files with 117 additions and 9,991 deletions.
3 changes: 0 additions & 3 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,6 @@ R:
- "bin/sparkR*"
YARN:
- "resource-managers/yarn/**/*"
MESOS:
- "resource-managers/mesos/**/*"
- "sbin/*mesos*.sh"
KUBERNETES:
- "resource-managers/kubernetes/**/*"
WINDOWS:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ jobs:
key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Run benchmarks
run: |
./build/sbt -Pscala-${{ github.event.inputs.scala }} -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl Test/package
./build/sbt -Pscala-${{ github.event.inputs.scala }} -Pyarn -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl Test/package
# Make less noisy
cp conf/log4j2.properties.template conf/log4j2.properties
sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ jobs:
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
mllib-local, mllib,
yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
yarn, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
connect, protobuf
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
included-tags: [""]
Expand Down Expand Up @@ -823,7 +823,7 @@ jobs:
export MAVEN_CLI_OPTS="--no-transfer-progress"
export JAVA_VERSION=${{ matrix.java }}
# It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install
rm -rf ~/.m2/repository/org/apache/spark
# Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/maven_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ jobs:
- >-
connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro
- >-
sql#catalyst,resource-managers#yarn,resource-managers#mesos,resource-managers#kubernetes#core
sql#catalyst,resource-managers#yarn,resource-managers#kubernetes#core
- >-
connect
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
Expand Down Expand Up @@ -180,18 +180,18 @@ jobs:
export JAVA_VERSION=${{ matrix.java }}
# Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10
export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"`
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install
if [[ "$INCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
# To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
else
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} test -fae
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} test -fae
fi
- name: Clean up local Maven repository
run: |
Expand Down
3 changes: 1 addition & 2 deletions LICENSE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,7 @@ org.apache.directory.server:apacheds-i18n
org.apache.directory.server:apacheds-kerberos-codec
org.apache.htrace:htrace-core
org.apache.ivy:ivy
org.apache.mesos:mesos
org.apache.parquet:parquet-column
org.apache.parquet:parquet-column
org.apache.parquet:parquet-common
org.apache.parquet:parquet-encoding
org.apache.parquet:parquet-format
Expand Down
3 changes: 0 additions & 3 deletions NOTICE-binary
Original file line number Diff line number Diff line change
Expand Up @@ -673,9 +673,6 @@ Copyright 2002-2012 The Apache Software Foundation
Google Guice - Core Library
Copyright 2006-2011 Google, Inc.

mesos
Copyright 2017 The Apache Software Foundation

Apache Parquet Hadoop Bundle (Incubating)
Copyright 2015 The Apache Software Foundation

Expand Down
6 changes: 3 additions & 3 deletions R/pkg/tests/fulltests/test_sparkR.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
context("functions in sparkR.R")

test_that("sparkCheckInstall", {
# "local, yarn-client, mesos-client" mode, SPARK_HOME was set correctly,
# "local, yarn-client" mode, SPARK_HOME was set correctly,
# and the SparkR job was submitted by "spark-submit"
sparkHome <- paste0(tempdir(), "/", "sparkHome")
dir.create(sparkHome)
Expand All @@ -27,14 +27,14 @@ test_that("sparkCheckInstall", {
expect_true(is.null(sparkCheckInstall(sparkHome, master, deployMode)))
unlink(sparkHome, recursive = TRUE)

# "yarn-cluster, mesos-cluster" mode, SPARK_HOME was not set,
# "yarn-cluster" mode, SPARK_HOME was not set,
# and the SparkR job was submitted by "spark-submit"
sparkHome <- ""
master <- ""
deployMode <- ""
expect_true(is.null(sparkCheckInstall(sparkHome, master, deployMode)))

# "yarn-client, mesos-client" mode, SPARK_HOME was not set
# "yarn-client" mode, SPARK_HOME was not set
sparkHome <- ""
master <- "yarn"
deployMode <- "client"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ To run one of them, use `./bin/run-example <class> [params]`. For example:
will run the Pi example locally.

You can set the MASTER environment variable when running examples to submit
examples to a cluster. This can be a mesos:// or spark:// URL,
examples to a cluster. This can be a spark:// URL,
"yarn" to run on YARN, and "local" to run
locally with one thread, or "local[N]" to run locally with N threads. You
can also use an abbreviated class name if the class is in the `examples`
Expand Down
10 changes: 0 additions & 10 deletions assembly/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,6 @@
</dependency>
</dependencies>
</profile>
<profile>
<id>mesos</id>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mesos_${scala.binary.version}</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
<profile>
<id>connect</id>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@

import org.apache.spark.network.protocol.Encodable;
import org.apache.spark.network.shuffle.ExternalBlockHandler;
import org.apache.spark.network.shuffle.protocol.mesos.RegisterDriver;
import org.apache.spark.network.shuffle.protocol.mesos.ShuffleServiceHeartbeat;

/**
* Messages handled by the {@link ExternalBlockHandler}, or
Expand Down Expand Up @@ -73,8 +71,6 @@ public static BlockTransferMessage fromByteBuffer(ByteBuffer msg) {
case 1: return UploadBlock.decode(buf);
case 2: return RegisterExecutor.decode(buf);
case 3: return StreamHandle.decode(buf);
case 4: return RegisterDriver.decode(buf);
case 5: return ShuffleServiceHeartbeat.decode(buf);
case 6: return UploadBlockStream.decode(buf);
case 7: return RemoveBlocks.decode(buf);
case 8: return BlocksRemoved.decode(buf);
Expand Down

This file was deleted.

This file was deleted.

1 change: 0 additions & 1 deletion conf/spark-env.sh.template
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in any mode
# - SPARK_CONF_DIR, Alternate conf dir. (Default: ${SPARK_HOME}/conf)
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/SparkConf.scala
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria
private[spark] def validateSettings(): Unit = {
if (contains("spark.local.dir")) {
val msg = "Note that spark.local.dir will be overridden by the value set by " +
"the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS" +
"the cluster manager (via SPARK_LOCAL_DIRS in standalone/kubernetes and LOCAL_DIRS" +
" in YARN)."
logWarning(msg)
}
Expand Down
14 changes: 5 additions & 9 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Alternative constructor that allows setting common Spark properties directly
*
* @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
* @param master Cluster URL to connect to (e.g. spark://host:port, local[4]).
* @param appName A name for your application, to display on the cluster web UI
* @param conf a [[org.apache.spark.SparkConf]] object specifying other Spark parameters
*/
Expand All @@ -142,7 +142,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Alternative constructor that allows setting common Spark properties directly
*
* @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
* @param master Cluster URL to connect to (e.g. spark://host:port, local[4]).
* @param appName A name for your application, to display on the cluster web UI.
* @param sparkHome Location where Spark is installed on cluster nodes.
* @param jars Collection of JARs to send to the cluster. These can be paths on the local file
Expand All @@ -164,7 +164,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Alternative constructor that allows setting common Spark properties directly
*
* @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
* @param master Cluster URL to connect to (e.g. spark://host:port, local[4]).
* @param appName A name for your application, to display on the cluster web UI.
*/
private[spark] def this(master: String, appName: String) =
Expand All @@ -173,7 +173,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Alternative constructor that allows setting common Spark properties directly
*
* @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
* @param master Cluster URL to connect to (e.g. spark://host:port, local[4]).
* @param appName A name for your application, to display on the cluster web UI.
* @param sparkHome Location where Spark is installed on cluster nodes.
*/
Expand All @@ -183,7 +183,7 @@ class SparkContext(config: SparkConf) extends Logging {
/**
* Alternative constructor that allows setting common Spark properties directly
*
* @param master Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]).
* @param master Cluster URL to connect to (e.g. spark://host:port, local[4]).
* @param appName A name for your application, to display on the cluster web UI.
* @param sparkHome Location where Spark is installed on cluster nodes.
* @param jars Collection of JARs to send to the cluster. These can be paths on the local file
Expand Down Expand Up @@ -352,7 +352,6 @@ class SparkContext(config: SparkConf) extends Logging {
* (i.e.
* in case of local spark app something like 'local-1433865536131'
* in case of YARN something like 'application_1433865536131_34483'
* in case of MESOS something like 'driver-20170926223339-0001'
* )
*/
def applicationId: String = _applicationId
Expand Down Expand Up @@ -557,9 +556,6 @@ class SparkContext(config: SparkConf) extends Logging {
Option(System.getenv("SPARK_PREPEND_CLASSES")).foreach { v =>
executorEnvs("SPARK_PREPEND_CLASSES") = v
}
// The Mesos scheduler backend relies on this environment variable to set executor memory.
// TODO: Set this only in the Mesos scheduler.
executorEnvs("SPARK_EXECUTOR_MEMORY") = executorMemory + "m"
executorEnvs ++= _conf.getExecutorEnv
executorEnvs("SPARK_USER") = sparkUser

Expand Down
Loading

0 comments on commit c596731

Please sign in to comment.