From 3df6ab3cef26b6f39801600368a6070d2289a929 Mon Sep 17 00:00:00 2001
From: Zach Loafman
Date: Fri, 20 Nov 2015 09:33:41 -0800
Subject: [PATCH] Spark: Add DM template for Spark

This is a first-cut template based on
https://github.com/kubernetes/kubernetes/tree/master/examples/spark
with minor templating. As I iterate on that example to add PDs,
etc. (which will add much more variance), I'll iterate here, too.
---
 templates/spark/v1/example.yaml       |   6 ++
 templates/spark/v1/spark.jinja        | 121 ++++++++++++++++++++++++++
 templates/spark/v1/spark.jinja.schema |  55 ++++++++++++
 3 files changed, 182 insertions(+)
 create mode 100644 templates/spark/v1/example.yaml
 create mode 100644 templates/spark/v1/spark.jinja
 create mode 100644 templates/spark/v1/spark.jinja.schema

diff --git a/templates/spark/v1/example.yaml b/templates/spark/v1/example.yaml
new file mode 100644
index 000000000..c9f6b4c0b
--- /dev/null
+++ b/templates/spark/v1/example.yaml
@@ -0,0 +1,6 @@
+imports:
+- path: spark.jinja
+
+resources:
+- name: spark
+  type: spark.jinja
diff --git a/templates/spark/v1/spark.jinja b/templates/spark/v1/spark.jinja
new file mode 100644
index 000000000..ec0a777ae
--- /dev/null
+++ b/templates/spark/v1/spark.jinja
@@ -0,0 +1,121 @@
+# Spark standalone cluster: one master, a pool of workers and a Zeppelin
+# notebook pod. Every property is optional; the fallbacks below are meant
+# to mirror the defaults declared in spark.jinja.schema.
+{% set PROPERTIES = properties or {} %}
+{% set WORKERS = PROPERTIES['workers'] or 3 %}
+{% set REPO = PROPERTIES['repository'] or 'gcr.io/google_containers' %}
+{% set SPARK_VERSION = PROPERTIES['spark_version'] or '1.5.1_v2' %}
+{% set ZEPPELIN_VERSION = PROPERTIES['zeppelin_version'] or 'v0.5.5_v2' %}
+{% set MASTER_CPU = PROPERTIES['master_cpu'] or '100m' %}
+{% set ZEPPELIN_CPU = PROPERTIES['zeppelin_cpu'] or '100m' %}
+{% set WORKER_CPU = PROPERTIES['worker_cpu'] or '100m' %}
+
+# TODO(zmerlynn): This can't be a standard replicatedservice (yet) because of:
+# - the double containerPort
+# - the cpu request
+# (neither are possible in the type as of v2)
+resources:
+# Master pod; the services below front its ports 7077 and 8080.
+- name: spark-master
+  type: ReplicationController
+  properties:
+    kind: ReplicationController
+    apiVersion: v1
+    metadata:
+      name: spark-master-controller
+    spec:
+      replicas: 1
+      selector:
+        component: spark-master
+      template:
+        metadata:
+          labels:
+            component: spark-master
+        spec:
+          containers:
+          - name: spark-master
+            image: {{ REPO }}/spark-master:{{ SPARK_VERSION }}
+            ports:
+            - containerPort: 7077
+            - containerPort: 8080
+            resources:
+              requests:
+                cpu: {{ MASTER_CPU }}
+# Service fronting port 7077 of the master pods.
+- name: spark-master-service
+  type: Service
+  properties:
+    kind: Service
+    apiVersion: v1
+    metadata:
+      name: spark-master
+    spec:
+      ports:
+      - port: 7077
+        targetPort: 7077
+      selector:
+        component: spark-master
+# Service fronting port 8080 of the master pods.
+- name: spark-webui
+  type: Service
+  properties:
+    kind: Service
+    apiVersion: v1
+    metadata:
+      name: spark-webui
+    spec:
+      ports:
+      - port: 8080
+        targetPort: 8080
+      selector:
+        component: spark-master
+# Worker pool; the replica count comes from the `workers` property.
+- name: spark-worker-controller
+  type: ReplicationController
+  properties:
+    kind: ReplicationController
+    apiVersion: v1
+    metadata:
+      name: spark-worker-controller
+    spec:
+      replicas: {{ WORKERS }}
+      selector:
+        component: spark-worker
+      template:
+        metadata:
+          labels:
+            component: spark-worker
+        spec:
+          containers:
+          - name: spark-worker
+            image: {{ REPO }}/spark-worker:{{ SPARK_VERSION }}
+            ports:
+            - containerPort: 8081
+            resources:
+              requests:
+                cpu: {{ WORKER_CPU }}
+# Zeppelin pod. No Service is defined for it in this template.
+- name: zeppelin-controller
+  type: ReplicationController
+  properties:
+    kind: ReplicationController
+    apiVersion: v1
+    metadata:
+      name: zeppelin-controller
+    spec:
+      replicas: 1
+      selector:
+        component: zeppelin
+      template:
+        metadata:
+          labels:
+            component: zeppelin
+        spec:
+          containers:
+          - name: zeppelin
+            image: {{ REPO }}/zeppelin:{{ ZEPPELIN_VERSION }}
+            ports:
+            - containerPort: 8080
+            resources:
+              requests:
+                cpu: {{ ZEPPELIN_CPU }}
diff --git a/templates/spark/v1/spark.jinja.schema b/templates/spark/v1/spark.jinja.schema
new file mode 100644
index 000000000..f0f2f1f1b
--- /dev/null
+++ b/templates/spark/v1/spark.jinja.schema
@@ -0,0 +1,55 @@
+info:
+  title: Spark cluster with Zeppelin front-end
+  description: |
+    Defines a Spark cluster with a single master in standalone mode,
+    and a Zeppelin web notebook front-end. After deploying the
+    cluster, the Spark WebUI can be accessed by starting:
+
+      kubectl proxy --port=8001 &
+
+    Then visiting:
+
+      http://localhost:8001/api/v1/proxy/namespaces/default/services/spark-webui/
+
+    The Zeppelin WebUI can be accessed by finding the Zeppelin pod and port-forwarding:
+
+      kubectl get pods -lcomponent=zeppelin # Take the pod from here
+      kubectl port-forward zeppelin-controller-abcef :8080
+
+    Then visit the forwarded port.
+
+properties:
+  master_cpu:
+    type: string
+    default: 100m
+    description: CPU request for the master (in KCUs) (see http://kubernetes.io/v1.1/docs/design/resources.html)
+
+  workers:
+    type: int
+    default: 3
+    description: Number of Spark workers.
+
+  worker_cpu:
+    type: string
+    default: 100m
+    description: CPU request for each worker (in KCUs) (see http://kubernetes.io/v1.1/docs/design/resources.html)
+
+  zeppelin_cpu:
+    type: string
+    default: 100m
+    description: CPU request for Zeppelin (in KCUs) (see http://kubernetes.io/v1.1/docs/design/resources.html)
+
+  repository:
+    type: string
+    default: gcr.io/google_containers
+    description: Docker repo that houses the {spark-worker, spark-master, zeppelin} images.
+
+  spark_version:
+    type: string
+    default: 1.5.1_v2
+    description: spark-worker / spark-master image version to use
+
+  zeppelin_version:
+    type: string
+    default: v0.5.5_v2
+    description: zeppelin image version to use