spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Aida <Aida1.Tef...@gmail.com>
Subject Re: Installing Spark on Mac
Date Thu, 10 Mar 2016 18:22:07 GMT
Hi Gaini, thanks for your response

Please see the below contents of the files in the conf. directory:

1. docker.properties.template

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

spark.mesos.executor.docker.image: <image built from `../docker/spark-mesos/Dockerfile`>
spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro
spark.mesos.executor.home: /opt/spark

2. fairscheduler.xml.template

<?xml version="1.0"?>



<allocations>
  <pool name="production">
    <schedulingMode>FAIR</schedulingMode>
    <weight>1</weight>
    <minShare>2</minShare>
  </pool>
  <pool name="test">
    <schedulingMode>FIFO</schedulingMode>
    <weight>2</weight>
    <minShare>3</minShare>
  </pool>
</allocations>


3. log4j.properties.template

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Set everything to be logged to the console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark-project.jetty=WARN
log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR

4. metrics.properties.template
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#  syntax: [instance].sink|source.[name].[options]=[value]

#  This file configures Spark's internal metrics system. The metrics system is
#  divided into instances which correspond to internal components.
#  Each instance can be configured to report its metrics to one or more sinks.
#  Accepted values for [instance] are "master", "worker", "executor", "driver",
#  and "applications". A wildcard "*" can be used as an instance name, in
#  which case all instances will inherit the supplied property.
#
#  Within an instance, a "source" specifies a particular set of grouped metrics.
#  There are two kinds of sources:
#    1. Spark internal sources, like MasterSource, WorkerSource, etc, which will
#    collect a Spark component's internal state. Each instance is paired with a
#    Spark source that is added automatically.
#    2. Common sources, like JvmSource, which will collect low level state.
#    These can be added through configuration options and are then loaded
#    using reflection.
#
#  A "sink" specifies where metrics are delivered to. Each instance can be
#  assigned one or more sinks.
#
#  The sink|source field specifies whether the property relates to a sink or
#  source.
#
#  The [name] field specifies the name of source or sink.
#
#  The [options] field is the specific property of this source or sink. The
#  source or sink is responsible for parsing this property.
#
#  Notes:
#    1. To add a new sink, set the "class" option to a fully qualified class
#    name (see examples below).
#    2. Some sinks involve a polling period. The minimum allowed polling period
#    is 1 second.
#    3. Wildcard properties can be overridden by more specific properties.
#    For example, master.sink.console.period takes precedence over
#    *.sink.console.period.
#    4. A metrics specific configuration
#    "spark.metrics.conf=${SPARK_HOME}/conf/metrics.properties" should be
#    added to Java properties using -Dspark.metrics.conf=xxx if you want to
#    customize metrics system. You can also put the file in ${SPARK_HOME}/conf
#    and it will be loaded automatically.
#    5. MetricsServlet is added by default as a sink in master, worker and client
#    driver, you can send http request "/metrics/json" to get a snapshot of all the
#    registered metrics in json format. For master, requests "/metrics/master/json" and
#    "/metrics/applications/json" can be sent separately to get metrics snapshot of
#    instance master and applications. MetricsServlet may not be configured by self.
#

## List of available common sources and their properties.

# org.apache.spark.metrics.source.JvmSource
#   Note: Currently, JvmSource is the only available common source
#         to add additionally to an instance, to enable this,
#         set the "class" option to its fully qualified class name (see examples below)

## List of available sinks and their properties.

# org.apache.spark.metrics.sink.ConsoleSink
#   Name:   Default:   Description:
#   period  10         Poll period
#   unit    seconds    Units of poll period

# org.apache.spark.metrics.sink.CSVSink
#   Name:     Default:   Description:
#   period    10         Poll period
#   unit      seconds    Units of poll period
#   directory /tmp       Where to store CSV files

# org.apache.spark.metrics.sink.GangliaSink
#   Name:     Default:   Description:
#   host      NONE       Hostname or multicast group of Ganglia server
#   port      NONE       Port of Ganglia server(s)
#   period    10         Poll period
#   unit      seconds    Units of poll period
#   ttl       1          TTL of messages sent by Ganglia
#   mode      multicast  Ganglia network mode ('unicast' or 'multicast')

# org.apache.spark.metrics.sink.JmxSink

# org.apache.spark.metrics.sink.MetricsServlet
#   Name:     Default:   Description:
#   path      VARIES*    Path prefix from the web server root
#   sample    false      Whether to show entire set of samples for histograms ('false' or 'true')
#
# * Default path is /metrics/json for all instances except the master. The master has two paths:
#     /metrics/applications/json # App information
#     /metrics/master/json       # Master information

# org.apache.spark.metrics.sink.GraphiteSink
#   Name:     Default:      Description:
#   host      NONE          Hostname of Graphite server
#   port      NONE          Port of Graphite server
#   period    10            Poll period
#   unit      seconds       Units of poll period
#   prefix    EMPTY STRING  Prefix to prepend to metric name
#   protocol  tcp           Protocol ("tcp" or "udp") to use

## Examples
# Enable JmxSink for all instances by class name
#*.sink.jmx.class=org.apache.spark.metrics.sink.JmxSink

# Enable ConsoleSink for all instances by class name
#*.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink

# Polling period for ConsoleSink
#*.sink.console.period=10

#*.sink.console.unit=seconds

# Master instance overlap polling period
#master.sink.console.period=15

#master.sink.console.unit=seconds

# Enable CsvSink for all instances
#*.sink.csv.class=org.apache.spark.metrics.sink.CsvSink

# Polling period for CsvSink
#*.sink.csv.period=1

#*.sink.csv.unit=minutes

# Polling directory for CsvSink
#*.sink.csv.directory=/tmp/

# Worker instance overlap polling period
#worker.sink.csv.period=10

#worker.sink.csv.unit=minutes

# Enable Slf4jSink for all instances by class name
#*.sink.slf4j.class=org.apache.spark.metrics.sink.Slf4jSink

# Polling period for Slf4JSink
#*.sink.slf4j.period=1

#*.sink.slf4j.unit=minutes


# Enable jvm source for instance master, worker, driver and executor
#master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource

#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource

5. slaves.template
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# A Spark Worker will be started on each of the machines listed below.
localhost

6. spark-defaults.conf.template
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default system properties included when running spark-submit.
# This is useful for setting default environmental settings.

# Example:
# spark.master                     spark://master:7077
# spark.eventLog.enabled           true
# spark.eventLog.dir               hdfs://namenode:8021/directory
# spark.serializer                 org.apache.spark.serializer.KryoSerializer
# spark.driver.memory              5g
# spark.executor.extraJavaOptions  -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"

7. spark-env.sh.template
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This file is sourced when running various Spark programs.
# Copy it as spark-env.sh and edit that to configure Spark for your site.

# Options read when launching programs locally with
# ./bin/run-example or ./bin/spark-submit
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public dns name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append

# Options read by executors and drivers running inside the cluster
# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
# - SPARK_PUBLIC_DNS, to set the public DNS name of the driver program
# - SPARK_CLASSPATH, default classpath entries to append
# - SPARK_LOCAL_DIRS, storage directories to use on this node for shuffle and RDD data
# - MESOS_NATIVE_JAVA_LIBRARY, to point to your libmesos.so if you use Mesos

# Options read in YARN client mode
# - HADOOP_CONF_DIR, to point Spark towards Hadoop configuration files
# - SPARK_EXECUTOR_INSTANCES, Number of executors to start (Default: 2)
# - SPARK_EXECUTOR_CORES, Number of cores for the executors (Default: 1).
# - SPARK_EXECUTOR_MEMORY, Memory per Executor (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_DRIVER_MEMORY, Memory for Driver (e.g. 1000M, 2G) (Default: 1G)
# - SPARK_YARN_APP_NAME, The name of your application (Default: Spark)
# - SPARK_YARN_QUEUE, The hadoop queue to use for allocation requests (Default: 'default')
# - SPARK_YARN_DIST_FILES, Comma separated list of files to be distributed with the job.
# - SPARK_YARN_DIST_ARCHIVES, Comma separated list of archives to be distributed with the job.

# Options for the daemons used in the standalone deploy mode
# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports for the master
# - SPARK_MASTER_OPTS, to set config properties only for the master (e.g. "-Dx=y")
# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
# - SPARK_WORKER_MEMORY, to set how much total memory workers have to give executors (e.g. 1000m, 2g)
# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT, to use non-default ports for the worker
# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
# - SPARK_WORKER_DIR, to set the working directory of worker processes
# - SPARK_WORKER_OPTS, to set config properties only for the worker (e.g. "-Dx=y")
# - SPARK_DAEMON_MEMORY, to allocate to the master, worker and history server themselves (default: 1g).
# - SPARK_HISTORY_OPTS, to set config properties only for the history server (e.g. "-Dx=y")
# - SPARK_SHUFFLE_OPTS, to set config properties only for the external shuffle service (e.g. "-Dx=y")
# - SPARK_DAEMON_JAVA_OPTS, to set config properties for all daemons (e.g. "-Dx=y")
# - SPARK_PUBLIC_DNS, to set the public dns name of the master or workers

# Generic options for the daemons used in the standalone deploy mode
# - SPARK_CONF_DIR      Alternate conf dir. (Default: ${SPARK_HOME}/conf)
# - SPARK_LOG_DIR       Where log files are stored.  (Default: ${SPARK_HOME}/logs)
# - SPARK_PID_DIR       Where the pid file is stored. (Default: /tmp)
# - SPARK_IDENT_STRING  A string representing this instance of spark. (Default: $USER)
# - SPARK_NICENESS      The scheduling priority for daemons. (Default: 0)




--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Installing-Spark-on-Mac-tp26397p26450.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@spark.apache.org
For additional commands, e-mail: user-help@spark.apache.org


Mime
View raw message