spark-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Sumit Khanna <sumit.kha...@askme.in>
Subject Re: how to debug spark app?
Date Thu, 04 Aug 2016 01:19:02 GMT
I'm not really sure of the best practices on this, but I either consult the
Spark UI at localhost:4040/jobs/ etc.,
or, better, register a custom listener like this:

// Create the listener once and hand it to `sc` so that it receives the events it overrides.
// NOTE(review): `sc` is presumably the application's SparkContext — it is not defined in this snippet.
val customSparkListener = new CustomSparkListener
sc.addSparkListener(customSparkListener)

/**
 * Spark listener that emits one debug log line per field of each event it
 * receives: application start/end, executor added/removed, job start/end,
 * stage submitted/completed, task start/end and RDD unpersist.
 *
 * NOTE(review): `debug` is not defined in this snippet; it is assumed to be a
 * logging helper that accepts a `String` and is in scope where this class is
 * compiled — confirm before use.
 */
class CustomSparkListener extends SparkListener {

  /** Writes `message` through `debug`, prefixed with the tag shared by the listener's log lines. */
  private def logEvent(message: => String): Unit =
    debug(s"[SPARK LISTENER DEBUGS] $message")

  /** Logs the time at which the application ended. */
  override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
    debug(s"application ended at time : ${applicationEnd.time}")
  }

  /** Logs attempt id, app id, name, driver logs, user and start time of the application. */
  override def onApplicationStart(applicationStart: SparkListenerApplicationStart): Unit = {
    logEvent(s"application Start app attempt id : ${applicationStart.appAttemptId}")
    logEvent(s"application Start app id : ${applicationStart.appId}")
    logEvent(s"application start app name : ${applicationStart.appName}")
    logEvent(s"application start driver logs : ${applicationStart.driverLogs}")
    logEvent(s"application start spark user : ${applicationStart.sparkUser}")
    logEvent(s"application start time : ${applicationStart.time}")
  }

  /** Logs the id, info and time of a newly added executor (values only, no labels). */
  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = {
    logEvent(s"${executorAdded.executorId}")
    logEvent(s"${executorAdded.executorInfo}")
    logEvent(s"${executorAdded.time}")
  }

  /** Logs the id of a removed executor, the removal reason and the removal time. */
  override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = {
    logEvent(s"the executor removed Id : ${executorRemoved.executorId}")
    logEvent(s"the executor removed reason : ${executorRemoved.reason}")
    logEvent(s"the executor removed at time : ${executorRemoved.time}")
  }

  /** Logs the id, result and end time of a finished job. */
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    logEvent(s"job End id : ${jobEnd.jobId}")
    logEvent(s"job End job Result : ${jobEnd.jobResult}")
    logEvent(s"job End time : ${jobEnd.time}")
  }

  /** Logs properties, start time, job id, stage ids and stage count of a started job. */
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    logEvent(s"Job started with properties ${jobStart.properties}")
    logEvent(s"Job started with time ${jobStart.time}")
    logEvent(s"Job started with job id ${jobStart.jobId}")
    logEvent(s"Job started with stage ids ${jobStart.stageIds}")
    // The last line also dumps the whole event object after the stage count.
    logEvent(s"Job started with stages ${jobStart.stageInfos.size} : $jobStart")
  }

  /** Logs id, task count, details, completion time and RDD infos of a completed stage. */
  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
    val stage = stageCompleted.stageInfo
    logEvent(s"Stage ${stage.stageId} completed with ${stage.numTasks} tasks.")
    logEvent(s"Stage details : ${stage.details}")
    logEvent(s"Stage completion time : ${stage.completionTime}")
    // Labelled "rddInfos" (same label as onStageSubmitted) so it is not confused with `details`.
    logEvent(s"Stage rddInfos : ${stage.rddInfos}")
  }

  /** Logs properties, RDD infos, submission time and details of a submitted stage. */
  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = {
    val stage = stageSubmitted.stageInfo
    logEvent(s"Stage properties : ${stageSubmitted.properties}")
    logEvent(s"Stage rddInfos : ${stage.rddInfos}")
    logEvent(s"Stage submission Time : ${stage.submissionTime}")
    logEvent(s"Stage submission details : ${stage.details}")
  }

  /** Logs end reason, type, metrics, info, stage id and stage attempt id of a finished task. */
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    logEvent(s"task ended reason : ${taskEnd.reason}")
    logEvent(s"task type : ${taskEnd.taskType}")
    logEvent(s"task Metrics : ${taskEnd.taskMetrics}")
    logEvent(s"task Info : ${taskEnd.taskInfo}")
    logEvent(s"task stage Id : ${taskEnd.stageId}")
    logEvent(s"task stage attempt Id : ${taskEnd.stageAttemptId}")
  }

  /** Logs stage attempt id, stage id and task info of a started task. */
  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = {
    logEvent(s"stage Attempt id : ${taskStart.stageAttemptId}")
    logEvent(s"stage Id : ${taskStart.stageId}")
    logEvent(s"task Info : ${taskStart.taskInfo}")
  }

  /** Logs the id of an RDD that was unpersisted. */
  override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = {
    logEvent(s"the unpersist RDD id : ${unpersistRDD.rddId}")
  }
}

and then I usually check the logs. P.S.: I am running it as a jar.

Thanks,


On Thu, Aug 4, 2016 at 6:46 AM, Ted Yu <yuzhihong@gmail.com> wrote:

> Have you looked at:
>
> https://spark.apache.org/docs/latest/running-on-yarn.html#debugging-your-application
>
> If you use Mesos:
>
> https://spark.apache.org/docs/latest/running-on-mesos.html#troubleshooting-and-debugging
>
> On Wed, Aug 3, 2016 at 6:13 PM, glen <cnglen@126.com> wrote:
>
>> Any tool like gdb? Which support break point at some line or some
>> function?
>>
>>
>>
>>
>>
>>
>

Mime
View raw message