flink-issues mailing list archives

From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-8005) Snapshotting FlinkKafkaProducer011 fails due to ClassLoader issues
Date Wed, 08 Nov 2017 15:34:00 GMT

    [ https://issues.apache.org/jira/browse/FLINK-8005?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16244162#comment-16244162 ]

ASF GitHub Bot commented on FLINK-8005:
---------------------------------------

Github user GJL commented on a diff in the pull request:

    https://github.com/apache/flink/pull/4980#discussion_r149703482
  
    --- Diff: flink-runtime/src/test/java/org/apache/flink/runtime/taskmanager/TaskStopTest.java ---
    @@ -1,157 +0,0 @@
    -/*
    - * Licensed to the Apache Software Foundation (ASF) under one
    - * or more contributor license agreements.  See the NOTICE file
    - * distributed with this work for additional information
    - * regarding copyright ownership.  The ASF licenses this file
    - * to you under the Apache License, Version 2.0 (the
    - * "License"); you may not use this file except in compliance
    - * with the License.  You may obtain a copy of the License at
    - *
    - *     http://www.apache.org/licenses/LICENSE-2.0
    - *
    - * Unless required by applicable law or agreed to in writing, software
    - * distributed under the License is distributed on an "AS IS" BASIS,
    - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    - * See the License for the specific language governing permissions and
    - * limitations under the License.
    - */
    -package org.apache.flink.runtime.taskmanager;
    -
    -import org.apache.flink.api.common.JobID;
    -import org.apache.flink.api.common.TaskInfo;
    -import org.apache.flink.configuration.Configuration;
    -import org.apache.flink.runtime.blob.BlobCacheService;
    -import org.apache.flink.runtime.blob.PermanentBlobCache;
    -import org.apache.flink.runtime.blob.TransientBlobCache;
    -import org.apache.flink.runtime.broadcast.BroadcastVariableManager;
    -import org.apache.flink.runtime.checkpoint.TaskStateSnapshot;
    -import org.apache.flink.runtime.clusterframework.types.AllocationID;
    -import org.apache.flink.runtime.deployment.InputGateDeploymentDescriptor;
    -import org.apache.flink.runtime.deployment.ResultPartitionDeploymentDescriptor;
    -import org.apache.flink.runtime.deployment.TaskDeploymentDescriptor;
    -import org.apache.flink.runtime.execution.ExecutionState;
    -import org.apache.flink.runtime.execution.librarycache.LibraryCacheManager;
    -import org.apache.flink.runtime.executiongraph.ExecutionAttemptID;
    -import org.apache.flink.runtime.executiongraph.JobInformation;
    -import org.apache.flink.runtime.executiongraph.TaskInformation;
    -import org.apache.flink.runtime.filecache.FileCache;
    -import org.apache.flink.runtime.io.disk.iomanager.IOManager;
    -import org.apache.flink.runtime.io.network.NetworkEnvironment;
    -import org.apache.flink.runtime.io.network.netty.PartitionProducerStateChecker;
    -import org.apache.flink.runtime.io.network.partition.ResultPartitionConsumableNotifier;
    -import org.apache.flink.runtime.jobgraph.JobVertexID;
    -import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
    -import org.apache.flink.runtime.jobgraph.tasks.InputSplitProvider;
    -import org.apache.flink.runtime.jobgraph.tasks.StoppableTask;
    -import org.apache.flink.runtime.memory.MemoryManager;
    -import org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup;
    -import org.apache.flink.runtime.metrics.groups.TaskMetricGroup;
    -
    -import org.junit.Test;
    -import org.junit.runner.RunWith;
    -import org.powermock.core.classloader.annotations.PrepareForTest;
    -import org.powermock.modules.junit4.PowerMockRunner;
    -
    -import java.lang.reflect.Field;
    -import java.util.Collections;
    -import java.util.concurrent.Executor;
    -
    -import scala.concurrent.duration.FiniteDuration;
    -
    -import static org.mockito.Mockito.mock;
    -import static org.mockito.Mockito.when;
    -
    -@RunWith(PowerMockRunner.class)
    -@PrepareForTest({ TaskDeploymentDescriptor.class, JobID.class, FiniteDuration.class })
    -public class TaskStopTest {
    -	private Task task;
    -
    -	public void doMocking(AbstractInvokable taskMock) throws Exception {
    -
    -		TaskInfo taskInfoMock = mock(TaskInfo.class);
    -		when(taskInfoMock.getTaskNameWithSubtasks()).thenReturn("dummyName");
    -
    -		TaskManagerRuntimeInfo tmRuntimeInfo = mock(TaskManagerRuntimeInfo.class);
    -		when(tmRuntimeInfo.getConfiguration()).thenReturn(new Configuration());
    -
    -		TaskMetricGroup taskMetricGroup = mock(TaskMetricGroup.class);
    -		when(taskMetricGroup.getIOMetricGroup()).thenReturn(mock(TaskIOMetricGroup.class));
    -
    -		BlobCacheService blobService =
    -			new BlobCacheService(mock(PermanentBlobCache.class), mock(TransientBlobCache.class));
    -
    -		task = new Task(
    -			mock(JobInformation.class),
    -			new TaskInformation(
    -				new JobVertexID(),
    -				"test task name",
    -				1,
    -				1,
    -				"foobar",
    -				new Configuration()),
    -			mock(ExecutionAttemptID.class),
    -			mock(AllocationID.class),
    -			0,
    -			0,
    -			Collections.<ResultPartitionDeploymentDescriptor>emptyList(),
    -			Collections.<InputGateDeploymentDescriptor>emptyList(),
    -			0,
    -			mock(TaskStateSnapshot.class),
    -			mock(MemoryManager.class),
    -			mock(IOManager.class),
    -			mock(NetworkEnvironment.class),
    -			mock(BroadcastVariableManager.class),
    -			mock(TaskManagerActions.class),
    -			mock(InputSplitProvider.class),
    -			mock(CheckpointResponder.class),
    -			blobService,
    -			mock(LibraryCacheManager.class),
    -			mock(FileCache.class),
    -			tmRuntimeInfo,
    -			taskMetricGroup,
    -			mock(ResultPartitionConsumableNotifier.class),
    -			mock(PartitionProducerStateChecker.class),
    -			mock(Executor.class));
    -		Field f = task.getClass().getDeclaredField("invokable");
    -		f.setAccessible(true);
    -		f.set(task, taskMock);
    -
    -		Field f2 = task.getClass().getDeclaredField("executionState");
    -		f2.setAccessible(true);
    -		f2.set(task, ExecutionState.RUNNING);
    -	}
    -
    -	@Test(timeout = 20000)
    -	public void testStopExecution() throws Exception {
    -		StoppableTestTask taskMock = new StoppableTestTask();
    -		doMocking(taskMock);
    -
    -		task.stopExecution();
    -
    -		while (!taskMock.stopCalled) {
    -			Thread.sleep(100);
    -		}
    -	}
    -
    -	@Test(expected = RuntimeException.class)
    -	public void testStopExecutionFail() throws Exception {
    --- End diff --
    
    This is now covered by `testThrowExceptionIfStopInvokedWithNotStoppableTask`


> Snapshotting FlinkKafkaProducer011 fails due to ClassLoader issues
> ------------------------------------------------------------------
>
>                 Key: FLINK-8005
>                 URL: https://issues.apache.org/jira/browse/FLINK-8005
>             Project: Flink
>          Issue Type: Bug
>          Components: Core, Kafka Connector, State Backends, Checkpointing
>    Affects Versions: 1.4.0
>            Reporter: Gary Yao
>            Assignee: Gary Yao
>            Priority: Blocker
>             Fix For: 1.4.0
>
>
> *Problem Description*
> Classes in the user code jar cannot be loaded by the snapshot thread’s context class loader ({{AppClassLoader}}).
> For example, when creating instances of {{KafkaProducer}}, configuration values given as Strings are resolved to Class objects by Kafka using that context class loader.
> Find below an extract from {{ConfigDef.java}}:
> {code}
> case CLASS:
>     if (value instanceof Class)
>         return value;
>     else if (value instanceof String)
>         return Class.forName(trimmed, true, Utils.getContextOrKafkaClassLoader());
>     else
>         throw new ConfigException(name, value, "Expected a Class instance or class name.");
> {code}
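> A common mitigation for this class of problem (an assumption on my part, not necessarily the fix applied for FLINK-8005) is to temporarily swap the thread's context class loader to the user-code class loader around the call that triggers Kafka's class lookup, roughly like this:
> {code}
> // Hedged sketch only. "userCodeClassLoader" and "producerProperties" are hypothetical
> // variables standing in for the class loader that loaded the user jar and the Kafka
> // producer configuration. Swapping the context class loader lets Kafka's
> // Class.forName(..., Utils.getContextOrKafkaClassLoader()) resolve user-jar classes.
> ClassLoader original = Thread.currentThread().getContextClassLoader();
> try {
>     Thread.currentThread().setContextClassLoader(userCodeClassLoader);
>     producer = new KafkaProducer<>(producerProperties);
> } finally {
>     // Always restore the previous context class loader, even if producer creation fails.
>     Thread.currentThread().setContextClassLoader(original);
> }
> {code}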
> *Exception/Stacktrace*
> {noformat}
> Caused by: java.lang.Exception: Could not complete snapshot 1 for operator Source: Collection Source -> Sink: kafka-sink-1510048188383 (1/1).
> 	at org.apache.flink.streaming.api.operators.AbstractStreamOperator.snapshotState(AbstractStreamOperator.java:379)
> 	at org.apache.flink.streaming.runtime.tasks.StreamTask$CheckpointingOperation.checkpointStreamOperator(StreamTask.java:1077)
> 	at org.apache.flink.streaming.runtime.tasks.StreamTask$CheckpointingOperation.executeCheckpointing(StreamTask.java:1026)
> 	at org.apache.flink.streaming.runtime.tasks.StreamTask.checkpointState(StreamTask.java:659)
> 	at org.apache.flink.streaming.runtime.tasks.StreamTask.performCheckpoint(StreamTask.java:595)
> 	at org.apache.flink.streaming.runtime.tasks.StreamTask.triggerCheckpoint(StreamTask.java:526)
> 	... 7 more
> Caused by: org.apache.kafka.common.config.ConfigException: Invalid value org.apache.kafka.common.serialization.ByteArraySerializer for configuration key.serializer: Class org.apache.kafka.common.serialization.ByteArraySerializer could not be found.
> 	at org.apache.kafka.common.config.ConfigDef.parseType(ConfigDef.java:715)
> 	at org.apache.kafka.common.config.ConfigDef.parseValue(ConfigDef.java:460)
> 	at org.apache.kafka.common.config.ConfigDef.parse(ConfigDef.java:453)
> 	at org.apache.kafka.common.config.AbstractConfig.<init>(AbstractConfig.java:62)
> 	at org.apache.kafka.common.config.AbstractConfig.<init>(AbstractConfig.java:75)
> 	at org.apache.kafka.clients.producer.ProducerConfig.<init>(ProducerConfig.java:360)
> 	at org.apache.kafka.clients.producer.KafkaProducer.<init>(KafkaProducer.java:288)
> 	at org.apache.flink.streaming.connectors.kafka.internal.FlinkKafkaProducer.<init>(FlinkKafkaProducer.java:114)
> 	at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011.initProducer(FlinkKafkaProducer011.java:913)
> 	at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011.initTransactionalProducer(FlinkKafkaProducer011.java:904)
> 	at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011.createOrGetProducerFromPool(FlinkKafkaProducer011.java:637)
> 	at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011.beginTransaction(FlinkKafkaProducer011.java:613)
> 	at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011.beginTransaction(FlinkKafkaProducer011.java:94)
> 	at org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction.beginTransactionInternal(TwoPhaseCommitSinkFunction.java:359)
> 	at org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction.snapshotState(TwoPhaseCommitSinkFunction.java:294)
> 	at org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer011.snapshotState(FlinkKafkaProducer011.java:756)
> 	at org.apache.flink.streaming.util.functions.StreamingFunctionUtils.trySnapshotFunctionState(StreamingFunctionUtils.java:118)
> 	at org.apache.flink.streaming.util.functions.StreamingFunctionUtils.snapshotFunctionState(StreamingFunctionUtils.java:99)
> 	at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.snapshotState(AbstractUdfStreamOperator.java:90)
> 	at org.apache.flink.streaming.api.operators.AbstractStreamOperator.snapshotState(AbstractStreamOperator.java:357)
> 	... 12 more
> {noformat}
> *How to reproduce*
> Note that the problem only appears when a job is deployed on a cluster. 
> # Build Flink 1.4
> # Build test job https://github.com/GJL/flink-kafka011-producer-test with {{mvn -o clean install -Pbuild-jar}}
> # Start job:
> {noformat}
> bin/flink run -c com.garyyao.StreamingJob /pathto/flink-kafka011-producer/target/flink-kafka011-producer-1.0-SNAPSHOT.jar
> {noformat}



