flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Guowei Ma (Jira)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-22266) Harden JobMasterStopWithSavepointITCase
Date Mon, 03 May 2021 04:24:00 GMT

    [ https://issues.apache.org/jira/browse/FLINK-22266?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17338154#comment-17338154 ]

Guowei Ma commented on FLINK-22266:
-----------------------------------

https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=17502&view=logs&j=5c8e7682-d68f-54d1-16a2-a09310218a49&t=f508e270-48d6-5f1e-3138-42a17e0714f0&l=4511


{code:java}
suspendWithSavepointWithoutComplicationsShouldSucceedAndLeadJobToFinished(org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase) Time elapsed: 10.077 s  <<< ERROR!
2021-05-02T22:50:16.1542494Z May 02 22:50:16 java.util.concurrent.ExecutionException: java.util.concurrent.TimeoutException: Invocation of public default java.util.concurrent.CompletableFuture org.apache.flink.runtime.webmonitor.RestfulGateway.stopWithSavepoint(org.apache.flink.api.common.JobID,java.lang.String,boolean,org.apache.flink.api.common.time.Time) timed out.
2021-05-02T22:50:16.1544195Z May 02 22:50:16 	at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357)
2021-05-02T22:50:16.1545389Z May 02 22:50:16 	at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908)
2021-05-02T22:50:16.1581395Z May 02 22:50:16 	at org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase.stopWithSavepointNormalExecutionHelper(JobMasterStopWithSavepointITCase.java:123)
2021-05-02T22:50:16.1583108Z May 02 22:50:16 	at org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase.suspendWithSavepointWithoutComplicationsShouldSucceedAndLeadJobToFinished(JobMasterStopWithSavepointITCase.java:105)
2021-05-02T22:50:16.1584290Z May 02 22:50:16 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
2021-05-02T22:50:16.1585178Z May 02 22:50:16 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
2021-05-02T22:50:16.1586128Z May 02 22:50:16 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
2021-05-02T22:50:16.1586983Z May 02 22:50:16 	at java.lang.reflect.Method.invoke(Method.java:498)
2021-05-02T22:50:16.1587861Z May 02 22:50:16 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
2021-05-02T22:50:16.1588812Z May 02 22:50:16 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
2021-05-02T22:50:16.1589712Z May 02 22:50:16 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
2021-05-02T22:50:16.1590653Z May 02 22:50:16 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
2021-05-02T22:50:16.1591556Z May 02 22:50:16 	at org.junit.internal.runners.statements.RunAfters.evaluate(RunAfters.java:27)
2021-05-02T22:50:16.1592354Z May 02 22:50:16 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
2021-05-02T22:50:16.1593225Z May 02 22:50:16 	at org.apache.flink.util.TestNameProvider$1.evaluate(TestNameProvider.java:45)
2021-05-02T22:50:16.1594370Z May 02 22:50:16 	at org.junit.rules.TestWatcher$1.evaluate(TestWatcher.java:55)
2021-05-02T22:50:16.1595082Z May 02 22:50:16 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
2021-05-02T22:50:16.1595862Z May 02 22:50:16 	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:325)
2021-05-02T22:50:16.1596945Z May 02 22:50:16 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:78)
2021-05-02T22:50:16.1597960Z May 02 22:50:16 	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:57)
2021-05-02T22:50:16.1598905Z May 02 22:50:16 	at org.junit.runners.ParentRunner$3.run(ParentRunner.java:290)
2021-05-02T22:50:16.1599640Z May 02 22:50:16 	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:71)
2021-05-02T22:50:16.1600477Z May 02 22:50:16 	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:288)
2021-05-02T22:50:16.1601289Z May 02 22:50:16 	at org.junit.runners.ParentRunner.access$000(ParentRunner.java:58)
2021-05-02T22:50:16.1602080Z May 02 22:50:16 	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:268)
2021-05-02T22:50:16.1602907Z May 02 22:50:16 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
2021-05-02T22:50:16.1603854Z May 02 22:50:16 	at org.junit.rules.ExternalResource$1.evaluate(ExternalResource.java:48)
2021-05-02T22:50:16.1604594Z May 02 22:50:16 	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
2021-05-02T22:50:16.1605349Z May 02 22:50:16 	at org.junit.runners.ParentRunner.run(ParentRunner.java:363)
2021-05-02T22:50:16.1606128Z May 02 22:50:16 	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
2021-05-02T22:50:16.1607075Z May 02 22:50:16 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
2021-05-02T22:50:16.1608111Z May 02 22:50:16 	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
2021-05-02T22:50:16.1608984Z May 02 22:50:16 	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
2021-05-02T22:50:16.1609976Z May 02 22:50:16 	at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:384)
2021-05-02T22:50:16.1640984Z May 02 22:50:16 	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:345)
2021-05-02T22:50:16.1641875Z May 02 22:50:16 	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:126)
2021-05-02T22:50:16.1657319Z May 02 22:50:16 	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:418)
2021-05-02T22:50:16.1658668Z May 02 22:50:16 Caused by: java.util.concurrent.TimeoutException: Invocation of public default java.util.concurrent.CompletableFuture org.apache.flink.runtime.webmonitor.RestfulGateway.stopWithSavepoint(org.apache.flink.api.common.JobID,java.lang.String,boolean,org.apache.flink.api.common.time.Time) timed out.
2021-05-02T22:50:16.1659635Z May 02 22:50:16 	at com.sun.proxy.$Proxy32.stopWithSavepoint(Unknown Source)
2021-05-02T22:50:16.1660339Z May 02 22:50:16 	at org.apache.flink.runtime.minicluster.MiniCluster.lambda$stopWithSavepoint$9(MiniCluster.java:724)
2021-05-02T22:50:16.1661430Z May 02 22:50:16 	at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
2021-05-02T22:50:16.1662258Z May 02 22:50:16 	at java.util.concurrent.CompletableFuture.uniApplyStage(CompletableFuture.java:628)
2021-05-02T22:50:16.1662973Z May 02 22:50:16 	at java.util.concurrent.CompletableFuture.thenApply(CompletableFuture.java:1996)
2021-05-02T22:50:16.1663723Z May 02 22:50:16 	at org.apache.flink.runtime.minicluster.MiniCluster.runDispatcherCommand(MiniCluster.java:751)
2021-05-02T22:50:16.1664283Z May 02 22:50:16 	at org.apache.flink.runtime.minicluster.MiniCluster.stopWithSavepoint(MiniCluster.java:722)
2021-05-02T22:50:16.1664965Z May 02 22:50:16 	at org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase.stopWithSavepoint(JobMasterStopWithSavepointITCase.java:250)
2021-05-02T22:50:16.1665756Z May 02 22:50:16 	at org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase.stopWithSavepointNormalExecutionHelper(JobMasterStopWithSavepointITCase.java:117)
2021-05-02T22:50:16.1666314Z May 02 22:50:16 	... 34 more
2021-05-02T22:50:16.1668114Z May 02 22:50:16 Caused by: akka.pattern.AskTimeoutException: Ask timed out on [Actor[akka://flink/user/rpc/dispatcher_2#544661845]] after [10000 ms]. Message of type [org.apache.flink.runtime.rpc.messages.LocalFencedMessage]. A typical reason for `AskTimeoutException` is that the recipient actor didn't send a reply.
2021-05-02T22:50:16.1669462Z May 02 22:50:16 	at akka.pattern.PromiseActorRef$$anonfun$2.apply(AskSupport.scala:635)
2021-05-02T22:50:16.1670011Z May 02 22:50:16 	at akka.pattern.PromiseActorRef$$anonfun$2.apply(AskSupport.scala:635)
2021-05-02T22:50:16.1670622Z May 02 22:50:16 	at akka.pattern.PromiseActorRef$$anonfun$1.apply$mcV$sp(AskSupport.scala:648)
2021-05-02T22:50:16.1671147Z May 02 22:50:16 	at akka.actor.Scheduler$$anon$4.run(Scheduler.scala:205)
2021-05-02T22:50:16.1671721Z May 02 22:50:16 	at scala.concurrent.Future$InternalCallbackExecutor$.unbatchedExecute(Future.scala:601)
2021-05-02T22:50:16.1672244Z May 02 22:50:16 	at scala.concurrent.BatchingExecutor$class.execute(BatchingExecutor.scala:109)
2021-05-02T22:50:16.1672975Z May 02 22:50:16 	at scala.concurrent.Future$InternalCallbackExecutor$.execute(Future.scala:599)
2021-05-02T22:50:16.1673767Z May 02 22:50:16 	at akka.actor.LightArrayRevolverScheduler$TaskHolder.executeTask(LightArrayRevolverScheduler.scala:328)
2021-05-02T22:50:16.1674434Z May 02 22:50:16 	at akka.actor.LightArrayRevolverScheduler$$anon$4.executeBucket$1(LightArrayRevolverScheduler.scala:279)
2021-05-02T22:50:16.1675060Z May 02 22:50:16 	at akka.actor.LightArrayRevolverScheduler$$anon$4.nextTick(LightArrayRevolverScheduler.scala:283)
2021-05-02T22:50:16.1675597Z May 02 22:50:16 	at akka.actor.LightArrayRevolverScheduler$$anon$4.run(LightArrayRevolverScheduler.scala:235)
2021-05-02T22:50:16.1676109Z May 02 22:50:16 	at java.lang.Thread.run(Thread.java:748)
{code}


> Harden JobMasterStopWithSavepointITCase
> ---------------------------------------
>
>                 Key: FLINK-22266
>                 URL: https://issues.apache.org/jira/browse/FLINK-22266
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Checkpointing, Runtime / Coordination
>    Affects Versions: 1.13.0, 1.14.0
>            Reporter: Guowei Ma
>            Assignee: Dawid Wysakowicz
>            Priority: Critical
>              Labels: test-stability
>             Fix For: 1.14.0
>
>
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=16451&view=logs&j=5c8e7682-d68f-54d1-16a2-a09310218a49&t=f508e270-48d6-5f1e-3138-42a17e0714f0&l=3884
> {code:java}
> [ERROR] throwingExceptionOnCallbackWithNoRestartsShouldFailTheTerminate(org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase) Time elapsed: 0.154 s  <<< FAILURE!
> java.lang.AssertionError
> 	at org.junit.Assert.fail(Assert.java:86)
> 	at org.junit.Assert.assertTrue(Assert.java:41)
> 	at org.junit.Assert.assertTrue(Assert.java:52)
> 	at org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase.throwingExceptionOnCallbackWithoutRestartsHelper(JobMasterStopWithSavepointITCase.java:154)
> 	at org.apache.flink.runtime.jobmaster.JobMasterStopWithSavepointITCase.throwingExceptionOnCallbackWithNoRestartsShouldFailTheTerminate(JobMasterStopWithSavepointITCase.java:138)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:50)
> 	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
> 	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:47)
> 	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
> 	at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:298)
> 	at org.junit.internal.runners.statements.FailOnTimeout$CallableStatement.call(FailOnTimeout.java:292)
> 	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> 	at java.lang.Thread.run(Thread.java:748)
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Mime
View raw message