flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] pnowojski commented on a change in pull request #6924: [FLINK-10600] Provide End-to-end test cases for modern Kafka connectors
Date Thu, 25 Oct 2018 13:53:13 GMT
pnowojski commented on a change in pull request #6924: [FLINK-10600] Provide End-to-end test
cases for modern Kafka connectors
URL: https://github.com/apache/flink/pull/6924#discussion_r228181381
 
 

 ##########
 File path: flink-examples/flink-examples-streaming/src/main/java/org/apache/flink/streaming/examples/kafka/KafkaExample.java
 ##########
 @@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.streaming.examples.kafka;
+
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.api.java.utils.ParameterTool;
+import org.apache.flink.streaming.api.TimeCharacteristic;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
+import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
+
+/**
+ * A simple example that shows how to read from and write to modern Kafka. This will read
String messages
+ * from the input topic, parse them into a POJO type {@link KafkaEvent}, group by some key,
and finally
+ * perform a rolling addition on each key for which the results are written back to another
topic.
+ *
+ * <p>This example also demonstrates using a watermark assigner to generate per-partition
+ * watermarks directly in the Flink Kafka consumer. For demonstration purposes, it is assumed
that
+ * the String messages are formatted as a (word,frequency,timestamp) tuple.
+ *
+ * <p>Example usage:
+ * 	--input-topic test-input --output-topic test-output --bootstrap.servers localhost:9092
+ * 	--zookeeper.connect localhost:2181 --group.id myconsumer
+ */
+public class KafkaExample {
+
+	public static void main(String[] args) throws Exception {
+		// parse input arguments
+		final ParameterTool parameterTool = ParameterTool.fromArgs(args);
+
+		if (parameterTool.getNumberOfParameters() < 5) {
+			System.out.println("Missing parameters!\n" +
+				"Usage: Kafka --input-topic <topic> --output-topic <topic> " +
+				"--bootstrap.servers <kafka brokers> " +
+				"--zookeeper.connect <zk quorum> --group.id <some id>");
+			return;
+		}
+
+		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+		env.getConfig().disableSysoutLogging();
+		env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
+		env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
+		env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in
the web interface
+		env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
+
+		DataStream<KafkaEvent> input = env
+			.addSource(
+				new FlinkKafkaConsumer<>(
+					parameterTool.getRequired("input-topic"),
+					new KafkaEventSchema(),
+					parameterTool.getProperties())
+					.assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
+			.keyBy("word")
+			.map(new RollingAdditionMapper());
+
+		input.addSink(
+			new FlinkKafkaProducer<>(
+				parameterTool.getRequired("output-topic"),
+				new KafkaEventSchema(),
+				parameterTool.getProperties()));
 
 Review comment:
   I think it would be better to use `EXACTLY_ONCE` mode here

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message