samoa-dev mailing list archives

From gdfm <...@git.apache.org>
Subject [GitHub] incubator-samoa pull request: SAMOA-40: Add Kafka stream reader mo...
Date Tue, 21 Jul 2015 11:05:13 GMT
Github user gdfm commented on a diff in the pull request:

    https://github.com/apache/incubator-samoa/pull/32#discussion_r35091125
  
    --- Diff: samoa-api/src/main/java/org/apache/samoa/streams/kafka/KafkaReader.java ---
    @@ -0,0 +1,279 @@
    +package org.apache.samoa.streams.kafka;
    +
    +/*
    + * #%L
    + * SAMOA
    + * %%
    + * Copyright (C) 2014 - 2015 Apache Software Foundation
    + * %%
    + * Licensed under the Apache License, Version 2.0 (the "License");
    + * you may not use this file except in compliance with the License.
    + * You may obtain a copy of the License at
    + * 
    + *      http://www.apache.org/licenses/LICENSE-2.0
    + * 
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + * #L%
    + */
    +
    +import kafka.api.FetchRequest;
    +import kafka.api.FetchRequestBuilder;
    +import kafka.api.PartitionOffsetRequestInfo;
    +import kafka.common.ErrorMapping;
    +import kafka.common.TopicAndPartition;
    +import kafka.javaapi.*;
    +import kafka.javaapi.consumer.SimpleConsumer;
    +import kafka.message.MessageAndOffset;
    +
    +import java.io.UnsupportedEncodingException;
    +import java.nio.ByteBuffer;
    +import java.util.*;
    +
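    +/**
    + * Reads messages from a given Kafka topic partition using the low-level
    + * SimpleConsumer ("simple") API, handling broker discovery, leader failover,
    + * and offset tracking manually.
    + */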
    +public class KafkaReader {
    +
    +    // Offset of the next message to fetch; kept across calls to run().
    +    protected long readOffset;
    +
    +    // Replica brokers discovered during leader lookup, used for failover.
    +    private List<String> m_replicaBrokers = new ArrayList<String>();
    +
    +    public KafkaReader() {
    +        readOffset = 0L;
    +    }
    +
    +    public ArrayList<String> run(long a_maxReads, String a_topic, int a_partition, List<String> a_seedBrokers, int a_port) {
    +
    +        // find the meta data about the topic and partition we are interested in
    +        String answer = "";
    +        ArrayList<String> returnInstances = new ArrayList<String>();
    +        PartitionMetadata metadata = findLeader(a_seedBrokers, a_port, a_topic, a_partition);
    +        if (metadata == null) {
    +            System.out.println("Can't find metadata for Topic and Partition. Exiting");
    +            return null;
    +        }
    +        if (metadata.leader() == null) {
    +            System.out.println("Can't find Leader for Topic and Partition. Exiting");
    +            return null;
    +        }
    +        String leadBroker = metadata.leader().host();
    +        String clientName = "Client_" + a_topic + "_" + a_partition;
    +
    +        SimpleConsumer consumer = new SimpleConsumer(leadBroker, a_port, 100000, 64 * 1024, clientName);
    +        //long readOffset = getLastOffset(consumer, a_topic, a_partition, kafka.api.OffsetRequest.EarliestTime(), clientName);
    +        //readOffset = 0L;
    +        int numErrors = 0;
    +        while (a_maxReads > 0) {
    +            if (consumer == null) {
    +                consumer = new SimpleConsumer(leadBroker, a_port, 100000, 64 * 1024, clientName);
    +            }
    +            // Fetch a batch of messages starting at readOffset.
    +            FetchRequest req = new FetchRequestBuilder()
    +                    .clientId(clientName)
    +                    .addFetch(a_topic, a_partition, readOffset, 100000)
    +                    .build();
    +            FetchResponse fetchResponse = null;
    +            try {
    +                fetchResponse = consumer.fetch(req);
    +            } catch (Exception e) {
    +                // A failed fetch usually means the lead broker went away; fall
    +                // through to the failover logic below instead of swallowing it.
    +                System.out.println("Fetch from Broker " + leadBroker + " failed: " + e);
    +            }
    +
    +            /*
    +             * SimpleConsumer does not handle lead broker failures, you have to handle it.
    +             * Once the fetch fails or returns an error, we log the reason, close the
    +             * consumer, then try to figure out who the new leader is.
    +             */
    +            if (fetchResponse == null || fetchResponse.hasError()) {
    +                numErrors++;
    +                if (numErrors > 5) break;
    +                if (fetchResponse != null) {
    +                    short code = fetchResponse.errorCode(a_topic, a_partition);
    +                    System.out.println("Error fetching data from the Broker:" + leadBroker + " Reason: " + code);
    +                    if (code == ErrorMapping.OffsetOutOfRangeCode()) {
    +                        // We asked for an invalid offset; for the simple case, reset to the latest offset.
    +                        readOffset = getLastOffset(consumer, a_topic, a_partition, kafka.api.OffsetRequest.LatestTime(), clientName);
    +                        continue;
    +                    }
    +                }
    +                consumer.close();
    +                consumer = null;
    +                try {
    +                    leadBroker = findNewLeader(leadBroker, a_topic, a_partition, a_port);
    +                } catch (Exception e) {
    +                    e.printStackTrace();
    +                }
    +                continue;
    +            }
    +            // End error handling
    +
    +            // Reading data cont.
    +            numErrors = 0;
    +
    +            Iterator<MessageAndOffset> it = fetchResponse.messageSet(a_topic, a_partition).iterator();
    +            if (!it.hasNext()) {
    +                // No more messages in this fetch; return what we have read so far
    +                // instead of discarding it by returning null.
    +                break;
    +            }
    +            MessageAndOffset messageAndOffset = it.next();
    +
    +            long currentOffset = messageAndOffset.offset();
    +            if (currentOffset < readOffset) {
    +                System.out.println("Found an old offset: " + currentOffset + " Expecting: " + readOffset);
    +                continue;
    +            }
    +            readOffset = messageAndOffset.nextOffset();
    +            ByteBuffer payload = messageAndOffset.message().payload();
    +
    +            byte[] bytes = new byte[payload.limit()];
    +            payload.get(bytes);
    +            try {
    +                answer = String.valueOf(messageAndOffset.offset()) + ": " + new String(bytes, "UTF-8");
    +                System.out.println(answer);
    +                returnInstances.add(answer);
    +            } catch (UnsupportedEncodingException e) {
    +                e.printStackTrace();
    +            }
    +
    +            a_maxReads--;
    +        }
    +        if (consumer != null) consumer.close();
    +        return returnInstances;
    +    }
    +
    +    /**
    +     * Finds the offset to start reading data from.
    +     * Helpers available:
    +     * kafka.api.OffsetRequest.EarliestTime() => finds the beginning of the data in the logs and starts streaming from there
    +     * kafka.api.OffsetRequest.LatestTime()   => will only stream new messages
    +     *
    +     * @param consumer   consumer connected to the lead broker
    +     * @param topic      topic to read from
    +     * @param partition  partition to read from
    +     * @param whichTime  timestamp selector, typically one of the helpers above
    +     * @param clientName client id reported to the broker
    +     * @return the requested offset, or 0 if the lookup failed
    +     */
    +    public static long getLastOffset(SimpleConsumer consumer, String topic, int partition, long whichTime, String clientName) {
    +        TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
    +        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();
    +        requestInfo.put(topicAndPartition, new PartitionOffsetRequestInfo(whichTime, 1));
    +        kafka.javaapi.OffsetRequest request = new kafka.javaapi.OffsetRequest(
    +                requestInfo, kafka.api.OffsetRequest.CurrentVersion(), clientName);
    +        OffsetResponse response = consumer.getOffsetsBefore(request);
    +
    +        if (response.hasError()) {
    +            System.out.println("Error fetching data Offset Data the Broker. Reason: "
+ response.errorCode(topic, partition));
    +            return 0;
    +        }
    +        long[] offsets = response.offsets(topic, partition);
    +        return offsets[0];
    +    }
    +
    +    /**
    +     * Uses the findLeader() logic to find the new leader, except here we only try to
    +     * connect to one of the replicas for the topic/partition. This way, if we can't
    +     * reach any of the brokers with the data we are interested in, we give up and exit hard.
    +     *
    +     * @param a_oldLeader host of the broker that just failed
    +     * @param a_topic     topic to read from
    +     * @param a_partition partition to read from
    +     * @param a_port      broker port
    +     * @return host of the new lead broker
    +     * @throws Exception if no new leader can be found
    +     */
    +    private String findNewLeader(String a_oldLeader, String a_topic, int a_partition, int a_port) throws Exception {
    +        for (int i = 0; i < 3; i++) {
    +            boolean goToSleep = false;
    +            PartitionMetadata metadata = findLeader(m_replicaBrokers, a_port, a_topic, a_partition);
    +            if (metadata == null) {
    +                goToSleep = true;
    +            } else if (metadata.leader() == null) {
    +                goToSleep = true;
    +            } else if (a_oldLeader.equalsIgnoreCase(metadata.leader().host()) && i == 0) {
    +                // First time through, if the leader hasn't changed, give ZooKeeper a second to recover.
    +                // Second time, assume the broker did recover before failover, or it was a non-broker issue.
    +                goToSleep = true;
    +            } else {
    +                return metadata.leader().host();
    +            }
    +            if (goToSleep) {
    +                try {
    +                    Thread.sleep(1000);
    +                } catch (InterruptedException ie) {
    +                    Thread.currentThread().interrupt(); // restore the interrupt flag
    +                }
    +            }
    +        }
    +        System.out.println("Unable to find new leader after Broker failure. Exiting");
    +        throw new Exception("Unable to find new leader after Broker failure. Exiting");
    +    }
    +
    +    /**
    +     * Queries a live broker to find out leader and replica information for a given
    +     * topic and partition.
    +     *
    +     * @param a_seedBrokers hosts of the seed brokers to query
    +     * @param a_port        broker port
    +     * @param a_topic       topic to look up
    +     * @param a_partition   partition to look up
    +     * @return metadata for the partition, or null if no seed broker answered
    +     */
    +    private PartitionMetadata findLeader(List<String> a_seedBrokers, int a_port, String a_topic, int a_partition) {
    +        PartitionMetadata returnMetaData = null;
    +        for (String seed : a_seedBrokers) {
    +            SimpleConsumer consumer = null;
    +            try {
    +                consumer = new SimpleConsumer(seed, a_port, 100000, 64 * 1024, "leaderLookup"); // broker_host, broker_port, timeout, buffer_size, client_id
    --- End diff --
    
    I am not fully aware of Kafka internals, but I know that Kafka has two consumer APIs. Does this one use the new API?
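    
    For reference: the diff above imports kafka.javaapi.consumer.SimpleConsumer, so it uses the old low-level "simple" consumer API, where leader lookup, failover, and offset tracking are all done by hand. Below is a minimal sketch of the same read loop against the new consumer API (org.apache.kafka.clients.consumer.KafkaConsumer, available from Kafka 0.9); the broker address, group id, topic name, and class name are illustrative placeholders, not values from this PR.
    
        import java.util.Arrays;
        import java.util.Properties;
        import org.apache.kafka.clients.consumer.ConsumerRecord;
        import org.apache.kafka.clients.consumer.ConsumerRecords;
        import org.apache.kafka.clients.consumer.KafkaConsumer;
        
        public class NewApiReaderSketch {
            public static void main(String[] args) {
                Properties props = new Properties();
                props.put("bootstrap.servers", "localhost:9092"); // placeholder broker
                props.put("group.id", "samoa-reader");            // placeholder group id
                props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
                props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        
                KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
                try {
                    consumer.subscribe(Arrays.asList("samoa-instances")); // placeholder topic
                    while (true) {
                        // poll() handles leader discovery, failover, and offset management
                        // internally, replacing the manual findLeader()/findNewLeader() logic.
                        ConsumerRecords<String, String> records = consumer.poll(1000);
                        for (ConsumerRecord<String, String> record : records) {
                            System.out.println(record.offset() + ": " + record.value());
                        }
                    }
                } finally {
                    consumer.close();
                }
            }
        }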


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---
