DataLake seed code
dcaegen2/services.git: components/datalake-handler/feeder/src/main/java/org/onap/datalake/feeder/service/PullThread.java
/*
 * ============LICENSE_START=======================================================
 * ONAP : DATALAKE
 * ================================================================================
 * Copyright 2019 China Mobile
 *=================================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ============LICENSE_END=========================================================
 */

package org.onap.datalake.feeder.service;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.annotation.PostConstruct;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.WakeupException;
import org.onap.datalake.feeder.config.ApplicationConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.context.annotation.Scope;
import org.springframework.stereotype.Service;

/**
 * Thread that pulls messages from DMaaP and saves them to Big Data DBs.
 *
 * @author Guobiao Mo
 *
 */

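// Each puller runs as its own prototype-scoped bean, so every thread owns a
// private KafkaConsumer; KafkaConsumer is not thread-safe and must never be
// shared across threads.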
@Service
@Scope(value = ConfigurableBeanFactory.SCOPE_PROTOTYPE)
public class PullThread implements Runnable {

	@Autowired
	private DmaapService dmaapService;

	@Autowired
	private StoreService storeService;

	@Autowired
	private ApplicationConfiguration config;

	private final Logger log = LoggerFactory.getLogger(this.getClass());

	// <String, String> is the key-value type; in our case the key is empty and the value is JSON text
	private KafkaConsumer<String, String> consumer;
	private final int id;

	private final AtomicBoolean active = new AtomicBoolean(false);
	private boolean async;

	public PullThread(int id) {
		this.id = id;
	}

	@PostConstruct
	private void init() {
		async = config.isAsync();
		Properties consumerConfig = getConsumerConfig();
		consumer = new KafkaConsumer<>(consumerConfig);
	}

	private Properties getConsumerConfig() {
		Properties consumerConfig = new Properties();

		consumerConfig.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, config.getDmaapKafkaHostPort());
		consumerConfig.put(ConsumerConfig.GROUP_ID_CONFIG, config.getDmaapKafkaGroup());
		consumerConfig.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
		consumerConfig.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
		consumerConfig.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer");
		consumerConfig.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG, "org.apache.kafka.clients.consumer.RoundRobinAssignor");
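		// auto commit is disabled: offsets are committed manually in run(), only after messages have been handed to the store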
		consumerConfig.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);

		return consumerConfig;
	}

	/**
	 * Start pulling.
	 */
	@Override
	public void run() {
		active.set(true);

		DummyRebalanceListener rebalanceListener = new DummyRebalanceListener();

		try {
			List<String> topics = dmaapService.getActiveTopics(); //TODO get updated topic list within loop

			log.info("Thread {} going to subscribe to topics: {}", id, topics);

			consumer.subscribe(topics, rebalanceListener);

			while (active.get()) {

				ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(config.getDmaapKafkaTimeout()));
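				// poll() blocks until records arrive or the timeout elapses; shutdown() aborts it via wakeup()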

				List<Pair<Long, String>> messages = new ArrayList<>(records.count());
				for (TopicPartition partition : records.partitions()) {
					messages.clear();
					List<ConsumerRecord<String, String>> partitionRecords = records.records(partition);
					for (ConsumerRecord<String, String> record : partitionRecords) {
						messages.add(Pair.of(record.timestamp(), record.value()));
						log.debug("threadid={} topic={}, timestamp={} key={}, offset={}, partition={}, value={}", id, record.topic(), record.timestamp(), record.key(), record.offset(), record.partition(), record.value());
					}
					storeService.saveMessages(partition.topic(), messages);
					log.info("topic={} count={}", partition.topic(), partitionRecords.size()); //TODO we may record this number to DB

					if (!async) { //for reliability, synchronously commit the offset to Kafka; this slows throughput down a bit
						long lastOffset = partitionRecords.get(partitionRecords.size() - 1).offset();
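						// Kafka expects the offset of the *next* record to consume, hence +1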
						consumer.commitSync(Collections.singletonMap(partition, new OffsetAndMetadata(lastOffset + 1)));
					}
				}

				if (async) { //for high throughput, commit offsets to Kafka asynchronously, in batch
					consumer.commitAsync();
				}
			}
		} catch (WakeupException e) {
			// expected during shutdown(): wakeup() aborts the blocking poll()
			if (active.get()) {
				log.error("Puller {} woken up while still active", id, e);
			}
		} catch (Exception e) {
			log.error("Puller {} run() failed.", id, e);
		} finally {
			consumer.close();
		}
	}

	public void shutdown() {
		active.set(false);
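		// wakeup() makes a blocking poll() throw WakeupException so the run() loop exits promptly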
		consumer.wakeup();
	}

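	// logs partition assignment changes during consumer-group rebalances, e.g. when puller threads join or leave the group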
	private class DummyRebalanceListener implements ConsumerRebalanceListener {
		@Override
		public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
			log.info("Called onPartitionsRevoked with partitions: {}", partitions);
		}

		@Override
		public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
			log.info("Called onPartitionsAssigned with partitions: {}", partitions);
		}
	}

}
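Because PullThread is prototype-scoped, each worker must be fetched from the Spring ApplicationContext rather than injected once. A minimal sketch of a driver follows; PullerDriver and its fixed thread count are hypothetical illustrations, not part of this repository.

package org.onap.datalake.feeder.service;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Service;

@Service
public class PullerDriver { // hypothetical driver, for illustration only

	@Autowired
	private ApplicationContext context;

	private final List<PullThread> pullers = new ArrayList<>();
	private ExecutorService executor;

	@PostConstruct
	private void start() {
		int numThreads = 4; // hypothetical; would normally come from ApplicationConfiguration
		executor = Executors.newFixedThreadPool(numThreads);
		for (int i = 0; i < numThreads; i++) {
			// getBean() returns a fresh instance each time because PullThread is prototype-scoped
			PullThread puller = context.getBean(PullThread.class, i);
			pullers.add(puller);
			executor.submit(puller);
		}
	}

	@PreDestroy
	private void stop() {
		pullers.forEach(PullThread::shutdown); // wakes each blocking poll()
		executor.shutdown();
	}
}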