1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.replication.regionserver;
20
21 import com.google.common.collect.Lists;
22
23 import java.io.IOException;
24 import java.io.InterruptedIOException;
25 import java.util.ArrayList;
26 import java.util.Collection;
27 import java.util.Collections;
28 import java.util.HashMap;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.TreeMap;
33 import java.util.UUID;
34 import java.util.concurrent.atomic.AtomicLong;
35
36 import org.apache.commons.lang.StringUtils;
37 import org.apache.commons.logging.Log;
38 import org.apache.commons.logging.LogFactory;
39 import org.apache.hadoop.conf.Configuration;
40 import org.apache.hadoop.fs.Path;
41 import org.apache.hadoop.hbase.Cell;
42 import org.apache.hadoop.hbase.CellScanner;
43 import org.apache.hadoop.hbase.CellUtil;
44 import org.apache.hadoop.hbase.HBaseConfiguration;
45 import org.apache.hadoop.hbase.HConstants;
46 import org.apache.hadoop.hbase.TableName;
47 import org.apache.hadoop.hbase.TableNotFoundException;
48 import org.apache.hadoop.hbase.classification.InterfaceAudience;
49 import org.apache.hadoop.hbase.client.Connection;
50 import org.apache.hadoop.hbase.client.ConnectionFactory;
51 import org.apache.hadoop.hbase.client.Delete;
52 import org.apache.hadoop.hbase.client.Mutation;
53 import org.apache.hadoop.hbase.client.Put;
54 import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
55 import org.apache.hadoop.hbase.client.Row;
56 import org.apache.hadoop.hbase.client.Table;
57 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.WALEntry;
58 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
59 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.BulkLoadDescriptor;
60 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.StoreDescriptor;
61 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
62 import org.apache.hadoop.hbase.util.Bytes;
63 import org.apache.hadoop.hbase.util.Pair;
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80 @InterfaceAudience.Private
81 public class ReplicationSink {
82
83 private static final Log LOG = LogFactory.getLog(ReplicationSink.class);
84 private final Configuration conf;
85
86
87 private volatile Connection sharedHtableCon;
88 private final MetricsSink metrics;
89 private final AtomicLong totalReplicatedEdits = new AtomicLong();
90 private final Object sharedHtableConLock = new Object();
91
92 private long hfilesReplicated = 0;
93 private SourceFSConfigurationProvider provider;
94
95
96
97
98 private final int rowSizeWarnThreshold;
99
100
101
102
103
104
105 public ReplicationSink(Configuration conf)
106 throws IOException {
107 this.conf = HBaseConfiguration.create(conf);
108 rowSizeWarnThreshold = conf.getInt(
109 HConstants.BATCH_ROWS_THRESHOLD_NAME, HConstants.BATCH_ROWS_THRESHOLD_DEFAULT);
110 decorateConf();
111 this.metrics = new MetricsSink();
112
113 String className =
114 conf.get("hbase.replication.source.fs.conf.provider",
115 DefaultSourceFSConfigurationProvider.class.getCanonicalName());
116 try {
117 @SuppressWarnings("rawtypes")
118 Class c = Class.forName(className);
119 this.provider = (SourceFSConfigurationProvider) c.newInstance();
120 } catch (Exception e) {
121 throw new IllegalArgumentException("Configured source fs configuration provider class "
122 + className + " throws error.", e);
123 }
124 }
125
126
127
128
129
130 private void decorateConf() {
131 this.conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
132 this.conf.getInt("replication.sink.client.retries.number", 4));
133 this.conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT,
134 this.conf.getInt("replication.sink.client.ops.timeout", 10000));
135 String replicationCodec = this.conf.get(HConstants.REPLICATION_CODEC_CONF_KEY);
136 if (StringUtils.isNotEmpty(replicationCodec)) {
137 this.conf.set(HConstants.RPC_CODEC_CONF_KEY, replicationCodec);
138 }
139 }
140
141
142
143
144
145
146
147
148
149
150
151
152
153 public void replicateEntries(List<WALEntry> entries, final CellScanner cells,
154 String replicationClusterId, String sourceBaseNamespaceDirPath,
155 String sourceHFileArchiveDirPath) throws IOException {
156 if (entries.isEmpty()) return;
157 if (cells == null) throw new NullPointerException("TODO: Add handling of null CellScanner");
158
159
160 try {
161 long totalReplicated = 0;
162
163
164 Map<TableName, Map<List<UUID>, List<Row>>> rowMap =
165 new TreeMap<TableName, Map<List<UUID>, List<Row>>>();
166
167 Map<List<String>, Map<String, List<Pair<byte[], List<String>>>>> bulkLoadsPerClusters = null;
168 for (WALEntry entry : entries) {
169 TableName table =
170 TableName.valueOf(entry.getKey().getTableName().toByteArray());
171 Cell previousCell = null;
172 Mutation m = null;
173 int count = entry.getAssociatedCellCount();
174 for (int i = 0; i < count; i++) {
175
176 if (!cells.advance()) {
177 throw new ArrayIndexOutOfBoundsException("Expected=" + count + ", index=" + i);
178 }
179 Cell cell = cells.current();
180
181 if (CellUtil.matchingQualifier(cell, WALEdit.BULK_LOAD)) {
182 BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cell);
183 if(bulkLoadsPerClusters == null) {
184 bulkLoadsPerClusters = new HashMap<>();
185 }
186
187
188 Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap =
189 bulkLoadsPerClusters.get(bld.getClusterIdsList());
190 if (bulkLoadHFileMap == null) {
191 bulkLoadHFileMap = new HashMap<>();
192 bulkLoadsPerClusters.put(bld.getClusterIdsList(), bulkLoadHFileMap);
193 }
194 buildBulkLoadHFileMap(bulkLoadHFileMap, table, bld);
195 } else {
196
197 if (isNewRowOrType(previousCell, cell)) {
198
199 m =
200 CellUtil.isDelete(cell) ? new Delete(cell.getRowArray(), cell.getRowOffset(),
201 cell.getRowLength()) : new Put(cell.getRowArray(), cell.getRowOffset(),
202 cell.getRowLength());
203 List<UUID> clusterIds = new ArrayList<UUID>();
204 for (HBaseProtos.UUID clusterId : entry.getKey().getClusterIdsList()) {
205 clusterIds.add(toUUID(clusterId));
206 }
207 m.setClusterIds(clusterIds);
208 addToHashMultiMap(rowMap, table, clusterIds, m);
209 }
210 if (CellUtil.isDelete(cell)) {
211 ((Delete) m).addDeleteMarker(cell);
212 } else {
213 ((Put) m).add(cell);
214 }
215 previousCell = cell;
216 }
217 }
218 totalReplicated++;
219 }
220
221
222 if (!rowMap.isEmpty()) {
223 LOG.debug("Started replicating mutations.");
224 for (Entry<TableName, Map<List<UUID>, List<Row>>> entry : rowMap.entrySet()) {
225 batch(entry.getKey(), entry.getValue().values(), rowSizeWarnThreshold);
226 }
227 LOG.debug("Finished replicating mutations.");
228 }
229
230 if(bulkLoadsPerClusters != null) {
231 for (Entry<List<String>, Map<String, List<Pair<byte[],
232 List<String>>>>> entry : bulkLoadsPerClusters.entrySet()) {
233 Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap = entry.getValue();
234 if (bulkLoadHFileMap != null && !bulkLoadHFileMap.isEmpty()) {
235 if(LOG.isDebugEnabled()) {
236 LOG.debug("Started replicating bulk loaded data from cluster ids: " +
237 entry.getKey().toString());
238 }
239 HFileReplicator hFileReplicator =
240 new HFileReplicator(this.provider.getConf(this.conf, replicationClusterId),
241 sourceBaseNamespaceDirPath, sourceHFileArchiveDirPath, bulkLoadHFileMap, conf,
242 getConnection(), entry.getKey());
243 hFileReplicator.replicate();
244 if(LOG.isDebugEnabled()) {
245 LOG.debug("Finished replicating bulk loaded data from cluster id: " +
246 entry.getKey().toString());
247 }
248 }
249 }
250 }
251
252 int size = entries.size();
253 this.metrics.setAgeOfLastAppliedOp(entries.get(size - 1).getKey().getWriteTime());
254 this.metrics.applyBatch(size + hfilesReplicated, hfilesReplicated);
255 this.totalReplicatedEdits.addAndGet(totalReplicated);
256 } catch (IOException ex) {
257 LOG.error("Unable to accept edit because:", ex);
258 throw ex;
259 }
260 }
261
262 private void buildBulkLoadHFileMap(
263 final Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap, TableName table,
264 BulkLoadDescriptor bld) throws IOException {
265 List<StoreDescriptor> storesList = bld.getStoresList();
266 int storesSize = storesList.size();
267 for (int j = 0; j < storesSize; j++) {
268 StoreDescriptor storeDescriptor = storesList.get(j);
269 List<String> storeFileList = storeDescriptor.getStoreFileList();
270 int storeFilesSize = storeFileList.size();
271 hfilesReplicated += storeFilesSize;
272 for (int k = 0; k < storeFilesSize; k++) {
273 byte[] family = storeDescriptor.getFamilyName().toByteArray();
274
275
276 String pathToHfileFromNS = getHFilePath(table, bld, storeFileList.get(k), family);
277 String tableName = table.getNameWithNamespaceInclAsString();
278 List<Pair<byte[], List<String>>> familyHFilePathsList = bulkLoadHFileMap.get(tableName);
279 if (familyHFilePathsList != null) {
280 boolean foundFamily = false;
281 for (Pair<byte[], List<String>> familyHFilePathsPair : familyHFilePathsList) {
282 if (Bytes.equals(familyHFilePathsPair.getFirst(), family)) {
283
284 familyHFilePathsPair.getSecond().add(pathToHfileFromNS);
285 foundFamily = true;
286 break;
287 }
288 }
289 if (!foundFamily) {
290
291 addFamilyAndItsHFilePathToTableInMap(family, pathToHfileFromNS, familyHFilePathsList);
292 }
293 } else {
294
295 addNewTableEntryInMap(bulkLoadHFileMap, family, pathToHfileFromNS, tableName);
296 }
297 }
298 }
299 }
300
301 private void addFamilyAndItsHFilePathToTableInMap(byte[] family, String pathToHfileFromNS,
302 List<Pair<byte[], List<String>>> familyHFilePathsList) {
303 List<String> hfilePaths = new ArrayList<String>();
304 hfilePaths.add(pathToHfileFromNS);
305 familyHFilePathsList.add(new Pair<byte[], List<String>>(family, hfilePaths));
306 }
307
308 private void addNewTableEntryInMap(
309 final Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap, byte[] family,
310 String pathToHfileFromNS, String tableName) {
311 List<String> hfilePaths = new ArrayList<String>();
312 hfilePaths.add(pathToHfileFromNS);
313 Pair<byte[], List<String>> newFamilyHFilePathsPair =
314 new Pair<byte[], List<String>>(family, hfilePaths);
315 List<Pair<byte[], List<String>>> newFamilyHFilePathsList =
316 new ArrayList<Pair<byte[], List<String>>>();
317 newFamilyHFilePathsList.add(newFamilyHFilePathsPair);
318 bulkLoadHFileMap.put(tableName, newFamilyHFilePathsList);
319 }
320
321 private String getHFilePath(TableName table, BulkLoadDescriptor bld, String storeFile,
322 byte[] family) {
323 return new StringBuilder(100).append(table.getNamespaceAsString()).append(Path.SEPARATOR)
324 .append(table.getQualifierAsString()).append(Path.SEPARATOR)
325 .append(Bytes.toString(bld.getEncodedRegionName().toByteArray())).append(Path.SEPARATOR)
326 .append(Bytes.toString(family)).append(Path.SEPARATOR).append(storeFile).toString();
327 }
328
329
330
331
332
333
334 private boolean isNewRowOrType(final Cell previousCell, final Cell cell) {
335 return previousCell == null || previousCell.getTypeByte() != cell.getTypeByte() ||
336 !CellUtil.matchingRow(previousCell, cell);
337 }
338
339 private java.util.UUID toUUID(final HBaseProtos.UUID uuid) {
340 return new java.util.UUID(uuid.getMostSigBits(), uuid.getLeastSigBits());
341 }
342
343
344
345
346
347
348
349
350
351
352 private <K1, K2, V> List<V> addToHashMultiMap(Map<K1, Map<K2,List<V>>> map, K1 key1, K2 key2, V value) {
353 Map<K2,List<V>> innerMap = map.get(key1);
354 if (innerMap == null) {
355 innerMap = new HashMap<K2, List<V>>();
356 map.put(key1, innerMap);
357 }
358 List<V> values = innerMap.get(key2);
359 if (values == null) {
360 values = new ArrayList<V>();
361 innerMap.put(key2, values);
362 }
363 values.add(value);
364 return values;
365 }
366
367
368
369
370 public void stopReplicationSinkServices() {
371 try {
372 if (this.sharedHtableCon != null) {
373 synchronized (sharedHtableConLock) {
374 if (this.sharedHtableCon != null) {
375 this.sharedHtableCon.close();
376 this.sharedHtableCon = null;
377 }
378 }
379 }
380 } catch (IOException e) {
381 LOG.warn("IOException while closing the connection", e);
382 }
383 }
384
385
386
387
388
389
390
391
392 private void batch(TableName tableName, Collection<List<Row>> allRows, int batchRowSizeThreshold)
393 throws IOException {
394 if (allRows.isEmpty()) {
395 return;
396 }
397 Table table = null;
398 try {
399 Connection connection = getConnection();
400 table = connection.getTable(tableName);
401 for (List<Row> rows : allRows) {
402 List<List<Row>> batchRows;
403 if (rows.size() > batchRowSizeThreshold) {
404 batchRows = Lists.partition(rows, batchRowSizeThreshold);
405 } else {
406 batchRows = Collections.singletonList(rows);
407 }
408 for (List<Row> rowList : batchRows) {
409 table.batch(rowList);
410 }
411 }
412 } catch (RetriesExhaustedWithDetailsException rewde) {
413 for (Throwable ex : rewde.getCauses()) {
414 if (ex instanceof TableNotFoundException) {
415 throw new TableNotFoundException("'" + tableName + "'");
416 }
417 }
418 throw rewde;
419 } catch (InterruptedException ix) {
420 throw (InterruptedIOException) new InterruptedIOException().initCause(ix);
421 } finally {
422 if (table != null) {
423 table.close();
424 }
425 }
426 }
427
428 private Connection getConnection() throws IOException {
429
430 Connection connection = sharedHtableCon;
431 if (connection == null) {
432 synchronized (sharedHtableConLock) {
433 connection = sharedHtableCon;
434 if (connection == null) {
435 connection = sharedHtableCon = ConnectionFactory.createConnection(conf);
436 }
437 }
438 }
439 return connection;
440 }
441
442
443
444
445
446
447 public String getStats() {
448 return this.totalReplicatedEdits.get() == 0 ? "" : "Sink: " +
449 "age in ms of last applied edit: " + this.metrics.refreshAgeOfLastAppliedOp() +
450 ", total replicated edits: " + this.totalReplicatedEdits;
451 }
452
453
454
455
456
457 public MetricsSink getSinkMetrics() {
458 return this.metrics;
459 }
460 }