1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.regionserver.querymatcher;
19
20 import org.apache.hadoop.hbase.Cell;
21 import org.apache.hadoop.hbase.KeepDeletedCells;
22 import org.apache.hadoop.hbase.classification.InterfaceAudience;
23 import org.apache.hadoop.hbase.regionserver.DeleteTracker;
24 import org.apache.hadoop.hbase.regionserver.ScanInfo;
25
26 /**
27 * A query matcher for compaction which can drop delete markers.
28 */
29 @InterfaceAudience.Private
30 public abstract class DropDeletesCompactionScanQueryMatcher extends CompactionScanQueryMatcher {
31
32 /**
33 * By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete marker is always removed
34 * during a major compaction. If set to non-zero value then major compaction will try to keep a
35 * delete marker around for the given number of milliseconds. We want to keep the delete markers
36 * around a bit longer because old puts might appear out-of-order. For example, during log
37 * replication between two clusters.
38 * <p>
39 * If the delete marker has lived longer than its column-family's TTL then the delete marker will
40 * be removed even if time.to.purge.deletes has not passed. This is because all the Puts that this
41 * delete marker can influence would have also expired. (Removing of delete markers on col family
42 * TTL will not happen if min-versions is set to non-zero)
43 * <p>
44 * But, if time.to.purge.deletes has not expired then a delete marker will not be removed just
45 * because there are no Puts that it is currently influencing. This is because Puts, that this
46 * delete can influence. may appear out of order.
47 */
48 protected final long timeToPurgeDeletes;
49
50 /**
51 * Oldest put in any of the involved store files Used to decide whether it is ok to delete family
52 * delete marker of this store keeps deleted KVs.
53 */
54 protected final long earliestPutTs;
55
56 protected DropDeletesCompactionScanQueryMatcher(ScanInfo scanInfo, DeleteTracker deletes,
57 long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long now) {
58 super(scanInfo, deletes, readPointToUse, oldestUnexpiredTS, now);
59 this.timeToPurgeDeletes = scanInfo.getTimeToPurgeDeletes();
60 this.earliestPutTs = earliestPutTs;
61 }
62
63 protected final MatchCode tryDropDelete(Cell cell) {
64 long timestamp = cell.getTimestamp();
65 // If it is not the time to drop the delete marker, just return
66 if (timeToPurgeDeletes > 0 && now - timestamp <= timeToPurgeDeletes) {
67 return MatchCode.INCLUDE;
68 }
69 if (keepDeletedCells == KeepDeletedCells.TRUE
70 || (keepDeletedCells == KeepDeletedCells.TTL && timestamp >= oldestUnexpiredTS)) {
71 // If keepDeletedCell is true, or the delete marker is not expired yet, we should include it
72 // in version counting to see if we can drop it. The only exception is that, we can make
73 // sure that no put is older than this delete marker. And under this situation, all later
74 // cells of this column(must be delete markers) can be skipped.
75 if (timestamp < earliestPutTs) {
76 return columns.getNextRowOrNextColumn(cell.getQualifierArray(), cell.getQualifierOffset(),
77 cell.getQualifierLength());
78 } else {
79 return null;
80 }
81 } else {
82 return MatchCode.SKIP;
83 }
84 }
85 }