View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver.querymatcher;
19  
20  import com.google.common.base.Preconditions;
21  
22  import java.io.IOException;
23  import java.util.Arrays;
24  import java.util.NavigableSet;
25  
26  import org.apache.hadoop.hbase.Cell;
27  import org.apache.hadoop.hbase.CellUtil;
28  import org.apache.hadoop.hbase.HConstants;
29  import org.apache.hadoop.hbase.KeepDeletedCells;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.client.Scan;
32  import org.apache.hadoop.hbase.filter.Filter;
33  import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
34  import org.apache.hadoop.hbase.io.TimeRange;
35  import org.apache.hadoop.hbase.regionserver.DeleteTracker;
36  import org.apache.hadoop.hbase.regionserver.DeleteTracker.DeleteResult;
37  import org.apache.hadoop.hbase.regionserver.RegionCoprocessorHost;
38  import org.apache.hadoop.hbase.regionserver.ScanInfo;
39  import org.apache.hadoop.hbase.regionserver.ScanType;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
42  
43  /**
44   * The old query matcher implementation. Used to keep compatibility for coprocessor that could
45   * overwrite the StoreScanner before compaction. Should be removed once we find a better way to do
46   * filtering during compaction.
47   */
48  @Deprecated
49  @InterfaceAudience.Private
50  public class LegacyScanQueryMatcher extends ScanQueryMatcher {
51  
52    private final TimeRange tr;
53  
54    private final Filter filter;
55  
56    /** Keeps track of deletes */
57    private final DeleteTracker deletes;
58  
59    /**
60     * The following three booleans define how we deal with deletes. There are three different
61     * aspects:
62     * <ol>
63     * <li>Whether to keep delete markers. This is used in compactions. Minor compactions always keep
64     * delete markers.</li>
65     * <li>Whether to keep deleted rows. This is also used in compactions, if the store is set to keep
66     * deleted rows. This implies keeping the delete markers as well.</li> In this case deleted rows
67     * are subject to the normal max version and TTL/min version rules just like "normal" rows.
68     * <li>Whether a scan can do time travel queries even before deleted marker to reach deleted
69     * rows.</li>
70     * </ol>
71     */
72    /** whether to retain delete markers */
73    private boolean retainDeletesInOutput;
74  
75    /** whether to return deleted rows */
76    private final KeepDeletedCells keepDeletedCells;
77  
78    // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete
79    // marker is always removed during a major compaction. If set to non-zero
80    // value then major compaction will try to keep a delete marker around for
81    // the given number of milliseconds. We want to keep the delete markers
82    // around a bit longer because old puts might appear out-of-order. For
83    // example, during log replication between two clusters.
84    //
85    // If the delete marker has lived longer than its column-family's TTL then
86    // the delete marker will be removed even if time.to.purge.deletes has not
87    // passed. This is because all the Puts that this delete marker can influence
88    // would have also expired. (Removing of delete markers on col family TTL will
89    // not happen if min-versions is set to non-zero)
90    //
91    // But, if time.to.purge.deletes has not expired then a delete
92    // marker will not be removed just because there are no Puts that it is
93    // currently influencing. This is because Puts, that this delete can
94    // influence. may appear out of order.
95    private final long timeToPurgeDeletes;
96  
97    /**
98     * This variable shows whether there is an null column in the query. There always exists a null
99     * column in the wildcard column query. There maybe exists a null column in the explicit column
100    * query based on the first column.
101    */
102   private final boolean hasNullColumn;
103 
104   /** readPoint over which the KVs are unconditionally included */
105   private final long maxReadPointToTrackVersions;
106 
107   /**
108    * Oldest put in any of the involved store files Used to decide whether it is ok to delete family
109    * delete marker of this store keeps deleted KVs.
110    */
111   protected final long earliestPutTs;
112 
113   private final byte[] stopRow;
114 
115   private byte[] dropDeletesFromRow = null, dropDeletesToRow = null;
116 
117   private LegacyScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
118       boolean hasNullColumn, DeleteTracker deletes, ScanType scanType, long readPointToUse,
119       long earliestPutTs, long oldestUnexpiredTS, long now) {
120     super(createStartKeyFromRow(scan.getStartRow(), scanInfo), scanInfo, columns, oldestUnexpiredTS,
121         now);
122     TimeRange timeRange = scan.getColumnFamilyTimeRange().get(scanInfo.getFamily());
123     if (timeRange == null) {
124       this.tr = scan.getTimeRange();
125     } else {
126       this.tr = timeRange;
127     }
128     this.hasNullColumn = hasNullColumn;
129     this.deletes = deletes;
130     this.filter = scan.getFilter();
131     this.maxReadPointToTrackVersions = readPointToUse;
132     this.timeToPurgeDeletes = scanInfo.getTimeToPurgeDeletes();
133     this.earliestPutTs = earliestPutTs;
134 
135     /* how to deal with deletes */
136     this.keepDeletedCells = scanInfo.getKeepDeletedCells();
137     this.retainDeletesInOutput = scanType == ScanType.COMPACT_RETAIN_DELETES;
138     this.stopRow = scan.getStopRow();
139   }
140 
141   private LegacyScanQueryMatcher(Scan scan, ScanInfo scanInfo, ColumnTracker columns,
142       boolean hasNullColumn, DeleteTracker deletes, ScanType scanType, long readPointToUse,
143       long earliestPutTs, long oldestUnexpiredTS, long now, byte[] dropDeletesFromRow,
144       byte[] dropDeletesToRow) {
145     this(scan, scanInfo, columns, hasNullColumn, deletes, scanType, readPointToUse, earliestPutTs,
146         oldestUnexpiredTS, now);
147     this.dropDeletesFromRow = Preconditions.checkNotNull(dropDeletesFromRow);
148     this.dropDeletesToRow = Preconditions.checkNotNull(dropDeletesToRow);
149   }
150 
151   @Override
152   public MatchCode match(Cell cell) throws IOException {
153     if (filter != null && filter.filterAllRemaining()) {
154       return MatchCode.DONE_SCAN;
155     }
156     MatchCode returnCode = preCheck(cell);
157     if (returnCode != null) {
158       return returnCode;
159     }
160     /*
161      * The delete logic is pretty complicated now.
162      * This is corroborated by the following:
163      * 1. The store might be instructed to keep deleted rows around.
164      * 2. A scan can optionally see past a delete marker now.
165      * 3. If deleted rows are kept, we have to find out when we can
166      *    remove the delete markers.
167      * 4. Family delete markers are always first (regardless of their TS)
168      * 5. Delete markers should not be counted as version
169      * 6. Delete markers affect puts of the *same* TS
170      * 7. Delete marker need to be version counted together with puts
171      *    they affect
172      */
173     long timestamp = cell.getTimestamp();
174     byte typeByte = cell.getTypeByte();
175     long mvccVersion = cell.getSequenceId();
176     int qualifierOffset = cell.getQualifierOffset();
177     int qualifierLength = cell.getQualifierLength();
178     if (CellUtil.isDelete(typeByte)) {
179       if (keepDeletedCells == KeepDeletedCells.FALSE
180           || (keepDeletedCells == KeepDeletedCells.TTL && timestamp < oldestUnexpiredTS)) {
181         // first ignore delete markers if the scanner can do so, and the
182         // range does not include the marker
183         //
184         // during flushes and compactions also ignore delete markers newer
185         // than the readpoint of any open scanner, this prevents deleted
186         // rows that could still be seen by a scanner from being collected
187         boolean includeDeleteMarker = tr.withinOrAfterTimeRange(timestamp);
188         if (includeDeleteMarker && mvccVersion <= maxReadPointToTrackVersions) {
189           this.deletes.add(cell);
190         }
191         // Can't early out now, because DelFam come before any other keys
192       }
193 
194       if (timeToPurgeDeletes > 0
195           && (EnvironmentEdgeManager.currentTime() - timestamp) <= timeToPurgeDeletes) {
196         return MatchCode.INCLUDE;
197       } else if (retainDeletesInOutput || mvccVersion > maxReadPointToTrackVersions) {
198         // always include or it is not time yet to check whether it is OK
199         // to purge deltes or not
200         // if this is not a user scan (compaction), we can filter this deletemarker right here
201         // otherwise (i.e. a "raw" scan) we fall through to normal version and timerange checking
202         return MatchCode.INCLUDE;
203       } else if (keepDeletedCells == KeepDeletedCells.TRUE
204           || (keepDeletedCells == KeepDeletedCells.TTL && timestamp >= oldestUnexpiredTS)) {
205         if (timestamp < earliestPutTs) {
206           // keeping delete rows, but there are no puts older than
207           // this delete in the store files.
208           return columns.getNextRowOrNextColumn(cell.getQualifierArray(), qualifierOffset,
209             qualifierLength);
210         }
211         // else: fall through and do version counting on the
212         // delete markers
213       } else {
214         return MatchCode.SKIP;
215       }
216       // note the following next else if...
217       // delete marker are not subject to other delete markers
218     } else if (!this.deletes.isEmpty()) {
219       DeleteResult deleteResult = deletes.isDeleted(cell);
220       switch (deleteResult) {
221         case FAMILY_DELETED:
222         case COLUMN_DELETED:
223           return columns.getNextRowOrNextColumn(cell.getQualifierArray(), qualifierOffset,
224             qualifierLength);
225         case VERSION_DELETED:
226         case FAMILY_VERSION_DELETED:
227           return MatchCode.SKIP;
228         case NOT_DELETED:
229           break;
230         default:
231           throw new RuntimeException("UNEXPECTED");
232         }
233     }
234 
235     int timestampComparison = tr.compare(timestamp);
236     if (timestampComparison >= 1) {
237       return MatchCode.SKIP;
238     } else if (timestampComparison <= -1) {
239       return columns.getNextRowOrNextColumn(cell.getQualifierArray(), qualifierOffset,
240         qualifierLength);
241     }
242 
243     // STEP 1: Check if the column is part of the requested columns
244     MatchCode colChecker = columns.checkColumn(cell.getQualifierArray(),
245       qualifierOffset, qualifierLength, typeByte);
246     if (colChecker == MatchCode.INCLUDE) {
247       ReturnCode filterResponse = ReturnCode.SKIP;
248       // STEP 2: Yes, the column is part of the requested columns. Check if filter is present
249       if (filter != null) {
250         // STEP 3: Filter the key value and return if it filters out
251         filterResponse = filter.filterKeyValue(cell);
252         switch (filterResponse) {
253         case SKIP:
254           return MatchCode.SKIP;
255         case NEXT_COL:
256           return columns.getNextRowOrNextColumn(cell.getQualifierArray(),
257             qualifierOffset, qualifierLength);
258         case NEXT_ROW:
259           return MatchCode.SEEK_NEXT_ROW;
260         case SEEK_NEXT_USING_HINT:
261           return MatchCode.SEEK_NEXT_USING_HINT;
262         default:
263           //It means it is either include or include and seek next
264           break;
265         }
266       }
267       /*
268        * STEP 4: Reaching this step means the column is part of the requested columns and either
269        * the filter is null or the filter has returned INCLUDE or INCLUDE_AND_NEXT_COL response.
270        * Now check the number of versions needed. This method call returns SKIP, INCLUDE,
271        * INCLUDE_AND_SEEK_NEXT_ROW, INCLUDE_AND_SEEK_NEXT_COL.
272        *
273        * FilterResponse            ColumnChecker               Desired behavior
274        * INCLUDE                   SKIP                        row has already been included, SKIP.
275        * INCLUDE                   INCLUDE                     INCLUDE
276        * INCLUDE                   INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
277        * INCLUDE                   INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
278        * INCLUDE_AND_SEEK_NEXT_COL SKIP                        row has already been included, SKIP.
279        * INCLUDE_AND_SEEK_NEXT_COL INCLUDE                     INCLUDE_AND_SEEK_NEXT_COL
280        * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_COL   INCLUDE_AND_SEEK_NEXT_COL
281        * INCLUDE_AND_SEEK_NEXT_COL INCLUDE_AND_SEEK_NEXT_ROW   INCLUDE_AND_SEEK_NEXT_ROW
282        *
283        * In all the above scenarios, we return the column checker return value except for
284        * FilterResponse (INCLUDE_AND_SEEK_NEXT_COL) and ColumnChecker(INCLUDE)
285        */
286       colChecker =
287           columns.checkVersions(cell.getQualifierArray(), qualifierOffset,
288               qualifierLength, timestamp, typeByte,
289             mvccVersion > maxReadPointToTrackVersions);
290       return (filterResponse == ReturnCode.INCLUDE_AND_NEXT_COL &&
291           colChecker == MatchCode.INCLUDE) ? MatchCode.INCLUDE_AND_SEEK_NEXT_COL
292           : colChecker;
293     }
294     return colChecker;
295   }
296 
297   @Override
298   public boolean hasNullColumnInQuery() {
299     return hasNullColumn;
300   }
301 
302   /**
303    * Handle partial-drop-deletes. As we match keys in order, when we have a range from which we can
304    * drop deletes, we can set retainDeletesInOutput to false for the duration of this range only,
305    * and maintain consistency.
306    */
307   private void checkPartialDropDeleteRange(Cell curCell) {
308     byte[] rowArray = curCell.getRowArray();
309     int rowOffset = curCell.getRowOffset();
310     short rowLength = curCell.getRowLength();
311     // If partial-drop-deletes are used, initially, dropDeletesFromRow and dropDeletesToRow
312     // are both set, and the matcher is set to retain deletes. We assume ordered keys. When
313     // dropDeletesFromRow is leq current kv, we start dropping deletes and reset
314     // dropDeletesFromRow; thus the 2nd "if" starts to apply.
315     if ((dropDeletesFromRow != null)
316         && (Arrays.equals(dropDeletesFromRow, HConstants.EMPTY_START_ROW)
317             || (Bytes.compareTo(rowArray, rowOffset, rowLength, dropDeletesFromRow, 0,
318               dropDeletesFromRow.length) >= 0))) {
319       retainDeletesInOutput = false;
320       dropDeletesFromRow = null;
321     }
322     // If dropDeletesFromRow is null and dropDeletesToRow is set, we are inside the partial-
323     // drop-deletes range. When dropDeletesToRow is leq current kv, we stop dropping deletes,
324     // and reset dropDeletesToRow so that we don't do any more compares.
325     if ((dropDeletesFromRow == null) && (dropDeletesToRow != null)
326         && !Arrays.equals(dropDeletesToRow, HConstants.EMPTY_END_ROW) && (Bytes.compareTo(rowArray,
327           rowOffset, rowLength, dropDeletesToRow, 0, dropDeletesToRow.length) >= 0)) {
328       retainDeletesInOutput = true;
329       dropDeletesToRow = null;
330     }
331   }
332 
333   @Override
334   protected void reset() {
335     checkPartialDropDeleteRange(currentRow);
336   }
337 
338   @Override
339   public boolean isUserScan() {
340     return false;
341   }
342 
343   @Override
344   public boolean moreRowsMayExistAfter(Cell cell) {
345     if (this.stopRow == null || this.stopRow.length == 0) {
346       return true;
347     }
348     return rowComparator.compareRows(cell, stopRow, 0, stopRow.length) < 0;
349   }
350 
351   @Override
352   public Filter getFilter() {
353     return filter;
354   }
355 
356   @Override
357   public Cell getNextKeyHint(Cell cell) throws IOException {
358     if (filter == null) {
359       return null;
360     } else {
361       return filter.getNextCellHint(cell);
362     }
363   }
364 
365   public static LegacyScanQueryMatcher create(Scan scan, ScanInfo scanInfo,
366       NavigableSet<byte[]> columns, ScanType scanType, long readPointToUse, long earliestPutTs,
367       long oldestUnexpiredTS, long now, byte[] dropDeletesFromRow, byte[] dropDeletesToRow,
368       RegionCoprocessorHost regionCoprocessorHost) throws IOException {
369     int maxVersions = Math.min(scan.getMaxVersions(), scanInfo.getMaxVersions());
370     boolean hasNullColumn;
371     ColumnTracker columnTracker;
372     if (columns == null || columns.size() == 0) {
373       // there is always a null column in the wildcard column query.
374       hasNullColumn = true;
375       // use a specialized scan for wildcard column tracker.
376       columnTracker = new ScanWildcardColumnTracker(scanInfo.getMinVersions(), maxVersions,
377           oldestUnexpiredTS);
378     } else {
379       // We can share the ExplicitColumnTracker, diff is we reset
380       // between rows, not between storefiles.
381       // whether there is null column in the explicit column query
382       hasNullColumn = columns.first().length == 0;
383       columnTracker = new ExplicitColumnTracker(columns, scanInfo.getMinVersions(), maxVersions,
384           oldestUnexpiredTS);
385     }
386     DeleteTracker deletes = instantiateDeleteTracker(regionCoprocessorHost);
387     if (dropDeletesFromRow == null) {
388       return new LegacyScanQueryMatcher(scan, scanInfo, columnTracker, hasNullColumn, deletes,
389           scanType, readPointToUse, earliestPutTs, oldestUnexpiredTS, now);
390     } else {
391       return new LegacyScanQueryMatcher(scan, scanInfo, columnTracker, hasNullColumn, deletes,
392           scanType, readPointToUse, earliestPutTs, oldestUnexpiredTS, now, dropDeletesFromRow,
393           dropDeletesToRow);
394     }
395   }
396 }