View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver.querymatcher;
19  
20  import java.io.IOException;
21  import java.util.NavigableSet;
22  
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.HConstants;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;
27  import org.apache.hadoop.hbase.util.Bytes;
28  
29  /**
30   * This class is used for the tracking and enforcement of columns and numbers of versions during the
31   * course of a Get or Scan operation, when explicit column qualifiers have been asked for in the
32   * query. With a little magic (see {@link ScanQueryMatcher}), we can use this matcher for both scans
33   * and gets. The main difference is 'next' and 'done' collapse for the scan case (since we see all
34   * columns in order), and we only reset between rows.
35   * <p>
36   * This class is utilized by {@link ScanQueryMatcher} mainly through two methods:
37   * <ul>
38   * <li>{@link #checkColumn} is called when a Put satisfies all other conditions of the query.</li>
39   * <li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher believes that the current
40   * column should be skipped (by timestamp, filter etc.)</li>
41   * </ul>
42   * <p>
43   * These two methods returns a
44   * {@link org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode} to define
45   * what action should be taken.
46   * <p>
47   * This class is NOT thread-safe as queries are never multi-threaded
48   */
49  @InterfaceAudience.Private
50  public class ExplicitColumnTracker implements ColumnTracker {
51  
52    private final int maxVersions;
53    private final int minVersions;
54  
55    /**
56     * Contains the list of columns that the ExplicitColumnTracker is tracking. Each ColumnCount
57     * instance also tracks how many versions of the requested column have been returned.
58     */
59    private final ColumnCount[] columns;
60    private int index;
61    private ColumnCount column;
62    /**
63     * Keeps track of the latest timestamp included for current column. Used to eliminate duplicates.
64     */
65    private long latestTSOfCurrentColumn;
66    private long oldestStamp;
67  
68    /**
69     * Default constructor.
70     * @param columns columns specified user in query
71     * @param minVersions minimum number of versions to keep
72     * @param maxVersions maximum versions to return per column
73     * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
74     */
75    public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions, int maxVersions,
76        long oldestUnexpiredTS) {
77      this.maxVersions = maxVersions;
78      this.minVersions = minVersions;
79      this.oldestStamp = oldestUnexpiredTS;
80      this.columns = new ColumnCount[columns.size()];
81      int i = 0;
82      for (byte[] column : columns) {
83        this.columns[i++] = new ColumnCount(column);
84      }
85      reset();
86    }
87  
88    /**
89     * Done when there are no more columns to match against.
90     */
91    public boolean done() {
92      return this.index >= columns.length;
93    }
94  
95    public ColumnCount getColumnHint() {
96      return this.column;
97    }
98  
99    /**
100    * {@inheritDoc}
101    */
102   @Override
103   public ScanQueryMatcher.MatchCode checkColumn(byte[] bytes, int offset, int length, byte type) {
104     // delete markers should never be passed to an
105     // *Explicit*ColumnTracker
106     assert !CellUtil.isDelete(type);
107     do {
108       // No more columns left, we are done with this query
109       if (done()) {
110         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
111       }
112 
113       // No more columns to match against, done with storefile
114       if (this.column == null) {
115         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
116       }
117 
118       // Compare specific column to current column
119       int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(), column.getLength(), bytes,
120         offset, length);
121 
122       // Column Matches. Return include code. The caller would call checkVersions
123       // to limit the number of versions.
124       if (ret == 0) {
125         return ScanQueryMatcher.MatchCode.INCLUDE;
126       }
127 
128       resetTS();
129 
130       if (ret > 0) {
131         // The current KV is smaller than the column the ExplicitColumnTracker
132         // is interested in, so seek to that column of interest.
133         return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
134       }
135 
136       // The current KV is bigger than the column the ExplicitColumnTracker
137       // is interested in. That means there is no more data for the column
138       // of interest. Advance the ExplicitColumnTracker state to next
139       // column of interest, and check again.
140       ++this.index;
141       if (done()) {
142         // No more to match, do not include, done with this row.
143         return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
144       }
145       // This is the recursive case.
146       this.column = this.columns[this.index];
147     } while (true);
148   }
149 
150   @Override
151   public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
152       long timestamp, byte type, boolean ignoreCount) throws IOException {
153     assert !CellUtil.isDelete(type);
154     if (ignoreCount) {
155       return ScanQueryMatcher.MatchCode.INCLUDE;
156     }
157     // Check if it is a duplicate timestamp
158     if (sameAsPreviousTS(timestamp)) {
159       // If duplicate, skip this Key
160       return ScanQueryMatcher.MatchCode.SKIP;
161     }
162     int count = this.column.increment();
163     if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
164       // Done with versions for this column
165       ++this.index;
166       resetTS();
167       if (done()) {
168         // We have served all the requested columns.
169         this.column = null;
170         return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
171       }
172       // We are done with current column; advance to next column
173       // of interest.
174       this.column = this.columns[this.index];
175       return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
176     }
177     setTS(timestamp);
178     return ScanQueryMatcher.MatchCode.INCLUDE;
179   }
180 
181   // Called between every row.
182   public void reset() {
183     this.index = 0;
184     this.column = this.columns[this.index];
185     for (ColumnCount col : this.columns) {
186       col.setCount(0);
187     }
188     resetTS();
189   }
190 
191   private void resetTS() {
192     latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
193   }
194 
195   private void setTS(long timestamp) {
196     latestTSOfCurrentColumn = timestamp;
197   }
198 
199   private boolean sameAsPreviousTS(long timestamp) {
200     return timestamp == latestTSOfCurrentColumn;
201   }
202 
203   private boolean isExpired(long timestamp) {
204     return timestamp < oldestStamp;
205   }
206 
207   /**
208    * This method is used to inform the column tracker that we are done with this column. We may get
209    * this information from external filters or timestamp range and we then need to indicate this
210    * information to tracker. It is required only in case of ExplicitColumnTracker.
211    * @param bytes
212    * @param offset
213    * @param length
214    */
215   public void doneWithColumn(byte[] bytes, int offset, int length) {
216     while (this.column != null) {
217       int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(), column.getLength(),
218         bytes, offset, length);
219       resetTS();
220       if (compare <= 0) {
221         ++this.index;
222         if (done()) {
223           // Will not hit any more columns in this storefile
224           this.column = null;
225         } else {
226           this.column = this.columns[this.index];
227         }
228         if (compare <= -1) {
229           continue;
230         }
231       }
232       return;
233     }
234   }
235 
236   public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset, int qualLength) {
237     doneWithColumn(bytes, offset, qualLength);
238     if (getColumnHint() == null) {
239       return MatchCode.SEEK_NEXT_ROW;
240     } else {
241       return MatchCode.SEEK_NEXT_COL;
242     }
243   }
244 
245   public boolean isDone(long timestamp) {
246     return minVersions <= 0 && isExpired(timestamp);
247   }
248 }