1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.regionserver.querymatcher;
19
20 import java.io.IOException;
21 import java.util.NavigableSet;
22
23 import org.apache.hadoop.hbase.CellUtil;
24 import org.apache.hadoop.hbase.HConstants;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;
27 import org.apache.hadoop.hbase.util.Bytes;
28
29 /**
30 * This class is used for the tracking and enforcement of columns and numbers of versions during the
31 * course of a Get or Scan operation, when explicit column qualifiers have been asked for in the
32 * query. With a little magic (see {@link ScanQueryMatcher}), we can use this matcher for both scans
33 * and gets. The main difference is 'next' and 'done' collapse for the scan case (since we see all
34 * columns in order), and we only reset between rows.
35 * <p>
36 * This class is utilized by {@link ScanQueryMatcher} mainly through two methods:
37 * <ul>
38 * <li>{@link #checkColumn} is called when a Put satisfies all other conditions of the query.</li>
39 * <li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher believes that the current
40 * column should be skipped (by timestamp, filter etc.)</li>
41 * </ul>
42 * <p>
43 * These two methods returns a
44 * {@link org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode} to define
45 * what action should be taken.
46 * <p>
47 * This class is NOT thread-safe as queries are never multi-threaded
48 */
49 @InterfaceAudience.Private
50 public class ExplicitColumnTracker implements ColumnTracker {
51
52 private final int maxVersions;
53 private final int minVersions;
54
55 /**
56 * Contains the list of columns that the ExplicitColumnTracker is tracking. Each ColumnCount
57 * instance also tracks how many versions of the requested column have been returned.
58 */
59 private final ColumnCount[] columns;
60 private int index;
61 private ColumnCount column;
62 /**
63 * Keeps track of the latest timestamp included for current column. Used to eliminate duplicates.
64 */
65 private long latestTSOfCurrentColumn;
66 private long oldestStamp;
67
68 /**
69 * Default constructor.
70 * @param columns columns specified user in query
71 * @param minVersions minimum number of versions to keep
72 * @param maxVersions maximum versions to return per column
73 * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
74 */
75 public ExplicitColumnTracker(NavigableSet<byte[]> columns, int minVersions, int maxVersions,
76 long oldestUnexpiredTS) {
77 this.maxVersions = maxVersions;
78 this.minVersions = minVersions;
79 this.oldestStamp = oldestUnexpiredTS;
80 this.columns = new ColumnCount[columns.size()];
81 int i = 0;
82 for (byte[] column : columns) {
83 this.columns[i++] = new ColumnCount(column);
84 }
85 reset();
86 }
87
88 /**
89 * Done when there are no more columns to match against.
90 */
91 public boolean done() {
92 return this.index >= columns.length;
93 }
94
95 public ColumnCount getColumnHint() {
96 return this.column;
97 }
98
99 /**
100 * {@inheritDoc}
101 */
102 @Override
103 public ScanQueryMatcher.MatchCode checkColumn(byte[] bytes, int offset, int length, byte type) {
104 // delete markers should never be passed to an
105 // *Explicit*ColumnTracker
106 assert !CellUtil.isDelete(type);
107 do {
108 // No more columns left, we are done with this query
109 if (done()) {
110 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
111 }
112
113 // No more columns to match against, done with storefile
114 if (this.column == null) {
115 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
116 }
117
118 // Compare specific column to current column
119 int ret = Bytes.compareTo(column.getBuffer(), column.getOffset(), column.getLength(), bytes,
120 offset, length);
121
122 // Column Matches. Return include code. The caller would call checkVersions
123 // to limit the number of versions.
124 if (ret == 0) {
125 return ScanQueryMatcher.MatchCode.INCLUDE;
126 }
127
128 resetTS();
129
130 if (ret > 0) {
131 // The current KV is smaller than the column the ExplicitColumnTracker
132 // is interested in, so seek to that column of interest.
133 return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
134 }
135
136 // The current KV is bigger than the column the ExplicitColumnTracker
137 // is interested in. That means there is no more data for the column
138 // of interest. Advance the ExplicitColumnTracker state to next
139 // column of interest, and check again.
140 ++this.index;
141 if (done()) {
142 // No more to match, do not include, done with this row.
143 return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row
144 }
145 // This is the recursive case.
146 this.column = this.columns[this.index];
147 } while (true);
148 }
149
150 @Override
151 public ScanQueryMatcher.MatchCode checkVersions(byte[] bytes, int offset, int length,
152 long timestamp, byte type, boolean ignoreCount) throws IOException {
153 assert !CellUtil.isDelete(type);
154 if (ignoreCount) {
155 return ScanQueryMatcher.MatchCode.INCLUDE;
156 }
157 // Check if it is a duplicate timestamp
158 if (sameAsPreviousTS(timestamp)) {
159 // If duplicate, skip this Key
160 return ScanQueryMatcher.MatchCode.SKIP;
161 }
162 int count = this.column.increment();
163 if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) {
164 // Done with versions for this column
165 ++this.index;
166 resetTS();
167 if (done()) {
168 // We have served all the requested columns.
169 this.column = null;
170 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW;
171 }
172 // We are done with current column; advance to next column
173 // of interest.
174 this.column = this.columns[this.index];
175 return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
176 }
177 setTS(timestamp);
178 return ScanQueryMatcher.MatchCode.INCLUDE;
179 }
180
181 // Called between every row.
182 public void reset() {
183 this.index = 0;
184 this.column = this.columns[this.index];
185 for (ColumnCount col : this.columns) {
186 col.setCount(0);
187 }
188 resetTS();
189 }
190
191 private void resetTS() {
192 latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP;
193 }
194
195 private void setTS(long timestamp) {
196 latestTSOfCurrentColumn = timestamp;
197 }
198
199 private boolean sameAsPreviousTS(long timestamp) {
200 return timestamp == latestTSOfCurrentColumn;
201 }
202
203 private boolean isExpired(long timestamp) {
204 return timestamp < oldestStamp;
205 }
206
207 /**
208 * This method is used to inform the column tracker that we are done with this column. We may get
209 * this information from external filters or timestamp range and we then need to indicate this
210 * information to tracker. It is required only in case of ExplicitColumnTracker.
211 * @param bytes
212 * @param offset
213 * @param length
214 */
215 public void doneWithColumn(byte[] bytes, int offset, int length) {
216 while (this.column != null) {
217 int compare = Bytes.compareTo(column.getBuffer(), column.getOffset(), column.getLength(),
218 bytes, offset, length);
219 resetTS();
220 if (compare <= 0) {
221 ++this.index;
222 if (done()) {
223 // Will not hit any more columns in this storefile
224 this.column = null;
225 } else {
226 this.column = this.columns[this.index];
227 }
228 if (compare <= -1) {
229 continue;
230 }
231 }
232 return;
233 }
234 }
235
236 public MatchCode getNextRowOrNextColumn(byte[] bytes, int offset, int qualLength) {
237 doneWithColumn(bytes, offset, qualLength);
238 if (getColumnHint() == null) {
239 return MatchCode.SEEK_NEXT_ROW;
240 } else {
241 return MatchCode.SEEK_NEXT_COL;
242 }
243 }
244
245 public boolean isDone(long timestamp) {
246 return minVersions <= 0 && isExpired(timestamp);
247 }
248 }