/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19
20 package org.apache.hadoop.hbase.filter;
21
22 import java.io.IOException;
23 import java.util.List;
24
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.hbase.classification.InterfaceStability;
27 import org.apache.hadoop.hbase.Cell;
28 import org.apache.hadoop.hbase.KeyValue;
29 import org.apache.hadoop.hbase.exceptions.DeserializationException;
30
31 /**
32 * Interface for row and column filters directly applied within the regionserver.
33 *
34 * A filter can expect the following call sequence:
35 * <ul>
36 * <li> {@link #reset()} : reset the filter state before filtering a new row. </li>
37 * <li> {@link #filterAllRemaining()}: true means row scan is over; false means keep going. </li>
38 * <li> {@link #filterRowKey(byte[],int,int)}: true means drop this row; false means include.</li>
39 * <li> {@link #filterKeyValue(Cell)}: decides whether to include or exclude this KeyValue.
40 * See {@link ReturnCode}. </li>
41 * <li> {@link #transform(KeyValue)}: if the KeyValue is included, let the filter transform the
42 * KeyValue. </li>
43 * <li> {@link #filterRowCells(List)}: allows direct modification of the final list to be submitted
44 * <li> {@link #filterRow()}: last chance to drop entire row based on the sequence of
45 * filter calls. Eg: filter a row if it doesn't contain a specified column. </li>
46 * </ul>
47 *
48 * Filter instances are created one per region/scan. This abstract class replaces
49 * the old RowFilterInterface.
50 *
51 * When implementing your own filters, consider inheriting {@link FilterBase} to help
52 * you reduce boilerplate.
53 *
54 * @see FilterBase
55 */
56 @InterfaceAudience.Public
57 @InterfaceStability.Stable
58 public abstract class Filter {
59 protected transient boolean reversed;
60 /**
61 * Reset the state of the filter between rows.
62 *
63 * Concrete implementers can signal a failure condition in their code by throwing an
64 * {@link IOException}.
65 *
66 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
67 */
68 abstract public void reset() throws IOException;
69
70 /**
71 * Filters a row based on the row key. If this returns true, the entire row will be excluded. If
72 * false, each KeyValue in the row will be passed to {@link #filterKeyValue(Cell)} below.
73 *
74 * Concrete implementers can signal a failure condition in their code by throwing an
75 * {@link IOException}.
76 *
77 * @param buffer buffer containing row key
78 * @param offset offset into buffer where row key starts
79 * @param length length of the row key
80 * @return true, remove entire row, false, include the row (maybe).
81 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
82 */
83 abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
84
85 /**
86 * If this returns true, the scan will terminate.
87 *
88 * Concrete implementers can signal a failure condition in their code by throwing an
89 * {@link IOException}.
90 *
91 * @return true to end scan, false to continue.
92 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
93 */
94 abstract public boolean filterAllRemaining() throws IOException;
95
96 /**
97 * A way to filter based on the column family, column qualifier and/or the column value. Return
98 * code is described below. This allows filters to filter only certain number of columns, then
99 * terminate without matching ever column.
100 *
101 * If filterRowKey returns true, filterKeyValue needs to be consistent with it.
102 *
103 * filterKeyValue can assume that filterRowKey has already been called for the row.
104 *
105 * If your filter returns <code>ReturnCode.NEXT_ROW</code>, it should return
106 * <code>ReturnCode.NEXT_ROW</code> until {@link #reset()} is called just in case the caller calls
107 * for the next row.
108 *
109 * Concrete implementers can signal a failure condition in their code by throwing an
110 * {@link IOException}.
111 *
112 * @param v the Cell in question
113 * @return code as described below
114 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
115 * @see Filter.ReturnCode
116 */
117 abstract public ReturnCode filterKeyValue(final Cell v) throws IOException;
118
119 /**
120 * Give the filter a chance to transform the passed KeyValue. If the Cell is changed a new
121 * Cell object must be returned.
122 *
123 * @see org.apache.hadoop.hbase.KeyValue#shallowCopy()
124 * The transformed KeyValue is what is eventually returned to the client. Most filters will
125 * return the passed KeyValue unchanged.
126 * @see org.apache.hadoop.hbase.filter.KeyOnlyFilter#transform(KeyValue) for an example of a
127 * transformation.
128 *
129 * Concrete implementers can signal a failure condition in their code by throwing an
130 * {@link IOException}.
131 *
132 * @param v the KeyValue in question
133 * @return the changed KeyValue
134 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
135 */
136 abstract public Cell transformCell(final Cell v) throws IOException;
137
138 /**
139 * WARNING: please to not override this method. Instead override {@link #transformCell(Cell)}.
140 * This is for transition from 0.94 -> 0.96
141 **/
142 @Deprecated // use Cell transformCell(final Cell)
143 abstract public KeyValue transform(final KeyValue currentKV) throws IOException;
144
145
146 /**
147 * Return codes for filterValue().
148 */
149 @InterfaceAudience.Public
150 @InterfaceStability.Stable
151 public enum ReturnCode {
152 /**
153 * Include the Cell
154 */
155 INCLUDE,
156 /**
157 * Include the Cell and seek to the next column skipping older versions.
158 */
159 INCLUDE_AND_NEXT_COL,
160 /**
161 * Skip this Cell
162 */
163 SKIP,
164 /**
165 * Skip this column. Go to the next column in this row.
166 */
167 NEXT_COL,
168 /**
169 * Seek to next row in current family. It may still pass a cell whose family is different but
170 * row is the same as previous cell to {@link #filterKeyValue(Cell)} , even if we get a NEXT_ROW
171 * returned for previous cell. For more details see HBASE-18368. <br>
172 * Once reset() method was invoked, then we switch to the next row for all family, and you can
173 * catch the event by invoking CellUtils.matchingRows(previousCell, currentCell). <br>
174 * Note that filterRow() will still be called. <br>
175 */
176 NEXT_ROW,
177 /**
178 * Seek to next key which is given as hint by the filter.
179 */
180 SEEK_NEXT_USING_HINT,
181 /**
182 * Include KeyValue and done with row, seek to next. See NEXT_ROW
183 */
184 INCLUDE_AND_SEEK_NEXT_ROW,
185 }
186
187 /**
188 * Chance to alter the list of Cells to be submitted. Modifications to the list will carry on
189 *
190 * Concrete implementers can signal a failure condition in their code by throwing an
191 * {@link IOException}.
192 *
193 * @param kvs the list of Cells to be filtered
194 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
195 */
196 abstract public void filterRowCells(List<Cell> kvs) throws IOException;
197
198 /**
199 * Primarily used to check for conflicts with scans(such as scans that do not read a full row at a
200 * time).
201 *
202 * @return True if this filter actively uses filterRowCells(List) or filterRow().
203 */
204 abstract public boolean hasFilterRow();
205
206 /**
207 * Last chance to veto row based on previous {@link #filterKeyValue(Cell)} calls. The filter
208 * needs to retain state then return a particular value for this call if they wish to exclude a
209 * row if a certain column is missing (for example).
210 *
211 * Concrete implementers can signal a failure condition in their code by throwing an
212 * {@link IOException}.
213 *
214 * @return true to exclude row, false to include row.
215 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
216 */
217 abstract public boolean filterRow() throws IOException;
218
219 /**
220 * @param currentKV
221 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
222 * seek to next.
223 * @throws IOException
224 * Function is Deprecated. Use {@link #getNextCellHint(Cell)} instead.
225 */
226 @Deprecated
227 abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
228
229 /**
230 * If the filter returns the match code SEEK_NEXT_USING_HINT, then it should also tell which is
231 * the next key it must seek to. After receiving the match code SEEK_NEXT_USING_HINT, the
232 * QueryMatcher would call this function to find out which key it must next seek to.
233 *
234 * Concrete implementers can signal a failure condition in their code by throwing an
235 * {@link IOException}.
236 *
237 * @return KeyValue which must be next seeked. return null if the filter is not sure which key to
238 * seek to next.
239 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
240 */
241 abstract public Cell getNextCellHint(final Cell currentKV) throws IOException;
242
243 /**
244 * Check that given column family is essential for filter to check row. Most filters always return
245 * true here. But some could have more sophisticated logic which could significantly reduce
246 * scanning process by not even touching columns until we are 100% sure that it's data is needed
247 * in result.
248 *
249 * Concrete implementers can signal a failure condition in their code by throwing an
250 * {@link IOException}.
251 *
252 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
253 */
254 abstract public boolean isFamilyEssential(byte[] name) throws IOException;
255
256 /**
257 * TODO: JAVADOC
258 *
259 * Concrete implementers can signal a failure condition in their code by throwing an
260 * {@link IOException}.
261 *
262 * @return The filter serialized using pb
263 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
264 */
265 abstract public byte[] toByteArray() throws IOException;
266
267 /**
268 *
269 * Concrete implementers can signal a failure condition in their code by throwing an
270 * {@link IOException}.
271 *
272 * @param pbBytes A pb serialized {@link Filter} instance
273 * @return An instance of {@link Filter} made from <code>bytes</code>
274 * @throws DeserializationException
275 * @see #toByteArray
276 */
277 public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
278 throw new DeserializationException(
279 "parseFrom called on base Filter, but should be called on derived type");
280 }
281
282 /**
283 * Concrete implementers can signal a failure condition in their code by throwing an
284 * {@link IOException}.
285 *
286 * @param other
287 * @return true if and only if the fields of the filter that are serialized are equal to the
288 * corresponding fields in other. Used for testing.
289 * @throws IOException in case an I/O or an filter specific failure needs to be signaled.
290 */
291 abstract boolean areSerializedFieldsEqual(Filter other);
292
293 /**
294 * alter the reversed scan flag
295 * @param reversed flag
296 */
297 public void setReversed(boolean reversed) {
298 this.reversed = reversed;
299 }
300
301 public boolean isReversed() {
302 return this.reversed;
303 }
304 }