View Javadoc

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19  
20  package org.apache.hadoop.hbase.filter;
21  
22  import org.apache.hadoop.hbase.Cell;
23  import org.apache.hadoop.hbase.CellUtil;
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  
26  import java.io.IOException;
27  import java.util.ArrayList;
28  import java.util.Collections;
29  import java.util.List;
30  import java.util.Objects;
31  
32  /**
33   * FilterListWithOR represents an ordered list of filters which will be evaluated with an OR
34   * operator.
35   */
36  @InterfaceAudience.Private
37  public class FilterListWithOR extends FilterListBase {
38  
39    /**
40     * Save previous return code and previous cell for every filter in filter list. For MUST_PASS_ONE,
41     * we use the previous return code to decide whether we should pass current cell encountered to
42     * the filter. For MUST_PASS_ALL, the two list are meaningless.
43     */
44    private List<ReturnCode> prevFilterRCList = null;
45    private List<Cell> prevCellList = null;
46  
47    public FilterListWithOR(List<Filter> filters) {
48      super(filters);
49      prevFilterRCList =
50          new ArrayList<ReturnCode>(Collections.nCopies(filters.size(), (ReturnCode) null));
51      prevCellList = new ArrayList<Cell>(Collections.nCopies(filters.size(), (Cell) null));
52      subFiltersIncludedCell = new ArrayList<Boolean>(Collections.nCopies(filters.size(), false));
53    }
54  
55    @Override
56    public void addFilterLists(List<Filter> filters) {
57      if (checkAndGetReversed(filters, isReversed()) != isReversed()) {
58        throw new IllegalArgumentException("Filters in the list must have the same reversed flag");
59      }
60      this.filters.addAll(filters);
61      this.subFiltersIncludedCell.addAll(Collections.nCopies(filters.size(), false));
62      this.prevFilterRCList.addAll(Collections.nCopies(filters.size(), (ReturnCode) null));
63      this.prevCellList.addAll(Collections.nCopies(filters.size(), (Cell) null));
64    }
65  
66    @Override
67    protected String formatLogFilters(List<Filter> logFilters) {
68      return String.format("FilterList OR (%d/%d): %s", logFilters.size(), this.size(),
69        logFilters.toString());
70    }
71  
72    /**
73     * For MUST_PASS_ONE, we cannot make sure that when filter-A in filter list return NEXT_COL then
74     * the next cell passing to filterList will be the first cell in next column, because if filter-B
75     * in filter list return SKIP, then the filter list will return SKIP. In this case, we should pass
76     * the cell following the previous cell, and it's possible that the next cell has the same column
77     * as the previous cell even if filter-A has NEXT_COL returned for the previous cell. So we should
78     * save the previous cell and the return code list when checking previous cell for every filter in
79     * filter list, and verify if currentCell fit the previous return code, if fit then pass the
80     * currentCell to the corresponding filter. (HBASE-17678) <br>
81     * Note that: In StoreScanner level, NEXT_ROW will skip to the next row in current family, and in
82     * RegionScanner level, NEXT_ROW will skip to the next row in current family and switch to the
83     * next family for RegionScanner, INCLUDE_AND_NEXT_ROW is the same. so we should pass current cell
84     * to the filter, if row mismatch or row match but column family mismatch. (HBASE-18368)
85     * @see org.apache.hadoop.hbase.filter.Filter.ReturnCode
86     * @param subFilter which sub-filter to calculate the return code by using previous cell and
87     *          previous return code.
88     * @param prevCell the previous cell passed to given sub-filter.
89     * @param currentCell the current cell which will pass to given sub-filter.
90     * @param prevCode the previous return code for given sub-filter.
91     * @return return code calculated by using previous cell and previous return code. null means can
92     *         not decide which return code should return, so we will pass the currentCell to
93     *         subFilter for getting currentCell's return code, and it won't impact the sub-filter's
94     *         internal states.
95     */
96    private ReturnCode calculateReturnCodeByPrevCellAndRC(Filter subFilter, Cell currentCell,
97        Cell prevCell, ReturnCode prevCode) throws IOException {
98      if (prevCell == null || prevCode == null) {
99        return null;
100     }
101     switch (prevCode) {
102     case INCLUDE:
103     case SKIP:
104         return null;
105     case SEEK_NEXT_USING_HINT:
106         Cell nextHintCell = subFilter.getNextCellHint(prevCell);
107         return nextHintCell != null && compareCell(currentCell, nextHintCell) < 0
108           ? ReturnCode.SEEK_NEXT_USING_HINT : null;
109     case NEXT_COL:
110     case INCLUDE_AND_NEXT_COL:
111         // Once row changed, reset() will clear prevCells, so we need not to compare their rows
112         // because rows are the same here.
113         return CellUtil.matchingColumn(prevCell, currentCell) ? ReturnCode.NEXT_COL : null;
114     case NEXT_ROW:
115     case INCLUDE_AND_SEEK_NEXT_ROW:
116         // As described above, rows are definitely the same, so we only compare the family.
117         return CellUtil.matchingFamily(prevCell, currentCell) ? ReturnCode.NEXT_ROW : null;
118     default:
119         throw new IllegalStateException("Received code is not valid.");
120     }
121   }
122 
123   /**
124    * FilterList with MUST_PASS_ONE choose the minimal forward step among sub-filter in filter list.
125    * Let's call it: The Minimal Step Rule. So if filter-A in filter list return INCLUDE and filter-B
126    * in filter list return INCLUDE_AND_NEXT_COL, then the filter list should return INCLUDE. For
127    * SEEK_NEXT_USING_HINT, it's more special, because we do not know how far it will forward, so we
128    * use SKIP by default.<br/>
129    * <br/>
130    * The jump step will be:
131    *
132    * <pre>
133    * INCLUDE &lt; SKIP &lt; INCLUDE_AND_NEXT_COL &lt; NEXT_COL &lt; INCLUDE_AND_SEEK_NEXT_ROW &lt; NEXT_ROW &lt; SEEK_NEXT_USING_HINT
134    * </pre>
135    *
136    * Here, we have the following map to describe The Minimal Step Rule. if current return code (for
137    * previous sub-filters in filter list) is <strong>ReturnCode</strong>, and current filter returns
138    * <strong>localRC</strong>, then we should return map[ReturnCode][localRC] for the merged result,
139    * according to The Minimal Step Rule.<br/>
140    *
141    * <pre>
142    * LocalCode\ReturnCode       INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  SKIP      NEXT_COL              NEXT_ROW                  SEEK_NEXT_USING_HINT
143    * INCLUDE                    INCLUDE INCLUDE                  INCLUDE                    INCLUDE   INCLUDE               INCLUDE                   INCLUDE
144    * INCLUDE_AND_NEXT_COL       INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_NEXT_COL       INCLUDE   INCLUDE_AND_NEXT_COL  INCLUDE_AND_NEXT_COL      INCLUDE
145    * INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  INCLUDE   INCLUDE_AND_NEXT_COL  INCLUDE_AND_SEEK_NEXT_ROW INCLUDE
146    * SKIP                       INCLUDE INCLUDE                  INCLUDE                    SKIP      SKIP                  SKIP                      SKIP
147    * NEXT_COL                   INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_NEXT_COL       SKIP      NEXT_COL              NEXT_COL                  SKIP
148    * NEXT_ROW                   INCLUDE INCLUDE_AND_NEXT_COL     INCLUDE_AND_SEEK_NEXT_ROW  SKIP      NEXT_COL              NEXT_ROW                  SKIP
149    * SEEK_NEXT_USING_HINT       INCLUDE INCLUDE                  INCLUDE                    SKIP      SKIP                  SKIP                      SEEK_NEXT_USING_HINT
150    * </pre>
151    *
152    * @param rc Return code which is calculated by previous sub-filter(s) in filter list.
153    * @param localRC Return code of the current sub-filter in filter list.
154    * @return Return code which is merged by the return code of previous sub-filter(s) and the return
155    *         code of current sub-filter.
156    */
157   private ReturnCode mergeReturnCode(ReturnCode rc, ReturnCode localRC) {
158     if (rc == null) return localRC;
159     switch (localRC) {
160     case INCLUDE:
161       return ReturnCode.INCLUDE;
162     case INCLUDE_AND_NEXT_COL:
163       if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP,
164         ReturnCode.SEEK_NEXT_USING_HINT)) {
165         return ReturnCode.INCLUDE;
166       }
167       if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW,
168         ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
169         return ReturnCode.INCLUDE_AND_NEXT_COL;
170       }
171       break;
172     case INCLUDE_AND_SEEK_NEXT_ROW:
173       if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.SKIP,
174         ReturnCode.SEEK_NEXT_USING_HINT)) {
175         return ReturnCode.INCLUDE;
176       }
177       if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL, ReturnCode.NEXT_COL)) {
178         return ReturnCode.INCLUDE_AND_NEXT_COL;
179       }
180       if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW, ReturnCode.NEXT_ROW)) {
181         return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
182       }
183       break;
184     case SKIP:
185       if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
186         ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
187         return ReturnCode.INCLUDE;
188       }
189       if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW,
190         ReturnCode.SEEK_NEXT_USING_HINT)) {
191         return ReturnCode.SKIP;
192       }
193       break;
194     case NEXT_COL:
195       if (isInReturnCodes(rc, ReturnCode.INCLUDE)) {
196         return ReturnCode.INCLUDE;
197       }
198       if (isInReturnCodes(rc, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
199         return ReturnCode.NEXT_COL;
200       }
201       if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL,
202         ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
203         return ReturnCode.INCLUDE_AND_NEXT_COL;
204       }
205       if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) {
206         return ReturnCode.SKIP;
207       }
208       break;
209     case NEXT_ROW:
210       if (isInReturnCodes(rc, ReturnCode.INCLUDE)) {
211         return ReturnCode.INCLUDE;
212       }
213       if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_NEXT_COL)) {
214         return ReturnCode.INCLUDE_AND_NEXT_COL;
215       }
216       if (isInReturnCodes(rc, ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
217         return ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW;
218       }
219       if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.SEEK_NEXT_USING_HINT)) {
220         return ReturnCode.SKIP;
221       }
222       if (isInReturnCodes(rc, ReturnCode.NEXT_COL)) {
223         return ReturnCode.NEXT_COL;
224       }
225       if (isInReturnCodes(rc, ReturnCode.NEXT_ROW)) {
226         return ReturnCode.NEXT_ROW;
227       }
228       break;
229     case SEEK_NEXT_USING_HINT:
230       if (isInReturnCodes(rc, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
231         ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
232         return ReturnCode.INCLUDE;
233       }
234       if (isInReturnCodes(rc, ReturnCode.SKIP, ReturnCode.NEXT_COL, ReturnCode.NEXT_ROW)) {
235         return ReturnCode.SKIP;
236       }
237       if (isInReturnCodes(rc, ReturnCode.SEEK_NEXT_USING_HINT)) {
238         return ReturnCode.SEEK_NEXT_USING_HINT;
239       }
240       break;
241     }
242     throw new IllegalStateException(
243         "Received code is not valid. rc: " + rc + ", localRC: " + localRC);
244   }
245 
246   private void updatePrevFilterRCList(int index, ReturnCode currentRC) {
247     prevFilterRCList.set(index, currentRC);
248   }
249 
250   private void updatePrevCellList(int index, Cell currentCell, ReturnCode currentRC) {
251     if (currentCell == null || currentRC == ReturnCode.INCLUDE || currentRC == ReturnCode.SKIP) {
252       // If previous return code is INCLUDE or SKIP, we should always pass the next cell to the
253       // corresponding sub-filter(need not test calculateReturnCodeByPrevCellAndRC() method), So we
254       // need not save current cell to prevCellList for saving heap memory.
255       prevCellList.set(index, null);
256     } else {
257       prevCellList.set(index, currentCell);
258     }
259   }
260 
261   @Override
262   public ReturnCode filterKeyValue(Cell c) throws IOException {
263     if (isEmpty()) {
264       return ReturnCode.INCLUDE;
265     }
266     ReturnCode rc = null;
267     for (int i = 0, n = filters.size(); i < n; i++) {
268       Filter filter = filters.get(i);
269       subFiltersIncludedCell.set(i, false);
270 
271       Cell prevCell = this.prevCellList.get(i);
272       ReturnCode prevCode = this.prevFilterRCList.get(i);
273       if (filter.filterAllRemaining()) {
274         continue;
275       }
276 
277       ReturnCode localRC = calculateReturnCodeByPrevCellAndRC(filter, c, prevCell, prevCode);
278       if (localRC == null) {
279         // Can not get return code based on previous cell and previous return code. In other words,
280         // we should pass the current cell to this sub-filter to get the return code, and it won't
281         // impact the sub-filter's internal state.
282         localRC = filter.filterKeyValue(c);
283       }
284 
285       // Update previous return code and previous cell for filter[i].
286       updatePrevFilterRCList(i, localRC);
287       updatePrevCellList(i, c, localRC);
288 
289       rc = mergeReturnCode(rc, localRC);
290 
291       // For INCLUDE* case, we need to update the transformed cell.
292       if (isInReturnCodes(localRC, ReturnCode.INCLUDE, ReturnCode.INCLUDE_AND_NEXT_COL,
293         ReturnCode.INCLUDE_AND_SEEK_NEXT_ROW)) {
294         subFiltersIncludedCell.set(i, true);
295       }
296     }
297     // Each sub-filter in filter list got true for filterAllRemaining(), if rc is null, so we should
298     // return SKIP.
299     return rc == null ? ReturnCode.SKIP : rc;
300   }
301 
302   @Override
303   public void reset() throws IOException {
304     for (int i = 0, n = filters.size(); i < n; i++) {
305       filters.get(i).reset();
306       subFiltersIncludedCell.set(i, false);
307       prevFilterRCList.set(i, null);
308       prevCellList.set(i, null);
309     }
310   }
311 
312   @Override
313   public boolean filterRowKey(byte[] rowKey, int offset, int length) throws IOException {
314     if (isEmpty()) {
315       return super.filterRowKey(rowKey, offset, length);
316     }
317     boolean retVal = true;
318     for (int i = 0, n = filters.size(); i < n; i++) {
319       Filter filter = filters.get(i);
320       if (!filter.filterAllRemaining() && !filter.filterRowKey(rowKey, offset, length)) {
321         retVal = false;
322       }
323     }
324     return retVal;
325   }
326 
327   @Override
328   public boolean filterAllRemaining() throws IOException {
329     if (isEmpty()) {
330       return super.filterAllRemaining();
331     }
332     for (int i = 0, n = filters.size(); i < n; i++) {
333       if (!filters.get(i).filterAllRemaining()) {
334         return false;
335       }
336     }
337     return true;
338   }
339 
340   @Override
341   public boolean filterRow() throws IOException {
342     if (isEmpty()) {
343       return super.filterRow();
344     }
345     for (int i = 0, n = filters.size(); i < n; i++) {
346       Filter filter = filters.get(i);
347       if (!filter.filterRow()) {
348         return false;
349       }
350     }
351     return true;
352   }
353 
354   @Override
355   public Cell getNextCellHint(Cell currentCell) throws IOException {
356     if (isEmpty()) {
357       return super.getNextCellHint(currentCell);
358     }
359     Cell minKeyHint = null;
360     // If any condition can pass, we need to keep the min hint
361     for (int i = 0, n = filters.size(); i < n; i++) {
362       if (filters.get(i).filterAllRemaining()) {
363         continue;
364       }
365       Cell curKeyHint = filters.get(i).getNextCellHint(currentCell);
366       if (curKeyHint == null) {
367         // If we ever don't have a hint and this is must-pass-one, then no hint
368         return null;
369       }
370       // If this is the first hint we find, set it
371       if (minKeyHint == null) {
372         minKeyHint = curKeyHint;
373         continue;
374       }
375       if (this.compareCell(minKeyHint, curKeyHint) > 0) {
376         minKeyHint = curKeyHint;
377       }
378     }
379     return minKeyHint;
380   }
381 
382 
383   @Override
384   public boolean equals(Object obj) {
385     if (obj == null || (!(obj instanceof FilterListWithOR))) {
386       return false;
387     }
388     if (this == obj) {
389       return true;
390     }
391     FilterListWithOR f = (FilterListWithOR) obj;
392     return this.filters.equals(f.getFilters()) &&
393       this.prevFilterRCList.equals(f.prevFilterRCList) &&
394       this.prevCellList.equals(f.prevCellList);
395   }
396 
397   @Override
398   public int hashCode() {
399     return Objects.hash(this.prevFilterRCList, this.prevCellList, this.filters);
400   }
401 }