View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.Closeable;
22  import java.io.IOException;
23  
24  import org.apache.hadoop.hbase.classification.InterfaceAudience;
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.KeyValue;
27  import org.apache.hadoop.hbase.client.Scan;
28  
29  /**
30   * Scanner that returns the next KeyValue.
31   */
32  @InterfaceAudience.Private
33  // TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
34  // so this should be something else altogether, a decoration on our base CellScanner. TODO.
35  // This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
36  public interface KeyValueScanner {
37    /**
38     * The byte array represents for NO_NEXT_INDEXED_KEY;
39     * The actual value is irrelevant because this is always compared by reference.
40     */
41    public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
42  
43    /**
44     * Look at the next Cell in this scanner, but do not iterate scanner.
45     * NOTICE: The returned cell has not been passed into ScanQueryMatcher. So it may not be what the
46     * user need.
47     * @return the next Cell
48     */
49    Cell peek();
50  
51    /**
52     * Return the next Cell in this scanner, iterating the scanner
53     * @return the next Cell
54     */
55    Cell next() throws IOException;
56  
57    /**
58     * Seek the scanner at or after the specified KeyValue.
59     * @param key seek value
60     * @return true if scanner has values left, false if end of scanner
61     */
62    boolean seek(Cell key) throws IOException;
63  
64    /**
65     * Reseek the scanner at or after the specified KeyValue.
66     * This method is guaranteed to seek at or after the required key only if the
67     * key comes after the current position of the scanner. Should not be used
68     * to seek to a key which may come before the current position.
69     * @param key seek value (should be non-null)
70     * @return true if scanner has values left, false if end of scanner
71     */
72    boolean reseek(Cell key) throws IOException;
73  
74    /**
75     * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners and
76     * MemStoreScanners (other scanners simply return 0). This is required for comparing multiple
77     * files to find out which one has the latest data. StoreFileScanners are ordered from 0
78     * (oldest) to newest in increasing order. MemStoreScanner gets LONG.max since it always
79     * contains freshest data.
80     */
81    long getScannerOrder();
82  
83    /**
84     * Close the KeyValue scanner.
85     */
86    void close();
87  
88    /**
89     * Allows to filter out scanners (both StoreFile and memstore) that we don't
90     * want to use based on criteria such as Bloom filters and timestamp ranges.
91     * @param scan the scan that we are selecting scanners for
92     * @param store the store we are performing the scan on.
93     * @param oldestUnexpiredTS the oldest timestamp we are interested in for
94     *          this query, based on TTL
95     * @return true if the scanner should be included in the query
96     */
97    boolean shouldUseScanner(Scan scan, Store store, long oldestUnexpiredTS);
98  
99    // "Lazy scanner" optimizations
100 
101   /**
102    * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only
103    * does a seek operation after checking that it is really necessary for the
104    * row/column combination specified by the kv parameter. This function was
105    * added to avoid unnecessary disk seeks by checking row-column Bloom filters
106    * before a seek on multi-column get/scan queries, and to optimize by looking
107    * up more recent files first.
108    * @param forward do a forward-only "reseek" instead of a random-access seek
109    * @param useBloom whether to enable multi-column Bloom filter optimization
110    */
111   boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
112       throws IOException;
113 
114   /**
115    * We optimize our store scanners by checking the most recent store file
116    * first, so we sometimes pretend we have done a seek but delay it until the
117    * store scanner bubbles up to the top of the key-value heap. This method is
118    * then used to ensure the top store file scanner has done a seek operation.
119    */
120   boolean realSeekDone();
121 
122   /**
123    * Does the real seek operation in case it was skipped by
124    * seekToRowCol(KeyValue, boolean) (TODO: Whats this?). Note that this function should
125    * be never called on scanners that always do real seek operations (i.e. most
126    * of the scanners). The easiest way to achieve this is to call
127    * {@link #realSeekDone()} first.
128    */
129   void enforceSeek() throws IOException;
130 
131   /**
132    * @return true if this is a file scanner. Otherwise a memory scanner is
133    *         assumed.
134    */
135   boolean isFileScanner();
136 
137   // Support for "Reversed Scanner"
138   /**
139    * Seek the scanner at or before the row of specified Cell, it firstly
140    * tries to seek the scanner at or after the specified Cell, return if
141    * peek KeyValue of scanner has the same row with specified Cell,
142    * otherwise seek the scanner at the first Cell of the row which is the
143    * previous row of specified KeyValue
144    *
145    * @param key seek KeyValue
146    * @return true if the scanner is at the valid KeyValue, false if such
147    *         KeyValue does not exist
148    *
149    */
150   public boolean backwardSeek(Cell key) throws IOException;
151 
152   /**
153    * Seek the scanner at the first Cell of the row which is the previous row
154    * of specified key
155    * @param key seek value
156    * @return true if the scanner at the first valid Cell of previous row,
157    *         false if not existing such Cell
158    */
159   public boolean seekToPreviousRow(Cell key) throws IOException;
160 
161   /**
162    * Seek the scanner at the first KeyValue of last row
163    *
164    * @return true if scanner has values left, false if the underlying data is
165    *         empty
166    * @throws IOException
167    */
168   public boolean seekToLastRow() throws IOException;
169 
170   /**
171    * @return the next key in the index, usually the first key of next block OR a key that falls
172    * between last key of current block and first key of next block..
173    * see HFileWriterImpl#getMidpoint, or null if not known.
174    */
175   public Cell getNextIndexedKey();
176 }