1 /**
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19 package org.apache.hadoop.hbase.regionserver;
20
21 import java.io.Closeable;
22 import java.io.IOException;
23
24 import org.apache.hadoop.hbase.classification.InterfaceAudience;
25 import org.apache.hadoop.hbase.Cell;
26 import org.apache.hadoop.hbase.KeyValue;
27 import org.apache.hadoop.hbase.client.Scan;
28
29 /**
30 * Scanner that returns the next KeyValue.
31 */
32 @InterfaceAudience.Private
33 // TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner
34 // so this should be something else altogether, a decoration on our base CellScanner. TODO.
35 // This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
36 public interface KeyValueScanner {
37 /**
38 * The byte array represents for NO_NEXT_INDEXED_KEY;
39 * The actual value is irrelevant because this is always compared by reference.
40 */
41 public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue();
42
43 /**
44 * Look at the next Cell in this scanner, but do not iterate scanner.
45 * NOTICE: The returned cell has not been passed into ScanQueryMatcher. So it may not be what the
46 * user need.
47 * @return the next Cell
48 */
49 Cell peek();
50
51 /**
52 * Return the next Cell in this scanner, iterating the scanner
53 * @return the next Cell
54 */
55 Cell next() throws IOException;
56
57 /**
58 * Seek the scanner at or after the specified KeyValue.
59 * @param key seek value
60 * @return true if scanner has values left, false if end of scanner
61 */
62 boolean seek(Cell key) throws IOException;
63
64 /**
65 * Reseek the scanner at or after the specified KeyValue.
66 * This method is guaranteed to seek at or after the required key only if the
67 * key comes after the current position of the scanner. Should not be used
68 * to seek to a key which may come before the current position.
69 * @param key seek value (should be non-null)
70 * @return true if scanner has values left, false if end of scanner
71 */
72 boolean reseek(Cell key) throws IOException;
73
74 /**
75 * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners and
76 * MemStoreScanners (other scanners simply return 0). This is required for comparing multiple
77 * files to find out which one has the latest data. StoreFileScanners are ordered from 0
78 * (oldest) to newest in increasing order. MemStoreScanner gets LONG.max since it always
79 * contains freshest data.
80 */
81 long getScannerOrder();
82
83 /**
84 * Close the KeyValue scanner.
85 */
86 void close();
87
88 /**
89 * Allows to filter out scanners (both StoreFile and memstore) that we don't
90 * want to use based on criteria such as Bloom filters and timestamp ranges.
91 * @param scan the scan that we are selecting scanners for
92 * @param store the store we are performing the scan on.
93 * @param oldestUnexpiredTS the oldest timestamp we are interested in for
94 * this query, based on TTL
95 * @return true if the scanner should be included in the query
96 */
97 boolean shouldUseScanner(Scan scan, Store store, long oldestUnexpiredTS);
98
99 // "Lazy scanner" optimizations
100
101 /**
102 * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only
103 * does a seek operation after checking that it is really necessary for the
104 * row/column combination specified by the kv parameter. This function was
105 * added to avoid unnecessary disk seeks by checking row-column Bloom filters
106 * before a seek on multi-column get/scan queries, and to optimize by looking
107 * up more recent files first.
108 * @param forward do a forward-only "reseek" instead of a random-access seek
109 * @param useBloom whether to enable multi-column Bloom filter optimization
110 */
111 boolean requestSeek(Cell kv, boolean forward, boolean useBloom)
112 throws IOException;
113
114 /**
115 * We optimize our store scanners by checking the most recent store file
116 * first, so we sometimes pretend we have done a seek but delay it until the
117 * store scanner bubbles up to the top of the key-value heap. This method is
118 * then used to ensure the top store file scanner has done a seek operation.
119 */
120 boolean realSeekDone();
121
122 /**
123 * Does the real seek operation in case it was skipped by
124 * seekToRowCol(KeyValue, boolean) (TODO: Whats this?). Note that this function should
125 * be never called on scanners that always do real seek operations (i.e. most
126 * of the scanners). The easiest way to achieve this is to call
127 * {@link #realSeekDone()} first.
128 */
129 void enforceSeek() throws IOException;
130
131 /**
132 * @return true if this is a file scanner. Otherwise a memory scanner is
133 * assumed.
134 */
135 boolean isFileScanner();
136
137 // Support for "Reversed Scanner"
138 /**
139 * Seek the scanner at or before the row of specified Cell, it firstly
140 * tries to seek the scanner at or after the specified Cell, return if
141 * peek KeyValue of scanner has the same row with specified Cell,
142 * otherwise seek the scanner at the first Cell of the row which is the
143 * previous row of specified KeyValue
144 *
145 * @param key seek KeyValue
146 * @return true if the scanner is at the valid KeyValue, false if such
147 * KeyValue does not exist
148 *
149 */
150 public boolean backwardSeek(Cell key) throws IOException;
151
152 /**
153 * Seek the scanner at the first Cell of the row which is the previous row
154 * of specified key
155 * @param key seek value
156 * @return true if the scanner at the first valid Cell of previous row,
157 * false if not existing such Cell
158 */
159 public boolean seekToPreviousRow(Cell key) throws IOException;
160
161 /**
162 * Seek the scanner at the first KeyValue of last row
163 *
164 * @return true if scanner has values left, false if the underlying data is
165 * empty
166 * @throws IOException
167 */
168 public boolean seekToLastRow() throws IOException;
169
170 /**
171 * @return the next key in the index, usually the first key of next block OR a key that falls
172 * between last key of current block and first key of next block..
173 * see HFileWriterImpl#getMidpoint, or null if not known.
174 */
175 public Cell getNextIndexedKey();
176 }