1 /*
2 *
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20 package org.apache.hadoop.hbase.client;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.HashMap;
25 import java.util.List;
26 import java.util.Map;
27 import java.util.NavigableSet;
28 import java.util.TreeMap;
29 import java.util.TreeSet;
30
31 import org.apache.commons.logging.Log;
32 import org.apache.commons.logging.LogFactory;
33 import org.apache.hadoop.hbase.classification.InterfaceAudience;
34 import org.apache.hadoop.hbase.classification.InterfaceStability;
35 import org.apache.hadoop.hbase.HConstants;
36 import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
37 import org.apache.hadoop.hbase.filter.Filter;
38 import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
39 import org.apache.hadoop.hbase.io.TimeRange;
40 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
41 import org.apache.hadoop.hbase.security.access.Permission;
42 import org.apache.hadoop.hbase.security.visibility.Authorizations;
43 import org.apache.hadoop.hbase.util.Bytes;
44
45 /**
46 * Used to perform Scan operations.
47 * <p>
48 * All operations are identical to {@link Get} with the exception of
49 * instantiation. Rather than specifying a single row, an optional startRow
50 * and stopRow may be defined. If rows are not specified, the Scanner will
51 * iterate over all rows.
52 * <p>
53 * To get all columns from all rows of a Table, create an instance with no constraints; use the
54 * {@link #Scan()} constructor. To constrain the scan to specific column families,
55 * call {@link #addFamily(byte[]) addFamily} for each family to retrieve on your Scan instance.
56 * <p>
57 * To get specific columns, call {@link #addColumn(byte[], byte[]) addColumn}
58 * for each column to retrieve.
59 * <p>
60 * To only retrieve columns within a specific range of version timestamps,
61 * call {@link #setTimeRange(long, long) setTimeRange}.
62 * <p>
63 * To only retrieve columns with a specific timestamp, call
64 * {@link #setTimeStamp(long) setTimestamp}.
65 * <p>
66 * To limit the number of versions of each column to be returned, call
67 * {@link #setMaxVersions(int) setMaxVersions}.
68 * <p>
69 * To limit the maximum number of values returned for each call to next(),
70 * call {@link #setBatch(int) setBatch}.
71 * <p>
72 * To add a filter, call {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
73 * <p>
74 * Expert: To explicitly disable server-side block caching for this scan,
75 * execute {@link #setCacheBlocks(boolean)}.
76 * <p><em>Note:</em> Usage alters Scan instances. Internally, attributes are updated as the Scan
77 * runs and if enabled, metrics accumulate in the Scan instance. Be aware this is the case when
78 * you go to clone a Scan instance or if you go to reuse a created Scan instance; safer is create
79 * a Scan instance per usage.
80 */
81 @InterfaceAudience.Public
82 @InterfaceStability.Stable
83 public class Scan extends Query {
  private static final Log LOG = LogFactory.getLog(Scan.class);

  // Attribute key flagging a "raw" scan (returns delete markers and not-yet-collected
  // deleted cells). See setRaw(boolean)/isRaw().
  private static final String RAW_ATTR = "_raw_";

  // Scan boundaries. Empty byte arrays mean "unbounded" on that side.
  private byte[] startRow = HConstants.EMPTY_START_ROW;
  private boolean includeStartRow = true;
  private byte[] stopRow = HConstants.EMPTY_END_ROW;
  private boolean includeStopRow = false;
  // Max versions returned per column; default is newest version only.
  private int maxVersions = 1;
  // Max cells per call to next(); -1 means no batching.
  private int batch = -1;

  /**
   * Partial {@link Result}s are {@link Result}s that must be combined to form a complete
   * {@link Result}. The {@link Result}s had to be returned in fragments (i.e. as partials) because
   * the size of the cells in the row exceeded max result size on the server. Typically partial
   * results will be combined client side into complete results before being delivered to the
   * caller. However, if this flag is set, the caller is indicating that they do not mind seeing
   * partial results (i.e. they understand that the results returned from the Scanner may only
   * represent part of a particular row). In such a case, any attempt to combine the partials into
   * a complete result on the client side will be skipped, and the caller will be able to see the
   * exact results returned from the server.
   */
  private boolean allowPartialResults = false;

  // Per-column-family limits: max values per row per CF, and number of cells to skip.
  private int storeLimit = -1;
  private int storeOffset = 0;
  // True when this scan was built from a Get (single-row scan); see Scan(Get) and isGetScan().
  private boolean getScan;

  /**
   * @deprecated since 1.0.0. Use {@link #setScanMetricsEnabled(boolean)}
   */
  // Make private or remove.
  @Deprecated
  static public final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";

  /**
   * Use {@link #getScanMetrics()}
   */
  // Make this private or remove.
  @Deprecated
  static public final String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";

  // If an application wants to use multiple scans over different tables each scan must
  // define this attribute with the appropriate table name by calling
  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";

  /**
   * @deprecated without replacement
   * This is now a no-op, SEEKs and SKIPs are optimized automatically.
   * Will be removed in 2.0+
   */
  @Deprecated
  public static final String HINT_LOOKAHEAD = "_look_ahead_";

  /*
   * -1 means no caching specified and the value of {@link HConstants#HBASE_CLIENT_SCANNER_CACHING}
   * (default to {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_CACHING}) will be used
   */
  private int caching = -1;
  // -1 means "use the globally configured max result size"; see setMaxResultSize(long).
  private long maxResultSize = -1;
  private boolean cacheBlocks = true;
  private boolean reversed = false;
  // Timestamp range filter; defaults to "all time".
  private TimeRange tr = new TimeRange();
  // family -> qualifiers; a null qualifier set means "all columns of that family".
  private Map<byte [], NavigableSet<byte []>> familyMap =
      new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);

  /**
   * Set it true for small scan to get better performance
   *
   * Small scan should use pread and big scan can use seek + read
   *
   * seek + read is fast but can cause two problem (1) resource contention (2)
   * cause too much network io
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if setting it true, we would do
   * openScanner,next,closeScanner in one RPC call. It means the better
   * performance for small scan. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block(64KB), it could be
   * considered as a small scan.
   */
  private boolean small = false;

  /**
   * The mvcc read point to use when open a scanner. Remember to clear it after switching regions as
   * the mvcc is only valid within region scope.
   */
  private long mvccReadPoint = -1L;

  /**
   * The number of rows we want for this scan. We will terminate the scan if the number of return
   * rows reaches this value.
   */
  private int limit = -1;

  /**
   * Control whether to use pread at server side.
   */
  private ReadType readType = ReadType.DEFAULT;

  // When true, the server may return "cursor" results to report scan progress.
  private boolean needCursorResult = false;
189
190 /**
191 * Create a Scan operation across all rows.
192 */
193 public Scan() {}
194
195 /**
196 * @deprecated use {@code new Scan().withStartRow(startRow).setFilter(filter)} instead.
197 */
198 @Deprecated
199 public Scan(byte[] startRow, Filter filter) {
200 this(startRow);
201 this.filter = filter;
202 }
203
204 /**
205 * Create a Scan operation starting at the specified row.
206 * <p>
207 * If the specified row does not exist, the Scanner will start from the next closest row after the
208 * specified row.
209 * @param startRow row to start scanner at or after
210 * @deprecated use {@code new Scan().withStartRow(startRow)} instead.
211 */
212 @Deprecated
213 public Scan(byte[] startRow) {
214 setStartRow(startRow);
215 }
216
217 /**
218 * Create a Scan operation for the range of rows specified.
219 * @param startRow row to start scanner at or after (inclusive)
220 * @param stopRow row to stop scanner before (exclusive)
221 * @deprecated use {@code new Scan().withStartRow(startRow).withStopRow(stopRow)} instead.
222 */
223 @Deprecated
224 public Scan(byte[] startRow, byte[] stopRow) {
225 setStartRow(startRow);
226 setStopRow(stopRow);
227 }
228
229 /**
230 * Creates a new instance of this class while copying all values.
231 *
232 * @param scan The scan instance to copy from.
233 * @throws IOException When copying the values fails.
234 */
235 public Scan(Scan scan) throws IOException {
236 startRow = scan.getStartRow();
237 includeStartRow = scan.includeStartRow();
238 stopRow = scan.getStopRow();
239 includeStopRow = scan.includeStopRow();
240 maxVersions = scan.getMaxVersions();
241 batch = scan.getBatch();
242 storeLimit = scan.getMaxResultsPerColumnFamily();
243 storeOffset = scan.getRowOffsetPerColumnFamily();
244 caching = scan.getCaching();
245 maxResultSize = scan.getMaxResultSize();
246 cacheBlocks = scan.getCacheBlocks();
247 getScan = scan.isGetScan();
248 filter = scan.getFilter(); // clone?
249 loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
250 consistency = scan.getConsistency();
251 this.setIsolationLevel(scan.getIsolationLevel());
252 reversed = scan.isReversed();
253 small = scan.isSmall();
254 allowPartialResults = scan.getAllowPartialResults();
255 TimeRange ctr = scan.getTimeRange();
256 tr = new TimeRange(ctr.getMin(), ctr.getMax());
257 Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
258 for (Map.Entry<byte[],NavigableSet<byte[]>> entry : fams.entrySet()) {
259 byte [] fam = entry.getKey();
260 NavigableSet<byte[]> cols = entry.getValue();
261 if (cols != null && cols.size() > 0) {
262 for (byte[] col : cols) {
263 addColumn(fam, col);
264 }
265 } else {
266 addFamily(fam);
267 }
268 }
269 for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
270 setAttribute(attr.getKey(), attr.getValue());
271 }
272 for (Map.Entry<byte[], TimeRange> entry : scan.getColumnFamilyTimeRange().entrySet()) {
273 TimeRange tr = entry.getValue();
274 setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
275 }
276 this.mvccReadPoint = scan.getMvccReadPoint();
277 this.limit = scan.getLimit();
278 this.needCursorResult = scan.isNeedCursorResult();
279 setPriority(scan.getPriority());
280 }
281
282 /**
283 * Builds a scan object with the same specs as get.
284 * @param get get to model scan after
285 */
286 public Scan(Get get) {
287 this.startRow = get.getRow();
288 this.includeStartRow = true;
289 this.stopRow = get.getRow();
290 this.includeStopRow = true;
291 this.filter = get.getFilter();
292 this.cacheBlocks = get.getCacheBlocks();
293 this.maxVersions = get.getMaxVersions();
294 this.storeLimit = get.getMaxResultsPerColumnFamily();
295 this.storeOffset = get.getRowOffsetPerColumnFamily();
296 this.tr = get.getTimeRange();
297 this.familyMap = get.getFamilyMap();
298 this.getScan = true;
299 this.consistency = get.getConsistency();
300 this.setIsolationLevel(get.getIsolationLevel());
301 this.loadColumnFamiliesOnDemand = get.getLoadColumnFamiliesOnDemandValue();
302 for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
303 setAttribute(attr.getKey(), attr.getValue());
304 }
305 for (Map.Entry<byte[], TimeRange> entry : get.getColumnFamilyTimeRange().entrySet()) {
306 TimeRange tr = entry.getValue();
307 setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
308 }
309 this.mvccReadPoint = -1L;
310 setPriority(get.getPriority());
311 }
312
313 public boolean isGetScan() {
314 return includeStartRow && includeStopRow
315 && ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow);
316 }
317
318 /**
319 * Get all columns from the specified family.
320 * <p>
321 * Overrides previous calls to addColumn for this family.
322 * @param family family name
323 * @return this
324 */
325 public Scan addFamily(byte [] family) {
326 familyMap.remove(family);
327 familyMap.put(family, null);
328 return this;
329 }
330
331 /**
332 * Get the column from the specified family with the specified qualifier.
333 * <p>
334 * Overrides previous calls to addFamily for this family.
335 * @param family family name
336 * @param qualifier column qualifier
337 * @return this
338 */
339 public Scan addColumn(byte [] family, byte [] qualifier) {
340 NavigableSet<byte []> set = familyMap.get(family);
341 if(set == null) {
342 set = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
343 familyMap.put(family, set);
344 }
345 if (qualifier == null) {
346 qualifier = HConstants.EMPTY_BYTE_ARRAY;
347 }
348 set.add(qualifier);
349 return this;
350 }
351
352 /**
353 * Get versions of columns only within the specified timestamp range,
354 * [minStamp, maxStamp). Note, default maximum versions to return is 1. If
355 * your time range spans more than one version and you want all versions
356 * returned, up the number of versions beyond the default.
357 * @param minStamp minimum timestamp value, inclusive
358 * @param maxStamp maximum timestamp value, exclusive
359 * @see #setMaxVersions()
360 * @see #setMaxVersions(int)
361 * @return this
362 */
363 public Scan setTimeRange(long minStamp, long maxStamp) throws IOException {
364 tr = new TimeRange(minStamp, maxStamp);
365 return this;
366 }
367
368 /**
369 * Get versions of columns with the specified timestamp. Note, default maximum
370 * versions to return is 1. If your time range spans more than one version
371 * and you want all versions returned, up the number of versions beyond the
372 * defaut.
373 * @param timestamp version timestamp
374 * @see #setMaxVersions()
375 * @see #setMaxVersions(int)
376 * @return this
377 */
378 public Scan setTimeStamp(long timestamp)
379 throws IOException {
380 try {
381 tr = new TimeRange(timestamp, timestamp+1);
382 } catch(Exception e) {
383 // This should never happen, unless integer overflow or something extremely wrong...
384 LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
385 throw e;
386 }
387 return this;
388 }
389
390 @Override public Scan setColumnFamilyTimeRange(byte[] cf, long minStamp, long maxStamp) {
391 return (Scan) super.setColumnFamilyTimeRange(cf, minStamp, maxStamp);
392 }
393
394
395 /**
396 * Set the start row of the scan.
397 * <p>
398 * If the specified row does not exist, the Scanner will start from the next closest row after the
399 * specified row.
400 * @param startRow row to start scanner at or after
401 * @return this
402 * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
403 * exceeds {@link HConstants#MAX_ROW_LENGTH})
404 * @deprecated use {@link #withStartRow(byte[])} instead. This method may change the inclusive of
405 * the stop row to keep compatible with the old behavior.
406 */
407 @Deprecated
408 public Scan setStartRow(byte[] startRow) {
409 withStartRow(startRow);
410 if (ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow)) {
411 // for keeping the old behavior that a scan with the same start and stop row is a get scan.
412 this.includeStopRow = true;
413 }
414 return this;
415 }
416
417 /**
418 * Set the start row of the scan.
419 * <p>
420 * If the specified row does not exist, the Scanner will start from the next closest row after the
421 * specified row.
422 * @param startRow row to start scanner at or after
423 * @return this
424 * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
425 * exceeds {@link HConstants#MAX_ROW_LENGTH})
426 */
427 public Scan withStartRow(byte[] startRow) {
428 return withStartRow(startRow, true);
429 }
430
431 /**
432 * Set the start row of the scan.
433 * <p>
434 * If the specified row does not exist, or the {@code inclusive} is {@code false}, the Scanner
435 * will start from the next closest row after the specified row.
436 * @param startRow row to start scanner at or after
437 * @param inclusive whether we should include the start row when scan
438 * @return this
439 * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
440 * exceeds {@link HConstants#MAX_ROW_LENGTH})
441 */
442 public Scan withStartRow(byte[] startRow, boolean inclusive) {
443 if (Bytes.len(startRow) > HConstants.MAX_ROW_LENGTH) {
444 throw new IllegalArgumentException("startRow's length must be less than or equal to "
445 + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
446 }
447 this.startRow = startRow;
448 this.includeStartRow = inclusive;
449 return this;
450 }
451
452 /**
453 * Set the stop row of the scan.
454 * <p>
455 * The scan will include rows that are lexicographically less than the provided stopRow.
456 * <p>
457 * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
458 * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
459 * </p>
460 * @param stopRow row to end at (exclusive)
461 * @return this
462 * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
463 * exceeds {@link HConstants#MAX_ROW_LENGTH})
464 * @deprecated use {@link #withStartRow(byte[])} instead. This method may change the inclusive of
465 * the stop row to keep compatible with the old behavior.
466 */
467 @Deprecated
468 public Scan setStopRow(byte[] stopRow) {
469 withStopRow(stopRow);
470 if (ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow)) {
471 // for keeping the old behavior that a scan with the same start and stop row is a get scan.
472 this.includeStopRow = true;
473 }
474 return this;
475 }
476
477 /**
478 * Set the stop row of the scan.
479 * <p>
480 * The scan will include rows that are lexicographically less than the provided stopRow.
481 * <p>
482 * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
483 * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
484 * </p>
485 * @param stopRow row to end at (exclusive)
486 * @return this
487 * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
488 * exceeds {@link HConstants#MAX_ROW_LENGTH})
489 */
490 public Scan withStopRow(byte[] stopRow) {
491 return withStopRow(stopRow, false);
492 }
493
494 /**
495 * Set the stop row of the scan.
496 * <p>
497 * The scan will include rows that are lexicographically less than (or equal to if
498 * {@code inclusive} is {@code true}) the provided stopRow.
499 * @param stopRow row to end at
500 * @param inclusive whether we should include the stop row when scan
501 * @return this
502 * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
503 * exceeds {@link HConstants#MAX_ROW_LENGTH})
504 */
505 public Scan withStopRow(byte[] stopRow, boolean inclusive) {
506 if (Bytes.len(stopRow) > HConstants.MAX_ROW_LENGTH) {
507 throw new IllegalArgumentException("stopRow's length must be less than or equal to "
508 + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
509 }
510 this.stopRow = stopRow;
511 this.includeStopRow = inclusive;
512 return this;
513 }
514
515 /**
516 * <p>Set a filter (using stopRow and startRow) so the result set only contains rows where the
517 * rowKey starts with the specified prefix.</p>
518 * <p>This is a utility method that converts the desired rowPrefix into the appropriate values
519 * for the startRow and stopRow to achieve the desired result.</p>
520 * <p>This can safely be used in combination with setFilter.</p>
521 * <p><b>NOTE: Doing a {@link #setStartRow(byte[])} and/or {@link #setStopRow(byte[])}
522 * after this method will yield undefined results.</b></p>
523 * @param rowPrefix the prefix all rows must start with. (Set <i>null</i> to remove the filter.)
524 * @return this
525 */
526 public Scan setRowPrefixFilter(byte[] rowPrefix) {
527 if (rowPrefix == null) {
528 setStartRow(HConstants.EMPTY_START_ROW);
529 setStopRow(HConstants.EMPTY_END_ROW);
530 } else {
531 this.setStartRow(rowPrefix);
532 this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
533 }
534 return this;
535 }
536
537 /**
538 * Get all available versions.
539 * @return this
540 */
541 public Scan setMaxVersions() {
542 this.maxVersions = Integer.MAX_VALUE;
543 return this;
544 }
545
546 /**
547 * Get up to the specified number of versions of each column.
548 * @param maxVersions maximum versions for each column
549 * @return this
550 */
551 public Scan setMaxVersions(int maxVersions) {
552 this.maxVersions = maxVersions;
553 return this;
554 }
555
556 /**
557 * Set the maximum number of cells to return for each call to next(). Callers should be aware
558 * that this is not equivalent to calling {@link #setAllowPartialResults(boolean)}.
559 * If you don't allow partial results, the number of cells in each Result must equal to your
560 * batch setting unless it is the last Result for current row. So this method is helpful in paging
561 * queries. If you just want to prevent OOM at client, use setAllowPartialResults(true) is better.
562 * @param batch the maximum number of values
563 * @see Result#mayHaveMoreCellsInRow()
564 */
565 public Scan setBatch(int batch) {
566 if (this.hasFilter() && this.filter.hasFilterRow()) {
567 throw new IncompatibleFilterException(
568 "Cannot set batch on a scan using a filter" +
569 " that returns true for filter.hasFilterRow");
570 }
571 this.batch = batch;
572 return this;
573 }
574
575 /**
576 * Set the maximum number of values to return per row per Column Family
577 * @param limit the maximum number of values returned / row / CF
578 */
579 public Scan setMaxResultsPerColumnFamily(int limit) {
580 this.storeLimit = limit;
581 return this;
582 }
583
584 /**
585 * Set offset for the row per Column Family.
586 * @param offset is the number of kvs that will be skipped.
587 */
588 public Scan setRowOffsetPerColumnFamily(int offset) {
589 this.storeOffset = offset;
590 return this;
591 }
592
593 /**
594 * Set the number of rows for caching that will be passed to scanners.
595 * If not set, the Configuration setting {@link HConstants#HBASE_CLIENT_SCANNER_CACHING} will
596 * apply.
597 * Higher caching values will enable faster scanners but will use more memory.
598 * @param caching the number of rows for caching
599 */
600 public Scan setCaching(int caching) {
601 this.caching = caching;
602 return this;
603 }
604
605 /**
606 * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)}
607 */
608 public long getMaxResultSize() {
609 return maxResultSize;
610 }
611
612 /**
613 * Set the maximum result size. The default is -1; this means that no specific
614 * maximum result size will be set for this scan, and the global configured
615 * value will be used instead. (Defaults to unlimited).
616 *
617 * @param maxResultSize The maximum result size in bytes.
618 */
619 public Scan setMaxResultSize(long maxResultSize) {
620 this.maxResultSize = maxResultSize;
621 return this;
622 }
623
624 @Override
625 public Scan setFilter(Filter filter) {
626 super.setFilter(filter);
627 return this;
628 }
629
630 /**
631 * Setting the familyMap
632 * @param familyMap map of family to qualifier
633 * @return this
634 */
635 public Scan setFamilyMap(Map<byte [], NavigableSet<byte []>> familyMap) {
636 this.familyMap = familyMap;
637 return this;
638 }
639
640 /**
641 * Getting the familyMap
642 * @return familyMap
643 */
644 public Map<byte [], NavigableSet<byte []>> getFamilyMap() {
645 return this.familyMap;
646 }
647
648 /**
649 * @return the number of families in familyMap
650 */
651 public int numFamilies() {
652 if(hasFamilies()) {
653 return this.familyMap.size();
654 }
655 return 0;
656 }
657
658 /**
659 * @return true if familyMap is non empty, false otherwise
660 */
661 public boolean hasFamilies() {
662 return !this.familyMap.isEmpty();
663 }
664
665 /**
666 * @return the keys of the familyMap
667 */
668 public byte[][] getFamilies() {
669 if(hasFamilies()) {
670 return this.familyMap.keySet().toArray(new byte[0][0]);
671 }
672 return null;
673 }
674
675 /**
676 * @return the startrow
677 */
678 public byte [] getStartRow() {
679 return this.startRow;
680 }
681
682 /**
683 * @return if we should include start row when scan
684 */
685 public boolean includeStartRow() {
686 return includeStartRow;
687 }
688
689 /**
690 * @return the stoprow
691 */
692 public byte[] getStopRow() {
693 return this.stopRow;
694 }
695
696 /**
697 * @return if we should include stop row when scan
698 */
699 public boolean includeStopRow() {
700 return includeStopRow;
701 }
702
703 /**
704 * @return the max number of versions to fetch
705 */
706 public int getMaxVersions() {
707 return this.maxVersions;
708 }
709
710 /**
711 * @return maximum number of values to return for a single call to next()
712 */
713 public int getBatch() {
714 return this.batch;
715 }
716
717 /**
718 * @return maximum number of values to return per row per CF
719 */
720 public int getMaxResultsPerColumnFamily() {
721 return this.storeLimit;
722 }
723
724 /**
725 * Method for retrieving the scan's offset per row per column
726 * family (#kvs to be skipped)
727 * @return row offset
728 */
729 public int getRowOffsetPerColumnFamily() {
730 return this.storeOffset;
731 }
732
733 /**
734 * @return caching the number of rows fetched when calling next on a scanner
735 */
736 public int getCaching() {
737 return this.caching;
738 }
739
740 /**
741 * @return TimeRange
742 */
743 public TimeRange getTimeRange() {
744 return this.tr;
745 }
746
747 /**
748 * @return RowFilter
749 */
750 @Override
751 public Filter getFilter() {
752 return filter;
753 }
754
755 /**
756 * @return true is a filter has been specified, false if not
757 */
758 public boolean hasFilter() {
759 return filter != null;
760 }
761
762 /**
763 * Set whether blocks should be cached for this Scan.
764 * <p>
765 * This is true by default. When true, default settings of the table and
766 * family are used (this will never override caching blocks if the block
767 * cache is disabled for that family or entirely).
768 *
769 * @param cacheBlocks if false, default settings are overridden and blocks
770 * will not be cached
771 */
772 public Scan setCacheBlocks(boolean cacheBlocks) {
773 this.cacheBlocks = cacheBlocks;
774 return this;
775 }
776
777 /**
778 * Get whether blocks should be cached for this Scan.
779 * @return true if default caching should be used, false if blocks should not
780 * be cached
781 */
782 public boolean getCacheBlocks() {
783 return cacheBlocks;
784 }
785
786 /**
787 * Set whether this scan is a reversed one
788 * <p>
789 * This is false by default which means forward(normal) scan.
790 *
791 * @param reversed if true, scan will be backward order
792 * @return this
793 */
794 public Scan setReversed(boolean reversed) {
795 this.reversed = reversed;
796 return this;
797 }
798
799 /**
800 * Get whether this scan is a reversed one.
801 * @return true if backward scan, false if forward(default) scan
802 */
803 public boolean isReversed() {
804 return reversed;
805 }
806
807 /**
808 * Setting whether the caller wants to see the partial results when server returns
809 * less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client.
810 * By default this value is false and the complete results will be assembled client side
811 * before being delivered to the caller.
812 * @param allowPartialResults
813 * @return this
814 * @see Result#mayHaveMoreCellsInRow()
815 * @see #setBatch(int)
816 */
817 public Scan setAllowPartialResults(final boolean allowPartialResults) {
818 this.allowPartialResults = allowPartialResults;
819 return this;
820 }
821
822 /**
823 * @return true when the constructor of this scan understands that the results they will see may
824 * only represent a partial portion of a row. The entire row would be retrieved by
825 * subsequent calls to {@link ResultScanner#next()}
826 */
827 public boolean getAllowPartialResults() {
828 return allowPartialResults;
829 }
830
831 @Override
832 public Scan setLoadColumnFamiliesOnDemand(boolean value) {
833 return (Scan) super.setLoadColumnFamiliesOnDemand(value);
834 }
835
836 /**
837 * Compile the table and column family (i.e. schema) information
838 * into a String. Useful for parsing and aggregation by debugging,
839 * logging, and administration tools.
840 * @return Map
841 */
842 @Override
843 public Map<String, Object> getFingerprint() {
844 Map<String, Object> map = new HashMap<String, Object>();
845 List<String> families = new ArrayList<String>();
846 if(this.familyMap.size() == 0) {
847 map.put("families", "ALL");
848 return map;
849 } else {
850 map.put("families", families);
851 }
852 for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
853 this.familyMap.entrySet()) {
854 families.add(Bytes.toStringBinary(entry.getKey()));
855 }
856 return map;
857 }
858
859 /**
860 * Compile the details beyond the scope of getFingerprint (row, columns,
861 * timestamps, etc.) into a Map along with the fingerprinted information.
862 * Useful for debugging, logging, and administration tools.
863 * @param maxCols a limit on the number of columns output prior to truncation
864 * @return Map
865 */
866 @Override
867 public Map<String, Object> toMap(int maxCols) {
868 // start with the fingerpring map and build on top of it
869 Map<String, Object> map = getFingerprint();
870 // map from families to column list replaces fingerprint's list of families
871 Map<String, List<String>> familyColumns =
872 new HashMap<String, List<String>>();
873 map.put("families", familyColumns);
874 // add scalar information first
875 map.put("startRow", Bytes.toStringBinary(this.startRow));
876 map.put("stopRow", Bytes.toStringBinary(this.stopRow));
877 map.put("maxVersions", this.maxVersions);
878 map.put("batch", this.batch);
879 map.put("caching", this.caching);
880 map.put("maxResultSize", this.maxResultSize);
881 map.put("cacheBlocks", this.cacheBlocks);
882 map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
883 List<Long> timeRange = new ArrayList<Long>();
884 timeRange.add(this.tr.getMin());
885 timeRange.add(this.tr.getMax());
886 map.put("timeRange", timeRange);
887 int colCount = 0;
888 // iterate through affected families and list out up to maxCols columns
889 for (Map.Entry<byte [], NavigableSet<byte[]>> entry :
890 this.familyMap.entrySet()) {
891 List<String> columns = new ArrayList<String>();
892 familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
893 if(entry.getValue() == null) {
894 colCount++;
895 --maxCols;
896 columns.add("ALL");
897 } else {
898 colCount += entry.getValue().size();
899 if (maxCols <= 0) {
900 continue;
901 }
902 for (byte [] column : entry.getValue()) {
903 if (--maxCols <= 0) {
904 continue;
905 }
906 columns.add(Bytes.toStringBinary(column));
907 }
908 }
909 }
910 map.put("totalColumns", colCount);
911 if (this.filter != null) {
912 map.put("filter", this.filter.toString());
913 }
914 // add the id if set
915 if (getId() != null) {
916 map.put("id", getId());
917 }
918 return map;
919 }
920
921 /**
922 * Enable/disable "raw" mode for this scan.
923 * If "raw" is enabled the scan will return all
924 * delete marker and deleted rows that have not
925 * been collected, yet.
926 * This is mostly useful for Scan on column families
927 * that have KEEP_DELETED_ROWS enabled.
928 * It is an error to specify any column when "raw" is set.
929 * @param raw True/False to enable/disable "raw" mode.
930 */
931 public Scan setRaw(boolean raw) {
932 setAttribute(RAW_ATTR, Bytes.toBytes(raw));
933 return this;
934 }
935
936 /**
937 * @return True if this Scan is in "raw" mode.
938 */
939 public boolean isRaw() {
940 byte[] attr = getAttribute(RAW_ATTR);
941 return attr == null ? false : Bytes.toBoolean(attr);
942 }
943
944
945
  /**
   * Set whether this scan is a small scan.
   * <p>
   * A small scan should use pread, while a big scan can use seek + read.
   *
   * seek + read is fast but can cause two problems: (1) resource contention and
   * (2) too much network io.
   *
   * [89-fb] Using pread for non-compaction read request
   * https://issues.apache.org/jira/browse/HBASE-7266
   *
   * On the other hand, if setting it true, we would do
   * openScanner,next,closeScanner in one RPC call. It means better
   * performance for a small scan. [HBASE-9488].
   *
   * Generally, if the scan range is within one data block (64KB), it could be
   * considered as a small scan.
   *
   * @param small true to mark this scan as a small scan
   * @return this
   */
  public Scan setSmall(boolean small) {
    this.small = small;
    return this;
  }
970
  /**
   * Get whether this scan is a small scan.
   * @return true if this is a small scan
   * @see #setSmall(boolean)
   */
  public boolean isSmall() {
    return small;
  }
978
  // Covariant override: narrows the return type to Scan so fluent call chains keep working.
  @Override
  public Scan setAttribute(String name, byte[] value) {
    return (Scan) super.setAttribute(name, value);
  }
983
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setId(String id) {
    return (Scan) super.setId(id);
  }
988
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setAuthorizations(Authorizations authorizations) {
    return (Scan) super.setAuthorizations(authorizations);
  }
993
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setACL(Map<String, Permission> perms) {
    return (Scan) super.setACL(perms);
  }
998
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setACL(String user, Permission perms) {
    return (Scan) super.setACL(user, perms);
  }
1003
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setConsistency(Consistency consistency) {
    return (Scan) super.setConsistency(consistency);
  }
1008
1009 @Override
1010 public Scan setReplicaId(int Id) {
1011 return (Scan) super.setReplicaId(Id);
1012 }
1013
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setIsolationLevel(IsolationLevel level) {
    return (Scan) super.setIsolationLevel(level);
  }
1018
  // Covariant override: delegates to the superclass but returns Scan for fluent chaining.
  @Override
  public Scan setPriority(int priority) {
    return (Scan) super.setPriority(priority);
  }
1023
  /**
   * Utility that creates a Scan that will do a small scan in reverse from passed row
   * looking for next closest row.
   * @param row the row to start the reverse scan from
   * @return An instance of Scan primed with passed <code>row</code> to
   *     scan in reverse for one row only.
   */
  static Scan createGetClosestRowOrBeforeReverseScan(byte[] row) {
    // Below does not work if you add in family; need to add the family qualifier that is highest
    // possible family qualifier. Do we have such a notion? Would have to be magic.
    Scan scan = new Scan(row);
    scan.setSmall(true);
    scan.setReversed(true);
    // one row is all we want back
    scan.setCaching(1);
    return scan;
  }
1041
1042 /**
1043 * Enable collection of {@link ScanMetrics}. For advanced users.
1044 * @param enabled Set to true to enable accumulating scan metrics
1045 */
1046 public Scan setScanMetricsEnabled(final boolean enabled) {
1047 setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.valueOf(enabled)));
1048 return this;
1049 }
1050
1051 /**
1052 * @return True if collection of scan metrics is enabled. For advanced users.
1053 */
1054 public boolean isScanMetricsEnabled() {
1055 byte[] attr = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
1056 return attr == null ? false : Bytes.toBoolean(attr);
1057 }
1058
1059 /**
1060 * @return Metrics on this Scan, if metrics were enabled.
1061 * @see #setScanMetricsEnabled(boolean)
1062 * @deprecated Use {@link ResultScanner#getScanMetrics()} instead. And notice that, please do not
1063 * use this method and {@link ResultScanner#getScanMetrics()} together, the metrics
1064 * will be messed up.
1065 */
1066 @Deprecated
1067 public ScanMetrics getScanMetrics() {
1068 byte[] bytes = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA);
1069 if (bytes == null) return null;
1070 return ProtobufUtil.toScanMetrics(bytes);
1071 }
1072
  /**
   * Get the row limit for this scan.
   * @return the limit of rows for this scan
   * @see #setLimit(int)
   */
  public int getLimit() {
    return limit;
  }
1079
  /**
   * Set the limit of rows for this scan. We will terminate the scan if the number of returned rows
   * reaches this value.
   * <p>
   * This condition will be tested at last, after all other conditions such as stopRow, filter, etc.
   * @param limit the limit of rows for this scan
   * @return this
   */
  public Scan setLimit(int limit) {
    // NOTE(review): no validation here; a non-positive value appears to mean "no limit" —
    // confirm against the scanner implementation before adding range checks
    this.limit = limit;
    return this;
  }
1092
1093 /**
1094 * Call this when you only want to get one row. It will set {@code limit} to {@code 1}, and also
1095 * set {@code readType} to {@link ReadType#PREAD}.
1096 * @return this
1097 */
1098 public Scan setOneRowLimit() {
1099 return setLimit(1).setReadType(ReadType.PREAD);
1100 }
1101
  /**
   * The read type used by a scan; DEFAULT lets the server choose.
   * @see #setReadType(ReadType)
   */
  @InterfaceAudience.Public
  @InterfaceStability.Unstable
  public enum ReadType {
    DEFAULT, STREAM, PREAD
  }
1107
  /**
   * Get the read type for this scan.
   * @return the read type for this scan
   * @see #setReadType(ReadType)
   */
  public ReadType getReadType() {
    return readType;
  }
1114
  /**
   * Set the read type for this scan.
   * <p>
   * Notice that we may choose to use pread even if you specify {@link ReadType#STREAM} here. For
   * example, we will always use pread if this is a get scan.
   * @param readType the read type to use
   * @return this
   */
  public Scan setReadType(ReadType readType) {
    this.readType = readType;
    return this;
  }
1126
  /**
   * Get the mvcc read point used to open a scanner.
   * @return the mvcc read point
   */
  long getMvccReadPoint() {
    return mvccReadPoint;
  }
1133
  /**
   * Set the mvcc read point used to open a scanner.
   * @param mvccReadPoint the mvcc read point to use
   * @return this
   */
  Scan setMvccReadPoint(long mvccReadPoint) {
    this.mvccReadPoint = mvccReadPoint;
    return this;
  }
1141
  /**
   * Set the mvcc read point to -1 which means do not use it.
   * @return this
   */
  Scan resetMvccReadPoint() {
    return setMvccReadPoint(-1L);
  }
1148
  /**
   * When the server is slow, or we scan a table with much deleted data, or we use a sparse filter,
   * the server will send heartbeat responses to prevent a timeout. However the scanner will return
   * a Result only when the client allows it. So if there are many heartbeats, the blocking time on
   * ResultScanner#next() may be very long, which is not friendly to online services.
   *
   * Set this to true and you can get a special Result whose #isCursor() returns true and which
   * does not contain any real data. It only tells you where the server has scanned. You can call
   * next to continue scanning, or open a new scanner with this row key as start row whenever you
   * want.
   *
   * Users get a cursor when and only when there is a response from the server but we can not
   * return a Result to users, for example, when the response is a heartbeat or when there are
   * partial cells but users do not allow partial results.
   *
   * Currently the cursor is at row granularity, which means the special Result will only contain a
   * row key.
   * {@link Result#isCursor()}
   * {@link Result#getCursor()}
   * {@link Cursor}
   * @param needCursorResult true to enable cursor results
   * @return this
   */
  public Scan setNeedCursorResult(boolean needCursorResult) {
    this.needCursorResult = needCursorResult;
    return this;
  }
1172
  /**
   * @return true if cursor results are enabled for this scan
   * @see #setNeedCursorResult(boolean)
   */
  public boolean isNeedCursorResult() {
    return needCursorResult;
  }
1176
1177 /**
1178 * Create a new Scan with a cursor. It only set the position information like start row key.
1179 * The others (like cfs, stop row, limit) should still be filled in by the user.
1180 * {@link Result#isCursor()}
1181 * {@link Result#getCursor()}
1182 * {@link Cursor}
1183 */
1184 public static Scan createScanFromCursor(Cursor cursor) {
1185 return new Scan().withStartRow(cursor.getRow());
1186 }
1187 }