001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.HashMap;
023import java.util.List;
024import java.util.Map;
025import java.util.NavigableSet;
026import java.util.TreeMap;
027import java.util.TreeSet;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
030import org.apache.hadoop.hbase.filter.Filter;
031import org.apache.hadoop.hbase.filter.IncompatibleFilterException;
032import org.apache.hadoop.hbase.io.TimeRange;
033import org.apache.hadoop.hbase.security.access.Permission;
034import org.apache.hadoop.hbase.security.visibility.Authorizations;
035import org.apache.hadoop.hbase.util.Bytes;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
041
042/**
043 * Used to perform Scan operations.
044 * <p>
045 * All operations are identical to {@link Get} with the exception of instantiation. Rather than
046 * specifying a single row, an optional startRow and stopRow may be defined. If rows are not
047 * specified, the Scanner will iterate over all rows.
048 * <p>
049 * To get all columns from all rows of a Table, create an instance with no constraints; use the
050 * {@link #Scan()} constructor. To constrain the scan to specific column families, call
051 * {@link #addFamily(byte[]) addFamily} for each family to retrieve on your Scan instance.
052 * <p>
053 * To get specific columns, call {@link #addColumn(byte[], byte[]) addColumn} for each column to
054 * retrieve.
055 * <p>
056 * To only retrieve columns within a specific range of version timestamps, call
057 * {@link #setTimeRange(long, long) setTimeRange}.
058 * <p>
059 * To only retrieve columns with a specific timestamp, call {@link #setTimestamp(long) setTimestamp}
060 * .
061 * <p>
062 * To limit the number of versions of each column to be returned, call {@link #setMaxVersions(int)
063 * setMaxVersions}.
064 * <p>
065 * To limit the maximum number of values returned for each call to next(), call
066 * {@link #setBatch(int) setBatch}.
067 * <p>
068 * To add a filter, call {@link #setFilter(org.apache.hadoop.hbase.filter.Filter) setFilter}.
069 * <p>
070 * For small scan, it is deprecated in 2.0.0. Now we have a {@link #setLimit(int)} method in Scan
071 * object which is used to tell RS how many rows we want. If the rows return reaches the limit, the
072 * RS will close the RegionScanner automatically. And we will also fetch data when openScanner in
073 * the new implementation, this means we can also finish a scan operation in one rpc call. And we
074 * have also introduced a {@link #setReadType(ReadType)} method. You can use this method to tell RS
075 * to use pread explicitly.
076 * <p>
077 * Expert: To explicitly disable server-side block caching for this scan, execute
078 * {@link #setCacheBlocks(boolean)}.
079 * <p>
 * <em>Note:</em> Usage alters Scan instances. Internally, attributes are updated as the Scan runs
 * and, if enabled, metrics accumulate in the Scan instance. Keep this in mind when you clone a
 * Scan instance or reuse an existing one; it is safer to create a new Scan instance for each
 * usage.
084 */
085@InterfaceAudience.Public
086public class Scan extends Query {
  // Class-wide logger; setTimestamp(long) uses it to report TimeRange construction failures.
  private static final Logger LOG = LoggerFactory.getLogger(Scan.class);

  // Attribute key for the "raw" flag.
  // NOTE(review): the accessors that read/write this key are outside this section - confirm.
  private static final String RAW_ATTR = "_raw_";

  // Row range of the scan. By default no rows are specified, so the scan covers all rows; the
  // start row is inclusive and the stop row is exclusive.
  private byte[] startRow = HConstants.EMPTY_START_ROW;
  private boolean includeStartRow = true;
  private byte[] stopRow = HConstants.EMPTY_END_ROW;
  private boolean includeStopRow = false;
  // Maximum number of versions returned per column; readAllVersions() sets Integer.MAX_VALUE.
  private int maxVersions = 1;
  // Maximum number of cells returned per call to next() (see setBatch(int)).
  // NOTE(review): -1 presumably means "not set" - confirm against the scanner implementation.
  private int batch = -1;

  /**
   * Partial {@link Result}s are {@link Result}s that must be combined to form a complete
   * {@link Result}. The {@link Result}s had to be returned in fragments (i.e. as partials) because
   * the size of the cells in the row exceeded max result size on the server. Typically partial
   * results will be combined client side into complete results before being delivered to the
   * caller. However, if this flag is set, the caller is indicating that they do not mind seeing
   * partial results (i.e. they understand that the results returned from the Scanner may only
   * represent part of a particular row). In such a case, any attempt to combine the partials into a
   * complete result on the client side will be skipped, and the caller will be able to see the
   * exact results returned from the server.
   */
  private boolean allowPartialResults = false;

  // Maximum number of values returned per row per column family
  // (see setMaxResultsPerColumnFamily(int)).
  // NOTE(review): -1 presumably means "no limit" - confirm.
  private int storeLimit = -1;
  // Number of cells skipped per row per column family (see setRowOffsetPerColumnFamily(int)).
  private int storeOffset = 0;
113
  /**
   * Name of the scan attribute that used to toggle scan metrics collection.
   * @deprecated since 1.0.0. Use {@link #setScanMetricsEnabled(boolean)}
   */
  // Make private or remove.
  @Deprecated
  static public final String SCAN_ATTRIBUTES_METRICS_ENABLE = "scan.attributes.metrics.enable";

  /**
   * Name of the scan attribute for scan metrics data.
   * @deprecated Use {@link #getScanMetrics()}
   */
  // Make this private or remove.
  @Deprecated
  static public final String SCAN_ATTRIBUTES_METRICS_DATA = "scan.attributes.metrics.data";

  // Name of the scan attribute that carries the table name.
  // If an application wants to use multiple scans over different tables, each scan must
  // define this attribute with the appropriate table name by calling
  // scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName))
  static public final String SCAN_ATTRIBUTES_TABLE_NAME = "scan.attributes.table.name";
132
  /**
   * Number of rows for caching that will be passed to scanners. -1 means no caching was specified
   * and the value of {@link HConstants#HBASE_CLIENT_SCANNER_CACHING} (which defaults to
   * {@link HConstants#DEFAULT_HBASE_CLIENT_SCANNER_CACHING}) will be used.
   */
  private int caching = -1;
  // Maximum result size in bytes; -1 means no specific maximum is set for this scan and the
  // globally configured value is used instead (see setMaxResultSize(long)).
  private long maxResultSize = -1;
  // Whether blocks should be cached for this scan; true by default (see setCacheBlocks(boolean)).
  private boolean cacheBlocks = true;
  // false (the default) is a forward scan, true is a backward scan (see setReversed(boolean)).
  private boolean reversed = false;
  // Timestamp range of the versions to return; defaults to all time.
  private TimeRange tr = TimeRange.allTime();
  // Family -> qualifiers to retrieve, ordered by Bytes.BYTES_COMPARATOR. addFamily(byte[]) maps
  // a family to null, which stands for "all columns of that family".
  private Map<byte[], NavigableSet<byte[]>> familyMap =
    new TreeMap<byte[], NavigableSet<byte[]>>(Bytes.BYTES_COMPARATOR);
  // Boxed so that it can be left unset.
  // NOTE(review): null presumably means "fall back to configuration" - the accessor is outside
  // this section, confirm there.
  private Boolean asyncPrefetch = null;

  /**
   * Parameter name for the client scanner sync/async prefetch toggle. When using the async
   * scanner, prefetching data from the server is done in the background. The parameter currently
   * has no effect if the user has called Scan#setSmall or Scan#setReversed.
   */
  public static final String HBASE_CLIENT_SCANNER_ASYNC_PREFETCH =
    "hbase.client.scanner.async.prefetch";

  /**
   * Default value of {@link #HBASE_CLIENT_SCANNER_ASYNC_PREFETCH}.
   */
  public static final boolean DEFAULT_HBASE_CLIENT_SCANNER_ASYNC_PREFETCH = false;
158
  /**
   * Whether this is a "small" scan. Set it to true for a small scan to get better performance.
   * <p>
   * A small scan should use pread, while a big scan can use seek + read. Seek + read is fast but
   * can cause two problems: (1) resource contention and (2) too much network I/O. See [89-fb]
   * "Using pread for non-compaction read request",
   * https://issues.apache.org/jira/browse/HBASE-7266.
   * <p>
   * In addition, when this is true, openScanner, next and closeScanner are done in one RPC call,
   * which means better performance for a small scan [HBASE-9488]. Generally, if the scan range is
   * within one data block (64KB), it can be considered a small scan.
   */
  private boolean small = false;

  /**
   * The mvcc read point to use when opening a scanner. Remember to clear it after switching
   * regions, as the mvcc is only valid within region scope.
   */
  private long mvccReadPoint = -1L;

  /**
   * The number of rows we want for this scan. We will terminate the scan if the number of returned
   * rows reaches this value.
   */
  // NOTE(review): -1 presumably means "no limit" - confirm where the limit is consumed.
  private int limit = -1;

  /**
   * Control whether to use pread at server side.
   */
  private ReadType readType = ReadType.DEFAULT;

  // Copied by the Scan(Scan) constructor via isNeedCursorResult().
  // NOTE(review): the setter and the exact semantics live outside this section - confirm there.
  private boolean needCursorResult = false;
188
189  /**
190   * Create a Scan operation across all rows.
191   */
192  public Scan() {
193  }
194
195  /**
196   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use
197   *             {@code new Scan().withStartRow(startRow).setFilter(filter)} instead.
198   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
199   */
200  @Deprecated
201  public Scan(byte[] startRow, Filter filter) {
202    this(startRow);
203    this.filter = filter;
204  }
205
206  /**
207   * Create a Scan operation starting at the specified row.
208   * <p>
209   * If the specified row does not exist, the Scanner will start from the next closest row after the
210   * specified row.
211   * @param startRow row to start scanner at or after
212   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use
213   *             {@code new Scan().withStartRow(startRow)} instead.
214   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
215   */
216  @Deprecated
217  public Scan(byte[] startRow) {
218    setStartRow(startRow);
219  }
220
221  /**
222   * Create a Scan operation for the range of rows specified.
223   * @param startRow row to start scanner at or after (inclusive)
224   * @param stopRow  row to stop scanner before (exclusive)
225   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use
226   *             {@code new Scan().withStartRow(startRow).withStopRow(stopRow)} instead.
227   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
228   */
229  @Deprecated
230  public Scan(byte[] startRow, byte[] stopRow) {
231    setStartRow(startRow);
232    setStopRow(stopRow);
233  }
234
235  /**
236   * Creates a new instance of this class while copying all values.
237   * @param scan The scan instance to copy from.
238   * @throws IOException When copying the values fails.
239   */
240  public Scan(Scan scan) throws IOException {
241    startRow = scan.getStartRow();
242    includeStartRow = scan.includeStartRow();
243    stopRow = scan.getStopRow();
244    includeStopRow = scan.includeStopRow();
245    maxVersions = scan.getMaxVersions();
246    batch = scan.getBatch();
247    storeLimit = scan.getMaxResultsPerColumnFamily();
248    storeOffset = scan.getRowOffsetPerColumnFamily();
249    caching = scan.getCaching();
250    maxResultSize = scan.getMaxResultSize();
251    cacheBlocks = scan.getCacheBlocks();
252    filter = scan.getFilter(); // clone?
253    loadColumnFamiliesOnDemand = scan.getLoadColumnFamiliesOnDemandValue();
254    consistency = scan.getConsistency();
255    this.setIsolationLevel(scan.getIsolationLevel());
256    reversed = scan.isReversed();
257    asyncPrefetch = scan.isAsyncPrefetch();
258    small = scan.isSmall();
259    allowPartialResults = scan.getAllowPartialResults();
260    tr = scan.getTimeRange(); // TimeRange is immutable
261    Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
262    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : fams.entrySet()) {
263      byte[] fam = entry.getKey();
264      NavigableSet<byte[]> cols = entry.getValue();
265      if (cols != null && cols.size() > 0) {
266        for (byte[] col : cols) {
267          addColumn(fam, col);
268        }
269      } else {
270        addFamily(fam);
271      }
272    }
273    for (Map.Entry<String, byte[]> attr : scan.getAttributesMap().entrySet()) {
274      setAttribute(attr.getKey(), attr.getValue());
275    }
276    for (Map.Entry<byte[], TimeRange> entry : scan.getColumnFamilyTimeRange().entrySet()) {
277      TimeRange tr = entry.getValue();
278      setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
279    }
280    this.mvccReadPoint = scan.getMvccReadPoint();
281    this.limit = scan.getLimit();
282    this.needCursorResult = scan.isNeedCursorResult();
283    setPriority(scan.getPriority());
284    readType = scan.getReadType();
285    super.setReplicaId(scan.getReplicaId());
286  }
287
288  /**
289   * Builds a scan object with the same specs as get.
290   * @param get get to model scan after
291   */
292  public Scan(Get get) {
293    this.startRow = get.getRow();
294    this.includeStartRow = true;
295    this.stopRow = get.getRow();
296    this.includeStopRow = true;
297    this.filter = get.getFilter();
298    this.cacheBlocks = get.getCacheBlocks();
299    this.maxVersions = get.getMaxVersions();
300    this.storeLimit = get.getMaxResultsPerColumnFamily();
301    this.storeOffset = get.getRowOffsetPerColumnFamily();
302    this.tr = get.getTimeRange();
303    this.familyMap = get.getFamilyMap();
304    this.asyncPrefetch = false;
305    this.consistency = get.getConsistency();
306    this.setIsolationLevel(get.getIsolationLevel());
307    this.loadColumnFamiliesOnDemand = get.getLoadColumnFamiliesOnDemandValue();
308    for (Map.Entry<String, byte[]> attr : get.getAttributesMap().entrySet()) {
309      setAttribute(attr.getKey(), attr.getValue());
310    }
311    for (Map.Entry<byte[], TimeRange> entry : get.getColumnFamilyTimeRange().entrySet()) {
312      TimeRange tr = entry.getValue();
313      setColumnFamilyTimeRange(entry.getKey(), tr.getMin(), tr.getMax());
314    }
315    this.mvccReadPoint = -1L;
316    setPriority(get.getPriority());
317    super.setReplicaId(get.getReplicaId());
318  }
319
320  public boolean isGetScan() {
321    return includeStartRow && includeStopRow
322      && ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow);
323  }
324
325  /**
326   * Get all columns from the specified family.
327   * <p>
328   * Overrides previous calls to addColumn for this family.
329   * @param family family name
330   */
331  public Scan addFamily(byte[] family) {
332    familyMap.remove(family);
333    familyMap.put(family, null);
334    return this;
335  }
336
337  /**
338   * Get the column from the specified family with the specified qualifier.
339   * <p>
340   * Overrides previous calls to addFamily for this family.
341   * @param family    family name
342   * @param qualifier column qualifier
343   */
344  public Scan addColumn(byte[] family, byte[] qualifier) {
345    NavigableSet<byte[]> set = familyMap.get(family);
346    if (set == null) {
347      set = new TreeSet<>(Bytes.BYTES_COMPARATOR);
348      familyMap.put(family, set);
349    }
350    if (qualifier == null) {
351      qualifier = HConstants.EMPTY_BYTE_ARRAY;
352    }
353    set.add(qualifier);
354    return this;
355  }
356
357  /**
358   * Get versions of columns only within the specified timestamp range, [minStamp, maxStamp). Note,
359   * default maximum versions to return is 1. If your time range spans more than one version and you
360   * want all versions returned, up the number of versions beyond the default.
361   * @param minStamp minimum timestamp value, inclusive
362   * @param maxStamp maximum timestamp value, exclusive
363   * @see #setMaxVersions()
364   * @see #setMaxVersions(int)
365   */
366  public Scan setTimeRange(long minStamp, long maxStamp) throws IOException {
367    tr = new TimeRange(minStamp, maxStamp);
368    return this;
369  }
370
371  /**
372   * Get versions of columns with the specified timestamp. Note, default maximum versions to return
373   * is 1. If your time range spans more than one version and you want all versions returned, up the
374   * number of versions beyond the defaut.
375   * @param timestamp version timestamp
376   * @see #setMaxVersions()
377   * @see #setMaxVersions(int)
378   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0. Use
379   *             {@link #setTimestamp(long)} instead
380   */
381  @Deprecated
382  public Scan setTimeStamp(long timestamp) throws IOException {
383    return this.setTimestamp(timestamp);
384  }
385
386  /**
387   * Get versions of columns with the specified timestamp. Note, default maximum versions to return
388   * is 1. If your time range spans more than one version and you want all versions returned, up the
389   * number of versions beyond the defaut.
390   * @param timestamp version timestamp
391   * @see #setMaxVersions()
392   * @see #setMaxVersions(int)
393   */
394  public Scan setTimestamp(long timestamp) {
395    try {
396      tr = new TimeRange(timestamp, timestamp + 1);
397    } catch (Exception e) {
398      // This should never happen, unless integer overflow or something extremely wrong...
399      LOG.error("TimeRange failed, likely caused by integer overflow. ", e);
400      throw e;
401    }
402
403    return this;
404  }
405
406  @Override
407  public Scan setColumnFamilyTimeRange(byte[] cf, long minStamp, long maxStamp) {
408    return (Scan) super.setColumnFamilyTimeRange(cf, minStamp, maxStamp);
409  }
410
411  /**
412   * Set the start row of the scan.
413   * <p>
414   * If the specified row does not exist, the Scanner will start from the next closest row after the
415   * specified row.
416   * @param startRow row to start scanner at or after
417   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
418   *                                  exceeds {@link HConstants#MAX_ROW_LENGTH})
419   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #withStartRow(byte[])}
420   *             instead. This method may change the inclusive of the stop row to keep compatible
421   *             with the old behavior.
422   * @see #withStartRow(byte[])
423   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
424   */
425  @Deprecated
426  public Scan setStartRow(byte[] startRow) {
427    withStartRow(startRow);
428    if (ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow)) {
429      // for keeping the old behavior that a scan with the same start and stop row is a get scan.
430      this.includeStopRow = true;
431    }
432    return this;
433  }
434
435  /**
436   * Set the start row of the scan.
437   * <p>
438   * If the specified row does not exist, the Scanner will start from the next closest row after the
439   * specified row.
440   * @param startRow row to start scanner at or after
441   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
442   *                                  exceeds {@link HConstants#MAX_ROW_LENGTH})
443   */
444  public Scan withStartRow(byte[] startRow) {
445    return withStartRow(startRow, true);
446  }
447
448  /**
449   * Set the start row of the scan.
450   * <p>
451   * If the specified row does not exist, or the {@code inclusive} is {@code false}, the Scanner
452   * will start from the next closest row after the specified row.
453   * @param startRow  row to start scanner at or after
454   * @param inclusive whether we should include the start row when scan
455   * @throws IllegalArgumentException if startRow does not meet criteria for a row key (when length
456   *                                  exceeds {@link HConstants#MAX_ROW_LENGTH})
457   */
458  public Scan withStartRow(byte[] startRow, boolean inclusive) {
459    if (Bytes.len(startRow) > HConstants.MAX_ROW_LENGTH) {
460      throw new IllegalArgumentException("startRow's length must be less than or equal to "
461        + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
462    }
463    this.startRow = startRow;
464    this.includeStartRow = inclusive;
465    return this;
466  }
467
468  /**
469   * Set the stop row of the scan.
470   * <p>
471   * The scan will include rows that are lexicographically less than the provided stopRow.
472   * <p>
473   * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
474   * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
475   * </p>
476   * @param stopRow row to end at (exclusive)
477   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
478   *                                  exceeds {@link HConstants#MAX_ROW_LENGTH})
479   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #withStopRow(byte[])} instead.
480   *             This method may change the inclusive of the stop row to keep compatible with the
481   *             old behavior.
482   * @see #withStopRow(byte[])
483   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17320">HBASE-17320</a>
484   */
485  @Deprecated
486  public Scan setStopRow(byte[] stopRow) {
487    withStopRow(stopRow);
488    if (ClientUtil.areScanStartRowAndStopRowEqual(this.startRow, this.stopRow)) {
489      // for keeping the old behavior that a scan with the same start and stop row is a get scan.
490      this.includeStopRow = true;
491    }
492    return this;
493  }
494
495  /**
496   * Set the stop row of the scan.
497   * <p>
498   * The scan will include rows that are lexicographically less than the provided stopRow.
499   * <p>
500   * <b>Note:</b> When doing a filter for a rowKey <u>Prefix</u> use
501   * {@link #setRowPrefixFilter(byte[])}. The 'trailing 0' will not yield the desired result.
502   * </p>
503   * @param stopRow row to end at (exclusive)
504   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
505   *                                  exceeds {@link HConstants#MAX_ROW_LENGTH})
506   */
507  public Scan withStopRow(byte[] stopRow) {
508    return withStopRow(stopRow, false);
509  }
510
511  /**
512   * Set the stop row of the scan.
513   * <p>
514   * The scan will include rows that are lexicographically less than (or equal to if
515   * {@code inclusive} is {@code true}) the provided stopRow.
516   * @param stopRow   row to end at
517   * @param inclusive whether we should include the stop row when scan
518   * @throws IllegalArgumentException if stopRow does not meet criteria for a row key (when length
519   *                                  exceeds {@link HConstants#MAX_ROW_LENGTH})
520   */
521  public Scan withStopRow(byte[] stopRow, boolean inclusive) {
522    if (Bytes.len(stopRow) > HConstants.MAX_ROW_LENGTH) {
523      throw new IllegalArgumentException("stopRow's length must be less than or equal to "
524        + HConstants.MAX_ROW_LENGTH + " to meet the criteria" + " for a row key.");
525    }
526    this.stopRow = stopRow;
527    this.includeStopRow = inclusive;
528    return this;
529  }
530
531  /**
532   * <p>
533   * Set a filter (using stopRow and startRow) so the result set only contains rows where the rowKey
534   * starts with the specified prefix.
535   * </p>
536   * <p>
537   * This is a utility method that converts the desired rowPrefix into the appropriate values for
538   * the startRow and stopRow to achieve the desired result.
539   * </p>
540   * <p>
541   * This can safely be used in combination with setFilter.
542   * </p>
543   * <p>
544   * <b>NOTE: Doing a {@link #setStartRow(byte[])} and/or {@link #setStopRow(byte[])} after this
545   * method will yield undefined results.</b>
546   * </p>
547   * @param rowPrefix the prefix all rows must start with. (Set <i>null</i> to remove the filter.)
548   */
549  public Scan setRowPrefixFilter(byte[] rowPrefix) {
550    if (rowPrefix == null) {
551      setStartRow(HConstants.EMPTY_START_ROW);
552      setStopRow(HConstants.EMPTY_END_ROW);
553    } else {
554      this.setStartRow(rowPrefix);
555      this.setStopRow(ClientUtil.calculateTheClosestNextRowKeyForPrefix(rowPrefix));
556    }
557    return this;
558  }
559
560  /**
561   * Get all available versions.
562   * @deprecated since 2.0.0 and will be removed in 3.0.0. It is easy to misunderstand with column
563   *             family's max versions, so use {@link #readAllVersions()} instead.
564   * @see #readAllVersions()
565   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17125">HBASE-17125</a>
566   */
567  @Deprecated
568  public Scan setMaxVersions() {
569    return readAllVersions();
570  }
571
572  /**
573   * Get up to the specified number of versions of each column.
574   * @param maxVersions maximum versions for each column
575   * @deprecated since 2.0.0 and will be removed in 3.0.0. It is easy to misunderstand with column
576   *             family's max versions, so use {@link #readVersions(int)} instead.
577   * @see #readVersions(int)
578   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17125">HBASE-17125</a>
579   */
580  @Deprecated
581  public Scan setMaxVersions(int maxVersions) {
582    return readVersions(maxVersions);
583  }
584
585  /**
586   * Get all available versions.
587   */
588  public Scan readAllVersions() {
589    this.maxVersions = Integer.MAX_VALUE;
590    return this;
591  }
592
593  /**
594   * Get up to the specified number of versions of each column.
595   * @param versions specified number of versions for each column
596   */
597  public Scan readVersions(int versions) {
598    this.maxVersions = versions;
599    return this;
600  }
601
602  /**
603   * Set the maximum number of cells to return for each call to next(). Callers should be aware that
604   * this is not equivalent to calling {@link #setAllowPartialResults(boolean)}. If you don't allow
605   * partial results, the number of cells in each Result must equal to your batch setting unless it
606   * is the last Result for current row. So this method is helpful in paging queries. If you just
607   * want to prevent OOM at client, use setAllowPartialResults(true) is better.
608   * @param batch the maximum number of values
609   * @see Result#mayHaveMoreCellsInRow()
610   */
611  public Scan setBatch(int batch) {
612    if (this.hasFilter() && this.filter.hasFilterRow()) {
613      throw new IncompatibleFilterException(
614        "Cannot set batch on a scan using a filter" + " that returns true for filter.hasFilterRow");
615    }
616    this.batch = batch;
617    return this;
618  }
619
620  /**
621   * Set the maximum number of values to return per row per Column Family
622   * @param limit the maximum number of values returned / row / CF
623   */
624  public Scan setMaxResultsPerColumnFamily(int limit) {
625    this.storeLimit = limit;
626    return this;
627  }
628
629  /**
630   * Set offset for the row per Column Family.
631   * @param offset is the number of kvs that will be skipped.
632   */
633  public Scan setRowOffsetPerColumnFamily(int offset) {
634    this.storeOffset = offset;
635    return this;
636  }
637
638  /**
639   * Set the number of rows for caching that will be passed to scanners. If not set, the
640   * Configuration setting {@link HConstants#HBASE_CLIENT_SCANNER_CACHING} will apply. Higher
641   * caching values will enable faster scanners but will use more memory.
642   * @param caching the number of rows for caching
643   */
644  public Scan setCaching(int caching) {
645    this.caching = caching;
646    return this;
647  }
648
649  /** Returns the maximum result size in bytes. See {@link #setMaxResultSize(long)} */
650  public long getMaxResultSize() {
651    return maxResultSize;
652  }
653
654  /**
655   * Set the maximum result size. The default is -1; this means that no specific maximum result size
656   * will be set for this scan, and the global configured value will be used instead. (Defaults to
657   * unlimited).
658   * @param maxResultSize The maximum result size in bytes.
659   */
660  public Scan setMaxResultSize(long maxResultSize) {
661    this.maxResultSize = maxResultSize;
662    return this;
663  }
664
665  @Override
666  public Scan setFilter(Filter filter) {
667    super.setFilter(filter);
668    return this;
669  }
670
671  /**
672   * Setting the familyMap
673   * @param familyMap map of family to qualifier
674   */
675  public Scan setFamilyMap(Map<byte[], NavigableSet<byte[]>> familyMap) {
676    this.familyMap = familyMap;
677    return this;
678  }
679
680  /**
681   * Getting the familyMap
682   */
683  public Map<byte[], NavigableSet<byte[]>> getFamilyMap() {
684    return this.familyMap;
685  }
686
687  /** Returns the number of families in familyMap */
688  public int numFamilies() {
689    if (hasFamilies()) {
690      return this.familyMap.size();
691    }
692    return 0;
693  }
694
695  /** Returns true if familyMap is non empty, false otherwise */
696  public boolean hasFamilies() {
697    return !this.familyMap.isEmpty();
698  }
699
700  /** Returns the keys of the familyMap */
701  public byte[][] getFamilies() {
702    if (hasFamilies()) {
703      return this.familyMap.keySet().toArray(new byte[0][0]);
704    }
705    return null;
706  }
707
708  /** Returns the startrow */
709  public byte[] getStartRow() {
710    return this.startRow;
711  }
712
713  /** Returns if we should include start row when scan */
714  public boolean includeStartRow() {
715    return includeStartRow;
716  }
717
718  /** Returns the stoprow */
719  public byte[] getStopRow() {
720    return this.stopRow;
721  }
722
723  /** Returns if we should include stop row when scan */
724  public boolean includeStopRow() {
725    return includeStopRow;
726  }
727
728  /** Returns the max number of versions to fetch */
729  public int getMaxVersions() {
730    return this.maxVersions;
731  }
732
733  /** Returns maximum number of values to return for a single call to next() */
734  public int getBatch() {
735    return this.batch;
736  }
737
738  /** Returns maximum number of values to return per row per CF */
739  public int getMaxResultsPerColumnFamily() {
740    return this.storeLimit;
741  }
742
743  /**
744   * Method for retrieving the scan's offset per row per column family (#kvs to be skipped)
745   * @return row offset
746   */
747  public int getRowOffsetPerColumnFamily() {
748    return this.storeOffset;
749  }
750
751  /** Returns caching the number of rows fetched when calling next on a scanner */
752  public int getCaching() {
753    return this.caching;
754  }
755
756  /** Returns TimeRange */
757  public TimeRange getTimeRange() {
758    return this.tr;
759  }
760
761  /** Returns RowFilter */
762  @Override
763  public Filter getFilter() {
764    return filter;
765  }
766
767  /** Returns true is a filter has been specified, false if not */
768  public boolean hasFilter() {
769    return filter != null;
770  }
771
772  /**
773   * Set whether blocks should be cached for this Scan.
774   * <p>
775   * This is true by default. When true, default settings of the table and family are used (this
776   * will never override caching blocks if the block cache is disabled for that family or entirely).
777   * @param cacheBlocks if false, default settings are overridden and blocks will not be cached
778   */
779  public Scan setCacheBlocks(boolean cacheBlocks) {
780    this.cacheBlocks = cacheBlocks;
781    return this;
782  }
783
784  /**
785   * Get whether blocks should be cached for this Scan.
786   * @return true if default caching should be used, false if blocks should not be cached
787   */
788  public boolean getCacheBlocks() {
789    return cacheBlocks;
790  }
791
792  /**
793   * Set whether this scan is a reversed one
794   * <p>
795   * This is false by default which means forward(normal) scan.
796   * @param reversed if true, scan will be backward order
797   */
798  public Scan setReversed(boolean reversed) {
799    this.reversed = reversed;
800    return this;
801  }
802
803  /**
804   * Get whether this scan is a reversed one.
805   * @return true if backward scan, false if forward(default) scan
806   */
807  public boolean isReversed() {
808    return reversed;
809  }
810
811  /**
812   * Setting whether the caller wants to see the partial results when server returns
813   * less-than-expected cells. It is helpful while scanning a huge row to prevent OOM at client. By
814   * default this value is false and the complete results will be assembled client side before being
815   * delivered to the caller.
816   * @see Result#mayHaveMoreCellsInRow()
817   * @see #setBatch(int)
818   */
819  public Scan setAllowPartialResults(final boolean allowPartialResults) {
820    this.allowPartialResults = allowPartialResults;
821    return this;
822  }
823
824  /**
825   * @return true when the constructor of this scan understands that the results they will see may
826   *         only represent a partial portion of a row. The entire row would be retrieved by
827   *         subsequent calls to {@link ResultScanner#next()}
828   */
829  public boolean getAllowPartialResults() {
830    return allowPartialResults;
831  }
832
  /**
   * Forwards the value to the superclass setter and casts the returned object to {@code Scan} so
   * that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param value passed unchanged to the superclass setter
   */
  @Override
  public Scan setLoadColumnFamiliesOnDemand(boolean value) {
    return (Scan) super.setLoadColumnFamiliesOnDemand(value);
  }
837
838  /**
839   * Compile the table and column family (i.e. schema) information into a String. Useful for parsing
840   * and aggregation by debugging, logging, and administration tools.
841   */
842  @Override
843  public Map<String, Object> getFingerprint() {
844    Map<String, Object> map = new HashMap<>();
845    List<String> families = new ArrayList<>();
846    if (this.familyMap.isEmpty()) {
847      map.put("families", "ALL");
848      return map;
849    } else {
850      map.put("families", families);
851    }
852    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : this.familyMap.entrySet()) {
853      families.add(Bytes.toStringBinary(entry.getKey()));
854    }
855    return map;
856  }
857
858  /**
859   * Compile the details beyond the scope of getFingerprint (row, columns, timestamps, etc.) into a
860   * Map along with the fingerprinted information. Useful for debugging, logging, and administration
861   * tools.
862   * @param maxCols a limit on the number of columns output prior to truncation
863   */
864  @Override
865  public Map<String, Object> toMap(int maxCols) {
866    // start with the fingerpring map and build on top of it
867    Map<String, Object> map = getFingerprint();
868    // map from families to column list replaces fingerprint's list of families
869    Map<String, List<String>> familyColumns = new HashMap<>();
870    map.put("families", familyColumns);
871    // add scalar information first
872    map.put("startRow", Bytes.toStringBinary(this.startRow));
873    map.put("stopRow", Bytes.toStringBinary(this.stopRow));
874    map.put("maxVersions", this.maxVersions);
875    map.put("batch", this.batch);
876    map.put("caching", this.caching);
877    map.put("maxResultSize", this.maxResultSize);
878    map.put("cacheBlocks", this.cacheBlocks);
879    map.put("loadColumnFamiliesOnDemand", this.loadColumnFamiliesOnDemand);
880    List<Long> timeRange = new ArrayList<>(2);
881    timeRange.add(this.tr.getMin());
882    timeRange.add(this.tr.getMax());
883    map.put("timeRange", timeRange);
884    int colCount = 0;
885    // iterate through affected families and list out up to maxCols columns
886    for (Map.Entry<byte[], NavigableSet<byte[]>> entry : this.familyMap.entrySet()) {
887      List<String> columns = new ArrayList<>();
888      familyColumns.put(Bytes.toStringBinary(entry.getKey()), columns);
889      if (entry.getValue() == null) {
890        colCount++;
891        --maxCols;
892        columns.add("ALL");
893      } else {
894        colCount += entry.getValue().size();
895        if (maxCols <= 0) {
896          continue;
897        }
898        for (byte[] column : entry.getValue()) {
899          if (--maxCols <= 0) {
900            continue;
901          }
902          columns.add(Bytes.toStringBinary(column));
903        }
904      }
905    }
906    map.put("totalColumns", colCount);
907    if (this.filter != null) {
908      map.put("filter", this.filter.toString());
909    }
910    // add the id if set
911    if (getId() != null) {
912      map.put("id", getId());
913    }
914    return map;
915  }
916
917  /**
918   * Enable/disable "raw" mode for this scan. If "raw" is enabled the scan will return all delete
919   * marker and deleted rows that have not been collected, yet. This is mostly useful for Scan on
920   * column families that have KEEP_DELETED_ROWS enabled. It is an error to specify any column when
921   * "raw" is set.
922   * @param raw True/False to enable/disable "raw" mode.
923   */
924  public Scan setRaw(boolean raw) {
925    setAttribute(RAW_ATTR, Bytes.toBytes(raw));
926    return this;
927  }
928
929  /** Returns True if this Scan is in "raw" mode. */
930  public boolean isRaw() {
931    byte[] attr = getAttribute(RAW_ATTR);
932    return attr == null ? false : Bytes.toBoolean(attr);
933  }
934
935  /**
936   * Set whether this scan is a small scan
937   * <p>
938   * Small scan should use pread and big scan can use seek + read seek + read is fast but can cause
939   * two problem (1) resource contention (2) cause too much network io [89-fb] Using pread for
940   * non-compaction read request https://issues.apache.org/jira/browse/HBASE-7266 On the other hand,
941   * if setting it true, we would do openScanner,next,closeScanner in one RPC call. It means the
942   * better performance for small scan. [HBASE-9488]. Generally, if the scan range is within one
943   * data block(64KB), it could be considered as a small scan.
944   * @deprecated since 2.0.0 and will be removed in 3.0.0. Use {@link #setLimit(int)} and
945   *             {@link #setReadType(ReadType)} instead. And for the one rpc optimization, now we
946   *             will also fetch data when openScanner, and if the number of rows reaches the limit
947   *             then we will close the scanner automatically which means we will fall back to one
948   *             rpc.
949   * @see #setLimit(int)
950   * @see #setReadType(ReadType)
951   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17045">HBASE-17045</a>
952   */
953  @Deprecated
954  public Scan setSmall(boolean small) {
955    this.small = small;
956    if (small) {
957      this.readType = ReadType.PREAD;
958    }
959    return this;
960  }
961
962  /**
963   * Get whether this scan is a small scan
964   * @return true if small scan
965   * @deprecated since 2.0.0 and will be removed in 3.0.0. See the comment of
966   *             {@link #setSmall(boolean)}
967   * @see <a href="https://issues.apache.org/jira/browse/HBASE-17045">HBASE-17045</a>
968   */
969  @Deprecated
970  public boolean isSmall() {
971    return small;
972  }
973
  /**
   * Forwards the attribute to the superclass setter and casts the returned object to {@code Scan}
   * so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param name  attribute name, passed unchanged to the superclass
   * @param value attribute value, passed unchanged to the superclass
   */
  @Override
  public Scan setAttribute(String name, byte[] value) {
    return (Scan) super.setAttribute(name, value);
  }
978
  /**
   * Forwards the id to the superclass setter and casts the returned object to {@code Scan} so
   * that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param id passed unchanged to the superclass setter
   */
  @Override
  public Scan setId(String id) {
    return (Scan) super.setId(id);
  }
983
  /**
   * Forwards the authorizations to the superclass setter and casts the returned object to
   * {@code Scan} so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param authorizations passed unchanged to the superclass setter
   */
  @Override
  public Scan setAuthorizations(Authorizations authorizations) {
    return (Scan) super.setAuthorizations(authorizations);
  }
988
  /**
   * Forwards the permission map to the superclass setter and casts the returned object to
   * {@code Scan} so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param perms passed unchanged to the superclass setter
   */
  @Override
  public Scan setACL(Map<String, Permission> perms) {
    return (Scan) super.setACL(perms);
  }
993
  /**
   * Forwards the user and permission to the superclass setter and casts the returned object to
   * {@code Scan} so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param user  passed unchanged to the superclass setter
   * @param perms passed unchanged to the superclass setter
   */
  @Override
  public Scan setACL(String user, Permission perms) {
    return (Scan) super.setACL(user, perms);
  }
998
  /**
   * Forwards the consistency level to the superclass setter and casts the returned object to
   * {@code Scan} so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param consistency passed unchanged to the superclass setter
   */
  @Override
  public Scan setConsistency(Consistency consistency) {
    return (Scan) super.setConsistency(consistency);
  }
1003
  /**
   * Forwards the replica id to the superclass setter and casts the returned object to
   * {@code Scan} so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param Id passed unchanged to the superclass setter
   */
  @Override
  public Scan setReplicaId(int Id) {
    return (Scan) super.setReplicaId(Id);
  }
1008
  /**
   * Forwards the isolation level to the superclass setter and casts the returned object to
   * {@code Scan} so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param level passed unchanged to the superclass setter
   */
  @Override
  public Scan setIsolationLevel(IsolationLevel level) {
    return (Scan) super.setIsolationLevel(level);
  }
1013
  /**
   * Forwards the priority to the superclass setter and casts the returned object to {@code Scan}
   * so that further Scan calls can be chained.
   * <p>
   * NOTE(review): the cast assumes the superclass setter returns this instance; confirm in the
   * parent class.
   * @param priority passed unchanged to the superclass setter
   */
  @Override
  public Scan setPriority(int priority) {
    return (Scan) super.setPriority(priority);
  }
1018
1019  /**
1020   * Enable collection of {@link ScanMetrics}. For advanced users.
1021   * @param enabled Set to true to enable accumulating scan metrics
1022   */
1023  public Scan setScanMetricsEnabled(final boolean enabled) {
1024    setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE, Bytes.toBytes(Boolean.valueOf(enabled)));
1025    return this;
1026  }
1027
1028  /** Returns True if collection of scan metrics is enabled. For advanced users. */
1029  public boolean isScanMetricsEnabled() {
1030    byte[] attr = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_ENABLE);
1031    return attr == null ? false : Bytes.toBoolean(attr);
1032  }
1033
1034  /**
1035   * @return Metrics on this Scan, if metrics were enabled.
1036   * @see #setScanMetricsEnabled(boolean)
1037   * @deprecated Use {@link ResultScanner#getScanMetrics()} instead. And notice that, please do not
1038   *             use this method and {@link ResultScanner#getScanMetrics()} together, the metrics
1039   *             will be messed up.
1040   */
1041  @Deprecated
1042  public ScanMetrics getScanMetrics() {
1043    byte[] bytes = getAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA);
1044    if (bytes == null) return null;
1045    return ProtobufUtil.toScanMetrics(bytes);
1046  }
1047
1048  public Boolean isAsyncPrefetch() {
1049    return asyncPrefetch;
1050  }
1051
1052  public Scan setAsyncPrefetch(boolean asyncPrefetch) {
1053    this.asyncPrefetch = asyncPrefetch;
1054    return this;
1055  }
1056
1057  /** Returns the limit of rows for this scan */
1058  public int getLimit() {
1059    return limit;
1060  }
1061
1062  /**
1063   * Set the limit of rows for this scan. We will terminate the scan if the number of returned rows
1064   * reaches this value.
1065   * <p>
1066   * This condition will be tested at last, after all other conditions such as stopRow, filter, etc.
1067   * @param limit the limit of rows for this scan
1068   */
1069  public Scan setLimit(int limit) {
1070    this.limit = limit;
1071    return this;
1072  }
1073
1074  /**
1075   * Call this when you only want to get one row. It will set {@code limit} to {@code 1}, and also
1076   * set {@code readType} to {@link ReadType#PREAD}.
1077   */
1078  public Scan setOneRowLimit() {
1079    return setLimit(1).setReadType(ReadType.PREAD);
1080  }
1081
  /**
   * The read type that can be requested for a scan through {@link #setReadType(ReadType)}.
   */
  @InterfaceAudience.Public
  public enum ReadType {
    // NOTE(review): presumably leaves the choice of read strategy to the implementation - confirm
    DEFAULT,
    // NOTE(review): presumably requests stream (seek + read) access - confirm
    STREAM,
    // pread; this is the type selected by setSmall(true) and setOneRowLimit()
    PREAD
  }
1088
1089  /** Returns the read type for this scan */
1090  public ReadType getReadType() {
1091    return readType;
1092  }
1093
1094  /**
1095   * Set the read type for this scan.
1096   * <p>
1097   * Notice that we may choose to use pread even if you specific {@link ReadType#STREAM} here. For
1098   * example, we will always use pread if this is a get scan.
1099   */
1100  public Scan setReadType(ReadType readType) {
1101    this.readType = readType;
1102    return this;
1103  }
1104
1105  /**
1106   * Get the mvcc read point used to open a scanner.
1107   */
1108  long getMvccReadPoint() {
1109    return mvccReadPoint;
1110  }
1111
1112  /**
1113   * Set the mvcc read point used to open a scanner.
1114   */
1115  Scan setMvccReadPoint(long mvccReadPoint) {
1116    this.mvccReadPoint = mvccReadPoint;
1117    return this;
1118  }
1119
1120  /**
1121   * Set the mvcc read point to -1 which means do not use it.
1122   */
1123  Scan resetMvccReadPoint() {
1124    return setMvccReadPoint(-1L);
1125  }
1126
1127  /**
1128   * When the server is slow or we scan a table with many deleted data or we use a sparse filter,
1129   * the server will response heartbeat to prevent timeout. However the scanner will return a Result
1130   * only when client can do it. So if there are many heartbeats, the blocking time on
1131   * ResultScanner#next() may be very long, which is not friendly to online services. Set this to
1132   * true then you can get a special Result whose #isCursor() returns true and is not contains any
1133   * real data. It only tells you where the server has scanned. You can call next to continue
1134   * scanning or open a new scanner with this row key as start row whenever you want. Users can get
1135   * a cursor when and only when there is a response from the server but we can not return a Result
1136   * to users, for example, this response is a heartbeat or there are partial cells but users do not
1137   * allow partial result. Now the cursor is in row level which means the special Result will only
1138   * contains a row key. {@link Result#isCursor()} {@link Result#getCursor()} {@link Cursor}
1139   */
1140  public Scan setNeedCursorResult(boolean needCursorResult) {
1141    this.needCursorResult = needCursorResult;
1142    return this;
1143  }
1144
1145  public boolean isNeedCursorResult() {
1146    return needCursorResult;
1147  }
1148
1149  /**
1150   * Create a new Scan with a cursor. It only set the position information like start row key. The
1151   * others (like cfs, stop row, limit) should still be filled in by the user.
1152   * {@link Result#isCursor()} {@link Result#getCursor()} {@link Cursor}
1153   */
1154  public static Scan createScanFromCursor(Cursor cursor) {
1155    return new Scan().withStartRow(cursor.getRow());
1156  }
1157}