View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.Collection;
22  import java.util.List;
23  import java.util.NavigableSet;
24  
25  import org.apache.hadoop.fs.FileSystem;
26  import org.apache.hadoop.fs.Path;
27  import org.apache.hadoop.hbase.Cell;
28  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
29  import org.apache.hadoop.hbase.HColumnDescriptor;
30  import org.apache.hadoop.hbase.HRegionInfo;
31  import org.apache.hadoop.hbase.KeyValue;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.hbase.classification.InterfaceStability;
35  import org.apache.hadoop.hbase.client.Scan;
36  import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver;
37  import org.apache.hadoop.hbase.io.HeapSize;
38  import org.apache.hadoop.hbase.io.compress.Compression;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
41  import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
42  import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
43  import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
44  import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
45  import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;
46  import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
47  import org.apache.hadoop.hbase.security.User;
48  
49  /**
50   * Interface for objects that hold a column family in a Region. Its a memstore and a set of zero or
51   * more StoreFiles, which stretch backwards over time.
52   */
53  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
54  @InterfaceStability.Evolving
55  public interface Store extends HeapSize, StoreConfigInformation, PropagatingConfigurationObserver {
56  
57    /* The default priority for user-specified compaction requests.
58     * The user gets top priority unless we have blocking compactions. (Pri <= 0)
59     */ int PRIORITY_USER = 1;
60    int NO_PRIORITY = Integer.MIN_VALUE;
61  
62    // General Accessors
63    KeyValue.KVComparator getComparator();
64  
65    Collection<StoreFile> getStorefiles();
66  
67    /**
68     * Close all the readers We don't need to worry about subsequent requests because the Region
69     * holds a write lock that will prevent any more reads or writes.
70     * @return the {@link StoreFile StoreFiles} that were previously being used.
71     * @throws IOException on failure
72     */
73    Collection<StoreFile> close() throws IOException;
74  
75    /**
76     * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
77     * compaction.
78     * @param scan Scan to apply when scanning the stores
79     * @param targetCols columns to scan
80     * @return a scanner over the current key values
81     * @throws IOException on failure
82     */
83    KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
84        throws IOException;
85  
86    /**
87     * Get all scanners with no filtering based on TTL (that happens further down
88     * the line).
89     * @param cacheBlocks
90     * @param isGet
91     * @param usePread
92     * @param isCompaction
93     * @param matcher
94     * @param startRow
95     * @param stopRow
96     * @param readPt
97     * @return all scanners for this store
98     */
99    List<KeyValueScanner> getScanners(
100     boolean cacheBlocks,
101     boolean isGet,
102     boolean usePread,
103     boolean isCompaction,
104     ScanQueryMatcher matcher,
105     byte[] startRow,
106     byte[] stopRow,
107     long readPt
108   ) throws IOException;
109 
110   /**
111    * Create scanners on the given files and if needed on the memstore with no filtering based on TTL
112    * (that happens further down the line).
113    * @param files the list of files on which the scanners has to be created
114    * @param cacheBlocks cache the blocks or not
115    * @param isGet true if it is get, false if not
116    * @param usePread true to use pread, false if not
117    * @param isCompaction true if the scanner is created for compaction
118    * @param matcher the scan query matcher
119    * @param startRow the start row
120    * @param stopRow the stop row
121    * @param readPt the read point of the current scan
122    * @param includeMemstoreScanner true if memstore has to be included
123    * @return scanners on the given files and on the memstore if specified
124    */
125    List<KeyValueScanner> getScanners(List<StoreFile> files, boolean cacheBlocks, boolean isGet,
126           boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow,
127           byte[] stopRow, long readPt, boolean includeMemstoreScanner) throws IOException;
128 
129   ScanInfo getScanInfo();
130 
131   /**
132    * Adds or replaces the specified KeyValues.
133    * <p>
134    * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
135    * MemStore, it will be replaced. Otherwise, it will just be inserted to MemStore.
136    * <p>
137    * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
138    * across all of them.
139    * @param cells
140    * @param readpoint readpoint below which we can safely remove duplicate KVs
141    * @param removedCells collect the removed cells. It can be null.
142    * @return memstore size delta
143    * @throws IOException
144    */
145   long upsert(Iterable<Cell> cells, long readpoint, List<Cell> removedCells) throws IOException;
146 
147   /**
148    * Adds a value to the memstore
149    * @param cell
150    * @return memstore size delta
151    */
152   long add(Cell cell);
153 
154   /**
155    * Adds the specified value to the memstore
156    * @param cells
157    * @return memstore size delta
158    */
159   long add(Iterable<Cell> cells);
160 
161   /**
162    * When was the last edit done in the memstore
163    */
164   long timeOfOldestEdit();
165 
166   /**
167    * Removes a Cell from the memstore. The Cell is removed only if its key
168    * &amp; memstoreTS match the key &amp; memstoreTS value of the cell
169    * parameter.
170    * @param cell
171    */
172   void rollback(final Cell cell);
173 
174   /**
175    * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it. WARNING:
176    * Only use this method on a table where writes occur with strictly increasing timestamps. This
177    * method assumes this pattern of writes in order to make it reasonably performant. Also our
178    * search is dependent on the axiom that deletes are for cells that are in the container that
179    * follows whether a memstore snapshot or a storefile, not for the current container: i.e. we'll
180    * see deletes before we come across cells we are to delete. Presumption is that the
181    * memstore#kvset is processed before memstore#snapshot and so on.
182    * @param row The row key of the targeted row.
183    * @return Found Cell or null if none found.
184    * @throws IOException
185    */
186   Cell getRowKeyAtOrBefore(final byte[] row) throws IOException;
187 
188   FileSystem getFileSystem();
189 
190 
191   /**
192    * @param maxKeyCount
193    * @param compression Compression algorithm to use
194    * @param isCompaction whether we are creating a new file in a compaction
195    * @param includeMVCCReadpoint whether we should out the MVCC readpoint
196    * @return Writer for a new StoreFile in the tmp dir.
197    */
198   StoreFile.Writer createWriterInTmp(
199       long maxKeyCount,
200       Compression.Algorithm compression,
201       boolean isCompaction,
202       boolean includeMVCCReadpoint,
203       boolean includesTags
204   ) throws IOException;
205 
206   /**
207    * @param maxKeyCount
208    * @param compression Compression algorithm to use
209    * @param isCompaction whether we are creating a new file in a compaction
210    * @param includeMVCCReadpoint whether we should out the MVCC readpoint
211    * @param shouldDropBehind should the writer drop caches behind writes
212    * @return Writer for a new StoreFile in the tmp dir.
213    */
214   StoreFile.Writer createWriterInTmp(
215     long maxKeyCount,
216     Compression.Algorithm compression,
217     boolean isCompaction,
218     boolean includeMVCCReadpoint,
219     boolean includesTags,
220     boolean shouldDropBehind
221   ) throws IOException;
222 
223   /**
224    * @param maxKeyCount
225    * @param compression Compression algorithm to use
226    * @param isCompaction whether we are creating a new file in a compaction
227    * @param includeMVCCReadpoint whether we should out the MVCC readpoint
228    * @param shouldDropBehind should the writer drop caches behind writes
229    * @param totalCompactedFilesSize total compacted file size
230    * @return Writer for a new StoreFile in the tmp dir.
231    */
232   StoreFile.Writer createWriterInTmp(
233     long maxKeyCount,
234     Compression.Algorithm compression,
235     boolean isCompaction,
236     boolean includeMVCCReadpoint,
237     boolean includesTags,
238     boolean shouldDropBehind,
239     long totalCompactedFilesSize
240   ) throws IOException;
241 
242   /**
243    * @param maxKeyCount
244    * @param compression Compression algorithm to use
245    * @param isCompaction whether we are creating a new file in a compaction
246    * @param includeMVCCReadpoint whether we should out the MVCC readpoint
247    * @param shouldDropBehind should the writer drop caches behind writes
248    * @param trt Ready-made timetracker to use.
249    * @return Writer for a new StoreFile in the tmp dir.
250    */
251   StoreFile.Writer createWriterInTmp(
252     long maxKeyCount,
253     Compression.Algorithm compression,
254     boolean isCompaction,
255     boolean includeMVCCReadpoint,
256     boolean includesTags,
257     boolean shouldDropBehind,
258     final TimeRangeTracker trt
259   ) throws IOException;
260 
261   /**
262    * @param maxKeyCount
263    * @param compression Compression algorithm to use
264    * @param isCompaction whether we are creating a new file in a compaction
265    * @param includeMVCCReadpoint whether we should out the MVCC readpoint
266    * @param shouldDropBehind should the writer drop caches behind writes
267    * @param trt Ready-made timetracker to use.
268    * @param totalCompactedFilesSize total compacted file size
269    * @return Writer for a new StoreFile in the tmp dir.
270    */
271   StoreFile.Writer createWriterInTmp(
272     long maxKeyCount,
273     Compression.Algorithm compression,
274     boolean isCompaction,
275     boolean includeMVCCReadpoint,
276     boolean includesTags,
277     boolean shouldDropBehind,
278     final TimeRangeTracker trt,
279     long totalCompactedFilesSize
280   ) throws IOException;
281 
282   // Compaction oriented methods
283 
284   boolean throttleCompaction(long compactionSize);
285 
286   /**
287    * getter for CompactionProgress object
288    * @return CompactionProgress object; can be null
289    */
290   CompactionProgress getCompactionProgress();
291 
292   CompactionContext requestCompaction() throws IOException;
293 
294   /**
295    * @deprecated see requestCompaction(int, CompactionRequest, User)
296    */
297   @Deprecated
298   CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
299       throws IOException;
300 
301   CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user)
302       throws IOException;
303 
304   void cancelRequestedCompaction(CompactionContext compaction);
305 
306   /**
307    * @deprecated see compact(CompactionContext, ThroughputController, User)
308    */
309   @Deprecated
310   List<StoreFile> compact(CompactionContext compaction,
311       ThroughputController throughputController) throws IOException;
312 
313   List<StoreFile> compact(CompactionContext compaction,
314     ThroughputController throughputController, User user) throws IOException;
315 
316   /**
317    * @return true if we should run a major compaction.
318    */
319   boolean isMajorCompaction() throws IOException;
320 
321   void triggerMajorCompaction();
322 
323   /**
324    * See if there's too much store files in this store
325    * @return true if number of store files is greater than the number defined in minFilesToCompact
326    */
327   boolean needsCompaction();
328 
329   int getCompactPriority();
330 
331   StoreFlushContext createFlushContext(long cacheFlushId);
332 
333   /**
334    * Call to complete a compaction. Its for the case where we find in the WAL a compaction
335    * that was not finished.  We could find one recovering a WAL after a regionserver crash.
336    * See HBASE-2331.
337    * @param compaction the descriptor for compaction
338    * @param pickCompactionFiles whether or not pick up the new compaction output files and
339    * add it to the store
340    * @param removeFiles whether to remove/archive files from filesystem
341    */
342   void replayCompactionMarker(CompactionDescriptor compaction, boolean pickCompactionFiles,
343       boolean removeFiles)
344       throws IOException;
345 
346   // Split oriented methods
347 
348   boolean canSplit();
349 
350   /**
351    * Determines if Store should be split
352    * @return byte[] if store should be split, null otherwise.
353    */
354   byte[] getSplitPoint();
355 
356   // Bulk Load methods
357 
358   /**
359    * This throws a WrongRegionException if the HFile does not fit in this region, or an
360    * InvalidHFileException if the HFile is not valid.
361    */
362   void assertBulkLoadHFileOk(Path srcPath) throws IOException;
363 
364   /**
365    * This method should only be called from Region. It is assumed that the ranges of values in the
366    * HFile fit within the stores assigned region. (assertBulkLoadHFileOk checks this)
367    *
368    * @param family the column family
369    * @param srcPathStr
370    * @param dstPath
371    */
372   Path bulkLoadHFile(byte[] family, String srcPathStr, Path dstPath) throws IOException;
373 
374   // General accessors into the state of the store
375   // TODO abstract some of this out into a metrics class
376 
377   /**
378    * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
379    */
380   boolean hasReferences();
381 
382   /**
383    * @return The size of this store's memstore, in bytes
384    */
385   long getMemStoreSize();
386 
387   /**
388    * @return The amount of memory we could flush from this memstore; usually this is equal to
389    * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
390    * outstanding snapshots.
391    */
392   long getFlushableSize();
393 
394   /**
395    * Returns the memstore snapshot size
396    * @return size of the memstore snapshot
397    */
398   long getSnapshotSize();
399 
400   HColumnDescriptor getFamily();
401 
402   /**
403    * @return The maximum sequence id in all store files.
404    */
405   long getMaxSequenceId();
406 
407   /**
408    * @return The maximum memstoreTS in all store files.
409    */
410   long getMaxMemstoreTS();
411 
412   /**
413    * @return the data block encoder
414    */
415   HFileDataBlockEncoder getDataBlockEncoder();
416 
417   /** @return aggregate size of all HStores used in the last compaction */
418   long getLastCompactSize();
419 
420   /** @return aggregate size of HStore */
421   long getSize();
422 
423   /**
424    * @return Count of store files
425    */
426   int getStorefilesCount();
427 
428   /**
429    * @return Max age of store files in this store
430    */
431   long getMaxStoreFileAge();
432 
433   /**
434    * @return Min age of store files in this store
435    */
436   long getMinStoreFileAge();
437 
438   /**
439    *  @return Average age of store files in this store, 0 if no store files
440    */
441   long getAvgStoreFileAge();
442 
443   /**
444    *  @return Number of reference files in this store
445    */
446   long getNumReferenceFiles();
447 
448   /**
449    *  @return Number of HFiles in this store
450    */
451   long getNumHFiles();
452 
453   /**
454    * @return The size of the store files, in bytes, uncompressed.
455    */
456   long getStoreSizeUncompressed();
457 
458   /**
459    * @return The size of the store files, in bytes.
460    */
461   long getStorefilesSize();
462 
463   /**
464    * @return The size of the store file indexes, in bytes.
465    */
466   long getStorefilesIndexSize();
467 
468   /**
469    * Returns the total size of all index blocks in the data block indexes, including the root level,
470    * intermediate levels, and the leaf level for multi-level indexes, or just the root level for
471    * single-level indexes.
472    * @return the total size of block indexes in the store
473    */
474   long getTotalStaticIndexSize();
475 
476   /**
477    * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even the
478    * Bloom blocks currently not loaded into the block cache are counted.
479    * @return the total size of all Bloom filters in the store
480    */
481   long getTotalStaticBloomSize();
482 
483   // Test-helper methods
484 
485   /**
486    * Used for tests.
487    * @return cache configuration for this Store.
488    */
489   CacheConfig getCacheConfig();
490 
491   /**
492    * @return the parent region info hosting this store
493    */
494   HRegionInfo getRegionInfo();
495 
496   RegionCoprocessorHost getCoprocessorHost();
497 
498   boolean areWritesEnabled();
499 
500   /**
501    * @return The smallest mvcc readPoint across all the scanners in this
502    * region. Writes older than this readPoint, are included  in every
503    * read operation.
504    */
505   long getSmallestReadPoint();
506 
507   String getColumnFamilyName();
508 
509   TableName getTableName();
510 
511   /**
512    * @return The number of cells flushed to disk
513    */
514   long getFlushedCellsCount();
515 
516   /**
517    * @return The total size of data flushed to disk, in bytes
518    */
519   long getFlushedCellsSize();
520 
521   /**
522    * @return The total size of out output files on disk, in bytes
523    */
524   long getFlushedOutputFileSize();
525 
526   /**
527    * @return The number of cells processed during minor compactions
528    */
529   long getCompactedCellsCount();
530 
531   /**
532    * @return The total amount of data processed during minor compactions, in bytes
533    */
534   long getCompactedCellsSize();
535 
536   /**
537    * @return The number of cells processed during major compactions
538    */
539   long getMajorCompactedCellsCount();
540 
541   /**
542    * @return The total amount of data processed during major compactions, in bytes
543    */
544   long getMajorCompactedCellsSize();
545 
546   /*
547    * @param o Observer who wants to know about changes in set of Readers
548    */
549   void addChangedReaderObserver(ChangedReadersObserver o);
550 
551   /*
552    * @param o Observer no longer interested in changes in set of Readers.
553    */
554   void deleteChangedReaderObserver(ChangedReadersObserver o);
555 
556   /**
557    * @return Whether this store has too many store files.
558    */
559   boolean hasTooManyStoreFiles();
560 
561   /**
562    * Checks the underlying store files, and opens the files that  have not
563    * been opened, and removes the store file readers for store files no longer
564    * available. Mainly used by secondary region replicas to keep up to date with
565    * the primary region files.
566    * @throws IOException
567    */
568   void refreshStoreFiles() throws IOException;
569 
570   /**
571    * This value can represent the degree of emergency of compaction for this store. It should be
572    * greater than or equal to 0.0, any value greater than 1.0 means we have too many store files.
573    * <ul>
574    * <li>if getStorefilesCount &lt;= getMinFilesToCompact, return 0.0</li>
575    * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
576    * getMinFilesToCompact)</li>
577    * </ul>
578    * <p>
579    * And for striped stores, we should calculate this value by the files in each stripe separately
580    * and return the maximum value.
581    * <p>
582    * It is similar to {@link #getCompactPriority()} except that it is more suitable to use in a
583    * linear formula.
584    */
585   double getCompactionPressure();
586 
587    /**
588     * Replaces the store files that the store has with the given files. Mainly used by
589     * secondary region replicas to keep up to date with
590     * the primary region files.
591     * @throws IOException
592     */
593   void refreshStoreFiles(Collection<String> newFiles) throws IOException;
594 
595   void bulkLoadHFile(StoreFileInfo fileInfo) throws IOException;
596 
597   boolean isPrimaryReplicaStore();
598 
599   /**
600    * Closes and archives the compacted files under this store
601    */
602   void closeAndArchiveCompactedFiles() throws IOException;
603 }