1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase.regionserver;
19
20 import java.io.IOException;
21 import java.util.Collection;
22 import java.util.List;
23 import java.util.NavigableSet;
24
25 import org.apache.hadoop.fs.FileSystem;
26 import org.apache.hadoop.fs.Path;
27 import org.apache.hadoop.hbase.Cell;
28 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
29 import org.apache.hadoop.hbase.HColumnDescriptor;
30 import org.apache.hadoop.hbase.HRegionInfo;
31 import org.apache.hadoop.hbase.KeyValue;
32 import org.apache.hadoop.hbase.TableName;
33 import org.apache.hadoop.hbase.classification.InterfaceAudience;
34 import org.apache.hadoop.hbase.classification.InterfaceStability;
35 import org.apache.hadoop.hbase.client.Scan;
36 import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver;
37 import org.apache.hadoop.hbase.io.HeapSize;
38 import org.apache.hadoop.hbase.io.compress.Compression;
39 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40 import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
41 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
42 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
43 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
44 import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
45 import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;
46 import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
47 import org.apache.hadoop.hbase.security.User;
48
49 /**
50 * Interface for objects that hold a column family in a Region. Its a memstore and a set of zero or
51 * more StoreFiles, which stretch backwards over time.
52 */
53 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
54 @InterfaceStability.Evolving
55 public interface Store extends HeapSize, StoreConfigInformation, PropagatingConfigurationObserver {
56
57 /* The default priority for user-specified compaction requests.
58 * The user gets top priority unless we have blocking compactions. (Pri <= 0)
59 */ int PRIORITY_USER = 1;
60 int NO_PRIORITY = Integer.MIN_VALUE;
61
62 // General Accessors
63 KeyValue.KVComparator getComparator();
64
65 Collection<StoreFile> getStorefiles();
66
67 /**
68 * Close all the readers We don't need to worry about subsequent requests because the Region
69 * holds a write lock that will prevent any more reads or writes.
70 * @return the {@link StoreFile StoreFiles} that were previously being used.
71 * @throws IOException on failure
72 */
73 Collection<StoreFile> close() throws IOException;
74
75 /**
76 * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
77 * compaction.
78 * @param scan Scan to apply when scanning the stores
79 * @param targetCols columns to scan
80 * @return a scanner over the current key values
81 * @throws IOException on failure
82 */
83 KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
84 throws IOException;
85
86 /**
87 * Get all scanners with no filtering based on TTL (that happens further down
88 * the line).
89 * @param cacheBlocks
90 * @param isGet
91 * @param usePread
92 * @param isCompaction
93 * @param matcher
94 * @param startRow
95 * @param stopRow
96 * @param readPt
97 * @return all scanners for this store
98 */
99 List<KeyValueScanner> getScanners(
100 boolean cacheBlocks,
101 boolean isGet,
102 boolean usePread,
103 boolean isCompaction,
104 ScanQueryMatcher matcher,
105 byte[] startRow,
106 byte[] stopRow,
107 long readPt
108 ) throws IOException;
109
110 /**
111 * Create scanners on the given files and if needed on the memstore with no filtering based on TTL
112 * (that happens further down the line).
113 * @param files the list of files on which the scanners has to be created
114 * @param cacheBlocks cache the blocks or not
115 * @param isGet true if it is get, false if not
116 * @param usePread true to use pread, false if not
117 * @param isCompaction true if the scanner is created for compaction
118 * @param matcher the scan query matcher
119 * @param startRow the start row
120 * @param stopRow the stop row
121 * @param readPt the read point of the current scan
122 * @param includeMemstoreScanner true if memstore has to be included
123 * @return scanners on the given files and on the memstore if specified
124 */
125 List<KeyValueScanner> getScanners(List<StoreFile> files, boolean cacheBlocks, boolean isGet,
126 boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow,
127 byte[] stopRow, long readPt, boolean includeMemstoreScanner) throws IOException;
128
129 ScanInfo getScanInfo();
130
131 /**
132 * Adds or replaces the specified KeyValues.
133 * <p>
134 * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
135 * MemStore, it will be replaced. Otherwise, it will just be inserted to MemStore.
136 * <p>
137 * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
138 * across all of them.
139 * @param cells
140 * @param readpoint readpoint below which we can safely remove duplicate KVs
141 * @param removedCells collect the removed cells. It can be null.
142 * @return memstore size delta
143 * @throws IOException
144 */
145 long upsert(Iterable<Cell> cells, long readpoint, List<Cell> removedCells) throws IOException;
146
147 /**
148 * Adds a value to the memstore
149 * @param cell
150 * @return memstore size delta
151 */
152 long add(Cell cell);
153
154 /**
155 * Adds the specified value to the memstore
156 * @param cells
157 * @return memstore size delta
158 */
159 long add(Iterable<Cell> cells);
160
161 /**
162 * When was the last edit done in the memstore
163 */
164 long timeOfOldestEdit();
165
166 /**
167 * Removes a Cell from the memstore. The Cell is removed only if its key
168 * & memstoreTS match the key & memstoreTS value of the cell
169 * parameter.
170 * @param cell
171 */
172 void rollback(final Cell cell);
173
174 /**
175 * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it. WARNING:
176 * Only use this method on a table where writes occur with strictly increasing timestamps. This
177 * method assumes this pattern of writes in order to make it reasonably performant. Also our
178 * search is dependent on the axiom that deletes are for cells that are in the container that
179 * follows whether a memstore snapshot or a storefile, not for the current container: i.e. we'll
180 * see deletes before we come across cells we are to delete. Presumption is that the
181 * memstore#kvset is processed before memstore#snapshot and so on.
182 * @param row The row key of the targeted row.
183 * @return Found Cell or null if none found.
184 * @throws IOException
185 */
186 Cell getRowKeyAtOrBefore(final byte[] row) throws IOException;
187
188 FileSystem getFileSystem();
189
190
191 /**
192 * @param maxKeyCount
193 * @param compression Compression algorithm to use
194 * @param isCompaction whether we are creating a new file in a compaction
195 * @param includeMVCCReadpoint whether we should out the MVCC readpoint
196 * @return Writer for a new StoreFile in the tmp dir.
197 */
198 StoreFile.Writer createWriterInTmp(
199 long maxKeyCount,
200 Compression.Algorithm compression,
201 boolean isCompaction,
202 boolean includeMVCCReadpoint,
203 boolean includesTags
204 ) throws IOException;
205
206 /**
207 * @param maxKeyCount
208 * @param compression Compression algorithm to use
209 * @param isCompaction whether we are creating a new file in a compaction
210 * @param includeMVCCReadpoint whether we should out the MVCC readpoint
211 * @param shouldDropBehind should the writer drop caches behind writes
212 * @return Writer for a new StoreFile in the tmp dir.
213 */
214 StoreFile.Writer createWriterInTmp(
215 long maxKeyCount,
216 Compression.Algorithm compression,
217 boolean isCompaction,
218 boolean includeMVCCReadpoint,
219 boolean includesTags,
220 boolean shouldDropBehind
221 ) throws IOException;
222
223 /**
224 * @param maxKeyCount
225 * @param compression Compression algorithm to use
226 * @param isCompaction whether we are creating a new file in a compaction
227 * @param includeMVCCReadpoint whether we should out the MVCC readpoint
228 * @param shouldDropBehind should the writer drop caches behind writes
229 * @param totalCompactedFilesSize total compacted file size
230 * @return Writer for a new StoreFile in the tmp dir.
231 */
232 StoreFile.Writer createWriterInTmp(
233 long maxKeyCount,
234 Compression.Algorithm compression,
235 boolean isCompaction,
236 boolean includeMVCCReadpoint,
237 boolean includesTags,
238 boolean shouldDropBehind,
239 long totalCompactedFilesSize
240 ) throws IOException;
241
242 /**
243 * @param maxKeyCount
244 * @param compression Compression algorithm to use
245 * @param isCompaction whether we are creating a new file in a compaction
246 * @param includeMVCCReadpoint whether we should out the MVCC readpoint
247 * @param shouldDropBehind should the writer drop caches behind writes
248 * @param trt Ready-made timetracker to use.
249 * @return Writer for a new StoreFile in the tmp dir.
250 */
251 StoreFile.Writer createWriterInTmp(
252 long maxKeyCount,
253 Compression.Algorithm compression,
254 boolean isCompaction,
255 boolean includeMVCCReadpoint,
256 boolean includesTags,
257 boolean shouldDropBehind,
258 final TimeRangeTracker trt
259 ) throws IOException;
260
261 /**
262 * @param maxKeyCount
263 * @param compression Compression algorithm to use
264 * @param isCompaction whether we are creating a new file in a compaction
265 * @param includeMVCCReadpoint whether we should out the MVCC readpoint
266 * @param shouldDropBehind should the writer drop caches behind writes
267 * @param trt Ready-made timetracker to use.
268 * @param totalCompactedFilesSize total compacted file size
269 * @return Writer for a new StoreFile in the tmp dir.
270 */
271 StoreFile.Writer createWriterInTmp(
272 long maxKeyCount,
273 Compression.Algorithm compression,
274 boolean isCompaction,
275 boolean includeMVCCReadpoint,
276 boolean includesTags,
277 boolean shouldDropBehind,
278 final TimeRangeTracker trt,
279 long totalCompactedFilesSize
280 ) throws IOException;
281
282 // Compaction oriented methods
283
284 boolean throttleCompaction(long compactionSize);
285
286 /**
287 * getter for CompactionProgress object
288 * @return CompactionProgress object; can be null
289 */
290 CompactionProgress getCompactionProgress();
291
292 CompactionContext requestCompaction() throws IOException;
293
294 /**
295 * @deprecated see requestCompaction(int, CompactionRequest, User)
296 */
297 @Deprecated
298 CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
299 throws IOException;
300
301 CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user)
302 throws IOException;
303
304 void cancelRequestedCompaction(CompactionContext compaction);
305
306 /**
307 * @deprecated see compact(CompactionContext, ThroughputController, User)
308 */
309 @Deprecated
310 List<StoreFile> compact(CompactionContext compaction,
311 ThroughputController throughputController) throws IOException;
312
313 List<StoreFile> compact(CompactionContext compaction,
314 ThroughputController throughputController, User user) throws IOException;
315
316 /**
317 * @return true if we should run a major compaction.
318 */
319 boolean isMajorCompaction() throws IOException;
320
321 void triggerMajorCompaction();
322
323 /**
324 * See if there's too much store files in this store
325 * @return true if number of store files is greater than the number defined in minFilesToCompact
326 */
327 boolean needsCompaction();
328
329 int getCompactPriority();
330
331 StoreFlushContext createFlushContext(long cacheFlushId);
332
333 /**
334 * Call to complete a compaction. Its for the case where we find in the WAL a compaction
335 * that was not finished. We could find one recovering a WAL after a regionserver crash.
336 * See HBASE-2331.
337 * @param compaction the descriptor for compaction
338 * @param pickCompactionFiles whether or not pick up the new compaction output files and
339 * add it to the store
340 * @param removeFiles whether to remove/archive files from filesystem
341 */
342 void replayCompactionMarker(CompactionDescriptor compaction, boolean pickCompactionFiles,
343 boolean removeFiles)
344 throws IOException;
345
346 // Split oriented methods
347
348 boolean canSplit();
349
350 /**
351 * Determines if Store should be split
352 * @return byte[] if store should be split, null otherwise.
353 */
354 byte[] getSplitPoint();
355
356 // Bulk Load methods
357
358 /**
359 * This throws a WrongRegionException if the HFile does not fit in this region, or an
360 * InvalidHFileException if the HFile is not valid.
361 */
362 void assertBulkLoadHFileOk(Path srcPath) throws IOException;
363
364 /**
365 * This method should only be called from Region. It is assumed that the ranges of values in the
366 * HFile fit within the stores assigned region. (assertBulkLoadHFileOk checks this)
367 *
368 * @param family the column family
369 * @param srcPathStr
370 * @param dstPath
371 */
372 Path bulkLoadHFile(byte[] family, String srcPathStr, Path dstPath) throws IOException;
373
374 // General accessors into the state of the store
375 // TODO abstract some of this out into a metrics class
376
377 /**
378 * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
379 */
380 boolean hasReferences();
381
382 /**
383 * @return The size of this store's memstore, in bytes
384 */
385 long getMemStoreSize();
386
387 /**
388 * @return The amount of memory we could flush from this memstore; usually this is equal to
389 * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
390 * outstanding snapshots.
391 */
392 long getFlushableSize();
393
394 /**
395 * Returns the memstore snapshot size
396 * @return size of the memstore snapshot
397 */
398 long getSnapshotSize();
399
400 HColumnDescriptor getFamily();
401
402 /**
403 * @return The maximum sequence id in all store files.
404 */
405 long getMaxSequenceId();
406
407 /**
408 * @return The maximum memstoreTS in all store files.
409 */
410 long getMaxMemstoreTS();
411
412 /**
413 * @return the data block encoder
414 */
415 HFileDataBlockEncoder getDataBlockEncoder();
416
417 /** @return aggregate size of all HStores used in the last compaction */
418 long getLastCompactSize();
419
420 /** @return aggregate size of HStore */
421 long getSize();
422
423 /**
424 * @return Count of store files
425 */
426 int getStorefilesCount();
427
428 /**
429 * @return Max age of store files in this store
430 */
431 long getMaxStoreFileAge();
432
433 /**
434 * @return Min age of store files in this store
435 */
436 long getMinStoreFileAge();
437
438 /**
439 * @return Average age of store files in this store, 0 if no store files
440 */
441 long getAvgStoreFileAge();
442
443 /**
444 * @return Number of reference files in this store
445 */
446 long getNumReferenceFiles();
447
448 /**
449 * @return Number of HFiles in this store
450 */
451 long getNumHFiles();
452
453 /**
454 * @return The size of the store files, in bytes, uncompressed.
455 */
456 long getStoreSizeUncompressed();
457
458 /**
459 * @return The size of the store files, in bytes.
460 */
461 long getStorefilesSize();
462
463 /**
464 * @return The size of the store file indexes, in bytes.
465 */
466 long getStorefilesIndexSize();
467
468 /**
469 * Returns the total size of all index blocks in the data block indexes, including the root level,
470 * intermediate levels, and the leaf level for multi-level indexes, or just the root level for
471 * single-level indexes.
472 * @return the total size of block indexes in the store
473 */
474 long getTotalStaticIndexSize();
475
476 /**
477 * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even the
478 * Bloom blocks currently not loaded into the block cache are counted.
479 * @return the total size of all Bloom filters in the store
480 */
481 long getTotalStaticBloomSize();
482
483 // Test-helper methods
484
485 /**
486 * Used for tests.
487 * @return cache configuration for this Store.
488 */
489 CacheConfig getCacheConfig();
490
491 /**
492 * @return the parent region info hosting this store
493 */
494 HRegionInfo getRegionInfo();
495
496 RegionCoprocessorHost getCoprocessorHost();
497
498 boolean areWritesEnabled();
499
500 /**
501 * @return The smallest mvcc readPoint across all the scanners in this
502 * region. Writes older than this readPoint, are included in every
503 * read operation.
504 */
505 long getSmallestReadPoint();
506
507 String getColumnFamilyName();
508
509 TableName getTableName();
510
511 /**
512 * @return The number of cells flushed to disk
513 */
514 long getFlushedCellsCount();
515
516 /**
517 * @return The total size of data flushed to disk, in bytes
518 */
519 long getFlushedCellsSize();
520
521 /**
522 * @return The total size of out output files on disk, in bytes
523 */
524 long getFlushedOutputFileSize();
525
526 /**
527 * @return The number of cells processed during minor compactions
528 */
529 long getCompactedCellsCount();
530
531 /**
532 * @return The total amount of data processed during minor compactions, in bytes
533 */
534 long getCompactedCellsSize();
535
536 /**
537 * @return The number of cells processed during major compactions
538 */
539 long getMajorCompactedCellsCount();
540
541 /**
542 * @return The total amount of data processed during major compactions, in bytes
543 */
544 long getMajorCompactedCellsSize();
545
546 /*
547 * @param o Observer who wants to know about changes in set of Readers
548 */
549 void addChangedReaderObserver(ChangedReadersObserver o);
550
551 /*
552 * @param o Observer no longer interested in changes in set of Readers.
553 */
554 void deleteChangedReaderObserver(ChangedReadersObserver o);
555
556 /**
557 * @return Whether this store has too many store files.
558 */
559 boolean hasTooManyStoreFiles();
560
561 /**
562 * Checks the underlying store files, and opens the files that have not
563 * been opened, and removes the store file readers for store files no longer
564 * available. Mainly used by secondary region replicas to keep up to date with
565 * the primary region files.
566 * @throws IOException
567 */
568 void refreshStoreFiles() throws IOException;
569
570 /**
571 * This value can represent the degree of emergency of compaction for this store. It should be
572 * greater than or equal to 0.0, any value greater than 1.0 means we have too many store files.
573 * <ul>
574 * <li>if getStorefilesCount <= getMinFilesToCompact, return 0.0</li>
575 * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
576 * getMinFilesToCompact)</li>
577 * </ul>
578 * <p>
579 * And for striped stores, we should calculate this value by the files in each stripe separately
580 * and return the maximum value.
581 * <p>
582 * It is similar to {@link #getCompactPriority()} except that it is more suitable to use in a
583 * linear formula.
584 */
585 double getCompactionPressure();
586
587 /**
588 * Replaces the store files that the store has with the given files. Mainly used by
589 * secondary region replicas to keep up to date with
590 * the primary region files.
591 * @throws IOException
592 */
593 void refreshStoreFiles(Collection<String> newFiles) throws IOException;
594
595 void bulkLoadHFile(StoreFileInfo fileInfo) throws IOException;
596
597 boolean isPrimaryReplicaStore();
598
599 /**
600 * Closes and archives the compacted files under this store
601 */
602 void closeAndArchiveCompactedFiles() throws IOException;
603 }