View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master.normalizer;
20  
21  import com.google.protobuf.ServiceException;
22  import java.io.IOException;
23  import java.sql.Timestamp;
24  import java.util.ArrayList;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.List;
28  import java.util.concurrent.TimeUnit;
29  import org.apache.commons.collections.CollectionUtils;
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.conf.Configuration;
33  import org.apache.hadoop.hbase.HBaseIOException;
34  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
35  import org.apache.hadoop.hbase.HRegionInfo;
36  import org.apache.hadoop.hbase.HTableDescriptor;
37  import org.apache.hadoop.hbase.RegionLoad;
38  import org.apache.hadoop.hbase.ServerLoad;
39  import org.apache.hadoop.hbase.ServerName;
40  import org.apache.hadoop.hbase.TableName;
41  import org.apache.hadoop.hbase.classification.InterfaceAudience;
42  import org.apache.hadoop.hbase.client.Admin.MasterSwitchType;
43  import org.apache.hadoop.hbase.master.MasterRpcServices;
44  import org.apache.hadoop.hbase.master.MasterServices;
45  import org.apache.hadoop.hbase.master.RegionState;
46  import org.apache.hadoop.hbase.master.RegionStates;
47  import org.apache.hadoop.hbase.protobuf.RequestConverter;
48  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
49  
50  /**
51   * Simple implementation of region normalizer. Logic in use:
52   * <ol>
53   * <li>Get all regions of a given table</li>
 * <li>Get avg size S of the regions in the table (by total size of store files reported in
 * RegionLoad)</li>
 * <li>For each region R0, if R0 is bigger than S * 2, it is kindly requested to split.</li>
 * <li>Otherwise, for the next region in the chain R1, if R0 + R1 is smaller than S, R0 and R1 are
 * kindly requested to merge.</li>
59   * </ol>
60   * Region sizes are coarse and approximate on the order of megabytes. Additionally, "empty" regions
61   * (less than 1MB, with the previous note) are not merged away. This is by design to prevent
62   * normalization from undoing the pre-splitting of a table.
63   */
64  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
65  public class SimpleRegionNormalizer implements RegionNormalizer {
66  
67    private static final Log LOG = LogFactory.getLog(SimpleRegionNormalizer.class);
68    static final String SPLIT_ENABLED_KEY = "hbase.normalizer.split.enabled";
69    static final boolean DEFAULT_SPLIT_ENABLED = true;
70    static final String MERGE_ENABLED_KEY = "hbase.normalizer.merge.enabled";
71    static final boolean DEFAULT_MERGE_ENABLED = true;
72    // TODO: after HBASE-24416, `min.region.count` only applies to merge plans; should
73    // deprecate/rename the configuration key.
74    static final String MIN_REGION_COUNT_KEY = "hbase.normalizer.min.region.count";
75    static final int DEFAULT_MIN_REGION_COUNT = 3;
76    static final String MERGE_MIN_REGION_AGE_DAYS_KEY = "hbase.normalizer.merge.min_region_age.days";
77    static final int DEFAULT_MERGE_MIN_REGION_AGE_DAYS = 3;
78    static final String MERGE_MIN_REGION_SIZE_MB_KEY = "hbase.normalizer.merge.min_region_size.mb";
79    static final int DEFAULT_MERGE_MIN_REGION_SIZE_MB = 1;
80  
81    private final long[] skippedCount;
82    private Configuration conf;
83    private MasterServices masterServices;
84    private MasterRpcServices masterRpcServices;
85    private boolean splitEnabled;
86    private boolean mergeEnabled;
87    private int minRegionCount;
88    private int mergeMinRegionAge;
89    private int mergeMinRegionSizeMb;
90  
91    public SimpleRegionNormalizer() {
92      skippedCount = new long[NormalizationPlan.PlanType.values().length];
93      splitEnabled = DEFAULT_SPLIT_ENABLED;
94      mergeEnabled = DEFAULT_MERGE_ENABLED;
95      minRegionCount = DEFAULT_MIN_REGION_COUNT;
96      mergeMinRegionAge = DEFAULT_MERGE_MIN_REGION_AGE_DAYS;
97      mergeMinRegionSizeMb = DEFAULT_MERGE_MIN_REGION_SIZE_MB;
98    }
99  
100   // Comparator that gives higher priority to region Split plan
101   private Comparator<NormalizationPlan> planComparator = new Comparator<NormalizationPlan>() {
102     @Override
103     public int compare(NormalizationPlan plan, NormalizationPlan plan2) {
104       if (plan instanceof SplitNormalizationPlan) {
105         return -1;
106       }
107       if (plan2 instanceof SplitNormalizationPlan) {
108         return 1;
109       }
110       return 0;
111     }
112   };
113 
114   @Override
115   public void setMasterRpcServices(MasterRpcServices masterRpcServices) {
116     this.masterRpcServices = masterRpcServices;
117   }
118 
119   @Override
120   public Configuration getConf() {
121     return conf;
122   }
123 
124   @Override
125   public void setConf(Configuration conf) {
126     if (conf == null) {
127       return;
128     }
129     this.conf = conf;
130     splitEnabled = conf.getBoolean(SPLIT_ENABLED_KEY, DEFAULT_SPLIT_ENABLED);
131     mergeEnabled = conf.getBoolean(MERGE_ENABLED_KEY, DEFAULT_MERGE_ENABLED);
132     minRegionCount = parseMinRegionCount(conf);
133     mergeMinRegionAge = parseMergeMinRegionAge(conf);
134     mergeMinRegionSizeMb = parseMergeMinRegionSizeMb(conf);
135   }
136 
137   private int parseMergeMinRegionSizeMb(Configuration conf) {
138     final int parsedValue =
139         conf.getInt(MERGE_MIN_REGION_SIZE_MB_KEY, DEFAULT_MERGE_MIN_REGION_SIZE_MB);
140     final int settledValue = Math.max(0, parsedValue);
141     if (parsedValue != settledValue) {
142       warnInvalidValue(MERGE_MIN_REGION_SIZE_MB_KEY, parsedValue, settledValue);
143     }
144     return settledValue;
145   }
146 
147   private int parseMinRegionCount(Configuration conf) {
148     final int parsedValue = conf.getInt(MIN_REGION_COUNT_KEY, DEFAULT_MIN_REGION_COUNT);
149     final int settledValue = Math.max(1, parsedValue);
150     if (parsedValue != settledValue) {
151       warnInvalidValue(MIN_REGION_COUNT_KEY, parsedValue, settledValue);
152     }
153     return settledValue;
154   }
155 
156   private int parseMergeMinRegionAge(Configuration conf) {
157     final int parsedValue =
158         conf.getInt(MERGE_MIN_REGION_AGE_DAYS_KEY, DEFAULT_MERGE_MIN_REGION_AGE_DAYS);
159     final int settledValue = Math.max(0, parsedValue);
160     if (parsedValue != settledValue) {
161       warnInvalidValue(MERGE_MIN_REGION_AGE_DAYS_KEY, parsedValue, settledValue);
162     }
163     return settledValue;
164   }
165 
166   private void warnInvalidValue(final String key, final int parsedValue, final int settledValue) {
167     LOG.warn("Configured value " + key + "=" + parsedValue + " is invalid. Setting value to"
168         + settledValue);
169   }
170 
171   /**
172    * Return configured value for MasterSwitchType.SPLIT.
173    */
174   public boolean isSplitEnabled() {
175     return splitEnabled;
176   }
177 
178   /**
179    * Return configured value for MasterSwitchType.MERGE.
180    */
181   public boolean isMergeEnabled() {
182     return mergeEnabled;
183   }
184 
185   private boolean isMasterSwitchEnabled(MasterSwitchType masterSwitchType) {
186     boolean enabled = false;
187     try {
188       enabled = masterRpcServices.isSplitOrMergeEnabled(null,
189         RequestConverter.buildIsSplitOrMergeEnabledRequest(masterSwitchType)).getEnabled();
190     } catch (ServiceException e) {
191       LOG.debug("Unable to determine whether split or merge is enabled", e);
192     }
193     return enabled;
194   }
195 
196   /**
197    * Return this instance's configured value for {@link #MIN_REGION_COUNT_KEY}.
198    */
199   public int getMinRegionCount() {
200     return minRegionCount;
201   }
202 
203   /**
204    * Return this instance's configured value for {@link #MERGE_MIN_REGION_AGE_DAYS_KEY}.
205    */
206   public int getMergeMinRegionAge() {
207     return mergeMinRegionAge;
208   }
209 
210   /**
211    * Return this instance's configured value for {@link #MERGE_MIN_REGION_SIZE_MB_KEY}.
212    */
213   public int getMergeMinRegionSizeMb() {
214     return mergeMinRegionSizeMb;
215   }
216 
217   /**
218    * Set the master service.
219    * @param masterServices inject instance of MasterServices
220    */
221   @Override
222   public void setMasterServices(final MasterServices masterServices) {
223     this.masterServices = masterServices;
224   }
225 
226   /**
227    * Computes next most "urgent" normalization action on the table. Action may be either a split, or
228    * a merge, or no action.
229    * @param table table to normalize
230    * @return normalization plan to execute
231    */
232   @Override
233   public List<NormalizationPlan> computePlansForTable(TableName table) throws HBaseIOException {
234     if (table == null) {
235       return Collections.emptyList();
236     }
237     if (table.isSystemTable()) {
238       LOG.debug("Normalization of system table " + table + " isn't allowed");
239       return Collections.emptyList();
240     }
241 
242     final boolean proceedWithSplitPlanning = proceedWithSplitPlanning();
243     final boolean proceedWithMergePlanning = proceedWithMergePlanning();
244     if (!proceedWithMergePlanning && !proceedWithSplitPlanning) {
245       LOG.debug("Both split and merge are disabled. Skipping normalization of table: " + table);
246       return Collections.emptyList();
247     }
248 
249     final NormalizeContext ctx = new NormalizeContext(table);
250     if (CollectionUtils.isEmpty(ctx.getTableRegions())) {
251       return Collections.emptyList();
252     }
253 
254     LOG.debug("Computing normalization plan for table:  " + table + ", number of regions: "
255         + ctx.getTableRegions().size());
256 
257     final List<NormalizationPlan> plans = new ArrayList<>();
258     if (proceedWithSplitPlanning) {
259       plans.addAll(computeSplitNormalizationPlans(ctx));
260     }
261     if (proceedWithMergePlanning) {
262       plans.addAll(computeMergeNormalizationPlans(ctx));
263     }
264 
265     LOG.debug("Computed " + plans.size() + " normalization plans for table" + table);
266     return plans;
267   }
268 
269   private boolean proceedWithMergePlanning() {
270     return isMergeEnabled() && isMasterSwitchEnabled(MasterSwitchType.MERGE);
271   }
272 
273   private boolean proceedWithSplitPlanning() {
274     return isSplitEnabled() && isMasterSwitchEnabled(MasterSwitchType.SPLIT);
275   }
276 
277   /**
278    * @param hri used to calculate region size
279    * @return region size in MB and if region is not found than -1
280    */
281   private long getRegionSizeMB(HRegionInfo hri) {
282     ServerName sn =
283         masterServices.getAssignmentManager().getRegionStates().getRegionServerOfRegion(hri);
284     if (sn == null) {
285       LOG.debug(hri.getRegionNameAsString() + " region was not found on any Server");
286       return -1;
287     }
288     ServerLoad load = masterServices.getServerManager().getLoad(sn);
289     if (load == null) {
290       LOG.debug(sn.getServerName() + " was not found in online servers");
291       return -1;
292     }
293     RegionLoad regionLoad = load.getRegionsLoad().get(hri.getRegionName());
294     if (regionLoad == null) {
295       LOG.debug(hri.getRegionNameAsString() + " was not found in RegionsLoad");
296       return -1;
297     }
298     return regionLoad.getStorefileSizeMB();
299   }
300 
301   /**
302    * @param tableRegions regions of table to normalize
303    * @return average region size Also make sure tableRegions contains regions of the same table
304    */
305   private double getAverageRegionSizeMb(List<HRegionInfo> tableRegions) {
306     if (CollectionUtils.isEmpty(tableRegions)) {
307       throw new IllegalStateException(
308           "Cannot calculate average size of a table without any regions.");
309     }
310     final int regionCount = tableRegions.size();
311     long totalSizeMb = 0;
312     // tableRegions.stream().mapToLong(this::getRegionSizeMB).sum();
313 
314     for (HRegionInfo rinfo : tableRegions) {
315       totalSizeMb += getRegionSizeMB(rinfo);
316     }
317     TableName table = tableRegions.get(0).getTable();
318     int targetRegionCount = -1;
319     long targetRegionSize = -1;
320     try {
321       HTableDescriptor tableDescriptor = masterServices.getTableDescriptors().get(table);
322       if (tableDescriptor != null && LOG.isDebugEnabled()) {
323         targetRegionCount = tableDescriptor.getNormalizerTargetRegionCount();
324         targetRegionSize = tableDescriptor.getNormalizerTargetRegionSize();
325         LOG.debug("Table " + table + " configured with target region count" + targetRegionCount
326             + ", target region size " + targetRegionSize);
327       }
328     } catch (IOException e) {
329       LOG.warn(
330         "TableDescriptor for " + table + " unavailable, table-level target region count and size"
331             + " configurations cannot be considered.",
332         e);
333     }
334 
335     double avgRegionSize;
336     if (targetRegionSize > 0) {
337       avgRegionSize = targetRegionSize;
338     } else if (targetRegionCount > 0) {
339       avgRegionSize = totalSizeMb / (double) targetRegionCount;
340     } else {
341       avgRegionSize = totalSizeMb / (double) regionCount;
342     }
343 
344     LOG.debug("Table " + table + ", total aggregated regions size: " + totalSizeMb
345         + " and average region size " + avgRegionSize);
346     return avgRegionSize;
347   }
348 
349   /**
350    * Determine if a {@link HRegionInfo} should be considered for a merge operation.
351    */
352   private boolean skipForMerge(final RegionStates regionStates, final HRegionInfo regionInfo) {
353     boolean regionIsOpen = regionStates.isRegionInState(regionInfo, RegionState.State.OPEN);
354     final String name = regionInfo.getEncodedName();
355     if (!regionIsOpen) {
356       LOG.trace("skipping merge of region " + name + " because it is not open");
357       return true;
358     }
359     if (!isOldEnoughForMerge(regionInfo)) {
360       LOG.trace("skipping merge of region " + name + " because it is not old enough.");
361       return true;
362     }
363     if (!isLargeEnoughForMerge(regionInfo)) {
364       LOG.trace("skipping merge region " + name + " because it is not large enough.");
365       return true;
366     }
367     return false;
368   }
369 
370   /**
371    * Computes the merge plans that should be executed for this table to converge average region
372    * towards target average or target region count.
373    */
374   private List<NormalizationPlan> computeMergeNormalizationPlans(final NormalizeContext ctx) {
375     if (ctx.getTableRegions().size() < minRegionCount) {
376       LOG.debug("Table " + ctx.getTableName() + " has " + ctx.getTableRegions().size()
377           + " regions, required min number of regions for normalizer to run" + " is "
378           + minRegionCount + ", not computing merge plans.");
379       return Collections.emptyList();
380     }
381 
382     final double avgRegionSizeMb = ctx.getAverageRegionSizeMb();
383     LOG.debug(
384       "Computing normalization plan for table " + ctx.getTableName() + ". average region size: "
385           + avgRegionSizeMb + ", number of" + " regions: " + ctx.getTableRegions().size());
386 
387     final List<NormalizationPlan> plans = new ArrayList<>();
388     for (int candidateIdx = 0; candidateIdx < ctx.getTableRegions().size() - 1; candidateIdx++) {
389       final HRegionInfo current = ctx.getTableRegions().get(candidateIdx);
390       final HRegionInfo next = ctx.getTableRegions().get(candidateIdx + 1);
391       if (skipForMerge(ctx.getRegionStates(), current)
392           || skipForMerge(ctx.getRegionStates(), next)) {
393         continue;
394       }
395       final long currentSizeMb = getRegionSizeMB(current);
396       final long nextSizeMb = getRegionSizeMB(next);
397       if (currentSizeMb + nextSizeMb < avgRegionSizeMb) {
398         plans.add(new MergeNormalizationPlan(current, next));
399         candidateIdx++;
400       }
401     }
402     return plans;
403   }
404 
405   /**
406    * Computes the split plans that should be executed for this table to converge average region size
407    * towards target average or target region count. <br />
408    * if the region is > 2 times larger than average, we split it. split is more high priority
409    * normalization action than merge.
410    */
411   private List<NormalizationPlan> computeSplitNormalizationPlans(final NormalizeContext ctx) {
412     final double avgRegionSize = ctx.getAverageRegionSizeMb();
413     TableName tableName = ctx.getTableName();
414     LOG.debug("Table " + tableName + ", average region size: " + avgRegionSize);
415 
416     final List<NormalizationPlan> plans = new ArrayList<>();
417     for (final HRegionInfo hri : ctx.getTableRegions()) {
418       boolean regionIsOpen = ctx.getRegionStates().isRegionInState(hri, RegionState.State.OPEN);
419       if (!regionIsOpen) {
420         continue;
421       }
422       final long regionSize = getRegionSizeMB(hri);
423       if (regionSize > 2 * avgRegionSize) {
424         LOG.info(
425           "Table " + tableName + ", large region " + hri.getRegionNameAsString() + " has size "
426               + regionSize + ", more than twice avg size " + avgRegionSize + ", splitting");
427         plans.add(new SplitNormalizationPlan(hri, null));
428       }
429     }
430     return plans;
431   }
432 
433   /**
434    * Return {@code true} when {@code regionInfo} has a creation date that is old enough to be
435    * considered for a merge operation, {@code false} otherwise.
436    */
437   private boolean isOldEnoughForMerge(final HRegionInfo regionInfo) {
438     final Timestamp currentTime = new Timestamp(EnvironmentEdgeManager.currentTime());
439     final Timestamp regionCreateTime = new Timestamp(regionInfo.getRegionId());
440     return new Timestamp(regionCreateTime.getTime() + TimeUnit.DAYS.toMillis(mergeMinRegionAge))
441         .before(currentTime);
442   }
443 
444   /**
445    * Return {@code true} when {@code regionInfo} has a size that is sufficient to be considered for
446    * a merge operation, {@code false} otherwise.
447    */
448   private boolean isLargeEnoughForMerge(final HRegionInfo regionInfo) {
449     return getRegionSizeMB(regionInfo) >= mergeMinRegionSizeMb;
450   }
451 
452   /**
453    * Inner class caries the state necessary to perform a single invocation of
454    * {@link #computePlansForTable(TableName)}. Grabbing this data from the assignment manager
455    * up-front allows any computed values to be realized just once.
456    */
457   private class NormalizeContext {
458     private final TableName tableName;
459     private final RegionStates regionStates;
460     private final List<HRegionInfo> tableRegions;
461     private final double averageRegionSizeMb;
462 
463     public NormalizeContext(final TableName tableName) {
464       this.tableName = tableName;
465       regionStates =
466           SimpleRegionNormalizer.this.masterServices.getAssignmentManager().getRegionStates();
467       tableRegions = regionStates.getRegionsOfTable(tableName);
468       // The list of regionInfo from getRegionsOfTable() is ordered by regionName.
469       // regionName does not necessary guarantee the order by STARTKEY (let's say 'aa1', 'aa1!',
470       // in order by regionName, it will be 'aa1!' followed by 'aa1').
471       // This could result in normalizer merging non-adjacent regions into one and creates overlaps.
472       // In order to avoid that, sort the list by RegionInfo.COMPARATOR.
473       // See HBASE-24376
474       Collections.sort(tableRegions);
475       averageRegionSizeMb = SimpleRegionNormalizer.this.getAverageRegionSizeMb(this.tableRegions);
476     }
477 
478     public TableName getTableName() {
479       return tableName;
480     }
481 
482     public RegionStates getRegionStates() {
483       return regionStates;
484     }
485 
486     public List<HRegionInfo> getTableRegions() {
487       return tableRegions;
488     }
489 
490     public double getAverageRegionSizeMb() {
491       return averageRegionSizeMb;
492     }
493   }
494 }