View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.List;
25  import java.util.Map;
26  
27  import org.apache.commons.collections.MapUtils;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.hbase.ClusterStatus;
30  import org.apache.hadoop.hbase.HConstants;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.RegionLoad;
33  import org.apache.hadoop.hbase.ScheduledChore;
34  import org.apache.hadoop.hbase.ServerLoad;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.Stoppable;
37  import org.apache.hadoop.hbase.TableName;
38  import org.apache.hadoop.hbase.classification.InterfaceAudience;
39  import org.apache.hadoop.hbase.client.PerClientRandomNonceGenerator;
40  import org.slf4j.Logger;
41  import org.slf4j.LoggerFactory;
42  
43  
44  /**
45   * This chore, every time it runs, will try to recover regions with high store ref count
46   * by reopening them
47   */
48  @InterfaceAudience.Private
49  public class RegionsRecoveryChore extends ScheduledChore {
50  
51    private static final Logger LOG = LoggerFactory.getLogger(RegionsRecoveryChore.class);
52  
53    private static final String REGIONS_RECOVERY_CHORE_NAME = "RegionsRecoveryChore";
54  
55    private static final String ERROR_REOPEN_REIONS_MSG =
56      "Error reopening regions with high storeRefCount. ";
57  
58    private final HMaster hMaster;
59    private final int storeFileRefCountThreshold;
60  
61    private static final PerClientRandomNonceGenerator NONCE_GENERATOR =
62      new PerClientRandomNonceGenerator();
63  
64    /**
65     * Construct RegionsRecoveryChore with provided params
66     *
67     * @param stopper When {@link Stoppable#isStopped()} is true, this chore will cancel and cleanup
68     * @param configuration The configuration params to be used
69     * @param hMaster HMaster instance to initiate RegionTableRegions
70     */
71    RegionsRecoveryChore(final Stoppable stopper, final Configuration configuration,
72        final HMaster hMaster) {
73  
74      super(REGIONS_RECOVERY_CHORE_NAME, stopper, configuration.getInt(
75        HConstants.REGIONS_RECOVERY_INTERVAL, HConstants.DEFAULT_REGIONS_RECOVERY_INTERVAL));
76      this.hMaster = hMaster;
77      this.storeFileRefCountThreshold = configuration.getInt(
78        HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
79        HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);
80  
81    }
82  
83    @Override
84    protected void chore() {
85      if (LOG.isTraceEnabled()) {
86        LOG.trace(
87          "Starting up Regions Recovery chore for reopening regions based on storeFileRefCount...");
88      }
89      try {
90        // only if storeFileRefCountThreshold > 0, consider the feature turned on
91        if (storeFileRefCountThreshold > 0) {
92          final ClusterStatus clusterStatus = hMaster.getClusterStatus();
93          final Map<ServerName, ServerLoad> serverMetricsMap =
94            clusterStatus.getLiveServersLoad();
95          final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap =
96            getTableToRegionsByRefCount(serverMetricsMap);
97          if (MapUtils.isNotEmpty(tableToReopenRegionsMap)) {
98            for (Map.Entry<TableName, List<HRegionInfo>> tableRegionEntry :
99                tableToReopenRegionsMap.entrySet()) {
100             TableName tableName = tableRegionEntry.getKey();
101             List<HRegionInfo> hRegionInfos = tableRegionEntry.getValue();
102             try {
103               LOG.warn("Reopening regions due to high storeFileRefCount. " +
104                 "TableName: {} , noOfRegions: {}", tableName, hRegionInfos.size());
105               hMaster.reopenRegions(tableName, hRegionInfos, NONCE_GENERATOR.getNonceGroup(),
106                 NONCE_GENERATOR.newNonce());
107             } catch (IOException e) {
108               List<String> regionNames = new ArrayList<>();
109               for (HRegionInfo hRegionInfo : hRegionInfos) {
110                 regionNames.add(hRegionInfo.getRegionNameAsString());
111               }
112               LOG.error("{} tableName: {}, regionNames: {}", ERROR_REOPEN_REIONS_MSG,
113                 tableName, regionNames, e);
114             }
115           }
116         }
117       } else {
118         if (LOG.isDebugEnabled()) {
119           LOG.debug("Reopening regions with very high storeFileRefCount is disabled. " +
120               "Provide threshold value > 0 for {} to enable it.",
121             HConstants.STORE_FILE_REF_COUNT_THRESHOLD);
122         }
123       }
124     } catch (Exception e) {
125       LOG.error("Error while reopening regions based on storeRefCount threshold", e);
126     }
127     if (LOG.isTraceEnabled()) {
128       LOG.trace(
129         "Exiting Regions Recovery chore for reopening regions based on storeFileRefCount...");
130     }
131   }
132 
133   private Map<TableName, List<HRegionInfo>> getTableToRegionsByRefCount(
134       final Map<ServerName, ServerLoad> serverMetricsMap) {
135 
136     final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap = new HashMap<>();
137     for (ServerLoad serverLoad : serverMetricsMap.values()) {
138       Map<byte[], RegionLoad> regionLoadsMap = serverLoad.getRegionsLoad();
139       for (RegionLoad regionLoad : regionLoadsMap.values()) {
140         // For each region, each compacted store file can have different ref counts
141         // We need to find maximum of all such ref counts and if that max count of compacted
142         // store files is beyond a threshold value, we should reopen the region.
143         // Here, we take max ref count of all compacted store files and not the cumulative
144         // count of all compacted store files
145         final int maxCompactedStoreFileRefCount = regionLoad
146           .getMaxCompactedStoreFileRefCount();
147 
148         if (maxCompactedStoreFileRefCount > storeFileRefCountThreshold) {
149           final byte[] regionName = regionLoad.getName();
150           prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
151             maxCompactedStoreFileRefCount);
152         }
153       }
154     }
155     return tableToReopenRegionsMap;
156 
157   }
158 
159   private void prepareTableToReopenRegionsMap(
160       final Map<TableName, List<HRegionInfo>> tableToReopenRegionsMap,
161       final byte[] regionName, final int regionStoreRefCount) {
162 
163     final HRegionInfo hRegionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
164     final TableName tableName = hRegionInfo.getTable();
165     if (TableName.META_TABLE_NAME.equals(tableName)) {
166       // Do not reopen regions of meta table even if it has
167       // high store file reference count
168       return;
169     }
170     LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
171       hRegionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
172     if (!tableToReopenRegionsMap.containsKey(tableName)) {
173       tableToReopenRegionsMap.put(tableName, new ArrayList<HRegionInfo>());
174     }
175     tableToReopenRegionsMap.get(tableName).add(hRegionInfo);
176 
177   }
178 
179   // hashcode/equals implementation to ensure at-most one object of RegionsRecoveryChore
180   // is scheduled at a time - RegionsRecoveryConfigManager
181 
182   @Override
183   public boolean equals(Object o) {
184     if (this == o) {
185       return true;
186     }
187     return o != null && getClass() == o.getClass();
188   }
189 
190   @Override
191   public int hashCode() {
192     return 31;
193   }
194 
195 }