View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
3    * agreements. See the NOTICE file distributed with this work for additional information regarding
4    * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
5    * "License"); you may not use this file except in compliance with the License. You may obtain a
6    * copy of the License at
7    * <p>
8    * http://www.apache.org/licenses/LICENSE-2.0
9    * <p>
10   * Unless required by applicable law or agreed to in writing, software distributed under the License
11   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
12   * or implied. See the License for the specific language governing permissions and limitations under
13   * the License.
14   */
15  package org.apache.hadoop.hbase.master.balancer;
16  
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ServerName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
34  
35  /**
36   * This is an optional Cost function designed to allow region count skew across RegionServers. A
37   * rule file is loaded from the local FS or HDFS before balancing. It contains lines of rules. A
38   * rule is composed of a regexp for hostname, and a limit. For example, we could have:
39   * <p>
40   * * rs[0-9] 200 * rs1[0-9] 50
41   * </p>
42   * RegionServers with hostname matching the first rules will have a limit of 200, and the others 50.
43   * If there's no match, a default is set. The costFunction is trying to fill all RegionServers
44   * linearly, meaning that if the global usage is at 50%, then all RegionServers should hold half of
45   * their capacity in terms of regions. In order to use this CostFunction, you need to set the
46   * following options:
47   * <ul>
48   * <li>hbase.master.balancer.stochastic.additionalCostFunctions</li>
49   * <li>hbase.master.balancer.stochastic.heterogeneousRegionCountRulesFile</li>
50   * <li>hbase.master.balancer.stochastic.heterogeneousRegionCountDefault</li>
51   * </ul>
52   * The rule file can be located on local FS or HDFS, depending on the prefix (file//: or hdfs://).
53   */
54  public class HeterogeneousRegionCountCostFunction extends StochasticLoadBalancer.CostFunction {
55  
56    /**
57     * configuration used for the path where the rule file is stored.
58     */
59    static final String HBASE_MASTER_BALANCER_HETEROGENEOUS_RULES_FILE =
60        "hbase.master.balancer.heterogeneousRegionCountRulesFile";
61    private static final Logger LOG =
62        LoggerFactory.getLogger(HeterogeneousRegionCountCostFunction.class);
63    /**
64     * Default rule to apply when the rule file is not found. Default to 200.
65     */
66    private static final String HBASE_MASTER_BALANCER_HETEROGENEOUS_RULES_DEFAULT =
67        "hbase.master.balancer.heterogeneousRegionCountDefault";
68    /**
69     * Cost for the function. Default to 500, can be changed.
70     */
71    private static final String REGION_COUNT_SKEW_COST_KEY =
72        "hbase.master.balancer.stochastic.heterogeneousRegionCountCost";
73    private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
74    private final String rulesPath;
75  
76    /**
77     * Contains the rules, key is the regexp for ServerName, value is the limit
78     */
79    private final Map<Pattern, Integer> limitPerRule;
80  
81    /**
82     * This is a cache, used to not go through all the limitPerRule map when searching for limit
83     */
84    private final Map<ServerName, Integer> limitPerRS;
85    private final Configuration conf;
86    private int defaultNumberOfRegions;
87  
88    /**
89     * Total capacity of regions for the cluster, based on the online RS and their associated rules
90     */
91    private int totalCapacity = 0;
92    double overallUsage;
93  
94    public HeterogeneousRegionCountCostFunction(Configuration conf) {
95      super(conf);
96      this.conf = conf;
97      this.limitPerRS = new HashMap<>();
98      this.limitPerRule = new HashMap<>();
99      this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
100     this.rulesPath = conf.get(HBASE_MASTER_BALANCER_HETEROGENEOUS_RULES_FILE);
101     this.defaultNumberOfRegions =
102         conf.getInt(HBASE_MASTER_BALANCER_HETEROGENEOUS_RULES_DEFAULT, 200);
103 
104     if (this.defaultNumberOfRegions < 0) {
105       LOG.warn("invalid configuration '" + HBASE_MASTER_BALANCER_HETEROGENEOUS_RULES_DEFAULT
106           + "'. Setting default to 200");
107       this.defaultNumberOfRegions = 200;
108     }
109     if (conf.getFloat(StochasticLoadBalancer.RegionCountSkewCostFunction.REGION_COUNT_SKEW_COST_KEY,
110       StochasticLoadBalancer.RegionCountSkewCostFunction.DEFAULT_REGION_COUNT_SKEW_COST) > 0) {
111       LOG.warn("regionCountCost is not set to 0, "
112           + " this will interfere with the HeterogeneousRegionCountCostFunction!");
113     }
114   }
115 
116   /**
117    * Called once per LB invocation to give the cost function to initialize it's state, and perform
118    * any costly calculation.
119    */
120   @Override
121   void init(final BaseLoadBalancer.Cluster cluster) {
122     this.cluster = cluster;
123     this.loadRules();
124   }
125 
126   @Override
127   protected double cost() {
128     double cost = 0;
129     final double targetUsage = ((double) this.cluster.numRegions / (double) this.totalCapacity);
130 
131     for (int i = 0; i < this.cluster.numServers; i++) {
132       // retrieve capacity for each RS
133       final ServerName sn = this.cluster.servers[i];
134       double limit;
135       if (this.limitPerRS.containsKey(sn)) {
136         limit = this.limitPerRS.get(sn);
137       } else {
138         limit = defaultNumberOfRegions;
139       }
140       final double nbrRegions = this.cluster.regionsPerServer[i].length;
141       final double usage = nbrRegions / limit;
142       if (usage > targetUsage) {
143         // cost is the number of regions above the local limit
144         final double localCost = (nbrRegions - Math.round(limit * targetUsage)) / limit;
145         cost += localCost;
146       }
147     }
148 
149     return cost / (double) this.cluster.numServers;
150   }
151 
152   /**
153    * used to load the rule files.
154    */
155   void loadRules() {
156     final List<String> lines = readFile(this.rulesPath);
157     if (null == lines) {
158       LOG.warn("cannot load rules file, keeping latest rules file which has "
159           + this.limitPerRule.size() + " rules");
160       return;
161     }
162 
163     LOG.info("loading rules file '" + this.rulesPath + "'");
164     this.limitPerRule.clear();
165     for (final String line : lines) {
166       try {
167         if (line.length() == 0) {
168           continue;
169         }
170         if (line.startsWith("#")) {
171           continue;
172         }
173         final String[] splits = line.split(" ");
174         if (splits.length != 2) {
175           throw new IOException(
176               "line '" + line + "' is malformated, " + "expected [regexp] [limit]. Skipping line");
177         }
178 
179         final Pattern pattern = Pattern.compile(splits[0]);
180         final Integer limit = Integer.parseInt(splits[1]);
181         this.limitPerRule.put(pattern, limit);
182       } catch (final IOException | NumberFormatException | PatternSyntaxException e) {
183         LOG.error("error on line: " + e);
184       }
185     }
186     this.rebuildCache();
187   }
188 
189   /**
190    * used to read the rule files from either HDFS or local FS
191    */
192   private List<String> readFile(final String filename) {
193     if (null == filename) {
194       return null;
195     }
196     try {
197       if (filename.startsWith("file:")) {
198         return readFileFromLocalFS(filename);
199       }
200       return readFileFromHDFS(filename);
201     } catch (IOException e) {
202       LOG.error("cannot read rules file located at ' " + filename + " ':" + e.getMessage());
203       return null;
204     }
205   }
206 
207   /**
208    * used to read the rule files from HDFS
209    */
210   private List<String> readFileFromHDFS(final String filename) throws IOException {
211     final Path path = new Path(filename);
212     final FileSystem fs = FileSystem.get(this.conf);
213     final BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(path)));
214     return readLines(reader);
215   }
216 
217   /**
218    * used to read the rule files from local FS
219    */
220   private List<String> readFileFromLocalFS(final String filename) throws IOException {
221     BufferedReader reader = new BufferedReader(new FileReader(filename));
222     return readLines(reader);
223   }
224 
225   private List<String> readLines(BufferedReader reader) throws IOException {
226     final List<String> records = new ArrayList<>();
227     try {
228       String line;
229       while ((line = reader.readLine()) != null) {
230         records.add(line);
231       }
232     } finally {
233       reader.close();
234     }
235     return records;
236   }
237 
238   /**
239    * Rebuild cache matching ServerNames and their capacity.
240    */
241   private void rebuildCache() {
242     LOG.debug("Rebuilding cache of capacity for each RS");
243     this.limitPerRS.clear();
244     this.totalCapacity = 0;
245     if (null == this.cluster) {
246       return;
247     }
248     for (int i = 0; i < this.cluster.numServers; i++) {
249       final ServerName sn = this.cluster.servers[i];
250       final int capacity = this.findLimitForRS(sn);
251       LOG.debug(sn.getHostname() + " can hold " + capacity + " regions");
252       this.totalCapacity += capacity;
253     }
254     overallUsage = (double) this.cluster.numRegions / (double) this.totalCapacity;
255     LOG.info("Cluster can hold " + this.cluster.numRegions + "/" + this.totalCapacity + " regions ("
256         + Math.round(overallUsage * 100) + "%)");
257     if (overallUsage >= 1) {
258       LOG.warn("Cluster is overused");
259     }
260   }
261 
262   /**
263    * Find the limit for a ServerName. If not found then return the default value
264    * @param serverName the server we are looking for
265    * @return the limit
266    */
267   int findLimitForRS(final ServerName serverName) {
268     boolean matched = false;
269     int limit = -1;
270     for (final Map.Entry<Pattern, Integer> entry : this.limitPerRule.entrySet()) {
271       if (entry.getKey().matcher(serverName.getHostname()).matches()) {
272         matched = true;
273         limit = entry.getValue();
274         break;
275       }
276     }
277     if (!matched) {
278       limit = this.defaultNumberOfRegions;
279     }
280     // Feeding cache
281     this.limitPerRS.put(serverName, limit);
282     return limit;
283   }
284 
285   int getNumberOfRulesLoaded() {
286     return this.limitPerRule.size();
287   }
288 }