View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.client;
19  
20  import com.google.protobuf.Descriptors.MethodDescriptor;
21  import com.google.protobuf.Message;
22  import com.yammer.metrics.core.Counter;
23  import com.yammer.metrics.core.Histogram;
24  import com.yammer.metrics.core.MetricName;
25  import com.yammer.metrics.core.MetricsRegistry;
26  import com.yammer.metrics.core.Timer;
27  import com.yammer.metrics.reporting.JmxReporter;
28  import com.yammer.metrics.util.RatioGauge;
29  import org.apache.hadoop.hbase.ServerName;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
32  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
33  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateRequest;
34  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
35  import org.apache.hadoop.hbase.util.Bytes;
36  
37  import java.util.concurrent.ConcurrentHashMap;
38  import java.util.concurrent.ConcurrentSkipListMap;
39  import java.util.concurrent.ConcurrentMap;
40  import java.util.concurrent.ThreadPoolExecutor;
41  import java.util.concurrent.TimeUnit;
42  
43  /**
44   * This class is for maintaining the various connection statistics and publishing them through
45   * the metrics interfaces.
46   *
47   * This class manages its own {@link MetricsRegistry} and {@link JmxReporter} so as to not
48   * conflict with other uses of Yammer Metrics within the client application. Instantiating
49   * this class implicitly creates and "starts" instances of these classes; be sure to call
50   * {@link #shutdown()} to terminate the thread pools they allocate.
51   */
52  @InterfaceAudience.Private
53  public class MetricsConnection implements StatisticTrackable {
54  
55    /** Set this key to {@code true} to enable metrics collection of client requests. */
56    public static final String CLIENT_SIDE_METRICS_ENABLED_KEY = "hbase.client.metrics.enable";
57  
      // Metric-name prefixes. The per-RPC ones are completed with the RPC method name, the
      // per-region ones with "<server name>,<region name>".
58    private static final String CNT_BASE = "rpcCount_";
59    private static final String DRTN_BASE = "rpcCallDurationMs_";
60    private static final String REQ_BASE = "rpcCallRequestSizeBytes_";
61    private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
62    private static final String MEMLOAD_BASE = "memstoreLoad_";
63    private static final String HEAP_BASE = "heapOccupancy_";
      // Completed with the simple class name of the reported exception object.
64    private static final String CACHE_BASE = "cacheDroppingExceptions_";
      // Suffix used with CACHE_BASE when the reported exception object is null.
65    private static final String UNKNOWN_EXCEPTION = "UnknownException";
      // Names of the two lookup counters registered in the constructor.
66    private static final String NS_LOOKUPS = "nsLookups";
67    private static final String NS_LOOKUPS_FAILED = "nsLookupsFailed";
      // Name of the protobuf ClientService descriptor; CallTracker names start with it.
68    private static final String CLIENT_SVC = ClientService.getDescriptor().getName();
69  
70    /** A container class for collecting details about the RPC call as it percolates. */
71    public static class CallStats {
72      private long requestSizeBytes = 0;
73      private long responseSizeBytes = 0;
74      private long startTime = 0;
75      private long callTimeMs = 0;
76      private int concurrentCallsPerServer = 0;
77  
78      public long getRequestSizeBytes() {
79        return requestSizeBytes;
80      }
81  
82      public void setRequestSizeBytes(long requestSizeBytes) {
83        this.requestSizeBytes = requestSizeBytes;
84      }
85  
86      public long getResponseSizeBytes() {
87        return responseSizeBytes;
88      }
89  
90      public void setResponseSizeBytes(long responseSizeBytes) {
91        this.responseSizeBytes = responseSizeBytes;
92      }
93  
94      public long getStartTime() {
95        return startTime;
96      }
97  
98      public void setStartTime(long startTime) {
99        this.startTime = startTime;
100     }
101 
102     public long getCallTimeMs() {
103       return callTimeMs;
104     }
105 
106     public void setCallTimeMs(long callTimeMs) {
107       this.callTimeMs = callTimeMs;
108     }
109 
110     public int getConcurrentCallsPerServer() {
111       return concurrentCallsPerServer;
112     }
113 
114     public void setConcurrentCallsPerServer(int callsPerServer) {
115       this.concurrentCallsPerServer = callsPerServer;
116     }
117   }
118 
119   protected static final class CallTracker {
120     private final String name;
121     final Timer callTimer;
122     final Histogram reqHist;
123     final Histogram respHist;
124 
125     private CallTracker(MetricsRegistry registry, String name, String subName, String scope) {
126       StringBuilder sb = new StringBuilder(CLIENT_SVC).append("_").append(name);
127       if (subName != null) {
128         sb.append("(").append(subName).append(")");
129       }
130       this.name = sb.toString();
131       this.callTimer = registry.newTimer(MetricsConnection.class, DRTN_BASE + this.name, scope);
132       this.reqHist = registry.newHistogram(MetricsConnection.class, REQ_BASE + this.name, scope);
133       this.respHist = registry.newHistogram(MetricsConnection.class, RESP_BASE + this.name, scope);
134     }
135 
136     private CallTracker(MetricsRegistry registry, String name, String scope) {
137       this(registry, name, null, scope);
138     }
139 
140     public void updateRpc(CallStats stats) {
141       this.callTimer.update(stats.getCallTimeMs(), TimeUnit.MILLISECONDS);
142       this.reqHist.update(stats.getRequestSizeBytes());
143       this.respHist.update(stats.getResponseSizeBytes());
144     }
145 
146     @Override
147     public String toString() {
148       return "CallTracker:" + name;
149     }
150   }
151 
152   protected static class RegionStats {
153     final String name;
154     final Histogram memstoreLoadHist;
155     final Histogram heapOccupancyHist;
156 
157     public RegionStats(MetricsRegistry registry, String name) {
158       this.name = name;
159       this.memstoreLoadHist = registry.newHistogram(MetricsConnection.class,
160           MEMLOAD_BASE + this.name);
161       this.heapOccupancyHist = registry.newHistogram(MetricsConnection.class,
162           HEAP_BASE + this.name);
163     }
164 
165     public void update(ClientProtos.RegionLoadStats regionStatistics) {
166       this.memstoreLoadHist.update(regionStatistics.getMemstoreLoad());
167       this.heapOccupancyHist.update(regionStatistics.getHeapOccupancy());
168     }
169   }
170 
171   protected static class RunnerStats {
172     final Counter normalRunners;
173     final Counter delayRunners;
174     final Histogram delayIntevalHist;
175 
176     public RunnerStats(MetricsRegistry registry) {
177       this.normalRunners = registry.newCounter(MetricsConnection.class, "normalRunnersCount");
178       this.delayRunners = registry.newCounter(MetricsConnection.class, "delayRunnersCount");
179       this.delayIntevalHist = registry.newHistogram(MetricsConnection.class, "delayIntervalHist");
180     }
181 
182     public void incrNormalRunners() {
183       this.normalRunners.inc();
184     }
185 
186     public void incrDelayRunners() {
187       this.delayRunners.inc();
188     }
189 
190     public void updateDelayInterval(long interval) {
191       this.delayIntevalHist.update(interval);
192     }
193   }
194 
195   protected ConcurrentHashMap<ServerName, ConcurrentMap<byte[], RegionStats>> serverStats
196           = new ConcurrentHashMap<ServerName, ConcurrentMap<byte[], RegionStats>>();
197 
198   public void updateServerStats(ServerName serverName, byte[] regionName,
199                                 Object r) {
200     if (!(r instanceof Result)) {
201       return;
202     }
203     Result result = (Result) r;
204     ClientProtos.RegionLoadStats stats = result.getStats();
205     if (stats == null) {
206       return;
207     }
208     updateRegionStats(serverName, regionName, stats);
209   }
210 
211   @Override
212   public void updateRegionStats(ServerName serverName, byte[] regionName,
213     ClientProtos.RegionLoadStats stats) {
214     String name = serverName.getServerName() + "," + Bytes.toStringBinary(regionName);
215     ConcurrentMap<byte[], RegionStats> rsStats = null;
216     if (serverStats.containsKey(serverName)) {
217       rsStats = serverStats.get(serverName);
218     } else {
219       rsStats = serverStats.putIfAbsent(serverName,
220           new ConcurrentSkipListMap<byte[], RegionStats>(Bytes.BYTES_COMPARATOR));
221       if (rsStats == null) {
222         rsStats = serverStats.get(serverName);
223       }
224     }
225     RegionStats regionStats = null;
226     if (rsStats.containsKey(regionName)) {
227       regionStats = rsStats.get(regionName);
228     } else {
229       regionStats = rsStats.putIfAbsent(regionName, new RegionStats(this.registry, name));
230       if (regionStats == null) {
231         regionStats = rsStats.get(regionName);
232       }
233     }
234     regionStats.update(stats);
235   }
236 
237 
238   /** A lambda for dispatching to the appropriate metric factory method */
239   private static interface NewMetric<T> {
240     T newMetric(Class<?> clazz, String name, String scope);
241   }
242 
243   /** Anticipated number of metric entries */
244   private static final int CAPACITY = 50;
245   /** Default load factor from {@link java.util.HashMap#DEFAULT_LOAD_FACTOR} */
246   private static final float LOAD_FACTOR = 0.75f;
247   /**
248    * Anticipated number of concurrent accessor threads, from
249    * {@link ConnectionManager.HConnectionImplementation#getBatchPool()}
250    */
251   private static final int CONCURRENCY_LEVEL = 256;
252 
      // Registry created by the constructor; every metric of this class is registered in it.
253   private final MetricsRegistry registry;
      // JMX reporter over the registry; started by the constructor, stopped by shutdown().
254   private final JmxReporter reporter;
      // Scope passed when registering metrics; set to conn.toString() by the constructor.
255   private final String scope;
256 
257   private final NewMetric<Timer> timerFactory = new NewMetric<Timer>() {
258     @Override public Timer newMetric(Class<?> clazz, String name, String scope) {
259       return registry.newTimer(clazz, name, scope);
260     }
261   };
262 
263   private final NewMetric<Histogram> histogramFactory = new NewMetric<Histogram>() {
264     @Override public Histogram newMetric(Class<?> clazz, String name, String scope) {
265       return registry.newHistogram(clazz, name, scope);
266     }
267   };
268 
269   private final NewMetric<Counter> counterFactory = new NewMetric<Counter>() {
270     @Override public Counter newMetric(Class<?> clazz, String name, String scope) {
271       return registry.newCounter(clazz, name, scope);
272     }
273   };
274 
275   // static metrics
      // All of these are created once in the constructor and held in final fields.
276 
      // Meta cache hit/miss counters.
277   protected final Counter metaCacheHits;
278   protected final Counter metaCacheMisses;
      // One tracker per RPC kind that updateRpc dispatches to directly.
279   protected final CallTracker getTracker;
280   protected final CallTracker scanTracker;
281   protected final CallTracker appendTracker;
282   protected final CallTracker deleteTracker;
283   protected final CallTracker incrementTracker;
284   protected final CallTracker putTracker;
285   protected final CallTracker multiTracker;
      // Normal/delay runner counters and the delay-interval histogram.
286   protected final RunnerStats runnerStats;
      // Meta cache clears requested for a whole server / for a single region.
287   protected final Counter metaCacheNumClearServer;
288   protected final Counter metaCacheNumClearRegion;
      // Hedged reads issued, and hedged reads that returned before the original read.
289   protected final Counter hedgedReadOps;
290   protected final Counter hedgedReadWin;
      // Updated by updateRpc whenever CallStats reports more than zero concurrent calls.
291   protected final Histogram concurrentCallsPerServerHist;
      // Counters registered under the names NS_LOOKUPS and NS_LOOKUPS_FAILED.
292   protected final Counter nsLookups;
293   protected final Counter nsLookupsFailed;
294 
295   // dynamic metrics
296 
297   // These maps are used to cache references to the metric instances that are managed by the
298   // registry. I don't think their use perfectly removes redundant allocations, but it's
299   // a big improvement over calling registry.newMetric each time.
      // Keyed by DRTN_BASE + "<service>_<method>".
300   protected final ConcurrentMap<String, Timer> rpcTimers =
301       new ConcurrentHashMap<>(CAPACITY, LOAD_FACTOR, CONCURRENCY_LEVEL);
      // Keyed by REQ_BASE or RESP_BASE + "<service>_<method>".
302   protected final ConcurrentMap<String, Histogram> rpcHistograms =
303       new ConcurrentHashMap<>(CAPACITY * 2 /* tracking both request and response sizes */,
304           LOAD_FACTOR, CONCURRENCY_LEVEL);
      // Keyed by CACHE_BASE + the exception's simple class name (or UNKNOWN_EXCEPTION).
305   private final ConcurrentMap<String, Counter> cacheDroppingExceptions =
306     new ConcurrentHashMap<>(CAPACITY, LOAD_FACTOR, CONCURRENCY_LEVEL);
      // Keyed by CNT_BASE + "<service>_<method>".
307   protected final ConcurrentMap<String, Counter>  rpcCounters =
308       new ConcurrentHashMap<>(CAPACITY, LOAD_FACTOR, CONCURRENCY_LEVEL);
309 
/**
 * Creates a private registry, registers the two pool gauges and all static metrics in it,
 * and finally starts a {@link JmxReporter} over the registry.
 *
 * @param conn connection whose {@code toString()} value becomes the metric scope and whose
 *     current batch and meta-lookup pools are read by the gauges each time they are sampled
 */
public MetricsConnection(final ConnectionManager.HConnectionImplementation conn) {
  this.scope = conn.toString();
  this.registry = new MetricsRegistry();

  // Batch pool: active threads over maximum pool size; both report 0 while there is no pool.
  this.registry.newGauge(getExecutorPoolName(), new RatioGauge() {
    @Override
    protected double getNumerator() {
      final ThreadPoolExecutor pool = (ThreadPoolExecutor) conn.getCurrentBatchPool();
      return (pool == null) ? 0 : pool.getActiveCount();
    }

    @Override
    protected double getDenominator() {
      final ThreadPoolExecutor pool = (ThreadPoolExecutor) conn.getCurrentBatchPool();
      return (pool == null) ? 0 : pool.getMaximumPoolSize();
    }
  });

  // Meta lookup pool: same ratio as above.
  this.registry.newGauge(getMetaPoolName(), new RatioGauge() {
    @Override
    protected double getNumerator() {
      final ThreadPoolExecutor pool = (ThreadPoolExecutor) conn.getCurrentMetaLookupPool();
      return (pool == null) ? 0 : pool.getActiveCount();
    }

    @Override
    protected double getDenominator() {
      final ThreadPoolExecutor pool = (ThreadPoolExecutor) conn.getCurrentMetaLookupPool();
      return (pool == null) ? 0 : pool.getMaximumPoolSize();
    }
  });

  // Runtime class, exactly as the per-statement this.getClass() calls resolved it.
  final Class<?> owner = this.getClass();
  this.metaCacheHits = this.registry.newCounter(owner, "metaCacheHits", this.scope);
  this.metaCacheMisses = this.registry.newCounter(owner, "metaCacheMisses", this.scope);
  this.metaCacheNumClearServer =
      this.registry.newCounter(owner, "metaCacheNumClearServer", this.scope);
  this.metaCacheNumClearRegion =
      this.registry.newCounter(owner, "metaCacheNumClearRegion", this.scope);
  this.hedgedReadOps = this.registry.newCounter(owner, "hedgedReadOps", this.scope);
  this.hedgedReadWin = this.registry.newCounter(owner, "hedgedReadWin", this.scope);

  this.getTracker = new CallTracker(this.registry, "Get", this.scope);
  this.scanTracker = new CallTracker(this.registry, "Scan", this.scope);
  this.appendTracker = new CallTracker(this.registry, "Mutate", "Append", this.scope);
  this.deleteTracker = new CallTracker(this.registry, "Mutate", "Delete", this.scope);
  this.incrementTracker = new CallTracker(this.registry, "Mutate", "Increment", this.scope);
  this.putTracker = new CallTracker(this.registry, "Mutate", "Put", this.scope);
  this.multiTracker = new CallTracker(this.registry, "Multi", this.scope);
  this.runnerStats = new RunnerStats(this.registry);

  this.concurrentCallsPerServerHist =
      this.registry.newHistogram(owner, "concurrentCallsPerServer", this.scope);
  this.nsLookups = this.registry.newCounter(owner, NS_LOOKUPS, this.scope);
  this.nsLookupsFailed = this.registry.newCounter(owner, NS_LOOKUPS_FAILED, this.scope);

  // Start reporting only after every static metric has been registered.
  this.reporter = new JmxReporter(this.registry);
  this.reporter.start();
}
372 
373   final MetricName getExecutorPoolName() {
374     return new MetricName(getClass(), "executorPoolActiveThreads", scope);
375   }
376 
377   final MetricName getMetaPoolName() {
378     return new MetricName(getClass(), "metaPoolActiveThreads", scope);
379   }
380 
381   MetricsRegistry getMetricsRegistry() {
382     return registry;
383   }
384 
385   public void shutdown() {
386     this.reporter.shutdown();
387     this.registry.shutdown();
388   }
389 
390   /** Produce an instance of {@link CallStats} for clients to attach to RPCs. */
391   public static CallStats newCallStats() {
392     // TODO: instance pool to reduce GC?
393     return new CallStats();
394   }
395 
396   /** Increment the number of meta cache hits. */
397   public void incrMetaCacheHit() {
398     metaCacheHits.inc();
399   }
400 
401   /** Increment the number of meta cache misses. */
402   public void incrMetaCacheMiss() {
403     metaCacheMisses.inc();
404   }
405 
406   /** Increment the number of meta cache drops requested for entire RegionServer. */
407   public void incrMetaCacheNumClearServer() {
408     metaCacheNumClearServer.inc();
409   }
410 
411   /** Increment the number of meta cache drops requested for individual region. */
412   public void incrMetaCacheNumClearRegion() {
413     metaCacheNumClearRegion.inc();
414   }
415 
416   /** Increment the number of hedged read that have occurred. */
417   public void incrHedgedReadOps() {
418     hedgedReadOps.inc();
419   }
420 
421   /** Increment the number of hedged read returned faster than the original read. */
422   public void incrHedgedReadWin() {
423     hedgedReadWin.inc();
424   }
425 
426   /** Increment the number of normal runner counts. */
427   public void incrNormalRunners() {
428     this.runnerStats.incrNormalRunners();
429   }
430 
431   /** Increment the number of delay runner counts. */
432   public void incrDelayRunners() {
433     this.runnerStats.incrDelayRunners();
434   }
435 
436   /** Update delay interval of delay runner. */
437   public void updateDelayInterval(long interval) {
438     this.runnerStats.updateDelayInterval(interval);
439   }
440 
441   /**
442    * Get a metric for {@code key} from {@code map}, or create it with {@code factory}.
443    */
444   private <T> T getMetric(String key, ConcurrentMap<String, T> map, NewMetric<T> factory) {
445     T t = map.get(key);
446     if (t == null) {
447       t = factory.newMetric(this.getClass(), key, scope);
448       T tmp = map.putIfAbsent(key, t);
449       t = (tmp == null) ? t : tmp;
450     }
451     return t;
452   }
453 
454   /** Update call stats for non-critical-path methods */
455   private void updateRpcGeneric(String methodName, CallStats stats) {
456     getMetric(DRTN_BASE + methodName, rpcTimers, timerFactory)
457         .update(stats.getCallTimeMs(), TimeUnit.MILLISECONDS);
458     getMetric(REQ_BASE + methodName, rpcHistograms, histogramFactory)
459         .update(stats.getRequestSizeBytes());
460     getMetric(RESP_BASE + methodName, rpcHistograms, histogramFactory)
461         .update(stats.getResponseSizeBytes());
462   }
463 
464   /** Report RPC context to metrics system. */
465   public void updateRpc(MethodDescriptor method, Message param, CallStats stats) {
466     int callsPerServer = stats.getConcurrentCallsPerServer();
467     if (callsPerServer > 0) {
468       concurrentCallsPerServerHist.update(callsPerServer);
469     }
470     // Update the counter that tracks RPCs by type.
471     final String methodName = method.getService().getName() + "_" + method.getName();
472     getMetric(CNT_BASE + methodName, rpcCounters, counterFactory).inc();
473     // this implementation is tied directly to protobuf implementation details. would be better
474     // if we could dispatch based on something static, ie, request Message type.
475     if (method.getService() == ClientService.getDescriptor()) {
476       switch(method.getIndex()) {
477       case 0:
478         assert "Get".equals(method.getName());
479         getTracker.updateRpc(stats);
480         return;
481       case 1:
482         assert "Mutate".equals(method.getName());
483         final MutationType mutationType = ((MutateRequest) param).getMutation().getMutateType();
484         switch(mutationType) {
485         case APPEND:
486           appendTracker.updateRpc(stats);
487           return;
488         case DELETE:
489           deleteTracker.updateRpc(stats);
490           return;
491         case INCREMENT:
492           incrementTracker.updateRpc(stats);
493           return;
494         case PUT:
495           putTracker.updateRpc(stats);
496           return;
497         default:
498           throw new RuntimeException("Unrecognized mutation type " + mutationType);
499         }
500       case 2:
501         assert "Scan".equals(method.getName());
502         scanTracker.updateRpc(stats);
503         return;
504       case 3:
505         assert "BulkLoadHFile".equals(method.getName());
506         // use generic implementation
507         break;
508       case 4:
509         assert "ExecService".equals(method.getName());
510         // use generic implementation
511         break;
512       case 5:
513         assert "ExecRegionServerService".equals(method.getName());
514         // use generic implementation
515         break;
516       case 6:
517         assert "Multi".equals(method.getName());
518         multiTracker.updateRpc(stats);
519         return;
520       default:
521         throw new RuntimeException("Unrecognized ClientService RPC type " + method.getFullName());
522       }
523     }
524     // Fallback to dynamic registry lookup for DDL methods.
525     updateRpcGeneric(methodName, stats);
526   }
527 
528   public void incrCacheDroppingExceptions(Object exception) {
529     getMetric(CACHE_BASE +
530       (exception == null? UNKNOWN_EXCEPTION : exception.getClass().getSimpleName()),
531       cacheDroppingExceptions, counterFactory).inc();
532   }
533 
534   public void incrNsLookups() {
535     this.nsLookups.inc();
536   }
537 
538   public void incrNsLookupsFailed() {
539     this.nsLookupsFailed.inc();
540   }
541 }