View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import java.util.concurrent.atomic.AtomicBoolean;
21  import java.util.concurrent.atomic.AtomicLong;
22  
23  import org.apache.hadoop.hbase.classification.InterfaceAudience;
24  import org.apache.hadoop.hbase.classification.InterfaceStability;
25  
26  /**
27   * FastLongHistogram is a thread-safe class that estimate distribution of data and computes the
28   * quantiles.
29   */
30  @Deprecated
31  @InterfaceAudience.Public
32  @InterfaceStability.Evolving
33  public class FastLongHistogram {
34  
35    /**
36     * Default number of bins.
37     */
38    public static final int DEFAULT_NBINS = 255;
39  
40    public static final double[] DEFAULT_QUANTILES =
41        new double[]{0.25, 0.5, 0.75, 0.90, 0.95, 0.98, 0.99, 0.999};
42  
43    /**
44     * Bins is a class containing a list of buckets(or bins) for estimation histogram of some data.
45     */
46    private static class Bins {
47      private final Counter[] counts;
48      // inclusive
49      private final long binsMin;
50      // exclusive
51      private final long binsMax;
52      private final long bins10XMax;
53      private final AtomicLong min = new AtomicLong(Long.MAX_VALUE);
54      private final AtomicLong max = new AtomicLong(0L);
55  
56      private final Counter count = new Counter(0);
57      private final Counter total = new Counter(0);
58  
59      // set to true when any of data has been inserted to the Bins. It is set after the counts are
60      // updated.
61      private final AtomicBoolean hasData = new AtomicBoolean(false);
62  
63      /**
64       * The constructor for creating a Bins without any prior data.
65       */
66      public Bins(int numBins) {
67        counts = createCounters(numBins + 3);
68        this.binsMin = 1L;
69  
70        // These two numbers are total guesses
71        // and should be treated as highly suspect.
72        this.binsMax = 1000;
73        this.bins10XMax = binsMax * 10;
74      }
75  
76      /**
77       * The constructor for creating a Bins with last Bins.
78       */
79      public Bins(Bins last, int numOfBins, double minQ, double maxQ) {
80        long[] values = last.getQuantiles(new double[] { minQ, maxQ });
81        long wd = values[1] - values[0] + 1;
82        // expand minQ and maxQ in two ends back assuming uniform distribution
83        this.binsMin = Math.max(0L, (long) (values[0] - wd * minQ));
84        long binsMax = (long) (values[1] + wd * (1 - maxQ)) + 1;
85        // make sure each of bins is at least of width 1
86        this.binsMax = Math.max(binsMax, this.binsMin + numOfBins);
87        this.bins10XMax = Math.max((long) (values[1] + (binsMax - 1) * 9), this.binsMax + 1);
88  
89        this.counts = createCounters(numOfBins + 3);
90      }
91  
92      private Counter[] createCounters(int num) {
93        Counter[] counters = new Counter[num];
94        for (int i = 0; i < num; i++) {
95          counters[i] = new Counter();
96        }
97        return counters;
98      }
99  
100     private int getIndex(long value) {
101       if (value < this.binsMin) {
102         return 0;
103       } else if (value > this.bins10XMax) {
104         return this.counts.length - 1;
105       } else if (value >= this.binsMax) {
106         return this.counts.length - 2;
107       }
108       // compute the position
109       return 1 + (int) ((value - this.binsMin) * (this.counts.length - 3) /
110           (this.binsMax - this.binsMin));
111 
112     }
113 
114     /**
115      * Adds a value to the histogram.
116      */
117     public void add(long value, long count) {
118       if (value < 0) {
119         // The whole computation is completely thrown off if there are negative numbers
120         //
121         // Normally we would throw an IllegalArgumentException however this is the metrics
122         // system and it should be completely safe at all times.
123         // So silently throw it away.
124         return;
125       }
126       AtomicUtils.updateMin(min, value);
127       AtomicUtils.updateMax(max, value);
128 
129       this.count.add(count);
130       this.total.add(value * count);
131 
132       int pos = getIndex(value);
133       this.counts[pos].add(count);
134 
135       // hasData needs to be updated as last
136       this.hasData.set(true);
137     }
138 
139     /**
140      * Computes the quantiles give the ratios.
141      */
142     public long[] getQuantiles(double[] quantiles) {
143       if (!this.hasData.get()) {
144         // No data yet.
145         return new long[quantiles.length];
146       }
147 
148       // Make a snapshot of lowerCounter, higherCounter and bins.counts to counts.
149       // This is not synchronized, but since the counter are accumulating, the result is a good
150       // estimation of a snapshot.
151       long[] counts = new long[this.counts.length];
152       long total = 0L;
153       for (int i = 0; i < this.counts.length; i++) {
154         counts[i] = this.counts[i].get();
155         total += counts[i];
156       }
157 
158       int rIndex = 0;
159       double qCount = total * quantiles[0];
160       long cum = 0L;
161 
162       long[] res = new long[quantiles.length];
163       countsLoop: for (int i = 0; i < counts.length; i++) {
164         // mn and mx define a value range
165         long mn, mx;
166         if (i == 0) {
167           mn = this.min.get();
168           mx = this.binsMin;
169         } else if (i == counts.length - 1) {
170           mn = this.bins10XMax;
171           mx = this.max.get();
172         } else if (i == counts.length - 2) {
173           mn = this.binsMax;
174           mx = this.bins10XMax;
175         } else {
176           mn = this.binsMin + (i - 1) * (this.binsMax - this.binsMin) / (this.counts.length - 3);
177           mx = this.binsMin + i * (this.binsMax - this.binsMin) / (this.counts.length - 3);
178         }
179 
180         if (mx < this.min.get()) {
181           continue;
182         }
183         if (mn > this.max.get()) {
184           break;
185         }
186         mn = Math.max(mn, this.min.get());
187         mx = Math.min(mx, this.max.get());
188 
189         // lastCum/cum are the corresponding counts to mn/mx
190         double lastCum = cum;
191         cum += counts[i];
192 
193         // fill the results for qCount is within current range.
194         while (qCount <= cum) {
195           if (cum == lastCum) {
196             res[rIndex] = mn;
197           } else {
198             res[rIndex] = (long) ((qCount - lastCum) * (mx - mn) / (cum - lastCum) + mn);
199           }
200 
201           // move to next quantile
202           rIndex++;
203           if (rIndex >= quantiles.length) {
204             break countsLoop;
205           }
206           qCount = total * quantiles[rIndex];
207         }
208       }
209       // In case quantiles contains values >= 100%
210       for (; rIndex < quantiles.length; rIndex++) {
211         res[rIndex] = this.max.get();
212       }
213 
214       return res;
215     }
216 
217 
218     long getNumAtOrBelow(long val) {
219       final int targetIndex = getIndex(val);
220       long totalToCurrentIndex = 0;
221       for (int i = 0; i <= targetIndex; i++) {
222         totalToCurrentIndex += this.counts[i].get();
223       }
224       return  totalToCurrentIndex;
225     }
226   }
227 
228   // The bins counting values. It is replaced with a new one in calling of reset().
229   private volatile Bins bins;
230 
231   /**
232    * Constructor.
233    */
234   public FastLongHistogram() {
235     this(DEFAULT_NBINS);
236   }
237 
238   /**
239    * Constructor.
240    * @param numOfBins the number of bins for the histogram. A larger value results in more precise
241    *          results but with lower efficiency, and vice versus.
242    */
243   public FastLongHistogram(int numOfBins) {
244     this.bins = new Bins(numOfBins);
245   }
246 
247   /**
248    * Constructor setting the bins assuming a uniform distribution within a range.
249    * @param numOfBins the number of bins for the histogram. A larger value results in more precise
250    *          results but with lower efficiency, and vice versus.
251    * @param min lower bound of the region, inclusive.
252    * @param max higher bound of the region, inclusive.
253    */
254   public FastLongHistogram(int numOfBins, long min, long max) {
255     this(numOfBins);
256     Bins bins = new Bins(numOfBins);
257     bins.add(min, 1);
258     bins.add(max, 1);
259     this.bins = new Bins(bins, numOfBins, 0.01, 0.999);
260   }
261 
262   private FastLongHistogram(Bins bins) {
263     this.bins = bins;
264   }
265 
266   /**
267    * Adds a value to the histogram.
268    */
269   public void add(long value, long count) {
270     this.bins.add(value, count);
271   }
272 
273   /**
274    * Computes the quantiles give the ratios.
275    */
276   public long[] getQuantiles(double[] quantiles) {
277     return this.bins.getQuantiles(quantiles);
278   }
279 
280   public long[] getQuantiles() {
281     return this.bins.getQuantiles(DEFAULT_QUANTILES);
282   }
283 
284   public long getMin() {
285     long min = this.bins.min.get();
286     return min == Long.MAX_VALUE ? 0 : min; // in case it is not initialized
287   }
288 
289   public long getMax() {
290     return this.bins.max.get();
291   }
292 
293   public long getCount() {
294     return this.bins.count.get();
295   }
296 
297   public long getMean() {
298     Bins bins = this.bins;
299     long count = bins.count.get();
300     long total = bins.total.get();
301     if (count == 0) {
302       return 0;
303     }
304     return total / count;
305   }
306 
307   public long getNumAtOrBelow(long value) {
308     return this.bins.getNumAtOrBelow(value);
309   }
310 
311   /**
312    * Resets the histogram for new counting.
313    */
314   public FastLongHistogram reset() {
315     Bins oldBins = this.bins;
316     this.bins = new Bins(this.bins, this.bins.counts.length - 3, 0.01, 0.99);
317     return new FastLongHistogram(oldBins);
318   }
319 }