View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.util;
20  
21  import java.io.IOException;
22  import java.math.BigInteger;
23  
24  import java.util.Arrays;
25  import java.util.Collection;
26  import java.util.LinkedList;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  import java.util.TreeMap;
31  import org.apache.commons.cli.CommandLine;
32  import org.apache.commons.cli.GnuParser;
33  import org.apache.commons.cli.HelpFormatter;
34  import org.apache.commons.cli.OptionBuilder;
35  import org.apache.commons.cli.Options;
36  import org.apache.commons.cli.ParseException;
37  import org.apache.commons.lang.ArrayUtils;
38  import org.apache.commons.lang.StringUtils;
39  import org.apache.commons.logging.Log;
40  import org.apache.commons.logging.LogFactory;
41  import org.apache.hadoop.conf.Configuration;
42  import org.apache.hadoop.fs.FSDataInputStream;
43  import org.apache.hadoop.fs.FSDataOutputStream;
44  import org.apache.hadoop.fs.FileSystem;
45  import org.apache.hadoop.fs.Path;
46  import org.apache.hadoop.hbase.ClusterStatus;
47  import org.apache.hadoop.hbase.HBaseConfiguration;
48  import org.apache.hadoop.hbase.HConstants;
49  import org.apache.hadoop.hbase.HColumnDescriptor;
50  import org.apache.hadoop.hbase.HRegionInfo;
51  import org.apache.hadoop.hbase.HRegionLocation;
52  import org.apache.hadoop.hbase.HTableDescriptor;
53  import org.apache.hadoop.hbase.MetaTableAccessor;
54  import org.apache.hadoop.hbase.ServerName;
55  import org.apache.hadoop.hbase.TableName;
56  import org.apache.hadoop.hbase.classification.InterfaceAudience;
57  import org.apache.hadoop.hbase.client.Admin;
58  import org.apache.hadoop.hbase.client.ClusterConnection;
59  import org.apache.hadoop.hbase.client.Connection;
60  import org.apache.hadoop.hbase.client.ConnectionFactory;
61  import org.apache.hadoop.hbase.client.NoServerForRegionException;
62  import org.apache.hadoop.hbase.client.RegionLocator;
63  import org.apache.hadoop.hbase.client.Table;
64  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
65  
66  import com.google.common.base.Preconditions;
67  import com.google.common.collect.Lists;
68  import com.google.common.collect.Maps;
69  import com.google.common.collect.Sets;
70  
71  /**
72   * The {@link RegionSplitter} class provides several utilities to help in the
73   * administration lifecycle for developers who choose to manually split regions
74   * instead of having HBase handle that automatically. The most useful utilities
75   * are:
76   * <p>
77   * <ul>
78   * <li>Create a table with a specified number of pre-split regions
79   * <li>Execute a rolling split of all regions on an existing table
80   * </ul>
81   * <p>
82   * Both operations can be safely done on a live server.
83   * <p>
84   * <b>Question:</b> How do I turn off automatic splitting? <br>
85   * <b>Answer:</b> Automatic splitting is determined by the configuration value
86   * <i>HConstants.HREGION_MAX_FILESIZE</i>. It is not recommended that you set this
87   * to Long.MAX_VALUE in case you forget about manual splits. A suggested setting
88   * is 100GB, which would result in &gt; 1hr major compactions if reached.
89   * <p>
90   * <b>Question:</b> Why did the original authors decide to manually split? <br>
91   * <b>Answer:</b> Specific workload characteristics of our use case allowed us
92   * to benefit from a manual split system.
93   * <p>
94   * <ul>
95   * <li>Data (~1k) that would grow instead of being replaced
96   * <li>Data growth was roughly uniform across all regions
97   * <li>OLTP workload. Data loss is a big deal.
98   * </ul>
99   * <p>
100  * <b>Question:</b> Why is manual splitting good for this workload? <br>
101  * <b>Answer:</b> Although automated splitting is not a bad option, there are
102  * benefits to manual splitting.
103  * <p>
104  * <ul>
105  * <li>With growing amounts of data, splits will continually be needed. Since
106  * you always know exactly what regions you have, long-term debugging and
107  * profiling is much easier with manual splits. It is hard to trace the logs to
108  * understand region level problems if it keeps splitting and getting renamed.
109  * <li>Data offlining bugs + unknown number of split regions == oh crap! If a
110  * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and
111  * you notice it a day or so later, you can be assured that the regions
112  * specified in these files are the same as the current regions and you have
113  * less headaches trying to restore/replay your data.
114  * <li>You can finely tune your compaction algorithm. With roughly uniform data
115  * growth, it's easy to cause split / compaction storms as the regions all
116  * roughly hit the same data size at the same time. With manual splits, you can
117  * let staggered, time-based major compactions spread out your network IO load.
118  * </ul>
119  * <p>
120  * <b>Question:</b> What's the optimal number of pre-split regions to create? <br>
121  * <b>Answer:</b> Mileage will vary depending upon your application.
122  * <p>
123  * The short answer for our application is that we started with 10 pre-split
124  * regions / server and watched our data growth over time. It's better to err on
125  * the side of too few regions and rolling split later.
126  * <p>
127  * The more complicated answer is that this depends upon the largest storefile
128  * in your region. With a growing data size, this will get larger over time. You
129  * want the largest region to be just big enough that the
130  * {@link org.apache.hadoop.hbase.regionserver.HStore} compact
131  * selection algorithm only compacts it due to a timed major. If you don't, your
132  * cluster can be prone to compaction storms as the algorithm decides to run
133  * major compactions on a large series of regions all at once. Note that
134  * compaction storms are due to the uniform data growth, not the manual split
135  * decision.
136  * <p>
137  * If you pre-split your regions too thin, you can increase the major compaction
138  * interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size
139  * grows too large, use this script to perform a network IO safe rolling split
140  * of all regions.
141  */
142 @InterfaceAudience.Private
143 public class RegionSplitter {
144   private static final Log LOG = LogFactory.getLog(RegionSplitter.class);
145 
/**
 * Contract between the RegionSplitter code and a key-partitioning strategy;
 * RegionSplitter relies on it for all its functionality. The original authors
 * of this code use {@link HexStringSplit} to partition their table and set it
 * as the default, but provided this interface so you can plug in a custom
 * algorithm. To use, create a new derived class from this interface and call
 * {@link RegionSplitter#createPresplitTable} or
 * RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the
 * argument splitClassName giving the name of your class.
 */
public interface SplitAlgorithm {

  // ---------------------------------------------------------------------
  // Split point computation
  // ---------------------------------------------------------------------

  /**
   * Split a pre-existing region into 2 regions.
   *
   * @param start first row (inclusive)
   * @param end last row (exclusive)
   * @return the split row to use
   */
  byte[] split(byte[] start, byte[] end);

  /**
   * Split an entire table.
   *
   * @param numRegions number of regions to split the table into
   * @return array of split keys for the initial regions of the table. The
   *         length of the returned array should be numRegions-1.
   * @throws RuntimeException user input is validated at this time, so a
   *           runtime exception may be thrown in response to a parse failure
   */
  byte[][] split(int numRegions);

  /**
   * Generate several split points inside one key range. Some MapReduce jobs
   * may want to run multiple mappers per region; this is intended for such
   * use cases.
   *
   * @param start first row (inclusive)
   * @param end last row (exclusive)
   * @param numSplits number of splits to generate
   * @param inclusive whether start and end are returned as split points
   * @return the generated split points
   */
  byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);

  // ---------------------------------------------------------------------
  // Table boundaries
  // ---------------------------------------------------------------------

  /**
   * In HBase, the first row is represented by an empty byte array. This might
   * cause problems with your split algorithm or row printing. All your APIs
   * will be passed firstRow() instead of empty array.
   *
   * @return your representation of your first row
   */
  byte[] firstRow();

  /**
   * In HBase, the last row is represented by an empty byte array. This might
   * cause problems with your split algorithm or row printing. All your APIs
   * will be passed lastRow() instead of empty array.
   *
   * @return your representation of your last row
   */
  byte[] lastRow();

  /**
   * In HBase, the first row is represented by an empty byte array. Set this
   * value to help the split code understand how to evenly divide the first
   * region.
   *
   * @param userInput raw user input (may throw RuntimeException on parse
   *          failure)
   */
  void setFirstRow(String userInput);

  /**
   * Set the first row.
   *
   * @param userInput byte array of the row key.
   */
  void setFirstRow(byte[] userInput);

  /**
   * In HBase, the last row is represented by an empty byte array. Set this
   * value to help the split code understand how to evenly divide the last
   * region. Note that this last row is inclusive for all rows sharing the
   * same prefix.
   *
   * @param userInput raw user input (may throw RuntimeException on parse
   *          failure)
   */
  void setLastRow(String userInput);

  /**
   * Set the last row.
   *
   * @param userInput byte array of the row key.
   */
  void setLastRow(byte[] userInput);

  // ---------------------------------------------------------------------
  // Row <-> text conversion
  // ---------------------------------------------------------------------

  /**
   * @param input user or file input for row
   * @return byte array representation of this row for HBase
   */
  byte[] strToRow(String input);

  /**
   * @param row byte array representing a row in HBase
   * @return String to use for debug &amp; file printing
   */
  String rowToStr(byte[] row);

  /**
   * @return the separator character to use when storing / printing the row
   */
  String separator();
}
262 
263   /**
264    * The main function for the RegionSplitter application. Common uses:
265    * <p>
266    * <ul>
267    * <li>create a table named 'myTable' with 60 pre-split regions containing 2
268    * column families 'test' &amp; 'rs', assuming the keys are hex-encoded ASCII:
269    * <ul>
270    * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
271    * myTable HexStringSplit
272    * </ul>
273    * <li>create a table named 'myTable' with 50 pre-split regions,
274    * assuming the keys are decimal-encoded ASCII:
275    * <ul>
276    * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
277    * myTable DecimalStringSplit
278    * </ul>
279    * <li>perform a rolling split of 'myTable' (i.e. 60 =&gt; 120 regions), # 2
280    * outstanding splits at a time, assuming keys are uniformly distributed
281    * bytes:
282    * <ul>
283    * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
284    * UniformSplit
285    * </ul>
286    * </ul>
287    *
288    * There are three SplitAlgorithms built into RegionSplitter, HexStringSplit,
289    * DecimalStringSplit, and UniformSplit. These are different strategies for
290    * choosing region boundaries. See their source code for details.
291    *
292    * @param args
293    *          Usage: RegionSplitter &lt;TABLE&gt; &lt;SPLITALGORITHM&gt;
294    *          &lt;-c &lt;# regions&gt; -f &lt;family:family:...&gt; | -r
295    *          [-o &lt;# outstanding splits&gt;]&gt;
296    *          [-D &lt;conf.param=value&gt;]
297    * @throws IOException
298    *           HBase IO problem
299    * @throws InterruptedException
300    *           user requested exit
301    * @throws ParseException
302    *           problem parsing user input
303    */
304   @SuppressWarnings("static-access")
305   public static void main(String[] args) throws IOException,
306       InterruptedException, ParseException {
307     Configuration conf = HBaseConfiguration.create();
308 
309     // parse user input
310     Options opt = new Options();
311     opt.addOption(OptionBuilder.withArgName("property=value").hasArg()
312         .withDescription("Override HBase Configuration Settings").create("D"));
313     opt.addOption(OptionBuilder.withArgName("region count").hasArg()
314         .withDescription(
315             "Create a new table with a pre-split number of regions")
316         .create("c"));
317     opt.addOption(OptionBuilder.withArgName("family:family:...").hasArg()
318         .withDescription(
319             "Column Families to create with new table.  Required with -c")
320         .create("f"));
321     opt.addOption("h", false, "Print this usage help");
322     opt.addOption("r", false, "Perform a rolling split of an existing region");
323     opt.addOption(OptionBuilder.withArgName("count").hasArg().withDescription(
324         "Max outstanding splits that have unfinished major compactions")
325         .create("o"));
326     opt.addOption(null, "firstrow", true,
327         "First Row in Table for Split Algorithm");
328     opt.addOption(null, "lastrow", true,
329         "Last Row in Table for Split Algorithm");
330     opt.addOption(null, "risky", false,
331         "Skip verification steps to complete quickly."
332             + "STRONGLY DISCOURAGED for production systems.  ");
333     CommandLine cmd = new GnuParser().parse(opt, args);
334 
335     if (cmd.hasOption("D")) {
336       for (String confOpt : cmd.getOptionValues("D")) {
337         String[] kv = confOpt.split("=", 2);
338         if (kv.length == 2) {
339           conf.set(kv[0], kv[1]);
340           LOG.debug("-D configuration override: " + kv[0] + "=" + kv[1]);
341         } else {
342           throw new ParseException("-D option format invalid: " + confOpt);
343         }
344       }
345     }
346 
347     if (cmd.hasOption("risky")) {
348       conf.setBoolean("split.verify", false);
349     }
350 
351     boolean createTable = cmd.hasOption("c") && cmd.hasOption("f");
352     boolean rollingSplit = cmd.hasOption("r");
353     boolean oneOperOnly = createTable ^ rollingSplit;
354 
355     if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
356       new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
357           "SPLITALGORITHM is a java class name of a class implementing " +
358           "SplitAlgorithm, or one of the special strings HexStringSplit or " +
359           "DecimalStringSplit or UniformSplit, which are built-in split algorithms. " +
360           "HexStringSplit treats keys as hexadecimal ASCII, and " +
361           "DecimalStringSplit treats keys as decimal ASCII, and " +
362           "UniformSplit treats keys as arbitrary bytes.", opt);
363       return;
364     }
365     TableName tableName = TableName.valueOf(cmd.getArgs()[0]);
366     String splitClass = cmd.getArgs()[1];
367     SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClass);
368 
369     if (cmd.hasOption("firstrow")) {
370       splitAlgo.setFirstRow(cmd.getOptionValue("firstrow"));
371     }
372     if (cmd.hasOption("lastrow")) {
373       splitAlgo.setLastRow(cmd.getOptionValue("lastrow"));
374     }
375 
376     if (createTable) {
377       conf.set("split.count", cmd.getOptionValue("c"));
378       createPresplitTable(tableName, splitAlgo, cmd.getOptionValue("f").split(":"), conf);
379     }
380 
381     if (rollingSplit) {
382       if (cmd.hasOption("o")) {
383         conf.set("split.outstanding", cmd.getOptionValue("o"));
384       }
385       rollingSplit(tableName, splitAlgo, conf);
386     }
387   }
388 
389   static void createPresplitTable(TableName tableName, SplitAlgorithm splitAlgo,
390           String[] columnFamilies, Configuration conf)
391   throws IOException, InterruptedException {
392     final int splitCount = conf.getInt("split.count", 0);
393     Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");
394 
395     Preconditions.checkArgument(columnFamilies.length > 0,
396         "Must specify at least one column family. ");
397     LOG.debug("Creating table " + tableName + " with " + columnFamilies.length
398         + " column families.  Presplitting to " + splitCount + " regions");
399 
400     HTableDescriptor desc = new HTableDescriptor(tableName);
401     for (String cf : columnFamilies) {
402       desc.addFamily(new HColumnDescriptor(Bytes.toBytes(cf)));
403     }
404     try (Connection connection = ConnectionFactory.createConnection(conf)) {
405       Admin admin = connection.getAdmin();
406       try {
407         Preconditions.checkArgument(!admin.tableExists(tableName),
408           "Table already exists: " + tableName);
409         admin.createTable(desc, splitAlgo.split(splitCount));
410       } finally {
411         admin.close();
412       }
413       LOG.debug("Table created!  Waiting for regions to show online in META...");
414       if (!conf.getBoolean("split.verify", true)) {
415         // NOTE: createTable is synchronous on the table, but not on the regions
416         int onlineRegions = 0;
417         while (onlineRegions < splitCount) {
418           onlineRegions = MetaTableAccessor.getRegionCount(connection, tableName);
419           LOG.debug(onlineRegions + " of " + splitCount + " regions online...");
420           if (onlineRegions < splitCount) {
421             Thread.sleep(10 * 1000); // sleep
422           }
423         }
424       }
425       LOG.debug("Finished creating table with " + splitCount + " regions");
426     }
427   }
428 
429   /**
430    * Alternative getCurrentNrHRS which is no longer available.
431    * @param connection
432    * @return Rough count of regionservers out on cluster.
433    * @throws IOException 
434    */
435   private static int getRegionServerCount(final Connection connection) throws IOException {
436     try (Admin admin = connection.getAdmin()) {
437       ClusterStatus status = admin.getClusterStatus();
438       Collection<ServerName> servers = status.getServers();
439       return servers == null || servers.isEmpty()? 0: servers.size();
440     }
441   }
442 
443   private static byte [] readFile(final FileSystem fs, final Path path) throws IOException {
444     FSDataInputStream tmpIn = fs.open(path);
445     try {
446       byte [] rawData = new byte[tmpIn.available()];
447       tmpIn.readFully(rawData);
448       return rawData;
449     } finally {
450       tmpIn.close();
451     }
452   }
453 
454   static void rollingSplit(TableName tableName, SplitAlgorithm splitAlgo, Configuration conf)
455   throws IOException, InterruptedException {
456     final int minOS = conf.getInt("split.outstanding", 2);
457     try (Connection connection = ConnectionFactory.createConnection(conf)) {
458       // Max outstanding splits. default == 50% of servers
459       final int MAX_OUTSTANDING = Math.max(getRegionServerCount(connection) / 2, minOS);
460 
461       Path hbDir = FSUtils.getRootDir(conf);
462       Path tableDir = FSUtils.getTableDir(hbDir, tableName);
463       Path splitFile = new Path(tableDir, "_balancedSplit");
464       FileSystem fs = FileSystem.get(conf);
465 
466       // Get a list of daughter regions to create
467       LinkedList<Pair<byte[], byte[]>> tmpRegionSet = null;
468       try (Table table = connection.getTable(tableName)) {
469         tmpRegionSet = getSplits(connection, tableName, splitAlgo);
470       }
471       LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
472       int splitCount = 0;
473       final int origCount = tmpRegionSet.size();
474 
475       // all splits must compact & we have 1 compact thread, so 2 split
476       // requests to the same RS can stall the outstanding split queue.
477       // To fix, group the regions into an RS pool and round-robin through it
478       LOG.debug("Bucketing regions by regionserver...");
479       TreeMap<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegions =
480           Maps.newTreeMap();
481       // Get a regionLocator.  Need it in below.
482       try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
483         for (Pair<byte[], byte[]> dr : tmpRegionSet) {
484           ServerName rsLocation = regionLocator.getRegionLocation(dr.getSecond()).getServerName();
485           if (!daughterRegions.containsKey(rsLocation)) {
486             LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
487             daughterRegions.put(rsLocation, entry);
488           }
489           daughterRegions.get(rsLocation).add(dr);
490         }
491         LOG.debug("Done with bucketing.  Split time!");
492         long startTime = System.currentTimeMillis();
493 
494         // Open the split file and modify it as splits finish
495         byte[] rawData = readFile(fs, splitFile);
496 
497         FSDataOutputStream splitOut = fs.create(splitFile);
498         try {
499           splitOut.write(rawData);
500 
501           try {
502             // *** split code ***
503             while (!daughterRegions.isEmpty()) {
504               LOG.debug(daughterRegions.size() + " RS have regions to splt.");
505 
506               // Get ServerName to region count mapping
507               final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
508               List<HRegionLocation> hrls = regionLocator.getAllRegionLocations();
509               for (HRegionLocation hrl: hrls) {
510                 ServerName sn = hrl.getServerName();
511                 if (rsSizes.containsKey(sn)) {
512                   rsSizes.put(sn, rsSizes.get(sn) + 1);
513                 } else {
514                   rsSizes.put(sn, 1);
515                 }
516               }
517 
518               // Round-robin through the ServerName list. Choose the lightest-loaded servers
519               // first to keep the master from load-balancing regions as we split.
520               for (Map.Entry<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegion :
521                       daughterRegions.entrySet()) {
522                 Pair<byte[], byte[]> dr = null;
523                 ServerName rsLoc = daughterRegion.getKey();
524                 LinkedList<Pair<byte[], byte[]>> regionList = daughterRegion.getValue();
525 
526                 // Find a region in the ServerName list that hasn't been moved
527                 LOG.debug("Finding a region on " + rsLoc);
528                 while (!regionList.isEmpty()) {
529                   dr = regionList.pop();
530 
531                   // get current region info
532                   byte[] split = dr.getSecond();
533                   HRegionLocation regionLoc = regionLocator.getRegionLocation(split);
534 
535                   // if this region moved locations
536                   ServerName newRs = regionLoc.getServerName();
537                   if (newRs.compareTo(rsLoc) != 0) {
538                     LOG.debug("Region with " + splitAlgo.rowToStr(split)
539                         + " moved to " + newRs + ". Relocating...");
540                     // relocate it, don't use it right now
541                     if (!daughterRegions.containsKey(newRs)) {
542                       LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
543                       daughterRegions.put(newRs, entry);
544                     }
545                     daughterRegions.get(newRs).add(dr);
546                     dr = null;
547                     continue;
548                   }
549 
550                   // make sure this region wasn't already split
551                   byte[] sk = regionLoc.getRegionInfo().getStartKey();
552                   if (sk.length != 0) {
553                     if (Bytes.equals(split, sk)) {
554                       LOG.debug("Region already split on "
555                           + splitAlgo.rowToStr(split) + ".  Skipping this region...");
556                       ++splitCount;
557                       dr = null;
558                       continue;
559                     }
560                     byte[] start = dr.getFirst();
561                     Preconditions.checkArgument(Bytes.equals(start, sk), splitAlgo
562                         .rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
563                   }
564 
565                   // passed all checks! found a good region
566                   break;
567                 }
568                 if (regionList.isEmpty()) {
569                   daughterRegions.remove(rsLoc);
570                 }
571                 if (dr == null)
572                   continue;
573 
574                 // we have a good region, time to split!
575                 byte[] split = dr.getSecond();
576                 LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
577                 try (Admin admin = connection.getAdmin()) {
578                   admin.split(tableName, split);
579                 }
580 
581                 LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
582                 LinkedList<Pair<byte[], byte[]>> local_finished = Lists.newLinkedList();
583                 if (conf.getBoolean("split.verify", true)) {
584                   // we need to verify and rate-limit our splits
585                   outstanding.addLast(dr);
586                   // with too many outstanding splits, wait for some to finish
587                   while (outstanding.size() >= MAX_OUTSTANDING) {
588                     LOG.debug("Wait for outstanding splits " + outstanding.size());
589                     local_finished = splitScan(outstanding, connection, tableName, splitAlgo);
590                     if (local_finished.isEmpty()) {
591                       Thread.sleep(30 * 1000);
592                     } else {
593                       finished.addAll(local_finished);
594                       outstanding.removeAll(local_finished);
595                       LOG.debug(local_finished.size() + " outstanding splits finished");
596                     }
597                   }
598                 } else {
599                   finished.add(dr);
600                 }
601 
602                 // mark each finished region as successfully split.
603                 for (Pair<byte[], byte[]> region : finished) {
604                   splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
605                       + " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
606                   splitCount++;
607                   if (splitCount % 10 == 0) {
608                     long tDiff = (System.currentTimeMillis() - startTime)
609                         / splitCount;
610                     LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount
611                         + ". Avg Time / Split = "
612                         + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
613                   }
614                 }
615               }
616             }
617             if (conf.getBoolean("split.verify", true)) {
618               while (!outstanding.isEmpty()) {
619                 LOG.debug("Finally Wait for outstanding splits " + outstanding.size());
620                 LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding,
621                     connection, tableName, splitAlgo);
622                 if (finished.isEmpty()) {
623                   Thread.sleep(30 * 1000);
624                 } else {
625                   outstanding.removeAll(finished);
626                   for (Pair<byte[], byte[]> region : finished) {
627                     splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
628                         + " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
629                     splitCount++;
630                   }
631                   LOG.debug("Finally " + finished.size() + " outstanding splits finished");
632                 }
633               }
634             }
635             LOG.debug("All regions have been successfully split!");
636           } finally {
637             long tDiff = System.currentTimeMillis() - startTime;
638             LOG.debug("TOTAL TIME = "
639                 + org.apache.hadoop.util.StringUtils.formatTime(tDiff));
640             LOG.debug("Splits = " + splitCount);
641             if (0 < splitCount) {
642               LOG.debug("Avg Time / Split = "
643                   + org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));
644             }
645           }
646         } finally {
647           splitOut.close();
648           fs.delete(splitFile, false);
649         }
650       }
651     }
652   }
653 
654   /**
655    * @throws IOException if the specified SplitAlgorithm class couldn't be
656    * instantiated
657    */
658   public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
659           String splitClassName) throws IOException {
660     Class<?> splitClass;
661 
662     // For split algorithms builtin to RegionSplitter, the user can specify
663     // their simple class name instead of a fully qualified class name.
664     if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
665       splitClass = HexStringSplit.class;
666     } else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
667       splitClass = DecimalStringSplit.class;
668     } else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
669       splitClass = UniformSplit.class;
670     } else {
671       try {
672         splitClass = conf.getClassByName(splitClassName);
673       } catch (ClassNotFoundException e) {
674         throw new IOException("Couldn't load split class " + splitClassName, e);
675       }
676       if(splitClass == null) {
677         throw new IOException("Failed loading split class " + splitClassName);
678       }
679       if(!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
680         throw new IOException(
681                 "Specified split class doesn't implement SplitAlgorithm");
682       }
683     }
684     try {
685       return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
686     } catch (Exception e) {
687       throw new IOException("Problem loading split algorithm: ", e);
688     }
689   }
690 
691   static LinkedList<Pair<byte[], byte[]>> splitScan(
692       LinkedList<Pair<byte[], byte[]>> regionList,
693       final Connection connection,
694       final TableName tableName,
695       SplitAlgorithm splitAlgo)
696       throws IOException, InterruptedException {
697     LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
698     LinkedList<Pair<byte[], byte[]>> logicalSplitting = Lists.newLinkedList();
699     LinkedList<Pair<byte[], byte[]>> physicalSplitting = Lists.newLinkedList();
700 
701     // Get table info
702     Pair<Path, Path> tableDirAndSplitFile =
703       getTableDirAndSplitFile(connection.getConfiguration(), tableName);
704     Path tableDir = tableDirAndSplitFile.getFirst();
705     FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
706     // Clear the cache to forcibly refresh region information
707     ((ClusterConnection)connection).clearRegionCache();
708     HTableDescriptor htd = null;
709     try (Table table = connection.getTable(tableName)) {
710       htd = table.getTableDescriptor();
711     }
712     try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
713 
714       // for every region that hasn't been verified as a finished split
715       for (Pair<byte[], byte[]> region : regionList) {
716         byte[] start = region.getFirst();
717         byte[] split = region.getSecond();
718 
719         // see if the new split daughter region has come online
720         try {
721           HRegionInfo dri = regionLocator.getRegionLocation(split).getRegionInfo();
722           if (dri.isOffline() || !Bytes.equals(dri.getStartKey(), split)) {
723             logicalSplitting.add(region);
724             continue;
725           }
726         } catch (NoServerForRegionException nsfre) {
727           // NSFRE will occur if the old hbase:meta entry has no server assigned
728           LOG.info(nsfre);
729           logicalSplitting.add(region);
730           continue;
731         }
732 
733         try {
734           // when a daughter region is opened, a compaction is triggered
735           // wait until compaction completes for both daughter regions
736           LinkedList<HRegionInfo> check = Lists.newLinkedList();
737           check.add(regionLocator.getRegionLocation(start).getRegionInfo());
738           check.add(regionLocator.getRegionLocation(split).getRegionInfo());
739           for (HRegionInfo hri : check.toArray(new HRegionInfo[check.size()])) {
740             byte[] sk = hri.getStartKey();
741             if (sk.length == 0)
742               sk = splitAlgo.firstRow();
743 
744             HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
745                 connection.getConfiguration(), fs, tableDir, hri, true);
746 
747             // Check every Column Family for that region -- check does not have references.
748             boolean refFound = false;
749             for (HColumnDescriptor c : htd.getFamilies()) {
750               if ((refFound = regionFs.hasReferences(c.getNameAsString()))) {
751                 break;
752               }
753             }
754 
755             // compaction is completed when all reference files are gone
756             if (!refFound) {
757               check.remove(hri);
758             }
759           }
760           if (check.isEmpty()) {
761             finished.add(region);
762           } else {
763             physicalSplitting.add(region);
764           }
765         } catch (NoServerForRegionException nsfre) {
766           LOG.debug("No Server Exception thrown for: " + splitAlgo.rowToStr(start));
767           physicalSplitting.add(region);
768           ((ClusterConnection)connection).clearRegionCache();
769         }
770       }
771 
772       LOG.debug("Split Scan: " + finished.size() + " finished / "
773           + logicalSplitting.size() + " split wait / "
774           + physicalSplitting.size() + " reference wait");
775 
776       return finished;
777     }
778   }
779 
780   /**
781    * @param conf
782    * @param tableName
783    * @return A Pair where first item is table dir and second is the split file.
784    * @throws IOException 
785    */
786   private static Pair<Path, Path> getTableDirAndSplitFile(final Configuration conf,
787       final TableName tableName)
788   throws IOException {
789     Path hbDir = FSUtils.getRootDir(conf);
790     Path tableDir = FSUtils.getTableDir(hbDir, tableName);
791     Path splitFile = new Path(tableDir, "_balancedSplit");
792     return new Pair<Path, Path>(tableDir, splitFile);
793   }
794 
795   static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection,
796       TableName tableName, SplitAlgorithm splitAlgo)
797   throws IOException {
798     Pair<Path, Path> tableDirAndSplitFile =
799       getTableDirAndSplitFile(connection.getConfiguration(), tableName);
800     Path tableDir = tableDirAndSplitFile.getFirst();
801     Path splitFile = tableDirAndSplitFile.getSecond();
802  
803     FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
804 
805     // Using strings because (new byte[]{0}).equals(new byte[]{0}) == false
806     Set<Pair<String, String>> daughterRegions = Sets.newHashSet();
807 
808     // Does a split file exist?
809     if (!fs.exists(splitFile)) {
810       // NO = fresh start. calculate splits to make
811       LOG.debug("No " + splitFile.getName() + " file. Calculating splits ");
812 
813       // Query meta for all regions in the table
814       Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
815       Pair<byte[][], byte[][]> tmp = null;
816       try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
817         tmp = regionLocator.getStartEndKeys();
818       }
819       Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length,
820           "Start and End rows should be equivalent");
821       for (int i = 0; i < tmp.getFirst().length; ++i) {
822         byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
823         if (start.length == 0)
824           start = splitAlgo.firstRow();
825         if (end.length == 0)
826           end = splitAlgo.lastRow();
827         rows.add(Pair.newPair(start, end));
828       }
829       LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");
830 
831       // prepare the split file
832       Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
833       FSDataOutputStream tmpOut = fs.create(tmpFile);
834 
835       // calculate all the splits == [daughterRegions] = [(start, splitPoint)]
836       for (Pair<byte[], byte[]> r : rows) {
837         byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
838         String startStr = splitAlgo.rowToStr(r.getFirst());
839         String splitStr = splitAlgo.rowToStr(splitPoint);
840         daughterRegions.add(Pair.newPair(startStr, splitStr));
841         LOG.debug("Will Split [" + startStr + " , "
842             + splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
843         tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr
844             + "\n");
845       }
846       tmpOut.close();
847       fs.rename(tmpFile, splitFile);
848     } else {
849       LOG.debug("_balancedSplit file found. Replay log to restore state...");
850       FSUtils.getInstance(fs, connection.getConfiguration())
851         .recoverFileLease(fs, splitFile, connection.getConfiguration(), null);
852 
853       // parse split file and process remaining splits
854       FSDataInputStream tmpIn = fs.open(splitFile);
855       StringBuilder sb = new StringBuilder(tmpIn.available());
856       while (tmpIn.available() > 0) {
857         sb.append(tmpIn.readChar());
858       }
859       tmpIn.close();
860       for (String line : sb.toString().split("\n")) {
861         String[] cmd = line.split(splitAlgo.separator());
862         Preconditions.checkArgument(3 == cmd.length);
863         byte[] start = splitAlgo.strToRow(cmd[1]);
864         String startStr = splitAlgo.rowToStr(start);
865         byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
866         String splitStr = splitAlgo.rowToStr(splitPoint);
867         Pair<String, String> r = Pair.newPair(startStr, splitStr);
868         if (cmd[0].equals("+")) {
869           LOG.debug("Adding: " + r);
870           daughterRegions.add(r);
871         } else {
872           LOG.debug("Removing: " + r);
873           Preconditions.checkArgument(cmd[0].equals("-"),
874               "Unknown option: " + cmd[0]);
875           Preconditions.checkState(daughterRegions.contains(r),
876               "Missing row: " + r);
877           daughterRegions.remove(r);
878         }
879       }
880       LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
881     }
882     LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
883     for (Pair<String, String> r : daughterRegions) {
884       ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()), splitAlgo
885           .strToRow(r.getSecond())));
886     }
887     return ret;
888   }
889 
890   /**
891    * HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region
892    * boundaries. The format of a HexStringSplit region boundary is the ASCII
893    * representation of an MD5 checksum, or any other uniformly distributed
894    * hexadecimal value. Row are hex-encoded long values in the range
895    * <b>"00000000" =&gt; "FFFFFFFF"</b> and are left-padded with zeros to keep the
896    * same order lexicographically as if they were binary.
897    *
898    * Since this split algorithm uses hex strings as keys, it is easy to read &amp;
899    * write in the shell but takes up more space and may be non-intuitive.
900    */
901   public static class HexStringSplit extends NumberStringSplit {
902     final static String DEFAULT_MIN_HEX = "00000000";
903     final static String DEFAULT_MAX_HEX = "FFFFFFFF";
904     final static int RADIX_HEX = 16;
905 
906     public HexStringSplit() {
907       super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
908     }
909 
910   }
911 
912   /**
913    * The format of a DecimalStringSplit region boundary is the ASCII representation of
914    * reversed sequential number, or any other uniformly distributed decimal value.
915    * Row are decimal-encoded long values in the range
916    * <b>"00000000" =&gt; "99999999"</b> and are left-padded with zeros to keep the
917    * same order lexicographically as if they were binary.
918    */
919   public static class DecimalStringSplit extends NumberStringSplit {
920     final static String DEFAULT_MIN_DEC = "00000000";
921     final static String DEFAULT_MAX_DEC = "99999999";
922     final static int RADIX_DEC = 10;
923 
924     public DecimalStringSplit() {
925       super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
926     }
927 
928   }
929 
930   public abstract static class NumberStringSplit implements SplitAlgorithm {
931 
932     String firstRow;
933     BigInteger firstRowInt;
934     String lastRow;
935     BigInteger lastRowInt;
936     int rowComparisonLength;
937     int radix;
938 
939     NumberStringSplit(String minRow, String maxRow, int radix) {
940       this.firstRow = minRow;
941       this.lastRow = maxRow;
942       this.radix = radix;
943       this.firstRowInt = BigInteger.ZERO;
944       this.lastRowInt = new BigInteger(lastRow, this.radix);
945       this.rowComparisonLength = lastRow.length();
946     }
947 
948     public byte[] split(byte[] start, byte[] end) {
949       BigInteger s = convertToBigInteger(start);
950       BigInteger e = convertToBigInteger(end);
951       Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
952       return convertToByte(split2(s, e));
953     }
954 
955     public byte[][] split(int n) {
956       Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
957           "last row (%s) is configured less than first row (%s)", lastRow,
958           firstRow);
959       // +1 to range because the last row is inclusive
960       BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
961       Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
962           "split granularity (%s) is greater than the range (%s)", n, range);
963 
964       BigInteger[] splits = new BigInteger[n - 1];
965       BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
966       for (int i = 1; i < n; i++) {
967         // NOTE: this means the last region gets all the slop.
968         // This is not a big deal if we're assuming n << MAXHEX
969         splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger
970             .valueOf(i)));
971       }
972       return convertToBytes(splits);
973     }
974 
975     @Override
976     public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
977       BigInteger s = convertToBigInteger(start);
978       BigInteger e = convertToBigInteger(end);
979 
980       Preconditions.checkArgument(e.compareTo(s) > 0,
981               "last row (%s) is configured less than first row (%s)", rowToStr(end),
982               end);
983       // +1 to range because the last row is inclusive
984       BigInteger range = e.subtract(s).add(BigInteger.ONE);
985       Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,
986               "split granularity (%s) is greater than the range (%s)", numSplits, range);
987 
988       BigInteger[] splits = new BigInteger[numSplits - 1];
989       BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));
990       for (int i = 1; i < numSplits; i++) {
991         // NOTE: this means the last region gets all the slop.
992         // This is not a big deal if we're assuming n << MAXHEX
993         splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger
994                 .valueOf(i)));
995       }
996 
997       if (inclusive) {
998         BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];
999         inclusiveSplitPoints[0] = convertToBigInteger(start);
1000         inclusiveSplitPoints[numSplits] = convertToBigInteger(end);
1001         System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);
1002         return convertToBytes(inclusiveSplitPoints);
1003       } else {
1004         return convertToBytes(splits);
1005       }
1006     }
1007 
1008     public byte[] firstRow() {
1009       return convertToByte(firstRowInt);
1010     }
1011 
1012     public byte[] lastRow() {
1013       return convertToByte(lastRowInt);
1014     }
1015 
1016     public void setFirstRow(String userInput) {
1017       firstRow = userInput;
1018       firstRowInt = new BigInteger(firstRow, radix);
1019     }
1020 
1021     public void setLastRow(String userInput) {
1022       lastRow = userInput;
1023       lastRowInt = new BigInteger(lastRow, radix);
1024       // Precondition: lastRow > firstRow, so last's length is the greater
1025       rowComparisonLength = lastRow.length();
1026     }
1027 
1028     public byte[] strToRow(String in) {
1029       return convertToByte(new BigInteger(in, radix));
1030     }
1031 
1032     public String rowToStr(byte[] row) {
1033       return Bytes.toStringBinary(row);
1034     }
1035 
1036     public String separator() {
1037       return " ";
1038     }
1039 
1040     @Override
1041     public void setFirstRow(byte[] userInput) {
1042       firstRow = Bytes.toString(userInput);
1043     }
1044 
1045     @Override
1046     public void setLastRow(byte[] userInput) {
1047       lastRow = Bytes.toString(userInput);
1048     }
1049 
1050     /**
1051      * Divide 2 numbers in half (for split algorithm)
1052      *
1053      * @param a number #1
1054      * @param b number #2
1055      * @return the midpoint of the 2 numbers
1056      */
1057     public BigInteger split2(BigInteger a, BigInteger b) {
1058       return a.add(b).divide(BigInteger.valueOf(2)).abs();
1059     }
1060 
1061     /**
1062      * Returns an array of bytes corresponding to an array of BigIntegers
1063      *
1064      * @param bigIntegers numbers to convert
1065      * @return bytes corresponding to the bigIntegers
1066      */
1067     public byte[][] convertToBytes(BigInteger[] bigIntegers) {
1068       byte[][] returnBytes = new byte[bigIntegers.length][];
1069       for (int i = 0; i < bigIntegers.length; i++) {
1070         returnBytes[i] = convertToByte(bigIntegers[i]);
1071       }
1072       return returnBytes;
1073     }
1074 
1075     /**
1076      * Returns the bytes corresponding to the BigInteger
1077      *
1078      * @param bigInteger number to convert
1079      * @param pad padding length
1080      * @return byte corresponding to input BigInteger
1081      */
1082     public byte[] convertToByte(BigInteger bigInteger, int pad) {
1083       String bigIntegerString = bigInteger.toString(radix);
1084       bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
1085       return Bytes.toBytes(bigIntegerString);
1086     }
1087 
1088     /**
1089      * Returns the bytes corresponding to the BigInteger
1090      *
1091      * @param bigInteger number to convert
1092      * @return corresponding bytes
1093      */
1094     public byte[] convertToByte(BigInteger bigInteger) {
1095       return convertToByte(bigInteger, rowComparisonLength);
1096     }
1097 
1098     /**
1099      * Returns the BigInteger represented by the byte array
1100      *
1101      * @param row byte array representing row
1102      * @return the corresponding BigInteger
1103      */
1104     public BigInteger convertToBigInteger(byte[] row) {
1105       return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
1106           : BigInteger.ZERO;
1107     }
1108 
1109     @Override
1110     public String toString() {
1111       return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
1112           + "," + rowToStr(lastRow()) + "]";
1113     }
1114   }
1115 
1116   /**
1117    * A SplitAlgorithm that divides the space of possible keys evenly. Useful
1118    * when the keys are approximately uniform random bytes (e.g. hashes). Rows
1119    * are raw byte values in the range <b>00 =&gt; FF</b> and are right-padded with
1120    * zeros to keep the same memcmp() order. This is the natural algorithm to use
1121    * for a byte[] environment and saves space, but is not necessarily the
1122    * easiest for readability.
1123    */
1124   public static class UniformSplit implements SplitAlgorithm {
1125     static final byte xFF = (byte) 0xFF;
1126     byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
1127     byte[] lastRowBytes =
1128             new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
1129     public byte[] split(byte[] start, byte[] end) {
1130       return Bytes.split(start, end, 1)[1];
1131     }
1132 
1133     @Override
1134     @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
1135       justification="Preconditions checks insure we are not going to dereference a null value")
1136     public byte[][] split(int numRegions) {
1137       Preconditions.checkArgument(
1138           Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
1139           "last row (%s) is configured less than first row (%s)",
1140           Bytes.toStringBinary(lastRowBytes),
1141           Bytes.toStringBinary(firstRowBytes));
1142 
1143       byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
1144           numRegions - 1);
1145       Preconditions.checkState(splits != null,
1146           "Could not split region with given user input: " + this);
1147 
1148       // remove endpoints, which are included in the splits list
1149       return Arrays.copyOfRange(splits, 1, splits.length - 1);
1150     }
1151 
1152     public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
1153       if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) {
1154         start = firstRowBytes;
1155       }
1156       if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) {
1157         end = lastRowBytes;
1158       }
1159       Preconditions.checkArgument(
1160               Bytes.compareTo(end, start) > 0,
1161               "last row (%s) is configured less than first row (%s)",
1162               Bytes.toStringBinary(end),
1163               Bytes.toStringBinary(start));
1164 
1165       byte[][] splits = Bytes.split(start, end, true,
1166               numSplits - 1);
1167       Preconditions.checkState(splits != null,
1168               "Could not calculate input splits with given user input: " + this);
1169       if (inclusive) {
1170         return splits;
1171       } else {
1172         // remove endpoints, which are included in the splits list
1173         return Arrays.copyOfRange(splits, 1, splits.length - 1);
1174       }
1175     }
1176 
1177     @Override
1178     public byte[] firstRow() {
1179       return firstRowBytes;
1180     }
1181 
1182     @Override
1183     public byte[] lastRow() {
1184       return lastRowBytes;
1185     }
1186 
1187     @Override
1188     public void setFirstRow(String userInput) {
1189       firstRowBytes = Bytes.toBytesBinary(userInput);
1190     }
1191 
1192     @Override
1193     public void setLastRow(String userInput) {
1194       lastRowBytes = Bytes.toBytesBinary(userInput);
1195     }
1196 
1197 
1198     @Override
1199     public void setFirstRow(byte[] userInput) {
1200       firstRowBytes = userInput;
1201     }
1202 
1203     @Override
1204     public void setLastRow(byte[] userInput) {
1205       lastRowBytes = userInput;
1206     }
1207 
1208     @Override
1209     public byte[] strToRow(String input) {
1210       return Bytes.toBytesBinary(input);
1211     }
1212 
1213     @Override
1214     public String rowToStr(byte[] row) {
1215       return Bytes.toStringBinary(row);
1216     }
1217 
1218     @Override
1219     public String separator() {
1220       return ",";
1221     }
1222 
1223     @Override
1224     public String toString() {
1225       return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
1226           + "," + rowToStr(lastRow()) + "]";
1227     }
1228   }
1229 }