View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.IOException;
22  import java.io.InterruptedIOException;
23  import java.util.ArrayList;
24  import java.util.List;
25  import java.util.ListIterator;
26  import java.util.Map;
27  import java.util.concurrent.Callable;
28  import java.util.concurrent.ExecutionException;
29  import java.util.concurrent.Executors;
30  import java.util.concurrent.Future;
31  import java.util.concurrent.ThreadFactory;
32  import java.util.concurrent.ThreadPoolExecutor;
33  import java.util.concurrent.TimeUnit;
34  
35  import org.apache.commons.logging.Log;
36  import org.apache.commons.logging.LogFactory;
37  import org.apache.hadoop.fs.Path;
38  import org.apache.hadoop.hbase.classification.InterfaceAudience;
39  import org.apache.hadoop.hbase.HConstants;
40  import org.apache.hadoop.hbase.HRegionInfo;
41  import org.apache.hadoop.hbase.Server;
42  import org.apache.hadoop.hbase.ServerName;
43  import org.apache.hadoop.hbase.MetaTableAccessor;
44  import org.apache.hadoop.hbase.client.HConnection;
45  import org.apache.hadoop.hbase.client.Mutation;
46  import org.apache.hadoop.hbase.client.Put;
47  import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
48  import org.apache.hadoop.hbase.coordination.SplitTransactionCoordination;
49  import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
50  import org.apache.hadoop.hbase.security.User;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.CancelableProgressable;
53  import org.apache.hadoop.hbase.util.ConfigUtil;
54  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
55  import org.apache.hadoop.hbase.util.FSUtils;
56  import org.apache.hadoop.hbase.util.HasThread;
57  import org.apache.hadoop.hbase.util.Pair;
58  import org.apache.hadoop.hbase.util.PairOfSameType;
59  import org.apache.zookeeper.KeeperException;
60  
61  import com.google.common.util.concurrent.ThreadFactoryBuilder;
62  
63  @InterfaceAudience.Private
64  public class SplitTransactionImpl implements SplitTransaction {
65    private static final Log LOG = LogFactory.getLog(SplitTransaction.class);
66  
67    /*
68     * Region to split
69     */
70    private final HRegion parent;
71    private HRegionInfo hri_a;
72    private HRegionInfo hri_b;
73    private long fileSplitTimeout = 30000;
74    public SplitTransactionCoordination.SplitTransactionDetails std;
75    boolean useZKForAssignment;
76  
77    /*
78     * Row to split around
79     */
80    private final byte [] splitrow;
81  
82    /*
83     * Transaction state for listener, only valid during execute and
84     * rollback
85     */
86    private SplitTransactionPhase currentPhase = SplitTransactionPhase.STARTED;
87    private Server server;
88    private RegionServerServices rsServices;
89  
90    public static class JournalEntryImpl implements JournalEntry {
91      private SplitTransactionPhase type;
92      private long timestamp;
93  
94      public JournalEntryImpl(SplitTransactionPhase type) {
95        this(type, EnvironmentEdgeManager.currentTime());
96      }
97  
98      public JournalEntryImpl(SplitTransactionPhase type, long timestamp) {
99        this.type = type;
100       this.timestamp = timestamp;
101     }
102 
103     @Override
104     public String toString() {
105       StringBuilder sb = new StringBuilder();
106       sb.append(type);
107       sb.append(" at ");
108       sb.append(timestamp);
109       return sb.toString();
110     }
111 
112     @Override
113     public SplitTransactionPhase getPhase() {
114       return type;
115     }
116 
117     @Override
118     public long getTimeStamp() {
119       return timestamp;
120     }
121   }
122 
123   /*
124    * Journal of how far the split transaction has progressed.
125    */
126   private final List<JournalEntry> journal = new ArrayList<JournalEntry>();
127 
128   /**
129    * Listeners
130    */
131   private final ArrayList<TransactionListener> listeners = new ArrayList<TransactionListener>();
132 
133   /**
134    * Constructor
135    * @param r Region to split
136    * @param splitrow Row to split around
137    */
138   public SplitTransactionImpl(final Region r, final byte [] splitrow) {
139     this.parent = (HRegion)r;
140     this.splitrow = splitrow;
141     this.journal.add(new JournalEntryImpl(SplitTransactionPhase.STARTED));
142     useZKForAssignment = ConfigUtil.useZKForAssignment(parent.getBaseConf());
143   }
144 
145   private void transition(SplitTransactionPhase nextPhase) throws IOException {
146     transition(nextPhase, false);
147   }
148 
149   private void transition(SplitTransactionPhase nextPhase, boolean isRollback)
150       throws IOException {
151     if (!isRollback) {
152       // Add to the journal first, because if the listener throws an exception
153       // we need to roll back starting at 'nextPhase'
154       this.journal.add(new JournalEntryImpl(nextPhase));
155     }
156     for (int i = 0; i < listeners.size(); i++) {
157       TransactionListener listener = listeners.get(i);
158       if (!isRollback) {
159         listener.transition(this, currentPhase, nextPhase);
160       } else {
161         listener.rollback(this, currentPhase, nextPhase);
162       }
163     }
164     currentPhase = nextPhase;
165   }
166 
167   /**
168    * Does checks on split inputs.
169    * @return <code>true</code> if the region is splittable else
170    * <code>false</code> if it is not (e.g. its already closed, etc.).
171    */
172   @Override
173   public boolean prepare() throws IOException {
174     if (!this.parent.isSplittable()) return false;
175     // Split key can be null if this region is unsplittable; i.e. has refs.
176     if (this.splitrow == null) return false;
177     HRegionInfo hri = this.parent.getRegionInfo();
178     parent.prepareToSplit();
179     // Check splitrow.
180     byte [] startKey = hri.getStartKey();
181     byte [] endKey = hri.getEndKey();
182     if (Bytes.equals(startKey, splitrow) ||
183         !this.parent.getRegionInfo().containsRow(splitrow)) {
184       LOG.info("Split row is not inside region key range or is equal to " +
185           "startkey: " + Bytes.toStringBinary(this.splitrow));
186       return false;
187     }
188     long rid = getDaughterRegionIdTimestamp(hri);
189     this.hri_a = new HRegionInfo(hri.getTable(), startKey, this.splitrow, false, rid);
190     this.hri_b = new HRegionInfo(hri.getTable(), this.splitrow, endKey, false, rid);
191 
192     transition(SplitTransactionPhase.PREPARED);
193 
194     return true;
195   }
196 
197   /**
198    * Calculate daughter regionid to use.
199    * @param hri Parent {@link HRegionInfo}
200    * @return Daughter region id (timestamp) to use.
201    */
202   private static long getDaughterRegionIdTimestamp(final HRegionInfo hri) {
203     long rid = EnvironmentEdgeManager.currentTime();
204     // Regionid is timestamp.  Can't be less than that of parent else will insert
205     // at wrong location in hbase:meta (See HBASE-710).
206     if (rid < hri.getRegionId()) {
207       LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() +
208         " but current time here is " + rid);
209       rid = hri.getRegionId() + 1;
210     }
211     return rid;
212   }
213 
214   private static IOException closedByOtherException = new IOException(
215       "Failed to close region: already closed by another thread");
216 
217   /* package */PairOfSameType<Region> createDaughters(final Server server,
218       final RegionServerServices services) throws IOException {
219     return createDaughters(server, services, null);
220   }
221 
222   /**
223    * Prepare the regions and region files.
224    * @param server Hosting server instance.  Can be null when testing (won't try
225    * and update in zk if a null server)
226    * @param services Used to online/offline regions.
227    * @param user
228    * @throws IOException If thrown, transaction failed.
229    *    Call {@link #rollback(Server, RegionServerServices)}
230    * @return Regions created
231    */
232   /* package */PairOfSameType<Region> createDaughters(final Server server,
233       final RegionServerServices services, User user) throws IOException {
234     LOG.info("Starting split of region " + this.parent);
235     if ((server != null && server.isStopped()) ||
236         (services != null && services.isStopping())) {
237       throw new IOException("Server is stopped or stopping");
238     }
239     assert !this.parent.lock.writeLock().isHeldByCurrentThread():
240       "Unsafe to hold write lock while performing RPCs";
241 
242     transition(SplitTransactionPhase.BEFORE_PRE_SPLIT_HOOK);
243 
244     // Coprocessor callback
245     if (this.parent.getCoprocessorHost() != null) {
246       // TODO: Remove one of these
247       parent.getCoprocessorHost().preSplit(user);
248       parent.getCoprocessorHost().preSplit(splitrow, user);
249     }
250 
251     transition(SplitTransactionPhase.AFTER_PRE_SPLIT_HOOK);
252 
253     // If true, no cluster to write meta edits to or to update znodes in.
254     boolean testing = server == null? true:
255         server.getConfiguration().getBoolean("hbase.testing.nocluster", false);
256     this.fileSplitTimeout = testing ? this.fileSplitTimeout :
257         server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
258           this.fileSplitTimeout);
259 
260     PairOfSameType<Region> daughterRegions = stepsBeforePONR(server, services, testing);
261 
262     final List<Mutation> metaEntries = new ArrayList<Mutation>();
263     boolean ret = false;
264     if (this.parent.getCoprocessorHost() != null) {
265       ret = parent.getCoprocessorHost().preSplitBeforePONR(splitrow, metaEntries, user);
266       if (ret) {
267           throw new IOException("Coprocessor bypassing region "
268             + this.parent.getRegionInfo().getRegionNameAsString() + " split.");
269       }
270       try {
271         for (Mutation p : metaEntries) {
272           HRegionInfo.parseRegionName(p.getRow());
273         }
274       } catch (IOException e) {
275         LOG.error("Row key of mutation from coprocessor is not parsable as region name."
276             + "Mutations from coprocessor should only for hbase:meta table.");
277         throw e;
278       }
279     }
280 
281     // This is the point of no return.  Adding subsequent edits to .META. as we
282     // do below when we do the daughter opens adding each to .META. can fail in
283     // various interesting ways the most interesting of which is a timeout
284     // BUT the edits all go through (See HBASE-3872).  IF we reach the PONR
285     // then subsequent failures need to crash out this regionserver; the
286     // server shutdown processing should be able to fix-up the incomplete split.
287     // The offlined parent will have the daughters as extra columns.  If
288     // we leave the daughter regions in place and do not remove them when we
289     // crash out, then they will have their references to the parent in place
290     // still and the server shutdown fixup of .META. will point to these
291     // regions.
292     // We should add PONR JournalEntry before offlineParentInMeta,so even if
293     // OfflineParentInMeta timeout,this will cause regionserver exit,and then
294     // master ServerShutdownHandler will fix daughter & avoid data loss. (See
295     // HBase-4562).
296     transition(SplitTransactionPhase.PONR);
297 
298     // Edit parent in meta.  Offlines parent region and adds splita and splitb
299     // as an atomic update. See HBASE-7721. This update to META makes the region
300     // will determine whether the region is split or not in case of failures.
301     // If it is successful, master will roll-forward, if not, master will rollback
302     // and assign the parent region.
303     if (!testing && useZKForAssignment) {
304       if (metaEntries == null || metaEntries.isEmpty()) {
305         MetaTableAccessor.splitRegion(server.getConnection(),
306           parent.getRegionInfo(), daughterRegions.getFirst().getRegionInfo(),
307           daughterRegions.getSecond().getRegionInfo(), server.getServerName(),
308           parent.getTableDesc().getRegionReplication());
309       } else {
310         offlineParentInMetaAndputMetaEntries(server.getConnection(),
311           parent.getRegionInfo(), daughterRegions.getFirst().getRegionInfo(), daughterRegions
312               .getSecond().getRegionInfo(), server.getServerName(), metaEntries,
313               parent.getTableDesc().getRegionReplication());
314       }
315     } else if (services != null && !useZKForAssignment) {
316       if (!services.reportRegionStateTransition(TransitionCode.SPLIT_PONR,
317           parent.getRegionInfo(), hri_a, hri_b)) {
318         // Passed PONR, let SSH clean it up
319         throw new IOException("Failed to notify master that split passed PONR: "
320           + parent.getRegionInfo().getRegionNameAsString());
321       }
322     }
323     return daughterRegions;
324   }
325 
326   public PairOfSameType<Region> stepsBeforePONR(final Server server,
327       final RegionServerServices services, boolean testing) throws IOException {
328 
329     if (useCoordinatedStateManager(server)) {
330       if (std == null) {
331         std =
332             ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
333                 .getSplitTransactionCoordination().getDefaultDetails();
334       }
335       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
336           .getSplitTransactionCoordination().startSplitTransaction(parent, server.getServerName(),
337             hri_a, hri_b);
338     } else if (services != null && !useZKForAssignment) {
339       if (!services.reportRegionStateTransition(TransitionCode.READY_TO_SPLIT,
340           parent.getRegionInfo(), hri_a, hri_b)) {
341         throw new IOException("Failed to get ok from master to split "
342           + parent.getRegionInfo().getRegionNameAsString());
343       }
344     }
345 
346     transition(SplitTransactionPhase.SET_SPLITTING);
347 
348     if (useCoordinatedStateManager(server)) {
349       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
350           .getSplitTransactionCoordination().waitForSplitTransaction(services, parent, hri_a,
351             hri_b, std);
352     }
353 
354     this.parent.getRegionFileSystem().createSplitsDir(hri_a, hri_b);
355 
356     transition(SplitTransactionPhase.CREATE_SPLIT_DIR);
357 
358     Map<byte[], List<StoreFile>> hstoreFilesToSplit = null;
359     Exception exceptionToThrow = null;
360     try{
361       hstoreFilesToSplit = this.parent.close(false);
362     } catch (Exception e) {
363       exceptionToThrow = e;
364     }
365     if (exceptionToThrow == null && hstoreFilesToSplit == null) {
366       // The region was closed by a concurrent thread.  We can't continue
367       // with the split, instead we must just abandon the split.  If we
368       // reopen or split this could cause problems because the region has
369       // probably already been moved to a different server, or is in the
370       // process of moving to a different server.
371       exceptionToThrow = closedByOtherException;
372     }
373     if (exceptionToThrow != closedByOtherException) {
374       transition(SplitTransactionPhase.CLOSED_PARENT_REGION);
375     }
376     if (exceptionToThrow != null) {
377       if (exceptionToThrow instanceof IOException) throw (IOException)exceptionToThrow;
378       throw new IOException(exceptionToThrow);
379     }
380     if (!testing) {
381       services.removeFromOnlineRegions(this.parent, null);
382     }
383 
384     transition(SplitTransactionPhase.OFFLINED_PARENT);
385 
386     // TODO: If splitStoreFiles were multithreaded would we complete steps in
387     // less elapsed time?  St.Ack 20100920
388     //
389     // splitStoreFiles creates daughter region dirs under the parent splits dir
390     // Nothing to unroll here if failure -- clean up of CREATE_SPLIT_DIR will
391     // clean this up.
392     Pair<Integer, Integer> expectedReferences = splitStoreFiles(hstoreFilesToSplit);
393 
394     // Log to the journal that we are creating region A, the first daughter
395     // region.  We could fail halfway through.  If we do, we could have left
396     // stuff in fs that needs cleanup -- a storefile or two.  Thats why we
397     // add entry to journal BEFORE rather than AFTER the change.
398     transition(SplitTransactionPhase.STARTED_REGION_A_CREATION);
399 
400     assertReferenceFileCount(expectedReferences.getFirst(),
401         this.parent.getRegionFileSystem().getSplitsDir(this.hri_a));
402     Region a = this.parent.createDaughterRegionFromSplits(this.hri_a);
403     assertReferenceFileCount(expectedReferences.getFirst(),
404         new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_a.getEncodedName()));
405 
406     // Ditto
407     transition(SplitTransactionPhase.STARTED_REGION_B_CREATION);
408 
409     assertReferenceFileCount(expectedReferences.getSecond(),
410         this.parent.getRegionFileSystem().getSplitsDir(this.hri_b));
411     Region b = this.parent.createDaughterRegionFromSplits(this.hri_b);
412     assertReferenceFileCount(expectedReferences.getSecond(),
413         new Path(this.parent.getRegionFileSystem().getTableDir(), this.hri_b.getEncodedName()));
414 
415     return new PairOfSameType<Region>(a, b);
416   }
417 
418   void assertReferenceFileCount(int expectedReferenceFileCount, Path dir)
419       throws IOException {
420     if (expectedReferenceFileCount != 0 &&
421         expectedReferenceFileCount != FSUtils.getRegionReferenceFileCount(parent.getFilesystem(),
422           dir)) {
423       throw new IOException("Failing split. Expected reference file count isn't equal.");
424     }
425   }
426 
427   /**
428    * Perform time consuming opening of the daughter regions.
429    * @param server Hosting server instance.  Can be null when testing
430    * @param services Used to online/offline regions.
431    * @param a first daughter region
432    * @param a second daughter region
433    * @throws IOException If thrown, transaction failed.
434    *          Call {@link #rollback(Server, RegionServerServices)}
435    */
436   /* package */void openDaughters(final Server server,
437       final RegionServerServices services, Region a, Region b)
438       throws IOException {
439     boolean stopped = server != null && server.isStopped();
440     boolean stopping = services != null && services.isStopping();
441     // TODO: Is this check needed here?
442     if (stopped || stopping) {
443       LOG.info("Not opening daughters " +
444           b.getRegionInfo().getRegionNameAsString() +
445           " and " +
446           a.getRegionInfo().getRegionNameAsString() +
447           " because stopping=" + stopping + ", stopped=" + stopped);
448     } else {
449       // Open daughters in parallel.
450       DaughterOpener aOpener = new DaughterOpener(server, (HRegion)a);
451       DaughterOpener bOpener = new DaughterOpener(server, (HRegion)b);
452       aOpener.start();
453       bOpener.start();
454       try {
455         aOpener.join();
456         if (aOpener.getException() == null) {
457           transition(SplitTransactionPhase.OPENED_REGION_A);
458         }
459         bOpener.join();
460         if (bOpener.getException() == null) {
461           transition(SplitTransactionPhase.OPENED_REGION_B);
462         }
463       } catch (InterruptedException e) {
464         throw (InterruptedIOException)new InterruptedIOException().initCause(e);
465       }
466       if (aOpener.getException() != null) {
467         throw new IOException("Failed " +
468           aOpener.getName(), aOpener.getException());
469       }
470       if (bOpener.getException() != null) {
471         throw new IOException("Failed " +
472           bOpener.getName(), bOpener.getException());
473       }
474       if (services != null) {
475         try {
476           if (useZKForAssignment) {
477             // add 2nd daughter first (see HBASE-4335)
478             services.postOpenDeployTasks(b);
479           } else if (!services.reportRegionStateTransition(TransitionCode.SPLIT,
480               parent.getRegionInfo(), hri_a, hri_b)) {
481             throw new IOException("Failed to report split region to master: "
482               + parent.getRegionInfo().getShortNameToLog());
483           }
484           // Should add it to OnlineRegions
485           services.addToOnlineRegions(b);
486           if (useZKForAssignment) {
487             services.postOpenDeployTasks(a);
488           }
489           services.addToOnlineRegions(a);
490         } catch (KeeperException ke) {
491           throw new IOException(ke);
492         }
493       }
494     }
495   }
496 
497   public PairOfSameType<Region> execute(final Server server,
498     final RegionServerServices services)
499         throws IOException {
500     if (User.isHBaseSecurityEnabled(parent.getBaseConf())) {
501       LOG.warn("Should use execute(Server, RegionServerServices, User)");
502     }
503     return execute(server, services, null);
504   }
505 
506   /**
507    * Run the transaction.
508    * @param server Hosting server instance.  Can be null when testing
509    * @param services Used to online/offline regions.
510    * @throws IOException If thrown, transaction failed.
511    *          Call {@link #rollback(Server, RegionServerServices)}
512    * @return Regions created
513    * @throws IOException
514    * @see #rollback(Server, RegionServerServices)
515    */
516   @Override
517   public PairOfSameType<Region> execute(final Server server,
518       final RegionServerServices services, User user) throws IOException {
519     this.server = server;
520     this.rsServices = services;
521     useZKForAssignment = server == null ? true :
522       ConfigUtil.useZKForAssignment(server.getConfiguration());
523     if (useCoordinatedStateManager(server)) {
524       std =
525           ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
526               .getSplitTransactionCoordination().getDefaultDetails();
527     }
528     PairOfSameType<Region> regions = createDaughters(server, services, user);
529     if (this.parent.getCoprocessorHost() != null) {
530       parent.getCoprocessorHost().preSplitAfterPONR(user);
531     }
532     regions = stepsAfterPONR(server, services, regions, user);
533 
534     transition(SplitTransactionPhase.COMPLETED);
535 
536     return regions;
537   }
538 
539   @Deprecated
540   public PairOfSameType<Region> stepsAfterPONR(final Server server,
541       final RegionServerServices services, final PairOfSameType<Region> regions)
542       throws IOException {
543     return stepsAfterPONR(server, services, regions, null);
544   }
545 
546   public PairOfSameType<Region> stepsAfterPONR(final Server server,
547       final RegionServerServices services, final PairOfSameType<Region> regions, User user)
548       throws IOException {
549     openDaughters(server, services, regions.getFirst(), regions.getSecond());
550     if (useCoordinatedStateManager(server)) {
551       ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
552           .getSplitTransactionCoordination().completeSplitTransaction(services, regions.getFirst(),
553             regions.getSecond(), std, parent);
554     }
555 
556     transition(SplitTransactionPhase.BEFORE_POST_SPLIT_HOOK);
557 
558     // Coprocessor callback
559     if (parent.getCoprocessorHost() != null) {
560       this.parent.getCoprocessorHost().postSplit(regions.getFirst(), regions.getSecond(), user);
561     }
562 
563     transition(SplitTransactionPhase.AFTER_POST_SPLIT_HOOK);
564 
565     return regions;
566   }
567 
568   private void offlineParentInMetaAndputMetaEntries(HConnection hConnection,
569       HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB,
570       ServerName serverName, List<Mutation> metaEntries, int regionReplication)
571           throws IOException {
572     List<Mutation> mutations = metaEntries;
573     HRegionInfo copyOfParent = new HRegionInfo(parent);
574     copyOfParent.setOffline(true);
575     copyOfParent.setSplit(true);
576 
577     //Put for parent
578     Put putParent = MetaTableAccessor.makePutFromRegionInfo(copyOfParent);
579     MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB);
580     mutations.add(putParent);
581 
582     //Puts for daughters
583     Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA);
584     Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB);
585 
586     addLocation(putA, serverName, 1); //these are new regions, openSeqNum = 1 is fine.
587     addLocation(putB, serverName, 1);
588     mutations.add(putA);
589     mutations.add(putB);
590 
591     // Add empty locations for region replicas of daughters so that number of replicas can be
592     // cached whenever the primary region is looked up from meta
593     for (int i = 1; i < regionReplication; i++) {
594       addEmptyLocation(putA, i);
595       addEmptyLocation(putB, i);
596     }
597 
598     MetaTableAccessor.mutateMetaTable(hConnection, mutations);
599   }
600 
601   private static Put addEmptyLocation(final Put p, int replicaId){
602     p.addImmutable(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(replicaId), null);
603     p.addImmutable(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(replicaId),
604       null);
605     p.addImmutable(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(replicaId), null);
606     return p;
607   }
608 
609   public Put addLocation(final Put p, final ServerName sn, long openSeqNum) {
610     p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER,
611       Bytes.toBytes(sn.getHostAndPort()));
612     p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER,
613       Bytes.toBytes(sn.getStartcode()));
614     p.addImmutable(HConstants.CATALOG_FAMILY, HConstants.SEQNUM_QUALIFIER,
615         Bytes.toBytes(openSeqNum));
616     return p;
617   }
618 
619   /*
620    * Open daughter region in its own thread.
621    * If we fail, abort this hosting server.
622    */
623   class DaughterOpener extends HasThread {
624     private final Server server;
625     private final HRegion r;
626     private Throwable t = null;
627 
628     DaughterOpener(final Server s, final HRegion r) {
629       super((s == null? "null-services": s.getServerName()) +
630         "-daughterOpener=" + r.getRegionInfo().getEncodedName());
631       setDaemon(true);
632       this.server = s;
633       this.r = r;
634     }
635 
636     /**
637      * @return Null if open succeeded else exception that causes us fail open.
638      * Call it after this thread exits else you may get wrong view on result.
639      */
640     Throwable getException() {
641       return this.t;
642     }
643 
644     @Override
645     public void run() {
646       try {
647         openDaughterRegion(this.server, r);
648       } catch (Throwable t) {
649         this.t = t;
650       }
651     }
652   }
653 
654   /**
655    * Open daughter regions, add them to online list and update meta.
656    * @param server
657    * @param daughter
658    * @throws IOException
659    * @throws KeeperException
660    */
661   void openDaughterRegion(final Server server, final HRegion daughter)
662   throws IOException, KeeperException {
663     HRegionInfo hri = daughter.getRegionInfo();
664     LoggingProgressable reporter = server == null ? null
665         : new LoggingProgressable(hri, server.getConfiguration().getLong(
666             "hbase.regionserver.split.daughter.open.log.interval", 10000));
667     daughter.openHRegion(reporter);
668   }
669 
670   static class LoggingProgressable implements CancelableProgressable {
671     private final HRegionInfo hri;
672     private long lastLog = -1;
673     private final long interval;
674 
675     LoggingProgressable(final HRegionInfo hri, final long interval) {
676       this.hri = hri;
677       this.interval = interval;
678     }
679 
680     @Override
681     public boolean progress() {
682       long now = EnvironmentEdgeManager.currentTime();
683       if (now - lastLog > this.interval) {
684         LOG.info("Opening " + this.hri.getRegionNameAsString());
685         this.lastLog = now;
686       }
687       return true;
688     }
689   }
690 
691   private boolean useCoordinatedStateManager(final Server server) {
692     return server != null && useZKForAssignment && server.getCoordinatedStateManager() != null;
693   }
694 
695   /**
696    * Creates reference files for top and bottom half of the
697    * @param hstoreFilesToSplit map of store files to create half file references for.
698    * @return the number of reference files that were created.
699    * @throws IOException
700    */
701   private Pair<Integer, Integer> splitStoreFiles(
702       final Map<byte[], List<StoreFile>> hstoreFilesToSplit)
703       throws IOException {
704     if (hstoreFilesToSplit == null) {
705       // Could be null because close didn't succeed -- for now consider it fatal
706       throw new IOException("Close returned empty list of StoreFiles");
707     }
708     // The following code sets up a thread pool executor with as many slots as
709     // there's files to split. It then fires up everything, waits for
710     // completion and finally checks for any exception
711     int nbFiles = 0;
712     for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
713         nbFiles += entry.getValue().size();
714     }
715     if (nbFiles == 0) {
716       // no file needs to be splitted.
717       return new Pair<Integer, Integer>(0,0);
718     }
719     // Default max #threads to use is the smaller of table's configured number of blocking store
720     // files or the available number of logical cores.
721     int defMaxThreads = Math.min(parent.conf.getInt(HStore.BLOCKING_STOREFILES_KEY,
722                 HStore.DEFAULT_BLOCKING_STOREFILE_COUNT),
723             Runtime.getRuntime().availableProcessors());
724     // Max #threads is the smaller of the number of storefiles or the default max determined above.
725     int maxThreads = Math.min(parent.conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX,
726                 defMaxThreads), nbFiles);
727     LOG.info("Preparing to split " + nbFiles + " storefiles for region " + this.parent +
728             " using " + maxThreads + " threads");
729     ThreadFactoryBuilder builder = new ThreadFactoryBuilder();
730     builder.setNameFormat("StoreFileSplitter-%1$d");
731     ThreadFactory factory = builder.build();
732     ThreadPoolExecutor threadPool =
733       (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, factory);
734     List<Future<Pair<Path,Path>>> futures = new ArrayList<Future<Pair<Path,Path>>> (nbFiles);
735 
736     // Split each store file.
737     for (Map.Entry<byte[], List<StoreFile>> entry: hstoreFilesToSplit.entrySet()) {
738       for (StoreFile sf: entry.getValue()) {
739         StoreFileSplitter sfs = new StoreFileSplitter(entry.getKey(), sf);
740         futures.add(threadPool.submit(sfs));
741       }
742     }
743     // Shutdown the pool
744     threadPool.shutdown();
745 
746     // Wait for all the tasks to finish
747     try {
748       boolean stillRunning = !threadPool.awaitTermination(
749           this.fileSplitTimeout, TimeUnit.MILLISECONDS);
750       if (stillRunning) {
751         threadPool.shutdownNow();
752         // wait for the thread to shutdown completely.
753         while (!threadPool.isTerminated()) {
754           Thread.sleep(50);
755         }
756         throw new IOException("Took too long to split the" +
757             " files and create the references, aborting split");
758       }
759     } catch (InterruptedException e) {
760       throw (InterruptedIOException)new InterruptedIOException().initCause(e);
761     }
762 
763     int created_a = 0;
764     int created_b = 0;
765     // Look for any exception
766     for (Future<Pair<Path, Path>> future : futures) {
767       try {
768         Pair<Path, Path> p = future.get();
769         created_a += p.getFirst() != null ? 1 : 0;
770         created_b += p.getSecond() != null ? 1 : 0;
771       } catch (InterruptedException e) {
772         throw (InterruptedIOException) new InterruptedIOException().initCause(e);
773       } catch (ExecutionException e) {
774         throw new IOException(e);
775       }
776     }
777 
778     if (LOG.isDebugEnabled()) {
779       LOG.debug("Split storefiles for region " + this.parent + " Daughter A: " + created_a
780           + " storefiles, Daughter B: " + created_b + " storefiles.");
781     }
782     return new Pair<Integer, Integer>(created_a, created_b);
783   }
784 
785   private Pair<Path, Path> splitStoreFile(final byte[] family, final StoreFile sf)
786       throws IOException {
787     if (LOG.isDebugEnabled()) {
788         LOG.debug("Splitting started for store file: " + sf.getPath() + " for region: " +
789                   this.parent);
790     }
791     HRegionFileSystem fs = this.parent.getRegionFileSystem();
792     String familyName = Bytes.toString(family);
793 
794     Path path_a =
795         fs.splitStoreFile(this.hri_a, familyName, sf, this.splitrow, false,
796           this.parent.getSplitPolicy());
797     Path path_b =
798         fs.splitStoreFile(this.hri_b, familyName, sf, this.splitrow, true,
799           this.parent.getSplitPolicy());
800     if (LOG.isDebugEnabled()) {
801         LOG.debug("Splitting complete for store file: " + sf.getPath() + " for region: " +
802                   this.parent);
803     }
804     return new Pair<Path,Path>(path_a, path_b);
805   }
806 
807   /**
808    * Utility class used to do the file splitting / reference writing
809    * in parallel instead of sequentially.
810    */
811   class StoreFileSplitter implements Callable<Pair<Path,Path>> {
812     private final byte[] family;
813     private final StoreFile sf;
814 
815     /**
816      * Constructor that takes what it needs to split
817      * @param family Family that contains the store file
818      * @param sf which file
819      */
820     public StoreFileSplitter(final byte[] family, final StoreFile sf) {
821       this.sf = sf;
822       this.family = family;
823     }
824 
825     @Override
826     public Pair<Path,Path> call() throws IOException {
827       return splitStoreFile(family, sf);
828     }
829   }
830 
831   @Override
832   public boolean rollback(final Server server, final RegionServerServices services)
833       throws IOException {
834     if (User.isHBaseSecurityEnabled(parent.getBaseConf())) {
835       LOG.warn("Should use rollback(Server, RegionServerServices, User)");
836     }
837     return rollback(server, services, null);
838   }
839 
840   /**
841    * @param server Hosting server instance (May be null when testing).
842    * @param services
843    * @throws IOException If thrown, rollback failed.  Take drastic action.
844    * @return True if we successfully rolled back, false if we got to the point
845    * of no return and so now need to abort the server to minimize damage.
846    */
847   @Override
848   @SuppressWarnings("deprecation")
849   public boolean rollback(final Server server, final RegionServerServices services, User user)
850   throws IOException {
851     // Coprocessor callback
852     if (this.parent.getCoprocessorHost() != null) {
853       this.parent.getCoprocessorHost().preRollBackSplit(user);
854     }
855 
856     boolean result = true;
857     ListIterator<JournalEntry> iterator =
858       this.journal.listIterator(this.journal.size());
859     // Iterate in reverse.
860     while (iterator.hasPrevious()) {
861       JournalEntry je = iterator.previous();
862 
863       transition(je.getPhase(), true);
864 
865       switch(je.getPhase()) {
866 
867       case SET_SPLITTING:
868         if (useCoordinatedStateManager(server) && server instanceof HRegionServer) {
869           ((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
870               .getSplitTransactionCoordination().clean(this.parent.getRegionInfo());
871         } else if (services != null && !useZKForAssignment
872             && !services.reportRegionStateTransition(TransitionCode.SPLIT_REVERTED,
873                 parent.getRegionInfo(), hri_a, hri_b)) {
874           return false;
875         }
876         break;
877 
878       case CREATE_SPLIT_DIR:
879         this.parent.writestate.writesEnabled = true;
880         this.parent.getRegionFileSystem().cleanupSplitsDir();
881         break;
882 
883       case CLOSED_PARENT_REGION:
884         try {
885           // So, this returns a seqid but if we just closed and then reopened, we
886           // should be ok. On close, we flushed using sequenceid obtained from
887           // hosting regionserver so no need to propagate the sequenceid returned
888           // out of initialize below up into regionserver as we normally do.
889           // TODO: Verify.
890           this.parent.initialize();
891         } catch (IOException e) {
892           LOG.error("Failed rollbacking CLOSED_PARENT_REGION of region " +
893             this.parent.getRegionInfo().getRegionNameAsString(), e);
894           throw new RuntimeException(e);
895         }
896         break;
897 
898       case STARTED_REGION_A_CREATION:
899         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_a);
900         break;
901 
902       case STARTED_REGION_B_CREATION:
903         this.parent.getRegionFileSystem().cleanupDaughterRegion(this.hri_b);
904         break;
905 
906       case OFFLINED_PARENT:
907         if (services != null) services.addToOnlineRegions(this.parent);
908         break;
909 
910       case PONR:
911         // We got to the point-of-no-return so we need to just abort. Return
912         // immediately.  Do not clean up created daughter regions.  They need
913         // to be in place so we don't delete the parent region mistakenly.
914         // See HBASE-3872.
915         return false;
916 
917       // Informational only cases
918       case STARTED:
919       case PREPARED:
920       case BEFORE_PRE_SPLIT_HOOK:
921       case AFTER_PRE_SPLIT_HOOK:
922       case BEFORE_POST_SPLIT_HOOK:
923       case AFTER_POST_SPLIT_HOOK:
924       case OPENED_REGION_A:
925       case OPENED_REGION_B:
926       case COMPLETED:
927         break;
928 
929       default:
930         throw new RuntimeException("Unhandled journal entry: " + je);
931       }
932     }
933     // Coprocessor callback
934     if (this.parent.getCoprocessorHost() != null) {
935       this.parent.getCoprocessorHost().postRollBackSplit(user);
936     }
937     return result;
938   }
939 
940   HRegionInfo getFirstDaughter() {
941     return hri_a;
942   }
943 
944   HRegionInfo getSecondDaughter() {
945     return hri_b;
946   }
947 
948   @Override
949   public List<JournalEntry> getJournal() {
950     return journal;
951   }
952 
953   @Override
954   public SplitTransaction registerTransactionListener(TransactionListener listener) {
955     listeners.add(listener);
956     return this;
957   }
958 
959   @Override
960   public Server getServer() {
961     return server;
962   }
963 
964   @Override
965   public RegionServerServices getRegionServerServices() {
966     return rsServices;
967   }
968 
969   @Override
970   public String toString() {
971     StringBuilder sb = new StringBuilder();
972     for (int i = 0; i < journal.size(); i++) {
973       JournalEntry je = journal.get(i);
974       sb.append(je.toString());
975       if (i != 0) {
976         JournalEntry jep = journal.get(i-1);
977         long delta = je.getTimeStamp() - jep.getTimeStamp();
978         if (delta != 0) {
979           sb.append(" (+" + delta + " ms)");
980         }
981       }
982       sb.append("\n");
983     }
984     return sb.toString();
985   }
986 }