View Javadoc

1   /*
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.zookeeper;
20  
21  import static org.apache.hadoop.hbase.HConstants.DEFAULT_META_REPLICA_NUM;
22  import static org.apache.hadoop.hbase.HConstants.META_REPLICAS_NUM;
23  import static org.apache.hadoop.hbase.HRegionInfo.DEFAULT_REPLICA_ID;
24  import static org.apache.hadoop.hbase.zookeeper.ZKUtil.joinZNode;
25  import com.google.common.collect.ImmutableMap;
26  import java.io.Closeable;
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.HashMap;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.concurrent.CopyOnWriteArrayList;
33  import java.util.concurrent.CountDownLatch;
34  import java.util.concurrent.ExecutorService;
35  import java.util.concurrent.Executors;
36  import java.util.concurrent.TimeUnit;
37  import java.util.regex.Matcher;
38  import java.util.regex.Pattern;
39  
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.hbase.classification.InterfaceAudience;
43  import org.apache.hadoop.conf.Configuration;
44  import org.apache.hadoop.hbase.Abortable;
45  import org.apache.hadoop.hbase.AuthUtil;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.ZooKeeperConnectionException;
48  import org.apache.hadoop.hbase.security.Superusers;
49  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
50  import org.apache.hadoop.hbase.util.Threads;
51  import org.apache.hadoop.security.UserGroupInformation;
52  import org.apache.zookeeper.AsyncCallback;
53  import org.apache.zookeeper.KeeperException;
54  import org.apache.zookeeper.WatchedEvent;
55  import org.apache.zookeeper.Watcher;
56  import org.apache.zookeeper.ZooDefs;
57  import org.apache.zookeeper.ZooDefs.Ids;
58  import org.apache.zookeeper.ZooDefs.Perms;
59  import org.apache.zookeeper.data.ACL;
60  import org.apache.zookeeper.data.Id;
61  import org.apache.zookeeper.data.Stat;
62  
63  /**
64   * Acts as the single ZooKeeper Watcher.  One instance of this is instantiated
65   * for each Master, RegionServer, and client process.
66   *
67   * <p>This is the only class that implements {@link Watcher}.  Other internal
68   * classes which need to be notified of ZooKeeper events must register with
69   * the local instance of this watcher via {@link #registerListener}.
70   *
71   * <p>This class also holds and manages the connection to ZooKeeper.  Code to
72   * deal with connection related events and exceptions are handled here.
73   */
74  @InterfaceAudience.Private
75  public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
76    private static final Log LOG = LogFactory.getLog(ZooKeeperWatcher.class);
77  
78    public static final String META_ZNODE_PREFIX_CONF_KEY = "zookeeper.znode.metaserver";
79    public static final String META_ZNODE_PREFIX = "meta-region-server";
80  
81    // Identifier for this watcher (for logging only).  It is made of the prefix
82    // passed on construction and the zookeeper sessionid.
83    private String prefix;
84    private String identifier;
85  
86    // zookeeper quorum
87    private String quorum;
88  
89    // zookeeper connection
90    private final RecoverableZooKeeper recoverableZooKeeper;
91  
92    // abortable in case of zk failure
93    protected Abortable abortable;
94    // Used if abortable is null
95    private boolean aborted = false;
96  
97    // listeners to be notified
98    private final List<ZooKeeperListener> listeners =
99      new CopyOnWriteArrayList<ZooKeeperListener>();
100 
101   /**
102    * znodes containing the locations of the servers hosting the meta replicas
103    */
104   private final ImmutableMap<Integer, String> metaReplicaZNodes;
105 
106   // Single threaded executor pool that processes event notifications from Zookeeper. Events are
107   // processed in the order in which they arrive (pool backed by an unbounded fifo queue). We do
108   // this to decouple the event processing from Zookeeper's ClientCnxn's EventThread context.
109   // EventThread internally runs a single while loop to serially process all the events. When events
110   // are processed by the listeners in the same thread, that blocks the EventThread from processing
111   // subsequent events. Processing events in a separate thread frees up the event thread to continue
112   // and further prevents deadlocks if the process method itself makes other zookeeper calls.
113   // It is ok to do it in a single thread because the Zookeeper ClientCnxn already serializes the
114   // requests using a single while loop and hence there is no performance degradation.
115   private final ExecutorService zkEventProcessor =
116       Executors.newSingleThreadExecutor(Threads.getNamedThreadFactory("zk-event-processor"));
117 
118   // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL
119   // negotiation to complete
120   public CountDownLatch saslLatch = new CountDownLatch(1);
121 
122   // node names
123 
124   // base znode for this cluster
125   public String baseZNode;
126   //znodes containing the locations of the servers hosting the meta replicas
127   private Map<Integer,String> metaReplicaZnodes = new HashMap<Integer, String>();
128   // znode containing ephemeral nodes of the regionservers
129   public String rsZNode;
130   // znode containing ephemeral nodes of the draining regionservers
131   public String drainingZNode;
132   // znode of currently active master
133   private String masterAddressZNode;
134   // znode of this master in backup master directory, if not the active master
135   public String backupMasterAddressesZNode;
136   // znode containing the current cluster state
137   public String clusterStateZNode;
138   // znode used for region transitioning and assignment
139   public String assignmentZNode;
140   // znode used for table disabling/enabling
141   public String tableZNode;
142   // znode containing the unique cluster ID
143   public String clusterIdZNode;
144   // znode used for log splitting work assignment
145   public String splitLogZNode;
146   // znode containing the state of the load balancer
147   public String balancerZNode;
148   // znode containing the state of region normalizer
149   private String regionNormalizerZNode;
150   // znode containing the state of all switches; currently it has split and merge child nodes.
151   private String switchZNode;
152   // znode containing the lock for the tables
153   public String tableLockZNode;
154   // znode containing the state of the snapshot auto-cleanup
155   String snapshotCleanupZNode;
156   // znode containing the state of recovering regions
157   public String recoveringRegionsZNode;
158   // znode containing namespace descriptors
159   public static String namespaceZNode = "namespace";
160   // znode indicating master maintenance mode
161   public static String masterMaintZNode = "masterMaintenance";
162 
163   /**
164    * The prefix of meta znode. Does not include baseZNode.
165    * It's a 'prefix' because the meta replica id integer can be appended to the end (if
166    * no number present, it is 'default' replica).
167    */
168   private final String metaZNodePrefix;
169 
170   // Certain ZooKeeper nodes need to be world-readable
171   public static final ArrayList<ACL> CREATOR_ALL_AND_WORLD_READABLE =
172     new ArrayList<ACL>() { {
173       add(new ACL(ZooDefs.Perms.READ,ZooDefs.Ids.ANYONE_ID_UNSAFE));
174       add(new ACL(ZooDefs.Perms.ALL,ZooDefs.Ids.AUTH_IDS));
175     }};
176 
177   private static final String DEFAULT_SNAPSHOT_CLEANUP_ZNODE = "snapshot-cleanup";
178 
179   private final Configuration conf;
180 
181   private final long zkSyncTimeout;
182 
183   /* A pattern that matches a Kerberos name, borrowed from Hadoop's KerberosName */
184   private static final Pattern NAME_PATTERN = Pattern.compile("([^/@]*)(/([^/@]*))?@([^/@]*)");
185 
186   /**
187    * Instantiate a ZooKeeper connection and watcher.
188    * @param identifier string that is passed to RecoverableZookeeper to be used as
189    * identifier for this instance. Use null for default.
190    * @throws IOException
191    * @throws ZooKeeperConnectionException
192    */
193   public ZooKeeperWatcher(Configuration conf, String identifier,
194       Abortable abortable) throws ZooKeeperConnectionException, IOException {
195     this(conf, identifier, abortable, false);
196   }
197 
198   /**
199    * Instantiate a ZooKeeper connection and watcher.
200    * @param conf
201    * @param identifier string that is passed to RecoverableZookeeper to be used as identifier for
202    *          this instance. Use null for default.
203    * @param abortable Can be null if there is on error there is no host to abort: e.g. client
204    *          context.
205    * @param canCreateBaseZNode
206    * @throws IOException
207    * @throws ZooKeeperConnectionException
208    */
209   public ZooKeeperWatcher(Configuration conf, String identifier,
210       Abortable abortable, boolean canCreateBaseZNode)
211   throws IOException, ZooKeeperConnectionException {
212     this.conf = conf;
213     this.quorum = ZKConfig.getZKQuorumServersString(conf);
214     this.prefix = identifier;
215     // Identifier will get the sessionid appended later below down when we
216     // handle the syncconnect event.
217     this.identifier = identifier + "0x0";
218     this.abortable = abortable;
219     setNodeNames(conf);
220     PendingWatcher pendingWatcher = new PendingWatcher();
221     this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, pendingWatcher, identifier);
222     pendingWatcher.prepare(this);
223     ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
224     metaZNodePrefix = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
225     String defaultMetaReplicaZNode = joinZNode(baseZNode, metaZNodePrefix);
226     builder.put(DEFAULT_REPLICA_ID, defaultMetaReplicaZNode);
227     int numMetaReplicas = conf.getInt(META_REPLICAS_NUM, DEFAULT_META_REPLICA_NUM);
228     for (int i = 1; i < numMetaReplicas; i++) {
229       builder.put(i, defaultMetaReplicaZNode + "-" + i);
230     }
231     metaReplicaZNodes = builder.build();
232     if (canCreateBaseZNode) {
233       try {
234         createBaseZNodes();
235       } catch (ZooKeeperConnectionException zce) {
236         try {
237           this.recoverableZooKeeper.close();
238         } catch (InterruptedException ie) {
239           LOG.debug("Encountered InterruptedException when closing " + this.recoverableZooKeeper);
240           Thread.currentThread().interrupt();
241         }
242         throw zce;
243       }
244     }
245     this.zkSyncTimeout = conf.getLong(HConstants.ZK_SYNC_BLOCKING_TIMEOUT_MS,
246         HConstants.ZK_SYNC_BLOCKING_TIMEOUT_DEFAULT_MS);
247   }
248 
249   /**
250    * @return true if the znode is a meta region replica
251    */
252   public boolean isAnyMetaReplicaZNode(String node) {
253     return this.metaReplicaZNodes.containsValue(node);
254   }
255 
256   private void createBaseZNodes() throws ZooKeeperConnectionException {
257     try {
258       // Create all the necessary "directories" of znodes
259       ZKUtil.createWithParents(this, baseZNode);
260       if (conf.getBoolean("hbase.assignment.usezk", true)) {
261         ZKUtil.createAndFailSilent(this, assignmentZNode);
262       }
263       ZKUtil.createAndFailSilent(this, rsZNode);
264       ZKUtil.createAndFailSilent(this, drainingZNode);
265       ZKUtil.createAndFailSilent(this, tableZNode);
266       ZKUtil.createAndFailSilent(this, splitLogZNode);
267       ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode);
268       ZKUtil.createAndFailSilent(this, tableLockZNode);
269       ZKUtil.createAndFailSilent(this, recoveringRegionsZNode);
270       ZKUtil.createAndFailSilent(this, masterMaintZNode);
271     } catch (KeeperException e) {
272       throw new ZooKeeperConnectionException(
273           prefix("Unexpected KeeperException creating base node"), e);
274     }
275   }
276 
277   /** Returns whether the znode is supposed to be readable by the client
278    * and DOES NOT contain sensitive information (world readable).*/
279   public boolean isClientReadable(String node) {
280     // Developer notice: These znodes are world readable. DO NOT add more znodes here UNLESS
281     // all clients need to access this data to work. Using zk for sharing data to clients (other
282     // than service lookup case is not a recommended design pattern.
283     return
284         node.equals(baseZNode) ||
285         isAnyMetaReplicaZnode(node) ||
286         node.equals(getMasterAddressZNode()) ||
287         node.equals(clusterIdZNode)||
288         node.equals(rsZNode) ||
289         // /hbase/table and /hbase/table/foo is allowed, /hbase/table-lock is not
290         node.equals(tableZNode) ||
291         node.startsWith(tableZNode + "/");
292   }
293 
294   /**
295    * On master start, we check the znode ACLs under the root directory and set the ACLs properly
296    * if needed. If the cluster goes from an unsecure setup to a secure setup, this step is needed
297    * so that the existing znodes created with open permissions are now changed with restrictive
298    * perms.
299    */
300   public void checkAndSetZNodeAcls() {
301     if (!ZKUtil.isSecureZooKeeper(getConfiguration())) {
302       LOG.info("not a secure deployment, proceeding");
303       return;
304     }
305 
306     // Check the base znodes permission first. Only do the recursion if base znode's perms are not
307     // correct.
308     try {
309       List<ACL> actualAcls = recoverableZooKeeper.getAcl(baseZNode, new Stat());
310 
311       if (!isBaseZnodeAclSetup(actualAcls)) {
312         LOG.info("setting znode ACLs");
313         setZnodeAclsRecursive(baseZNode);
314       }
315     } catch(KeeperException.NoNodeException nne) {
316       return;
317     } catch(InterruptedException ie) {
318       interruptedExceptionNoThrow(ie, false);
319     } catch (IOException|KeeperException e) {
320       LOG.warn("Received exception while checking and setting zookeeper ACLs", e);
321     }
322   }
323 
324   /**
325    * Set the znode perms recursively. This will do post-order recursion, so that baseZnode ACLs
326    * will be set last in case the master fails in between.
327    * @param znode
328    */
329   private void setZnodeAclsRecursive(String znode) throws KeeperException, InterruptedException {
330     List<String> children = recoverableZooKeeper.getChildren(znode, false);
331 
332     for (String child : children) {
333       setZnodeAclsRecursive(joinZNode(znode, child));
334     }
335     List<ACL> acls = ZKUtil.createACL(this, znode, true);
336     LOG.info("Setting ACLs for znode:" + znode + " , acl:" + acls);
337     recoverableZooKeeper.setAcl(znode, acls, -1);
338   }
339 
340   /**
341    * Checks whether the ACLs returned from the base znode (/hbase) is set for secure setup.
342    * @param acls acls from zookeeper
343    * @return whether ACLs are set for the base znode
344    * @throws IOException
345    */
346   private boolean isBaseZnodeAclSetup(List<ACL> acls) throws IOException {
347     if (LOG.isDebugEnabled()) {
348       LOG.debug("Checking znode ACLs");
349     }
350     String[] superUsers = conf.getStrings(Superusers.SUPERUSER_CONF_KEY);
351     // Check whether ACL set for all superusers
352     if (superUsers != null && !checkACLForSuperUsers(superUsers, acls)) {
353       return false;
354     }
355 
356     // this assumes that current authenticated user is the same as zookeeper client user
357     // configured via JAAS
358     String hbaseUser = UserGroupInformation.getCurrentUser().getShortUserName();
359 
360     if (acls.isEmpty()) {
361       if (LOG.isDebugEnabled()) {
362         LOG.debug("ACL is empty");
363       }
364       return false;
365     }
366 
367     for (ACL acl : acls) {
368       int perms = acl.getPerms();
369       Id id = acl.getId();
370       // We should only set at most 3 possible ACLs for 3 Ids. One for everyone, one for superuser
371       // and one for the hbase user
372       if (Ids.ANYONE_ID_UNSAFE.equals(id)) {
373         if (perms != Perms.READ) {
374           if (LOG.isDebugEnabled()) {
375             LOG.debug(String.format("permissions for '%s' are not correct: have 0x%x, want 0x%x",
376               id, perms, Perms.READ));
377           }
378           return false;
379         }
380       } else if (superUsers != null && isSuperUserId(superUsers, id)) {
381         if (perms != Perms.ALL) {
382           if (LOG.isDebugEnabled()) {
383             LOG.debug(String.format("permissions for '%s' are not correct: have 0x%x, want 0x%x",
384               id, perms, Perms.ALL));
385           }
386           return false;
387         }
388       } else if ("sasl".equals(id.getScheme())) {
389         String name = id.getId();
390         // If ZooKeeper recorded the Kerberos full name in the ACL, use only the shortname
391         Matcher match = NAME_PATTERN.matcher(name);
392         if (match.matches()) {
393           name = match.group(1);
394         }
395         if (name.equals(hbaseUser)) {
396           if (perms != Perms.ALL) {
397             if (LOG.isDebugEnabled()) {
398               LOG.debug(String.format("permissions for '%s' are not correct: have 0x%x, want 0x%x",
399                 id, perms, Perms.ALL));
400             }
401             return false;
402           }
403         } else {
404           if (LOG.isDebugEnabled()) {
405             LOG.debug("Unexpected shortname in SASL ACL: " + id);
406           }
407           return false;
408         }
409       } else {
410         if (LOG.isDebugEnabled()) {
411           LOG.debug("unexpected ACL id '" + id + "'");
412         }
413         return false;
414       }
415     }
416     return true;
417   }
418   
419   /*
420    * Validate whether ACL set for all superusers.
421    */
422   private boolean checkACLForSuperUsers(String[] superUsers, List<ACL> acls) {
423     for (String user : superUsers) {
424       boolean hasAccess = false;
425       // TODO: Validate super group members also when ZK supports setting node ACL for groups.
426       if (!AuthUtil.isGroupPrincipal(user)) {
427         for (ACL acl : acls) {
428           if (user.equals(acl.getId().getId())) {
429             if (acl.getPerms() == Perms.ALL) {
430               hasAccess = true;
431             } else {
432               if (LOG.isDebugEnabled()) {
433                 LOG.debug(String.format(
434                   "superuser '%s' does not have correct permissions: have 0x%x, want 0x%x",
435                   acl.getId().getId(), acl.getPerms(), Perms.ALL));
436               }
437             }
438             break;
439           }
440         }
441         if (!hasAccess) {
442           return false;
443         }
444       }
445     }
446     return true;
447   }
448   
449   /*
450    * Validate whether ACL ID is superuser.
451    */
452   public static boolean isSuperUserId(String[] superUsers, Id id) {
453     for (String user : superUsers) {
454       // TODO: Validate super group members also when ZK supports setting node ACL for groups.
455       if (!AuthUtil.isGroupPrincipal(user) && new Id("sasl", user).equals(id)) {
456         return true;
457       }
458     }
459     return false;
460   }
461 
462   @Override
463   public String toString() {
464     return this.identifier + ", quorum=" + quorum + ", baseZNode=" + baseZNode;
465   }
466 
467   /**
468    * Adds this instance's identifier as a prefix to the passed <code>str</code>
469    * @param str String to amend.
470    * @return A new string with this instance's identifier as prefix: e.g.
471    * if passed 'hello world', the returned string could be
472    */
473   public String prefix(final String str) {
474     return this.toString() + " " + str;
475   }
476 
477   /**
478    * Set the local variable node names using the specified configuration.
479    */
480   private void setNodeNames(Configuration conf) {
481     baseZNode = conf.get(HConstants.ZOOKEEPER_ZNODE_PARENT,
482         HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
483     metaReplicaZnodes.put(0, joinZNode(baseZNode,
484            conf.get("zookeeper.znode.metaserver", "meta-region-server")));
485     int numMetaReplicas = conf.getInt(META_REPLICAS_NUM,
486             DEFAULT_META_REPLICA_NUM);
487     for (int i = 1; i < numMetaReplicas; i++) {
488       String str = joinZNode(baseZNode,
489         conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + i);
490       metaReplicaZnodes.put(i, str);
491     }
492     rsZNode = joinZNode(baseZNode,
493         conf.get("zookeeper.znode.rs", "rs"));
494     drainingZNode = joinZNode(baseZNode,
495         conf.get("zookeeper.znode.draining.rs", "draining"));
496     masterAddressZNode = joinZNode(baseZNode,
497         conf.get("zookeeper.znode.master", "master"));
498     backupMasterAddressesZNode = joinZNode(baseZNode,
499         conf.get("zookeeper.znode.backup.masters", "backup-masters"));
500     clusterStateZNode = joinZNode(baseZNode,
501         conf.get("zookeeper.znode.state", "running"));
502     assignmentZNode = joinZNode(baseZNode,
503         conf.get("zookeeper.znode.unassigned", "region-in-transition"));
504     tableZNode = joinZNode(baseZNode,
505         conf.get("zookeeper.znode.tableEnableDisable", "table"));
506     clusterIdZNode = joinZNode(baseZNode,
507         conf.get("zookeeper.znode.clusterId", "hbaseid"));
508     splitLogZNode = joinZNode(baseZNode,
509         conf.get("zookeeper.znode.splitlog", HConstants.SPLIT_LOGDIR_NAME));
510     balancerZNode = joinZNode(baseZNode,
511         conf.get("zookeeper.znode.balancer", "balancer"));
512     regionNormalizerZNode = joinZNode(baseZNode,
513       conf.get("zookeeper.znode.regionNormalizer", "normalizer"));
514     switchZNode = joinZNode(baseZNode, conf.get("zookeeper.znode.switch", "switch"));
515     tableLockZNode = joinZNode(baseZNode,
516         conf.get("zookeeper.znode.tableLock", "table-lock"));
517     snapshotCleanupZNode = joinZNode(baseZNode,
518         conf.get("zookeeper.znode.snapshot.cleanup", DEFAULT_SNAPSHOT_CLEANUP_ZNODE));
519     recoveringRegionsZNode = joinZNode(baseZNode,
520         conf.get("zookeeper.znode.recovering.regions", "recovering-regions"));
521     namespaceZNode = joinZNode(baseZNode,
522         conf.get("zookeeper.znode.namespace", "namespace"));
523     masterMaintZNode = joinZNode(baseZNode,
524       conf.get("zookeeper.znode.masterMaintenance", "master-maintenance"));
525   }
526 
527   /**
528    * Is the znode of any meta replica
529    * @param node
530    * @return true or false
531    */
532   public boolean isAnyMetaReplicaZnode(String node) {
533     if (metaReplicaZnodes.values().contains(node)) {
534       return true;
535     }
536     return false;
537   }
538 
539   /**
540    * Is it the default meta replica's znode
541    * @param node
542    * @return true or false
543    */
544   public boolean isDefaultMetaReplicaZnode(String node) {
545     if (getZNodeForReplica(DEFAULT_REPLICA_ID).equals(node)) {
546       return true;
547     }
548     return false;
549   }
550 
551   /**
552    * Get the znodes corresponding to the meta replicas from ZK
553    * @return list of znodes
554    * @throws KeeperException
555    */
556   public List<String> getMetaReplicaNodes() throws KeeperException {
557     List<String> childrenOfBaseNode = ZKUtil.listChildrenNoWatch(this, baseZNode);
558     List<String> metaReplicaNodes = new ArrayList<String>(2);
559     if (childrenOfBaseNode != null) {
560       String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server");
561       for (String child : childrenOfBaseNode) {
562         if (child.startsWith(pattern)) metaReplicaNodes.add(child);
563       }
564     }
565     return metaReplicaNodes;
566   }
567 
568   /**
569    * Get the znode string corresponding to a replicaId
570    * @param replicaId
571    * @return znode
572    */
573   public String getZNodeForReplica(int replicaId) {
574     String str = metaReplicaZnodes.get(replicaId);
575     // return a newly created path but don't update the cache of paths
576     // This is mostly needed for tests that attempt to create meta replicas
577     // from outside the master
578     if (str == null) {
579       str = joinZNode(baseZNode,
580           conf.get("zookeeper.znode.metaserver", "meta-region-server") + "-" + replicaId);
581     }
582     return str;
583   }
584 
585   /**
586    * Parse the meta replicaId from the passed znode
587    * @param znode
588    * @return replicaId
589    */
590   public int getMetaReplicaIdFromZnode(String znode) {
591     String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server");
592     if (znode.equals(pattern)) {
593       return DEFAULT_REPLICA_ID;
594     }
595     // the non-default replicas are of the pattern meta-region-server-<replicaId>
596     String nonDefaultPattern = pattern + "-";
597     return Integer.parseInt(znode.substring(nonDefaultPattern.length()));
598   }
599 
600   /**
601    * Register the specified listener to receive ZooKeeper events.
602    * @param listener
603    */
604   public void registerListener(ZooKeeperListener listener) {
605     listeners.add(listener);
606   }
607 
608   /**
609    * Register the specified listener to receive ZooKeeper events and add it as
610    * the first in the list of current listeners.
611    * @param listener
612    */
613   public void registerListenerFirst(ZooKeeperListener listener) {
614     listeners.add(0, listener);
615   }
616 
617   public void unregisterListener(ZooKeeperListener listener) {
618     listeners.remove(listener);
619   }
620 
621   /**
622    * Clean all existing listeners
623    */
624   public void unregisterAllListeners() {
625     listeners.clear();
626   }
627 
628   /**
629    * Get a copy of current registered listeners
630    */
631   public List<ZooKeeperListener> getListeners() {
632     return new ArrayList<ZooKeeperListener>(listeners);
633   }
634 
635   /**
636    * @return The number of currently registered listeners
637    */
638   public int getNumberOfListeners() {
639     return listeners.size();
640   }
641 
642   /**
643    * Get the connection to ZooKeeper.
644    * @return connection reference to zookeeper
645    */
646   public RecoverableZooKeeper getRecoverableZooKeeper() {
647     return recoverableZooKeeper;
648   }
649 
650   public void reconnectAfterExpiration() throws IOException, KeeperException, InterruptedException {
651     recoverableZooKeeper.reconnectAfterExpiration();
652   }
653 
654   /**
655    * Get the quorum address of this instance.
656    * @return quorum string of this zookeeper connection instance
657    */
658   public String getQuorum() {
659     return quorum;
660   }
661 
662   /**
663    * @return the base znode of this zookeeper connection instance.
664    */
665   public String getBaseZNode() {
666     return baseZNode;
667   }
668 
669   private void processEvent(WatchedEvent event) {
670     switch(event.getType()) {
671       // If event type is NONE, this is a connection status change
672       case None: {
673         connectionEvent(event);
674         break;
675       }
676       // Otherwise pass along to the listeners
677       case NodeCreated: {
678         for(ZooKeeperListener listener : listeners) {
679           listener.nodeCreated(event.getPath());
680         }
681         break;
682       }
683       case NodeDeleted: {
684         for(ZooKeeperListener listener : listeners) {
685           listener.nodeDeleted(event.getPath());
686         }
687         break;
688       }
689       case NodeDataChanged: {
690         for(ZooKeeperListener listener : listeners) {
691           listener.nodeDataChanged(event.getPath());
692         }
693         break;
694       }
695       case NodeChildrenChanged: {
696         for(ZooKeeperListener listener : listeners) {
697           listener.nodeChildrenChanged(event.getPath());
698         }
699         break;
700       }
701       default: {
702         LOG.error(String.format("Invalid event of type %s received for path %s. Ignoring",
703             event.getType(), event.getPath()));
704         break;
705       }
706     }
707   }
708 
709   /**
710    * Method called from ZooKeeper for events and connection status.
711    * <p>
712    * Valid events are passed along to listeners.  Connection status changes
713    * are dealt with locally.
714    */
715   @Override
716   public void process(final WatchedEvent event) {
717     LOG.debug(prefix("Received ZooKeeper Event, " +
718         "type=" + event.getType() + ", " +
719         "state=" + event.getState() + ", " +
720         "path=" + event.getPath()));
721     zkEventProcessor.submit(new Runnable() {
722       @Override
723       public void run() {
724         processEvent(event);
725       }
726     });
727   }
728 
729   // Connection management
730 
731   /**
732    * Called when there is a connection-related event via the Watcher callback.
733    * <p>
734    * If Disconnected or Expired, this should shutdown the cluster. But, since
735    * we send a KeeperException.SessionExpiredException along with the abort
736    * call, it's possible for the Abortable to catch it and try to create a new
737    * session with ZooKeeper. This is what the client does in HCM.
738    * <p>
739    * @param event
740    */
741   private void connectionEvent(WatchedEvent event) {
742     switch(event.getState()) {
743       case SyncConnected:
744         this.identifier = this.prefix + "-0x" +
745           Long.toHexString(this.recoverableZooKeeper.getSessionId());
746         // Update our identifier.  Otherwise ignore.
747         LOG.debug(this.identifier + " connected");
748         break;
749 
750       // Abort the server if Disconnected or Expired
751       case Disconnected:
752         LOG.debug(prefix("Received Disconnected from ZooKeeper, ignoring"));
753         break;
754 
755       case Expired:
756         String msg = prefix(this.identifier + " received expired from " +
757           "ZooKeeper, aborting");
758         // TODO: One thought is to add call to ZooKeeperListener so say,
759         // ZooKeeperNodeTracker can zero out its data values.
760         if (this.abortable != null) {
761           this.abortable.abort(msg, new KeeperException.SessionExpiredException());
762         }
763         break;
764 
765       case ConnectedReadOnly:
766       case SaslAuthenticated:
767       case AuthFailed:
768         break;
769 
770       default:
771         throw new IllegalStateException("Received event is not valid: " + event.getState());
772     }
773   }
774 
775   /**
776    * Forces a synchronization of this ZooKeeper client connection within a timeout. Enforcing a
777    * timeout lets the callers fail-fast rather than wait forever for the sync to finish.
778    * <p>
779    * Executing this method before running other methods will ensure that the
780    * subsequent operations are up-to-date and consistent as of the time that
781    * the sync is complete.
782    * <p>
783    * This is used for compareAndSwap type operations where we need to read the
784    * data of an existing node and delete or transition that node, utilizing the
785    * previously read version and data.  We want to ensure that the version read
786    * is up-to-date from when we begin the operation.
787    */
788   public void syncOrTimeout(String path) throws KeeperException {
789     final CountDownLatch latch = new CountDownLatch(1);
790     long startTime = EnvironmentEdgeManager.currentTime();
791     this.recoverableZooKeeper.sync(path, new AsyncCallback.VoidCallback() {
792       @Override
793       public void processResult(int i, String s, Object o) {
794         latch.countDown();
795       }
796     }, null);
797     try {
798       if (!latch.await(zkSyncTimeout, TimeUnit.MILLISECONDS)) {
799         LOG.warn(String.format("sync() operation to ZK timed out. Configured timeout: %s ms. " +
800             "This usually points to a ZK side issue. Check ZK server logs and metrics.",
801             zkSyncTimeout));
802         throw new KeeperException.OperationTimeoutException();
803       }
804     } catch (InterruptedException e) {
805       LOG.warn("Interrupted waiting for ZK sync() to finish.", e);
806       Thread.currentThread().interrupt();
807       return;
808     }
809     if (LOG.isDebugEnabled()) {
810       // TODO: Switch to a metric once server side ZK watcher metrics are implemented. This is a
811       // useful metric to have since the latency of sync() impacts the callers.
812       LOG.debug(String.format("ZK sync() operation took %d ms",
813           EnvironmentEdgeManager.currentTime() - startTime));
814     }
815   }
816 
817   /**
818    * Handles KeeperExceptions in client calls.
819    * <p>
820    * This may be temporary but for now this gives one place to deal with these.
821    * <p>
822    * TODO: Currently this method rethrows the exception to let the caller handle
823    * <p>
824    * @param ke
825    * @throws KeeperException
826    */
827   public void keeperException(KeeperException ke)
828   throws KeeperException {
829     LOG.error(prefix("Received unexpected KeeperException, re-throwing exception"), ke);
830     throw ke;
831   }
832 
833   /**
834    * Handles InterruptedExceptions in client calls.
835    * @param ie the InterruptedException instance thrown
836    * @throws KeeperException the exception to throw, transformed from the InterruptedException
837    */
838   public void interruptedException(InterruptedException ie) throws KeeperException {
839     interruptedExceptionNoThrow(ie, true);
840     // Throw a system error exception to let upper level handle it
841     throw new KeeperException.SystemErrorException();
842   }
843 
844   /**
845    * Log the InterruptedException and interrupt current thread
846    * @param ie The IterruptedException to log
847    * @param throwLater Whether we will throw the exception latter
848    */
849   public void interruptedExceptionNoThrow(InterruptedException ie, boolean throwLater) {
850     LOG.debug(prefix("Received InterruptedException, will interrupt current thread"
851         + (throwLater ? " and rethrow a SystemErrorException" : "")),
852       ie);
853     // At least preserve interrupt.
854     Thread.currentThread().interrupt();
855   }
856 
857   /**
858    * Close the connection to ZooKeeper.
859    *
860    */
861   @Override
862   public void close() {
863     try {
864       recoverableZooKeeper.close();
865     } catch (InterruptedException e) {
866       Thread.currentThread().interrupt();
867     } finally {
868       zkEventProcessor.shutdownNow();
869     }
870   }
871 
872   public Configuration getConfiguration() {
873     return conf;
874   }
875 
876   @Override
877   public void abort(String why, Throwable e) {
878     if (this.abortable != null) this.abortable.abort(why, e);
879     else this.aborted = true;
880   }
881 
882   @Override
883   public boolean isAborted() {
884     return this.abortable == null? this.aborted: this.abortable.isAborted();
885   }
886 
887   /**
888    * @return Path to the currently active master.
889    */
890   public String getMasterAddressZNode() {
891     return this.masterAddressZNode;
892   }
893 
894   /**
895    * @return ZooKeeper znode for region normalizer state
896    */
897   public String getRegionNormalizerZNode() {
898     return regionNormalizerZNode;
899   }
900 
901   /**
902    *  @return ZK node for switch
903    * */
904   public String getSwitchZNode() {
905     return switchZNode;
906   }
907 
908   /**
909    * Parses the meta replicaId from the passed path.
910    * @param path the name of the full path which includes baseZNode.
911    * @return replicaId
912    */
913   public int getMetaReplicaIdFromPath(String path) {
914     // Extract the znode from path. The prefix is of the following format.
915     // baseZNode + PATH_SEPARATOR.
916     int prefixLen = baseZNode.length() + 1;
917     return getMetaReplicaIdFromZnode(path.substring(prefixLen));
918   }
919 
920   /**
921    * Same as {@link #getMetaReplicaNodes()} except that this also registers a watcher on base znode
922    * for subsequent CREATE/DELETE operations on child nodes.
923    */
924   public List<String> getMetaReplicaNodesAndWatchChildren() throws KeeperException {
925     List<String> childrenOfBaseNode =
926         ZKUtil.listChildrenAndWatchForNewChildren(this, baseZNode);
927     return filterMetaReplicaNodes(childrenOfBaseNode);
928   }
929 
930   /**
931    * @param nodes Input list of znodes
932    * @return Filtered list of znodes from nodes that belong to meta replica(s).
933    */
934   private List<String> filterMetaReplicaNodes(List<String> nodes) {
935     if (nodes == null || nodes.isEmpty()) {
936       return new ArrayList<>();
937     }
938     List<String> metaReplicaNodes = new ArrayList<>(2);
939     String pattern = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
940     for (String child : nodes) {
941       if (child.startsWith(pattern)) {
942         metaReplicaNodes.add(child);
943       }
944     }
945     return metaReplicaNodes;
946   }
947 
948 }