001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.zookeeper;
019
020import static org.apache.zookeeper.client.FourLetterWordMain.send4LetterWord;
021
022import java.io.File;
023import java.io.IOException;
024import java.io.InterruptedIOException;
025import java.io.PrintWriter;
026import java.io.StringWriter;
027import java.net.BindException;
028import java.net.ConnectException;
029import java.net.InetAddress;
030import java.net.InetSocketAddress;
031import java.util.ArrayList;
032import java.util.List;
033import java.util.concurrent.ThreadLocalRandom;
034import org.apache.hadoop.conf.Configuration;
035import org.apache.hadoop.hbase.HConstants;
036import org.apache.hadoop.hbase.net.Address;
037import org.apache.hadoop.hbase.util.Threads;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.apache.zookeeper.common.X509Exception;
040import org.apache.zookeeper.server.NIOServerCnxnFactory;
041import org.apache.zookeeper.server.ZooKeeperServer;
042import org.apache.zookeeper.server.persistence.FileTxnLog;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046/**
047 * TODO: Most of the code in this class is ripped from ZooKeeper tests. Instead of redoing it, we
048 * should contribute updates to their code which let us more easily access testing helper objects.
049 */
050@InterfaceAudience.Public
051public class MiniZooKeeperCluster {
052  private static final Logger LOG = LoggerFactory.getLogger(MiniZooKeeperCluster.class);
053  private static final int TICK_TIME = 2000;
054  private static final int TIMEOUT = 1000;
055  private static final int DEFAULT_CONNECTION_TIMEOUT = 30000;
056  private int connectionTimeout;
057  public static final String LOOPBACK_HOST = InetAddress.getLoopbackAddress().getHostName();
058  public static final String HOST = LOOPBACK_HOST;
059
060  private boolean started;
061
062  /**
063   * The default port. If zero, we use a random port.
064   */
065  private int defaultClientPort = 0;
066
067  private final List<NIOServerCnxnFactory> standaloneServerFactoryList;
068  private final List<ZooKeeperServer> zooKeeperServers;
069  private final List<Integer> clientPortList;
070
071  private int activeZKServerIndex;
072  private int tickTime = 0;
073
074  private final Configuration configuration;
075
076  public MiniZooKeeperCluster() {
077    this(new Configuration());
078  }
079
080  public MiniZooKeeperCluster(Configuration configuration) {
081    this.started = false;
082    this.configuration = configuration;
083    activeZKServerIndex = -1;
084    zooKeeperServers = new ArrayList<>();
085    clientPortList = new ArrayList<>();
086    standaloneServerFactoryList = new ArrayList<>();
087    connectionTimeout = configuration.getInt(HConstants.ZK_SESSION_TIMEOUT + ".localHBaseCluster",
088      DEFAULT_CONNECTION_TIMEOUT);
089  }
090
091  /**
092   * Add a client port to the list.
093   * @param clientPort the specified port
094   */
095  public void addClientPort(int clientPort) {
096    clientPortList.add(clientPort);
097  }
098
099  /**
100   * Get the list of client ports.
101   * @return clientPortList the client port list
102   */
103  @InterfaceAudience.Private
104  public List<Integer> getClientPortList() {
105    return clientPortList;
106  }
107
108  /**
109   * Check whether the client port in a specific position of the client port list is valid.
110   * @param index the specified position
111   */
112  private boolean hasValidClientPortInList(int index) {
113    return (clientPortList.size() > index && clientPortList.get(index) > 0);
114  }
115
116  public void setDefaultClientPort(int clientPort) {
117    if (clientPort <= 0) {
118      throw new IllegalArgumentException("Invalid default ZK client port: " + clientPort);
119    }
120    this.defaultClientPort = clientPort;
121  }
122
123  /**
124   * Selects a ZK client port.
125   * @param seedPort the seed port to start with; -1 means first time.
126   * @return a valid and unused client port
127   */
128  private int selectClientPort(int seedPort) {
129    int i;
130    int returnClientPort = seedPort + 1;
131    if (returnClientPort == 0) {
132      // If the new port is invalid, find one - starting with the default client port.
133      // If the default client port is not specified, starting with a random port.
134      // The random port is selected from the range between 49152 to 65535. These ports cannot be
135      // registered with IANA and are intended for dynamic allocation (see http://bit.ly/dynports).
136      if (defaultClientPort > 0) {
137        returnClientPort = defaultClientPort;
138      } else {
139        returnClientPort = 0xc000 + ThreadLocalRandom.current().nextInt(0x3f00);
140      }
141    }
142    // Make sure that the port is unused.
143    // break when an unused port is found
144    do {
145      for (i = 0; i < clientPortList.size(); i++) {
146        if (returnClientPort == clientPortList.get(i)) {
147          // Already used. Update the port and retry.
148          returnClientPort++;
149          break;
150        }
151      }
152    } while (i != clientPortList.size());
153    return returnClientPort;
154  }
155
156  public void setTickTime(int tickTime) {
157    this.tickTime = tickTime;
158  }
159
160  public int getBackupZooKeeperServerNum() {
161    return zooKeeperServers.size() - 1;
162  }
163
164  public int getZooKeeperServerNum() {
165    return zooKeeperServers.size();
166  }
167
168  // / XXX: From o.a.zk.t.ClientBase
169  private static void setupTestEnv() {
170    // during the tests we run with 100K prealloc in the logs.
171    // on windows systems prealloc of 64M was seen to take ~15seconds
172    // resulting in test failure (client timeout on first session).
173    // set env and directly in order to handle static init/gc issues
174    System.setProperty("zookeeper.preAllocSize", "100");
175    FileTxnLog.setPreallocSize(100 * 1024);
176    // allow all 4 letter words
177    System.setProperty("zookeeper.4lw.commands.whitelist", "*");
178  }
179
180  public int startup(File baseDir) throws IOException, InterruptedException {
181    int numZooKeeperServers = clientPortList.size();
182    if (numZooKeeperServers == 0) {
183      numZooKeeperServers = 1; // need at least 1 ZK server for testing
184    }
185    return startup(baseDir, numZooKeeperServers);
186  }
187
188  /**
189   * @param baseDir             the base directory to use
190   * @param numZooKeeperServers the number of ZooKeeper servers
191   * @return ClientPort server bound to, -1 if there was a binding problem and we couldn't pick
192   *         another port.
193   * @throws IOException          if an operation fails during the startup
194   * @throws InterruptedException if the startup fails
195   */
196  public int startup(File baseDir, int numZooKeeperServers)
197    throws IOException, InterruptedException {
198    if (numZooKeeperServers <= 0) {
199      return -1;
200    }
201
202    setupTestEnv();
203    shutdown();
204
205    int tentativePort = -1; // the seed port
206    int currentClientPort;
207
208    // running all the ZK servers
209    for (int i = 0; i < numZooKeeperServers; i++) {
210      File dir = new File(baseDir, "zookeeper_" + i).getAbsoluteFile();
211      createDir(dir);
212      int tickTimeToUse;
213      if (this.tickTime > 0) {
214        tickTimeToUse = this.tickTime;
215      } else {
216        tickTimeToUse = TICK_TIME;
217      }
218
219      // Set up client port - if we have already had a list of valid ports, use it.
220      if (hasValidClientPortInList(i)) {
221        currentClientPort = clientPortList.get(i);
222      } else {
223        tentativePort = selectClientPort(tentativePort); // update the seed
224        currentClientPort = tentativePort;
225      }
226
227      ZooKeeperServer server = new ZooKeeperServer(dir, dir, tickTimeToUse);
228      // Setting {min,max}SessionTimeout defaults to be the same as in Zookeeper
229      server.setMinSessionTimeout(
230        configuration.getInt("hbase.zookeeper.property.minSessionTimeout", -1));
231      server.setMaxSessionTimeout(
232        configuration.getInt("hbase.zookeeper.property.maxSessionTimeout", -1));
233      NIOServerCnxnFactory standaloneServerFactory;
234      while (true) {
235        try {
236          standaloneServerFactory = new NIOServerCnxnFactory();
237          String bindAddr =
238            configuration.get("hbase.zookeeper.property.clientPortAddress", LOOPBACK_HOST);
239          standaloneServerFactory.configure(new InetSocketAddress(bindAddr, currentClientPort),
240            configuration.getInt(HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS,
241              HConstants.DEFAULT_ZOOKEEPER_MAX_CLIENT_CNXNS));
242        } catch (BindException e) {
243          LOG.debug("Failed binding ZK Server to client port: " + currentClientPort, e);
244          // We're told to use some port but it's occupied, fail
245          if (hasValidClientPortInList(i)) {
246            return -1;
247          }
248          // This port is already in use, try to use another.
249          tentativePort = selectClientPort(tentativePort);
250          currentClientPort = tentativePort;
251          continue;
252        }
253        break;
254      }
255
256      // Start up this ZK server. Dump its stats.
257      standaloneServerFactory.startup(server);
258      LOG.info("Started connectionTimeout={}, dir={}, {}", connectionTimeout, dir,
259        getServerConfigurationOnOneLine(server));
260      // Runs a 'stat' against the servers.
261      if (!waitForServerUp(currentClientPort, connectionTimeout)) {
262        Threads.printThreadInfo(System.out, "Why is zk standalone server not coming up?");
263        throw new IOException(
264          "Waiting for startup of standalone server; " + "server isRunning=" + server.isRunning());
265      }
266
267      // We have selected a port as a client port. Update clientPortList if necessary.
268      if (clientPortList.size() <= i) { // it is not in the list, add the port
269        clientPortList.add(currentClientPort);
270      } else if (clientPortList.get(i) <= 0) { // the list has invalid port, update with valid port
271        clientPortList.remove(i);
272        clientPortList.add(i, currentClientPort);
273      }
274
275      standaloneServerFactoryList.add(standaloneServerFactory);
276      zooKeeperServers.add(server);
277    }
278
279    // set the first one to be active ZK; Others are backups
280    activeZKServerIndex = 0;
281    started = true;
282    int clientPort = clientPortList.get(activeZKServerIndex);
283    LOG.info("Started MiniZooKeeperCluster and ran 'stat' on client port={}", clientPort);
284    return clientPort;
285  }
286
287  private String getServerConfigurationOnOneLine(ZooKeeperServer server) {
288    StringWriter sw = new StringWriter();
289    try (PrintWriter pw = new PrintWriter(sw) {
290      @Override
291      public void println(int x) {
292        super.print(x);
293        super.print(", ");
294      }
295
296      @Override
297      public void println(String x) {
298        super.print(x);
299        super.print(", ");
300      }
301    }) {
302      server.dumpConf(pw);
303    }
304    return sw.toString();
305  }
306
307  private void createDir(File dir) throws IOException {
308    try {
309      if (!dir.exists()) {
310        dir.mkdirs();
311      }
312    } catch (SecurityException e) {
313      throw new IOException("creating dir: " + dir, e);
314    }
315  }
316
317  /**
318   * @throws IOException if waiting for the shutdown of a server fails
319   */
320  public void shutdown() throws IOException {
321    // shut down all the zk servers
322    for (int i = 0; i < standaloneServerFactoryList.size(); i++) {
323      NIOServerCnxnFactory standaloneServerFactory = standaloneServerFactoryList.get(i);
324      int clientPort = clientPortList.get(i);
325      standaloneServerFactory.shutdown();
326      if (!waitForServerDown(clientPort, connectionTimeout)) {
327        throw new IOException("Waiting for shutdown of standalone server at port=" + clientPort
328          + ", timeout=" + this.connectionTimeout);
329      }
330    }
331    standaloneServerFactoryList.clear();
332
333    for (ZooKeeperServer zkServer : zooKeeperServers) {
334      // Explicitly close ZKDatabase since ZookeeperServer does not close them
335      zkServer.getZKDatabase().close();
336    }
337    zooKeeperServers.clear();
338
339    // clear everything
340    if (started) {
341      started = false;
342      activeZKServerIndex = 0;
343      clientPortList.clear();
344      LOG.info("Shutdown MiniZK cluster with all ZK servers");
345    }
346  }
347
348  /**
349   * @return clientPort return clientPort if there is another ZK backup can run when killing the
350   *         current active; return -1, if there is no backups.
351   * @throws IOException if waiting for the shutdown of a server fails
352   */
353  public int killCurrentActiveZooKeeperServer() throws IOException, InterruptedException {
354    if (!started || activeZKServerIndex < 0) {
355      return -1;
356    }
357
358    // Shutdown the current active one
359    NIOServerCnxnFactory standaloneServerFactory =
360      standaloneServerFactoryList.get(activeZKServerIndex);
361    int clientPort = clientPortList.get(activeZKServerIndex);
362
363    standaloneServerFactory.shutdown();
364    if (!waitForServerDown(clientPort, connectionTimeout)) {
365      throw new IOException("Waiting for shutdown of standalone server");
366    }
367
368    zooKeeperServers.get(activeZKServerIndex).getZKDatabase().close();
369
370    // remove the current active zk server
371    standaloneServerFactoryList.remove(activeZKServerIndex);
372    clientPortList.remove(activeZKServerIndex);
373    zooKeeperServers.remove(activeZKServerIndex);
374    LOG.info("Kill the current active ZK servers in the cluster on client port: {}", clientPort);
375
376    if (standaloneServerFactoryList.isEmpty()) {
377      // there is no backup servers;
378      return -1;
379    }
380    clientPort = clientPortList.get(activeZKServerIndex);
381    LOG.info("Activate a backup zk server in the cluster on client port: {}", clientPort);
382    // return the next back zk server's port
383    return clientPort;
384  }
385
386  /**
387   * Kill one back up ZK servers.
388   * @throws IOException if waiting for the shutdown of a server fails
389   */
390  public void killOneBackupZooKeeperServer() throws IOException, InterruptedException {
391    if (!started || activeZKServerIndex < 0 || standaloneServerFactoryList.size() <= 1) {
392      return;
393    }
394
395    int backupZKServerIndex = activeZKServerIndex + 1;
396    // Shutdown the current active one
397    NIOServerCnxnFactory standaloneServerFactory =
398      standaloneServerFactoryList.get(backupZKServerIndex);
399    int clientPort = clientPortList.get(backupZKServerIndex);
400
401    standaloneServerFactory.shutdown();
402    if (!waitForServerDown(clientPort, connectionTimeout)) {
403      throw new IOException("Waiting for shutdown of standalone server");
404    }
405
406    zooKeeperServers.get(backupZKServerIndex).getZKDatabase().close();
407
408    // remove this backup zk server
409    standaloneServerFactoryList.remove(backupZKServerIndex);
410    clientPortList.remove(backupZKServerIndex);
411    zooKeeperServers.remove(backupZKServerIndex);
412    LOG.info("Kill one backup ZK servers in the cluster on client port: {}", clientPort);
413  }
414
415  // XXX: From o.a.zk.t.ClientBase. We just dropped the check for ssl/secure.
416  private static boolean waitForServerDown(int port, long timeout) throws IOException {
417    long start = System.currentTimeMillis();
418    while (true) {
419      try {
420        send4LetterWord(HOST, port, "stat", false, (int) timeout);
421      } catch (IOException | X509Exception.SSLContextException e) {
422        return true;
423      }
424
425      if (System.currentTimeMillis() > start + timeout) {
426        break;
427      }
428      try {
429        Thread.sleep(TIMEOUT);
430      } catch (InterruptedException e) {
431        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
432      }
433    }
434    return false;
435  }
436
437  // XXX: From o.a.zk.t.ClientBase. Its in the test jar but we don't depend on zk test jar.
438  // We remove the SSL/secure bit. Not used in here.
439  private static boolean waitForServerUp(int port, long timeout) throws IOException {
440    long start = System.currentTimeMillis();
441    while (true) {
442      try {
443        String result = send4LetterWord(HOST, port, "stat", false, (int) timeout);
444        if (result.startsWith("Zookeeper version:") && !result.contains("READ-ONLY")) {
445          return true;
446        } else {
447          LOG.debug("Read {}", result);
448        }
449      } catch (ConnectException e) {
450        // ignore as this is expected, do not log stacktrace
451        LOG.info("{}:{} not up: {}", HOST, port, e.toString());
452      } catch (IOException | X509Exception.SSLContextException e) {
453        // ignore as this is expected
454        LOG.info("{}:{} not up", HOST, port, e);
455      }
456
457      if (System.currentTimeMillis() > start + timeout) {
458        break;
459      }
460      try {
461        Thread.sleep(TIMEOUT);
462      } catch (InterruptedException e) {
463        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
464      }
465    }
466    return false;
467  }
468
469  public int getClientPort() {
470    return activeZKServerIndex < 0 || activeZKServerIndex >= clientPortList.size()
471      ? -1
472      : clientPortList.get(activeZKServerIndex);
473  }
474
475  /** Returns Address for this cluster instance. */
476  public Address getAddress() {
477    return Address.fromParts(HOST, getClientPort());
478  }
479
480  List<ZooKeeperServer> getZooKeeperServers() {
481    return zooKeeperServers;
482  }
483}