View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase;
20  
21  import java.io.File;
22  import java.io.IOException;
23  import java.util.Locale;
24  import java.util.Map;
25  
26  import org.apache.commons.lang.StringUtils;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.commons.logging.Log;
29  import org.apache.commons.logging.LogFactory;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.conf.Configured;
32  import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
33  import org.apache.hadoop.hbase.util.Pair;
34  import org.apache.hadoop.hbase.util.RetryCounter;
35  import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
36  import org.apache.hadoop.hbase.util.RetryCounterFactory;
37  import org.apache.hadoop.util.Shell;
38  
39  /**
40   * A default cluster manager for HBase. Uses SSH, and hbase shell scripts
41   * to manage the cluster. Assumes Unix-like commands are available like 'ps',
42   * 'kill', etc. Also assumes the user running the test has enough "power" to start & stop
43   * servers on the remote machines (for example, the test user could be the same user as the
44   * user the daemon is running as)
45   */
46  @InterfaceAudience.Private
47  public class HBaseClusterManager extends Configured implements ClusterManager {
48  
49    protected enum Signal {
50      SIGKILL,
51      SIGSTOP,
52      SIGCONT,
53    }
54  
55    protected static final Log LOG = LogFactory.getLog(HBaseClusterManager.class);
56    private String sshUserName;
57    private String sshOptions;
58  
59    /**
60     * The command format that is used to execute the remote command. Arguments:
61     * 1 SSH options, 2 user name , 3 "@" if username is set, 4 host,
62     * 5 original command, 6 service user.
63     */
64    private static final String DEFAULT_TUNNEL_CMD =
65        "/usr/bin/ssh %1$s %2$s%3$s%4$s \"sudo -u %6$s %5$s\"";
66    private String tunnelCmd;
67  
68    private static final String RETRY_ATTEMPTS_KEY = "hbase.it.clustermanager.retry.attempts";
69    private static final int DEFAULT_RETRY_ATTEMPTS = 5;
70  
71    private static final String RETRY_SLEEP_INTERVAL_KEY = "hbase.it.clustermanager.retry.sleep.interval";
72    private static final int DEFAULT_RETRY_SLEEP_INTERVAL = 1000;
73  
74    protected RetryCounterFactory retryCounterFactory;
75  
76    @Override
77    public void setConf(Configuration conf) {
78      super.setConf(conf);
79      if (conf == null) {
80        // Configured gets passed null before real conf. Why? I don't know.
81        return;
82      }
83      sshUserName = conf.get("hbase.it.clustermanager.ssh.user", "");
84      String extraSshOptions = conf.get("hbase.it.clustermanager.ssh.opts", "");
85      sshOptions = System.getenv("HBASE_SSH_OPTS");
86      if (!extraSshOptions.isEmpty()) {
87        sshOptions = StringUtils.join(new Object[] { sshOptions, extraSshOptions }, " ");
88      }
89      sshOptions = (sshOptions == null) ? "" : sshOptions;
90      sshUserName = (sshUserName == null) ? "" : sshUserName;
91      tunnelCmd = conf.get("hbase.it.clustermanager.ssh.cmd", DEFAULT_TUNNEL_CMD);
92      // Print out ssh special config if any.
93      if ((sshUserName != null && sshUserName.length() > 0) ||
94          (sshOptions != null && sshOptions.length() > 0)) {
95        LOG.info("Running with SSH user [" + sshUserName + "] and options [" + sshOptions + "]");
96      }
97  
98      this.retryCounterFactory = new RetryCounterFactory(new RetryConfig()
99          .setMaxAttempts(conf.getInt(RETRY_ATTEMPTS_KEY, DEFAULT_RETRY_ATTEMPTS))
100         .setSleepInterval(conf.getLong(RETRY_SLEEP_INTERVAL_KEY, DEFAULT_RETRY_SLEEP_INTERVAL)));
101   }
102 
103   protected String getServiceUser(ServiceType service) {
104     Configuration conf = getConf();
105     switch (service) {
106       case HADOOP_DATANODE:
107       case HADOOP_NAMENODE:
108         return conf.get("hbase.it.clustermanager.hadoop.hdfs.user", "hdfs");
109       case ZOOKEEPER_SERVER:
110         return conf.get("hbase.it.clustermanager.zookeeper.user", "zookeeper");
111       default:
112         return conf.get("hbase.it.clustermanager.hbase.user", "hbase");
113     }
114   }
115 
116   /**
117    * Executes commands over SSH
118    */
119   protected class RemoteShell extends Shell.ShellCommandExecutor {
120     private String hostname;
121     private String user;
122 
123     public RemoteShell(String hostname, String[] execString, File dir, Map<String, String> env,
124         long timeout) {
125       super(execString, dir, env, timeout);
126       this.hostname = hostname;
127     }
128 
129     public RemoteShell(String hostname, String[] execString, File dir, Map<String, String> env) {
130       super(execString, dir, env);
131       this.hostname = hostname;
132     }
133 
134     public RemoteShell(String hostname, String[] execString, File dir) {
135       super(execString, dir);
136       this.hostname = hostname;
137     }
138 
139     public RemoteShell(String hostname, String[] execString) {
140       super(execString);
141       this.hostname = hostname;
142     }
143 
144     public RemoteShell(String hostname, String user, String[] execString) {
145       super(execString);
146       this.hostname = hostname;
147       this.user = user;
148     }
149 
150     @Override
151     public String[] getExecString() {
152       String at = sshUserName.isEmpty() ? "" : "@";
153       String remoteCmd = StringUtils.join(super.getExecString(), " ");
154       String cmd = String.format(tunnelCmd, sshOptions, sshUserName, at, hostname, remoteCmd, user);
155       LOG.info("Executing full command [" + cmd + "]");
156       return new String[] { "/usr/bin/env", "bash", "-c", cmd };
157     }
158 
159     @Override
160     public void execute() throws IOException {
161       super.execute();
162     }
163   }
164 
165   /**
166    * Provides command strings for services to be executed by Shell. CommandProviders are
167    * pluggable, and different deployments(windows, bigtop, etc) can be managed by
168    * plugging-in custom CommandProvider's or ClusterManager's.
169    */
170   static abstract class CommandProvider {
171 
172     enum Operation {
173       START, STOP, RESTART
174     }
175 
176     public abstract String getCommand(ServiceType service, Operation op);
177 
178     public String isRunningCommand(ServiceType service) {
179       return findPidCommand(service);
180     }
181 
182     protected String findPidCommand(ServiceType service) {
183       return String.format("ps ux | grep proc_%s | grep -v grep | tr -s ' ' | cut -d ' ' -f2",
184           service);
185     }
186 
187     public String signalCommand(ServiceType service, String signal) {
188       return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
189     }
190   }
191 
192   /**
193    * CommandProvider to manage the service using bin/hbase-* scripts
194    */
195   static class HBaseShellCommandProvider extends CommandProvider {
196     private final String hbaseHome;
197     private final String confDir;
198 
199     HBaseShellCommandProvider(Configuration conf) {
200       hbaseHome = conf.get("hbase.it.clustermanager.hbase.home",
201         System.getenv("HBASE_HOME"));
202       String tmp = conf.get("hbase.it.clustermanager.hbase.conf.dir",
203         System.getenv("HBASE_CONF_DIR"));
204       if (tmp != null) {
205         confDir = String.format("--config %s", tmp);
206       } else {
207         confDir = "";
208       }
209     }
210 
211     @Override
212     public String getCommand(ServiceType service, Operation op) {
213       return String.format("%s/bin/hbase-daemon.sh %s %s %s", hbaseHome, confDir,
214           op.toString().toLowerCase(Locale.ROOT), service);
215     }
216   }
217 
218   /**
219    * CommandProvider to manage the service using sbin/hadoop-* scripts.
220    */
221   static class HadoopShellCommandProvider extends CommandProvider {
222     private final String hadoopHome;
223     private final String confDir;
224 
225     HadoopShellCommandProvider(Configuration conf) throws IOException {
226       hadoopHome = conf.get("hbase.it.clustermanager.hadoop.home",
227           System.getenv("HADOOP_HOME"));
228       String tmp = conf.get("hbase.it.clustermanager.hadoop.conf.dir",
229           System.getenv("HADOOP_CONF_DIR"));
230       if (hadoopHome == null) {
231         throw new IOException("Hadoop home configuration parameter i.e. " +
232           "'hbase.it.clustermanager.hadoop.home' is not configured properly.");
233       }
234       if (tmp != null) {
235         confDir = String.format("--config %s", tmp);
236       } else {
237         confDir = "";
238       }
239     }
240 
241     @Override
242     public String getCommand(ServiceType service, Operation op) {
243       return String.format("%s/sbin/hadoop-daemon.sh %s %s %s", hadoopHome, confDir,
244           op.toString().toLowerCase(Locale.ROOT), service);
245     }
246   }
247 
248   /**
249    * CommandProvider to manage the service using bin/zk* scripts.
250    */
251   static class ZookeeperShellCommandProvider extends CommandProvider {
252     private final String zookeeperHome;
253     private final String confDir;
254 
255     ZookeeperShellCommandProvider(Configuration conf) throws IOException {
256       zookeeperHome = conf.get("hbase.it.clustermanager.zookeeper.home",
257           System.getenv("ZOOBINDIR"));
258       String tmp = conf.get("hbase.it.clustermanager.zookeeper.conf.dir",
259           System.getenv("ZOOCFGDIR"));
260       if (zookeeperHome == null) {
261         throw new IOException("Zookeeper home configuration parameter i.e. " +
262           "'hbase.it.clustermanager.zookeeper.home' is not configured properly.");
263       }
264       if (tmp != null) {
265         confDir = String.format("--config %s", tmp);
266       } else {
267         confDir = "";
268       }
269     }
270 
271     @Override
272     public String getCommand(ServiceType service, Operation op) {
273       return String.format("%s/bin/zkServer.sh %s", zookeeperHome, op.toString().toLowerCase(Locale.ROOT));
274     }
275 
276     @Override
277     protected String findPidCommand(ServiceType service) {
278       return String.format("ps ux | grep %s | grep -v grep | tr -s ' ' | cut -d ' ' -f2",
279         service);
280     }
281   }
282 
283   public HBaseClusterManager() {
284   }
285 
286   protected CommandProvider getCommandProvider(ServiceType service) throws IOException {
287     switch (service) {
288       case HADOOP_DATANODE:
289       case HADOOP_NAMENODE:
290         return new HadoopShellCommandProvider(getConf());
291       case ZOOKEEPER_SERVER:
292         return new ZookeeperShellCommandProvider(getConf());
293       default:
294         return new HBaseShellCommandProvider(getConf());
295     }
296   }
297 
298   /**
299    * Execute the given command on the host using SSH
300    * @return pair of exit code and command output
301    * @throws IOException if something goes wrong.
302    */
303   protected Pair<Integer, String> exec(String hostname, ServiceType service, String... cmd)
304     throws IOException {
305     LOG.info("Executing remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname);
306     RemoteShell shell = new RemoteShell(hostname, getServiceUser(service), cmd);
307     try {
308       shell.execute();
309     } catch (Shell.ExitCodeException ex) {
310       // capture the stdout of the process as well.
311       String output = shell.getOutput();
312       // add output for the ExitCodeException.
313       throw new Shell.ExitCodeException(ex.getExitCode(), "stderr: " + ex.getMessage()
314         + ", stdout: " + output);
315     }
316 
317     LOG.info("Executed remote command, exit code:" + shell.getExitCode()
318         + " , output:" + shell.getOutput());
319 
320     return new Pair<Integer, String>(shell.getExitCode(), shell.getOutput());
321   }
322 
323   private Pair<Integer, String> execWithRetries(String hostname, ServiceType service, String... cmd)
324       throws IOException {
325     RetryCounter retryCounter = retryCounterFactory.create();
326     while (true) {
327       try {
328         return exec(hostname, service, cmd);
329       } catch (IOException e) {
330         retryOrThrow(retryCounter, e, hostname, cmd);
331       }
332       try {
333         retryCounter.sleepUntilNextRetry();
334       } catch (InterruptedException ex) {
335         // ignore
336         LOG.warn("Sleep Interrupted:" + ex);
337       }
338     }
339   }
340 
341   private <E extends Exception> void retryOrThrow(RetryCounter retryCounter, E ex,
342       String hostname, String[] cmd) throws E {
343     if (retryCounter.shouldRetry()) {
344       LOG.warn("Remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname
345         + " failed at attempt " + retryCounter.getAttemptTimes() + ". Retrying until maxAttempts: "
346           + retryCounter.getMaxAttempts() + ". Exception: " + ex.getMessage());
347       return;
348     }
349     throw ex;
350   }
351 
352   private void exec(String hostname, ServiceType service, Operation op) throws IOException {
353     execWithRetries(hostname, service, getCommandProvider(service).getCommand(service, op));
354   }
355 
356   @Override
357   public void start(ServiceType service, String hostname, int port) throws IOException {
358     exec(hostname, service, Operation.START);
359   }
360 
361   @Override
362   public void stop(ServiceType service, String hostname, int port) throws IOException {
363     exec(hostname, service, Operation.STOP);
364   }
365 
366   @Override
367   public void restart(ServiceType service, String hostname, int port) throws IOException {
368     exec(hostname, service, Operation.RESTART);
369   }
370 
371   public void signal(ServiceType service, Signal signal, String hostname) throws IOException {
372     execWithRetries(hostname, service,
373       getCommandProvider(service).signalCommand(service, signal.toString()));
374   }
375 
376   @Override
377   public boolean isRunning(ServiceType service, String hostname, int port) throws IOException {
378     String ret = execWithRetries(hostname, service,
379       getCommandProvider(service).isRunningCommand(service)).getSecond();
380     return ret.length() > 0;
381   }
382 
383   @Override
384   public void kill(ServiceType service, String hostname, int port) throws IOException {
385     signal(service, Signal.SIGKILL, hostname);
386   }
387 
388   @Override
389   public void suspend(ServiceType service, String hostname, int port) throws IOException {
390     signal(service, Signal.SIGSTOP, hostname);
391   }
392 
393   @Override
394   public void resume(ServiceType service, String hostname, int port) throws IOException {
395     signal(service, Signal.SIGCONT, hostname);
396   }
397 }