1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase;
20
21 import java.io.File;
22 import java.io.IOException;
23 import java.util.Locale;
24 import java.util.Map;
25
26 import org.apache.commons.lang.StringUtils;
27 import org.apache.hadoop.hbase.classification.InterfaceAudience;
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.conf.Configured;
32 import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
33 import org.apache.hadoop.hbase.util.Pair;
34 import org.apache.hadoop.hbase.util.RetryCounter;
35 import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
36 import org.apache.hadoop.hbase.util.RetryCounterFactory;
37 import org.apache.hadoop.util.Shell;
38
39
40
41
42
43
44
45
46 @InterfaceAudience.Private
47 public class HBaseClusterManager extends Configured implements ClusterManager {
48
49 protected enum Signal {
50 SIGKILL,
51 SIGSTOP,
52 SIGCONT,
53 }
54
55 protected static final Log LOG = LogFactory.getLog(HBaseClusterManager.class);
56 private String sshUserName;
57 private String sshOptions;
58
59
60
61
62
63
64 private static final String DEFAULT_TUNNEL_CMD =
65 "/usr/bin/ssh %1$s %2$s%3$s%4$s \"sudo -u %6$s %5$s\"";
66 private String tunnelCmd;
67
68 private static final String RETRY_ATTEMPTS_KEY = "hbase.it.clustermanager.retry.attempts";
69 private static final int DEFAULT_RETRY_ATTEMPTS = 5;
70
71 private static final String RETRY_SLEEP_INTERVAL_KEY = "hbase.it.clustermanager.retry.sleep.interval";
72 private static final int DEFAULT_RETRY_SLEEP_INTERVAL = 1000;
73
74 protected RetryCounterFactory retryCounterFactory;
75
76 @Override
77 public void setConf(Configuration conf) {
78 super.setConf(conf);
79 if (conf == null) {
80
81 return;
82 }
83 sshUserName = conf.get("hbase.it.clustermanager.ssh.user", "");
84 String extraSshOptions = conf.get("hbase.it.clustermanager.ssh.opts", "");
85 sshOptions = System.getenv("HBASE_SSH_OPTS");
86 if (!extraSshOptions.isEmpty()) {
87 sshOptions = StringUtils.join(new Object[] { sshOptions, extraSshOptions }, " ");
88 }
89 sshOptions = (sshOptions == null) ? "" : sshOptions;
90 sshUserName = (sshUserName == null) ? "" : sshUserName;
91 tunnelCmd = conf.get("hbase.it.clustermanager.ssh.cmd", DEFAULT_TUNNEL_CMD);
92
93 if ((sshUserName != null && sshUserName.length() > 0) ||
94 (sshOptions != null && sshOptions.length() > 0)) {
95 LOG.info("Running with SSH user [" + sshUserName + "] and options [" + sshOptions + "]");
96 }
97
98 this.retryCounterFactory = new RetryCounterFactory(new RetryConfig()
99 .setMaxAttempts(conf.getInt(RETRY_ATTEMPTS_KEY, DEFAULT_RETRY_ATTEMPTS))
100 .setSleepInterval(conf.getLong(RETRY_SLEEP_INTERVAL_KEY, DEFAULT_RETRY_SLEEP_INTERVAL)));
101 }
102
103 protected String getServiceUser(ServiceType service) {
104 Configuration conf = getConf();
105 switch (service) {
106 case HADOOP_DATANODE:
107 case HADOOP_NAMENODE:
108 return conf.get("hbase.it.clustermanager.hadoop.hdfs.user", "hdfs");
109 case ZOOKEEPER_SERVER:
110 return conf.get("hbase.it.clustermanager.zookeeper.user", "zookeeper");
111 default:
112 return conf.get("hbase.it.clustermanager.hbase.user", "hbase");
113 }
114 }
115
116
117
118
119 protected class RemoteShell extends Shell.ShellCommandExecutor {
120 private String hostname;
121 private String user;
122
123 public RemoteShell(String hostname, String[] execString, File dir, Map<String, String> env,
124 long timeout) {
125 super(execString, dir, env, timeout);
126 this.hostname = hostname;
127 }
128
129 public RemoteShell(String hostname, String[] execString, File dir, Map<String, String> env) {
130 super(execString, dir, env);
131 this.hostname = hostname;
132 }
133
134 public RemoteShell(String hostname, String[] execString, File dir) {
135 super(execString, dir);
136 this.hostname = hostname;
137 }
138
139 public RemoteShell(String hostname, String[] execString) {
140 super(execString);
141 this.hostname = hostname;
142 }
143
144 public RemoteShell(String hostname, String user, String[] execString) {
145 super(execString);
146 this.hostname = hostname;
147 this.user = user;
148 }
149
150 @Override
151 public String[] getExecString() {
152 String at = sshUserName.isEmpty() ? "" : "@";
153 String remoteCmd = StringUtils.join(super.getExecString(), " ");
154 String cmd = String.format(tunnelCmd, sshOptions, sshUserName, at, hostname, remoteCmd, user);
155 LOG.info("Executing full command [" + cmd + "]");
156 return new String[] { "/usr/bin/env", "bash", "-c", cmd };
157 }
158
159 @Override
160 public void execute() throws IOException {
161 super.execute();
162 }
163 }
164
165
166
167
168
169
170 static abstract class CommandProvider {
171
172 enum Operation {
173 START, STOP, RESTART
174 }
175
176 public abstract String getCommand(ServiceType service, Operation op);
177
178 public String isRunningCommand(ServiceType service) {
179 return findPidCommand(service);
180 }
181
182 protected String findPidCommand(ServiceType service) {
183 return String.format("ps ux | grep proc_%s | grep -v grep | tr -s ' ' | cut -d ' ' -f2",
184 service);
185 }
186
187 public String signalCommand(ServiceType service, String signal) {
188 return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
189 }
190 }
191
192
193
194
195 static class HBaseShellCommandProvider extends CommandProvider {
196 private final String hbaseHome;
197 private final String confDir;
198
199 HBaseShellCommandProvider(Configuration conf) {
200 hbaseHome = conf.get("hbase.it.clustermanager.hbase.home",
201 System.getenv("HBASE_HOME"));
202 String tmp = conf.get("hbase.it.clustermanager.hbase.conf.dir",
203 System.getenv("HBASE_CONF_DIR"));
204 if (tmp != null) {
205 confDir = String.format("--config %s", tmp);
206 } else {
207 confDir = "";
208 }
209 }
210
211 @Override
212 public String getCommand(ServiceType service, Operation op) {
213 return String.format("%s/bin/hbase-daemon.sh %s %s %s", hbaseHome, confDir,
214 op.toString().toLowerCase(Locale.ROOT), service);
215 }
216 }
217
218
219
220
221 static class HadoopShellCommandProvider extends CommandProvider {
222 private final String hadoopHome;
223 private final String confDir;
224
225 HadoopShellCommandProvider(Configuration conf) throws IOException {
226 hadoopHome = conf.get("hbase.it.clustermanager.hadoop.home",
227 System.getenv("HADOOP_HOME"));
228 String tmp = conf.get("hbase.it.clustermanager.hadoop.conf.dir",
229 System.getenv("HADOOP_CONF_DIR"));
230 if (hadoopHome == null) {
231 throw new IOException("Hadoop home configuration parameter i.e. " +
232 "'hbase.it.clustermanager.hadoop.home' is not configured properly.");
233 }
234 if (tmp != null) {
235 confDir = String.format("--config %s", tmp);
236 } else {
237 confDir = "";
238 }
239 }
240
241 @Override
242 public String getCommand(ServiceType service, Operation op) {
243 return String.format("%s/sbin/hadoop-daemon.sh %s %s %s", hadoopHome, confDir,
244 op.toString().toLowerCase(Locale.ROOT), service);
245 }
246 }
247
248
249
250
251 static class ZookeeperShellCommandProvider extends CommandProvider {
252 private final String zookeeperHome;
253 private final String confDir;
254
255 ZookeeperShellCommandProvider(Configuration conf) throws IOException {
256 zookeeperHome = conf.get("hbase.it.clustermanager.zookeeper.home",
257 System.getenv("ZOOBINDIR"));
258 String tmp = conf.get("hbase.it.clustermanager.zookeeper.conf.dir",
259 System.getenv("ZOOCFGDIR"));
260 if (zookeeperHome == null) {
261 throw new IOException("Zookeeper home configuration parameter i.e. " +
262 "'hbase.it.clustermanager.zookeeper.home' is not configured properly.");
263 }
264 if (tmp != null) {
265 confDir = String.format("--config %s", tmp);
266 } else {
267 confDir = "";
268 }
269 }
270
271 @Override
272 public String getCommand(ServiceType service, Operation op) {
273 return String.format("%s/bin/zkServer.sh %s", zookeeperHome, op.toString().toLowerCase(Locale.ROOT));
274 }
275
276 @Override
277 protected String findPidCommand(ServiceType service) {
278 return String.format("ps ux | grep %s | grep -v grep | tr -s ' ' | cut -d ' ' -f2",
279 service);
280 }
281 }
282
283 public HBaseClusterManager() {
284 }
285
286 protected CommandProvider getCommandProvider(ServiceType service) throws IOException {
287 switch (service) {
288 case HADOOP_DATANODE:
289 case HADOOP_NAMENODE:
290 return new HadoopShellCommandProvider(getConf());
291 case ZOOKEEPER_SERVER:
292 return new ZookeeperShellCommandProvider(getConf());
293 default:
294 return new HBaseShellCommandProvider(getConf());
295 }
296 }
297
298
299
300
301
302
303 protected Pair<Integer, String> exec(String hostname, ServiceType service, String... cmd)
304 throws IOException {
305 LOG.info("Executing remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname);
306 RemoteShell shell = new RemoteShell(hostname, getServiceUser(service), cmd);
307 try {
308 shell.execute();
309 } catch (Shell.ExitCodeException ex) {
310
311 String output = shell.getOutput();
312
313 throw new Shell.ExitCodeException(ex.getExitCode(), "stderr: " + ex.getMessage()
314 + ", stdout: " + output);
315 }
316
317 LOG.info("Executed remote command, exit code:" + shell.getExitCode()
318 + " , output:" + shell.getOutput());
319
320 return new Pair<Integer, String>(shell.getExitCode(), shell.getOutput());
321 }
322
323 private Pair<Integer, String> execWithRetries(String hostname, ServiceType service, String... cmd)
324 throws IOException {
325 RetryCounter retryCounter = retryCounterFactory.create();
326 while (true) {
327 try {
328 return exec(hostname, service, cmd);
329 } catch (IOException e) {
330 retryOrThrow(retryCounter, e, hostname, cmd);
331 }
332 try {
333 retryCounter.sleepUntilNextRetry();
334 } catch (InterruptedException ex) {
335
336 LOG.warn("Sleep Interrupted:" + ex);
337 }
338 }
339 }
340
341 private <E extends Exception> void retryOrThrow(RetryCounter retryCounter, E ex,
342 String hostname, String[] cmd) throws E {
343 if (retryCounter.shouldRetry()) {
344 LOG.warn("Remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname
345 + " failed at attempt " + retryCounter.getAttemptTimes() + ". Retrying until maxAttempts: "
346 + retryCounter.getMaxAttempts() + ". Exception: " + ex.getMessage());
347 return;
348 }
349 throw ex;
350 }
351
352 private void exec(String hostname, ServiceType service, Operation op) throws IOException {
353 execWithRetries(hostname, service, getCommandProvider(service).getCommand(service, op));
354 }
355
356 @Override
357 public void start(ServiceType service, String hostname, int port) throws IOException {
358 exec(hostname, service, Operation.START);
359 }
360
361 @Override
362 public void stop(ServiceType service, String hostname, int port) throws IOException {
363 exec(hostname, service, Operation.STOP);
364 }
365
366 @Override
367 public void restart(ServiceType service, String hostname, int port) throws IOException {
368 exec(hostname, service, Operation.RESTART);
369 }
370
371 public void signal(ServiceType service, Signal signal, String hostname) throws IOException {
372 execWithRetries(hostname, service,
373 getCommandProvider(service).signalCommand(service, signal.toString()));
374 }
375
376 @Override
377 public boolean isRunning(ServiceType service, String hostname, int port) throws IOException {
378 String ret = execWithRetries(hostname, service,
379 getCommandProvider(service).isRunningCommand(service)).getSecond();
380 return ret.length() > 0;
381 }
382
383 @Override
384 public void kill(ServiceType service, String hostname, int port) throws IOException {
385 signal(service, Signal.SIGKILL, hostname);
386 }
387
388 @Override
389 public void suspend(ServiceType service, String hostname, int port) throws IOException {
390 signal(service, Signal.SIGSTOP, hostname);
391 }
392
393 @Override
394 public void resume(ServiceType service, String hostname, int port) throws IOException {
395 signal(service, Signal.SIGCONT, hostname);
396 }
397 }