View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.util.Shell.ExitCodeException;
26  import org.apache.hadoop.util.Shell.ShellCommandExecutor;
27  
28  /**
29   * A utility for executing an external script that checks the health of
30   * the node. An example script can be found at
31   * <tt>src/main/sh/healthcheck/healthcheck.sh</tt> in the
32   * <tt>hbase-examples</tt> module.
33   */
34  class HealthChecker {
35  
36    private static final Log LOG = LogFactory.getLog(HealthChecker.class);
37    private ShellCommandExecutor shexec = null;
38    private String exceptionStackTrace;
39  
40    /** Pattern used for searching in the output of the node health script */
41    static private final String ERROR_PATTERN = "ERROR";
42  
43    private String healthCheckScript;
44    private long scriptTimeout;
45  
46    enum HealthCheckerExitStatus {
47      SUCCESS,
48      TIMED_OUT,
49      FAILED_WITH_EXIT_CODE,
50      FAILED_WITH_EXCEPTION,
51      FAILED
52    }
53  
54    /**
55     * Initialize.
56     *
57     * @param location the location of the health script
58     * @param timeout the timeout to be used for the health script
59     */
60    public void init(String location, long timeout) {
61      this.healthCheckScript = location;
62      this.scriptTimeout = timeout;
63      ArrayList<String> execScript = new ArrayList<String>();
64      execScript.add(healthCheckScript);
65      this.shexec = new ShellCommandExecutor(execScript.toArray(new String[execScript.size()]), null,
66          null, scriptTimeout);
67      LOG.info("HealthChecker initialized with script at " + this.healthCheckScript +
68        ", timeout=" + timeout);
69    }
70  
71    public HealthReport checkHealth() {
72      HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS;
73      try {
74        // Calling this execute leaves around running executor threads.
75        shexec.execute();
76      } catch (ExitCodeException e) {
77        // ignore the exit code of the script
78        LOG.warn("Caught exception : " + e + ",exit code:" + e.getExitCode());
79        status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE;
80      } catch (IOException e) {
81        LOG.warn("Caught exception : " + e);
82        status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION;
83        exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e);
84      } finally {
85        if (shexec.isTimedOut()) {
86          status = HealthCheckerExitStatus.TIMED_OUT;
87        }
88        if (status == HealthCheckerExitStatus.SUCCESS) {
89          if (hasErrors(shexec.getOutput())) {
90            status = HealthCheckerExitStatus.FAILED;
91          }
92        }
93      }
94      return new HealthReport(status, getHealthReport(status));
95    }
96  
97    private boolean hasErrors(String output) {
98      String[] splits = output.split("\n");
99      for (String split : splits) {
100       if (split.startsWith(ERROR_PATTERN)) {
101         return true;
102       }
103     }
104     return false;
105   }
106 
107   private String getHealthReport(HealthCheckerExitStatus status){
108     String healthReport = null;
109     switch (status) {
110     case SUCCESS:
111       healthReport = "Server is healthy.";
112       break;
113     case TIMED_OUT:
114       healthReport = "Health script timed out";
115       break;
116     case FAILED_WITH_EXCEPTION:
117       healthReport = exceptionStackTrace;
118       break;
119     case FAILED_WITH_EXIT_CODE:
120       healthReport = "Health script failed with exit code.";
121       break;
122     case FAILED:
123       healthReport = shexec.getOutput();
124       break;
125     }
126     return healthReport;
127   }
128 }