View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase;
19  
20  import java.io.Closeable;
21  import java.io.IOException;
22  
23  import org.apache.commons.logging.Log;
24  import org.apache.commons.logging.LogFactory;
25  import org.apache.hadoop.hbase.classification.InterfaceAudience;
26  import org.apache.hadoop.conf.Configurable;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
29  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
30  import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MasterService;
31  import org.apache.hadoop.hbase.util.Threads;
32  
33  /**
34   * This class defines methods that can help with managing HBase clusters
35   * from unit tests and system tests. There are 3 types of cluster deployments:
36   * <ul>
37   * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads,
38   * used by unit tests</li>
39   * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
40   * interact with the cluster. </li>
41   * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate
42   * JVMs. </li>
43   * </ul>
44   * <p>
45   * HBaseCluster unifies the way tests interact with the cluster, so that the same test can
46   * be run against a mini-cluster during unit test execution, or a distributed cluster having
47   * tens/hundreds of nodes during execution of integration tests.
48   *
49   * <p>
50   * HBaseCluster exposes client-side public interfaces to tests, so that tests does not assume
51   * running in a particular mode. Not all the tests are suitable to be run on an actual cluster,
52   * and some tests will still need to mock stuff and introspect internal state. For those use
53   * cases from unit tests, or if more control is needed, you can use the subclasses directly.
54   * In that sense, this class does not abstract away <strong>every</strong> interface that
55   * MiniHBaseCluster or DistributedHBaseCluster provide.
56   */
57  @InterfaceAudience.Private
58  public abstract class HBaseCluster implements Closeable, Configurable {
59    // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
60    static final Log LOG = LogFactory.getLog(HBaseCluster.class.getName());
61    protected Configuration conf;
62  
63    /** the status of the cluster before we begin */
64    protected ClusterStatus initialClusterStatus;
65  
66    /**
67     * Construct an HBaseCluster
68     * @param conf Configuration to be used for cluster
69     */
70    public HBaseCluster(Configuration conf) {
71      setConf(conf);
72    }
73  
74    @Override
75    public void setConf(Configuration conf) {
76      this.conf = conf;
77    }
78  
79    @Override
80    public Configuration getConf() {
81      return conf;
82    }
83  
84    /**
85     * Returns a ClusterStatus for this HBase cluster.
86     * @see #getInitialClusterStatus()
87     */
88    public abstract ClusterStatus getClusterStatus() throws IOException;
89  
90    /**
91     * Returns a ClusterStatus for this HBase cluster as observed at the
92     * starting of the HBaseCluster
93     */
94    public ClusterStatus getInitialClusterStatus() throws IOException {
95      return initialClusterStatus;
96    }
97  
98    /**
99     * Returns an {@link MasterService.BlockingInterface} to the active master
100    */
101   public abstract MasterService.BlockingInterface getMasterAdminService()
102   throws IOException;
103 
104   /**
105    * Returns an AdminProtocol interface to the regionserver
106    */
107   public abstract AdminService.BlockingInterface getAdminProtocol(ServerName serverName)
108   throws IOException;
109 
110   /**
111    * Returns a ClientProtocol interface to the regionserver
112    */
113   public abstract ClientService.BlockingInterface getClientProtocol(ServerName serverName)
114   throws IOException;
115 
116   /**
117    * Starts a new region server on the given hostname or if this is a mini/local cluster,
118    * starts a region server locally.
119    * @param hostname the hostname to start the regionserver on
120    * @throws IOException if something goes wrong
121    */
122   public abstract void startRegionServer(String hostname, int port) throws IOException;
123 
124   /**
125    * Kills the region server process if this is a distributed cluster, otherwise
126    * this causes the region server to exit doing basic clean up only.
127    * @throws IOException if something goes wrong
128    */
129   public abstract void killRegionServer(ServerName serverName) throws IOException;
130 
131   /**
132    * Stops the given region server, by attempting a gradual stop.
133    * @throws IOException if something goes wrong
134    */
135   public abstract void stopRegionServer(ServerName serverName) throws IOException;
136 
137   /**
138    * Wait for the specified region server to join the cluster
139    * @throws IOException if something goes wrong or timeout occurs
140    */
141   public void waitForRegionServerToStart(String hostname, int port, long timeout)
142       throws IOException {
143     long start = System.currentTimeMillis();
144     while ((System.currentTimeMillis() - start) < timeout) {
145       for (ServerName server : getClusterStatus().getServers()) {
146         if (server.getHostname().equals(hostname) && server.getPort() == port) {
147           return;
148         }
149       }
150       Threads.sleep(100);
151     }
152     throw new IOException("did timeout " + timeout + "ms waiting for region server to start: "
153         + hostname);
154   }
155 
156   /**
157    * Wait for the specified region server to stop the thread / process.
158    * @throws IOException if something goes wrong or timeout occurs
159    */
160   public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
161       throws IOException;
162 
163   /**
164    * Suspend the region server
165    * @param serverName the hostname to suspend the regionserver on
166    * @throws IOException if something goes wrong
167    */
168   public abstract void suspendRegionServer(ServerName serverName) throws IOException;
169 
170   /**
171    * Resume the region server
172    * @param serverName the hostname to resume the regionserver on
173    * @throws IOException if something goes wrong
174    */
175   public abstract void resumeRegionServer(ServerName serverName) throws IOException;
176 
177   /**
178    * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster,
179    * silently logs warning message.
180    * @param hostname the hostname to start the regionserver on
181    * @throws IOException if something goes wrong
182    */
183   public abstract void startZkNode(String hostname, int port) throws IOException;
184 
185   /**
186    * Kills the zookeeper node process if this is a distributed cluster, otherwise,
187    * this causes master to exit doing basic clean up only.
188    * @throws IOException if something goes wrong
189    */
190   public abstract void killZkNode(ServerName serverName) throws IOException;
191 
192   /**
193    * Stops the region zookeeper if this is a distributed cluster, otherwise
194    * silently logs warning message.
195    * @throws IOException if something goes wrong
196    */
197   public abstract void stopZkNode(ServerName serverName) throws IOException;
198 
199   /**
200    * Wait for the specified zookeeper node to join the cluster
201    * @throws IOException if something goes wrong or timeout occurs
202    */
203   public abstract void waitForZkNodeToStart(ServerName serverName, long timeout)
204     throws IOException;
205 
206   /**
207    * Wait for the specified zookeeper node to stop the thread / process.
208    * @throws IOException if something goes wrong or timeout occurs
209    */
210   public abstract void waitForZkNodeToStop(ServerName serverName, long timeout)
211     throws IOException;
212 
213   /**
214    * Starts a new datanode on the given hostname or if this is a mini/local cluster,
215    * silently logs warning message.
216    * @throws IOException if something goes wrong
217    */
218   public abstract void startDataNode(ServerName serverName) throws IOException;
219 
220   /**
221    * Kills the datanode process if this is a distributed cluster, otherwise,
222    * this causes master to exit doing basic clean up only.
223    * @throws IOException if something goes wrong
224    */
225   public abstract void killDataNode(ServerName serverName) throws IOException;
226 
227   /**
228    * Stops the datanode if this is a distributed cluster, otherwise
229    * silently logs warning message.
230    * @throws IOException if something goes wrong
231    */
232   public abstract void stopDataNode(ServerName serverName) throws IOException;
233 
234   /**
235    * Wait for the specified datanode to join the cluster
236    * @throws IOException if something goes wrong or timeout occurs
237    */
238   public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
239     throws IOException;
240 
241   /**
242    * Wait for the specified datanode to stop the thread / process.
243    * @throws IOException if something goes wrong or timeout occurs
244    */
245   public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
246     throws IOException;
247 
248   /**
249    * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
250    * warning message.
251    * @throws IOException if something goes wrong
252    */
253   public abstract void startNameNode(ServerName serverName) throws IOException;
254 
255   /**
256    * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
257    * exit doing basic clean up only.
258    * @throws IOException if something goes wrong
259    */
260   public abstract void killNameNode(ServerName serverName) throws IOException;
261 
262   /**
263    * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
264    * @throws IOException if something goes wrong
265    */
266   public abstract void stopNameNode(ServerName serverName) throws IOException;
267 
268   /**
269    * Wait for the specified namenode to join the cluster
270    * @throws IOException if something goes wrong or timeout occurs
271    */
272   public abstract void waitForNameNodeToStart(ServerName serverName, long timeout)
273       throws IOException;
274 
275   /**
276    * Wait for the specified namenode to stop
277    * @throws IOException if something goes wrong or timeout occurs
278    */
279   public abstract void waitForNameNodeToStop(ServerName serverName, long timeout)
280       throws IOException;
281 
282   /**
283    * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master
284    * locally.
285    * @param hostname the hostname to start the master on
286    * @throws IOException if something goes wrong
287    */
288   public abstract void startMaster(String hostname, int port) throws IOException;
289 
290   /**
291    * Kills the master process if this is a distributed cluster, otherwise,
292    * this causes master to exit doing basic clean up only.
293    * @throws IOException if something goes wrong
294    */
295   public abstract void killMaster(ServerName serverName) throws IOException;
296 
297   /**
298    * Stops the given master, by attempting a gradual stop.
299    * @throws IOException if something goes wrong
300    */
301   public abstract void stopMaster(ServerName serverName) throws IOException;
302 
303   /**
304    * Wait for the specified master to stop the thread / process.
305    * @throws IOException if something goes wrong or timeout occurs
306    */
307   public abstract void waitForMasterToStop(ServerName serverName, long timeout)
308       throws IOException;
309 
310   /**
311    * Blocks until there is an active master and that master has completed
312    * initialization.
313    *
314    * @return true if an active master becomes available.  false if there are no
315    *         masters left.
316    * @throws IOException if something goes wrong or timeout occurs
317    */
318   public boolean waitForActiveAndReadyMaster()
319       throws IOException {
320     return waitForActiveAndReadyMaster(Long.MAX_VALUE);
321   }
322 
323   /**
324    * Blocks until there is an active master and that master has completed
325    * initialization.
326    * @param timeout the timeout limit in ms
327    * @return true if an active master becomes available.  false if there are no
328    *         masters left.
329    */
330   public abstract boolean waitForActiveAndReadyMaster(long timeout)
331       throws IOException;
332 
333   /**
334    * Wait for HBase Cluster to shut down.
335    */
336   public abstract void waitUntilShutDown() throws IOException;
337 
338   /**
339    * Shut down the HBase cluster
340    */
341   public abstract void shutdown() throws IOException;
342 
343   /**
344    * Restores the cluster to it's initial state if this is a real cluster,
345    * otherwise does nothing.
346    * This is a best effort restore. If the servers are not reachable, or insufficient
347    * permissions, etc. restoration might be partial.
348    * @return whether restoration is complete
349    */
350   public boolean restoreInitialStatus() throws IOException {
351     return restoreClusterStatus(getInitialClusterStatus());
352   }
353 
354   /**
355    * Restores the cluster to given state if this is a real cluster,
356    * otherwise does nothing.
357    * This is a best effort restore. If the servers are not reachable, or insufficient
358    * permissions, etc. restoration might be partial.
359    * @return whether restoration is complete
360    */
361   public boolean restoreClusterStatus(ClusterStatus desiredStatus) throws IOException {
362     return true;
363   }
364 
365   /**
366    * Get the ServerName of region server serving the first hbase:meta region
367    */
368   public ServerName getServerHoldingMeta() throws IOException {
369     return getServerHoldingRegion(TableName.META_TABLE_NAME,
370       HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
371   }
372 
373   /**
374    * Get the ServerName of region server serving the specified region
375    * @param regionName Name of the region in bytes
376    * @param tn Table name that has the region.
377    * @return ServerName that hosts the region or null
378    */
379   public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
380   throws IOException;
381 
382   /**
383    * @return whether we are interacting with a distributed cluster as opposed to an
384    * in-process mini/local cluster.
385    */
386   public boolean isDistributedCluster() {
387     return false;
388   }
389 
390   /**
391    * Closes all the resources held open for this cluster. Note that this call does not shutdown
392    * the cluster.
393    * @see #shutdown()
394    */
395   @Override
396   public abstract void close() throws IOException;
397 
398   /**
399    * Wait for the namenode.
400    *
401    * @throws InterruptedException
402    */
403   public void waitForNamenodeAvailable() throws InterruptedException {
404   }
405 
406   public void waitForDatanodesRegistered(int nbDN) throws Exception {
407   }
408 }