1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18 package org.apache.hadoop.hbase;
19
20 import java.io.Closeable;
21 import java.io.IOException;
22
23 import org.apache.commons.logging.Log;
24 import org.apache.commons.logging.LogFactory;
25 import org.apache.hadoop.hbase.classification.InterfaceAudience;
26 import org.apache.hadoop.conf.Configurable;
27 import org.apache.hadoop.conf.Configuration;
28 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
29 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
30 import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MasterService;
31 import org.apache.hadoop.hbase.util.Threads;
32
33 /**
34 * This class defines methods that can help with managing HBase clusters
35 * from unit tests and system tests. There are 3 types of cluster deployments:
36 * <ul>
37 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads,
38 * used by unit tests</li>
39 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
40 * interact with the cluster. </li>
41 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate
42 * JVMs. </li>
43 * </ul>
44 * <p>
45 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can
46 * be run against a mini-cluster during unit test execution, or a distributed cluster having
47 * tens/hundreds of nodes during execution of integration tests.
48 *
49 * <p>
50 * HBaseCluster exposes client-side public interfaces to tests, so that tests does not assume
51 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster,
52 * and some tests will still need to mock stuff and introspect internal state. For those use
53 * cases from unit tests, or if more control is needed, you can use the subclasses directly.
54 * In that sense, this class does not abstract away <strong>every</strong> interface that
55 * MiniHBaseCluster or DistributedHBaseCluster provide.
56 */
57 @InterfaceAudience.Private
58 public abstract class HBaseCluster implements Closeable, Configurable {
59 // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
60 static final Log LOG = LogFactory.getLog(HBaseCluster.class.getName());
61 protected Configuration conf;
62
63 /** the status of the cluster before we begin */
64 protected ClusterStatus initialClusterStatus;
65
66 /**
67 * Construct an HBaseCluster
68 * @param conf Configuration to be used for cluster
69 */
70 public HBaseCluster(Configuration conf) {
71 setConf(conf);
72 }
73
74 @Override
75 public void setConf(Configuration conf) {
76 this.conf = conf;
77 }
78
79 @Override
80 public Configuration getConf() {
81 return conf;
82 }
83
84 /**
85 * Returns a ClusterStatus for this HBase cluster.
86 * @see #getInitialClusterStatus()
87 */
88 public abstract ClusterStatus getClusterStatus() throws IOException;
89
90 /**
91 * Returns a ClusterStatus for this HBase cluster as observed at the
92 * starting of the HBaseCluster
93 */
94 public ClusterStatus getInitialClusterStatus() throws IOException {
95 return initialClusterStatus;
96 }
97
98 /**
99 * Returns an {@link MasterService.BlockingInterface} to the active master
100 */
101 public abstract MasterService.BlockingInterface getMasterAdminService()
102 throws IOException;
103
104 /**
105 * Returns an AdminProtocol interface to the regionserver
106 */
107 public abstract AdminService.BlockingInterface getAdminProtocol(ServerName serverName)
108 throws IOException;
109
110 /**
111 * Returns a ClientProtocol interface to the regionserver
112 */
113 public abstract ClientService.BlockingInterface getClientProtocol(ServerName serverName)
114 throws IOException;
115
116 /**
117 * Starts a new region server on the given hostname or if this is a mini/local cluster,
118 * starts a region server locally.
119 * @param hostname the hostname to start the regionserver on
120 * @throws IOException if something goes wrong
121 */
122 public abstract void startRegionServer(String hostname, int port) throws IOException;
123
124 /**
125 * Kills the region server process if this is a distributed cluster, otherwise
126 * this causes the region server to exit doing basic clean up only.
127 * @throws IOException if something goes wrong
128 */
129 public abstract void killRegionServer(ServerName serverName) throws IOException;
130
131 /**
132 * Stops the given region server, by attempting a gradual stop.
133 * @throws IOException if something goes wrong
134 */
135 public abstract void stopRegionServer(ServerName serverName) throws IOException;
136
137 /**
138 * Wait for the specified region server to join the cluster
139 * @throws IOException if something goes wrong or timeout occurs
140 */
141 public void waitForRegionServerToStart(String hostname, int port, long timeout)
142 throws IOException {
143 long start = System.currentTimeMillis();
144 while ((System.currentTimeMillis() - start) < timeout) {
145 for (ServerName server : getClusterStatus().getServers()) {
146 if (server.getHostname().equals(hostname) && server.getPort() == port) {
147 return;
148 }
149 }
150 Threads.sleep(100);
151 }
152 throw new IOException("did timeout " + timeout + "ms waiting for region server to start: "
153 + hostname);
154 }
155
156 /**
157 * Wait for the specified region server to stop the thread / process.
158 * @throws IOException if something goes wrong or timeout occurs
159 */
160 public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
161 throws IOException;
162
163 /**
164 * Suspend the region server
165 * @param serverName the hostname to suspend the regionserver on
166 * @throws IOException if something goes wrong
167 */
168 public abstract void suspendRegionServer(ServerName serverName) throws IOException;
169
170 /**
171 * Resume the region server
172 * @param serverName the hostname to resume the regionserver on
173 * @throws IOException if something goes wrong
174 */
175 public abstract void resumeRegionServer(ServerName serverName) throws IOException;
176
177 /**
178 * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster,
179 * silently logs warning message.
180 * @param hostname the hostname to start the regionserver on
181 * @throws IOException if something goes wrong
182 */
183 public abstract void startZkNode(String hostname, int port) throws IOException;
184
185 /**
186 * Kills the zookeeper node process if this is a distributed cluster, otherwise,
187 * this causes master to exit doing basic clean up only.
188 * @throws IOException if something goes wrong
189 */
190 public abstract void killZkNode(ServerName serverName) throws IOException;
191
192 /**
193 * Stops the region zookeeper if this is a distributed cluster, otherwise
194 * silently logs warning message.
195 * @throws IOException if something goes wrong
196 */
197 public abstract void stopZkNode(ServerName serverName) throws IOException;
198
199 /**
200 * Wait for the specified zookeeper node to join the cluster
201 * @throws IOException if something goes wrong or timeout occurs
202 */
203 public abstract void waitForZkNodeToStart(ServerName serverName, long timeout)
204 throws IOException;
205
206 /**
207 * Wait for the specified zookeeper node to stop the thread / process.
208 * @throws IOException if something goes wrong or timeout occurs
209 */
210 public abstract void waitForZkNodeToStop(ServerName serverName, long timeout)
211 throws IOException;
212
213 /**
214 * Starts a new datanode on the given hostname or if this is a mini/local cluster,
215 * silently logs warning message.
216 * @throws IOException if something goes wrong
217 */
218 public abstract void startDataNode(ServerName serverName) throws IOException;
219
220 /**
221 * Kills the datanode process if this is a distributed cluster, otherwise,
222 * this causes master to exit doing basic clean up only.
223 * @throws IOException if something goes wrong
224 */
225 public abstract void killDataNode(ServerName serverName) throws IOException;
226
227 /**
228 * Stops the datanode if this is a distributed cluster, otherwise
229 * silently logs warning message.
230 * @throws IOException if something goes wrong
231 */
232 public abstract void stopDataNode(ServerName serverName) throws IOException;
233
234 /**
235 * Wait for the specified datanode to join the cluster
236 * @throws IOException if something goes wrong or timeout occurs
237 */
238 public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
239 throws IOException;
240
241 /**
242 * Wait for the specified datanode to stop the thread / process.
243 * @throws IOException if something goes wrong or timeout occurs
244 */
245 public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
246 throws IOException;
247
248 /**
249 * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
250 * warning message.
251 * @throws IOException if something goes wrong
252 */
253 public abstract void startNameNode(ServerName serverName) throws IOException;
254
255 /**
256 * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
257 * exit doing basic clean up only.
258 * @throws IOException if something goes wrong
259 */
260 public abstract void killNameNode(ServerName serverName) throws IOException;
261
262 /**
263 * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
264 * @throws IOException if something goes wrong
265 */
266 public abstract void stopNameNode(ServerName serverName) throws IOException;
267
268 /**
269 * Wait for the specified namenode to join the cluster
270 * @throws IOException if something goes wrong or timeout occurs
271 */
272 public abstract void waitForNameNodeToStart(ServerName serverName, long timeout)
273 throws IOException;
274
275 /**
276 * Wait for the specified namenode to stop
277 * @throws IOException if something goes wrong or timeout occurs
278 */
279 public abstract void waitForNameNodeToStop(ServerName serverName, long timeout)
280 throws IOException;
281
282 /**
283 * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master
284 * locally.
285 * @param hostname the hostname to start the master on
286 * @throws IOException if something goes wrong
287 */
288 public abstract void startMaster(String hostname, int port) throws IOException;
289
290 /**
291 * Kills the master process if this is a distributed cluster, otherwise,
292 * this causes master to exit doing basic clean up only.
293 * @throws IOException if something goes wrong
294 */
295 public abstract void killMaster(ServerName serverName) throws IOException;
296
297 /**
298 * Stops the given master, by attempting a gradual stop.
299 * @throws IOException if something goes wrong
300 */
301 public abstract void stopMaster(ServerName serverName) throws IOException;
302
303 /**
304 * Wait for the specified master to stop the thread / process.
305 * @throws IOException if something goes wrong or timeout occurs
306 */
307 public abstract void waitForMasterToStop(ServerName serverName, long timeout)
308 throws IOException;
309
310 /**
311 * Blocks until there is an active master and that master has completed
312 * initialization.
313 *
314 * @return true if an active master becomes available. false if there are no
315 * masters left.
316 * @throws IOException if something goes wrong or timeout occurs
317 */
318 public boolean waitForActiveAndReadyMaster()
319 throws IOException {
320 return waitForActiveAndReadyMaster(Long.MAX_VALUE);
321 }
322
323 /**
324 * Blocks until there is an active master and that master has completed
325 * initialization.
326 * @param timeout the timeout limit in ms
327 * @return true if an active master becomes available. false if there are no
328 * masters left.
329 */
330 public abstract boolean waitForActiveAndReadyMaster(long timeout)
331 throws IOException;
332
333 /**
334 * Wait for HBase Cluster to shut down.
335 */
336 public abstract void waitUntilShutDown() throws IOException;
337
338 /**
339 * Shut down the HBase cluster
340 */
341 public abstract void shutdown() throws IOException;
342
343 /**
344 * Restores the cluster to it's initial state if this is a real cluster,
345 * otherwise does nothing.
346 * This is a best effort restore. If the servers are not reachable, or insufficient
347 * permissions, etc. restoration might be partial.
348 * @return whether restoration is complete
349 */
350 public boolean restoreInitialStatus() throws IOException {
351 return restoreClusterStatus(getInitialClusterStatus());
352 }
353
354 /**
355 * Restores the cluster to given state if this is a real cluster,
356 * otherwise does nothing.
357 * This is a best effort restore. If the servers are not reachable, or insufficient
358 * permissions, etc. restoration might be partial.
359 * @return whether restoration is complete
360 */
361 public boolean restoreClusterStatus(ClusterStatus desiredStatus) throws IOException {
362 return true;
363 }
364
365 /**
366 * Get the ServerName of region server serving the first hbase:meta region
367 */
368 public ServerName getServerHoldingMeta() throws IOException {
369 return getServerHoldingRegion(TableName.META_TABLE_NAME,
370 HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
371 }
372
373 /**
374 * Get the ServerName of region server serving the specified region
375 * @param regionName Name of the region in bytes
376 * @param tn Table name that has the region.
377 * @return ServerName that hosts the region or null
378 */
379 public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
380 throws IOException;
381
382 /**
383 * @return whether we are interacting with a distributed cluster as opposed to an
384 * in-process mini/local cluster.
385 */
386 public boolean isDistributedCluster() {
387 return false;
388 }
389
390 /**
391 * Closes all the resources held open for this cluster. Note that this call does not shutdown
392 * the cluster.
393 * @see #shutdown()
394 */
395 @Override
396 public abstract void close() throws IOException;
397
398 /**
399 * Wait for the namenode.
400 *
401 * @throws InterruptedException
402 */
403 public void waitForNamenodeAvailable() throws InterruptedException {
404 }
405
406 public void waitForDatanodesRegistered(int nbDN) throws Exception {
407 }
408 }