1
2
3
4
5
6
7
8
9
10
11
12 package org.apache.hadoop.hbase.coordination;
13
14 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLIT;
15 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REGION_SPLITTING;
16 import static org.apache.hadoop.hbase.executor.EventType.RS_ZK_REQUEST_REGION_SPLIT;
17
18 import java.io.IOException;
19 import java.util.List;
20
21 import org.apache.commons.logging.Log;
22 import org.apache.commons.logging.LogFactory;
23 import org.apache.hadoop.hbase.CoordinatedStateManager;
24 import org.apache.hadoop.hbase.HRegionInfo;
25 import org.apache.hadoop.hbase.RegionTransition;
26 import org.apache.hadoop.hbase.ServerName;
27 import org.apache.hadoop.hbase.coordination.SplitTransactionCoordination;
28 import org.apache.hadoop.hbase.executor.EventType;
29 import org.apache.hadoop.hbase.regionserver.HRegion;
30 import org.apache.hadoop.hbase.regionserver.Region;
31 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
32 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
33 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
34 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
35 import org.apache.zookeeper.KeeperException;
36 import org.apache.zookeeper.data.Stat;
37
38 public class ZKSplitTransactionCoordination implements SplitTransactionCoordination {
39
40 private CoordinatedStateManager coordinationManager;
41 private final ZooKeeperWatcher watcher;
42
43
44
45
46
47
48 private static final int SPIN_WAIT_TIMEOUT = 100;
49
50 private static final Log LOG = LogFactory.getLog(ZKSplitTransactionCoordination.class);
51
52 public ZKSplitTransactionCoordination(CoordinatedStateManager coordinationProvider,
53 ZooKeeperWatcher watcher) {
54 this.coordinationManager = coordinationProvider;
55 this.watcher = watcher;
56 }
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71 @Override
72 public void startSplitTransaction(HRegion parent, ServerName serverName, HRegionInfo hri_a,
73 HRegionInfo hri_b) throws IOException {
74
75 HRegionInfo region = parent.getRegionInfo();
76 try {
77
78 LOG.debug(watcher.prefix("Creating ephemeral node for " + region.getEncodedName()
79 + " in PENDING_SPLIT state"));
80 byte[] payload = HRegionInfo.toDelimitedByteArray(hri_a, hri_b);
81 RegionTransition rt =
82 RegionTransition.createRegionTransition(RS_ZK_REQUEST_REGION_SPLIT,
83 region.getRegionName(), serverName, payload);
84 String node = ZKAssign.getNodeName(watcher, region.getEncodedName());
85 if (!ZKUtil.createEphemeralNodeAndWatch(watcher, node, rt.toByteArray())) {
86 throw new IOException("Failed create of ephemeral " + node);
87 }
88
89 } catch (KeeperException e) {
90 throw new IOException("Failed creating PENDING_SPLIT znode on "
91 + parent.getRegionInfo().getRegionNameAsString(), e);
92 }
93
94 }
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129 private int transitionSplittingNode(HRegionInfo parent, HRegionInfo a, HRegionInfo b,
130 ServerName serverName, SplitTransactionDetails std, final EventType beginState,
131 final EventType endState) throws IOException {
132 ZkSplitTransactionDetails zstd = (ZkSplitTransactionDetails) std;
133 byte[] payload = HRegionInfo.toDelimitedByteArray(a, b);
134 try {
135 return ZKAssign.transitionNode(watcher, parent, serverName, beginState, endState,
136 zstd.getZnodeVersion(), payload);
137 } catch (KeeperException e) {
138 throw new IOException(
139 "Failed transition of splitting node " + parent.getRegionNameAsString(), e);
140 }
141 }
142
143
144
145
146
147
148
149 @Override
150 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
151 justification="Intended")
152 public void waitForSplitTransaction(final RegionServerServices services, Region parent,
153 HRegionInfo hri_a, HRegionInfo hri_b, SplitTransactionDetails sptd) throws IOException {
154 ZkSplitTransactionDetails zstd = (ZkSplitTransactionDetails) sptd;
155
156
157
158
159 try {
160 int spins = 0;
161 Stat stat = new Stat();
162 ServerName expectedServer = coordinationManager.getServer().getServerName();
163 String node = parent.getRegionInfo().getEncodedName();
164 while (!(coordinationManager.getServer().isStopped() || services.isStopping())) {
165 if (spins % 5 == 0) {
166 LOG.debug("Still waiting for master to process " + "the pending_split for " + node);
167 SplitTransactionDetails temp = getDefaultDetails();
168 transitionSplittingNode(parent.getRegionInfo(), hri_a, hri_b, expectedServer, temp,
169 RS_ZK_REQUEST_REGION_SPLIT, RS_ZK_REQUEST_REGION_SPLIT);
170 }
171 Thread.sleep(100);
172 spins++;
173 if (spins > SPIN_WAIT_TIMEOUT) {
174 throw new IOException("Waiting time for Split Transaction exceeded for region: "
175 + parent.getRegionInfo().getRegionNameAsString());
176 }
177 byte[] data = ZKAssign.getDataNoWatch(watcher, node, stat);
178 if (data == null) {
179 throw new IOException("Data is null, splitting node " + node + " no longer exists");
180 }
181 RegionTransition rt = RegionTransition.parseFrom(data);
182 EventType et = rt.getEventType();
183 if (et == RS_ZK_REGION_SPLITTING) {
184 ServerName serverName = rt.getServerName();
185 if (!serverName.equals(expectedServer)) {
186 throw new IOException("Splitting node " + node + " is for " + serverName + ", not us "
187 + expectedServer);
188 }
189 byte[] payloadOfSplitting = rt.getPayload();
190 List<HRegionInfo> splittingRegions =
191 HRegionInfo.parseDelimitedFrom(payloadOfSplitting, 0, payloadOfSplitting.length);
192 assert splittingRegions.size() == 2;
193 HRegionInfo a = splittingRegions.get(0);
194 HRegionInfo b = splittingRegions.get(1);
195 if (!(hri_a.equals(a) && hri_b.equals(b))) {
196 throw new IOException("Splitting node " + node + " is for " + a + ", " + b
197 + ", not expected daughters: " + hri_a + ", " + hri_b);
198 }
199
200 zstd.setZnodeVersion(stat.getVersion());
201 return;
202 }
203 if (et != RS_ZK_REQUEST_REGION_SPLIT) {
204 throw new IOException("Splitting node " + node + " moved out of splitting to " + et);
205 }
206 }
207
208 throw new IOException("Server is " + (services.isStopping() ? "stopping" : "stopped"));
209 } catch (Exception e) {
210 if (e instanceof InterruptedException) {
211 Thread.currentThread().interrupt();
212 }
213 throw new IOException("Failed getting SPLITTING znode on " +
214 parent.getRegionInfo().getRegionNameAsString(), e);
215 }
216 }
217
218
219
220
221
222
223
224
225
226
227
228
229 @Override
230 public void completeSplitTransaction(final RegionServerServices services, Region a, Region b,
231 SplitTransactionDetails std, Region parent) throws IOException {
232 ZkSplitTransactionDetails zstd = (ZkSplitTransactionDetails) std;
233
234 if (coordinationManager.getServer() != null) {
235 try {
236 int newNodeVersion = transitionSplittingNode(parent.getRegionInfo(), a.getRegionInfo(),
237 b.getRegionInfo(), coordinationManager.getServer().getServerName(), zstd,
238 RS_ZK_REGION_SPLITTING, RS_ZK_REGION_SPLIT);
239 if (newNodeVersion == -1) {
240 throw new IOException("Notifying master of RS split failed for region: "
241 + parent.getRegionInfo().getRegionNameAsString());
242 }
243 zstd.setZnodeVersion(newNodeVersion);
244
245 int spins = 0;
246
247
248
249 do {
250 if (spins % 10 == 0) {
251 LOG.debug("Still waiting on the master to process the split for "
252 + parent.getRegionInfo().getEncodedName());
253 }
254 Thread.sleep(100);
255
256 zstd.setZnodeVersion(transitionSplittingNode(parent.getRegionInfo(), a.getRegionInfo(),
257 b.getRegionInfo(), coordinationManager.getServer().getServerName(), zstd,
258 RS_ZK_REGION_SPLIT, RS_ZK_REGION_SPLIT));
259 spins++;
260 } while (zstd.getZnodeVersion() != -1 && !coordinationManager.getServer().isStopped()
261 && !services.isStopping());
262 } catch (Exception e) {
263 if (e instanceof InterruptedException) {
264 Thread.currentThread().interrupt();
265 }
266 throw new IOException("Failed telling master about split", e);
267 }
268 }
269
270
271
272
273 }
274
275 @Override
276 public void clean(final HRegionInfo hri) {
277 try {
278
279 if (!ZKAssign.deleteNode(coordinationManager.getServer().getZooKeeper(),
280 hri.getEncodedName(), RS_ZK_REQUEST_REGION_SPLIT, coordinationManager.getServer()
281 .getServerName())) {
282 ZKAssign.deleteNode(coordinationManager.getServer().getZooKeeper(), hri.getEncodedName(),
283 RS_ZK_REGION_SPLITTING, coordinationManager.getServer().getServerName());
284 }
285 } catch (KeeperException.NoNodeException e) {
286 LOG.info("Failed cleanup zk node of " + hri.getRegionNameAsString(), e);
287 } catch (KeeperException e) {
288 coordinationManager.getServer().abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
289 }
290 }
291
292
293
294
295
296 public static class ZkSplitTransactionDetails implements
297 SplitTransactionCoordination.SplitTransactionDetails {
298 private int znodeVersion;
299
300 public ZkSplitTransactionDetails() {
301 }
302
303
304
305
306 public int getZnodeVersion() {
307 return znodeVersion;
308 }
309
310
311
312
313 public void setZnodeVersion(int znodeVersion) {
314 this.znodeVersion = znodeVersion;
315 }
316 }
317
318 @Override
319 public SplitTransactionDetails getDefaultDetails() {
320 ZkSplitTransactionDetails zstd = new ZkSplitTransactionDetails();
321 zstd.setZnodeVersion(-1);
322 return zstd;
323 }
324
325 @Override
326 public int processTransition(HRegionInfo p, HRegionInfo hri_a, HRegionInfo hri_b, ServerName sn,
327 SplitTransactionDetails std) throws IOException {
328 return transitionSplittingNode(p, hri_a, hri_b, sn, std, RS_ZK_REQUEST_REGION_SPLIT,
329 RS_ZK_REGION_SPLITTING);
330
331 }
332 }