1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.util;
20
21 import java.io.FileNotFoundException;
22 import java.io.IOException;
23 import java.io.InterruptedIOException;
24 import java.lang.reflect.Method;
25 import java.net.InetSocketAddress;
26 import java.net.URI;
27 import java.util.HashSet;
28 import java.util.Map;
29 import java.util.Set;
30 import java.util.Collection;
31
32 import com.google.common.collect.Sets;
33 import org.apache.commons.logging.Log;
34 import org.apache.commons.logging.LogFactory;
35 import org.apache.hadoop.hbase.classification.InterfaceAudience;
36 import org.apache.hadoop.hbase.classification.InterfaceStability;
37 import org.apache.hadoop.conf.Configuration;
38 import org.apache.hadoop.fs.FileSystem;
39 import org.apache.hadoop.fs.Path;
40 import org.apache.hadoop.hdfs.DistributedFileSystem;
41 import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
42
43
44
45
46
47 @InterfaceAudience.Private
48 @InterfaceStability.Evolving
49 public class FSHDFSUtils extends FSUtils {
50 private static final Log LOG = LogFactory.getLog(FSHDFSUtils.class);
51 private static Class dfsUtilClazz;
52 private static Method getNNAddressesMethod;
53
54
55
56
57
58
59 private static Set<InetSocketAddress> getNNAddresses(DistributedFileSystem fs,
60 Configuration conf) {
61 Set<InetSocketAddress> addresses = new HashSet<InetSocketAddress>();
62 String serviceName = fs.getCanonicalServiceName();
63
64 if (serviceName.startsWith("ha-hdfs")) {
65 try {
66 if (dfsUtilClazz == null) {
67 dfsUtilClazz = Class.forName("org.apache.hadoop.hdfs.DFSUtil");
68 }
69 if (getNNAddressesMethod == null) {
70 try {
71
72
73 getNNAddressesMethod =
74 dfsUtilClazz.getMethod("getNNServiceRpcAddressesForCluster", Configuration.class);
75 } catch (NoSuchMethodException e) {
76
77 getNNAddressesMethod =
78 dfsUtilClazz.getMethod("getNNServiceRpcAddresses", Configuration.class);
79 }
80
81 }
82
83 Map<String, Map<String, InetSocketAddress>> addressMap =
84 (Map<String, Map<String, InetSocketAddress>>) getNNAddressesMethod
85 .invoke(null, conf);
86 String nameService = serviceName.substring(serviceName.indexOf(":") + 1);
87 if (addressMap.containsKey(nameService)) {
88 Map<String, InetSocketAddress> nnMap = addressMap.get(nameService);
89 for (Map.Entry<String, InetSocketAddress> e2 : nnMap.entrySet()) {
90 InetSocketAddress addr = e2.getValue();
91 addresses.add(addr);
92 }
93 }
94 } catch (Exception e) {
95 LOG.warn("DFSUtil.getNNServiceRpcAddresses failed. serviceName=" + serviceName, e);
96 }
97 } else {
98 URI uri = fs.getUri();
99 int port = uri.getPort();
100 if (port < 0) {
101 int idx = serviceName.indexOf(':');
102 port = Integer.parseInt(serviceName.substring(idx+1));
103 }
104 InetSocketAddress addr = new InetSocketAddress(uri.getHost(), port);
105 addresses.add(addr);
106 }
107
108 return addresses;
109 }
110
111
112
113
114
115
116
117 public static boolean isSameHdfs(Configuration conf, FileSystem srcFs, FileSystem desFs) {
118
119
120 String srcServiceName = srcFs.getCanonicalServiceName();
121 String desServiceName = desFs.getCanonicalServiceName();
122
123 if (srcServiceName == null || desServiceName == null) {
124 return false;
125 }
126 if (srcServiceName.equals(desServiceName)) {
127 return true;
128 }
129 if (srcServiceName.startsWith("ha-hdfs") && desServiceName.startsWith("ha-hdfs")) {
130 Collection<String> internalNameServices =
131 conf.getTrimmedStringCollection("dfs.internal.nameservices");
132 if (!internalNameServices.isEmpty()) {
133 if (internalNameServices.contains(srcServiceName.split(":")[1])) {
134 return true;
135 } else {
136 return false;
137 }
138 }
139 }
140 if (srcFs instanceof DistributedFileSystem && desFs instanceof DistributedFileSystem) {
141
142
143
144 Set<InetSocketAddress> srcAddrs = getNNAddresses((DistributedFileSystem) srcFs, conf);
145 Set<InetSocketAddress> desAddrs = getNNAddresses((DistributedFileSystem) desFs, conf);
146 if (Sets.intersection(srcAddrs, desAddrs).size() > 0) {
147 return true;
148 }
149 }
150
151 return false;
152 }
153
154
155
156
157 @Override
158 public void recoverFileLease(final FileSystem fs, final Path p,
159 Configuration conf, CancelableProgressable reporter)
160 throws IOException {
161
162 if (!(fs instanceof DistributedFileSystem)) return;
163 recoverDFSFileLease((DistributedFileSystem)fs, p, conf, reporter);
164 }
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192 boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p,
193 final Configuration conf, final CancelableProgressable reporter)
194 throws IOException {
195 LOG.info("Recover lease on dfs file " + p);
196 long startWaiting = EnvironmentEdgeManager.currentTime();
197
198
199
200 long recoveryTimeout = conf.getInt("hbase.lease.recovery.timeout", 900000) + startWaiting;
201
202 long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
203
204
205
206
207
208 long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
209
210 Method isFileClosedMeth = null;
211
212 boolean findIsFileClosedMeth = true;
213 boolean recovered = false;
214
215 for (int nbAttempt = 0; !recovered; nbAttempt++) {
216 recovered = recoverLease(dfs, nbAttempt, p, startWaiting);
217 if (recovered) break;
218 checkIfCancelled(reporter);
219 if (checkIfTimedout(conf, recoveryTimeout, nbAttempt, p, startWaiting)) break;
220 try {
221
222 if (nbAttempt == 0) {
223 Thread.sleep(firstPause);
224 } else {
225
226
227 long localStartWaiting = EnvironmentEdgeManager.currentTime();
228 while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
229 subsequentPauseBase * nbAttempt) {
230 Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
231 if (findIsFileClosedMeth) {
232 try {
233 isFileClosedMeth = dfs.getClass().getMethod("isFileClosed",
234 new Class[]{ Path.class });
235 } catch (NoSuchMethodException nsme) {
236 LOG.debug("isFileClosed not available");
237 } finally {
238 findIsFileClosedMeth = false;
239 }
240 }
241 if (isFileClosedMeth != null && isFileClosed(dfs, isFileClosedMeth, p)) {
242 recovered = true;
243 break;
244 }
245 checkIfCancelled(reporter);
246 }
247 }
248 } catch (InterruptedException ie) {
249 InterruptedIOException iioe = new InterruptedIOException();
250 iioe.initCause(ie);
251 throw iioe;
252 }
253 }
254 return recovered;
255 }
256
257 boolean checkIfTimedout(final Configuration conf, final long recoveryTimeout,
258 final int nbAttempt, final Path p, final long startWaiting) {
259 if (recoveryTimeout < EnvironmentEdgeManager.currentTime()) {
260 LOG.warn("Cannot recoverLease after trying for " +
261 conf.getInt("hbase.lease.recovery.timeout", 900000) +
262 "ms (hbase.lease.recovery.timeout); continuing, but may be DATALOSS!!!; " +
263 getLogMessageDetail(nbAttempt, p, startWaiting));
264 return true;
265 }
266 return false;
267 }
268
269
270
271
272
273
274
275
276
277
278 boolean recoverLease(final DistributedFileSystem dfs, final int nbAttempt, final Path p,
279 final long startWaiting)
280 throws FileNotFoundException {
281 boolean recovered = false;
282 try {
283 recovered = dfs.recoverLease(p);
284 LOG.info((recovered? "Recovered lease, ": "Failed to recover lease, ") +
285 getLogMessageDetail(nbAttempt, p, startWaiting));
286 } catch (IOException e) {
287 if (e instanceof LeaseExpiredException && e.getMessage().contains("File does not exist")) {
288
289 throw new FileNotFoundException("The given WAL wasn't found at " + p);
290 } else if (e instanceof FileNotFoundException) {
291 throw (FileNotFoundException)e;
292 }
293 LOG.warn(getLogMessageDetail(nbAttempt, p, startWaiting), e);
294 }
295 return recovered;
296 }
297
298
299
300
301
302
303
304 private String getLogMessageDetail(final int nbAttempt, final Path p, final long startWaiting) {
305 return "attempt=" + nbAttempt + " on file=" + p + " after " +
306 (EnvironmentEdgeManager.currentTime() - startWaiting) + "ms";
307 }
308
309
310
311
312
313
314
315
316 private boolean isFileClosed(final DistributedFileSystem dfs, final Method m, final Path p) {
317 try {
318 return (Boolean) m.invoke(dfs, p);
319 } catch (SecurityException e) {
320 LOG.warn("No access", e);
321 } catch (Exception e) {
322 LOG.warn("Failed invocation for " + p.toString(), e);
323 }
324 return false;
325 }
326
327 void checkIfCancelled(final CancelableProgressable reporter)
328 throws InterruptedIOException {
329 if (reporter == null) return;
330 if (!reporter.progress()) throw new InterruptedIOException("Operation cancelled");
331 }
332 }