1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.master.cleaner;
20
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Set;
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.conf.Configuration;
27 import org.apache.hadoop.hbase.Abortable;
28 import org.apache.hadoop.hbase.ScheduledChore;
29 import org.apache.hadoop.hbase.Stoppable;
30 import org.apache.hadoop.hbase.classification.InterfaceAudience;
31 import org.apache.hadoop.hbase.replication.ReplicationException;
32 import org.apache.hadoop.hbase.replication.ReplicationFactory;
33 import org.apache.hadoop.hbase.replication.ReplicationQueuesZKImpl;
34 import org.apache.hadoop.hbase.replication.ReplicationTracker;
35 import org.apache.hadoop.hbase.util.Bytes;
36 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
37 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
38 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
39 import org.apache.zookeeper.KeeperException;
40 import org.apache.zookeeper.data.Stat;
41
42
43
44
45 @InterfaceAudience.Private
46 public class ReplicationZKLockCleanerChore extends ScheduledChore {
47 private static final Log LOG = LogFactory.getLog(ReplicationZKLockCleanerChore.class);
48 private ZooKeeperWatcher zk;
49 private ReplicationTracker tracker;
50 private long ttl;
51 private ReplicationQueuesZKImpl queues;
52
53
54 private static final long DEFAULT_TTL = 60 * 10 * 1000;
55
56 public static final String TTL_CONFIG_KEY = "hbase.replication.zk.deadrs.lock.ttl";
57
58 public ReplicationZKLockCleanerChore(Stoppable stopper, Abortable abortable, int period,
59 ZooKeeperWatcher zk, Configuration conf) throws Exception {
60 super("ReplicationZKLockCleanerChore", stopper, period);
61
62 this.zk = zk;
63 this.ttl = conf.getLong(TTL_CONFIG_KEY, DEFAULT_TTL);
64 tracker = ReplicationFactory.getReplicationTracker(zk,
65 ReplicationFactory.getReplicationPeers(zk, conf, abortable), conf, abortable, stopper);
66 queues = new ReplicationQueuesZKImpl(zk, conf, abortable);
67 }
68
69 @Override protected void chore() {
70 try {
71 List<String> regionServers = tracker.getListOfRegionServers();
72 if (regionServers == null) {
73 return;
74 }
75 Set<String> rsSet = new HashSet<String>(regionServers);
76 List<String> replicators = queues.getListOfReplicators();
77 if (replicators == null || replicators.isEmpty()) {
78 return;
79 }
80 for (String replicator: replicators) {
81 try {
82 String lockNode = queues.getLockZNode(replicator);
83 byte[] data = ZKUtil.getData(zk, lockNode);
84 if (data == null) {
85 continue;
86 }
87 String rsServerNameZnode = Bytes.toString(data);
88 String[] array = rsServerNameZnode.split("/");
89 String znode = array[array.length - 1];
90 if (!rsSet.contains(znode)) {
91 Stat s = zk.getRecoverableZooKeeper().exists(lockNode, false);
92 if (s != null && EnvironmentEdgeManager.currentTime() - s.getMtime() > this.ttl) {
93
94 ZKUtil.deleteNode(zk, lockNode);
95 LOG.info("Remove lock acquired by dead RS: " + lockNode + " by " + znode);
96 }
97 continue;
98 }
99 LOG.info("Skip lock acquired by live RS: " + lockNode + " by " + znode);
100
101 } catch (KeeperException.NoNodeException ignore) {
102 } catch (InterruptedException e) {
103 LOG.warn("zk operation interrupted", e);
104 Thread.currentThread().interrupt();
105 }
106 }
107 } catch (KeeperException e) {
108 LOG.warn("zk operation interrupted", e);
109 } catch (ReplicationException e2) {
110 LOG.warn("replication exception", e2);
111 }
112 }
113 }