View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master;
19  
20  import static org.apache.hadoop.hbase.zookeeper.ZKUtil.joinZNode;
21  import com.google.common.util.concurrent.ThreadFactoryBuilder;
22  import java.util.ArrayList;
23  import java.util.List;
24  import java.util.concurrent.ConcurrentNavigableMap;
25  import java.util.concurrent.ThreadFactory;
26  import org.apache.hadoop.hbase.HRegionLocation;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.exceptions.DeserializationException;
29  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
30  import org.apache.hadoop.hbase.types.CopyOnWriteArrayMap;
31  import org.apache.hadoop.hbase.util.RetryCounter;
32  import org.apache.hadoop.hbase.util.RetryCounterFactory;
33  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
34  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
35  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
36  import org.apache.zookeeper.KeeperException;
37  import org.slf4j.Logger;
38  import org.slf4j.LoggerFactory;
39  
40  /**
41   * A cache of meta region location metadata. Registers a listener on ZK to track changes to the
42   * meta table znodes. Clients are expected to retry if the meta information is stale. This class
43   * is thread-safe (a single instance of this class can be shared by multiple threads without race
44   * conditions).
45   */
46  @InterfaceAudience.Private
47  public class MetaRegionLocationCache extends ZooKeeperListener {
48  
49    private static final Logger LOG = LoggerFactory.getLogger(MetaRegionLocationCache.class);
50  
51    /**
52     * Maximum number of times we retry when ZK operation times out.
53     */
54    private static final int MAX_ZK_META_FETCH_RETRIES = 10;
55    /**
56     * Sleep interval ms between ZK operation retries.
57     */
58    private static final int SLEEP_INTERVAL_MS_BETWEEN_RETRIES = 1000;
59    private static final int SLEEP_INTERVAL_MS_MAX = 10000;
60    private final RetryCounterFactory retryCounterFactory =
61        new RetryCounterFactory(MAX_ZK_META_FETCH_RETRIES, SLEEP_INTERVAL_MS_BETWEEN_RETRIES);
62  
63    /**
64     * Cached meta region locations indexed by replica ID.
65     * CopyOnWriteArrayMap ensures synchronization during updates and a consistent snapshot during
66     * client requests. Even though CopyOnWriteArrayMap copies the data structure for every write,
67     * that should be OK since the size of the list is often small and mutations are not too often
68     * and we do not need to block client requests while mutations are in progress.
69     */
70    private final CopyOnWriteArrayMap<Integer, HRegionLocation> cachedMetaLocations;
71  
72    private enum ZNodeOpType {
73      INIT,
74      CREATED,
75      CHANGED,
76      DELETED
77    }
78  
79    public MetaRegionLocationCache(ZooKeeperWatcher zkWatcher) {
80      super(zkWatcher);
81      cachedMetaLocations = new CopyOnWriteArrayMap<>();
82      watcher.registerListener(this);
83      // Populate the initial snapshot of data from meta znodes.
84      // This is needed because stand-by masters can potentially start after the initial znode
85      // creation. It blocks forever until the initial meta locations are loaded from ZK and watchers
86      // are established. Subsequent updates are handled by the registered listener. Also, this runs
87      // in a separate thread in the background to not block master init.
88      ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).build();
89      final RetryCounterFactory retryFactory = new RetryCounterFactory(
90          Integer.MAX_VALUE, SLEEP_INTERVAL_MS_BETWEEN_RETRIES, SLEEP_INTERVAL_MS_MAX);
91      threadFactory.newThread(
92          new Runnable() {
93            @Override
94            public void run() {
95              MetaRegionLocationCache.this.loadMetaLocationsFromZk(
96                  retryFactory.create(), ZNodeOpType.INIT);
97            }
98          }).start();
99    }
100 
101   /**
102    * Populates the current snapshot of meta locations from ZK. If no meta znodes exist, it registers
103    * a watcher on base znode to check for any CREATE/DELETE events on the children.
104    * @param retryCounter controls the number of retries and sleep between retries.
105    */
106   private void loadMetaLocationsFromZk(RetryCounter retryCounter, ZNodeOpType opType) {
107     List<String> znodes = null;
108     while (retryCounter.shouldRetry()) {
109       try {
110         znodes = watcher.getMetaReplicaNodesAndWatchChildren();
111         break;
112       } catch (KeeperException ke) {
113         LOG.debug("Error populating initial meta locations", ke);
114         if (!retryCounter.shouldRetry()) {
115           // Retries exhausted and watchers not set. This is not a desirable state since the cache
116           // could remain stale forever. Propagate the exception.
117           watcher.abort("Error populating meta locations", ke);
118           return;
119         }
120         try {
121           retryCounter.sleepUntilNextRetry();
122         } catch (InterruptedException ie) {
123           LOG.error("Interrupted while loading meta locations from ZK", ie);
124           Thread.currentThread().interrupt();
125           return;
126         }
127       }
128     }
129     if (znodes == null || znodes.isEmpty()) {
130       // No meta znodes exist at this point but we registered a watcher on the base znode to listen
131       // for updates. They will be handled via nodeChildrenChanged().
132       return;
133     }
134     if (znodes.size() == cachedMetaLocations.size()) {
135       // No new meta znodes got added.
136       return;
137     }
138     for (String znode: znodes) {
139       String path = joinZNode(watcher.baseZNode, znode);
140       updateMetaLocation(path, opType);
141     }
142   }
143 
144   /**
145    * Gets the HRegionLocation for a given meta replica ID. Renews the watch on the znode for
146    * future updates.
147    * @param replicaId ReplicaID of the region.
148    * @return HRegionLocation for the meta replica.
149    * @throws KeeperException if there is any issue fetching/parsing the serialized data.
150    */
151   private HRegionLocation getMetaRegionLocation(int replicaId)
152       throws KeeperException {
153     RegionState metaRegionState;
154     try {
155       byte[] data = ZKUtil.getDataAndWatch(watcher,
156           watcher.getZNodeForReplica(replicaId));
157       metaRegionState = ProtobufUtil.parseMetaRegionStateFrom(data, replicaId);
158     } catch (DeserializationException e) {
159       throw ZKUtil.convert(e);
160     }
161     return new HRegionLocation(metaRegionState.getRegion(), metaRegionState.getServerName());
162   }
163 
164   private void updateMetaLocation(String path, ZNodeOpType opType) {
165     if (!isValidMetaZNode(path)) {
166       return;
167     }
168     LOG.debug("Updating meta znode for path {}: {}", path, opType.name());
169     int replicaId = watcher.getMetaReplicaIdFromPath(path);
170     RetryCounter retryCounter = retryCounterFactory.create();
171     HRegionLocation location = null;
172     while (retryCounter.shouldRetry()) {
173       try {
174         if (opType == ZNodeOpType.DELETED) {
175           if (!ZKUtil.watchAndCheckExists(watcher, path)) {
176             // The path does not exist, we've set the watcher and we can break for now.
177             break;
178           }
179           // If it is a transient error and the node appears right away, we fetch the
180           // latest meta state.
181         }
182         location = getMetaRegionLocation(replicaId);
183         break;
184       } catch (KeeperException e) {
185         LOG.debug("Error getting meta location for path {}", path, e);
186         if (!retryCounter.shouldRetry()) {
187           LOG.warn("Error getting meta location for path {}. Retries exhausted.", path, e);
188           break;
189         }
190         try {
191           retryCounter.sleepUntilNextRetry();
192         } catch (InterruptedException ie) {
193           Thread.currentThread().interrupt();
194           return;
195         }
196       }
197     }
198     if (location == null) {
199       cachedMetaLocations.remove(replicaId);
200       return;
201     }
202     cachedMetaLocations.put(replicaId, location);
203   }
204 
205   /**
206    * @return Optional list of HRegionLocations for meta replica(s), null if the cache is empty.
207    *
208    */
209   public List<HRegionLocation> getMetaRegionLocations() {
210     ConcurrentNavigableMap<Integer, HRegionLocation> snapshot =
211         cachedMetaLocations.tailMap(cachedMetaLocations.firstKey());
212     List<HRegionLocation> result = new ArrayList<>();
213     if (snapshot.isEmpty()) {
214       // This could be possible if the master has not successfully initialized yet or meta region
215       // is stuck in some weird state.
216       return result;
217     }
218     // Explicitly iterate instead of new ArrayList<>(snapshot.values()) because the underlying
219     // ArrayValueCollection does not implement toArray().
220     for (HRegionLocation location: snapshot.values()) {
221       result.add(location);
222     }
223     return result;
224   }
225 
226   /**
227    * Helper to check if the given 'path' corresponds to a meta znode. This listener is only
228    * interested in changes to meta znodes.
229    */
230   private boolean isValidMetaZNode(String path) {
231     return watcher.isAnyMetaReplicaZNode(path);
232   }
233 
234   @Override
235   public void nodeCreated(String path) {
236     updateMetaLocation(path, ZNodeOpType.CREATED);
237   }
238 
239   @Override
240   public void nodeDeleted(String path) {
241     updateMetaLocation(path, ZNodeOpType.DELETED);
242   }
243 
244   @Override
245   public void nodeDataChanged(String path) {
246     updateMetaLocation(path, ZNodeOpType.CHANGED);
247   }
248 
249   @Override
250   public void nodeChildrenChanged(String path) {
251     if (!path.equals(watcher.baseZNode)) {
252       return;
253     }
254     loadMetaLocationsFromZk(retryCounterFactory.create(), ZNodeOpType.CHANGED);
255   }
256 }