View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import com.google.common.base.Preconditions;
22  
23  import java.util.ArrayList;
24  import java.util.Collections;
25  import java.util.Comparator;
26  import java.util.Date;
27  import java.util.HashMap;
28  import java.util.HashSet;
29  import java.util.Iterator;
30  import java.util.List;
31  import java.util.Map;
32  import java.util.Set;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.hbase.ServerName;
38  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
39  import org.apache.hadoop.hbase.util.Pair;
40  
41  /**
42   * Class to hold dead servers list and utility querying dead server list.
43   * On znode expiration, servers are added here.
44   */
45  @InterfaceAudience.Private
46  public class DeadServer {
47    private static final Log LOG = LogFactory.getLog(DeadServer.class);
48  
49    /**
50     * Set of known dead servers.  On znode expiration, servers are added here.
51     * This is needed in case of a network partitioning where the server's lease
52     * expires, but the server is still running. After the network is healed,
53     * and it's server logs are recovered, it will be told to call server startup
54     * because by then, its regions have probably been reassigned.
55     */
56    private final Map<ServerName, Long> deadServers = new HashMap<ServerName, Long>();
57  
58    /**
59     * Set of dead servers currently being processed
60     */
61    private final Set<ServerName> processingServers = new HashSet<ServerName>();
62  
63    /**
64     * A dead server that comes back alive has a different start code. The new start code should be
65     *  greater than the old one, but we don't take this into account in this method.
66     *
67     * @param newServerName Servername as either <code>host:port</code> or
68     *                      <code>host,port,startcode</code>.
69     * @return true if this server was dead before and coming back alive again
70     */
71    public synchronized boolean cleanPreviousInstance(final ServerName newServerName) {
72      Iterator<ServerName> it = deadServers.keySet().iterator();
73      while (it.hasNext()) {
74        ServerName sn = it.next();
75        if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
76          // remove from deadServers
77          it.remove();
78          // remove from processingServers
79          boolean removed = processingServers.remove(sn);
80          if (removed) {
81            LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
82          }
83          return true;
84        }
85      }
86  
87      return false;
88    }
89  
90    /**
91     * @param serverName server name.
92     * @return true if this server is on the dead servers list false otherwise
93     */
94    public synchronized boolean isDeadServer(final ServerName serverName) {
95      return deadServers.containsKey(serverName);
96    }
97  
98    /**
99     * @param serverName server name.
100    * @return true if this server is on the processing servers list false otherwise
101    */
102   public synchronized boolean isProcessingServer(final ServerName serverName) {
103     return processingServers.contains(serverName);
104   }
105 
106   /**
107    * Checks if there are currently any dead servers being processed by the
108    * master.  Returns true if at least one region server is currently being
109    * processed as dead.
110    *
111    * @return true if any RS are being processed as dead
112    */
113   public synchronized boolean areDeadServersInProgress() {
114     return !processingServers.isEmpty();
115   }
116 
117   public synchronized Set<ServerName> copyServerNames() {
118     Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
119     clone.addAll(deadServers.keySet());
120     return clone;
121   }
122 
123   /**
124    * Adds the server to the dead server list if it's not there already.
125    * @param sn the server name
126    */
127   public synchronized void add(ServerName sn) {
128     if (!deadServers.containsKey(sn)){
129       deadServers.put(sn, EnvironmentEdgeManager.currentTime());
130     }
131     boolean added = processingServers.add(sn);
132     if (LOG.isDebugEnabled() && added) {
133       LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
134     }
135   }
136 
137   /**
138    * Notify that we started processing this dead server.
139    * @param sn ServerName for the dead server.
140    */
141   public synchronized void notifyServer(ServerName sn) {
142     boolean added = processingServers.add(sn);
143     if (LOG.isDebugEnabled()) {
144       if (added) {
145         LOG.debug("Added " + sn + "; numProcessing=" + processingServers.size());
146       }
147       LOG.debug("Started processing " + sn + "; numProcessing=" + processingServers.size());
148     }
149   }
150 
151   /**
152    * Complete processing for this dead server.
153    * @param sn ServerName for the dead server.
154    */
155   public synchronized void finish(ServerName sn) {
156     boolean removed = processingServers.remove(sn);
157     if (LOG.isDebugEnabled()) {
158       LOG.debug("Finished processing " + sn + "; numProcessing=" + processingServers.size());
159       if (removed) {
160         LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
161       }
162     }
163   }
164 
165   public synchronized int size() {
166     return deadServers.size();
167   }
168 
169   public synchronized boolean isEmpty() {
170     return deadServers.isEmpty();
171   }
172 
173   public synchronized void cleanAllPreviousInstances(final ServerName newServerName) {
174     Iterator<ServerName> it = deadServers.keySet().iterator();
175     while (it.hasNext()) {
176       ServerName sn = it.next();
177       if (ServerName.isSameHostnameAndPort(sn, newServerName)) {
178         // remove from deadServers
179         it.remove();
180         // remove from processingServers
181         boolean removed = processingServers.remove(sn);
182         if (removed) {
183           LOG.debug("Removed " + sn + " ; numProcessing=" + processingServers.size());
184         }
185       }
186     }
187   }
188 
189   @Override
190   public synchronized String toString() {
191     // Display unified set of servers from both maps
192     Set<ServerName> servers = new HashSet<ServerName>();
193     servers.addAll(deadServers.keySet());
194     servers.addAll(processingServers);
195     StringBuilder sb = new StringBuilder();
196     for (ServerName sn : servers) {
197       if (sb.length() > 0) {
198         sb.append(", ");
199       }
200       sb.append(sn.toString());
201       // Star entries that are being processed
202       if (processingServers.contains(sn)) {
203         sb.append("*");
204       }
205     }
206     sb.append(" (numProcessing=");
207     sb.append(processingServers.size());
208     sb.append(')');
209     return sb.toString();
210   }
211 
212   /**
213    * Extract all the servers dead since a given time, and sort them.
214    * @param ts the time, 0 for all
215    * @return a sorted array list, by death time, lowest values first.
216    */
217   public synchronized List<Pair<ServerName, Long>> copyDeadServersSince(long ts){
218     List<Pair<ServerName, Long>> res =  new ArrayList<Pair<ServerName, Long>>(size());
219 
220     for (Map.Entry<ServerName, Long> entry:deadServers.entrySet()){
221       if (entry.getValue() >= ts){
222         res.add(new Pair<ServerName, Long>(entry.getKey(), entry.getValue()));
223       }
224     }
225 
226     Collections.sort(res, ServerNameDeathDateComparator);
227     return res;
228   }
229   
230   /**
231    * Get the time when a server died
232    * @param deadServerName the dead server name
233    * @return the date when the server died 
234    */
235   public synchronized Date getTimeOfDeath(final ServerName deadServerName){
236     Long time = deadServers.get(deadServerName);
237     return time == null ? null : new Date(time);
238   }
239 
240   private static Comparator<Pair<ServerName, Long>> ServerNameDeathDateComparator =
241       new Comparator<Pair<ServerName, Long>>(){
242 
243     @Override
244     public int compare(Pair<ServerName, Long> o1, Pair<ServerName, Long> o2) {
245       return o1.getSecond().compareTo(o2.getSecond());
246     }
247   };
248 
249   /**
250    * remove the specified dead server
251    * @param deadServerName the dead server name
252    * @return true if this server was removed
253    */
254   public synchronized boolean removeDeadServer(final ServerName deadServerName) {
255     Preconditions.checkState(!processingServers.contains(deadServerName),
256       "Asked to remove server still in processingServers set " + deadServerName +
257           " (numProcessing=" + processingServers.size() + ")");
258     if (deadServers.remove(deadServerName) == null) {
259       return false;
260     }
261     return true;
262   }
263 
264 }