View Javadoc

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19  
20  package org.apache.hadoop.hbase.coordination;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.util.Set;
25  import java.util.concurrent.ConcurrentMap;
26  
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.ServerName;
30  import org.apache.hadoop.hbase.master.MasterServices;
31  import org.apache.hadoop.hbase.master.SplitLogManager.ResubmitDirective;
32  import org.apache.hadoop.hbase.master.SplitLogManager.Task;
33  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
34  
35  /**
36   * Coordination for SplitLogManager. It creates and works with tasks for split log operations<BR>
37   * Manager prepares task by calling {@link #prepareTask} and submit it by
38   * {@link #submitTask(String)}. After that it periodically check the number of remaining tasks by
39   * {@link #remainingTasksInCoordination()} and waits until it become zero.
40   * <P>
41   * Methods required for task life circle: <BR>
42   * {@link #markRegionsRecovering(ServerName, Set)} mark regions for log replaying. Used by
43   * {@link org.apache.hadoop.hbase.master.MasterFileSystem} <BR>
44   * {@link #removeRecoveringRegions(Set, Boolean)} make regions cleanup that previous were marked as
45   * recovering. Called after all tasks processed <BR>
46   * {@link #removeStaleRecoveringRegions(Set)} remove stale recovering. called by
47   * {@link org.apache.hadoop.hbase.master.MasterFileSystem} after Active Master is initialized <BR>
48   * {@link #getLastRecoveryTime()} required for garbage collector and should indicate when the last
49   * recovery has been made<BR>
50   * {@link #checkTaskStillAvailable(String)} Check that task is still there <BR>
51   * {@link #checkTasks()} check for unassigned tasks and resubmit them
52   */
53  @InterfaceAudience.Private
54  public interface SplitLogManagerCoordination {
55  
56    /**
57     * Detail class that shares data between coordination and split log manager
58     */
59    public static class SplitLogManagerDetails {
60      final private ConcurrentMap<String, Task> tasks;
61      final private MasterServices master;
62      final private Set<String> failedDeletions;
63      final private ServerName serverName;
64  
65      public SplitLogManagerDetails(ConcurrentMap<String, Task> tasks, MasterServices master,
66          Set<String> failedDeletions, ServerName serverName) {
67        this.tasks = tasks;
68        this.master = master;
69        this.failedDeletions = failedDeletions;
70        this.serverName = serverName;
71      }
72  
73      /**
74       * @return the master value
75       */
76      public MasterServices getMaster() {
77        return master;
78      }
79  
80      /**
81       * @return map of tasks
82       */
83      public ConcurrentMap<String, Task> getTasks() {
84        return tasks;
85      }
86  
87      /**
88       * @return a set of failed deletions
89       */
90      public Set<String> getFailedDeletions() {
91        return failedDeletions;
92      }
93  
94      /**
95       * @return server name
96       */
97      public ServerName getServerName() {
98        return serverName;
99      }
100   }
101 
102   /**
103    * Provide the configuration from the SplitLogManager
104    */
105   void setDetails(SplitLogManagerDetails details);
106 
107   /**
108    * Returns the configuration that was provided previously
109    */
110   SplitLogManagerDetails getDetails();
111 
112   /**
113    * Prepare the new task
114    * @param taskName name of the task
115    * @return the task id
116    */
117   String prepareTask(String taskName);
118 
119   /**
120    * Mark regions in recovering state for distributed log replay
121    * @param serverName server name
122    * @param userRegions set of regions to be marked
123    * @throws IOException in case of failure
124    * @throws InterruptedIOException
125    */
126   void markRegionsRecovering(final ServerName serverName, Set<HRegionInfo> userRegions)
127       throws IOException, InterruptedIOException;
128 
129   /**
130    * tells Coordination that it should check for new tasks
131    */
132   void checkTasks();
133 
134   /**
135    * It removes recovering regions from Coordination
136    * @param serverNames servers which are just recovered
137    * @param isMetaRecovery whether current recovery is for the meta region on
138    *          <code>serverNames</code>
139    */
140   void removeRecoveringRegions(Set<String> serverNames, Boolean isMetaRecovery) throws IOException;
141 
142   /**
143    * Return the number of remaining tasks
144    */
145   int remainingTasksInCoordination();
146 
147   /**
148    * Check that the task is still there
149    * @param task node to check
150    */
151   void checkTaskStillAvailable(String task);
152 
153   /**
154    * Change the recovery mode.
155    * @param b the recovery mode state
156    * @throws InterruptedIOException
157    * @throws IOException in case of failure
158    */
159   void setRecoveryMode(boolean b) throws InterruptedIOException, IOException;
160 
161   /**
162    * Removes known stale servers
163    * @param knownServers set of previously failed servers
164    * @throws IOException in case of failure
165    * @throws InterruptedIOException
166    */
167   void removeStaleRecoveringRegions(Set<String> knownServers) throws IOException,
168       InterruptedIOException;
169 
170   /**
171    * Resubmit the task in case if found unassigned or failed
172    * @param taskName path related to task
173    * @param task to resubmit
174    * @param force whether it should be forced
175    * @return whether it was successful
176    */
177 
178   boolean resubmitTask(String taskName, Task task, ResubmitDirective force);
179 
180   /**
181    * @param taskName to be submitted
182    */
183   void submitTask(String taskName);
184 
185   /**
186    * @param taskName to be removed
187    */
188   void deleteTask(String taskName);
189 
190   /**
191    * @return shows whether the log recovery mode is in replaying state
192    */
193   boolean isReplaying();
194 
195   /**
196    * @return shows whether the log recovery mode is in splitting state
197    */
198   boolean isSplitting();
199 
200   /**
201    * @return the time of last attempt to recover
202    */
203   long getLastRecoveryTime();
204 
205   /**
206    * Temporary function, mostly for UTs. In the regular code isReplaying or isSplitting should be
207    * used.
208    * @return the current log recovery mode.
209    */
210   RecoveryMode getRecoveryMode();
211 
212   /**
213    * Support method to init constants such as timeout. Mostly required for UTs.
214    * @throws IOException
215    */
216   void init() throws IOException;
217 }