View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver.snapshot;
19  
20  import java.io.IOException;
21  import java.util.List;
22  import java.util.concurrent.Callable;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.hbase.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.classification.InterfaceStability;
28  import org.apache.hadoop.hbase.client.IsolationLevel;
29  import org.apache.hadoop.hbase.errorhandling.ForeignException;
30  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
31  import org.apache.hadoop.hbase.procedure.ProcedureMember;
32  import org.apache.hadoop.hbase.procedure.Subprocedure;
33  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.Region;
36  import org.apache.hadoop.hbase.regionserver.Region.FlushResult;
37  import org.apache.hadoop.hbase.regionserver.Region.Operation;
38  import org.apache.hadoop.hbase.regionserver.snapshot.RegionServerSnapshotManager.SnapshotSubprocedurePool;
39  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
40  
41  /**
42   * This online snapshot implementation uses the distributed procedure framework to force a
43   * store flush and then records the hfiles.  Its enter stage does nothing.  Its leave stage then
44   * flushes the memstore, builds the region server's snapshot manifest from its hfiles list, and
45   * copies .regioninfos into the snapshot working directory.  At the master side, there is an atomic
46   * rename of the working dir into the proper snapshot directory.
47   */
48  @InterfaceAudience.Private
49  @InterfaceStability.Unstable
50  public class FlushSnapshotSubprocedure extends Subprocedure {
51    private static final Log LOG = LogFactory.getLog(FlushSnapshotSubprocedure.class);
52  
53    private final List<Region> regions;
54    private final SnapshotDescription snapshot;
55    private final SnapshotSubprocedurePool taskManager;
56    private boolean snapshotSkipFlush = false;
57  
58    // the maximum number of attempts we flush
59    final static int MAX_RETRIES = 3;
60  
61    public FlushSnapshotSubprocedure(ProcedureMember member,
62        ForeignExceptionDispatcher errorListener, long wakeFrequency, long timeout,
63        List<Region> regions, SnapshotDescription snapshot,
64        SnapshotSubprocedurePool taskManager) {
65      super(member, snapshot.getName(), errorListener, wakeFrequency, timeout);
66      this.snapshot = snapshot;
67  
68      if (this.snapshot.getType() == SnapshotDescription.Type.SKIPFLUSH) {
69        snapshotSkipFlush = true;
70      }
71      this.regions = regions;
72      this.taskManager = taskManager;
73    }
74  
75    /**
76     * Callable for adding files to snapshot manifest working dir.  Ready for multithreading.
77     */
78    public static class RegionSnapshotTask implements Callable<Void> {
79      private Region region;
80      private boolean skipFlush;
81      private ForeignExceptionDispatcher monitor;
82      private SnapshotDescription snapshotDesc;
83  
84      public RegionSnapshotTask(Region region, SnapshotDescription snapshotDesc,
85          boolean skipFlush, ForeignExceptionDispatcher monitor) {
86        this.region = region;
87        this.skipFlush = skipFlush;
88        this.monitor = monitor;
89        this.snapshotDesc = snapshotDesc;
90      }
91  
92      @Override
93      public Void call() throws Exception {
94        // Taking the region read lock prevents the individual region from being closed while a
95        // snapshot is in progress.  This is helpful but not sufficient for preventing races with
96        // snapshots that involve multiple regions and regionservers.  It is still possible to have
97        // an interleaving such that globally regions are missing, so we still need the verification
98        // step.
99        LOG.debug("Starting snapshot operation on " + region);
100       region.startRegionOperation(Operation.SNAPSHOT);
101       try {
102         if (skipFlush) {
103         /*
104          * This is to take an online-snapshot without force a coordinated flush to prevent pause
105          * The snapshot type is defined inside the snapshot description. FlushSnapshotSubprocedure
106          * should be renamed to distributedSnapshotSubprocedure, and the flush() behavior can be
107          * turned on/off based on the flush type.
108          * To minimized the code change, class name is not changed.
109          */
110           LOG.debug("take snapshot without flush memstore first");
111         } else {
112           LOG.debug("Flush Snapshotting region " + region.toString() + " started...");
113           boolean succeeded = false;
114           long readPt = region.getReadpoint(IsolationLevel.READ_COMMITTED);
115           for (int i = 0; i < MAX_RETRIES; i++) {
116             FlushResult res = region.flush(true);
117             if (res.getResult() == FlushResult.Result.CANNOT_FLUSH) {
118               // CANNOT_FLUSH may mean that a flush is already on-going
119               // we need to wait for that flush to complete
120               region.waitForFlushes();
121               if (region.getMaxFlushedSeqId() >= readPt) {
122                 // writes at the start of the snapshot have been persisted
123                 succeeded = true;
124                 break;
125               }
126             } else {
127               succeeded = true;
128               break;
129             }
130           }
131           if (!succeeded) {
132             throw new IOException("Unable to complete flush after " + MAX_RETRIES + " attempts");
133           }
134         }
135         ((HRegion)region).addRegionToSnapshot(snapshotDesc, monitor);
136         if (skipFlush) {
137           LOG.debug("... SkipFlush Snapshotting region " + region.toString() + " completed.");
138         } else {
139           LOG.debug("... Flush Snapshotting region " + region.toString() + " completed.");
140         }
141       } finally {
142         LOG.debug("Closing snapshot operation on " + region);
143         region.closeRegionOperation(Operation.SNAPSHOT);
144       }
145       return null;
146     }
147   }
148 
149   private void flushSnapshot() throws ForeignException {
150     if (regions.isEmpty()) {
151       // No regions on this RS, we are basically done.
152       return;
153     }
154 
155     monitor.rethrowException();
156 
157     // assert that the taskManager is empty.
158     if (taskManager.hasTasks()) {
159       throw new IllegalStateException("Attempting to take snapshot "
160           + ClientSnapshotDescriptionUtils.toString(snapshot)
161           + " but we currently have outstanding tasks");
162     }
163 
164     // Add all hfiles already existing in region.
165     for (Region region : regions) {
166       // submit one task per region for parallelize by region.
167       taskManager.submitTask(new RegionSnapshotTask(region, snapshot, snapshotSkipFlush, monitor));
168       monitor.rethrowException();
169     }
170 
171     // wait for everything to complete.
172     LOG.debug("Flush Snapshot Tasks submitted for " + regions.size() + " regions");
173     try {
174       taskManager.waitForOutstandingTasks();
175     } catch (InterruptedException e) {
176       LOG.error("got interrupted exception for " + getMemberName());
177       throw new ForeignException(getMemberName(), e);
178     }
179   }
180 
181   /**
182    * do nothing, core of snapshot is executed in {@link #insideBarrier} step.
183    */
184   @Override
185   public void acquireBarrier() throws ForeignException {
186     // NO OP
187   }
188 
189   /**
190    * do a flush snapshot of every region on this rs from the target table.
191    */
192   @Override
193   public byte[] insideBarrier() throws ForeignException {
194     flushSnapshot();
195     return new byte[0];
196   }
197 
198   /**
199    * Cancel threads if they haven't finished.
200    */
201   @Override
202   public void cleanup(Exception e) {
203     LOG.info("Aborting all online FLUSH snapshot subprocedure task threads for '"
204         + snapshot.getName() + "' due to error", e);
205     try {
206       taskManager.cancelTasks();
207     } catch (InterruptedException e1) {
208       Thread.currentThread().interrupt();
209     }
210   }
211 
212   /**
213    * Hooray!
214    */
215   public void releaseBarrier() {
216     // NO OP
217   }
218 
219 }