View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.procedure2.store.wal;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collections;
26  import java.util.Comparator;
27  import java.util.HashSet;
28  import java.util.Iterator;
29  import java.util.LinkedList;
30  import java.util.Set;
31  import java.util.concurrent.LinkedTransferQueue;
32  import java.util.concurrent.TimeUnit;
33  import java.util.concurrent.atomic.AtomicBoolean;
34  import java.util.concurrent.atomic.AtomicLong;
35  import java.util.concurrent.atomic.AtomicReference;
36  import java.util.concurrent.locks.Condition;
37  import java.util.concurrent.locks.ReentrantLock;
38  
39  import org.apache.commons.collections.buffer.CircularFifoBuffer;
40  import org.apache.commons.logging.Log;
41  import org.apache.commons.logging.LogFactory;
42  import org.apache.hadoop.conf.Configuration;
43  import org.apache.hadoop.fs.FSDataOutputStream;
44  import org.apache.hadoop.fs.FileAlreadyExistsException;
45  import org.apache.hadoop.fs.FileStatus;
46  import org.apache.hadoop.fs.FileSystem;
47  import org.apache.hadoop.fs.Path;
48  import org.apache.hadoop.fs.PathFilter;
49  import org.apache.hadoop.hbase.HConstants;
50  import org.apache.hadoop.hbase.classification.InterfaceAudience;
51  import org.apache.hadoop.hbase.classification.InterfaceStability;
52  import org.apache.hadoop.hbase.procedure2.Procedure;
53  import org.apache.hadoop.hbase.procedure2.store.ProcedureStoreBase;
54  import org.apache.hadoop.hbase.procedure2.store.ProcedureStoreTracker;
55  import org.apache.hadoop.hbase.procedure2.util.ByteSlot;
56  import org.apache.hadoop.hbase.procedure2.util.StringUtils;
57  import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos.ProcedureWALHeader;
58  import org.apache.hadoop.hbase.util.CommonFSUtils;
59  import org.apache.hadoop.hbase.util.Threads;
60  import org.apache.hadoop.ipc.RemoteException;
61  
62  /**
63   * WAL implementation of the ProcedureStore.
64   */
65  @InterfaceAudience.Private
66  @InterfaceStability.Evolving
67  public class WALProcedureStore extends ProcedureStoreBase {
/** Class-wide logger for the WAL procedure store. */
private static final Log LOG = LogFactory.getLog(WALProcedureStore.class);

/** Used to construct the name of the log directory for master procedures */
public static final String MASTER_PROCEDURE_LOGDIR = "MasterProcWALs";
72  
/**
 * Callback supplied to the store constructor for recovering the lease of a file.
 * NOTE(review): the call site is outside the visible part of this file; presumably it is
 * invoked on old WAL files while the store takes ownership of them - confirm.
 */
public interface LeaseRecovery {
  /**
   * Recover the lease of the given file.
   * @param fs the file system the file lives on
   * @param path the file whose lease must be recovered
   * @throws IOException if the lease cannot be recovered
   */
  void recoverFileLease(FileSystem fs, Path path) throws IOException;
}
76  
77    public static final String MAX_RETRIES_BEFORE_ROLL_CONF_KEY =
78      "hbase.procedure.store.wal.max.retries.before.roll";
79    private static final int DEFAULT_MAX_RETRIES_BEFORE_ROLL = 3;
80  
81    public static final String WAIT_BEFORE_ROLL_CONF_KEY =
82      "hbase.procedure.store.wal.wait.before.roll";
83    private static final int DEFAULT_WAIT_BEFORE_ROLL = 500;
84  
85    public static final String ROLL_RETRIES_CONF_KEY =
86      "hbase.procedure.store.wal.max.roll.retries";
87    private static final int DEFAULT_ROLL_RETRIES = 3;
88  
89    public static final String MAX_SYNC_FAILURE_ROLL_CONF_KEY =
90      "hbase.procedure.store.wal.sync.failure.roll.max";
91    private static final int DEFAULT_MAX_SYNC_FAILURE_ROLL = 3;
92  
93    public static final String PERIODIC_ROLL_CONF_KEY =
94      "hbase.procedure.store.wal.periodic.roll.msec";
95    private static final int DEFAULT_PERIODIC_ROLL = 60 * 60 * 1000; // 1h
96  
97    public static final String SYNC_WAIT_MSEC_CONF_KEY = "hbase.procedure.store.wal.sync.wait.msec";
98    private static final int DEFAULT_SYNC_WAIT_MSEC = 100;
99  
100   public static final String USE_HSYNC_CONF_KEY = "hbase.procedure.store.wal.use.hsync";
101   private static final boolean DEFAULT_USE_HSYNC = true;
102 
103   public static final String ROLL_THRESHOLD_CONF_KEY = "hbase.procedure.store.wal.roll.threshold";
104   private static final long DEFAULT_ROLL_THRESHOLD = 32 * 1024 * 1024; // 32M
105 
106   public static final String STORE_WAL_SYNC_STATS_COUNT =
107       "hbase.procedure.store.wal.sync.stats.count";
108   private static final int DEFAULT_SYNC_STATS_COUNT = 10;
109 
110   private final LinkedList<ProcedureWALFile> logs = new LinkedList<ProcedureWALFile>();
111   private final ProcedureStoreTracker storeTracker = new ProcedureStoreTracker();
112   private final ReentrantLock lock = new ReentrantLock();
113   private final Condition waitCond = lock.newCondition();
114   private final Condition slotCond = lock.newCondition();
115   private final Condition syncCond = lock.newCondition();
116 
117   private final LeaseRecovery leaseRecovery;
118   private final Configuration conf;
119   private final FileSystem fs;
120   private final Path walDir;
121   private final boolean enforceStreamCapability;
122 
123   private final AtomicReference<Throwable> syncException = new AtomicReference<Throwable>();
124   private final AtomicBoolean loading = new AtomicBoolean(true);
125   private final AtomicBoolean inSync = new AtomicBoolean(false);
126   private final AtomicLong totalSynced = new AtomicLong(0);
127   private final AtomicLong lastRollTs = new AtomicLong(0);
128 
129   private LinkedTransferQueue<ByteSlot> slotsCache = null;
130   private Set<ProcedureWALFile> corruptedLogs = null;
131   private FSDataOutputStream stream = null;
132   private long flushLogId = 0;
133   private int slotIndex = 0;
134   private Thread syncThread;
135   private ByteSlot[] slots;
136 
137   private int maxRetriesBeforeRoll;
138   private int maxSyncFailureRoll;
139   private int waitBeforeRoll;
140   private int rollRetries;
141   private int periodicRollMsec;
142   private long rollThreshold;
143   private boolean useHsync;
144   private int syncWaitMsec;
145 
146   // Variables used for UI display
147   private CircularFifoBuffer syncMetricsBuffer;
148 
149   public static class SyncMetrics {
150     private long timestamp;
151     private long syncWaitMs;
152     private long totalSyncedBytes;
153     private int syncedEntries;
154     private float syncedPerSec;
155 
156     public long getTimestamp() {
157       return timestamp;
158     }
159 
160     public long getSyncWaitMs() {
161       return syncWaitMs;
162     }
163 
164     public long getTotalSyncedBytes() {
165       return totalSyncedBytes;
166     }
167 
168     public long getSyncedEntries() {
169       return syncedEntries;
170     }
171 
172     public float getSyncedPerSec() {
173       return syncedPerSec;
174     }
175   }
176 
177   public WALProcedureStore(final Configuration conf, final Path walDir,
178       final LeaseRecovery leaseRecovery) throws IOException {
179     this.conf = conf;
180     this.walDir = walDir;
181     this.leaseRecovery = leaseRecovery;
182     this.fs = walDir.getFileSystem(conf);
183     this.enforceStreamCapability =
184       conf.getBoolean(CommonFSUtils.UNSAFE_STREAM_CAPABILITY_ENFORCE, true);
185 
186     // Create the log directory for the procedure store
187     if (!fs.exists(walDir)) {
188       if (!fs.mkdirs(walDir)) {
189         throw new IOException("Unable to mkdir " + walDir);
190       }
191     }
192     // Now that it exists, set the log policy
193     CommonFSUtils.setStoragePolicy(fs, conf, walDir, HConstants.WAL_STORAGE_POLICY,
194       HConstants.DEFAULT_WAL_STORAGE_POLICY);
195   }
196 
197   @Override
198   public void start(int numSlots) throws IOException {
199     if (!setRunning(true)) {
200       return;
201     }
202 
203     // Init buffer slots
204     loading.set(true);
205     slots = new ByteSlot[numSlots];
206     slotsCache = new LinkedTransferQueue();
207     while (slotsCache.size() < numSlots) {
208       slotsCache.offer(new ByteSlot());
209     }
210 
211     // Tunings
212     maxRetriesBeforeRoll =
213       conf.getInt(MAX_RETRIES_BEFORE_ROLL_CONF_KEY, DEFAULT_MAX_RETRIES_BEFORE_ROLL);
214     maxSyncFailureRoll = conf.getInt(MAX_SYNC_FAILURE_ROLL_CONF_KEY, DEFAULT_MAX_SYNC_FAILURE_ROLL);
215     waitBeforeRoll = conf.getInt(WAIT_BEFORE_ROLL_CONF_KEY, DEFAULT_WAIT_BEFORE_ROLL);
216     rollRetries = conf.getInt(ROLL_RETRIES_CONF_KEY, DEFAULT_ROLL_RETRIES);
217     rollThreshold = conf.getLong(ROLL_THRESHOLD_CONF_KEY, DEFAULT_ROLL_THRESHOLD);
218     periodicRollMsec = conf.getInt(PERIODIC_ROLL_CONF_KEY, DEFAULT_PERIODIC_ROLL);
219     syncWaitMsec = conf.getInt(SYNC_WAIT_MSEC_CONF_KEY, DEFAULT_SYNC_WAIT_MSEC);
220     useHsync = conf.getBoolean(USE_HSYNC_CONF_KEY, DEFAULT_USE_HSYNC);
221 
222     // WebUI
223     syncMetricsBuffer = new CircularFifoBuffer(
224       conf.getInt(STORE_WAL_SYNC_STATS_COUNT, DEFAULT_SYNC_STATS_COUNT));
225 
226     // Init sync thread
227     syncThread = new Thread("WALProcedureStoreSyncThread") {
228       @Override
229       public void run() {
230         try {
231           syncLoop();
232         } catch (Throwable e) {
233           LOG.error("Got an exception from the sync-loop", e);
234           if (!isSyncAborted()) {
235             sendAbortProcessSignal();
236           }
237         }
238       }
239     };
240     syncThread.start();
241   }
242 
243   @Override
244   public void stop(boolean abort) {
245     if (!setRunning(false)) {
246       return;
247     }
248 
249     LOG.info("Stopping the WAL Procedure Store");
250     sendStopSignal();
251 
252     if (!abort) {
253       try {
254         while (syncThread.isAlive()) {
255           sendStopSignal();
256           syncThread.join(250);
257         }
258       } catch (InterruptedException e) {
259         LOG.warn("join interrupted", e);
260         Thread.currentThread().interrupt();
261       }
262     }
263 
264     // Close the writer
265     closeStream();
266 
267     // Close the old logs
268     // they should be already closed, this is just in case the load fails
269     // and we call start() and then stop()
270     for (ProcedureWALFile log: logs) {
271       log.close();
272     }
273     logs.clear();
274   }
275 
276   private void sendStopSignal() {
277     if (lock.tryLock()) {
278       try {
279         waitCond.signalAll();
280         syncCond.signalAll();
281       } finally {
282         lock.unlock();
283       }
284     }
285   }
286 
287   @Override
288   public int getNumThreads() {
289     return slots == null ? 0 : slots.length;
290   }
291 
292   public ProcedureStoreTracker getStoreTracker() {
293     return storeTracker;
294   }
295 
296   public ArrayList<ProcedureWALFile> getActiveLogs() {
297     lock.lock();
298     try {
299       return new ArrayList<ProcedureWALFile>(logs);
300     } finally {
301       lock.unlock();
302     }
303   }
304 
305   public Set<ProcedureWALFile> getCorruptedLogs() {
306     return corruptedLogs;
307   }
308 
309   @Override
310   public void recoverLease() throws IOException {
311     lock.lock();
312     try {
313       LOG.info("Starting WAL Procedure Store lease recovery");
314       FileStatus[] oldLogs = getLogFiles();
315       while (isRunning()) {
316         // Get Log-MaxID and recover lease on old logs
317         try {
318           flushLogId = initOldLogs(oldLogs);
319         } catch (FileNotFoundException e) {
320           LOG.warn("someone else is active and deleted logs. retrying.", e);
321           oldLogs = getLogFiles();
322           continue;
323         }
324 
325         // Create new state-log
326         if (!rollWriter(flushLogId + 1)) {
327           // someone else has already created this log
328           LOG.debug("someone else has already created log " + flushLogId);
329           continue;
330         }
331 
332         // We have the lease on the log
333         oldLogs = getLogFiles();
334         if (getMaxLogId(oldLogs) > flushLogId) {
335           if (LOG.isDebugEnabled()) {
336             LOG.debug("Someone else created new logs. Expected maxLogId < " + flushLogId);
337           }
338           logs.getLast().removeFile();
339           continue;
340         }
341 
342         LOG.info("Lease acquired for flushLogId: " + flushLogId);
343         break;
344       }
345     } finally {
346       lock.unlock();
347     }
348   }
349 
350   @Override
351   public void load(final ProcedureLoader loader) throws IOException {
352     if (logs.isEmpty()) {
353       throw new RuntimeException("recoverLease() must be called before loading data");
354     }
355 
356     // Nothing to do, If we have only the current log.
357     if (logs.size() == 1) {
358       if (LOG.isDebugEnabled()) {
359         LOG.debug("No state logs to replay.");
360       }
361       loader.setMaxProcId(0);
362       loading.set(false);
363       return;
364     }
365 
366     // Load the old logs
367     Iterator<ProcedureWALFile> it = logs.descendingIterator();
368     it.next(); // Skip the current log
369     try {
370       ProcedureWALFormat.load(it, storeTracker, new ProcedureWALFormat.Loader() {
371         @Override
372         public void setMaxProcId(long maxProcId) {
373           loader.setMaxProcId(maxProcId);
374         }
375 
376         @Override
377         public void load(ProcedureIterator procIter) throws IOException {
378           loader.load(procIter);
379         }
380 
381         @Override
382         public void handleCorrupted(ProcedureIterator procIter) throws IOException {
383           loader.handleCorrupted(procIter);
384         }
385 
386         @Override
387         public void markCorruptedWAL(ProcedureWALFile log, IOException e) {
388           if (corruptedLogs == null) {
389             corruptedLogs = new HashSet<ProcedureWALFile>();
390           }
391           corruptedLogs.add(log);
392           // TODO: sideline corrupted log
393         }
394       });
395     } finally {
396       loading.set(false);
397     }
398   }
399 
400   @Override
401   public void insert(final Procedure proc, final Procedure[] subprocs) {
402     if (LOG.isTraceEnabled()) {
403       LOG.trace("Insert " + proc + ", subproc=" + Arrays.toString(subprocs));
404     }
405 
406     ByteSlot slot = acquireSlot();
407     try {
408       // Serialize the insert
409       long[] subProcIds = null;
410       if (subprocs != null) {
411         ProcedureWALFormat.writeInsert(slot, proc, subprocs);
412         subProcIds = new long[subprocs.length];
413         for (int i = 0; i < subprocs.length; ++i) {
414           subProcIds[i] = subprocs[i].getProcId();
415         }
416       } else {
417         assert !proc.hasParent();
418         ProcedureWALFormat.writeInsert(slot, proc);
419       }
420 
421       // Push the transaction data and wait until it is persisted
422       pushData(PushType.INSERT, slot, proc.getProcId(), subProcIds);
423     } catch (IOException e) {
424       // We are not able to serialize the procedure.
425       // this is a code error, and we are not able to go on.
426       LOG.fatal("Unable to serialize one of the procedure: proc=" + proc +
427                 ", subprocs=" + Arrays.toString(subprocs), e);
428       throw new RuntimeException(e);
429     } finally {
430       releaseSlot(slot);
431     }
432   }
433 
434   @Override
435   public void update(final Procedure proc) {
436     if (LOG.isTraceEnabled()) {
437       LOG.trace("Update " + proc);
438     }
439 
440     ByteSlot slot = acquireSlot();
441     try {
442       // Serialize the update
443       ProcedureWALFormat.writeUpdate(slot, proc);
444 
445       // Push the transaction data and wait until it is persisted
446       pushData(PushType.UPDATE, slot, proc.getProcId(), null);
447     } catch (IOException e) {
448       // We are not able to serialize the procedure.
449       // this is a code error, and we are not able to go on.
450       LOG.fatal("Unable to serialize the procedure: " + proc, e);
451       throw new RuntimeException(e);
452     } finally {
453       releaseSlot(slot);
454     }
455   }
456 
457   @Override
458   public void delete(final long procId) {
459     if (LOG.isTraceEnabled()) {
460       LOG.trace("Delete " + procId);
461     }
462 
463     ByteSlot slot = acquireSlot();
464     try {
465       // Serialize the delete
466       ProcedureWALFormat.writeDelete(slot, procId);
467 
468       // Push the transaction data and wait until it is persisted
469       pushData(PushType.DELETE, slot, procId, null);
470     } catch (IOException e) {
471       // We are not able to serialize the procedure.
472       // this is a code error, and we are not able to go on.
473       LOG.fatal("Unable to serialize the procedure: " + procId, e);
474       throw new RuntimeException(e);
475     } finally {
476       releaseSlot(slot);
477     }
478   }
479 
480   @Override
481   public void delete(final Procedure proc, final long[] subProcIds) {
482     if (LOG.isTraceEnabled()) {
483       LOG.trace("Update " + proc + " and Delete " + Arrays.toString(subProcIds));
484     }
485 
486     ByteSlot slot = acquireSlot();
487     try {
488       // Serialize the delete
489       ProcedureWALFormat.writeDelete(slot, proc, subProcIds);
490 
491       // Push the transaction data and wait until it is persisted
492       pushData(PushType.DELETE, slot, proc.getProcId(), subProcIds);
493     } catch (IOException e) {
494       // We are not able to serialize the procedure.
495       // this is a code error, and we are not able to go on.
496       LOG.fatal("Unable to serialize the procedure: " + proc, e);
497       throw new RuntimeException(e);
498     } finally {
499       releaseSlot(slot);
500     }
501   }
502 
503   private ByteSlot acquireSlot() {
504     ByteSlot slot = slotsCache.poll();
505     return slot != null ? slot : new ByteSlot();
506   }
507 
508   private void releaseSlot(final ByteSlot slot) {
509     slot.reset();
510     slotsCache.offer(slot);
511   }
512 
513   private enum PushType { INSERT, UPDATE, DELETE };
514 
515   private long pushData(final PushType type, final ByteSlot slot,
516       final long procId, final long[] subProcIds) {
517     if (!isRunning()) {
518       throw new RuntimeException("the store must be running before inserting data");
519     }
520     if (logs.isEmpty()) {
521       throw new RuntimeException("recoverLease() must be called before inserting data");
522     }
523 
524     long logId = -1;
525     lock.lock();
526     try {
527       // Wait for the sync to be completed
528       while (true) {
529         if (!isRunning()) {
530           throw new RuntimeException("store no longer running");
531         } else if (isSyncAborted()) {
532           throw new RuntimeException("sync aborted", syncException.get());
533         } else if (inSync.get()) {
534           syncCond.await();
535         } else if (slotIndex == slots.length) {
536           slotCond.signal();
537           syncCond.await();
538         } else {
539           break;
540         }
541       }
542 
543       updateStoreTracker(type, procId, subProcIds);
544       slots[slotIndex++] = slot;
545       logId = flushLogId;
546 
547       // Notify that there is new data
548       if (slotIndex == 1) {
549         waitCond.signal();
550       }
551 
552       // Notify that the slots are full
553       if (slotIndex == slots.length) {
554         waitCond.signal();
555         slotCond.signal();
556       }
557 
558       syncCond.await();
559     } catch (InterruptedException e) {
560       Thread.currentThread().interrupt();
561       sendAbortProcessSignal();
562       throw new RuntimeException(e);
563     } finally {
564       lock.unlock();
565       if (isSyncAborted()) {
566         throw new RuntimeException("sync aborted", syncException.get());
567       }
568     }
569     return logId;
570   }
571 
572   private void updateStoreTracker(final PushType type,
573       final long procId, final long[] subProcIds) {
574     switch (type) {
575       case INSERT:
576         if (subProcIds == null) {
577           storeTracker.insert(procId);
578         } else {
579           storeTracker.insert(procId, subProcIds);
580         }
581         break;
582       case UPDATE:
583         storeTracker.update(procId);
584         break;
585       case DELETE:
586         if (subProcIds != null && subProcIds.length > 0) {
587           storeTracker.delete(subProcIds);
588         } else {
589           storeTracker.delete(procId);
590         }
591         break;
592       default:
593         throw new RuntimeException("invalid push type " + type);
594     }
595   }
596 
597   private boolean isSyncAborted() {
598     return syncException.get() != null;
599   }
600 
601   private void syncLoop() throws Throwable {
602     long totalSyncedToStore = 0;
603     inSync.set(false);
604     lock.lock();
605     try {
606       while (isRunning()) {
607         try {
608           // Wait until new data is available
609           if (slotIndex == 0) {
610             if (!loading.get()) {
611               periodicRoll();
612             }
613 
614             if (LOG.isTraceEnabled()) {
615               float rollTsSec = getMillisFromLastRoll() / 1000.0f;
616               LOG.trace(String.format("Waiting for data. flushed=%s (%s/sec)",
617                         StringUtils.humanSize(totalSynced.get()),
618                         StringUtils.humanSize(totalSynced.get() / rollTsSec)));
619             }
620 
621             waitCond.await(getMillisToNextPeriodicRoll(), TimeUnit.MILLISECONDS);
622             if (slotIndex == 0) {
623               // no data.. probably a stop() or a periodic roll
624               continue;
625             }
626           }
627           // Wait SYNC_WAIT_MSEC or the signal of "slots full" before flushing
628           final long syncWaitSt = System.currentTimeMillis();
629           if (slotIndex != slots.length) {
630             slotCond.await(syncWaitMsec, TimeUnit.MILLISECONDS);
631           }
632 
633           final long currentTs = System.currentTimeMillis();
634           final long syncWaitMs = currentTs - syncWaitSt;
635           final float rollSec = getMillisFromLastRoll() / 1000.0f;
636           final float syncedPerSec = totalSyncedToStore / rollSec;
637           if (LOG.isTraceEnabled() && (syncWaitMs > 10 || slotIndex < slots.length)) {
638             LOG.trace(String.format("Sync wait %s, slotIndex=%s , totalSynced=%s (%s/sec)",
639                       StringUtils.humanTimeDiff(syncWaitMs), slotIndex,
640                       StringUtils.humanSize(totalSyncedToStore),
641                       StringUtils.humanSize(syncedPerSec)));
642           }
643 
644           // update webui circular buffers (TODO: get rid of allocations)
645           final SyncMetrics syncMetrics = new SyncMetrics();
646           syncMetrics.timestamp = currentTs;
647           syncMetrics.syncWaitMs = syncWaitMs;
648           syncMetrics.syncedEntries = slotIndex;
649           syncMetrics.totalSyncedBytes = totalSyncedToStore;
650           syncMetrics.syncedPerSec = syncedPerSec;
651           syncMetricsBuffer.add(syncMetrics);
652 
653           // sync
654           inSync.set(true);
655           long slotSize = syncSlots();
656           logs.getLast().addToSize(slotSize);
657           totalSyncedToStore = totalSynced.addAndGet(slotSize);
658           slotIndex = 0;
659           inSync.set(false);
660         } catch (InterruptedException e) {
661           Thread.currentThread().interrupt();
662           sendAbortProcessSignal();
663           syncException.compareAndSet(null, e);
664           throw e;
665         } catch (Throwable t) {
666           syncException.compareAndSet(null, t);
667           throw t;
668         } finally {
669           syncCond.signalAll();
670         }
671       }
672     } finally {
673       lock.unlock();
674     }
675   }
676 
677   public ArrayList<SyncMetrics> getSyncMetrics() {
678     lock.lock();
679     try {
680       return new ArrayList<SyncMetrics>(syncMetricsBuffer);
681     } finally {
682       lock.unlock();
683     }
684   }
685 
686   private long syncSlots() throws Throwable {
687     int retry = 0;
688     int logRolled = 0;
689     long totalSynced = 0;
690     do {
691       try {
692         totalSynced = syncSlots(stream, slots, 0, slotIndex);
693         break;
694       } catch (Throwable e) {
695         LOG.warn("unable to sync slots, retry=" + retry);
696         if (++retry >= maxRetriesBeforeRoll) {
697           if (logRolled >= maxSyncFailureRoll) {
698             LOG.error("Sync slots after log roll failed, abort.", e);
699             sendAbortProcessSignal();
700             throw e;
701           }
702 
703           if (!rollWriterOrDie()) {
704             throw e;
705           }
706 
707           logRolled++;
708           retry = 0;
709         }
710       }
711     } while (isRunning());
712     return totalSynced;
713   }
714 
715   protected long syncSlots(FSDataOutputStream stream, ByteSlot[] slots, int offset, int count)
716       throws IOException {
717     long totalSynced = 0;
718     for (int i = 0; i < count; ++i) {
719       ByteSlot data = slots[offset + i];
720       data.writeTo(stream);
721       totalSynced += data.size();
722     }
723 
724     if (useHsync) {
725       stream.hsync();
726     } else {
727       stream.hflush();
728     }
729     sendPostSyncSignal();
730 
731     if (LOG.isTraceEnabled()) {
732       LOG.trace("Sync slots=" + count + '/' + slots.length +
733                 ", flushed=" + StringUtils.humanSize(totalSynced));
734     }
735     return totalSynced;
736   }
737 
738   private boolean rollWriterOrDie() {
739     for (int i = 0; i < rollRetries; ++i) {
740       if (i > 0) Threads.sleepWithoutInterrupt(waitBeforeRoll * i);
741 
742       try {
743         if (rollWriter()) {
744           return true;
745         }
746       } catch (IOException e) {
747         LOG.warn("Unable to roll the log, attempt=" + (i + 1), e);
748       }
749     }
750     LOG.fatal("Unable to roll the log");
751     sendAbortProcessSignal();
752     throw new RuntimeException("unable to roll the log");
753   }
754 
755   private boolean tryRollWriter() {
756     try {
757       return rollWriter();
758     } catch (IOException e) {
759       LOG.warn("Unable to roll the log", e);
760       return false;
761     }
762   }
763 
764   public long getMillisToNextPeriodicRoll() {
765     if (lastRollTs.get() > 0 && periodicRollMsec > 0) {
766       return periodicRollMsec - getMillisFromLastRoll();
767     }
768     return Long.MAX_VALUE;
769   }
770 
771   public long getMillisFromLastRoll() {
772     return (System.currentTimeMillis() - lastRollTs.get());
773   }
774 
775   protected void periodicRollForTesting() throws IOException {
776     lock.lock();
777     try {
778       periodicRoll();
779     } finally {
780       lock.unlock();
781     }
782   }
783 
784   protected boolean rollWriterForTesting() throws IOException {
785     lock.lock();
786     try {
787       return rollWriter();
788     } finally {
789       lock.unlock();
790     }
791   }
792 
793   private void periodicRoll() throws IOException {
794     if (storeTracker.isEmpty()) {
795       if (LOG.isTraceEnabled()) {
796         LOG.trace("no active procedures");
797       }
798       tryRollWriter();
799       removeAllLogs(flushLogId - 1);
800     } else {
801       if (storeTracker.isUpdated()) {
802         if (LOG.isTraceEnabled()) {
803           LOG.trace("all the active procedures are in the latest log");
804         }
805         removeAllLogs(flushLogId - 1);
806       }
807 
808       // if the log size has exceeded the roll threshold
809       // or the periodic roll timeout is expired, try to roll the wal.
810       if (totalSynced.get() > rollThreshold || getMillisToNextPeriodicRoll() <= 0) {
811         tryRollWriter();
812       }
813 
814       removeInactiveLogs();
815     }
816   }
817 
818   private boolean rollWriter() throws IOException {
819     // Create new state-log
820     if (!rollWriter(flushLogId + 1)) {
821       LOG.warn("someone else has already created log " + flushLogId);
822       return false;
823     }
824 
825     // We have the lease on the log,
826     // but we should check if someone else has created new files
827     if (getMaxLogId(getLogFiles()) > flushLogId) {
828       LOG.warn("Someone else created new logs. Expected maxLogId < " + flushLogId);
829       logs.getLast().removeFile();
830       return false;
831     }
832 
833     // We have the lease on the log
834     return true;
835   }
836 
837   private boolean rollWriter(final long logId) throws IOException {
838     assert logId > flushLogId : "logId=" + logId + " flushLogId=" + flushLogId;
839     assert lock.isHeldByCurrentThread() : "expected to be the lock owner. " + lock.isLocked();
840 
841     ProcedureWALHeader header = ProcedureWALHeader.newBuilder()
842       .setVersion(ProcedureWALFormat.HEADER_VERSION)
843       .setType(ProcedureWALFormat.LOG_TYPE_STREAM)
844       .setMinProcId(storeTracker.getMinProcId())
845       .setLogId(logId)
846       .build();
847 
848     FSDataOutputStream newStream = null;
849     Path newLogFile = null;
850     long startPos = -1;
851     newLogFile = getLogFilePath(logId);
852     try {
853       newStream = fs.create(newLogFile, false);
854     } catch (FileAlreadyExistsException e) {
855       LOG.error("Log file with id=" + logId + " already exists", e);
856       return false;
857     } catch (RemoteException re) {
858       LOG.warn("failed to create log file with id=" + logId, re);
859       return false;
860     }
861     // After we create the stream but before we attempt to use it at all
862     // ensure that we can provide the level of data safety we're configured
863     // to provide.
864     final String durability = useHsync ? "hsync" : "hflush";
865     if (enforceStreamCapability && !(CommonFSUtils.hasCapability(newStream, durability))) {
866       throw new IllegalStateException("The procedure WAL relies on the ability to " + durability +
867         " for proper operation during component failures, but the underlying filesystem does " +
868         "not support doing so. Please check the config value of '" + USE_HSYNC_CONF_KEY +
869         "' to set the desired level of robustness and ensure the config value of '" +
870         CommonFSUtils.HBASE_WAL_DIR + "' points to a FileSystem mount that can provide it.");
871     }
872     try {
873       ProcedureWALFormat.writeHeader(newStream, header);
874       startPos = newStream.getPos();
875     } catch (IOException ioe) {
876       LOG.warn("Encountered exception writing header", ioe);
877       newStream.close();
878       return false;
879     }
880 
881     closeStream();
882 
883     storeTracker.resetUpdates();
884     stream = newStream;
885     flushLogId = logId;
886     totalSynced.set(0);
887     long rollTs = System.currentTimeMillis();
888     lastRollTs.set(rollTs);
889     logs.add(new ProcedureWALFile(fs, newLogFile, header, startPos, rollTs));
890 
891     if (LOG.isDebugEnabled()) {
892       LOG.debug("Roll new state log: " + logId);
893     }
894     return true;
895   }
896 
897   private void closeStream() {
898     try {
899       if (stream != null) {
900         try {
901           ProcedureWALFile log = logs.getLast();
902           log.setProcIds(storeTracker.getUpdatedMinProcId(), storeTracker.getUpdatedMaxProcId());
903           long trailerSize = ProcedureWALFormat.writeTrailer(stream, storeTracker);
904           log.addToSize(trailerSize);
905         } catch (IOException e) {
906           LOG.warn("Unable to write the trailer: " + e.getMessage());
907         }
908         stream.close();
909       }
910     } catch (IOException e) {
911       LOG.error("Unable to close the stream", e);
912     } finally {
913       stream = null;
914     }
915   }
916 
917   // ==========================================================================
918   //  Log Files cleaner helpers
919   // ==========================================================================
920   private void removeInactiveLogs() {
921     // Verify if the ProcId of the first oldest is still active. if not remove the file.
922     while (logs.size() > 1) {
923       ProcedureWALFile log = logs.getFirst();
924       if (storeTracker.isTracking(log.getMinProcId(), log.getMaxProcId())) {
925         break;
926       }
927       removeLogFile(log);
928     }
929   }
930 
931   private void removeAllLogs(long lastLogId) {
932     if (logs.size() <= 1) return;
933 
934     if (LOG.isDebugEnabled()) {
935       LOG.debug("Remove all state logs with ID less than " + lastLogId);
936     }
937     while (logs.size() > 1) {
938       ProcedureWALFile log = logs.getFirst();
939       if (lastLogId < log.getLogId()) {
940         break;
941       }
942       removeLogFile(log);
943     }
944   }
945 
946   private boolean removeLogFile(final ProcedureWALFile log) {
947     try {
948       if (LOG.isTraceEnabled()) {
949         LOG.trace("Removing log=" + log);
950       }
951       log.removeFile();
952       logs.remove(log);
953       if (LOG.isDebugEnabled()) {
954         LOG.info("Removed log=" + log + " activeLogs=" + logs);
955       }
956       assert logs.size() > 0 : "expected at least one log";
957     } catch (IOException e) {
958       LOG.error("Unable to remove log: " + log, e);
959       return false;
960     }
961     return true;
962   }
963 
964   // ==========================================================================
965   //  FileSystem Log Files helpers
966   // ==========================================================================
967   public Path getWALDir() {
968     return this.walDir;
969   }
970 
971   public FileSystem getFileSystem() {
972     return this.fs;
973   }
974 
975   protected Path getLogFilePath(final long logId) throws IOException {
976     return new Path(walDir, String.format("state-%020d.log", logId));
977   }
978 
979   private static long getLogIdFromName(final String name) {
980     int end = name.lastIndexOf(".log");
981     int start = name.lastIndexOf('-') + 1;
982     while (start < end) {
983       if (name.charAt(start) != '0')
984         break;
985       start++;
986     }
987     return Long.parseLong(name.substring(start, end));
988   }
989 
990   private static final PathFilter WALS_PATH_FILTER = new PathFilter() {
991     @Override
992     public boolean accept(Path path) {
993       String name = path.getName();
994       return name.startsWith("state-") && name.endsWith(".log");
995     }
996   };
997 
998   private static final Comparator<FileStatus> FILE_STATUS_ID_COMPARATOR =
999       new Comparator<FileStatus>() {
1000     @Override
1001     public int compare(FileStatus a, FileStatus b) {
1002       final long aId = getLogIdFromName(a.getPath().getName());
1003       final long bId = getLogIdFromName(b.getPath().getName());
1004       return Long.compare(aId, bId);
1005     }
1006   };
1007 
1008   private FileStatus[] getLogFiles() throws IOException {
1009     try {
1010       FileStatus[] files = fs.listStatus(walDir, WALS_PATH_FILTER);
1011       Arrays.sort(files, FILE_STATUS_ID_COMPARATOR);
1012       return files;
1013     } catch (FileNotFoundException e) {
1014       LOG.warn("Log directory not found: " + e.getMessage());
1015       return null;
1016     }
1017   }
1018 
1019   private static long getMaxLogId(final FileStatus[] logFiles) {
1020     long maxLogId = 0;
1021     if (logFiles != null && logFiles.length > 0) {
1022       for (int i = 0; i < logFiles.length; ++i) {
1023         maxLogId = Math.max(maxLogId, getLogIdFromName(logFiles[i].getPath().getName()));
1024       }
1025     }
1026     return maxLogId;
1027   }
1028 
1029   /**
1030    * @return Max-LogID of the specified log file set
1031    */
1032   private long initOldLogs(final FileStatus[] logFiles) throws IOException {
1033     this.logs.clear();
1034 
1035     long maxLogId = 0;
1036     if (logFiles != null && logFiles.length > 0) {
1037       for (int i = 0; i < logFiles.length; ++i) {
1038         final Path logPath = logFiles[i].getPath();
1039         leaseRecovery.recoverFileLease(fs, logPath);
1040         maxLogId = Math.max(maxLogId, getLogIdFromName(logPath.getName()));
1041 
1042         ProcedureWALFile log = initOldLog(logFiles[i]);
1043         if (log != null) {
1044           this.logs.add(log);
1045         }
1046       }
1047       Collections.sort(this.logs);
1048       initTrackerFromOldLogs();
1049     }
1050     return maxLogId;
1051   }
1052 
1053   private void initTrackerFromOldLogs() {
1054     // TODO: Load the most recent tracker available
1055     if (!logs.isEmpty()) {
1056       ProcedureWALFile log = logs.getLast();
1057       try {
1058         log.readTracker(storeTracker);
1059       } catch (IOException e) {
1060         LOG.warn("Unable to read tracker for " + log + " - " + e.getMessage());
1061         // try the next one...
1062         storeTracker.reset();
1063         storeTracker.setPartialFlag(true);
1064       }
1065     }
1066   }
1067 
1068   private ProcedureWALFile initOldLog(final FileStatus logFile) throws IOException {
1069     ProcedureWALFile log = new ProcedureWALFile(fs, logFile);
1070     if (logFile.getLen() == 0) {
1071       LOG.warn("Remove uninitialized log: " + logFile);
1072       log.removeFile();
1073       return null;
1074     }
1075     if (LOG.isDebugEnabled()) {
1076       LOG.debug("Opening state-log: " + logFile);
1077     }
1078     try {
1079       log.open();
1080     } catch (ProcedureWALFormat.InvalidWALDataException e) {
1081       LOG.warn("Remove uninitialized log: " + logFile, e);
1082       log.removeFile();
1083       return null;
1084     } catch (IOException e) {
1085       String msg = "Unable to read state log: " + logFile;
1086       LOG.error(msg, e);
1087       throw new IOException(msg, e);
1088     }
1089 
1090     if (log.isCompacted()) {
1091       try {
1092         log.readTrailer();
1093       } catch (IOException e) {
1094         LOG.warn("Unfinished compacted log: " + logFile, e);
1095         log.removeFile();
1096         return null;
1097       }
1098     }
1099     return log;
1100   }
1101 }