1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.procedure2;
20
21 import com.google.common.base.Preconditions;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Collections;
27 import java.util.HashSet;
28 import java.util.Iterator;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.concurrent.atomic.AtomicBoolean;
32 import java.util.concurrent.atomic.AtomicInteger;
33 import java.util.concurrent.atomic.AtomicLong;
34 import java.util.concurrent.locks.ReentrantLock;
35 import java.util.concurrent.ConcurrentHashMap;
36 import java.util.concurrent.CopyOnWriteArrayList;
37 import java.util.concurrent.TimeUnit;
38
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41 import org.apache.hadoop.conf.Configuration;
42 import org.apache.hadoop.hbase.HConstants;
43 import org.apache.hadoop.hbase.ProcedureInfo;
44 import org.apache.hadoop.hbase.classification.InterfaceAudience;
45 import org.apache.hadoop.hbase.classification.InterfaceStability;
46 import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException;
47 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore;
48 import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator;
49 import org.apache.hadoop.hbase.procedure2.util.StringUtils;
50 import org.apache.hadoop.hbase.procedure2.util.TimeoutBlockingQueue;
51 import org.apache.hadoop.hbase.procedure2.util.TimeoutBlockingQueue.TimeoutRetriever;
52 import org.apache.hadoop.hbase.protobuf.generated.ErrorHandlingProtos;
53 import org.apache.hadoop.hbase.protobuf.generated.ProcedureProtos.ProcedureState;
54 import org.apache.hadoop.hbase.security.User;
55 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
56 import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
57 import org.apache.hadoop.hbase.util.NonceKey;
58 import org.apache.hadoop.hbase.util.Pair;
59 import org.apache.hadoop.hbase.util.Threads;
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74 @InterfaceAudience.Private
75 @InterfaceStability.Evolving
76 public class ProcedureExecutor<TEnvironment> {
77 private static final Log LOG = LogFactory.getLog(ProcedureExecutor.class);
78
79 Testing testing = null;
80 public static class Testing {
81 protected boolean killBeforeStoreUpdate = false;
82 protected boolean toggleKillBeforeStoreUpdate = false;
83
84 protected boolean shouldKillBeforeStoreUpdate() {
85 final boolean kill = this.killBeforeStoreUpdate;
86 if (this.toggleKillBeforeStoreUpdate) {
87 this.killBeforeStoreUpdate = !kill;
88 LOG.warn("Toggle Kill before store update to: " + this.killBeforeStoreUpdate);
89 }
90 return kill;
91 }
92 }
93
/**
 * Callbacks fired by the executor. Listeners are added with
 * {@code registerListener} and removed with {@code unregisterListener}; an
 * exception thrown by a callback is logged by the executor and otherwise ignored.
 */
public interface ProcedureExecutorListener {
  /**
   * Fired while the store is being loaded, for each parent-less procedure that is
   * put back on the run queue.
   *
   * @param procId id of the loaded procedure
   */
  void procedureLoaded(long procId);

  /**
   * Fired when a procedure has been submitted and registered with the executor.
   *
   * @param procId id of the new procedure
   */
  void procedureAdded(long procId);

  /**
   * Fired after a root procedure has been moved to the completed map.
   *
   * @param procId id of the finished procedure
   */
  void procedureFinished(long procId);
}
99
100
101
102
103 private static class ProcedureTimeoutRetriever implements TimeoutRetriever<Procedure> {
104 @Override
105 public long getTimeout(Procedure proc) {
106 return proc.getTimeRemaining();
107 }
108
109 @Override
110 public TimeUnit getTimeUnit(Procedure proc) {
111 return TimeUnit.MILLISECONDS;
112 }
113 }
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130 private static class CompletedProcedureCleaner<TEnvironment>
131 extends ProcedureInMemoryChore<TEnvironment> {
132 private static final Log LOG = LogFactory.getLog(CompletedProcedureCleaner.class);
133
134 private static final String CLEANER_INTERVAL_CONF_KEY = "hbase.procedure.cleaner.interval";
135 private static final int DEFAULT_CLEANER_INTERVAL = 30 * 1000;
136
137 private static final String EVICT_TTL_CONF_KEY = "hbase.procedure.cleaner.evict.ttl";
138 private static final int DEFAULT_EVICT_TTL = 15 * 60000;
139
140 private static final String EVICT_ACKED_TTL_CONF_KEY ="hbase.procedure.cleaner.acked.evict.ttl";
141 private static final int DEFAULT_ACKED_EVICT_TTL = 5 * 60000;
142
143 private final Map<Long, ProcedureInfo> completed;
144 private final Map<NonceKey, Long> nonceKeysToProcIdsMap;
145 private final ProcedureStore store;
146 private final Configuration conf;
147
148 public CompletedProcedureCleaner(final Configuration conf, final ProcedureStore store,
149 final Map<Long, ProcedureInfo> completedMap,
150 final Map<NonceKey, Long> nonceKeysToProcIdsMap) {
151
152 super(conf.getInt(CLEANER_INTERVAL_CONF_KEY, DEFAULT_CLEANER_INTERVAL));
153 this.completed = completedMap;
154 this.nonceKeysToProcIdsMap = nonceKeysToProcIdsMap;
155 this.store = store;
156 this.conf = conf;
157 }
158
159 @Override
160 protected void periodicExecute(final TEnvironment env) {
161 if (completed.isEmpty()) {
162 if (LOG.isTraceEnabled()) {
163 LOG.trace("No completed procedures to cleanup.");
164 }
165 return;
166 }
167
168 final long evictTtl = conf.getInt(EVICT_TTL_CONF_KEY, DEFAULT_EVICT_TTL);
169 final long evictAckTtl = conf.getInt(EVICT_ACKED_TTL_CONF_KEY, DEFAULT_ACKED_EVICT_TTL);
170
171 final long now = EnvironmentEdgeManager.currentTime();
172 final Iterator<Map.Entry<Long, ProcedureInfo>> it = completed.entrySet().iterator();
173 final boolean isDebugEnabled = LOG.isDebugEnabled();
174 while (it.hasNext() && store.isRunning()) {
175 final Map.Entry<Long, ProcedureInfo> entry = it.next();
176 final ProcedureInfo procInfo = entry.getValue();
177
178
179 if ((procInfo.hasClientAckTime() && (now - procInfo.getClientAckTime()) >= evictAckTtl) ||
180 (now - procInfo.getLastUpdate()) >= evictTtl) {
181
182 if (!(procInfo instanceof FailedProcedureInfo)) {
183 store.delete(entry.getKey());
184 }
185 it.remove();
186
187 NonceKey nonceKey = procInfo.getNonceKey();
188 if (nonceKey != null) {
189 nonceKeysToProcIdsMap.remove(nonceKey);
190 }
191 if (isDebugEnabled) {
192 LOG.debug("Evict completed procedure: " + procInfo);
193 }
194 }
195 }
196 }
197 }
198
199
200
201
202
203
204 private final ConcurrentHashMap<Long, ProcedureInfo> completed =
205 new ConcurrentHashMap<Long, ProcedureInfo>();
206
207
208
209
210
211
212 private final ConcurrentHashMap<Long, RootProcedureState> rollbackStack =
213 new ConcurrentHashMap<Long, RootProcedureState>();
214
215
216
217
218
219 private final ConcurrentHashMap<Long, Procedure> procedures =
220 new ConcurrentHashMap<Long, Procedure>();
221
222
223
224
225
226 private ConcurrentHashMap<NonceKey, Long> nonceKeysToProcIdsMap =
227 new ConcurrentHashMap<NonceKey, Long>();
228
229
230
231
232
233 private final TimeoutBlockingQueue<Procedure> waitingTimeout =
234 new TimeoutBlockingQueue<Procedure>(new ProcedureTimeoutRetriever());
235
236
237
238
239 private final ProcedureRunnableSet runnables;
240
241
242 private final ReentrantLock submitLock = new ReentrantLock();
243 private final AtomicLong lastProcId = new AtomicLong(-1);
244
245 private final CopyOnWriteArrayList<ProcedureExecutorListener> listeners =
246 new CopyOnWriteArrayList<ProcedureExecutorListener>();
247
248 private final AtomicInteger activeExecutorCount = new AtomicInteger(0);
249 private final AtomicBoolean running = new AtomicBoolean(false);
250 private final TEnvironment environment;
251 private final ProcedureStore store;
252 private final Configuration conf;
253
254 private Thread[] threads;
255
/**
 * Creates an executor that schedules runnable procedures through a
 * {@link ProcedureSimpleRunQueue}.
 *
 * @param conf configuration handed to the internal chores
 * @param environment environment object passed to every procedure callback
 * @param store store used to persist the procedure state
 */
public ProcedureExecutor(final Configuration conf, final TEnvironment environment,
    final ProcedureStore store) {
  this(conf, environment, store, new ProcedureSimpleRunQueue());
}

/**
 * Creates an executor with a caller-supplied run queue.
 *
 * @param conf configuration handed to the internal chores
 * @param environment environment object passed to every procedure callback
 * @param store store used to persist the procedure state
 * @param runqueue queue from which the executor threads take runnable procedures
 */
public ProcedureExecutor(final Configuration conf, final TEnvironment environment,
    final ProcedureStore store, final ProcedureRunnableSet runqueue) {
  this.conf = conf;
  this.store = store;
  this.runnables = runqueue;
  this.environment = environment;
}
268
269 private void load(final boolean abortOnCorruption) throws IOException {
270 Preconditions.checkArgument(completed.isEmpty());
271 Preconditions.checkArgument(rollbackStack.isEmpty());
272 Preconditions.checkArgument(procedures.isEmpty());
273 Preconditions.checkArgument(waitingTimeout.isEmpty());
274 Preconditions.checkArgument(runnables.size() == 0);
275
276 store.load(new ProcedureStore.ProcedureLoader() {
277 @Override
278 public void setMaxProcId(long maxProcId) {
279 assert lastProcId.get() < 0 : "expected only one call to setMaxProcId()";
280 LOG.debug("load procedures maxProcId=" + maxProcId);
281 lastProcId.set(maxProcId);
282 }
283
284 @Override
285 public void load(ProcedureIterator procIter) throws IOException {
286 loadProcedures(procIter, abortOnCorruption);
287 }
288
289 @Override
290 public void handleCorrupted(ProcedureIterator procIter) throws IOException {
291 int corruptedCount = 0;
292 while (procIter.hasNext()) {
293 ProcedureInfo proc = procIter.nextAsProcedureInfo();
294 LOG.error("corrupted procedure: " + proc);
295 corruptedCount++;
296 }
297 if (abortOnCorruption && corruptedCount > 0) {
298 throw new IOException("found " + corruptedCount + " corrupted procedure(s) on replay");
299 }
300 }
301 });
302 }
303
304 private void loadProcedures(final ProcedureIterator procIter,
305 final boolean abortOnCorruption) throws IOException {
306 final boolean isDebugEnabled = LOG.isDebugEnabled();
307
308
309 int runnablesCount = 0;
310 while (procIter.hasNext()) {
311 final NonceKey nonceKey;
312 final long procId;
313
314 if (procIter.isNextCompleted()) {
315 ProcedureInfo proc = procIter.nextAsProcedureInfo();
316 nonceKey = proc.getNonceKey();
317 procId = proc.getProcId();
318 completed.put(proc.getProcId(), proc);
319 if (isDebugEnabled) {
320 LOG.debug("The procedure is completed: " + proc);
321 }
322 } else {
323 Procedure proc = procIter.nextAsProcedure();
324 nonceKey = proc.getNonceKey();
325 procId = proc.getProcId();
326
327 if (!proc.hasParent()) {
328 assert !proc.isFinished() : "unexpected finished procedure";
329 rollbackStack.put(proc.getProcId(), new RootProcedureState());
330 }
331
332
333 proc.beforeReplay(getEnvironment());
334 procedures.put(proc.getProcId(), proc);
335
336 if (proc.getState() == ProcedureState.RUNNABLE) {
337 runnablesCount++;
338 }
339 }
340
341
342 if (nonceKey != null) {
343 nonceKeysToProcIdsMap.put(nonceKey, procId);
344 }
345 }
346
347
348 ArrayList<Procedure> runnableList = new ArrayList(runnablesCount);
349 HashSet<Procedure> waitingSet = null;
350 procIter.reset();
351 while (procIter.hasNext()) {
352 if (procIter.isNextCompleted()) {
353 procIter.skipNext();
354 continue;
355 }
356
357 Procedure proc = procIter.nextAsProcedure();
358 assert !(proc.isFinished() && !proc.hasParent()) : "unexpected completed proc=" + proc;
359
360 if (isDebugEnabled) {
361 LOG.debug(String.format("Loading procedure state=%s isFailed=%s: %s",
362 proc.getState(), proc.hasException(), proc));
363 }
364
365 Long rootProcId = getRootProcedureId(proc);
366 if (rootProcId == null) {
367
368 runnables.addBack(proc);
369 continue;
370 }
371
372 if (proc.hasParent()) {
373 Procedure parent = procedures.get(proc.getParentProcId());
374
375 if (parent != null && !proc.isFinished()) {
376 parent.incChildrenLatch();
377 }
378 }
379
380 RootProcedureState procStack = rollbackStack.get(rootProcId);
381 procStack.loadStack(proc);
382
383 switch (proc.getState()) {
384 case RUNNABLE:
385 runnableList.add(proc);
386 break;
387 case WAITING:
388 if (!proc.hasChildren()) {
389 runnableList.add(proc);
390 }
391 break;
392 case WAITING_TIMEOUT:
393 if (waitingSet == null) {
394 waitingSet = new HashSet<Procedure>();
395 }
396 waitingSet.add(proc);
397 break;
398 case FINISHED:
399 if (proc.hasException()) {
400
401 runnables.addBack(proc);
402 }
403 break;
404 case ROLLEDBACK:
405 case INITIALIZING:
406 String msg = "Unexpected " + proc.getState() + " state for " + proc;
407 LOG.error(msg);
408 throw new UnsupportedOperationException(msg);
409 default:
410 break;
411 }
412 }
413
414
415 int corruptedCount = 0;
416 Iterator<Map.Entry<Long, RootProcedureState>> itStack = rollbackStack.entrySet().iterator();
417 while (itStack.hasNext()) {
418 Map.Entry<Long, RootProcedureState> entry = itStack.next();
419 RootProcedureState procStack = entry.getValue();
420 if (procStack.isValid()) continue;
421
422 for (Procedure proc: procStack.getSubproceduresStack()) {
423 LOG.error("corrupted procedure: " + proc);
424 procedures.remove(proc.getProcId());
425 runnableList.remove(proc);
426 if (waitingSet != null) waitingSet.remove(proc);
427 corruptedCount++;
428 }
429 itStack.remove();
430 }
431
432 if (abortOnCorruption && corruptedCount > 0) {
433 throw new IOException("found " + corruptedCount + " procedures on replay");
434 }
435
436
437 if (!runnableList.isEmpty()) {
438
439
440 for (int i = runnableList.size() - 1; i >= 0; --i) {
441 Procedure proc = runnableList.get(i);
442 if (!proc.hasParent()) {
443 sendProcedureLoadedNotification(proc.getProcId());
444 }
445 if (proc.wasExecuted()) {
446 runnables.addFront(proc);
447 } else {
448
449 runnables.addBack(proc);
450 }
451 }
452 }
453 }
454
455
456
457
458
459
460
461
462
463
464
465 public void start(int numThreads, boolean abortOnCorruption) throws IOException {
466 if (running.getAndSet(true)) {
467 LOG.warn("Already running");
468 return;
469 }
470
471
472
473 threads = new Thread[numThreads + 1];
474 LOG.info("Starting procedure executor threads=" + threads.length);
475
476
477 for (int i = 0; i < numThreads; ++i) {
478 threads[i] = new Thread("ProcedureExecutor-" + i) {
479 @Override
480 public void run() {
481 execLoop();
482 }
483 };
484 }
485
486
487 threads[numThreads] = new Thread("ProcedureExecutorTimeout") {
488 @Override
489 public void run() {
490 timeoutLoop();
491 }
492 };
493
494
495 store.recoverLease();
496
497
498
499
500
501
502 load(abortOnCorruption);
503
504
505 for (int i = 0; i < threads.length; ++i) {
506 threads[i].start();
507 }
508
509
510 addChore(new CompletedProcedureCleaner(conf, store, completed, nonceKeysToProcIdsMap));
511 }
512
513 public void stop() {
514 if (!running.getAndSet(false)) {
515 return;
516 }
517
518 LOG.info("Stopping the procedure executor");
519 runnables.signalAll();
520 waitingTimeout.signalAll();
521 }
522
523 public void join() {
524 boolean interrupted = false;
525
526 for (int i = 0; i < threads.length; ++i) {
527 try {
528 threads[i].join();
529 } catch (InterruptedException ex) {
530 interrupted = true;
531 }
532 }
533
534 if (interrupted) {
535 Thread.currentThread().interrupt();
536 }
537
538 completed.clear();
539 rollbackStack.clear();
540 procedures.clear();
541 nonceKeysToProcIdsMap.clear();
542 waitingTimeout.clear();
543 runnables.clear();
544 lastProcId.set(-1);
545 }
546
547 public boolean isRunning() {
548 return running.get();
549 }
550
551
552
553
554 public int getNumThreads() {
555 return threads == null ? 0 : (threads.length - 1);
556 }
557
558 public int getActiveExecutorCount() {
559 return activeExecutorCount.get();
560 }
561
562 public TEnvironment getEnvironment() {
563 return this.environment;
564 }
565
566 public ProcedureStore getStore() {
567 return this.store;
568 }
569
570 public void registerListener(ProcedureExecutorListener listener) {
571 this.listeners.add(listener);
572 }
573
574 public boolean unregisterListener(ProcedureExecutorListener listener) {
575 return this.listeners.remove(listener);
576 }
577
578
579
580
581
582 public List<ProcedureInfo> listProcedures() {
583 List<ProcedureInfo> procedureLists =
584 new ArrayList<ProcedureInfo>(procedures.size() + completed.size());
585 for (java.util.Map.Entry<Long, Procedure> p: procedures.entrySet()) {
586 procedureLists.add(Procedure.createProcedureInfo(p.getValue(), null));
587 }
588 for (java.util.Map.Entry<Long, ProcedureInfo> e: completed.entrySet()) {
589
590
591
592
593 procedureLists.add(e.getValue());
594 }
595 return procedureLists;
596 }
597
598
599
600
601
602 public void addChore(final ProcedureInMemoryChore chore) {
603 chore.setState(ProcedureState.RUNNABLE);
604 waitingTimeout.add(chore);
605 }
606
607
608
609
610
611
612 public boolean removeChore(final ProcedureInMemoryChore chore) {
613 chore.setState(ProcedureState.FINISHED);
614 return waitingTimeout.remove(chore);
615 }
616
617
618
619
620
621
622
623
624
625
626 public NonceKey createNonceKey(final long nonceGroup, final long nonce) {
627 return (nonce == HConstants.NO_NONCE) ? null : new NonceKey(nonceGroup, nonce);
628 }
629
630
631
632
633
634
635
636
637
638
639
640
641 public long registerNonce(final NonceKey nonceKey) {
642 if (nonceKey == null) return -1;
643
644
645 Long oldProcId = nonceKeysToProcIdsMap.get(nonceKey);
646 if (oldProcId == null) {
647
648
649 final long newProcId = nextProcId();
650 oldProcId = nonceKeysToProcIdsMap.putIfAbsent(nonceKey, newProcId);
651 if (oldProcId == null) return -1;
652 }
653
654
655
656 final boolean isTraceEnabled = LOG.isTraceEnabled();
657 while (isRunning() &&
658 !(procedures.containsKey(oldProcId) || completed.containsKey(oldProcId)) &&
659 nonceKeysToProcIdsMap.containsKey(nonceKey)) {
660 if (isTraceEnabled) {
661 LOG.trace("waiting for procId=" + oldProcId.longValue() + " to be submitted");
662 }
663 Threads.sleep(100);
664 }
665 return oldProcId.longValue();
666 }
667
668
669
670
671
672 public void unregisterNonceIfProcedureWasNotSubmitted(final NonceKey nonceKey) {
673 if (nonceKey == null) return;
674
675 final Long procId = nonceKeysToProcIdsMap.get(nonceKey);
676 if (procId == null) return;
677
678
679 if (!(procedures.containsKey(procId) || completed.containsKey(procId))) {
680 nonceKeysToProcIdsMap.remove(nonceKey);
681 }
682 }
683
684
685
686
687
688
689
690
691
692
693 public void setFailureResultForNonce(final NonceKey nonceKey, final String procName,
694 final User procOwner, final IOException exception) {
695 if (nonceKey == null) return;
696
697 final Long procId = nonceKeysToProcIdsMap.get(nonceKey);
698 if (procId == null || completed.containsKey(procId)) return;
699
700 final long currentTime = EnvironmentEdgeManager.currentTime();
701 final ProcedureInfo result = new FailedProcedureInfo(
702 procId.longValue(),
703 procName,
704 procOwner != null ? procOwner.getShortName() : null,
705 ProcedureState.ROLLEDBACK,
706 -1,
707 nonceKey,
708 ForeignExceptionUtil.toProtoForeignException("ProcedureExecutor", exception),
709 currentTime,
710 currentTime,
711 null);
712 completed.putIfAbsent(procId, result);
713 }
714
715 public static class FailedProcedureInfo extends ProcedureInfo {
716
717 public FailedProcedureInfo(long procId, String procName, String procOwner,
718 ProcedureState procState, long parentId, NonceKey nonceKey,
719 ErrorHandlingProtos.ForeignExceptionMessage exception, long lastUpdate, long startTime,
720 byte[] result) {
721 super(procId, procName, procOwner, procState, parentId, nonceKey, exception, lastUpdate,
722 startTime, result);
723 }
724 }
725
726
727
728
729
730
731
732
733
734 public long submitProcedure(final Procedure proc) {
735 return submitProcedure(proc, null);
736 }
737
738
739
740
741
742
743
744 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
745 justification = "FindBugs is blind to the check-for-null")
746 public long submitProcedure(final Procedure proc, final NonceKey nonceKey) {
747 Preconditions.checkArgument(proc.getState() == ProcedureState.INITIALIZING);
748 Preconditions.checkArgument(isRunning(), "executor not running");
749 Preconditions.checkArgument(lastProcId.get() >= 0);
750 Preconditions.checkArgument(!proc.hasParent(), "unexpected parent", proc);
751
752 final Long currentProcId;
753 if (nonceKey != null) {
754 currentProcId = nonceKeysToProcIdsMap.get(nonceKey);
755 Preconditions.checkArgument(currentProcId != null,
756 "expected nonceKey=" + nonceKey + " to be reserved, use registerNonce()");
757 } else {
758 currentProcId = nextProcId();
759 }
760
761
762 proc.setNonceKey(nonceKey);
763 proc.setProcId(currentProcId.longValue());
764
765
766 store.insert(proc, null);
767 if (LOG.isDebugEnabled()) {
768 LOG.debug("Procedure " + proc + " added to the store.");
769 }
770
771
772 RootProcedureState stack = new RootProcedureState();
773 rollbackStack.put(currentProcId, stack);
774
775
776 assert !procedures.containsKey(currentProcId);
777 procedures.put(currentProcId, proc);
778 sendProcedureAddedNotification(currentProcId);
779 runnables.addBack(proc);
780 return currentProcId;
781 }
782
783 public ProcedureInfo getResult(final long procId) {
784 return completed.get(procId);
785 }
786
787
788
789
790
791
792
793
794 public boolean isFinished(final long procId) {
795 return completed.containsKey(procId);
796 }
797
798
799
800
801
802
803 public boolean isStarted(final long procId) {
804 Procedure proc = procedures.get(procId);
805 if (proc == null) {
806 return completed.get(procId) != null;
807 }
808 return proc.wasExecuted();
809 }
810
811
812
813
814
815 public void removeResult(final long procId) {
816 ProcedureInfo result = completed.get(procId);
817 if (result == null) {
818 assert !procedures.containsKey(procId) : "procId=" + procId + " is still running";
819 if (LOG.isDebugEnabled()) {
820 LOG.debug("Procedure procId=" + procId + " already removed by the cleaner.");
821 }
822 return;
823 }
824
825
826 result.setClientAckTime(EnvironmentEdgeManager.currentTime());
827 }
828
829
830
831
832
833
834
835 public boolean abort(final long procId) {
836 return abort(procId, true);
837 }
838
839
840
841
842
843
844
845
846 public boolean abort(final long procId, final boolean mayInterruptIfRunning) {
847 Procedure proc = procedures.get(procId);
848 if (proc != null) {
849 if (!mayInterruptIfRunning && proc.wasExecuted()) {
850 return false;
851 } else {
852 return proc.abort(getEnvironment());
853 }
854 }
855 return false;
856 }
857
858
859
860
861
862
863
864
865 public boolean isProcedureOwner(final long procId, final User user) {
866 if (user == null) {
867 return false;
868 }
869
870 Procedure proc = procedures.get(procId);
871 if (proc != null) {
872 return proc.getOwner().equals(user.getShortName());
873 }
874 ProcedureInfo procInfo = completed.get(procId);
875 if (procInfo == null) {
876
877
878 return false;
879 }
880 return ProcedureInfo.isProcedureOwner(procInfo, user);
881 }
882
883 public Map<Long, ProcedureInfo> getResults() {
884 return Collections.unmodifiableMap(completed);
885 }
886
887 public Procedure getProcedure(final long procId) {
888 return procedures.get(procId);
889 }
890
891 protected ProcedureRunnableSet getRunnableSet() {
892 return runnables;
893 }
894
895
896
897
898
899
900 private void execLoop() {
901 while (isRunning()) {
902 Procedure proc = runnables.poll();
903 if (proc == null) continue;
904
905 try {
906 activeExecutorCount.incrementAndGet();
907 execLoop(proc);
908 } finally {
909 activeExecutorCount.decrementAndGet();
910 }
911 }
912 }
913
914 private void execLoop(Procedure proc) {
915 if (LOG.isTraceEnabled()) {
916 LOG.trace("Trying to start the execution of " + proc);
917 }
918
919 Long rootProcId = getRootProcedureId(proc);
920 if (rootProcId == null) {
921
922 executeRollback(proc);
923 return;
924 }
925
926 RootProcedureState procStack = rollbackStack.get(rootProcId);
927 if (procStack == null) return;
928
929 do {
930
931 if (!procStack.acquire(proc)) {
932 if (procStack.setRollback()) {
933
934 if (!executeRollback(rootProcId, procStack)) {
935 procStack.unsetRollback();
936 runnables.yield(proc);
937 }
938 } else {
939
940
941
942 if (!proc.wasExecuted()) {
943 if (!executeRollback(proc)) {
944 runnables.yield(proc);
945 }
946 }
947 }
948 break;
949 }
950
951
952 assert proc.getState() == ProcedureState.RUNNABLE;
953 if (proc.acquireLock(getEnvironment())) {
954 execProcedure(procStack, proc);
955 proc.releaseLock(getEnvironment());
956 } else {
957 runnables.yield(proc);
958 }
959 procStack.release(proc);
960
961
962
963 if (testing != null && !isRunning()) {
964 break;
965 }
966
967 if (proc.isSuccess()) {
968 if (LOG.isDebugEnabled()) {
969 LOG.debug("Procedure completed in " +
970 StringUtils.humanTimeDiff(proc.elapsedTime()) + ": " + proc);
971 }
972
973 if (proc.getProcId() == rootProcId) {
974 procedureFinished(proc);
975 }
976 break;
977 }
978 } while (procStack.isFailed());
979 }
980
981 private void timeoutLoop() {
982 while (isRunning()) {
983 Procedure proc = waitingTimeout.poll();
984 if (proc == null) continue;
985
986 if (proc.getTimeRemaining() > 100) {
987
988
989 waitingTimeout.add(proc);
990 continue;
991 }
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004 if (proc instanceof ProcedureInMemoryChore) {
1005 if (proc.isRunnable()) {
1006 try {
1007 ((ProcedureInMemoryChore)proc).periodicExecute(getEnvironment());
1008 } catch (Throwable e) {
1009 LOG.error("Ignoring CompletedProcedureCleaner exception: " + e.getMessage(), e);
1010 }
1011 proc.setStartTime(EnvironmentEdgeManager.currentTime());
1012 if (proc.isRunnable()) waitingTimeout.add(proc);
1013 }
1014 continue;
1015 }
1016
1017
1018
1019 if (proc.setTimeoutFailure()) {
1020 long rootProcId = Procedure.getRootProcedureId(procedures, proc);
1021 RootProcedureState procStack = rollbackStack.get(rootProcId);
1022 procStack.abort();
1023 store.update(proc);
1024 runnables.addFront(proc);
1025 continue;
1026 }
1027 }
1028 }
1029
1030
1031
1032
1033
1034
1035 private boolean executeRollback(final long rootProcId, final RootProcedureState procStack) {
1036 Procedure rootProc = procedures.get(rootProcId);
1037 RemoteProcedureException exception = rootProc.getException();
1038 if (exception == null) {
1039 exception = procStack.getException();
1040 rootProc.setFailure(exception);
1041 store.update(rootProc);
1042 }
1043
1044 List<Procedure> subprocStack = procStack.getSubproceduresStack();
1045 assert subprocStack != null : "Called rollback with no steps executed rootProc=" + rootProc;
1046
1047 int stackTail = subprocStack.size();
1048 boolean reuseLock = false;
1049 while (stackTail --> 0) {
1050 final Procedure proc = subprocStack.get(stackTail);
1051
1052 if (!reuseLock && !proc.acquireLock(getEnvironment())) {
1053
1054
1055 return false;
1056 }
1057
1058 boolean abortRollback = !executeRollback(proc);
1059 abortRollback |= !isRunning() || !store.isRunning();
1060
1061
1062
1063
1064 reuseLock = stackTail > 0 && (subprocStack.get(stackTail - 1) == proc) && !abortRollback;
1065 if (!reuseLock) {
1066 proc.releaseLock(getEnvironment());
1067 }
1068
1069
1070
1071 if (abortRollback) {
1072 return false;
1073 }
1074
1075 subprocStack.remove(stackTail);
1076
1077
1078 if (proc.isYieldAfterExecutionStep(getEnvironment())) {
1079 return false;
1080 }
1081 }
1082
1083
1084 LOG.info("Rolledback procedure " + rootProc +
1085 " exec-time=" + StringUtils.humanTimeDiff(rootProc.elapsedTime()) +
1086 " exception=" + exception.getMessage());
1087 procedureFinished(rootProc);
1088 return true;
1089 }
1090
1091
1092
1093
1094
1095
1096 private boolean executeRollback(final Procedure proc) {
1097 try {
1098 proc.doRollback(getEnvironment());
1099 } catch (IOException e) {
1100 if (LOG.isDebugEnabled()) {
1101 LOG.debug("rollback attempt failed for " + proc, e);
1102 }
1103 return false;
1104 } catch (InterruptedException e) {
1105 handleInterruptedException(proc, e);
1106 return false;
1107 } catch (Throwable e) {
1108
1109 LOG.fatal("CODE-BUG: Uncatched runtime exception for procedure: " + proc, e);
1110 }
1111
1112
1113
1114 if (testing != null && testing.shouldKillBeforeStoreUpdate()) {
1115 LOG.debug("TESTING: Kill before store update");
1116 stop();
1117 return false;
1118 }
1119
1120 if (proc.removeStackIndex()) {
1121 proc.setState(ProcedureState.ROLLEDBACK);
1122 if (proc.hasParent()) {
1123 store.delete(proc.getProcId());
1124 procedures.remove(proc.getProcId());
1125 } else {
1126 final long[] childProcIds = rollbackStack.get(proc.getProcId()).getSubprocedureIds();
1127 if (childProcIds != null) {
1128 store.delete(proc, childProcIds);
1129 } else {
1130 store.update(proc);
1131 }
1132 }
1133 } else {
1134 store.update(proc);
1135 }
1136
1137 return true;
1138 }
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157 private void execProcedure(final RootProcedureState procStack, final Procedure procedure) {
1158 Preconditions.checkArgument(procedure.getState() == ProcedureState.RUNNABLE);
1159
1160
1161 boolean reExecute = false;
1162 Procedure[] subprocs = null;
1163 do {
1164 reExecute = false;
1165 try {
1166 subprocs = procedure.doExecute(getEnvironment());
1167 if (subprocs != null && subprocs.length == 0) {
1168 subprocs = null;
1169 }
1170 } catch (ProcedureYieldException e) {
1171 if (LOG.isTraceEnabled()) {
1172 LOG.trace("Yield procedure: " + procedure + ": " + e.getMessage());
1173 }
1174 runnables.yield(procedure);
1175 return;
1176 } catch (InterruptedException e) {
1177 handleInterruptedException(procedure, e);
1178 runnables.yield(procedure);
1179 return;
1180 } catch (Throwable e) {
1181
1182 String msg = "CODE-BUG: Uncatched runtime exception for procedure: " + procedure;
1183 LOG.error(msg, e);
1184 procedure.setFailure(new RemoteProcedureException(msg, e));
1185 }
1186
1187 if (!procedure.isFailed()) {
1188 if (subprocs != null) {
1189 if (subprocs.length == 1 && subprocs[0] == procedure) {
1190
1191 subprocs = null;
1192 reExecute = true;
1193 } else {
1194
1195 for (int i = 0; i < subprocs.length; ++i) {
1196 Procedure subproc = subprocs[i];
1197 if (subproc == null) {
1198 String msg = "subproc[" + i + "] is null, aborting the procedure";
1199 procedure.setFailure(new RemoteProcedureException(msg,
1200 new IllegalArgumentIOException(msg)));
1201 subprocs = null;
1202 break;
1203 }
1204
1205 assert subproc.getState() == ProcedureState.INITIALIZING;
1206 subproc.setParentProcId(procedure.getProcId());
1207 subproc.setProcId(nextProcId());
1208 procStack.addSubProcedure(subproc);
1209 }
1210
1211 if (!procedure.isFailed()) {
1212 procedure.setChildrenLatch(subprocs.length);
1213 switch (procedure.getState()) {
1214 case RUNNABLE:
1215 procedure.setState(ProcedureState.WAITING);
1216 break;
1217 case WAITING_TIMEOUT:
1218 waitingTimeout.add(procedure);
1219 break;
1220 default:
1221 break;
1222 }
1223 }
1224 }
1225 } else if (procedure.getState() == ProcedureState.WAITING_TIMEOUT) {
1226 waitingTimeout.add(procedure);
1227 } else {
1228
1229 procedure.setState(ProcedureState.FINISHED);
1230 }
1231 }
1232
1233
1234 procStack.addRollbackStep(procedure);
1235
1236
1237
1238 if (testing != null && testing.shouldKillBeforeStoreUpdate()) {
1239 LOG.debug("TESTING: Kill before store update");
1240 stop();
1241 return;
1242 }
1243
1244
1245 updateStoreOnExec(procStack, procedure, subprocs);
1246
1247
1248 if (!store.isRunning()) {
1249 return;
1250 }
1251
1252
1253 if (procedure.getState() == ProcedureState.RUNNABLE &&
1254 procedure.isYieldAfterExecutionStep(getEnvironment())) {
1255 runnables.yield(procedure);
1256 return;
1257 }
1258
1259 assert (reExecute && subprocs == null) || !reExecute;
1260 } while (reExecute);
1261
1262
1263 if (subprocs != null && !procedure.isFailed()) {
1264 for (int i = 0; i < subprocs.length; ++i) {
1265 Procedure subproc = subprocs[i];
1266 assert !procedures.containsKey(subproc.getProcId());
1267 procedures.put(subproc.getProcId(), subproc);
1268 runnables.addFront(subproc);
1269 }
1270 }
1271
1272 if (procedure.isFinished() && procedure.hasParent()) {
1273 Procedure parent = procedures.get(procedure.getParentProcId());
1274 if (parent == null) {
1275 assert procStack.isRollingback();
1276 return;
1277 }
1278
1279
1280 if (LOG.isTraceEnabled()) {
1281 LOG.trace(parent + " child is done: " + procedure);
1282 }
1283 if (parent.childrenCountDown() && parent.getState() == ProcedureState.WAITING) {
1284 parent.setState(ProcedureState.RUNNABLE);
1285 store.update(parent);
1286 runnables.addFront(parent);
1287 if (LOG.isTraceEnabled()) {
1288 LOG.trace(parent + " all the children finished their work, resume.");
1289 }
1290 return;
1291 }
1292 }
1293 }
1294
1295 private void updateStoreOnExec(final RootProcedureState procStack,
1296 final Procedure procedure, final Procedure[] subprocs) {
1297 if (subprocs != null && !procedure.isFailed()) {
1298 if (LOG.isTraceEnabled()) {
1299 LOG.trace("Store add " + procedure + " children " + Arrays.toString(subprocs));
1300 }
1301 store.insert(procedure, subprocs);
1302 } else {
1303 if (LOG.isTraceEnabled()) {
1304 LOG.trace("Store update " + procedure);
1305 }
1306 if (procedure.isFinished() && !procedure.hasParent()) {
1307
1308 final long[] childProcIds = procStack.getSubprocedureIds();
1309 if (childProcIds != null) {
1310 store.delete(procedure, childProcIds);
1311 for (int i = 0; i < childProcIds.length; ++i) {
1312 procedures.remove(childProcIds[i]);
1313 }
1314 } else {
1315 store.update(procedure);
1316 }
1317 } else {
1318 store.update(procedure);
1319 }
1320 }
1321 }
1322
1323 private void handleInterruptedException(final Procedure proc, final InterruptedException e) {
1324 if (LOG.isTraceEnabled()) {
1325 LOG.trace("got an interrupt during " + proc + ". suspend and retry it later.", e);
1326 }
1327
1328
1329
1330
1331
1332
1333 }
1334
1335 private void sendProcedureLoadedNotification(final long procId) {
1336 if (!this.listeners.isEmpty()) {
1337 for (ProcedureExecutorListener listener: this.listeners) {
1338 try {
1339 listener.procedureLoaded(procId);
1340 } catch (Throwable e) {
1341 LOG.error("The listener " + listener + " had an error: " + e.getMessage(), e);
1342 }
1343 }
1344 }
1345 }
1346
1347 private void sendProcedureAddedNotification(final long procId) {
1348 if (!this.listeners.isEmpty()) {
1349 for (ProcedureExecutorListener listener: this.listeners) {
1350 try {
1351 listener.procedureAdded(procId);
1352 } catch (Throwable e) {
1353 LOG.error("The listener " + listener + " had an error: " + e.getMessage(), e);
1354 }
1355 }
1356 }
1357 }
1358
1359 private void sendProcedureFinishedNotification(final long procId) {
1360 if (!this.listeners.isEmpty()) {
1361 for (ProcedureExecutorListener listener: this.listeners) {
1362 try {
1363 listener.procedureFinished(procId);
1364 } catch (Throwable e) {
1365 LOG.error("The listener " + listener + " had an error: " + e.getMessage(), e);
1366 }
1367 }
1368 }
1369 }
1370
1371 private long nextProcId() {
1372 long procId = lastProcId.incrementAndGet();
1373 if (procId < 0) {
1374 while (!lastProcId.compareAndSet(procId, 0)) {
1375 procId = lastProcId.get();
1376 if (procId >= 0)
1377 break;
1378 }
1379 while (procedures.containsKey(procId)) {
1380 procId = lastProcId.incrementAndGet();
1381 }
1382 }
1383 return procId;
1384 }
1385
1386 protected long getLastProcId() {
1387 return lastProcId.get();
1388 }
1389
1390 private Long getRootProcedureId(Procedure proc) {
1391 return Procedure.getRootProcedureId(procedures, proc);
1392 }
1393
1394 private void procedureFinished(final Procedure proc) {
1395
1396 try {
1397 proc.completionCleanup(getEnvironment());
1398 } catch (Throwable e) {
1399
1400 LOG.error("CODE-BUG: uncatched runtime exception for procedure: " + proc, e);
1401 }
1402
1403
1404 ProcedureInfo procInfo = Procedure.createProcedureInfo(proc, proc.getNonceKey());
1405 if (!proc.shouldWaitClientAck(getEnvironment())) {
1406 procInfo.setClientAckTime(0);
1407 }
1408
1409 completed.put(procInfo.getProcId(), procInfo);
1410 rollbackStack.remove(proc.getProcId());
1411 procedures.remove(proc.getProcId());
1412
1413
1414 try {
1415 runnables.completionCleanup(proc);
1416 } catch (Throwable e) {
1417
1418 LOG.error("CODE-BUG: uncatched runtime exception for runnableSet: " + runnables, e);
1419 }
1420
1421
1422 sendProcedureFinishedNotification(proc.getProcId());
1423 }
1424
1425 public Pair<ProcedureInfo, Procedure> getResultOrProcedure(final long procId) {
1426 ProcedureInfo result = completed.get(procId);
1427 Procedure proc = null;
1428 if (result == null) {
1429 proc = procedures.get(procId);
1430 if (proc == null) {
1431 result = completed.get(procId);
1432 }
1433 }
1434 return new Pair(result, proc);
1435 }
1436 }