1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.regionserver;
20
21 import com.google.common.base.Preconditions;
22 import com.google.common.collect.Maps;
23 import com.google.protobuf.BlockingRpcChannel;
24 import com.google.protobuf.Descriptors;
25 import com.google.protobuf.Message;
26 import com.google.protobuf.RpcCallback;
27 import com.google.protobuf.RpcController;
28 import com.google.protobuf.Service;
29 import com.google.protobuf.ServiceException;
30
31 import java.io.IOException;
32 import java.io.InterruptedIOException;
33 import java.lang.Thread.UncaughtExceptionHandler;
34 import java.lang.management.MemoryUsage;
35 import java.lang.reflect.Constructor;
36 import java.net.BindException;
37 import java.net.InetAddress;
38 import java.net.InetSocketAddress;
39 import java.net.UnknownHostException;
40 import java.security.PrivilegedExceptionAction;
41 import java.text.MessageFormat;
42 import java.util.ArrayList;
43 import java.util.Collection;
44 import java.util.Collections;
45 import java.util.Comparator;
46 import java.util.HashMap;
47 import java.util.HashSet;
48 import java.util.Iterator;
49 import java.util.List;
50 import java.util.Map;
51 import java.util.Map.Entry;
52 import java.util.Set;
53 import java.util.SortedMap;
54 import java.util.Timer;
55 import java.util.TimerTask;
56 import java.util.TreeMap;
57 import java.util.TreeSet;
58 import java.util.concurrent.ConcurrentHashMap;
59 import java.util.concurrent.ConcurrentMap;
60 import java.util.concurrent.ConcurrentSkipListMap;
61 import java.util.concurrent.CountDownLatch;
62 import java.util.concurrent.TimeUnit;
63 import java.util.concurrent.atomic.AtomicBoolean;
64 import java.util.concurrent.atomic.AtomicReference;
65 import java.util.concurrent.locks.ReentrantReadWriteLock;
66
67 import javax.management.MalformedObjectNameException;
68 import javax.management.ObjectName;
69 import javax.servlet.http.HttpServlet;
70
71 import org.apache.commons.lang.SystemUtils;
72 import org.apache.commons.lang.math.RandomUtils;
73 import org.apache.commons.logging.Log;
74 import org.apache.commons.logging.LogFactory;
75 import org.apache.hadoop.conf.Configuration;
76 import org.apache.hadoop.fs.FileSystem;
77 import org.apache.hadoop.fs.Path;
78 import org.apache.hadoop.hbase.ChoreService;
79 import org.apache.hadoop.hbase.ClockOutOfSyncException;
80 import org.apache.hadoop.hbase.CoordinatedStateManager;
81 import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
82 import org.apache.hadoop.hbase.ExecutorStatusChore;
83 import org.apache.hadoop.hbase.HBaseConfiguration;
84 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
85 import org.apache.hadoop.hbase.HConstants;
86 import org.apache.hadoop.hbase.HRegionInfo;
87 import org.apache.hadoop.hbase.HealthCheckChore;
88 import org.apache.hadoop.hbase.MetaTableAccessor;
89 import org.apache.hadoop.hbase.NotServingRegionException;
90 import org.apache.hadoop.hbase.RemoteExceptionHandler;
91 import org.apache.hadoop.hbase.ScheduledChore;
92 import org.apache.hadoop.hbase.ServerName;
93 import org.apache.hadoop.hbase.Stoppable;
94 import org.apache.hadoop.hbase.TableDescriptors;
95 import org.apache.hadoop.hbase.TableName;
96 import org.apache.hadoop.hbase.YouAreDeadException;
97 import org.apache.hadoop.hbase.ZNodeClearer;
98 import org.apache.hadoop.hbase.classification.InterfaceAudience;
99 import org.apache.hadoop.hbase.client.ClusterConnection;
100 import org.apache.hadoop.hbase.client.ConnectionUtils;
101 import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory;
102 import org.apache.hadoop.hbase.conf.ConfigurationManager;
103 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
104 import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
105 import org.apache.hadoop.hbase.coordination.CloseRegionCoordination;
106 import org.apache.hadoop.hbase.coordination.SplitLogWorkerCoordination;
107 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
108 import org.apache.hadoop.hbase.exceptions.RegionMovedException;
109 import org.apache.hadoop.hbase.exceptions.RegionOpeningException;
110 import org.apache.hadoop.hbase.exceptions.UnknownProtocolException;
111 import org.apache.hadoop.hbase.executor.ExecutorService;
112 import org.apache.hadoop.hbase.executor.ExecutorType;
113 import org.apache.hadoop.hbase.fs.HFileSystem;
114 import org.apache.hadoop.hbase.http.InfoServer;
115 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
116 import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
117 import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
118 import org.apache.hadoop.hbase.ipc.RpcClient;
119 import org.apache.hadoop.hbase.ipc.RpcClientFactory;
120 import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
121 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
122 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
123 import org.apache.hadoop.hbase.ipc.ServerRpcController;
124 import org.apache.hadoop.hbase.master.HMaster;
125 import org.apache.hadoop.hbase.master.RegionState.State;
126 import org.apache.hadoop.hbase.master.TableLockManager;
127 import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
128 import org.apache.hadoop.hbase.namequeues.NamedQueueRecorder;
129 import org.apache.hadoop.hbase.namequeues.SlowLogTableOpsChore;
130 import org.apache.hadoop.hbase.net.Address;
131 import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost;
132 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
133 import org.apache.hadoop.hbase.protobuf.RequestConverter;
134 import org.apache.hadoop.hbase.protobuf.ResponseConverter;
135 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
136 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
137 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceRequest;
138 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceResponse;
139 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos;
140 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
141 import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds;
142 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.Coprocessor;
143 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.Coprocessor.Builder;
144 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameStringPair;
145 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
146 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier;
147 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
148 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest;
149 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdResponse;
150 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
151 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
152 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
153 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionServerStatusService;
154 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
155 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
156 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
157 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
158 import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
159 import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager;
160 import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
161 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
162 import org.apache.hadoop.hbase.regionserver.handler.CloseMetaHandler;
163 import org.apache.hadoop.hbase.regionserver.handler.CloseRegionHandler;
164 import org.apache.hadoop.hbase.regionserver.handler.RegionReplicaFlushHandler;
165 import org.apache.hadoop.hbase.regionserver.throttle.FlushThroughputControllerFactory;
166 import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
167 import org.apache.hadoop.hbase.regionserver.wal.MetricsWAL;
168 import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
169 import org.apache.hadoop.hbase.replication.regionserver.Replication;
170 import org.apache.hadoop.hbase.replication.regionserver.ReplicationLoad;
171 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
172 import org.apache.hadoop.hbase.replication.regionserver.ReplicationStatus;
173 import org.apache.hadoop.hbase.security.Superusers;
174 import org.apache.hadoop.hbase.security.User;
175 import org.apache.hadoop.hbase.security.UserProvider;
176 import org.apache.hadoop.hbase.trace.SpanReceiverHost;
177 import org.apache.hadoop.hbase.util.Addressing;
178 import org.apache.hadoop.hbase.util.ByteStringer;
179 import org.apache.hadoop.hbase.util.Bytes;
180 import org.apache.hadoop.hbase.util.CompressionTest;
181 import org.apache.hadoop.hbase.util.ConfigUtil;
182 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
183 import org.apache.hadoop.hbase.util.FSTableDescriptors;
184 import org.apache.hadoop.hbase.util.FSUtils;
185 import org.apache.hadoop.hbase.util.HasThread;
186 import org.apache.hadoop.hbase.util.JSONBean;
187 import org.apache.hadoop.hbase.util.JvmPauseMonitor;
188 import org.apache.hadoop.hbase.util.MBeanUtil;
189 import org.apache.hadoop.hbase.util.RetryCounter;
190 import org.apache.hadoop.hbase.util.RetryCounterFactory;
191 import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
192 import org.apache.hadoop.hbase.util.Sleeper;
193 import org.apache.hadoop.hbase.util.Threads;
194 import org.apache.hadoop.hbase.util.VersionInfo;
195 import org.apache.hadoop.hbase.wal.DefaultWALProvider;
196 import org.apache.hadoop.hbase.wal.WAL;
197 import org.apache.hadoop.hbase.wal.WALFactory;
198 import org.apache.hadoop.hbase.zookeeper.ClusterStatusTracker;
199 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
200 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
201 import org.apache.hadoop.hbase.zookeeper.RecoveringRegionWatcher;
202 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
203 import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
204 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
205 import org.apache.hadoop.hbase.zookeeper.ZooKeeperNodeTracker;
206 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
207 import org.apache.hadoop.ipc.RemoteException;
208 import org.apache.hadoop.util.ReflectionUtils;
209 import org.apache.hadoop.util.StringUtils;
210 import org.apache.zookeeper.KeeperException;
211 import org.apache.zookeeper.KeeperException.NoNodeException;
212 import org.apache.zookeeper.data.Stat;
213
214 import sun.misc.Signal;
215 import sun.misc.SignalHandler;
216
217
218
219
220
221 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
222 @SuppressWarnings("deprecation")
223 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="JLM_JSR166_UTILCONCURRENT_MONITORENTER",
224 justification="Use of an atomic type both as monitor and condition variable is intended")
225 public class HRegionServer extends HasThread implements
226 RegionServerServices, LastSequenceId, ConfigurationObserver {
227
  private static final Log LOG = LogFactory.getLog(HRegionServer.class);

  // Region-transition kinds used when tracking regions in transition on this
  // server (see regionsInTransitionInRS).
  protected static final String OPEN = "OPEN";
  protected static final String CLOSE = "CLOSE";

  // Regions this server is currently opening or closing, keyed by encoded
  // region name.  The Boolean records which kind of transition is in flight
  // -- NOTE(review): exact true/false meaning not visible here; confirm
  // against the code that populates this map.
  protected final ConcurrentMap<byte[], Boolean> regionsInTransitionInRS =
    new ConcurrentSkipListMap<byte[], Boolean>(Bytes.BYTES_COMPARATOR);

  // Flushes memstores to the filesystem.
  protected MemStoreFlusher cacheFlusher;

  protected HeapMemoryManager hMemManager;
  // Optional latch a subclass can install (setInitLatch); awaited with a
  // bounded timeout during initializeZooKeeper().
  protected CountDownLatch initLatch = null;

  // Cluster connection used by in-server clients; created lazily in
  // setupClusterConnection().
  protected ClusterConnection clusterConnection;

  // Locates hbase:meta; created alongside clusterConnection.
  protected MetaTableLocator metaTableLocator;

  // Watches ZK for recovering regions; field only keeps the reference alive.
  @SuppressWarnings("unused")
  private RecoveringRegionWatcher recoveringRegionWatcher;

  // Reads (and, for the master, writes) table descriptors on the filesystem.
  protected TableDescriptors tableDescriptors;

  // Replication services; may be distinct source/sink handlers.
  protected ReplicationSourceService replicationSourceHandler;
  protected ReplicationSinkService replicationSinkHandler;

  // Runs compactions and region splits.
  public CompactSplitThread compactSplitThread;

  // Regions currently online on this server, keyed by encoded region name.
  protected final Map<String, Region> onlineRegions = new ConcurrentHashMap<String, Region>();

  // Favored nodes per region: encoded region name -> favored node addresses.
  protected final Map<String, Address[]> regionFavoredNodesMap =
    new ConcurrentHashMap<String, Address[]>();

  // Regions undergoing log-recovery, keyed by encoded region name.
  protected final Map<String, Region> recoveringRegions = Collections
    .synchronizedMap(new HashMap<String, Region>());

  // Expires stale client leases (e.g. scanner leases).
  protected Leases leases;

  // Executor service running region open/close and similar handlers.
  protected ExecutorService service;

  // Filesystem handles; fsOk flips false once the filesystem is deemed bad.
  protected volatile boolean fsOk;
  protected HFileSystem fs;     // root filesystem
  protected HFileSystem walFs;  // WAL filesystem (may differ from fs)

  // Set when a clean stop has been requested.
  private volatile boolean stopped = false;

  // True once the JVM shutdown hook has been installed.
  private boolean isShutdownHookInstalled = false;

  // Set when abort() has been called; server exits without clean shutdown.
  private AtomicBoolean abortRequested;
  public static final String ABORT_TIMEOUT = "hbase.regionserver.abort.timeout";
  // Default abort timeout: 1,200,000 ms = 20 minutes.
  private static final long DEFAULT_ABORT_TIMEOUT = 1200000;
  // Config key naming the task to run if an abort exceeds the timeout.
  public static final String ABORT_TIMEOUT_TASK = "hbase.regionserver.abort.timeout.task";

  ConcurrentMap<String, Integer> rowlocks = new ConcurrentHashMap<String, Integer>();
336
337
338
  // Set while this server is closing its user regions during shutdown.
  private boolean stopping = false;

  // Set by a kill request: exit immediately, skipping cleanup.
  volatile boolean killed = false;

  protected final Configuration conf;

  private Path rootDir;     // hbase root directory
  private Path walRootDir;  // WAL root directory (may differ from rootDir)

  protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

  final int numRetries;                     // client retry count
  protected final int threadWakeFrequency;  // ms between thread wakeups
  protected final int msgInterval;          // ms between reports to the master

  private static final String PERIOD_COMPACTION = "hbase.regionserver.compaction.check.period";
  private final int compactionCheckFrequency;
  private static final String PERIOD_FLUSH = "hbase.regionserver.flush.check.period";
  private final int flushCheckFrequency;

  // Cap on how many region loads to include in each report to the master.
  protected final int numRegionsToReport;

  // Stub used to report to the active master; volatile since it is replaced
  // on master failover.
  private volatile RegionServerStatusService.BlockingInterface rssStub;

  RpcClient rpcClient;

  private RpcRetryingCallerFactory rpcRetryingCallerFactory;
  private RpcControllerFactory rpcControllerFactory;

  private UncaughtExceptionHandler uncaughtExceptionHandler;

  // Info/web UI server.
  protected InfoServer infoServer;
  private JvmPauseMonitor pauseMonitor;

  public static final String REGIONSERVER = "regionserver";

  MetricsRegionServer metricsRegionServer;
  MetricsRegionServerWrapperImpl metricsRegionServerImpl;
  MetricsTable metricsTable;
  private SpanReceiverHost spanReceiverHost;

  // Schedules this server's chores.
  private final ChoreService choreService;

  // Periodically checks whether stores need compaction.
  ScheduledChore compactionChecker;

  // Periodically flushes memstores per flushCheckFrequency.
  ScheduledChore periodicFlusher;

  protected volatile WALFactory walFactory;

  // Rolls the WAL; a separate roller for the meta WAL is installed on demand.
  final LogRoller walRoller;
  final AtomicReference<LogRoller> metawalRoller = new AtomicReference<LogRoller>();

  // Flipped true once this server has checked in with the master.
  final AtomicBoolean online = new AtomicBoolean(false);

  protected ZooKeeperWatcher zooKeeper;

  // Tracks the active master's address in ZK.
  private MasterAddressTracker masterAddressTracker;

  // Tracks the cluster 'up' flag in ZK.
  protected ClusterStatusTracker clusterStatusTracker;

  private SplitLogWorker splitLogWorker;

  // Sleeper used between master reports; wakes early on stop.
  protected final Sleeper sleeper;

  private final int operationTimeout;
  private final int shortOperationTimeout;

  private SlowLogTableOpsChore slowLogTableOpsChore = null;

  private final RegionServerAccounting regionServerAccounting;

  // Block cache configuration.
  protected CacheConfig cacheConfig;

  // Health-script chore; null unless a health checker is configured.
  private HealthCheckChore healthCheckChore;

  // Periodically reports executor pool status.
  private ExecutorStatusChore executorStatusChore;

  // Cleans up expired nonces; only created when the nonce manager is enabled.
  private ScheduledChore nonceManagerChore;

  private Map<String, Service> coprocessorServiceHandlers = Maps.newHashMap();
446
447
448
449
450
451
  // This server's name (hostname, port, startcode); set in the constructor.
  protected ServerName serverName;

  // Hostname to report instead of the DNS-derived one, when configured.
  protected String useThisHostnameInstead;

  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
  final static String RS_HOSTNAME_KEY = "hbase.regionserver.hostname";
  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
  protected final static String MASTER_HOSTNAME_KEY = "hbase.master.hostname";

  // When true, the RS reports its own hostname rather than letting the master
  // reverse-DNS it; mutually exclusive with RS_HOSTNAME_KEY (enforced in the
  // constructor).
  final static String RS_HOSTNAME_DISABLE_MASTER_REVERSEDNS_KEY =
    "hbase.regionserver.hostname.disable.master.reversedns";

  // Process start timestamp; part of this server's ServerName.
  protected final long startcode;

  // Cluster id read from ZK during initializeZooKeeper(); null before that.
  String clusterId;

  // JMX bean registered for this server, if any.
  private ObjectName mxBean = null;

  // Cleans stale entries from the moved-regions bookkeeping.
  private MovedRegionsCleaner movedRegionsCleaner;

  // Refreshes store files (e.g. for region replicas); null when disabled.
  private StorefileRefresherChore storefileRefresher;

  private RegionServerCoprocessorHost rsHost;

  private RegionServerProcedureManagerHost rspmHost;

  private RegionServerQuotaManager rsQuotaManager;

  // ZK-based table lock manager.
  protected TableLockManager tableLockManager;

  // Manages operation nonces; null when nonces are disabled (see constructor).
  final ServerNonceManager nonceManager;

  private UserProvider userProvider;

  protected final RSRpcServices rpcServices;

  protected BaseCoordinatedStateManager csm;

  // Whether region assignment is coordinated through ZK (legacy path).
  private final boolean useZKForAssignment;

  // Propagates configuration reloads to registered observers.
  protected final ConfigurationManager configurationManager;

  // Discards compacted-away hfiles once no longer referenced.
  private CompactedHFilesDischarger compactedFileDischarger;

  // Flush throughput limiter; volatile since it can be swapped on reconfig.
  private volatile ThroughputController flushThroughputController;

  // Backs the slow-log / balancer-decision ring buffers; may remain null
  // (see initNamedQueueRecorder).
  private NamedQueueRecorder namedQueueRecorder = null;
549
  /**
   * Starts a region server using the coordinated state manager derived from
   * the given configuration.
   * @param conf server configuration
   * @throws IOException if server construction fails
   * @throws InterruptedException if startup is interrupted
   */
  public HRegionServer(Configuration conf) throws IOException, InterruptedException {
    this(conf, CoordinatedStateManagerFactory.getCoordinatedStateManager(conf));
  }
553
554
555
556
557
  /**
   * Starts a region server: reads timeouts and intervals from configuration,
   * performs security login, initializes filesystems, ZooKeeper trackers and
   * the RPC services, and schedules the compacted-file discharger.
   * @param conf server configuration
   * @param csm coordinated state manager; initialized and started here unless
   *   running with "hbase.testing.nocluster"
   * @throws IOException on codec, filesystem, login or RPC setup failure
   * @throws InterruptedException if startup is interrupted
   */
  public HRegionServer(Configuration conf, CoordinatedStateManager csm)
      throws IOException, InterruptedException {
    super("RegionServer"); // thread name
    this.startcode = System.currentTimeMillis();
    this.fsOk = true;
    this.conf = conf;
    checkCodecs(this.conf); // fail fast if a required compression codec is unusable
    this.userProvider = UserProvider.instantiate(conf);
    FSUtils.setupShortCircuitRead(this.conf);

    Replication.decorateRegionServerConfiguration(this.conf);

    // Disable usage of meta replicas in the server-internal connection.
    this.conf.setBoolean(HConstants.USE_META_REPLICAS, false);

    // Config'ed params
    this.numRetries = this.conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
      HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
    this.threadWakeFrequency = conf.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
    this.compactionCheckFrequency = conf.getInt(PERIOD_COMPACTION, this.threadWakeFrequency);
    this.flushCheckFrequency = conf.getInt(PERIOD_FLUSH, this.threadWakeFrequency);
    this.msgInterval = conf.getInt("hbase.regionserver.msginterval", 3 * 1000);

    this.sleeper = new Sleeper(this.msgInterval, this);

    boolean isNoncesEnabled = conf.getBoolean(HConstants.HBASE_RS_NONCES_ENABLED, true);
    this.nonceManager = isNoncesEnabled ? new ServerNonceManager(this.conf) : null;

    this.numRegionsToReport = conf.getInt(
      "hbase.regionserver.numregionstoreport", 10);

    this.operationTimeout = conf.getInt(
      HConstants.HBASE_CLIENT_OPERATION_TIMEOUT,
      HConstants.DEFAULT_HBASE_CLIENT_OPERATION_TIMEOUT);

    this.shortOperationTimeout = conf.getInt(
      HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
      HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);

    this.abortRequested = new AtomicBoolean(false);
    this.stopped = false;

    initNamedQueueRecorder(conf);

    rpcServices = createRpcServices();
    // Resolve the hostname override; an RS may not combine an explicit
    // hostname with the disable-reverse-DNS flag.
    if (this instanceof HMaster) {
      useThisHostnameInstead = conf.get(MASTER_HOSTNAME_KEY);
    } else {
      useThisHostnameInstead = conf.get(RS_HOSTNAME_KEY);
      if (conf.getBoolean(RS_HOSTNAME_DISABLE_MASTER_REVERSEDNS_KEY, false)) {
        if (shouldUseThisHostnameInstead()) {
          String msg = RS_HOSTNAME_DISABLE_MASTER_REVERSEDNS_KEY + " and " + RS_HOSTNAME_KEY +
            " are mutually exclusive. Do not set " + RS_HOSTNAME_DISABLE_MASTER_REVERSEDNS_KEY +
            " to true while " + RS_HOSTNAME_KEY + " is used";
          throw new IOException(msg);
        } else {
          useThisHostnameInstead = rpcServices.isa.getHostName();
        }
      }
    }
    String hostName = shouldUseThisHostnameInstead() ? useThisHostnameInstead :
      rpcServices.isa.getHostName();
    serverName = ServerName.valueOf(hostName, rpcServices.isa.getPort(), startcode);

    rpcControllerFactory = RpcControllerFactory.instantiate(this.conf);
    rpcRetryingCallerFactory = RpcRetryingCallerFactory.instantiate(this.conf);

    // Login the zookeeper client principal (if using security).
    ZKUtil.loginClient(this.conf, HConstants.ZK_CLIENT_KEYTAB_FILE,
      HConstants.ZK_CLIENT_KERBEROS_PRINCIPAL, hostName);
    // Login the server principal (if using secure Hadoop).
    login(userProvider, hostName);

    // Initialize superusers once the security context is set up.
    Superusers.initialize(conf);

    regionServerAccounting = new RegionServerAccounting();
    uncaughtExceptionHandler = new UncaughtExceptionHandler() {
      @Override
      public void uncaughtException(Thread t, Throwable e) {
        abort("Uncaught exception in service thread " + t.getName(), e);
      }
    };

    useZKForAssignment = ConfigUtil.useZKForAssignment(conf);

    initializeFileSystem();

    service = new ExecutorService(getServerName().toShortString());
    spanReceiverHost = SpanReceiverHost.getInstance(getConfiguration());

    // Some unit tests don't need a cluster, so no zookeeper at all.
    if (!conf.getBoolean("hbase.testing.nocluster", false)) {
      // Open connection to zookeeper and set primary watcher.
      zooKeeper = new ZooKeeperWatcher(conf, getProcessName() + ":" +
        rpcServices.isa.getPort(), this, canCreateBaseZNode());

      this.csm = (BaseCoordinatedStateManager) csm;
      this.csm.initialize(this);
      this.csm.start();

      tableLockManager = TableLockManager.createTableLockManager(
        conf, zooKeeper, serverName);

      masterAddressTracker = new MasterAddressTracker(getZooKeeper(), this);
      masterAddressTracker.start();

      clusterStatusTracker = new ClusterStatusTracker(zooKeeper, this);
      clusterStatusTracker.start();
    }
    this.configurationManager = new ConfigurationManager();

    rpcServices.start(zooKeeper);
    putUpWebUI();
    this.walRoller = new LogRoller(this, this);
    this.choreService = new ChoreService(getServerName().toString(), true);
    this.flushThroughputController = FlushThroughputControllerFactory.create(this, conf);

    // SIGHUP triggers a configuration reload (not available on Windows).
    if (!SystemUtils.IS_OS_WINDOWS) {
      Signal.handle(new Signal("HUP"), new SignalHandler() {
        @Override
        public void handle(Signal signal) {
          getConfiguration().reloadConfiguration();
          configurationManager.notifyAllObservers(getConfiguration());
        }
      });
    }

    // Chore that discards compacted-away hfiles; default interval 2 minutes.
    int cleanerInterval = conf
      .getInt(CompactionConfiguration.HBASE_HFILE_COMPACTION_DISCHARGER_INTERVAL, 2 * 60 * 1000);
    this.compactedFileDischarger =
      new CompactedHFilesDischarger(cleanerInterval, (Stoppable)this, (RegionServerServices)this);
    choreService.scheduleChore(compactedFileDischarger);
  }
696
  /**
   * Initializes the WAL filesystem, the root filesystem, and the
   * table-descriptor store.  The WAL filesystem is set up first because each
   * HFileSystem is resolved against fs.defaultFS, which is re-pointed before
   * each construction; the ordering here is therefore load-bearing.
   * @throws IOException if either filesystem cannot be initialized
   */
  private void initializeFileSystem() throws IOException {
    // Point fs.defaultFS at the WAL root so the WAL HFileSystem resolves
    // against it (the WAL root may live on a different filesystem).
    boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
    FSUtils.setFsDefault(this.conf, FSUtils.getWALRootDir(this.conf));
    this.walFs = new HFileSystem(this.conf, useHBaseChecksum);
    this.walRootDir = FSUtils.getWALRootDir(this.conf);

    // Re-point fs.defaultFS at the hbase root for the main filesystem.
    FSUtils.setFsDefault(this.conf, FSUtils.getRootDir(this.conf));
    this.fs = new HFileSystem(this.conf, useHBaseChecksum);
    this.rootDir = FSUtils.getRootDir(this.conf);
    // Descriptors are read-only here unless a subclass allows updates.
    this.tableDescriptors = new FSTableDescriptors(
      this.conf, this.fs, this.rootDir, !canUpdateTableDescriptor(), false);
  }
713
  /**
   * Installs a latch that initializeZooKeeper() will await (with a bounded
   * timeout) before proceeding; intended for subclasses.
   * @param latch latch to wait on during ZK initialization
   */
  protected void setInitLatch(CountDownLatch latch) {
    this.initLatch = latch;
  }
717
718 private void initNamedQueueRecorder(Configuration conf) {
719 if (!(this instanceof HMaster)) {
720 final boolean isOnlineLogProviderEnabled = conf.getBoolean(
721 HConstants.SLOW_LOG_BUFFER_ENABLED_KEY,
722 HConstants.DEFAULT_ONLINE_LOG_PROVIDER_ENABLED);
723 if (isOnlineLogProviderEnabled) {
724 this.namedQueueRecorder = NamedQueueRecorder.getInstance(this.conf);
725 }
726 } else {
727 final boolean isBalancerDecisionRecording = conf
728 .getBoolean(BaseLoadBalancer.BALANCER_DECISION_BUFFER_ENABLED,
729 BaseLoadBalancer.DEFAULT_BALANCER_DECISION_BUFFER_ENABLED);
730 if (isBalancerDecisionRecording) {
731 this.namedQueueRecorder = NamedQueueRecorder.getInstance(this.conf);
732 }
733 }
734 }
735
736
737
738
739 protected boolean shouldUseThisHostnameInstead() {
740 return useThisHostnameInstead != null && !useThisHostnameInstead.isEmpty();
741 }
742
  /**
   * Performs the Kerberos login for this server's principal, if security is
   * enabled.
   * @param user provider performing the login
   * @param host hostname substituted into the principal
   * @throws IOException if login fails
   */
  protected void login(UserProvider user, String host) throws IOException {
    user.login("hbase.regionserver.keytab.file",
      "hbase.regionserver.kerberos.principal", host);
  }

  /**
   * Hook allowing a subclass to block until the master is active; no-op on a
   * plain region server.
   */
  protected void waitForMasterActive(){
  }

  /** @return short process name used in thread and znode naming */
  protected String getProcessName() {
    return REGIONSERVER;
  }

  /** @return whether this process may create the base znodes; false for a plain RS */
  protected boolean canCreateBaseZNode() {
    return false;
  }

  /** @return whether this process may update table descriptors; false for a plain RS */
  protected boolean canUpdateTableDescriptor() {
    return false;
  }

  /**
   * @return the RPC service implementation for this server
   * @throws IOException if the RPC services cannot be constructed
   */
  protected RSRpcServices createRpcServices() throws IOException {
    return new RSRpcServices(this);
  }

  /** Registers the region-server status servlet on the info server. */
  protected void configureInfoServer() {
    infoServer.addServlet("rs-status", "/rs-status", RSStatusServlet.class);
    infoServer.setAttribute(REGIONSERVER, this);
  }

  /** @return the servlet class used to dump this server's state */
  protected Class<? extends HttpServlet> getDumpServlet() {
    return RSDumpServlet.class;
  }
775
776 @Override
777 public boolean registerService(Service instance) {
778
779
780
781 Descriptors.ServiceDescriptor serviceDesc = instance.getDescriptorForType();
782 String serviceName = CoprocessorRpcUtils.getServiceName(serviceDesc);
783 if (coprocessorServiceHandlers.containsKey(serviceName)) {
784 LOG.error("Coprocessor service " + serviceName
785 + " already registered, rejecting request from " + instance);
786 return false;
787 }
788
789 coprocessorServiceHandlers.put(serviceName, instance);
790 if (LOG.isDebugEnabled()) {
791 LOG.debug("Registered regionserver coprocessor service: service=" + serviceName);
792 }
793 return true;
794 }
795
796
797
798
799
800
801
  /**
   * Creates the cluster connection used internally by this server.
   * @return a connection that can short-circuit calls to this same server,
   *   bypassing the RPC layer where appropriate
   * @throws IOException if the connection cannot be created
   */
  @InterfaceAudience.Private
  protected ClusterConnection createClusterConnection() throws IOException {
    // Create a cluster connection that, when appropriate, can short-circuit
    // and go directly to the local server (rpcServices serves as both the
    // client and admin protocol endpoints).
    return ConnectionUtils.createShortCircuitConnection(conf, null, userProvider.getCurrent(),
      serverName, rpcServices, rpcServices);
  }
810
811
812
813
814
815
816 private static void checkCodecs(final Configuration c) throws IOException {
817
818 String [] codecs = c.getStrings("hbase.regionserver.codecs", (String[])null);
819 if (codecs == null) return;
820 for (String codec : codecs) {
821 if (!CompressionTest.testCompression(codec)) {
822 throw new IOException("Compression codec " + codec +
823 " not supported, aborting RS construction");
824 }
825 }
826 }
827
  /** @return the cluster id read from ZooKeeper, or null if not yet known */
  public String getClusterId() {
    return this.clusterId;
  }
831
832
833
834
835
  /**
   * Lazily creates the shared cluster connection and the meta-table locator.
   * Synchronized so concurrent callers initialize them at most once.
   * @throws IOException if the connection cannot be created
   */
  protected synchronized void setupClusterConnection() throws IOException {
    if (clusterConnection == null) {
      clusterConnection = createClusterConnection();
      metaTableLocator = new MetaTableLocator();
    }
  }
842
843
844
845
846
847
848
  /**
   * All initialization needed before this server can register with the
   * master: connection setup, the optional health-check chore, ZooKeeper
   * bootstrap, and worker threads.  Any failure here stops the RPC services
   * and aborts the server.
   */
  private void preRegistrationInitialization(){
    try {
      setupClusterConnection();

      // Health checker thread, only if a health script is configured.
      if (isHealthCheckerConfigured()) {
        int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
          HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
        healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
      }

      initializeZooKeeper();
      if (!isStopped() && !isAborted()) {
        initializeThreads();
      }
    } catch (Throwable t) {
      // Stop the RPC services explicitly: otherwise their non-daemon threads
      // would keep the process alive after the abort.
      this.rpcServices.stop();
      abort("Initialization of RS failed. Hence aborting RS.", t);
    }
  }
871
872
873
874
875
876
877
878
879
  /**
   * Blocks until the ZooKeeper preconditions hold -- an active master is
   * known and the cluster 'up' flag is set -- then reads the cluster id,
   * waits for a colocated master (if any) to become active, initializes the
   * procedure manager host, and registers the recovering-region watcher.
   * @throws IOException if a shutdown is requested while waiting
   * @throws InterruptedException if a wait is interrupted
   */
  @edu.umd.cs.findbugs.annotations.SuppressWarnings(
    value={"RV_RETURN_VALUE_IGNORED_BAD_PRACTICE", "RV_RETURN_VALUE_IGNORED"},
    justification="cluster Id znode read would give us correct response")
  private void initializeZooKeeper() throws IOException, InterruptedException {
    // Block until a master is available; no point starting up without one.
    blockAndCheckIfStopped(this.masterAddressTracker);

    // Wait on the cluster-up flag, which the master sets in ZK when ready.
    blockAndCheckIfStopped(this.clusterStatusTracker);

    if (this.initLatch != null) {
      // Bounded wait on the subclass-provided latch; the return value is
      // deliberately ignored -- NOTE(review): proceeding after a 20s timeout
      // looks intentional, confirm with latch installers.
      this.initLatch.await(20, TimeUnit.SECONDS);
    }

    // Retrieve the cluster id; the master should have set it by now since
    // the cluster-up flag is set.
    try {
      clusterId = ZKClusterId.readClusterIdZNode(this.zooKeeper);
      if (clusterId == null) {
        this.abort("Cluster ID has not been set");
      }
      LOG.info("ClusterId : "+clusterId);
    } catch (KeeperException e) {
      this.abort("Failed to retrieve Cluster ID",e);
    }

    // For a colocated master, wait until it is active (subclass hook) before
    // continuing; a plain region server returns immediately.
    waitForMasterActive();
    if (isStopped() || isAborted()) {
      return; // aborted or stopped while waiting -- skip the rest
    }

    // Set up the host running procedure members (snapshots etc.).
    try {
      rspmHost = new RegionServerProcedureManagerHost();
      rspmHost.loadProcedures(conf);
      rspmHost.initialize(this);
    } catch (KeeperException e) {
      this.abort("Failed to reach zk cluster when creating procedure handler.", e);
    }

    // Register the watcher for regions in recovery.
    this.recoveringRegionWatcher = new RecoveringRegionWatcher(this.zooKeeper, this);
  }
929
930
931
932
933
934
935
936
937 private void blockAndCheckIfStopped(ZooKeeperNodeTracker tracker)
938 throws IOException, InterruptedException {
939 while (tracker.blockUntilAvailable(this.msgInterval, false) == null) {
940 if (this.stopped) {
941 throw new IOException("Received the shutdown message while waiting.");
942 }
943 }
944 }
945
946
947
948
949 private boolean isClusterUp() {
950 return clusterStatusTracker != null && clusterStatusTracker.isClusterUp();
951 }
952
  /**
   * Creates the worker threads and chores this server runs: the memstore
   * flusher, compaction/split threads, periodic checkers, leases, the quota
   * manager, the internal RPC client and the optional storefile refresher.
   * Called after ZooKeeper initialization completes.
   * @throws IOException if a component fails to construct
   */
  private void initializeThreads() throws IOException {
    // Cache flushing thread.
    this.cacheFlusher = new MemStoreFlusher(conf, this);

    // Compaction thread.
    this.compactSplitThread = new CompactSplitThread(this);

    // Background chores: compaction check, periodic memstore flush, leases.
    this.compactionChecker = new CompactionChecker(this, this.compactionCheckFrequency, this);
    this.periodicFlusher = new PeriodicMemstoreFlusher(this.flushCheckFrequency, this);
    this.leases = new Leases(this.threadWakeFrequency);

    // Optional chore that persists the slow-log entries to a system table.
    final boolean isSlowLogTableEnabled = conf.getBoolean(HConstants.SLOW_LOG_SYS_TABLE_ENABLED_KEY,
      HConstants.DEFAULT_SLOW_LOG_SYS_TABLE_ENABLED_KEY);
    if (isSlowLogTableEnabled) {
      // Default chore period: 10 minutes.
      final int duration = conf.getInt("hbase.slowlog.systable.chore.duration", 10 * 60 * 1000);
      slowLogTableOpsChore = new SlowLogTableOpsChore(this, duration, this.namedQueueRecorder);
    }

    // Chore cleaning the moved-regions bookkeeping.
    movedRegionsCleaner = MovedRegionsCleaner.create(this);

    if (this.nonceManager != null) {
      // Chore cleaning up expired nonces.
      nonceManagerChore = this.nonceManager.createCleanupScheduledChore(this);
    }

    // Quota manager.
    rsQuotaManager = new RegionServerQuotaManager(this);

    // RPC client bound to this server's address (ephemeral local port).
    rpcClient = RpcClientFactory.createClient(conf, clusterId, new InetSocketAddress(
      rpcServices.isa.getAddress(), 0), clusterConnection.getConnectionMetrics());

    // Storefile refresher: if the general period is 0 (disabled), fall back
    // to the meta-only period and refresh only hbase:meta replicas.
    boolean onlyMetaRefresh = false;
    int storefileRefreshPeriod = conf.getInt(
      StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD
      , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD);
    if (storefileRefreshPeriod == 0) {
      storefileRefreshPeriod = conf.getInt(
        StorefileRefresherChore.REGIONSERVER_META_STOREFILE_REFRESH_PERIOD,
        StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD);
      onlyMetaRefresh = true;
    }
    if (storefileRefreshPeriod > 0) {
      this.storefileRefresher = new StorefileRefresherChore(storefileRefreshPeriod,
        onlyMetaRefresh, this, this);
    }
    registerConfigurationObservers();
  }
1005
/**
 * Registers the components that react to on-the-fly configuration changes
 * with the ConfigurationManager, including this server itself.
 */
private void registerConfigurationObservers() {
  // Registering the compactSplitThread object with the ConfigurationManager.
  configurationManager.registerObserver(this.compactSplitThread);
  configurationManager.registerObserver(this.rpcServices);
  configurationManager.registerObserver(this);
}
1012
1013
1014
1015
/**
 * The HRegionServer sticks in this loop until closed: registers with the master,
 * then heartbeats until stop/abort, then runs the ordered shutdown sequence.
 */
@Override
public void run() {
  if (isStopped()) {
    LOG.info("Skipping run; stopped");
    return;
  }
  try {
    installShutdownHook();
    // Do pre-registration initializations; zookeeper, lease threads, etc.
    preRegistrationInitialization();
  } catch (Throwable e) {
    abort("Fatal exception during initialization", e);
  }

  try {
    if (!isStopped() && !isAborted()) {
      // Initialize the RegionServerCoprocessorHost now that our ephemeral
      // node was created, in case any coprocessors want to use ZooKeeper.
      this.rsHost = new RegionServerCoprocessorHost(this, this.conf);
    }

    // Try and register with the Master; tell it we are here. Retry forever
    // (Integer.MAX_VALUE attempts) with exponential backoff starting at the
    // sleeper period and capped at 5 minutes between attempts.
    RetryCounterFactory rcf = new RetryCounterFactory(Integer.MAX_VALUE,
      this.sleeper.getPeriod(), 1000 * 60 * 5);
    RetryCounter rc = rcf.create();
    while (keepLooping()) {
      RegionServerStartupResponse w = reportForDuty();
      if (w == null) {
        long sleepTime = rc.getBackoffTimeAndIncrementAttempts();
        LOG.warn("reportForDuty failed; sleeping " + sleepTime + " ms and then retrying");
        this.sleeper.sleep(sleepTime);
      } else {
        handleReportForDutyResponse(w);
        break;
      }
    }

    if (!isStopped() && isHealthy()){
      // Start the snapshot handler and other procedure handlers, since the
      // server is ready to run.
      rspmHost.start();
    }

    // Start the Quota Manager.
    if (this.rsQuotaManager != null) {
      rsQuotaManager.start(getRpcServer().getScheduler());
    }

    // We registered with the Master. Go into run mode.
    long lastMsg = System.currentTimeMillis();
    long oldRequestCount = -1;
    // The main heartbeat loop.
    while (!isStopped() && isHealthy()) {
      if (!isClusterUp()) {
        if (isOnlineRegionsEmpty()) {
          stop("Exiting; cluster shutdown set and not carrying any regions");
        } else if (!this.stopping) {
          this.stopping = true;
          LOG.info("Closing user regions");
          closeUserRegions(this.abortRequested.get());
        } else if (this.stopping) {
          boolean allUserRegionsOffline = areAllUserRegionsOffline();
          if (allUserRegionsOffline) {
            // Only stop once no more write requests have hit the remaining
            // (catalog) regions since last time around the loop; any open
            // meta regions will be closed on our way out.
            if (oldRequestCount == getWriteRequestCount()) {
              stop("Stopped; only catalog regions remaining online");
              break;
            }
            oldRequestCount = getWriteRequestCount();
          } else {
            // Make sure all regions have been sent a close -- some may have
            // been missed if they were splitting at the time of the earlier
            // closeUserRegions call.
            closeUserRegions(this.abortRequested.get());
          }
          LOG.debug("Waiting on " + getOnlineRegionsAsPrintableString());
        }
      }
      long now = System.currentTimeMillis();
      if ((now - lastMsg) >= msgInterval) {
        tryRegionServerReport(lastMsg, now);
        lastMsg = System.currentTimeMillis();
      }
      if (!isStopped() && !isAborted()) {
        this.sleeper.sleep();
      }
    }
  } catch (Throwable t) {
    if (!rpcServices.checkOOME(t)) {
      String prefix = t instanceof YouAreDeadException? "": "Unhandled: ";
      abort(prefix + t.getMessage(), t);
    }
  }

  // ---- Shutdown sequence from here on; order matters. ----
  if (mxBean != null) {
    MBeanUtil.unregisterMBean(mxBean);
    mxBean = null;
  }
  // On abort, schedule a watchdog task that fires if the orderly shutdown
  // below hangs past the configured timeout (default task exits the JVM).
  if (abortRequested.get()) {
    Timer abortMonitor = new Timer("Abort regionserver monitor", true);
    TimerTask abortTimeoutTask = null;
    try {
      Constructor<? extends TimerTask> timerTaskCtor =
        Class.forName(conf.get(ABORT_TIMEOUT_TASK, SystemExitWhenAbortTimeout.class.getName()))
          .asSubclass(TimerTask.class).getDeclaredConstructor();
      timerTaskCtor.setAccessible(true);
      abortTimeoutTask = timerTaskCtor.newInstance();
    } catch (Exception e) {
      LOG.warn("Initialize abort timeout task failed", e);
    }
    if (abortTimeoutTask != null) {
      abortMonitor.schedule(abortTimeoutTask, conf.getLong(ABORT_TIMEOUT, DEFAULT_ABORT_TIMEOUT));
    }
  }

  if (this.leases != null) {
    this.leases.closeAfterLeasesExpire();
  }
  if (this.splitLogWorker != null) {
    splitLogWorker.stop();
  }
  if (this.infoServer != null) {
    LOG.info("Stopping infoServer");
    try {
      this.infoServer.stop();
    } catch (Exception e) {
      LOG.error("Failed to stop infoServer", e);
    }
  }
  // Send cache a shutdown.
  if (cacheConfig != null && cacheConfig.isBlockCacheEnabled()) {
    cacheConfig.getBlockCache().shutdown();
  }

  if (movedRegionsCleaner != null) {
    movedRegionsCleaner.stop("Region Server stopping");
  }

  // Interrupt threads that may be sleeping so they notice the shutdown.
  if (this.hMemManager != null) this.hMemManager.stop();
  if (this.cacheFlusher != null) this.cacheFlusher.interruptIfNecessary();
  if (this.compactSplitThread != null) this.compactSplitThread.interruptIfNecessary();
  sendShutdownInterrupt();

  // Stop the quota manager.
  if (rsQuotaManager != null) {
    rsQuotaManager.stop();
  }

  // Stop the snapshot and other procedure handlers; force-kill tasks on abort/kill.
  if (rspmHost != null) {
    rspmHost.stop(this.abortRequested.get() || this.killed);
  }

  if (this.killed) {
    // Just skip out w/o closing regions. Used when testing.
  } else if (abortRequested.get()) {
    if (this.fsOk) {
      closeUserRegions(abortRequested.get()); // Don't leave any open file handles
    }
    LOG.info("aborting server " + this.serverName);
  } else {
    closeUserRegions(abortRequested.get());
    LOG.info("stopping server " + this.serverName);
  }

  // So callers waiting for meta without timeout can stop.
  if (this.metaTableLocator != null) this.metaTableLocator.stop();
  if (this.clusterConnection != null && !clusterConnection.isClosed()) {
    try {
      this.clusterConnection.close();
    } catch (IOException e) {
      // Although Closeable declares IOException, the implementation is not
      // expected to actually throw here; log and carry on with shutdown.
      LOG.warn("Attempt to close server's short circuit HConnection failed.", e);
    }
  }

  // Close the compactSplit thread before closing meta regions.
  if (!this.killed && containsMetaTableRegions()) {
    if (!abortRequested.get() || this.fsOk) {
      if (this.compactSplitThread != null) {
        this.compactSplitThread.join();
        this.compactSplitThread = null;
      }
      closeMetaTableRegions(abortRequested.get());
    }
  }

  if (!this.killed && this.fsOk) {
    waitOnAllRegionsToClose(abortRequested.get());
    LOG.info("stopping server " + this.serverName +
      "; all regions closed.");
  }

  // Only close the WAL cleanly (finalizing files) when not aborting.
  if (this.fsOk) {
    shutdownWAL(!abortRequested.get());
  }

  // Make sure the master proxy is dropped.
  if (this.rssStub != null) {
    this.rssStub = null;
  }
  if (this.rpcClient != null) {
    this.rpcClient.close();
  }
  if (this.leases != null) {
    this.leases.close();
  }
  if (this.pauseMonitor != null) {
    this.pauseMonitor.stop();
  }

  if (!killed) {
    stopServiceThreads();
  }

  try {
    if (this.namedQueueRecorder != null) {
      namedQueueRecorder.close();
    }
  } catch (IOException ioe) {
    LOG.warn("Attempt to close NamedQueueRecorder failed", ioe);
  }

  if (this.rpcServices != null) {
    this.rpcServices.stop();
  }

  try {
    deleteMyEphemeralNode();
  } catch (KeeperException.NoNodeException nn) {
    // Node already gone (e.g. session expired); nothing to do.
  } catch (KeeperException e) {
    LOG.warn("Failed deleting my ephemeral node", e);
  }
  // Even if the znode delete above failed, remove the on-disk backup; a second
  // delete attempt against zk would likely fail the same way.
  ZNodeClearer.deleteMyEphemeralNodeOnDisk();

  if (this.zooKeeper != null) {
    this.zooKeeper.close();
  }
  LOG.info("stopping server " + this.serverName +
    "; zookeeper connection closed.");

  LOG.info(Thread.currentThread().getName() + " exiting");
}
1271
1272
1273
1274
1275
/**
 * Installs the JVM shutdown hook that closes this server and its filesystem on
 * process exit, then records that the hook is in place.
 */
private void installShutdownHook() {
  ShutdownHook.install(conf, fs, this, Thread.currentThread());
  isShutdownHookInstalled = true;
}
1280
1281
1282
1283
1284 public boolean isShutdownHookInstalled() {
1285 return isShutdownHookInstalled;
1286 }
1287
1288 private boolean containsMetaTableRegions() {
1289 return onlineRegions.containsKey(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
1290 }
1291
1292 private boolean areAllUserRegionsOffline() {
1293 if (getNumberOfOnlineRegions() > 2) return false;
1294 boolean allUserRegionsOffline = true;
1295 for (Map.Entry<String, Region> e: this.onlineRegions.entrySet()) {
1296 if (!e.getValue().getRegionInfo().isMetaTable()) {
1297 allUserRegionsOffline = false;
1298 break;
1299 }
1300 }
1301 return allUserRegionsOffline;
1302 }
1303
1304
1305
1306
1307 private long getWriteRequestCount() {
1308 long writeCount = 0;
1309 for (Map.Entry<String, Region> e: this.onlineRegions.entrySet()) {
1310 writeCount += e.getValue().getWriteRequestsCount();
1311 }
1312 return writeCount;
1313 }
1314
/**
 * Reports this server's current load to the active master. On a transport
 * failure (other than YouAreDeadException) the cached master stub is dropped
 * and re-created so a later report can succeed.
 *
 * @param reportStartTime start of the stats window being reported, in ms
 * @param reportEndTime end of the stats window being reported, in ms
 * @throws IOException if the master answers YouAreDeadException (fatal; rethrown
 *           to be handled in run())
 */
@InterfaceAudience.Private
protected void tryRegionServerReport(long reportStartTime, long reportEndTime)
throws IOException {
  RegionServerStatusService.BlockingInterface rss = rssStub;
  if (rss == null) {
    // The current server could be stopping; no stub, nothing to report to.
    return;
  }
  ClusterStatusProtos.ServerLoad sl = buildServerLoad(reportStartTime, reportEndTime);
  try {
    RegionServerReportRequest.Builder request = RegionServerReportRequest.newBuilder();
    request.setServer(ProtobufUtil.toServerName(this.serverName));
    request.setLoad(sl);
    rss.regionServerReport(null, request.build());
  } catch (ServiceException se) {
    IOException ioe = ProtobufUtil.getRemoteException(se);
    if (ioe instanceof YouAreDeadException) {
      // This will be caught and handled as a fatal error in run().
      throw ioe;
    }
    // Only clear the stub if no concurrent caller replaced it already.
    if (rssStub == rss) {
      rssStub = null;
    }
    // Couldn't connect to the master; get location from zk and reconnect.
    // Blocks until a new master is found or we are stopped.
    createRegionServerStatusStub(true);
  }
}
1343
/**
 * Builds the ServerLoad protobuf reported to the master in heartbeats: request
 * rates, heap usage, per-region loads, loaded coprocessor names and replication
 * load.
 *
 * @param reportStartTime start of the stats window, in ms
 * @param reportEndTime end of the stats window, in ms
 * @return the assembled ServerLoad message
 * @throws IOException if per-region load collection fails
 */
ClusterStatusProtos.ServerLoad buildServerLoad(long reportStartTime, long reportEndTime)
    throws IOException {
  // We use the MetricsRegionServerWrapper because it computes requests per
  // second and related numbers in one place; as long as these metrics ride the
  // heartbeat, the wrapper is the single source for them.
  MetricsRegionServerWrapper regionServerWrapper = metricsRegionServer.getRegionServerWrapper();
  Collection<Region> regions = getOnlineRegionsLocalContext();
  // -1 signals "unknown" when heap usage cannot be read.
  long usedMemory = -1L;
  long maxMemory = -1L;
  final MemoryUsage usage = HeapMemorySizeUtil.safeGetHeapMemoryUsage();
  if (usage != null) {
    usedMemory = usage.getUsed();
    maxMemory = usage.getMax();
  }

  ClusterStatusProtos.ServerLoad.Builder serverLoad =
    ClusterStatusProtos.ServerLoad.newBuilder();
  serverLoad.setNumberOfRequests((int) regionServerWrapper.getRequestsPerSecond());
  serverLoad.setTotalNumberOfRequests(regionServerWrapper.getTotalRequestCount());
  serverLoad.setUsedHeapMB((int)(usedMemory / 1024 / 1024));
  serverLoad.setMaxHeapMB((int) (maxMemory / 1024 / 1024));
  // Server-wide coprocessors, taken from the default WAL's coprocessor host.
  Set<String> coprocessors = getWAL(null).getCoprocessorHost().getCoprocessors();
  Builder coprocessorBuilder = Coprocessor.newBuilder();
  for (String coprocessor : coprocessors) {
    serverLoad.addCoprocessors(coprocessorBuilder.setName(coprocessor).build());
  }
  RegionLoad.Builder regionLoadBldr = RegionLoad.newBuilder();
  RegionSpecifier.Builder regionSpecifier = RegionSpecifier.newBuilder();
  for (Region region : regions) {
    // Region-level coprocessors, when the region has a coprocessor host.
    if (region.getCoprocessorHost() != null) {
      Set<String> regionCoprocessors = region.getCoprocessorHost().getCoprocessors();
      Iterator<String> iterator = regionCoprocessors.iterator();
      while (iterator.hasNext()) {
        serverLoad.addCoprocessors(coprocessorBuilder.setName(iterator.next()).build());
      }
    }
    serverLoad.addRegionLoads(createRegionLoad(region, regionLoadBldr, regionSpecifier));
    // Coprocessors loaded on the WAL serving this particular region.
    for (String coprocessor : getWAL(region.getRegionInfo()).getCoprocessorHost()
        .getCoprocessors()) {
      serverLoad.addCoprocessors(coprocessorBuilder.setName(coprocessor).build());
    }
  }
  serverLoad.setReportStartTime(reportStartTime);
  serverLoad.setReportEndTime(reportEndTime);
  if (this.infoServer != null) {
    serverLoad.setInfoServerPort(this.infoServer.getPort());
  } else {
    serverLoad.setInfoServerPort(-1);
  }

  // For the replication load: only need to ask one service since source and
  // sink report the same info.
  ReplicationSourceService rsources = getReplicationSourceService();

  if (rsources != null) {
    // Always refresh first to get the latest value.
    ReplicationLoad rLoad = rsources.refreshAndGetReplicationLoad();
    if (rLoad != null) {
      serverLoad.setReplLoadSink(rLoad.getReplicationLoadSink());
      for (ClusterStatusProtos.ReplicationLoadSource rLS : rLoad.getReplicationLoadSourceList()) {
        serverLoad.addReplLoadSource(rLS);
      }
    }
  }

  return serverLoad.build();
}
1415
1416 String getOnlineRegionsAsPrintableString() {
1417 StringBuilder sb = new StringBuilder();
1418 for (Region r: this.onlineRegions.values()) {
1419 if (sb.length() > 0) sb.append(", ");
1420 sb.append(r.getRegionInfo().getEncodedName());
1421 }
1422 return sb.toString();
1423 }
1424
1425
1426
1427
/**
 * Waits until the set of online regions is empty, issuing a close for any region
 * that is not already in transition and logging progress at most once a second.
 * Bails out early if nothing is left in transition but some regions failed to
 * close.
 *
 * @param abort passed through to region close; when true, memstore edits are dropped
 */
private void waitOnAllRegionsToClose(final boolean abort) {
  // Wait till all regions are closed before going out.
  int lastCount = -1;
  long previousLogTime = 0;
  // Regions we've already issued a close for, keyed by encoded name.
  Set<String> closedRegions = new HashSet<String>();
  boolean interrupted = false;
  try {
    while (!isOnlineRegionsEmpty()) {
      int count = getNumberOfOnlineRegions();
      // Only print a message if the count of regions has changed.
      if (count != lastCount) {
        // Log at most every second.
        if (System.currentTimeMillis() > (previousLogTime + 1000)) {
          previousLogTime = System.currentTimeMillis();
          lastCount = count;
          LOG.info("Waiting on " + count + " regions to close");
          // Only dump the region list when it's small, else we'd swamp the log.
          if (count < 10 && LOG.isDebugEnabled()) {
            LOG.debug(this.onlineRegions);
          }
        }
      }
      // Ensure every region has been sent a close. This guards against a region
      // opening after we started iterating onlineRegions to close everything.
      for (Map.Entry<String, Region> e : this.onlineRegions.entrySet()) {
        HRegionInfo hri = e.getValue().getRegionInfo();
        if (!this.regionsInTransitionInRS.containsKey(hri.getEncodedNameAsBytes())
            && !closedRegions.contains(hri.getEncodedName())) {
          closedRegions.add(hri.getEncodedName());
          // Errors are ignored here; failure to close is handled below.
          closeRegionIgnoreErrors(hri, abort);
        }
      }
      // Nothing left in transition: we can stop waiting even if some regions
      // are still online (they failed closing).
      if (this.regionsInTransitionInRS.isEmpty()) {
        if (!isOnlineRegionsEmpty()) {
          LOG.info("We were exiting though online regions are not empty," +
            " because some regions failed closing");
        }
        break;
      }
      if (sleep(200)) {
        interrupted = true;
      }
    }
  } finally {
    if (interrupted) {
      // Restore the interrupt status that sleep() swallowed.
      Thread.currentThread().interrupt();
    }
  }
}
1481
1482 private boolean sleep(long millis) {
1483 boolean interrupted = false;
1484 try {
1485 Thread.sleep(millis);
1486 } catch (InterruptedException e) {
1487 LOG.warn("Interrupted while sleeping");
1488 interrupted = true;
1489 }
1490 return interrupted;
1491 }
1492
1493 private void shutdownWAL(final boolean close) {
1494 if (this.walFactory != null) {
1495 try {
1496 if (close) {
1497 walFactory.close();
1498 } else {
1499 walFactory.shutdown();
1500 }
1501 } catch (Throwable e) {
1502 e = RemoteExceptionHandler.checkThrowable(e);
1503 LOG.error("Shutdown / close of WAL failed: " + e);
1504 LOG.debug("Shutdown / close exception details:", e);
1505 }
1506 }
1507 }
1508
1509
1510
1511
1512
1513
1514 public NamedQueueRecorder getNamedQueueRecorder() {
1515 return this.namedQueueRecorder;
1516 }
1517
1518
1519
1520
1521
1522
/**
 * Runs post-registration init: applies config the master handed back, creates
 * our ephemeral znode, sets up the WAL, metrics and service threads, and marks
 * the server online.
 *
 * @param c extra configuration from the master's report-for-duty response
 * @throws IOException if initialization fails; the server is stopped first
 */
protected void handleReportForDutyResponse(final RegionServerStartupResponse c)
throws IOException {
  try {
    boolean updateRootDir = false;
    for (NameStringPair e : c.getMapEntriesList()) {
      String key = e.getName();
      // The hostname the master sees us as: becomes our ServerName.
      if (key.equals(HConstants.KEY_FOR_HOSTNAME_SEEN_BY_MASTER)) {
        String hostnameFromMasterPOV = e.getValue();
        this.serverName = ServerName.valueOf(hostnameFromMasterPOV,
          rpcServices.isa.getPort(), this.startcode);
        if (shouldUseThisHostnameInstead() &&
            !hostnameFromMasterPOV.equals(useThisHostnameInstead)) {
          String msg = "Master passed us a different hostname to use; was=" +
            this.useThisHostnameInstead + ", but now=" + hostnameFromMasterPOV;
          LOG.error(msg);
          throw new IOException(msg);
        }
        if (!shouldUseThisHostnameInstead() &&
            !hostnameFromMasterPOV.equals(rpcServices.isa.getHostName())) {
          // NOTE(review): unlike the configured-hostname branch above, this
          // mismatch is only logged, not thrown -- presumably deliberate; confirm.
          String msg = "Master passed us a different hostname to use; was=" +
            rpcServices.isa.getHostName() + ", but now=" + hostnameFromMasterPOV;
          LOG.error(msg);
        }
        continue;
      }
      String value = e.getValue();
      if (key.equals(HConstants.HBASE_DIR)) {
        if (value != null && !value.equals(conf.get(HConstants.HBASE_DIR))) {
          updateRootDir = true;
        }
      }
      if (LOG.isDebugEnabled()) {
        // NOTE(review): guarded by isDebugEnabled but logged at INFO level --
        // looks inconsistent; confirm intended level.
        LOG.info("Config from master: " + key + "=" + value);
      }
      this.conf.set(key, value);
    }
    // Set our ephemeral znode up in zookeeper now that we have a name.
    createMyEphemeralNode();

    if (updateRootDir) {
      // Re-initialize the filesystem with the rootdir the master handed us.
      initializeFileSystem();
    }

    // Hack: maps DFSClient => RegionServer in logs. HDFS made this config param
    // for task trackers; we piggyback on it for log attribution.
    if (this.conf.get("mapreduce.task.attempt.id") == null) {
      this.conf.set("mapreduce.task.attempt.id", "hb_rs_" +
        this.serverName.toString());
    }

    // Persist the znode path so a crash can be detected/cleared on restart.
    ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());

    this.cacheConfig = new CacheConfig(conf);
    this.walFactory = setupWALAndReplication();
    // Init here rather than in the constructor, after the thread name is set.
    this.metricsRegionServerImpl = new MetricsRegionServerWrapperImpl(this);
    this.metricsRegionServer = new MetricsRegionServer(metricsRegionServerImpl, conf);
    this.metricsTable = new MetricsTable(new MetricsTableWrapperAggregateImpl(this));
    // Now that we have a metrics source, start the pause monitor.
    this.pauseMonitor = new JvmPauseMonitor(conf, getMetrics().getMetricsSource());
    pauseMonitor.start();

    startServiceThreads();

    // Chore that periodically collects executor status, when enabled.
    if (this.conf.getBoolean(HConstants.EXECUTOR_STATUS_COLLECT_ENABLED,
        HConstants.DEFAULT_EXECUTOR_STATUS_COLLECT_ENABLED)) {
      int sleepTime = this.conf.getInt(ExecutorStatusChore.WAKE_FREQ,
        ExecutorStatusChore.DEFAULT_WAKE_FREQ);
      executorStatusChore = new ExecutorStatusChore(sleepTime, this, this.getExecutorService(),
        this.getRegionServerMetrics().getMetricsSource());
    }

    startHeapMemoryManager();
    LOG.info("Serving as " + this.serverName +
      ", RpcServer on " + rpcServices.isa +
      ", sessionid=0x" +
      Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()));

    // Wake up anyone waiting for this server to come online.
    synchronized (online) {
      online.set(true);
      online.notifyAll();
    }
  } catch (Throwable e) {
    stop("Failed initialization");
    throw convertThrowableToIOE(cleanup(e, "Failed init"),
      "Region server startup failed");
  } finally {
    sleeper.skipSleepCycle();
  }
}
1618
1619 private void startHeapMemoryManager() {
1620 this.hMemManager = HeapMemoryManager.create(this.conf, this.cacheFlusher,
1621 this, this.regionServerAccounting);
1622 if (this.hMemManager != null) {
1623 this.hMemManager.start(getChoreService());
1624 }
1625 }
1626
1627 private void createMyEphemeralNode() throws KeeperException, IOException {
1628 RegionServerInfo.Builder rsInfo = RegionServerInfo.newBuilder();
1629 rsInfo.setInfoPort(infoServer != null ? infoServer.getPort() : -1);
1630 rsInfo.setVersionInfo(ProtobufUtil.getVersionInfo());
1631 byte[] data = ProtobufUtil.prependPBMagic(rsInfo.build().toByteArray());
1632 ZKUtil.createEphemeralNodeAndWatch(this.zooKeeper,
1633 getMyEphemeralNodePath(), data);
1634 }
1635
/**
 * Deletes this server's ephemeral znode from ZooKeeper.
 *
 * @throws KeeperException if the delete fails (callers tolerate NoNodeException)
 */
private void deleteMyEphemeralNode() throws KeeperException {
  ZKUtil.deleteNode(this.zooKeeper, getMyEphemeralNodePath());
}
1639
1640 @Override
1641 public RegionServerAccounting getRegionServerAccounting() {
1642 return regionServerAccounting;
1643 }
1644
1645 @Override
1646 public TableLockManager getTableLockManager() {
1647 return tableLockManager;
1648 }
1649
1650
1651
1652
1653
1654
1655
1656
1657
/**
 * Aggregates per-store statistics for a region into a RegionLoad protobuf:
 * store/storefile counts, sizes (MB/KB), compaction progress, request counts
 * and HDFS block locality.
 *
 * @param r the region to report on
 * @param regionLoadBldr builder to reuse; a fresh one is created when null
 * @param regionSpecifier builder to reuse; a fresh one is created when null
 * @return the populated RegionLoad message
 * @throws IOException if reading region metadata (e.g. oldest hfile ts) fails
 */
private RegionLoad createRegionLoad(final Region r, RegionLoad.Builder regionLoadBldr,
    RegionSpecifier.Builder regionSpecifier) throws IOException {
  byte[] name = r.getRegionInfo().getRegionName();
  int stores = 0;
  int storefiles = 0;
  int storeRefCount = 0;
  int maxCompactedStoreFileRefCount = 0;
  int storeUncompressedSizeMB = 0;
  int storefileSizeMB = 0;
  int memstoreSizeMB = (int) (r.getMemstoreSize() / 1024 / 1024);
  int storefileIndexSizeMB = 0;
  int rootIndexSizeKB = 0;
  int totalStaticIndexSizeKB = 0;
  int totalStaticBloomSizeKB = 0;
  long totalCompactingKVs = 0;
  long currentCompactedKVs = 0;
  List<Store> storeList = r.getStores();
  stores += storeList.size();
  for (Store store : storeList) {
    storefiles += store.getStorefilesCount();
    // Ref counts are only tracked by the HStore implementation.
    if (store instanceof HStore) {
      HStore hStore = (HStore) store;
      int currentStoreRefCount = hStore.getStoreRefCount();
      storeRefCount += currentStoreRefCount;
      int currentMaxCompactedStoreFileRefCount = hStore.getMaxCompactedStoreFileRefCount();
      maxCompactedStoreFileRefCount = Math.max(maxCompactedStoreFileRefCount,
        currentMaxCompactedStoreFileRefCount);
    }
    storeUncompressedSizeMB += (int) (store.getStoreSizeUncompressed() / 1024 / 1024);
    storefileSizeMB += (int) (store.getStorefilesSize() / 1024 / 1024);
    storefileIndexSizeMB += (int) (store.getStorefilesIndexSize() / 1024 / 1024);
    CompactionProgress progress = store.getCompactionProgress();
    if (progress != null) {
      totalCompactingKVs += progress.totalCompactingKVs;
      currentCompactedKVs += progress.currentCompactedKVs;
    }
    rootIndexSizeKB += (int) (store.getStorefilesIndexSize() / 1024);
    totalStaticIndexSizeKB += (int) (store.getTotalStaticIndexSize() / 1024);
    totalStaticBloomSizeKB += (int) (store.getTotalStaticBloomSize() / 1024);
  }

  float dataLocality =
      r.getHDFSBlocksDistribution().getBlockLocalityIndex(serverName.getHostname());
  if (regionLoadBldr == null) {
    regionLoadBldr = RegionLoad.newBuilder();
  }
  if (regionSpecifier == null) {
    regionSpecifier = RegionSpecifier.newBuilder();
  }
  regionSpecifier.setType(RegionSpecifierType.REGION_NAME);
  regionSpecifier.setValue(ByteStringer.wrap(name));
  regionLoadBldr.setRegionSpecifier(regionSpecifier.build())
    .setStores(stores)
    .setStorefiles(storefiles)
    .setStoreRefCount(storeRefCount)
    .setMaxCompactedStoreFileRefCount(maxCompactedStoreFileRefCount)
    .setStoreUncompressedSizeMB(storeUncompressedSizeMB)
    .setStorefileSizeMB(storefileSizeMB)
    .setMemstoreSizeMB(memstoreSizeMB)
    .setStorefileIndexSizeMB(storefileIndexSizeMB)
    .setRootIndexSizeKB(rootIndexSizeKB)
    .setTotalStaticIndexSizeKB(totalStaticIndexSizeKB)
    .setTotalStaticBloomSizeKB(totalStaticBloomSizeKB)
    .setReadRequestsCount(r.getReadRequestsCount())
    .setWriteRequestsCount(r.getWriteRequestsCount())
    .setTotalCompactingKVs(totalCompactingKVs)
    .setCurrentCompactedKVs(currentCompactedKVs)
    .setDataLocality(dataLocality)
    .setLastMajorCompactionTs(r.getOldestHfileTs(true));
  ((HRegion)r).setCompleteSequenceId(regionLoadBldr);

  return regionLoadBldr.build();
}
1731
1732
1733
1734
1735
1736 public RegionLoad createRegionLoad(final String encodedRegionName) throws IOException {
1737 Region r = onlineRegions.get(encodedRegionName);
1738 return r != null ? createRegionLoad(r, null, null) : null;
1739 }
1740
1741
1742
1743
/**
 * Chore that periodically scans every online region's stores and queues
 * system or major compactions as needed.
 */
private static class CompactionChecker extends ScheduledChore {
  private final HRegionServer instance;
  // Priority used for major compactions; DEFAULT_PRIORITY means "unset".
  private final int majorCompactPriority;
  private final static int DEFAULT_PRIORITY = Integer.MAX_VALUE;

  // Iteration counter used to honor each store's compaction-check multiplier
  // (a store is only checked on iterations divisible by its multiplier).
  private long iteration = 1;

  CompactionChecker(final HRegionServer h, final int sleepTime,
      final Stoppable stopper) {
    super("CompactionChecker", stopper, sleepTime);
    this.instance = h;
    LOG.info(this.getName() + " runs every " + StringUtils.formatTime(sleepTime));

    // MajorCompactPriority is configurable; when unset, major compactions use
    // the default priority.
    this.majorCompactPriority = this.instance.conf.
      getInt("hbase.regionserver.compactionChecker.majorCompactPriority",
        DEFAULT_PRIORITY);
  }

  @Override
  protected void chore() {
    for (Region r : this.instance.onlineRegions.values()) {
      if (r == null)
        continue;
      for (Store s : r.getStores()) {
        try {
          long multiplier = s.getCompactionCheckMultiplier();
          assert multiplier > 0;
          // Skip this store unless its multiplier divides the iteration count.
          if (iteration % multiplier != 0) continue;
          if (s.needsCompaction()) {
            // Queue a compaction; the request will escalate to major if needed.
            this.instance.compactSplitThread.requestSystemCompaction(r, s, getName()
              + " requests compaction");
          } else if (s.isMajorCompaction()) {
            s.triggerMajorCompaction();
            if (majorCompactPriority == DEFAULT_PRIORITY
                || majorCompactPriority > ((HRegion)r).getCompactPriority()) {
              this.instance.compactSplitThread.requestCompaction(r, s, getName()
                + " requests major compaction; use default priority", null);
            } else {
              this.instance.compactSplitThread.requestCompaction(r, s, getName()
                + " requests major compaction; use configured priority",
                this.majorCompactPriority, null, null);
            }
          }
        } catch (IOException e) {
          LOG.warn("Failed major compaction check on " + r, e);
        }
      }
    }
    // Wrap to 0 instead of overflowing.
    iteration = (iteration == Long.MAX_VALUE) ? 0 : (iteration + 1);
  }
}
1800
/**
 * Chore that requests a delayed flush for any online region whose memstore
 * should be flushed, spreading requests over a random delay window so regions
 * don't all flush at once.
 */
static class PeriodicMemstoreFlusher extends ScheduledChore {
  final HRegionServer server;
  // Default upper bound of the random flush delay, in seconds.
  final static int RANGE_OF_DELAY = 5 * 60;
  final static int MIN_DELAY_TIME = 0;
  // Configured delay window, converted to milliseconds.
  final int rangeOfDelay;
  public PeriodicMemstoreFlusher(int cacheFlushInterval, final HRegionServer server) {
    super(server.getServerName() + "-MemstoreFlusherChore", server, cacheFlushInterval);
    this.server = server;

    this.rangeOfDelay = this.server.conf.getInt("hbase.regionserver.periodicmemstoreflusher.rangeofdelayseconds",
      RANGE_OF_DELAY)*1000;
  }

  @Override
  protected void chore() {
    // Accumulates the reasons a region wants flushing (filled by shouldFlush).
    final StringBuffer whyFlush = new StringBuffer();
    for (Region r : this.server.onlineRegions.values()) {
      if (r == null) continue;
      if (((HRegion) r).shouldFlush(whyFlush)) {
        FlushRequester requester = server.getFlushRequester();
        if (requester != null) {
          // Throttle by delaying each flush randomly within the window;
          // without this, balanced write load across a table could overwhelm
          // the filesystem with simultaneous flushes.
          long randomDelay = (long) RandomUtils.nextInt(rangeOfDelay) + MIN_DELAY_TIME;
          if (requester.requestDelayedFlush(r, randomDelay, false)) {
            LOG.info(MessageFormat.format("{0} requesting flush of {1} because {2} " +
              "after random delay {3} ms", getName(),
              r.getRegionInfo().getRegionNameAsString(), whyFlush.toString(), randomDelay));
          }
        }
      }
    }
  }
}
1836
1837
1838
1839
1840
1841
1842
1843
1844 public boolean isOnline() {
1845 return online.get();
1846 }
1847
1848
1849
1850
1851
1852
1853
/**
 * Sets up the WAL and, if enabled, replication. Replication setup lives here
 * because it needs to hook into the WAL via a listener.
 *
 * @return the WALFactory this server will write through
 * @throws IOException e.g. when this server's WAL directory already exists
 */
private WALFactory setupWALAndReplication() throws IOException {
  // TODO Replication makes assumptions here based on the default filesystem impl.
  final Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
  final String logName = DefaultWALProvider.getWALDirectoryName(this.serverName.toString());

  Path logDir = new Path(walRootDir, logName);
  if (LOG.isDebugEnabled()) LOG.debug("logDir=" + logDir);
  if (this.walFs.exists(logDir)) {
    // Another live server with our name would have created this directory.
    throw new RegionServerRunningException("Region server has already " +
      "created directory at " + this.serverName.toString());
  }

  // Instantiate replication manager if replication is enabled; pass it the
  // log directories.
  createNewReplicationInstance(conf, this, this.walFs, logDir, oldLogDir);

  // Listeners the wal factory will add to the wals it creates.
  final List<WALActionsListener> listeners = new ArrayList<WALActionsListener>();
  listeners.add(new MetricsWAL());
  if (this.replicationSourceHandler != null &&
      this.replicationSourceHandler.getWALActionsListener() != null) {
    // The replication handler is itself a WALActionsListener implementation.
    listeners.add(this.replicationSourceHandler.getWALActionsListener());
  }

  return new WALFactory(conf, listeners, serverName.toString());
}
1881
1882
1883
1884
1885
1886
1887
1888
1889
/**
 * Lazily initializes the roller for the meta WAL, since we don't know up front
 * whether this server will host hbase:meta. All calls return the same shared
 * roller; creation races are resolved via compare-and-set on the atomic holder.
 *
 * @return the shared meta-WAL roller, creating it on first use
 */
protected LogRoller ensureMetaWALRoller() {
  // Use a temporary log roller so that metawalRoller is guaranteed alive once
  // it is non-null.
  LogRoller roller = metawalRoller.get();
  if (null == roller) {
    LogRoller tmpLogRoller = new LogRoller(this, this);
    String n = Thread.currentThread().getName();
    Threads.setDaemonThreadRunning(tmpLogRoller.getThread(),
      n + "-MetaLogRoller", uncaughtExceptionHandler);
    if (metawalRoller.compareAndSet(null, tmpLogRoller)) {
      // We won the race: our roller becomes the shared one.
      roller = tmpLogRoller;
    } else {
      // Lost the race: shut down our speculative roller, use the winner's.
      Threads.shutdown(tmpLogRoller.getThread());
      roller = metawalRoller.get();
    }
  }
  return roller;
}
1909
1910 public MetricsRegionServer getRegionServerMetrics() {
1911 return this.metricsRegionServer;
1912 }
1913
1914
1915
1916
1917 public MasterAddressTracker getMasterAddressTracker() {
1918 return this.masterAddressTracker;
1919 }
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
/**
 * Starts maintenance threads: executor services, the WAL roller, the cache
 * flusher, the periodic chores, the lease checker, replication services and
 * the split-log worker. Daemon threads are installed with the server's
 * uncaught-exception handler.
 *
 * @throws IOException if a service cannot be started
 */
private void startServiceThreads() throws IOException {
  // Start executor services.
  this.service.startExecutorService(ExecutorType.RS_OPEN_REGION,
    conf.getInt("hbase.regionserver.executor.openregion.threads", 3));
  this.service.startExecutorService(ExecutorType.RS_OPEN_META,
    conf.getInt("hbase.regionserver.executor.openmeta.threads", 1));
  this.service.startExecutorService(ExecutorType.RS_OPEN_PRIORITY_REGION,
    conf.getInt("hbase.regionserver.executor.openpriorityregion.threads", 3));
  this.service.startExecutorService(ExecutorType.RS_CLOSE_REGION,
    conf.getInt("hbase.regionserver.executor.closeregion.threads", 3));
  this.service.startExecutorService(ExecutorType.RS_CLOSE_META,
    conf.getInt("hbase.regionserver.executor.closemeta.threads", 1));
  if (conf.getBoolean(StoreScanner.STORESCANNER_PARALLEL_SEEK_ENABLE, false)) {
    this.service.startExecutorService(ExecutorType.RS_PARALLEL_SEEK,
      conf.getInt("hbase.storescanner.parallel.seek.threads", 10));
  }
  this.service.startExecutorService(ExecutorType.RS_LOG_REPLAY_OPS, conf.getInt(
    "hbase.regionserver.wal.max.splitters", SplitLogWorkerCoordination.DEFAULT_MAX_SPLITTERS));
  // Executor that discharges compacted files no longer referenced by scanners.
  this.service.startExecutorService(ExecutorType.RS_COMPACTED_FILES_DISCHARGER,
    conf.getInt(CompactionConfiguration.HBASE_HFILE_COMPACTION_DISCHARGER_THREAD_COUNT, 10));
  if (ServerRegionReplicaUtil.isRegionReplicaWaitForPrimaryFlushEnabled(conf)) {
    this.service.startExecutorService(ExecutorType.RS_REGION_REPLICA_FLUSH_OPS,
      conf.getInt("hbase.regionserver.region.replica.flusher.threads",
        conf.getInt("hbase.regionserver.executor.openregion.threads", 3)));
  }

  Threads.setDaemonThreadRunning(this.walRoller.getThread(), getName() + ".logRoller",
    uncaughtExceptionHandler);
  if (this.cacheFlusher != null) {
    this.cacheFlusher.start(uncaughtExceptionHandler);
  }
  // Schedule whichever chores were constructed in initializeThreads().
  if (this.compactionChecker != null) {
    choreService.scheduleChore(compactionChecker);
  }
  if (this.periodicFlusher != null) {
    choreService.scheduleChore(periodicFlusher);
  }
  if (this.healthCheckChore != null) {
    choreService.scheduleChore(healthCheckChore);
  }
  if (this.executorStatusChore != null) {
    choreService.scheduleChore(executorStatusChore);
  }
  if (this.nonceManagerChore != null) {
    choreService.scheduleChore(nonceManagerChore);
  }
  if (this.storefileRefresher != null) {
    choreService.scheduleChore(storefileRefresher);
  }
  if (this.movedRegionsCleaner != null) {
    choreService.scheduleChore(movedRegionsCleaner);
  }
  if (this.slowLogTableOpsChore != null) {
    choreService.scheduleChore(slowLogTableOpsChore);
  }

  // Leases is not a Thread itself; it runs an internal daemon thread with its
  // own stop mechanism, so it must be stopped by this hosting server.
  Threads.setDaemonThreadRunning(this.leases.getThread(), getName() + ".leaseChecker",
    uncaughtExceptionHandler);

  // When source and sink are the same handler, start it only once.
  if (this.replicationSourceHandler == this.replicationSinkHandler &&
      this.replicationSourceHandler != null) {
    this.replicationSourceHandler.startReplicationService();
  } else {
    if (this.replicationSourceHandler != null) {
      this.replicationSourceHandler.startReplicationService();
    }
    if (this.replicationSinkHandler != null) {
      this.replicationSinkHandler.startReplicationService();
    }
  }

  // Create the log splitting worker and start it. Use a smaller retry count to
  // fail fast, otherwise the splitlogworker could be blocked inside the
  // Connection layer for quite a while and be unavailable for other tasks even
  // after the current task is preempted on timeout.
  Configuration sinkConf = HBaseConfiguration.create(conf);
  sinkConf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
    conf.getInt("hbase.log.replay.retries.number", 8));
  sinkConf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY,
    conf.getInt("hbase.log.replay.rpc.timeout", 30000));
  sinkConf.setInt("hbase.client.serverside.retries.multiplier", 1);
  this.splitLogWorker = new SplitLogWorker(this, sinkConf, this, this, walFactory);
  splitLogWorker.start();
}
2020
2021
2022
2023
2024
2025
/**
 * Puts up the web UI (info server).
 *
 * @return the final bound port -- may differ from the configured one when
 *         auto port binding is enabled; negative when the UI is disabled
 * @throws IOException if the bind address is not local, or binding fails and
 *           auto port selection is off
 */
private int putUpWebUI() throws IOException {
  int port = this.conf.getInt(HConstants.REGIONSERVER_INFO_PORT,
    HConstants.DEFAULT_REGIONSERVER_INFOPORT);
  String addr = this.conf.get("hbase.regionserver.info.bindAddress", "0.0.0.0");
  // The master reuses this method but reads master-specific config.
  if(this instanceof HMaster) {
    port = conf.getInt(HConstants.MASTER_INFO_PORT,
      HConstants.DEFAULT_MASTER_INFOPORT);
    addr = this.conf.get("hbase.master.info.bindAddress", "0.0.0.0");
  }
  // A negative port disables the info server.
  if (port < 0) return port;

  if (!Addressing.isLocalAddress(InetAddress.getByName(addr))) {
    String msg =
      "Failed to start http info server. Address " + addr
        + " does not belong to this host. Correct configuration parameter: "
        + "hbase.regionserver.info.bindAddress";
    LOG.error(msg);
    throw new IOException(msg);
  }
  // Check whether automatic port selection is enabled.
  boolean auto = this.conf.getBoolean(HConstants.REGIONSERVER_INFO_PORT_AUTO,
    false);
  while (true) {
    try {
      this.infoServer = new InfoServer(getProcessName(), addr, port, false, this.conf);
      infoServer.addServlet("dump", "/dump", getDumpServlet());
      configureInfoServer();
      this.infoServer.start();
      break;
    } catch (BindException e) {
      if (!auto) {
        // Auto binding disabled: propagate the BindException.
        LOG.error("Failed binding http info server to port: " + port);
        throw e;
      }
      // Auto binding enabled: try the next port.
      LOG.info("Failed binding http info server to port: " + port);
      port++;
      LOG.info("Retry starting http info server with port: " + port);
    }
  }
  // Publish the actually-bound port back into the configuration.
  port = this.infoServer.getPort();
  conf.setInt(HConstants.REGIONSERVER_INFO_PORT, port);
  int masterInfoPort = conf.getInt(HConstants.MASTER_INFO_PORT,
    HConstants.DEFAULT_MASTER_INFOPORT);
  conf.setInt("hbase.master.info.port.orig", masterInfoPort);
  conf.setInt(HConstants.MASTER_INFO_PORT, port);
  return port;
}
2077
2078
2079
2080
  /**
   * Verifies that this server's essential threads and chores are still running.
   * Side effect: calls {@link #stop(String)} when a required thread has died.
   * @return true if the filesystem is ok and all critical threads are alive/scheduled.
   */
  private boolean isHealthy() {
    if (!fsOk) {
      // File system problem; caller is expected to react (e.g. abort).
      return false;
    }
    // Verify that all threads are alive
    if (!(leases.isAlive()
        && cacheFlusher.isAlive() && walRoller.isAlive()
        && this.compactionChecker.isScheduled()
        && this.periodicFlusher.isScheduled())) {
      stop("One or more threads are no longer alive -- stop");
      return false;
    }
    // The meta WAL roller is created lazily; only check it if it exists.
    final LogRoller metawalRoller = this.metawalRoller.get();
    if (metawalRoller != null && !metawalRoller.isAlive()) {
      stop("Meta WAL roller thread is no longer alive -- stop");
      return false;
    }
    return true;
  }
2101
  // Sentinel identifier used to request the common (default) WAL when no region is given.
  private static final byte[] UNSPECIFIED_REGION = new byte[]{};

  /** @return all WALs managed by this server's wal factory. */
  public List<WAL> getWALs() throws IOException {
    return walFactory.getWALs();
  }

  /**
   * Returns the WAL for the given region, registering it with the appropriate roller.
   * The default-replica meta region gets a dedicated meta WAL and roller.
   * @param regionInfo region whose WAL is wanted; null returns the common WAL.
   */
  @Override
  public WAL getWAL(HRegionInfo regionInfo) throws IOException {
    WAL wal;
    LogRoller roller = walRoller;
    // _ROOT_ and hbase:meta regions have separate WAL.
    if (regionInfo != null && regionInfo.isMetaTable() &&
        regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
      roller = ensureMetaWALRoller();
      wal = walFactory.getMetaWAL(regionInfo.getEncodedNameAsBytes());
    } else if (regionInfo == null) {
      wal = walFactory.getWAL(UNSPECIFIED_REGION, null);
    } else {
      byte[] namespace = regionInfo.getTable().getNamespace();
      wal = walFactory.getWAL(regionInfo.getEncodedNameAsBytes(), namespace);
    }
    roller.addWAL(wal);
    return wal;
  }
2126
  /** @return the cluster connection used by this server. */
  @Override
  public ClusterConnection getConnection() {
    return this.clusterConnection;
  }

  /** @return locator for the hbase:meta table. */
  @Override
  public MetaTableLocator getMetaTableLocator() {
    return this.metaTableLocator;
  }

  /** Stops this server; coprocessor preStop() hooks may veto (non-forced). */
  @Override
  public void stop(final String msg) {
    stop(msg, false);
  }
2141
2142
2143
2144
2145
2146
  /**
   * Stops the regionserver.
   * @param msg Status message
   * @param force true if this is a regionserver abort; coprocessor preStop()
   *   exceptions are then logged and ignored rather than vetoing the stop.
   */
  public void stop(final String msg, final boolean force) {
    if (!this.stopped) {
      if (this.rsHost != null) {
        // when forced via abort don't allow CPs to override
        try {
          this.rsHost.preStop(msg);
        } catch (IOException ioe) {
          if (!force) {
            LOG.warn("The region server did not stop", ioe);
            return;
          }
          LOG.warn("Skipping coprocessor exception on preStop() due to forced shutdown", ioe);
        }
      }
      this.stopped = true;
      LOG.info("STOPPED: " + msg);
      // Wakes run() if it is sleeping
      sleeper.skipSleepCycle();
    }
  }
2167
  /**
   * Blocks until this server comes online or is stopped. Uses timed waits on
   * the {@code online} monitor so the stopped flag is re-checked periodically.
   * Restores the interrupt flag and returns early if interrupted.
   */
  public void waitForServerOnline(){
    while (!isStopped() && !isOnline()) {
      synchronized (online) {
        try {
          // Timed wait: guards against a missed notify and lets us re-check isStopped().
          online.wait(msgInterval);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
          break;
        }
      }
    }
  }
2180
  /** Convenience overload: post-open tasks with no master system time (-1). */
  @Override
  public void postOpenDeployTasks(final Region r) throws KeeperException, IOException {
    postOpenDeployTasks(new PostOpenDeployContext(r, -1));
  }

  /**
   * Tasks run after a region has been opened on this server: request compactions
   * where needed, publish the region's location (ZK for meta, hbase:meta otherwise),
   * and report the OPENED transition to the master when ZK assignment is not used.
   * @throws IOException if the master rejects the OPENED transition report.
   */
  @Override
  public void postOpenDeployTasks(final PostOpenDeployContext context)
      throws KeeperException, IOException {
    Region r = context.getRegion();
    long masterSystemTime = context.getMasterSystemTime();
    Preconditions.checkArgument(r instanceof HRegion, "r must be an HRegion");
    rpcServices.checkOpen();
    LOG.info("Post open deploy tasks for " + r.getRegionInfo().getRegionNameAsString());
    // Do checks to see if we need to compact (references or too many files)
    for (Store s : r.getStores()) {
      if (s.hasReferences() || s.needsCompaction()) {
       this.compactSplitThread.requestSystemCompaction(r, s, "Opening Region");
      }
    }
    long openSeqNum = r.getOpenSeqNum();
    if (openSeqNum == HConstants.NO_SEQNUM) {
      // If we opened a region, we should have read some sequence number from it.
      LOG.error("No sequence number found when opening " +
        r.getRegionInfo().getRegionNameAsString());
      openSeqNum = 0;
    }

    // Update flushed sequence id of a recovering region in ZK
    updateRecoveringRegionLastFlushedSequenceId(r);

    // Update ZK, or META: meta location lives in ZK; user regions in hbase:meta.
    if (r.getRegionInfo().isMetaRegion()) {
      MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, r.getRegionInfo().getReplicaId(),
         State.OPEN);
    } else if (useZKForAssignment) {
      MetaTableAccessor.updateRegionLocation(getConnection(), r.getRegionInfo(),
        this.serverName, openSeqNum, masterSystemTime);
    }
    if (!useZKForAssignment && !reportRegionStateTransition(new RegionStateTransitionContext(
        TransitionCode.OPENED, openSeqNum, masterSystemTime, r.getRegionInfo()))) {
      throw new IOException("Failed to report opened region to master: "
        + r.getRegionInfo().getRegionNameAsString());
    }

    triggerFlushInPrimaryRegion((HRegion)r);

    LOG.debug("Finished post open deploy task for " + r.getRegionInfo().getRegionNameAsString());
  }
2229
  /** Convenience overload: report a transition with no open sequence number. */
  @Override
  public boolean reportRegionStateTransition(TransitionCode code, HRegionInfo... hris) {
    return reportRegionStateTransition(code, HConstants.NO_SEQNUM, hris);
  }
2234
2235 @Override
2236 public boolean reportRegionStateTransition(
2237 TransitionCode code, long openSeqNum, HRegionInfo... hris) {
2238 return reportRegionStateTransition(
2239 new RegionStateTransitionContext(code, HConstants.NO_SEQNUM, -1, hris));
2240 }
2241
  /**
   * Builds a ReportRegionStateTransitionRequest and keeps retrying against the
   * master stub until accepted, the master reports an error, or this server is
   * stopping / the cluster is going down.
   * @return true if the transition was accepted by the master.
   */
  @Override
  public boolean reportRegionStateTransition(final RegionStateTransitionContext context) {
    TransitionCode code = context.getCode();
    long openSeqNum = context.getOpenSeqNum();
    HRegionInfo[] hris = context.getHris();

    ReportRegionStateTransitionRequest.Builder builder =
      ReportRegionStateTransitionRequest.newBuilder();
    builder.setServer(ProtobufUtil.toServerName(serverName));
    RegionStateTransition.Builder transition = builder.addTransitionBuilder();
    transition.setTransitionCode(code);
    // Only an OPENED transition carries a meaningful open sequence number.
    if (code == TransitionCode.OPENED && openSeqNum >= 0) {
      transition.setOpenSeqNum(openSeqNum);
    }
    for (HRegionInfo hri: hris) {
      transition.addRegionInfo(HRegionInfo.convert(hri));
    }
    ReportRegionStateTransitionRequest request = builder.build();
    while (keepLooping()) {
      // Snapshot the stub: another thread may swap rssStub concurrently.
      RegionServerStatusService.BlockingInterface rss = rssStub;
      try {
        if (rss == null) {
          createRegionServerStatusStub();
          continue;
        }
        ReportRegionStateTransitionResponse response =
          rss.reportRegionStateTransition(null, request);
        if (response.hasErrorMessage()) {
          LOG.info("Failed to transition " + hris[0]
            + " to " + code + ": " + response.getErrorMessage());
          return false;
        }
        return true;
      } catch (ServiceException se) {
        IOException ioe = ProtobufUtil.getRemoteException(se);
        LOG.info("Failed to report region transition, will retry", ioe);
        // Invalidate the stub only if nobody replaced it already.
        if (rssStub == rss) {
          rssStub = null;
        }
      }
    }
    return false;
  }
2285
2286
2287
2288
2289
  /**
   * For a secondary region replica, either enables reads immediately (when the
   * wait-for-primary-flush feature is off) or disables reads and submits an async
   * handler that triggers a flush in the primary before re-enabling them.
   * No-op for the default (primary) replica.
   */
  void triggerFlushInPrimaryRegion(final HRegion region) {
    if (ServerRegionReplicaUtil.isDefaultReplica(region.getRegionInfo())) {
      return;
    }
    if (!ServerRegionReplicaUtil.isRegionReplicaReplicationEnabled(region.conf) ||
        !ServerRegionReplicaUtil.isRegionReplicaWaitForPrimaryFlushEnabled(
          region.conf)) {
      region.setReadsEnabled(true);
      return;
    }

    // Disable reads until the flush handler confirms the replica caught up.
    region.setReadsEnabled(false); // disable reads before marking the region as opened.
    // RegionReplicaFlushHandler might reset this.

    // Submit it to be handled by one of the handlers so that we do not block OpenRegionHandler
    this.service.submit(
      new RegionReplicaFlushHandler(this, clusterConnection,
        rpcRetryingCallerFactory, rpcControllerFactory, operationTimeout, region));
  }
2309
  /** @return the RPC server instance backing this server's RPC services. */
  @Override
  public RpcServerInterface getRpcServer() {
    return rpcServices.rpcServer;
  }

  /** @return the RSRpcServices implementation (for internal/test use). */
  @InterfaceAudience.Private
  public RSRpcServices getRSRpcServices() {
    return rpcServices;
  }
2319
2320
2321
2322
2323
2324
  /**
   * Atomically marks abort as requested.
   * @return true if this call was the one that set the flag (i.e. abort was
   *   not already in progress).
   */
  protected boolean setAbortRequested() {
    return abortRequested.compareAndSet(false, true);
  }
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
  /**
   * Aborts this server: logs the fatal reason/cause, dumps loaded coprocessors
   * and metrics for post-mortem, best-effort reports the fatal error to the
   * master, then forces a stop. Idempotent: subsequent calls are ignored.
   * @param reason why the server is aborting
   * @param cause optional throwable that triggered the abort
   */
  @Override
  public void abort(final String reason, Throwable cause) {
    if (!setAbortRequested()) {
      // Abort already requested by another caller; do not abort twice.
      LOG.debug(String.format(
        "Abort already in progress. Ignoring the current request with reason: %s", reason));
      return;
    }
    String msg = "ABORTING region server " + this + ": " + reason;
    if (cause != null) {
      LOG.fatal(msg, cause);
    } else {
      LOG.fatal(msg);
    }
    // HBASE-4014: show list of coprocessors that were loaded to help debug
    // regionserver crashes.Note that we're implicitly using
    // java.util.HashSet's toString() method to print the coprocessor names.
    LOG.fatal("RegionServer abort: loaded coprocessors are: " +
        CoprocessorHost.getLoadedCoprocessors());
    // Try and dump metrics if abort -- might give clue as to how fatal came about....
    try {
      LOG.info("Dump of metrics as JSON on abort: " + JSONBean.dumpRegionServerMetrics());
    } catch (MalformedObjectNameException | IOException e) {
      LOG.warn("Failed dumping metrics", e);
    }

    // Do our best to report our abort to the master, but this may not work
    try {
      if (cause != null) {
        msg += "\nCause:\n" + StringUtils.stringifyException(cause);
      }
      // Report to the master but only if we have already registered with the master.
      if (rssStub != null && this.serverName != null) {
        ReportRSFatalErrorRequest.Builder builder =
          ReportRSFatalErrorRequest.newBuilder();
        builder.setServer(ProtobufUtil.toServerName(this.serverName));
        builder.setErrorMessage(msg);
        rssStub.reportRSFatalError(null, builder.build());
      }
    } catch (Throwable t) {
      LOG.warn("Unable to report fatal error to master", t);
    }
    // shutdown should be run as the internal user
    if (User.isHBaseSecurityEnabled(conf)) {
      try {
        User.runAsLoginUser(new PrivilegedExceptionAction<Object>() {
          @Override
          public Object run() throws Exception {
            // force=true skips coprocessor veto in preStop().
            stop(reason, true);
            return null;
          }
        });
      } catch (IOException neverThrown) {
        // runAsLoginUser's action does not throw IOException here; ignore.
      }
    } else {
      stop(reason, true);
    }
  }
2397
2398
2399
2400
  /** Aborts this server with the given reason and no cause. */
  public void abort(String reason) {
    abort(reason, null);
  }

  /** @return true once abort has been requested. */
  @Override
  public boolean isAborted() {
    return this.abortRequested.get();
  }

  /**
   * Simulates a kill -9 of this server for tests: sets the killed flag (which
   * shortcuts cleanup during shutdown) and then aborts.
   */
  @InterfaceAudience.Private
  protected void kill() {
    this.killed = true;
    abort("Simulated kill");
  }
2420
2421
2422
2423
  /** Hook called on shutdown to interrupt threads; no-op here, subclasses may override. */
  protected void sendShutdownInterrupt() {
  }
2426
2427
2428
2429
2430
  /**
   * Cancels all chores, shuts down the chore service, and joins/stops the
   * service threads (flusher, rollers, compactor, executors, replication).
   */
  protected void stopServiceThreads() {
    // clean up the scheduled chores
    if (this.choreService != null) {
      // cancelChore tolerates null chores, so each optional chore is passed directly.
      choreService.cancelChore(nonceManagerChore);
      choreService.cancelChore(compactionChecker);
      choreService.cancelChore(periodicFlusher);
      choreService.cancelChore(healthCheckChore);
      choreService.cancelChore(executorStatusChore);
      choreService.cancelChore(storefileRefresher);
      choreService.cancelChore(movedRegionsCleaner);
      choreService.cancelChore(slowLogTableOpsChore);
      // clean up the remaining scheduled chores (in case we missed out any)
      choreService.shutdown();
    }

    if (this.cacheFlusher != null) {
      this.cacheFlusher.join();
    }

    if (this.spanReceiverHost != null) {
      this.spanReceiverHost.closeReceivers();
    }
    if (this.walRoller != null) {
      Threads.shutdown(this.walRoller.getThread());
    }
    final LogRoller metawalRoller = this.metawalRoller.get();
    if (metawalRoller != null) {
      Threads.shutdown(metawalRoller.getThread());
    }
    if (this.compactSplitThread != null) {
      this.compactSplitThread.join();
    }
    if (this.service != null) this.service.shutdown();
    // When source and sink are the same handler, stop it only once.
    if (this.replicationSourceHandler != null &&
        this.replicationSourceHandler == this.replicationSinkHandler) {
      this.replicationSourceHandler.stopReplicationService();
    } else {
      if (this.replicationSourceHandler != null) {
        this.replicationSourceHandler.stopReplicationService();
      }
      if (this.replicationSinkHandler != null) {
        this.replicationSinkHandler.stopReplicationService();
      }
    }
  }
2476
2477
2478
2479
2480
  /** @return the replication source service, or null if replication is not running. */
  @InterfaceAudience.Private
  public ReplicationSourceService getReplicationSourceService() {
    return replicationSourceHandler;
  }

  /** @return the replication sink service, or null if replication is not running. */
  ReplicationSinkService getReplicationSinkService() {
    return replicationSinkHandler;
  }
2493
2494
2495
2496
2497
2498
2499
2500
2501
  /**
   * Creates the RegionServerStatusService stub against the current master
   * (without forcing a ZK refresh of the master address).
   * @return the master's server name, or null if none could be found.
   */
  @InterfaceAudience.Private
  protected synchronized ServerName createRegionServerStatusStub() {
    // Create RS stub without refreshing the master node from ZK, use cached data
    return createRegionServerStatusStub(false);
  }
2507
2508
2509
2510
2511
2512
2513
2514
  /**
   * Creates the RegionServerStatusService stub to the master, retrying until
   * a master is found or this server/cluster is stopping. Preserves the thread
   * interrupt status if interrupted while sleeping between retries.
   * @param refresh whether to force re-reading the master address from ZK
   * @return the master's server name, or null if none was found before giving up.
   */
  @InterfaceAudience.Private
  protected synchronized ServerName createRegionServerStatusStub(boolean refresh) {
    // Fast path: stub already exists, just report the current master address.
    if (rssStub != null) {
      return masterAddressTracker.getMasterAddress();
    }
    ServerName sn = null;
    long previousLogTime = 0;
    RegionServerStatusService.BlockingInterface intf = null;
    boolean interrupted = false;
    try {
      while (keepLooping()) {
        sn = this.masterAddressTracker.getMasterAddress(refresh);
        if (sn == null) {
          if (!keepLooping()) {
            // give up with no connection.
            LOG.debug("No master found and cluster is stopped; bailing out");
            return null;
          }
          // Rate-limit the "no master" log to once per second.
          if (System.currentTimeMillis() > (previousLogTime + 1000)) {
            LOG.debug("No master found; retry");
            previousLogTime = System.currentTimeMillis();
          }
          refresh = true; // let's try pull it from ZK directly
          if (sleep(200)) {
            interrupted = true;
          }
          continue;
        }

        // If we are on the active master, use the shortcut
        if (this instanceof HMaster && sn.equals(getServerName())) {
          intf = ((HMaster)this).getMasterRpcServices();
          break;
        }
        try {
          BlockingRpcChannel channel =
            this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(),
              shortOperationTimeout);
          intf = RegionServerStatusService.newBlockingStub(channel);
          break;
        } catch (IOException e) {
          if (System.currentTimeMillis() > (previousLogTime + 1000)) {
            e = e instanceof RemoteException ?
              ((RemoteException)e).unwrapRemoteException() : e;
            if (e instanceof ServerNotRunningYetException) {
              LOG.info("Master isn't available yet, retrying");
            } else {
              LOG.warn("Unable to connect to master. Retrying. Error was:", e);
            }
            previousLogTime = System.currentTimeMillis();
          }
          if (sleep(200)) {
            interrupted = true;
          }
        }
      }
    } finally {
      // Restore the interrupt flag swallowed by the retry sleeps.
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
    rssStub = intf;
    return sn;
  }
2579
2580
2581
2582
2583
2584 private boolean keepLooping() {
2585 return !this.stopped && isClusterUp();
2586 }
2587
2588
2589
2590
2591
2592
2593
2594
  /**
   * Registers this server with the master (regionServerStartup RPC), resetting
   * request counters first. On failure the stub is invalidated so the next
   * attempt reconnects; a clock-out-of-sync rejection is fatal and rethrown.
   * @return the master's startup response, or null if no master / transient failure.
   */
  private RegionServerStartupResponse reportForDuty() throws IOException {
    ServerName masterServerName = createRegionServerStatusStub(true);
    if (masterServerName == null) return null;
    RegionServerStartupResponse result = null;
    try {
      // Fresh registration: zero out all request counters.
      rpcServices.requestCount.set(0);
      rpcServices.rpcGetRequestCount.set(0);
      rpcServices.rpcScanRequestCount.set(0);
      rpcServices.rpcMultiRequestCount.set(0);
      rpcServices.rpcMutateRequestCount.set(0);
      LOG.info("reportForDuty to master=" + masterServerName + " with port="
        + rpcServices.isa.getPort() + ", startcode=" + this.startcode);
      long now = EnvironmentEdgeManager.currentTime();
      int port = rpcServices.isa.getPort();
      RegionServerStartupRequest.Builder request = RegionServerStartupRequest.newBuilder();
      if (shouldUseThisHostnameInstead()) {
        request.setUseThisHostnameInstead(useThisHostnameInstead);
      }
      request.setPort(port);
      request.setServerStartCode(this.startcode);
      request.setServerCurrentTime(now);
      result = this.rssStub.regionServerStartup(null, request.build());
    } catch (ServiceException se) {
      IOException ioe = ProtobufUtil.getRemoteException(se);
      if (ioe instanceof ClockOutOfSyncException) {
        LOG.fatal("Master rejected startup because clock is out of sync", ioe);
        // Re-throw IOE will cause RS to abort
        throw ioe;
      } else if (ioe instanceof ServerNotRunningYetException) {
        LOG.debug("Master is not running yet");
      } else {
        LOG.warn("error telling master we are up", se);
      }
      // Drop the stub so the caller reconnects on retry.
      rssStub = null;
    }
    return result;
  }
2632
  /**
   * Asks the master for the last flushed sequence id of the given region.
   * Falls back to HConstants.NO_SEQNUM when the master is unreachable.
   * @param encodedRegionName encoded region name to query
   */
  @Override
  public RegionStoreSequenceIds getLastSequenceId(byte[] encodedRegionName) {
    try {
      GetLastFlushedSequenceIdRequest req =
          RequestConverter.buildGetLastFlushedSequenceIdRequest(encodedRegionName);
      RegionServerStatusService.BlockingInterface rss = rssStub;
      if (rss == null) { // Try to connect one more time
        createRegionServerStatusStub();
        rss = rssStub;
        if (rss == null) {
          // Still no luck, we tried
          LOG.warn("Unable to connect to the master to check " + "the last flushed sequence id");
          return RegionStoreSequenceIds.newBuilder().setLastFlushedSequenceId(HConstants.NO_SEQNUM)
              .build();
        }
      }
      GetLastFlushedSequenceIdResponse resp = rss.getLastFlushedSequenceId(null, req);
      return RegionStoreSequenceIds.newBuilder()
          .setLastFlushedSequenceId(resp.getLastFlushedSequenceId())
          .addAllStoreSequenceId(resp.getStoreLastFlushedSequenceIdList()).build();
    } catch (ServiceException e) {
      LOG.warn("Unable to connect to the master to check the last flushed sequence id", e);
      return RegionStoreSequenceIds.newBuilder().setLastFlushedSequenceId(HConstants.NO_SEQNUM)
          .build();
    }
  }
2659
2660
2661
2662
2663
2664
  /**
   * Closes all regions hosted on this server: user regions first, then meta.
   * @param abort whether we are aborting (skips graceful close work).
   */
  protected void closeAllRegions(final boolean abort) {
    closeUserRegions(abort);
    closeMetaTableRegions(abort);
  }

  /**
   * Finds and closes the meta region if it is hosted here.
   * @param abort whether we are aborting.
   */
  void closeMetaTableRegions(final boolean abort) {
    Region meta = null;
    this.lock.writeLock().lock();
    try {
      for (Map.Entry<String, Region> e: onlineRegions.entrySet()) {
        HRegionInfo hri = e.getValue().getRegionInfo();
        if (hri.isMetaRegion()) {
          meta = e.getValue();
        }
        // Stop scanning as soon as meta is found.
        if (meta != null) break;
      }
    } finally {
      this.lock.writeLock().unlock();
    }
    // Close outside the lock: closeRegionIgnoreErrors may take a while.
    if (meta != null) closeRegionIgnoreErrors(meta.getRegionInfo(), abort);
  }
2690
2691
2692
2693
2694
2695
2696
  /**
   * Schedules closes for all available non-meta regions hosted on this server.
   * Holds the write lock while iterating to prevent concurrent opens.
   * @param abort whether we are aborting.
   */
  void closeUserRegions(final boolean abort) {
    this.lock.writeLock().lock();
    try {
      for (Map.Entry<String, Region> e: this.onlineRegions.entrySet()) {
        Region r = e.getValue();
        if (!r.getRegionInfo().isMetaTable() && r.isAvailable()) {
          // Don't update zk with this close transition; pass false.
          closeRegionIgnoreErrors(r.getRegionInfo(), abort);
        }
      }
    } finally {
      this.lock.writeLock().unlock();
    }
  }
2711
2712
  /** @return the info (web UI) server, or null if it was not started. */
  public InfoServer getInfoServer() {
    return infoServer;
  }

  /** @return true if this server has been fully stopped. */
  @Override
  public boolean isStopped() {
    return this.stopped;
  }

  /** @return true if this server is in the process of stopping. */
  @Override
  public boolean isStopping() {
    return this.stopping;
  }

  /** @return map of regions currently being recovered on this server. */
  @Override
  public Map<String, Region> getRecoveringRegions() {
    return this.recoveringRegions;
  }

  /** @return the configuration this server was constructed with. */
  @Override
  public Configuration getConfiguration() {
    return conf;
  }

  /** @return the write lock guarding the online-regions map. */
  ReentrantReadWriteLock.WriteLock getWriteLock() {
    return lock.writeLock();
  }

  /** @return number of regions currently online on this server. */
  public int getNumberOfOnlineRegions() {
    return this.onlineRegions.size();
  }

  /** @return true if no regions are currently online. */
  boolean isOnlineRegionsEmpty() {
    return this.onlineRegions.isEmpty();
  }
2756
2757
2758
2759
2760
2761
  /**
   * @return an unmodifiable live view of the online regions; values reflect
   *   concurrent changes to the underlying map.
   */
  public Collection<Region> getOnlineRegionsLocalContext() {
    Collection<Region> regions = this.onlineRegions.values();
    return Collections.unmodifiableCollection(regions);
  }

  /** Registers a region as online and subscribes it to configuration changes. */
  @Override
  public void addToOnlineRegions(Region region) {
    this.onlineRegions.put(region.getRegionInfo().getEncodedName(), region);
    configurationManager.registerObserver(region);
  }
2772
2773
2774
2775
2776
2777
2778 SortedMap<Long, Region> getCopyOfOnlineRegionsSortedBySize() {
2779
2780 SortedMap<Long, Region> sortedRegions = new TreeMap<Long, Region>(
2781 new Comparator<Long>() {
2782 @Override
2783 public int compare(Long a, Long b) {
2784 return -1 * a.compareTo(b);
2785 }
2786 });
2787
2788 for (Region region : this.onlineRegions.values()) {
2789 sortedRegions.put(region.getMemstoreSize(), region);
2790 }
2791 return sortedRegions;
2792 }
2793
2794
2795
2796
  /** @return time stamp in millis of when this server was started. */
  public long getStartcode() {
    return this.startcode;
  }

  /** @return reference to FlushRequester */
  @Override
  public FlushRequester getFlushRequester() {
    return this.cacheFlusher;
  }

  /** @return the leases manager for this server. */
  @Override
  public Leases getLeases() {
    return leases;
  }

  /** @return the HBase root directory path. */
  protected Path getRootDir() {
    return rootDir;
  }

  /** @return the filesystem backing the root directory. */
  @Override
  public FileSystem getFileSystem() {
    return fs;
  }

  /** @return the WAL root directory path. */
  protected Path getWALRootDir() {
    return walRootDir;
  }

  /** @return the filesystem backing the WAL root directory. */
  protected FileSystem getWALFileSystem() {
    return walFs;
  }

  /** @return this server's name as a string. */
  @Override
  public String toString() {
    return getServerName().toString();
  }

  /**
   * Interval at which threads should run.
   * @return the interval period (in milliseconds) for thread wake frequency.
   */
  public int getThreadWakeFrequency() {
    return threadWakeFrequency;
  }

  /** @return the ZooKeeper watcher for this server. */
  @Override
  public ZooKeeperWatcher getZooKeeper() {
    return zooKeeper;
  }

  /** @return the coordinated state manager. */
  @Override
  public BaseCoordinatedStateManager getCoordinatedStateManager() {
    return csm;
  }

  /** @return this server's name. */
  @Override
  public ServerName getServerName() {
    return serverName;
  }

  /** @return the compaction/split thread acting as compaction requester. */
  @Override
  public CompactionRequestor getCompactionRequester() {
    return this.compactSplitThread;
  }

  /** @return the region server coprocessor host. */
  public RegionServerCoprocessorHost getRegionServerCoprocessorHost(){
    return this.rsHost;
  }

  /** @return map of regions in transition on this server, keyed by region name. */
  @Override
  public ConcurrentMap<byte[], Boolean> getRegionsInTransitionInRS() {
    return this.regionsInTransitionInRS;
  }

  /** @return the executor service used to run region open/close handlers. */
  @Override
  public ExecutorService getExecutorService() {
    return service;
  }

  /** @return the chore service for scheduled periodic tasks. */
  @Override
  public ChoreService getChoreService() {
    return choreService;
  }

  /** @return the region server quota manager. */
  @Override
  public RegionServerQuotaManager getRegionServerQuotaManager() {
    return rsQuotaManager;
  }
2898
2899
2900
2901
2902
2903
2904
2905
2906 static private void createNewReplicationInstance(Configuration conf,
2907 HRegionServer server, FileSystem walFs, Path walDir, Path oldWALDir) throws IOException{
2908
2909
2910 if (!conf.getBoolean(HConstants.REPLICATION_ENABLE_KEY,
2911 HConstants.REPLICATION_ENABLE_DEFAULT)) {
2912 return;
2913 }
2914
2915 if ((server instanceof HMaster) &&
2916 (!BaseLoadBalancer.userTablesOnMaster(conf))) {
2917 return;
2918 }
2919
2920
2921 String sourceClassname = conf.get(HConstants.REPLICATION_SOURCE_SERVICE_CLASSNAME,
2922 HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
2923
2924
2925 String sinkClassname = conf.get(HConstants.REPLICATION_SINK_SERVICE_CLASSNAME,
2926 HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
2927
2928
2929
2930 if (sourceClassname.equals(sinkClassname)) {
2931 server.replicationSourceHandler = (ReplicationSourceService)
2932 newReplicationInstance(sourceClassname,
2933 conf, server, walFs, walDir, oldWALDir);
2934 server.replicationSinkHandler = (ReplicationSinkService)
2935 server.replicationSourceHandler;
2936 } else {
2937 server.replicationSourceHandler = (ReplicationSourceService)
2938 newReplicationInstance(sourceClassname,
2939 conf, server, walFs, walDir, oldWALDir);
2940 server.replicationSinkHandler = (ReplicationSinkService)
2941 newReplicationInstance(sinkClassname,
2942 conf, server, walFs, walDir, oldWALDir);
2943 }
2944 }
2945
2946 static private ReplicationService newReplicationInstance(String classname,
2947 Configuration conf, HRegionServer server, FileSystem walFs, Path walDir,
2948 Path oldLogDir) throws IOException{
2949
2950 Class<?> clazz = null;
2951 try {
2952 ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
2953 clazz = Class.forName(classname, true, classLoader);
2954 } catch (java.lang.ClassNotFoundException nfe) {
2955 throw new IOException("Could not find class for " + classname);
2956 }
2957
2958
2959 ReplicationService service = (ReplicationService)
2960 ReflectionUtils.newInstance(clazz, conf);
2961 service.initialize(server, walFs, walDir, oldLogDir);
2962 return service;
2963 }
2964
2965 public Map<String, ReplicationStatus> getWalGroupsReplicationStatus(){
2966 Map<String, ReplicationStatus> walGroupsReplicationStatus = new TreeMap<>();
2967 if(!this.isOnline()){
2968 return walGroupsReplicationStatus;
2969 }
2970 List<ReplicationSourceInterface> allSources = new ArrayList<>();
2971 allSources.addAll(replicationSourceHandler.getReplicationManager().getSources());
2972 allSources.addAll(replicationSourceHandler.getReplicationManager().getOldSources());
2973 for(ReplicationSourceInterface source: allSources){
2974 walGroupsReplicationStatus.putAll(source.getWalGroupStatus());
2975 }
2976 return walGroupsReplicationStatus;
2977 }
2978
2979
2980
2981
2982
2983
2984
2985
  /**
   * Utility for constructing an instance of the passed HRegionServer class
   * via its (Configuration, CoordinatedStateManager) constructor.
   * @throws RuntimeException wrapping any reflection/construction failure.
   */
  public static HRegionServer constructRegionServer(
      Class<? extends HRegionServer> regionServerClass,
      final Configuration conf2, CoordinatedStateManager cp) {
    try {
      Constructor<? extends HRegionServer> c = regionServerClass
          .getConstructor(Configuration.class, CoordinatedStateManager.class);
      return c.newInstance(conf2, cp);
    } catch (Exception e) {
      throw new RuntimeException("Failed construction of " + "Regionserver: "
          + regionServerClass.toString(), e);
    }
  }
2998
2999
3000
3001
  /**
   * Entry point: logs the version, resolves the configured region server
   * implementation class and delegates to HRegionServerCommandLine.
   */
  public static void main(String[] args) throws Exception {
    VersionInfo.logVersion();
    Configuration conf = HBaseConfiguration.create();
    @SuppressWarnings("unchecked")
    Class<? extends HRegionServer> regionServerClass = (Class<? extends HRegionServer>) conf
        .getClass(HConstants.REGION_SERVER_IMPL, HRegionServer.class);

    new HRegionServerCommandLine(regionServerClass).doMain(args);
  }
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
  /**
   * Gets the online regions of the specified table.
   * @param tableName table whose regions are wanted
   * @return list (possibly empty) of this server's online regions for the table.
   */
  @Override
  public List<Region> getOnlineRegions(TableName tableName) {
     List<Region> tableRegions = new ArrayList<Region>();
     synchronized (this.onlineRegions) {
       for (Region region: this.onlineRegions.values()) {
         HRegionInfo regionInfo = region.getRegionInfo();
         if(regionInfo.getTable().equals(tableName)) {
           tableRegions.add(region);
         }
       }
     }
     return tableRegions;
   }
3035
3036
3037
3038
3039
3040
  /**
   * Gets the set of tables that currently have at least one region online
   * on this server.
   */
  @Override
  public Set<TableName> getOnlineTables() {
    Set<TableName> tables = new HashSet<TableName>();
    synchronized (this.onlineRegions) {
      for (Region region: this.onlineRegions.values()) {
        tables.add(region.getTableDesc().getTableName());
      }
    }
    return tables;
  }
3051
3052
  /**
   * Collects the names of all coprocessors loaded on this server: the common
   * WAL's, each online region's, each region WAL's, and the region server
   * host's. WAL lookup failures are logged and skipped, not fatal.
   * @return sorted, de-duplicated coprocessor names.
   */
  public String[] getRegionServerCoprocessors() {
    TreeSet<String> coprocessors = new TreeSet<String>();
    try {
      coprocessors.addAll(getWAL(null).getCoprocessorHost().getCoprocessors());
    } catch (IOException exception) {
      LOG.warn("Exception attempting to fetch wal coprocessor information for the common wal; " +
          "skipping.");
      LOG.debug("Exception details for failure to fetch wal coprocessor information.", exception);
    }
    Collection<Region> regions = getOnlineRegionsLocalContext();
    for (Region region: regions) {
      coprocessors.addAll(region.getCoprocessorHost().getCoprocessors());
      try {
        coprocessors.addAll(getWAL(region.getRegionInfo()).getCoprocessorHost().getCoprocessors());
      } catch (IOException exception) {
        LOG.warn("Exception attempting to fetch wal coprocessor information for region " + region +
            "; skipping.");
        LOG.debug("Exception details for failure to fetch wal coprocessor information.", exception);
      }
    }
    coprocessors.addAll(rsHost.getCoprocessors());
    return coprocessors.toArray(new String[coprocessors.size()]);
  }
3076
  /** @return a snapshot list of all regions currently online on this server. */
  @Override
  public List<Region> getOnlineRegions() {
    List<Region> allRegions = new ArrayList<Region>();
    synchronized (this.onlineRegions) {
      // Return a clone copy of the onlineRegions
      allRegions.addAll(onlineRegions.values());
    }
    return allRegions;
  }
3086
3087
3088
3089
  /**
   * Closes the region, logging (not propagating) any failure so that
   * bulk-close paths can keep going.
   * @param abort whether we are aborting.
   */
  private void closeRegionIgnoreErrors(HRegionInfo region, final boolean abort) {
    try {
      CloseRegionCoordination.CloseRegionDetails details =
        csm.getCloseRegionCoordination().getDetaultDetails();
      if (!closeRegion(region.getEncodedName(), abort, details, null)) {
        LOG.warn("Failed to close " + region.getRegionNameAsString() +
            " - ignoring and continuing");
      }
    } catch (IOException e) {
      LOG.warn("Failed to close " + region.getRegionNameAsString() +
          " - ignoring and continuing", e);
    }
  }
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
  /**
   * Closes the named region, coordinating with any in-flight OPEN/CLOSE via
   * the regions-in-transition map: an in-flight OPEN is cancelled (or, if it
   * completed first, the close is retried); a duplicate CLOSE is rejected.
   * The actual close is performed asynchronously by a close handler.
   * @param encodedName encoded region name
   * @param abort whether we are aborting (skips graceful close work)
   * @param crd close-region coordination details
   * @param sn destination server for a move, or null
   * @return false if the coprocessor preClose hook vetoed; true once a close
   *   handler was submitted.
   * @throws NotServingRegionException if the region is neither online nor opening
   * @throws RegionAlreadyInTransitionException if an OPEN was cancelled before
   *   serving, or a CLOSE is already in progress
   */
  protected boolean closeRegion(String encodedName, final boolean abort,
      CloseRegionCoordination.CloseRegionDetails crd, final ServerName sn)
      throws NotServingRegionException, RegionAlreadyInTransitionException {
    //Check for permissions to close.
    Region actualRegion = this.getFromOnlineRegions(encodedName);
    // Can be null if we're calling close on a region that's not online
    if ((actualRegion != null) && (actualRegion.getCoprocessorHost() != null)) {
      try {
        actualRegion.getCoprocessorHost().preClose(false);
      } catch (IOException exp) {
        LOG.warn("Unable to close region: the coprocessor launched an error ", exp);
        return false;
      }
    }

    // CAS: FALSE marks "closing in progress"; a prior TRUE means an OPEN is in flight.
    final Boolean previous = this.regionsInTransitionInRS
        .putIfAbsent(Bytes.toBytes(encodedName), Boolean.FALSE);

    if (Boolean.TRUE.equals(previous)) {
      LOG.info("Received CLOSE for the region:" + encodedName + " , which we are already " +
          "trying to OPEN. Cancelling OPENING.");
      if (!regionsInTransitionInRS.replace(Bytes.toBytes(encodedName), previous, Boolean.FALSE)) {
        // The replace failed. That should be an exceptional case, but theoretically it can happen.
        // We're going to try to do a standard close then.
        LOG.warn("The opening for region " + encodedName + " was done before we could cancel it." +
            " Doing a standard close now");
        return closeRegion(encodedName, abort, crd, sn);
      }
      // Let's get the region from the online region list again
      actualRegion = this.getFromOnlineRegions(encodedName);
      if (actualRegion == null) { // If already online, we still need to close it.
        LOG.info("The opening previously in progress has been cancelled by a CLOSE request.");
        // The master deletes the znode when it receives this exception.
        throw new RegionAlreadyInTransitionException("The region " + encodedName +
          " was opening but not yet served. Opening is cancelled.");
      }
    } else if (Boolean.FALSE.equals(previous)) {
      LOG.info("Received CLOSE for the region: " + encodedName +
        ", which we are already trying to CLOSE, but not completed yet");
      // The master will retry till the region is closed. We need to do this since
      // the region could fail to close somehow. If we mark the region closed in master
      // while it is not, there could be data loss.
      // If the region stuck in closing for a while, and master runs out of retries,
      // master will move the region to failed_to_close. Later on, if the region
      // is indeed closed, master can properly re-assign it.
      throw new RegionAlreadyInTransitionException("The region " + encodedName +
        " was already closing. New CLOSE request is ignored.");
    }

    if (actualRegion == null) {
      LOG.error("Received CLOSE for a region which is not online, and we're not opening.");
      this.regionsInTransitionInRS.remove(Bytes.toBytes(encodedName));
      // The master deletes the znode when it receives this exception.
      throw new NotServingRegionException("The region " + encodedName +
          " is not online, and is not opening.");
    }

    CloseRegionHandler crh;
    final HRegionInfo hri = actualRegion.getRegionInfo();
    if (hri.isMetaRegion()) {
      crh = new CloseMetaHandler(this, this, hri, abort,
        csm.getCloseRegionCoordination(), crd);
    } else {
      crh = new CloseRegionHandler(this, this, hri, abort,
        csm.getCloseRegionCoordination(), crd, sn);
    }
    this.service.submit(crh);
    return true;
  }
3193
3194
3195
3196
3197
3198
3199 public Region getOnlineRegion(final byte[] regionName) {
3200 String encodedRegionName = HRegionInfo.encodeRegionName(regionName);
3201 return this.onlineRegions.get(encodedRegionName);
3202 }
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212 public InetSocketAddress[] getRegionBlockLocations(final String encodedRegionName)
3213 throws UnknownHostException {
3214 return Address.toSocketAddress(regionFavoredNodesMap.get(encodedRegionName));
3215 }
3216
3217 @Override
3218 public Region getFromOnlineRegions(final String encodedRegionName) {
3219 return this.onlineRegions.get(encodedRegionName);
3220 }
3221
3222 @Override
3223 public boolean removeFromOnlineRegions(final Region r, ServerName destination) {
3224 Region toReturn = this.onlineRegions.remove(r.getRegionInfo().getEncodedName());
3225 metricsRegionServerImpl.requestsCountCache.remove(r.getRegionInfo().getEncodedName());
3226 if (destination != null) {
3227 long closeSeqNum = r.getMaxFlushedSeqId();
3228 if (closeSeqNum == HConstants.NO_SEQNUM) {
3229
3230 closeSeqNum = r.getOpenSeqNum();
3231 if (closeSeqNum == HConstants.NO_SEQNUM) closeSeqNum = 0;
3232 }
3233 addToMovedRegions(r.getRegionInfo().getEncodedName(), destination, closeSeqNum);
3234 }
3235 this.regionFavoredNodesMap.remove(r.getRegionInfo().getEncodedName());
3236 return toReturn != null;
3237 }
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247 protected Region getRegion(final byte[] regionName)
3248 throws NotServingRegionException {
3249 String encodedRegionName = HRegionInfo.encodeRegionName(regionName);
3250 return getRegionByEncodedName(regionName, encodedRegionName);
3251 }
3252
  /**
   * Resolves an encoded region name to the corresponding online region. Convenience
   * overload that has no full region name available for error messages.
   * @param encodedRegionName encoded region name
   * @return the online {@link Region}
   * @throws NotServingRegionException if the region is not served by this server
   */
  public Region getRegionByEncodedName(String encodedRegionName)
      throws NotServingRegionException {
    return getRegionByEncodedName(null, encodedRegionName);
  }
3257
3258 protected Region getRegionByEncodedName(byte[] regionName, String encodedRegionName)
3259 throws NotServingRegionException {
3260 Region region = this.onlineRegions.get(encodedRegionName);
3261 if (region == null) {
3262 MovedRegionInfo moveInfo = getMovedRegion(encodedRegionName);
3263 if (moveInfo != null) {
3264 throw new RegionMovedException(moveInfo.getServerName(), moveInfo.getSeqNum());
3265 }
3266 Boolean isOpening = this.regionsInTransitionInRS.get(Bytes.toBytes(encodedRegionName));
3267 String regionNameStr = regionName == null?
3268 encodedRegionName: Bytes.toStringBinary(regionName);
3269 if (isOpening != null && isOpening.booleanValue()) {
3270 throw new RegionOpeningException("Region " + regionNameStr +
3271 " is opening on " + this.serverName);
3272 }
3273 throw new NotServingRegionException("Region " + regionNameStr +
3274 " is not online on " + this.serverName);
3275 }
3276 return region;
3277 }
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289 private Throwable cleanup(final Throwable t, final String msg) {
3290
3291 if (t instanceof NotServingRegionException) {
3292 LOG.debug("NotServingRegionException; " + t.getMessage());
3293 return t;
3294 }
3295 if (msg == null) {
3296 LOG.error("", RemoteExceptionHandler.checkThrowable(t));
3297 } else {
3298 LOG.error(msg, RemoteExceptionHandler.checkThrowable(t));
3299 }
3300 if (!rpcServices.checkOOME(t)) {
3301 checkFileSystem();
3302 }
3303 return t;
3304 }
3305
3306
3307
3308
3309
3310
3311
3312
3313 protected IOException convertThrowableToIOE(final Throwable t, final String msg) {
3314 return (t instanceof IOException ? (IOException) t : msg == null
3315 || msg.length() == 0 ? new IOException(t) : new IOException(msg, t));
3316 }
3317
3318
3319
3320
3321
3322
3323
3324 public boolean checkFileSystem() {
3325 if (this.fsOk && this.fs != null) {
3326 try {
3327 FSUtils.checkFileSystemAvailable(this.fs);
3328 } catch (IOException e) {
3329 abort("File System not available", e);
3330 this.fsOk = false;
3331 }
3332 }
3333 return this.fsOk;
3334 }
3335
3336 @Override
3337 public void updateRegionFavoredNodesMapping(String encodedRegionName,
3338 List<org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName> favoredNodes) {
3339 Address[] addr = new Address[favoredNodes.size()];
3340
3341
3342 for (int i = 0; i < favoredNodes.size(); i++) {
3343 addr[i] = Address.fromParts(favoredNodes.get(i).getHostName(),
3344 favoredNodes.get(i).getPort());
3345 }
3346 regionFavoredNodesMap.put(encodedRegionName, addr);
3347 }
3348
3349
3350
3351
3352
3353
3354
3355
3356 @Override
3357 public InetSocketAddress[] getFavoredNodesForRegion(String encodedRegionName) {
3358 return Address.toSocketAddress(regionFavoredNodesMap.get(encodedRegionName));
3359 }
3360
  /** @return the server-wide nonce manager (may be null if nonces are disabled). */
  @Override
  public ServerNonceManager getNonceManager() {
    return this.nonceManager;
  }
3365
3366 private static class MovedRegionInfo {
3367 private final ServerName serverName;
3368 private final long seqNum;
3369 private final long ts;
3370
3371 public MovedRegionInfo(ServerName serverName, long closeSeqNum) {
3372 this.serverName = serverName;
3373 this.seqNum = closeSeqNum;
3374 ts = EnvironmentEdgeManager.currentTime();
3375 }
3376
3377 public ServerName getServerName() {
3378 return serverName;
3379 }
3380
3381 public long getSeqNum() {
3382 return seqNum;
3383 }
3384
3385 public long getMoveTime() {
3386 return ts;
3387 }
3388 }
3389
3390
3391
  /**
   * Regions recently moved off this server, keyed by encoded region name. Used to answer
   * clients with the region's new location instead of a plain NotServingRegionException.
   */
  protected Map<String, MovedRegionInfo> movedRegions =
      new ConcurrentHashMap<String, MovedRegionInfo>(3000);

  /**
   * How long (ms) a moved-region record stays valid; also used as the period of the
   * MovedRegionsCleaner chore that expires stale records.
   */
  private static final int TIMEOUT_REGION_MOVED = (2 * 60 * 1000);
3398
3399 protected void addToMovedRegions(String encodedName, ServerName destination, long closeSeqNum) {
3400 if (ServerName.isSameHostnameAndPort(destination, this.getServerName())) {
3401 LOG.warn("Not adding moved region record: " + encodedName + " to self.");
3402 return;
3403 }
3404 LOG.info("Adding moved region record: "
3405 + encodedName + " to " + destination + " as of " + closeSeqNum);
3406 movedRegions.put(encodedName, new MovedRegionInfo(destination, closeSeqNum));
3407 }
3408
  /** Drops the moved-region record for {@code encodedName}, if any. */
  void removeFromMovedRegions(String encodedName) {
    movedRegions.remove(encodedName);
  }
3412
3413 private MovedRegionInfo getMovedRegion(final String encodedRegionName) {
3414 MovedRegionInfo dest = movedRegions.get(encodedRegionName);
3415
3416 long now = EnvironmentEdgeManager.currentTime();
3417 if (dest != null) {
3418 if (dest.getMoveTime() > (now - TIMEOUT_REGION_MOVED)) {
3419 return dest;
3420 } else {
3421 movedRegions.remove(encodedRegionName);
3422 }
3423 }
3424
3425 return null;
3426 }
3427
3428
3429
3430
3431 protected void cleanMovedRegions() {
3432 final long cutOff = System.currentTimeMillis() - TIMEOUT_REGION_MOVED;
3433 Iterator<Entry<String, MovedRegionInfo>> it = movedRegions.entrySet().iterator();
3434
3435 while (it.hasNext()){
3436 Map.Entry<String, MovedRegionInfo> e = it.next();
3437 if (e.getValue().getMoveTime() < cutOff) {
3438 it.remove();
3439 }
3440 }
3441 }
3442
3443
3444
3445
3446
  /**
   * @return period (ms) of the MovedRegionsCleaner chore; matches the record timeout so
   * entries live at most about two periods before removal
   */
  protected int movedRegionCleanerPeriod() {
    return TIMEOUT_REGION_MOVED;
  }
3450
3451
3452
3453
3454
3455 protected final static class MovedRegionsCleaner extends ScheduledChore implements Stoppable {
3456 private HRegionServer regionServer;
3457 Stoppable stoppable;
3458
3459 private MovedRegionsCleaner(
3460 HRegionServer regionServer, Stoppable stoppable){
3461 super("MovedRegionsCleaner for region " + regionServer, stoppable,
3462 regionServer.movedRegionCleanerPeriod());
3463 this.regionServer = regionServer;
3464 this.stoppable = stoppable;
3465 }
3466
3467 static MovedRegionsCleaner create(HRegionServer rs){
3468 Stoppable stoppable = new Stoppable() {
3469 private volatile boolean isStopped = false;
3470 @Override public void stop(String why) { isStopped = true;}
3471 @Override public boolean isStopped() {return isStopped;}
3472 };
3473
3474 return new MovedRegionsCleaner(rs, stoppable);
3475 }
3476
3477 @Override
3478 protected void chore() {
3479 regionServer.cleanMovedRegions();
3480 }
3481
3482 @Override
3483 public void stop(String why) {
3484 stoppable.stop(why);
3485 }
3486
3487 @Override
3488 public boolean isStopped() {
3489 return stoppable.isStopped();
3490 }
3491 }
3492
  /** @return the ZooKeeper path of this server's ephemeral node under the RS znode. */
  private String getMyEphemeralNodePath() {
    return ZKUtil.joinZNode(this.zooKeeper.rsZNode, getServerName().toString());
  }
3496
3497 private boolean isHealthCheckerConfigured() {
3498 String healthScriptLocation = this.conf.get(HConstants.HEALTH_SCRIPT_LOC);
3499 return org.apache.commons.lang.StringUtils.isNotBlank(healthScriptLocation);
3500 }
3501
3502
3503
3504
  /** @return the thread pool handling compactions and splits on this server. */
  public CompactSplitThread getCompactSplitThread() {
    return this.compactSplitThread;
  }
3508
3509
3510
3511
3512
3513
3514
3515
  /**
   * Publishes to ZooKeeper the minimum flushed sequence id across all stores of a
   * recovering region, so recovery can tell which edits are already persisted.
   * No-op when the region is not in recovering state.
   *
   * @param r the recovering region whose flushed sequence id should be recorded
   * @throws KeeperException on ZooKeeper errors other than the recovery znode being gone
   * @throws IOException if interrupted while reading from ZooKeeper
   */
  private void updateRecoveringRegionLastFlushedSequenceId(Region r) throws KeeperException,
      IOException {
    if (!r.isRecovering()) {
      // Region is not recovering; nothing to record.
      return;
    }

    HRegionInfo regionInfo = r.getRegionInfo();
    ZooKeeperWatcher zkw = getZooKeeper();
    String previousRSName = this.getLastFailedRSFromZK(regionInfo.getEncodedName());
    Map<byte[], Long> maxSeqIdInStores = r.getMaxStoreSeqId();
    long minSeqIdForLogReplay = -1;
    // Region-wide floor: the smallest per-store sequence id. Edits below it are flushed
    // in every store (-1 means no stores reported a sequence id).
    for (Long storeSeqIdForReplay : maxSeqIdInStores.values()) {
      if (minSeqIdForLogReplay == -1 || storeSeqIdForReplay < minSeqIdForLogReplay) {
        minSeqIdForLogReplay = storeSeqIdForReplay;
      }
    }

    try {
      long lastRecordedFlushedSequenceId = -1;
      String nodePath = ZKUtil.joinZNode(this.zooKeeper.recoveringRegionsZNode,
        regionInfo.getEncodedName());
      // Read the currently recorded value first so we only ever advance it.
      byte[] data;
      try {
        data = ZKUtil.getData(zkw, nodePath);
      } catch (InterruptedException e) {
        throw new InterruptedIOException();
      }
      if (data != null) {
        lastRecordedFlushedSequenceId = ZKSplitLog.parseLastFlushedSequenceIdFrom(data);
      }
      if (data == null || lastRecordedFlushedSequenceId < minSeqIdForLogReplay) {
        ZKUtil.setData(zkw, nodePath, ZKUtil.positionToByteArray(minSeqIdForLogReplay));
      }
      if (previousRSName != null) {
        // Also record per-store sequence ids under the last failed server's child znode.
        nodePath = ZKUtil.joinZNode(nodePath, previousRSName);
        ZKUtil.setData(zkw, nodePath,
          ZKUtil.regionSequenceIdsToByteArray(minSeqIdForLogReplay, maxSeqIdInStores));
        LOG.debug("Update last flushed sequence id of region " + regionInfo.getEncodedName() +
          " for " + previousRSName);
      } else {
        LOG.warn("Can't find failed region server for recovering region " +
          regionInfo.getEncodedName());
      }
    } catch (NoNodeException ignore) {
      // The recovery znode disappearing means recovery completed; nothing more to do.
      LOG.debug("Region " + regionInfo.getEncodedName() +
        " must have completed recovery because its recovery znode has been removed", ignore);
    }
  }
3567
3568
3569
3570
3571
3572
3573 private String getLastFailedRSFromZK(String encodedRegionName) throws KeeperException {
3574 String result = null;
3575 long maxZxid = 0;
3576 ZooKeeperWatcher zkw = this.getZooKeeper();
3577 String nodePath = ZKUtil.joinZNode(zkw.recoveringRegionsZNode, encodedRegionName);
3578 List<String> failedServers = ZKUtil.listChildrenNoWatch(zkw, nodePath);
3579 if (failedServers == null || failedServers.isEmpty()) {
3580 return result;
3581 }
3582 for (String failedServer : failedServers) {
3583 String rsPath = ZKUtil.joinZNode(nodePath, failedServer);
3584 Stat stat = new Stat();
3585 ZKUtil.getDataNoWatch(zkw, rsPath, stat);
3586 if (maxZxid < stat.getCzxid()) {
3587 maxZxid = stat.getCzxid();
3588 result = failedServer;
3589 }
3590 }
3591 return result;
3592 }
3593
  /**
   * Dispatches a coprocessor endpoint call to a service registered at the region-server
   * level (as opposed to a per-region endpoint): resolves the service and method by name,
   * deserializes the request, invokes the method and serializes the response.
   *
   * @param controller RPC controller from the transport (unused; a ServerRpcController is
   *   created internally to carry service-side exceptions)
   * @param serviceRequest carries the service name, method name and request payload
   * @return the serialized coprocessor response (with an empty region specifier, since
   *   this is not a per-region call)
   * @throws ServiceException wrapping any IOException raised during lookup or dispatch
   */
  public CoprocessorServiceResponse execRegionServerService(
      @SuppressWarnings("UnusedParameters") final RpcController controller,
      final CoprocessorServiceRequest serviceRequest) throws ServiceException {
    try {
      ServerRpcController serviceController = new ServerRpcController();
      CoprocessorServiceCall call = serviceRequest.getCall();
      String serviceName = call.getServiceName();
      String methodName = call.getMethodName();
      if (!coprocessorServiceHandlers.containsKey(serviceName)) {
        throw new UnknownProtocolException(null,
            "No registered coprocessor service found for name " + serviceName);
      }
      Service service = coprocessorServiceHandlers.get(serviceName);
      Descriptors.ServiceDescriptor serviceDesc = service.getDescriptorForType();
      Descriptors.MethodDescriptor methodDesc = serviceDesc.findMethodByName(methodName);
      if (methodDesc == null) {
        throw new UnknownProtocolException(service.getClass(), "Unknown method " + methodName
            + " called on service " + serviceName);
      }
      // Deserialize the payload into the method's request message type.
      Message.Builder builderForType = service.getRequestPrototype(methodDesc).newBuilderForType();
      ProtobufUtil.mergeFrom(builderForType, call.getRequest());
      Message request = builderForType.build();
      final Message.Builder responseBuilder =
          service.getResponsePrototype(methodDesc).newBuilderForType();
      // The callback captures the response; the result is read right after, so the call
      // is presumably completed synchronously — NOTE(review): confirm for async services.
      service.callMethod(methodDesc, serviceController, request, new RpcCallback<Message>() {
        @Override
        public void run(Message message) {
          if (message != null) {
            responseBuilder.mergeFrom(message);
          }
        }
      });
      // Exceptions thrown inside the service implementation surface via the controller.
      IOException exception = ResponseConverter.getControllerException(serviceController);
      if (exception != null) {
        throw exception;
      }
      Message execResult = responseBuilder.build();
      ClientProtos.CoprocessorServiceResponse.Builder builder =
          ClientProtos.CoprocessorServiceResponse.newBuilder();
      // Server-level call: no specific region, so use an empty region name.
      builder.setRegion(RequestConverter.buildRegionSpecifier(RegionSpecifierType.REGION_NAME,
        HConstants.EMPTY_BYTE_ARRAY));
      builder.setValue(builder.getValueBuilder().setName(execResult.getClass().getName())
          .setValue(execResult.toByteString()));
      return builder.build();
    } catch (IOException ie) {
      throw new ServiceException(ie);
    }
  }
3642
3643
3644
3645
  /** @return the server-wide block cache configuration. */
  public CacheConfig getCacheConfig() {
    return this.cacheConfig;
  }
3649
3650
3651
3652
  /** @return the manager that notifies registered observers of configuration changes. */
  protected ConfigurationManager getConfigurationManager() {
    return configurationManager;
  }
3656
3657
3658
3659
  /** @return accessor for table descriptors known to this server. */
  public TableDescriptors getTableDescriptors() {
    return this.tableDescriptors;
  }
3663
3664
3665
3666
  /**
   * Reloads the configuration from disk and pushes the (possibly changed) values to all
   * registered configuration observers.
   */
  public void updateConfiguration() {
    LOG.info("Reloading the configuration from disk.");
    // Reload the configuration from disk.
    conf.reloadConfiguration();
    configurationManager.notifyAllObservers(conf);
  }
3673
  /** @return the heap memory manager tuning memstore/block-cache sizes on this server. */
  @Override
  public HeapMemoryManager getHeapMemoryManager() {
    return hMemManager;
  }
3678
3679 @Override
3680 public double getCompactionPressure() {
3681 double max = 0;
3682 for (Region region : onlineRegions.values()) {
3683 for (Store store : region.getStores()) {
3684 double normCount = store.getCompactionPressure();
3685 if (normCount > max) {
3686 max = normCount;
3687 }
3688 }
3689 }
3690 return max;
3691 }
3692
3693
3694
3695
3696
  /** @return true if the WAL roller has finished the requested log roll. */
  @InterfaceAudience.Private
  public boolean walRollRequestFinished() {
    return this.walRoller.walRollFinished();
  }
3701
  /** @return the controller throttling flush throughput on this server. */
  @Override
  public ThroughputController getFlushThroughputController() {
    return flushThroughputController;
  }
3706
  /**
   * @return the global memstore size as a fraction of the flush low-water mark; values
   * approaching/exceeding 1.0 indicate mounting flush pressure. Returns 0.0 while the
   * accounting or flusher is not yet initialized.
   */
  @Override
  public double getFlushPressure() {
    if (getRegionServerAccounting() == null || cacheFlusher == null) {
      // Not initialized yet (or shut down); report no pressure.
      return 0.0;
    }
    return getRegionServerAccounting().getGlobalMemstoreSize() * 1.0
        / cacheFlusher.globalMemStoreLimitLowMark;
  }
3716
3717 @Override
3718 public void onConfigurationChange(Configuration newConf) {
3719 ThroughputController old = this.flushThroughputController;
3720 if (old != null) {
3721 old.stop("configuration change");
3722 }
3723 this.flushThroughputController = FlushThroughputControllerFactory.create(this, newConf);
3724 }
3725
  /** @return the region-server metrics source. */
  @Override
  public MetricsRegionServer getMetrics() {
    return metricsRegionServer;
  }
3730
  /**
   * Asks the master (via the cluster connection's Admin) to unassign the given region,
   * without forcing.
   * @param regionName full name of the region to unassign
   * @throws IOException if the admin call fails
   */
  @Override
  public void unassign(byte[] regionName) throws IOException {
    clusterConnection.getAdmin().unassign(regionName, false);
  }
3735
3736
3737
3738
  /**
   * TimerTask scheduled when an abort is taking too long: dumps all thread stacks to
   * stdout and then halts the JVM immediately (skipping shutdown hooks and finalizers).
   */
  private static class SystemExitWhenAbortTimeout extends TimerTask {
    @Override
    public void run() {
      LOG.warn("Aborting region server timed out, terminating forcibly" +
          " and does not wait for any running shutdown hooks or finalizers to finish their work." +
          " Thread dump to stdout.");
      Threads.printThreadInfo(System.out, "Zombie HRegionServer");
      // halt() (not exit()) so no shutdown hooks can block termination.
      Runtime.getRuntime().halt(1);
    }
  }
3749 }