1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver.handler;
21
22 import java.io.IOException;
23 import java.util.concurrent.atomic.AtomicBoolean;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.apache.hadoop.fs.Path;
28 import org.apache.hadoop.hbase.HRegionInfo;
29 import org.apache.hadoop.hbase.HTableDescriptor;
30 import org.apache.hadoop.hbase.Server;
31 import org.apache.hadoop.hbase.executor.EventHandler;
32 import org.apache.hadoop.hbase.regionserver.HRegion;
33 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
34 import org.apache.hadoop.hbase.util.CancelableProgressable;
35 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
36 import org.apache.zookeeper.KeeperException;
37
38
39
40
41
42
43 public class OpenRegionHandler extends EventHandler {
44 private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
45
46 private final RegionServerServices rsServices;
47
48 private final HRegionInfo regionInfo;
49 private final HTableDescriptor htd;
50
51
52
53
54 private volatile int version = -1;
55
56 private volatile int versionOfOfflineNode = -1;
57
58 public OpenRegionHandler(final Server server,
59 final RegionServerServices rsServices, HRegionInfo regionInfo,
60 HTableDescriptor htd) {
61 this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, -1);
62 }
63 public OpenRegionHandler(final Server server,
64 final RegionServerServices rsServices, HRegionInfo regionInfo,
65 HTableDescriptor htd, int versionOfOfflineNode) {
66 this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION,
67 versionOfOfflineNode);
68 }
69
70 protected OpenRegionHandler(final Server server,
71 final RegionServerServices rsServices, final HRegionInfo regionInfo,
72 final HTableDescriptor htd, EventType eventType,
73 final int versionOfOfflineNode) {
74 super(server, eventType);
75 this.rsServices = rsServices;
76 this.regionInfo = regionInfo;
77 this.htd = htd;
78 this.versionOfOfflineNode = versionOfOfflineNode;
79 }
80
81 public HRegionInfo getRegionInfo() {
82 return regionInfo;
83 }
84
85 @Override
86 public void process() throws IOException {
87 try {
88 final String name = regionInfo.getRegionNameAsString();
89 if (this.server.isStopped() || this.rsServices.isStopping()) {
90 return;
91 }
92 final String encodedName = regionInfo.getEncodedName();
93
94
95 HRegion region = this.rsServices.getFromOnlineRegions(encodedName);
96
97
98
99 if (!transitionZookeeperOfflineToOpening(encodedName,
100 versionOfOfflineNode)) {
101 LOG.warn("Region was hijacked? It no longer exists, encodedName=" +
102 encodedName);
103 return;
104 }
105
106
107
108 region = openRegion();
109 if (region == null) {
110 tryTransitionToFailedOpen(regionInfo);
111 return;
112 }
113
114 boolean failed = true;
115 if (tickleOpening("post_region_open")) {
116 if (updateMeta(region)) failed = false;
117 }
118 if (failed || this.server.isStopped() ||
119 this.rsServices.isStopping()) {
120 cleanupFailedOpen(region);
121 tryTransitionToFailedOpen(regionInfo);
122 return;
123 }
124
125 if (!transitionToOpened(region)) {
126
127
128
129
130
131
132 cleanupFailedOpen(region);
133 return;
134 }
135
136
137 LOG.debug("Opened " + name + " on server:" +
138 this.server.getServerName());
139 } finally {
140 this.rsServices.getRegionsInTransitionInRS().
141 remove(this.regionInfo.getEncodedNameAsBytes());
142 }
143 }
144
145
146
147
148
149
150
151
152 boolean updateMeta(final HRegion r) {
153 if (this.server.isStopped() || this.rsServices.isStopping()) {
154 return false;
155 }
156
157
158 final AtomicBoolean signaller = new AtomicBoolean(false);
159 PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
160 this.server, this.rsServices, signaller);
161 t.start();
162 int assignmentTimeout = this.server.getConfiguration().
163 getInt("hbase.master.assignment.timeoutmonitor.period", 10000);
164
165
166 long timeout = assignmentTimeout * 10;
167 long now = System.currentTimeMillis();
168 long endTime = now + timeout;
169
170
171 long period = Math.max(1, assignmentTimeout/ 3);
172 long lastUpdate = now;
173 boolean tickleOpening = true;
174 while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
175 !this.rsServices.isStopping() && (endTime > now)) {
176 long elapsed = now - lastUpdate;
177 if (elapsed > period) {
178
179 lastUpdate = now;
180 tickleOpening = tickleOpening("post_open_deploy");
181 }
182 synchronized (signaller) {
183 try {
184 signaller.wait(period);
185 } catch (InterruptedException e) {
186
187 }
188 }
189 now = System.currentTimeMillis();
190 }
191
192
193 if (t.isAlive()) {
194 if (!signaller.get()) {
195
196 LOG.debug("Interrupting thread " + t);
197 t.interrupt();
198 }
199 try {
200 t.join();
201 } catch (InterruptedException ie) {
202 LOG.warn("Interrupted joining " +
203 r.getRegionInfo().getRegionNameAsString(), ie);
204 Thread.currentThread().interrupt();
205 }
206 }
207
208
209
210
211 return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
212 }
213
214
215
216
217
218
219 static class PostOpenDeployTasksThread extends Thread {
220 private Exception exception = null;
221 private final Server server;
222 private final RegionServerServices services;
223 private final HRegion region;
224 private final AtomicBoolean signaller;
225
226 PostOpenDeployTasksThread(final HRegion region, final Server server,
227 final RegionServerServices services, final AtomicBoolean signaller) {
228 super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
229 this.setDaemon(true);
230 this.server = server;
231 this.services = services;
232 this.region = region;
233 this.signaller = signaller;
234 }
235
236 public void run() {
237 try {
238 this.services.postOpenDeployTasks(this.region,
239 this.server.getCatalogTracker(), false);
240 } catch (Exception e) {
241 LOG.warn("Exception running postOpenDeployTasks; region=" +
242 this.region.getRegionInfo().getEncodedName(), e);
243 this.exception = e;
244 }
245
246 this.signaller.set(true);
247 synchronized (this.signaller) {
248 this.signaller.notify();
249 }
250 }
251
252
253
254
255 Exception getException() {
256 return this.exception;
257 }
258 }
259
260
261
262
263
264
265
266 private boolean transitionToOpened(final HRegion r) throws IOException {
267 boolean result = false;
268 HRegionInfo hri = r.getRegionInfo();
269 final String name = hri.getRegionNameAsString();
270
271 try {
272 if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
273 this.server.getServerName(), this.version) == -1) {
274 LOG.warn("Completed the OPEN of region " + name +
275 " but when transitioning from " +
276 " OPENING to OPENED got a version mismatch, someone else clashed " +
277 "so now unassigning -- closing region on server: " +
278 this.server.getServerName());
279 } else {
280 LOG.debug("region transitioned to opened in zookeeper: " +
281 r.getRegionInfo() + ", server: " + this.server.getServerName());
282 result = true;
283 }
284 } catch (KeeperException e) {
285 LOG.error("Failed transitioning node " + name +
286 " from OPENING to OPENED -- closing region", e);
287 }
288 return result;
289 }
290
291
292
293
294
295
296 private boolean tryTransitionToFailedOpen(final HRegionInfo hri) {
297 boolean result = false;
298 final String name = hri.getRegionNameAsString();
299 try {
300 LOG.info("Opening of region " + hri + " failed, marking as FAILED_OPEN in ZK");
301 if (ZKAssign.transitionNode(
302 this.server.getZooKeeper(), hri,
303 this.server.getServerName(),
304 EventType.RS_ZK_REGION_OPENING,
305 EventType.RS_ZK_REGION_FAILED_OPEN,
306 this.version) == -1) {
307 LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
308 "It's likely that the master already timed out this open " +
309 "attempt, and thus another RS already has the region.");
310 } else {
311 result = true;
312 }
313 } catch (KeeperException e) {
314 LOG.error("Failed transitioning node " + name +
315 " from OPENING to FAILED_OPEN", e);
316 }
317 return result;
318 }
319
320
321
322
323 HRegion openRegion() {
324 HRegion region = null;
325 try {
326
327
328 region = HRegion.openHRegion(this.regionInfo, this.htd,
329 this.rsServices.getWAL(), this.server.getConfiguration(),
330 this.rsServices,
331 new CancelableProgressable() {
332 public boolean progress() {
333
334
335
336 return tickleOpening("open_region_progress");
337 }
338 });
339 } catch (Throwable t) {
340
341
342
343 LOG.error("Failed open of region=" +
344 this.regionInfo.getRegionNameAsString(), t);
345 }
346 return region;
347 }
348
349 private void cleanupFailedOpen(final HRegion region) throws IOException {
350 if (region != null) region.close();
351 this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
352 }
353
354
355
356
357
358
359
360
361
362 boolean transitionZookeeperOfflineToOpening(final String encodedName,
363 int versionOfOfflineNode) {
364
365 try {
366
367 this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
368 server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
369 EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
370 } catch (KeeperException e) {
371 LOG.error("Error transition from OFFLINE to OPENING for region=" +
372 encodedName, e);
373 }
374 boolean b = isGoodVersion();
375 if (!b) {
376 LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
377 encodedName);
378 }
379 return b;
380 }
381
382
383
384
385
386
387
388 boolean tickleOpening(final String context) {
389
390 if (!isGoodVersion()) return false;
391 String encodedName = this.regionInfo.getEncodedName();
392 try {
393 this.version =
394 ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
395 this.regionInfo, this.server.getServerName(), this.version);
396 } catch (KeeperException e) {
397 server.abort("Exception refreshing OPENING; region=" + encodedName +
398 ", context=" + context, e);
399 this.version = -1;
400 }
401 boolean b = isGoodVersion();
402 if (!b) {
403 LOG.warn("Failed refreshing OPENING; region=" + encodedName +
404 ", context=" + context);
405 }
406 return b;
407 }
408
409 private boolean isGoodVersion() {
410 return this.version != -1;
411 }
412 }