1 /**
2 * Copyright 2010 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.zookeeper;
21
22 import java.util.List;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.hadoop.hbase.HRegionInfo;
27 import org.apache.hadoop.hbase.ServerName;
28 import org.apache.hadoop.hbase.executor.RegionTransitionData;
29 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
30 import org.apache.zookeeper.AsyncCallback;
31 import org.apache.zookeeper.KeeperException;
32 import org.apache.zookeeper.KeeperException.Code;
33 import org.apache.zookeeper.KeeperException.NoNodeException;
34 import org.apache.zookeeper.KeeperException.NodeExistsException;
35 import org.apache.zookeeper.data.Stat;
36
37 /**
38 * Utility class for doing region assignment in ZooKeeper. This class extends
39 * stuff done in {@link ZKUtil} to cover specific assignment operations.
40 * <p>
41 * Contains only static methods and constants.
42 * <p>
43 * Used by both the Master and RegionServer.
44 * <p>
45 * All valid transitions outlined below:
46 * <p>
47 * <b>MASTER</b>
48 * <ol>
49 * <li>
50 * Master creates an unassigned node as OFFLINE.
51 * - Cluster startup and table enabling.
52 * </li>
53 * <li>
54 * Master forces an existing unassigned node to OFFLINE.
55 * - RegionServer failure.
56 * - Allows transitions from all states to OFFLINE.
57 * </li>
58 * <li>
 * Master deletes an unassigned node that was in an OPENED state.
60 * - Normal region transitions. Besides cluster startup, no other deletions
61 * of unassigned nodes is allowed.
62 * </li>
63 * <li>
64 * Master deletes all unassigned nodes regardless of state.
65 * - Cluster startup before any assignment happens.
66 * </li>
67 * </ol>
68 * <p>
69 * <b>REGIONSERVER</b>
70 * <ol>
71 * <li>
72 * RegionServer creates an unassigned node as CLOSING.
73 * - All region closes will do this in response to a CLOSE RPC from Master.
74 * - A node can never be transitioned to CLOSING, only created.
75 * </li>
76 * <li>
77 * RegionServer transitions an unassigned node from CLOSING to CLOSED.
78 * - Normal region closes. CAS operation.
79 * </li>
80 * <li>
81 * RegionServer transitions an unassigned node from OFFLINE to OPENING.
82 * - All region opens will do this in response to an OPEN RPC from the Master.
83 * - Normal region opens. CAS operation.
84 * </li>
85 * <li>
86 * RegionServer transitions an unassigned node from OPENING to OPENED.
87 * - Normal region opens. CAS operation.
88 * </li>
89 * </ol>
90 */
91 public class ZKAssign {
// Logger shared by every static helper in this class.
private static final Log LOG = LogFactory.getLog(ZKAssign.class);
93
94 /**
95 * Gets the full path node name for the unassigned node for the specified
96 * region.
97 * @param zkw zk reference
98 * @param regionName region name
99 * @return full path node name
100 */
101 public static String getNodeName(ZooKeeperWatcher zkw, String regionName) {
102 return ZKUtil.joinZNode(zkw.assignmentZNode, regionName);
103 }
104
105 /**
106 * Gets the region name from the full path node name of an unassigned node.
107 * @param path full zk path
108 * @return region name
109 */
110 public static String getRegionName(ZooKeeperWatcher zkw, String path) {
111 return path.substring(zkw.assignmentZNode.length()+1);
112 }
113
114 // Master methods
115
116 /**
117 * Creates a new unassigned node in the OFFLINE state for the specified region.
118 *
119 * <p>Does not transition nodes from other states. If a node already exists
120 * for this region, a {@link NodeExistsException} will be thrown.
121 *
122 * <p>Sets a watcher on the unassigned region node if the method is successful.
123 *
124 * <p>This method should only be used during cluster startup and the enabling
125 * of a table.
126 *
127 * @param zkw zk reference
128 * @param region region to be created as offline
129 * @param serverName server event originates from
130 * @throws KeeperException if unexpected zookeeper exception
131 * @throws KeeperException.NodeExistsException if node already exists
132 */
133 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
134 ServerName serverName)
135 throws KeeperException, KeeperException.NodeExistsException {
136 createNodeOffline(zkw, region, serverName, EventType.M_ZK_REGION_OFFLINE);
137 }
138
139 public static void createNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
140 ServerName serverName, final EventType event)
141 throws KeeperException, KeeperException.NodeExistsException {
142 LOG.debug(zkw.prefix("Creating unassigned node for " +
143 region.getEncodedName() + " in OFFLINE state"));
144 RegionTransitionData data = new RegionTransitionData(event,
145 region.getRegionName(), serverName);
146 String node = getNodeName(zkw, region.getEncodedName());
147 ZKUtil.createAndWatch(zkw, node, data.getBytes());
148 }
149
150 /**
151 * Creates an unassigned node in the OFFLINE state for the specified region.
152 * <p>
153 * Runs asynchronously. Depends on no pre-existing znode.
154 *
155 * <p>Sets a watcher on the unassigned region node.
156 *
157 * @param zkw zk reference
158 * @param region region to be created as offline
159 * @param serverName server event originates from
160 * @param cb
161 * @param ctx
162 * @throws KeeperException if unexpected zookeeper exception
163 * @throws KeeperException.NodeExistsException if node already exists
164 */
165 public static void asyncCreateNodeOffline(ZooKeeperWatcher zkw,
166 HRegionInfo region, ServerName serverName,
167 final AsyncCallback.StringCallback cb, final Object ctx)
168 throws KeeperException {
169 LOG.debug(zkw.prefix("Async create of unassigned node for " +
170 region.getEncodedName() + " with OFFLINE state"));
171 RegionTransitionData data = new RegionTransitionData(
172 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
173 String node = getNodeName(zkw, region.getEncodedName());
174 ZKUtil.asyncCreate(zkw, node, data.getBytes(), cb, ctx);
175 }
176
177 /**
178 * Forces an existing unassigned node to the OFFLINE state for the specified
179 * region.
180 *
181 * <p>Does not create a new node. If a node does not already exist for this
182 * region, a {@link NoNodeException} will be thrown.
183 *
184 * <p>Sets a watcher on the unassigned region node if the method is
185 * successful.
186 *
187 * <p>This method should only be used during recovery of regionserver failure.
188 *
189 * @param zkw zk reference
190 * @param region region to be forced as offline
191 * @param serverName server event originates from
192 * @throws KeeperException if unexpected zookeeper exception
193 * @throws KeeperException.NoNodeException if node does not exist
194 */
195 public static void forceNodeOffline(ZooKeeperWatcher zkw, HRegionInfo region,
196 ServerName serverName)
197 throws KeeperException, KeeperException.NoNodeException {
198 LOG.debug(zkw.prefix("Forcing existing unassigned node for " +
199 region.getEncodedName() + " to OFFLINE state"));
200 RegionTransitionData data = new RegionTransitionData(
201 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
202 String node = getNodeName(zkw, region.getEncodedName());
203 ZKUtil.setData(zkw, node, data.getBytes());
204 }
205
206 /**
207 * Creates or force updates an unassigned node to the OFFLINE state for the
208 * specified region.
209 * <p>
210 * Attempts to create the node but if it exists will force it to transition to
 * an OFFLINE state.
212 *
213 * <p>Sets a watcher on the unassigned region node if the method is
214 * successful.
215 *
216 * <p>This method should be used when assigning a region.
217 *
218 * @param zkw zk reference
219 * @param region region to be created as offline
220 * @param serverName server event originates from
221 * @return the version of the znode created in OFFLINE state, -1 if
222 * unsuccessful.
223 * @throws KeeperException if unexpected zookeeper exception
224 * @throws KeeperException.NodeExistsException if node already exists
225 */
226 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
227 HRegionInfo region, ServerName serverName) throws KeeperException {
228 return createOrForceNodeOffline(zkw, region, serverName, false, true);
229 }
230
231 /**
232 * Creates or force updates an unassigned node to the OFFLINE state for the
233 * specified region.
234 * <p>
235 * Attempts to create the node but if it exists will force it to transition to
 * an OFFLINE state.
237 * <p>
238 * Sets a watcher on the unassigned region node if the method is successful.
239 *
240 * <p>
241 * This method should be used when assigning a region.
242 *
243 * @param zkw
244 * zk reference
245 * @param region
246 * region to be created as offline
247 * @param serverName
248 * server event originates from
249 * @param hijack
250 * - true if to be hijacked and reassigned, false otherwise
251 * @param allowCreation
252 * - true if the node has to be created newly, false otherwise
253 * @throws KeeperException
254 * if unexpected zookeeper exception
255 * @return the version of the znode created in OFFLINE state, -1 if
256 * unsuccessful.
257 * @throws KeeperException.NodeExistsException
258 * if node already exists
259 */
260 public static int createOrForceNodeOffline(ZooKeeperWatcher zkw,
261 HRegionInfo region, ServerName serverName,
262 boolean hijack, boolean allowCreation)
263 throws KeeperException {
264 LOG.debug(zkw.prefix("Creating (or updating) unassigned node for " +
265 region.getEncodedName() + " with OFFLINE state"));
266 RegionTransitionData data = new RegionTransitionData(
267 EventType.M_ZK_REGION_OFFLINE, region.getRegionName(), serverName);
268 String node = getNodeName(zkw, region.getEncodedName());
269 Stat stat = new Stat();
270 zkw.sync(node);
271 int version = ZKUtil.checkExists(zkw, node);
272 if (version == -1) {
273 // While trying to transit a node to OFFLINE that was in previously in
274 // OPENING state but before it could transit to OFFLINE state if RS had
275 // opened the region then the Master deletes the assigned region znode.
276 // In that case the znode will not exist. So we should not
277 // create the znode again which will lead to double assignment.
278 if (hijack && !allowCreation) {
279 return -1;
280 }
281 return ZKUtil.createAndWatch(zkw, node, data.getBytes());
282 } else {
283 RegionTransitionData curDataInZNode = ZKAssign.getDataNoWatch(zkw, region
284 .getEncodedName(), stat);
285 // Do not move the node to OFFLINE if znode is in any of the following
286 // state.
287 // Because these are already executed states.
288 if (hijack && null != curDataInZNode) {
289 EventType eventType = curDataInZNode.getEventType();
290 if (eventType.equals(EventType.M_ZK_REGION_CLOSING)
291 || eventType.equals(EventType.RS_ZK_REGION_CLOSED)
292 || eventType.equals(EventType.RS_ZK_REGION_OPENED)) {
293 return -1;
294 }
295 }
296
297 boolean setData = false;
298 try {
299 setData = ZKUtil.setData(zkw, node, data.getBytes(), version);
300 // Setdata throws KeeperException which aborts the Master. So we are
301 // catching it here.
302 // If just before setting the znode to OFFLINE if the RS has made any
303 // change to the
304 // znode state then we need to return -1.
305 } catch (KeeperException kpe) {
306 LOG.info("Version mismatch while setting the node to OFFLINE state.");
307 return -1;
308 }
309 if (!setData) {
310 return -1;
311 } else {
312 // We successfully forced to OFFLINE, reset watch and handle if
313 // the state changed in between our set and the watch
314 RegionTransitionData curData =
315 ZKAssign.getData(zkw, region.getEncodedName());
316 if (curData.getEventType() != data.getEventType()) {
317 // state changed, need to process
318 return -1;
319 }
320 }
321 }
322 return stat.getVersion() + 1;
323 }
324
325 /**
326 * Deletes an existing unassigned node that is in the OPENED state for the
327 * specified region.
328 *
329 * <p>If a node does not already exist for this region, a
330 * {@link NoNodeException} will be thrown.
331 *
332 * <p>No watcher is set whether this succeeds or not.
333 *
334 * <p>Returns false if the node was not in the proper state but did exist.
335 *
336 * <p>This method is used during normal region transitions when a region
337 * finishes successfully opening. This is the Master acknowledging completion
338 * of the specified regions transition.
339 *
340 * @param zkw zk reference
341 * @param regionName opened region to be deleted from zk
342 * @throws KeeperException if unexpected zookeeper exception
343 * @throws KeeperException.NoNodeException if node does not exist
344 */
345 public static boolean deleteOpenedNode(ZooKeeperWatcher zkw,
346 String regionName)
347 throws KeeperException, KeeperException.NoNodeException {
348 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_OPENED);
349 }
350
351 /**
352 * Deletes an existing unassigned node that is in the OFFLINE state for the
353 * specified region.
354 *
355 * <p>If a node does not already exist for this region, a
356 * {@link NoNodeException} will be thrown.
357 *
358 * <p>No watcher is set whether this succeeds or not.
359 *
360 * <p>Returns false if the node was not in the proper state but did exist.
361 *
362 * <p>This method is used during master failover when the regions on an RS
363 * that has died are all set to OFFLINE before being processed.
364 *
365 * @param zkw zk reference
366 * @param regionName closed region to be deleted from zk
367 * @throws KeeperException if unexpected zookeeper exception
368 * @throws KeeperException.NoNodeException if node does not exist
369 */
370 public static boolean deleteOfflineNode(ZooKeeperWatcher zkw,
371 String regionName)
372 throws KeeperException, KeeperException.NoNodeException {
373 return deleteNode(zkw, regionName, EventType.M_ZK_REGION_OFFLINE);
374 }
375
376 /**
377 * Deletes an existing unassigned node that is in the CLOSED state for the
378 * specified region.
379 *
380 * <p>If a node does not already exist for this region, a
381 * {@link NoNodeException} will be thrown.
382 *
383 * <p>No watcher is set whether this succeeds or not.
384 *
385 * <p>Returns false if the node was not in the proper state but did exist.
386 *
387 * <p>This method is used during table disables when a region finishes
388 * successfully closing. This is the Master acknowledging completion
389 * of the specified regions transition to being closed.
390 *
391 * @param zkw zk reference
392 * @param regionName closed region to be deleted from zk
393 * @throws KeeperException if unexpected zookeeper exception
394 * @throws KeeperException.NoNodeException if node does not exist
395 */
396 public static boolean deleteClosedNode(ZooKeeperWatcher zkw,
397 String regionName)
398 throws KeeperException, KeeperException.NoNodeException {
399 return deleteNode(zkw, regionName, EventType.RS_ZK_REGION_CLOSED);
400 }
401
402 /**
403 * Deletes an existing unassigned node that is in the CLOSING state for the
404 * specified region.
405 *
406 * <p>If a node does not already exist for this region, a
407 * {@link NoNodeException} will be thrown.
408 *
409 * <p>No watcher is set whether this succeeds or not.
410 *
411 * <p>Returns false if the node was not in the proper state but did exist.
412 *
413 * <p>This method is used during table disables when a region finishes
414 * successfully closing. This is the Master acknowledging completion
415 * of the specified regions transition to being closed.
416 *
417 * @param zkw zk reference
418 * @param region closing region to be deleted from zk
419 * @throws KeeperException if unexpected zookeeper exception
420 * @throws KeeperException.NoNodeException if node does not exist
421 */
422 public static boolean deleteClosingNode(ZooKeeperWatcher zkw,
423 HRegionInfo region)
424 throws KeeperException, KeeperException.NoNodeException {
425 String regionName = region.getEncodedName();
426 return deleteNode(zkw, regionName, EventType.M_ZK_REGION_CLOSING);
427 }
428
429 /**
430 * Deletes an existing unassigned node that is in the specified state for the
431 * specified region.
432 *
433 * <p>If a node does not already exist for this region, a
434 * {@link NoNodeException} will be thrown.
435 *
436 * <p>No watcher is set whether this succeeds or not.
437 *
438 * <p>Returns false if the node was not in the proper state but did exist.
439 *
440 * <p>This method is used when a region finishes opening/closing.
441 * The Master acknowledges completion
442 * of the specified regions transition to being closed/opened.
443 *
444 * @param zkw zk reference
445 * @param regionName region to be deleted from zk
446 * @param expectedState state region must be in for delete to complete
447 * @throws KeeperException if unexpected zookeeper exception
448 * @throws KeeperException.NoNodeException if node does not exist
449 */
450 public static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
451 EventType expectedState)
452 throws KeeperException, KeeperException.NoNodeException {
453 return deleteNode(zkw, regionName, expectedState, -1);
454 }
455
456 /**
457 * Deletes an existing unassigned node that is in the specified state for the
458 * specified region.
459 *
460 * <p>If a node does not already exist for this region, a
461 * {@link NoNodeException} will be thrown.
462 *
463 * <p>No watcher is set whether this succeeds or not.
464 *
465 * <p>Returns false if the node was not in the proper state but did exist.
466 *
467 * <p>This method is used when a region finishes opening/closing.
468 * The Master acknowledges completion
469 * of the specified regions transition to being closed/opened.
470 *
471 * @param zkw zk reference
472 * @param regionName region to be deleted from zk
473 * @param expectedState state region must be in for delete to complete
474 * @param expectedVersion of the znode that is to be deleted.
475 * If expectedVersion need not be compared while deleting the znode
476 * pass -1
477 * @throws KeeperException if unexpected zookeeper exception
478 * @throws KeeperException.NoNodeException if node does not exist
479 */
480 public static boolean deleteNode(ZooKeeperWatcher zkw, String regionName,
481 EventType expectedState, int expectedVersion)
482 throws KeeperException, KeeperException.NoNodeException {
483 LOG.debug(zkw.prefix("Deleting existing unassigned " +
484 "node for " + regionName + " that is in expected state " + expectedState));
485 String node = getNodeName(zkw, regionName);
486 zkw.sync(node);
487 Stat stat = new Stat();
488 byte [] bytes = ZKUtil.getDataNoWatch(zkw, node, stat);
489 if (bytes == null) {
490 // If it came back null, node does not exist.
491 throw KeeperException.create(Code.NONODE);
492 }
493 RegionTransitionData data = RegionTransitionData.fromBytes(bytes);
494 if (!data.getEventType().equals(expectedState)) {
495 LOG.warn(zkw.prefix("Attempting to delete unassigned " +
496 "node " + regionName + " in " + expectedState +
497 " state but node is in " + data.getEventType() + " state"));
498 return false;
499 }
500 if (expectedVersion != -1
501 && stat.getVersion() != expectedVersion) {
502 LOG.warn("The node " + regionName + " we are trying to delete is not" +
503 " the expected one. Got a version mismatch");
504 return false;
505 }
506 if(!ZKUtil.deleteNode(zkw, node, stat.getVersion())) {
507 LOG.warn(zkw.prefix("Attempting to delete " +
508 "unassigned node " + regionName + " in " + expectedState +
509 " state but after verifying state, we got a version mismatch"));
510 return false;
511 }
512 LOG.debug(zkw.prefix("Successfully deleted unassigned node for region " +
513 regionName + " in expected state " + expectedState));
514 return true;
515 }
516
517 /**
518 * Deletes all unassigned nodes regardless of their state.
519 *
520 * <p>No watchers are set.
521 *
522 * <p>This method is used by the Master during cluster startup to clear out
523 * any existing state from other cluster runs.
524 *
525 * @param zkw zk reference
526 * @throws KeeperException if unexpected zookeeper exception
527 */
528 public static void deleteAllNodes(ZooKeeperWatcher zkw)
529 throws KeeperException {
530 LOG.debug(zkw.prefix("Deleting any existing unassigned nodes"));
531 ZKUtil.deleteChildrenRecursively(zkw, zkw.assignmentZNode);
532 }
533
534 // RegionServer methods
535
536 /**
537 * Creates a new unassigned node in the CLOSING state for the specified
538 * region.
539 *
540 * <p>Does not transition nodes from any states. If a node already exists
541 * for this region, a {@link NodeExistsException} will be thrown.
542 *
543 * <p>If creation is successful, returns the version number of the CLOSING
544 * node created.
545 *
 * <p>Sets a watch on the newly created node (the node is created through
 * {@link ZKUtil#createAndWatch}).
547 *
548 * <p>This method should only be used by a RegionServer when initiating a
549 * close of a region after receiving a CLOSE RPC from the Master.
550 *
551 * @param zkw zk reference
552 * @param region region to be created as closing
553 * @param serverName server event originates from
554 * @return version of node after transition, -1 if unsuccessful transition
555 * @throws KeeperException if unexpected zookeeper exception
556 * @throws KeeperException.NodeExistsException if node already exists
557 */
558 public static int createNodeClosing(ZooKeeperWatcher zkw, HRegionInfo region,
559 ServerName serverName)
560 throws KeeperException, KeeperException.NodeExistsException {
561 LOG.debug(zkw.prefix("Creating unassigned node for " +
562 region.getEncodedName() + " in a CLOSING state"));
563
564 RegionTransitionData data = new RegionTransitionData(
565 EventType.M_ZK_REGION_CLOSING, region.getRegionName(), serverName);
566
567 String node = getNodeName(zkw, region.getEncodedName());
568 return ZKUtil.createAndWatch(zkw, node, data.getBytes());
569 }
570
571 /**
572 * Transitions an existing unassigned node for the specified region which is
573 * currently in the CLOSING state to be in the CLOSED state.
574 *
575 * <p>Does not transition nodes from other states. If for some reason the
576 * node could not be transitioned, the method returns -1. If the transition
577 * is successful, the version of the node after transition is returned.
578 *
 * <p>This method can fail and return -1 for three different reasons:
580 * <ul><li>Unassigned node for this region does not exist</li>
581 * <li>Unassigned node for this region is not in CLOSING state</li>
582 * <li>After verifying CLOSING state, update fails because of wrong version
583 * (someone else already transitioned the node)</li>
584 * </ul>
585 *
586 * <p>Does not set any watches.
587 *
588 * <p>This method should only be used by a RegionServer when initiating a
589 * close of a region after receiving a CLOSE RPC from the Master.
590 *
591 * @param zkw zk reference
592 * @param region region to be transitioned to closed
593 * @param serverName server event originates from
594 * @return version of node after transition, -1 if unsuccessful transition
595 * @throws KeeperException if unexpected zookeeper exception
596 */
597 public static int transitionNodeClosed(ZooKeeperWatcher zkw,
598 HRegionInfo region, ServerName serverName, int expectedVersion)
599 throws KeeperException {
600 return transitionNode(zkw, region, serverName,
601 EventType.M_ZK_REGION_CLOSING,
602 EventType.RS_ZK_REGION_CLOSED, expectedVersion);
603 }
604
605 /**
606 * Transitions an existing unassigned node for the specified region which is
607 * currently in the OFFLINE state to be in the OPENING state.
608 *
609 * <p>Does not transition nodes from other states. If for some reason the
610 * node could not be transitioned, the method returns -1. If the transition
611 * is successful, the version of the node written as OPENING is returned.
612 *
613 * <p>This method can fail and return -1 for three different reasons:
614 * <ul><li>Unassigned node for this region does not exist</li>
615 * <li>Unassigned node for this region is not in OFFLINE state</li>
616 * <li>After verifying OFFLINE state, update fails because of wrong version
617 * (someone else already transitioned the node)</li>
618 * </ul>
619 *
620 * <p>Does not set any watches.
621 *
622 * <p>This method should only be used by a RegionServer when initiating an
623 * open of a region after receiving an OPEN RPC from the Master.
624 *
625 * @param zkw zk reference
626 * @param region region to be transitioned to opening
627 * @param serverName server event originates from
628 * @return version of node after transition, -1 if unsuccessful transition
629 * @throws KeeperException if unexpected zookeeper exception
630 */
631 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
632 HRegionInfo region, ServerName serverName)
633 throws KeeperException {
634 return transitionNodeOpening(zkw, region, serverName,
635 EventType.M_ZK_REGION_OFFLINE);
636 }
637
638 public static int transitionNodeOpening(ZooKeeperWatcher zkw,
639 HRegionInfo region, ServerName serverName, final EventType beginState)
640 throws KeeperException {
641 return transitionNode(zkw, region, serverName, beginState,
642 EventType.RS_ZK_REGION_OPENING, -1);
643 }
644
645 /**
646 * Retransitions an existing unassigned node for the specified region which is
647 * currently in the OPENING state to be in the OPENING state.
648 *
649 * <p>Does not transition nodes from other states. If for some reason the
650 * node could not be transitioned, the method returns -1. If the transition
651 * is successful, the version of the node rewritten as OPENING is returned.
652 *
653 * <p>This method can fail and return -1 for three different reasons:
654 * <ul><li>Unassigned node for this region does not exist</li>
655 * <li>Unassigned node for this region is not in OPENING state</li>
656 * <li>After verifying OPENING state, update fails because of wrong version
657 * (someone else already transitioned the node)</li>
658 * </ul>
659 *
660 * <p>Does not set any watches.
661 *
662 * <p>This method should only be used by a RegionServer when initiating an
663 * open of a region after receiving an OPEN RPC from the Master.
664 *
665 * @param zkw zk reference
666 * @param region region to be transitioned to opening
667 * @param serverName server event originates from
668 * @return version of node after transition, -1 if unsuccessful transition
669 * @throws KeeperException if unexpected zookeeper exception
670 */
671 public static int retransitionNodeOpening(ZooKeeperWatcher zkw,
672 HRegionInfo region, ServerName serverName, int expectedVersion)
673 throws KeeperException {
674 return transitionNode(zkw, region, serverName,
675 EventType.RS_ZK_REGION_OPENING,
676 EventType.RS_ZK_REGION_OPENING, expectedVersion);
677 }
678
679 /**
680 * Transitions an existing unassigned node for the specified region which is
681 * currently in the OPENING state to be in the OPENED state.
682 *
683 * <p>Does not transition nodes from other states. If for some reason the
684 * node could not be transitioned, the method returns -1. If the transition
685 * is successful, the version of the node after transition is returned.
686 *
 * <p>This method can fail and return -1 for three different reasons:
688 * <ul><li>Unassigned node for this region does not exist</li>
689 * <li>Unassigned node for this region is not in OPENING state</li>
690 * <li>After verifying OPENING state, update fails because of wrong version
691 * (this should never actually happen since an RS only does this transition
692 * following a transition to OPENING. if two RS are conflicting, one would
693 * fail the original transition to OPENING and not this transition)</li>
694 * </ul>
695 *
696 * <p>Does not set any watches.
697 *
698 * <p>This method should only be used by a RegionServer when completing the
699 * open of a region.
700 *
701 * @param zkw zk reference
702 * @param region region to be transitioned to opened
703 * @param serverName server event originates from
704 * @return version of node after transition, -1 if unsuccessful transition
705 * @throws KeeperException if unexpected zookeeper exception
706 */
707 public static int transitionNodeOpened(ZooKeeperWatcher zkw,
708 HRegionInfo region, ServerName serverName, int expectedVersion)
709 throws KeeperException {
710 return transitionNode(zkw, region, serverName,
711 EventType.RS_ZK_REGION_OPENING,
712 EventType.RS_ZK_REGION_OPENED, expectedVersion);
713 }
714
715 /**
716 * Method that actually performs unassigned node transitions.
717 *
718 * <p>Attempts to transition the unassigned node for the specified region
719 * from the expected state to the state in the specified transition data.
720 *
721 * <p>Method first reads existing data and verifies it is in the expected
722 * state. If the node does not exist or the node is not in the expected
723 * state, the method returns -1. If the transition is successful, the
724 * version number of the node following the transition is returned.
725 *
726 * <p>If the read state is what is expected, it attempts to write the new
727 * state and data into the node. When doing this, it includes the expected
728 * version (determined when the existing state was verified) to ensure that
729 * only one transition is successful. If there is a version mismatch, the
730 * method returns -1.
731 *
 * <p>If the write is successful, no watch is set and the method returns the
 * version of the node after the transition.
733 *
734 * @param zkw zk reference
735 * @param region region to be transitioned to opened
736 * @param serverName server event originates from
737 * @param endState state to transition node to if all checks pass
738 * @param beginState state the node must currently be in to do transition
739 * @param expectedVersion expected version of data before modification, or -1
740 * @return version of node after transition, -1 if unsuccessful transition
741 * @throws KeeperException if unexpected zookeeper exception
742 */
743 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
744 ServerName serverName, EventType beginState, EventType endState,
745 int expectedVersion)
746 throws KeeperException {
747 return transitionNode(zkw, region, serverName, beginState, endState,
748 expectedVersion, null);
749 }
750
751 public static int transitionNode(ZooKeeperWatcher zkw, HRegionInfo region,
752 ServerName serverName, EventType beginState, EventType endState,
753 int expectedVersion, final byte [] payload)
754 throws KeeperException {
755 String encoded = region.getEncodedName();
756 if(LOG.isDebugEnabled()) {
757 LOG.debug(zkw.prefix("Attempting to transition node " +
758 HRegionInfo.prettyPrint(encoded) +
759 " from " + beginState.toString() + " to " + endState.toString()));
760 }
761
762 String node = getNodeName(zkw, encoded);
763 zkw.sync(node);
764
765 // Read existing data of the node
766 Stat stat = new Stat();
767 byte [] existingBytes = ZKUtil.getDataNoWatch(zkw, node, stat);
768 if (existingBytes == null) {
769 // Node no longer exists. Return -1. It means unsuccessful transition.
770 return -1;
771 }
772 RegionTransitionData existingData =
773 RegionTransitionData.fromBytes(existingBytes);
774
775 // Verify it is the expected version
776 if(expectedVersion != -1 && stat.getVersion() != expectedVersion) {
777 LOG.warn(zkw.prefix("Attempt to transition the " +
778 "unassigned node for " + encoded +
779 " from " + beginState + " to " + endState + " failed, " +
780 "the node existed but was version " + stat.getVersion() +
781 " not the expected version " + expectedVersion));
782 return -1;
783 } else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
784 && endState.equals(EventType.RS_ZK_REGION_OPENING)
785 && expectedVersion == -1 && stat.getVersion() != 0) {
786 // the below check ensures that double assignment doesnot happen.
787 // When the node is created for the first time then the expected version
788 // that is passed will be -1 and the version in znode will be 0.
789 // In all other cases the version in znode will be > 0.
790 LOG.warn(zkw.prefix("Attempt to transition the " + "unassigned node for "
791 + encoded + " from " + beginState + " to " + endState + " failed, "
792 + "the node existed but was version " + stat.getVersion()
793 + " not the expected version " + expectedVersion));
794 return -1;
795 }
796
797 // Verify it is in expected state
798 if(!existingData.getEventType().equals(beginState)) {
799 LOG.warn(zkw.prefix("Attempt to transition the " +
800 "unassigned node for " + encoded +
801 " from " + beginState + " to " + endState + " failed, " +
802 "the node existed but was in the state " + existingData.getEventType() +
803 " set by the server " + serverName));
804 return -1;
805 }
806
807 // Write new data, ensuring data has not changed since we last read it
808 try {
809 RegionTransitionData data = new RegionTransitionData(endState,
810 region.getRegionName(), serverName, payload);
811 if(!ZKUtil.setData(zkw, node, data.getBytes(), stat.getVersion())) {
812 LOG.warn(zkw.prefix("Attempt to transition the " +
813 "unassigned node for " + encoded +
814 " from " + beginState + " to " + endState + " failed, " +
815 "the node existed and was in the expected state but then when " +
816 "setting data we got a version mismatch"));
817 return -1;
818 }
819 if(LOG.isDebugEnabled()) {
820 LOG.debug(zkw.prefix("Successfully transitioned node " + encoded +
821 " from " + beginState + " to " + endState));
822 }
823 return stat.getVersion() + 1;
824 } catch (KeeperException.NoNodeException nne) {
825 LOG.warn(zkw.prefix("Attempt to transition the " +
826 "unassigned node for " + encoded +
827 " from " + beginState + " to " + endState + " failed, " +
828 "the node existed and was in the expected state but then when " +
829 "setting data it no longer existed"));
830 return -1;
831 }
832 }
833
834 /**
835 * Gets the current data in the unassigned node for the specified region name
836 * or fully-qualified path.
837 *
838 * <p>Returns null if the region does not currently have a node.
839 *
840 * <p>Sets a watch on the node if the node exists.
841 *
842 * @param zkw zk reference
843 * @param pathOrRegionName fully-specified path or region name
844 * @return data for the unassigned node
845 * @throws KeeperException if unexpected zookeeper exception
846 */
847 public static RegionTransitionData getData(ZooKeeperWatcher zkw,
848 String pathOrRegionName)
849 throws KeeperException {
850 String node = pathOrRegionName.startsWith("/") ?
851 pathOrRegionName : getNodeName(zkw, pathOrRegionName);
852 byte [] data = ZKUtil.getDataAndWatch(zkw, node);
853 if(data == null) {
854 return null;
855 }
856 return RegionTransitionData.fromBytes(data);
857 }
858
859 /**
860 * Gets the current data in the unassigned node for the specified region name
861 * or fully-qualified path.
862 *
863 * <p>Returns null if the region does not currently have a node.
864 *
865 * <p>Sets a watch on the node if the node exists.
866 *
867 * @param zkw zk reference
868 * @param pathOrRegionName fully-specified path or region name
869 * @param stat object to populate the version.
870 * @return data for the unassigned node
871 * @throws KeeperException if unexpected zookeeper exception
872 */
873 public static RegionTransitionData getDataAndWatch(ZooKeeperWatcher zkw,
874 String pathOrRegionName, Stat stat)
875 throws KeeperException {
876 String node = pathOrRegionName.startsWith("/") ?
877 pathOrRegionName : getNodeName(zkw, pathOrRegionName);
878 byte [] data = ZKUtil.getDataAndWatch(zkw, node, stat);
879 if(data == null) {
880 return null;
881 }
882 return RegionTransitionData.fromBytes(data);
883 }
884
885 /**
886 * Gets the current data in the unassigned node for the specified region name
887 * or fully-qualified path.
888 *
889 * <p>Returns null if the region does not currently have a node.
890 *
891 * <p>Does not set a watch.
892 *
893 * @param zkw zk reference
894 * @param pathOrRegionName fully-specified path or region name
895 * @param stat object to store node info into on getData call
896 * @return data for the unassigned node or null if node does not exist
897 * @throws KeeperException if unexpected zookeeper exception
898 */
899 public static RegionTransitionData getDataNoWatch(ZooKeeperWatcher zkw,
900 String pathOrRegionName, Stat stat)
901 throws KeeperException {
902 String node = pathOrRegionName.startsWith("/") ?
903 pathOrRegionName : getNodeName(zkw, pathOrRegionName);
904 byte [] data = ZKUtil.getDataNoWatch(zkw, node, stat);
905 if (data == null) {
906 return null;
907 }
908 return RegionTransitionData.fromBytes(data);
909 }
910
911 /**
912 * Get the version of the specified znode
913 * @param zkw zk reference
914 * @param region region's info
915 * @return the version of the znode, -1 if it doesn't exist
916 * @throws KeeperException
917 */
918 public static int getVersion(ZooKeeperWatcher zkw, HRegionInfo region)
919 throws KeeperException {
920 String znode = getNodeName(zkw, region.getEncodedName());
921 return ZKUtil.checkExists(zkw, znode);
922 }
923
924 /**
925 * Delete the assignment node regardless of its current state.
926 * <p>
927 * Fail silent even if the node does not exist at all.
928 * @param watcher
929 * @param regionInfo
930 * @throws KeeperException
931 */
932 public static void deleteNodeFailSilent(ZooKeeperWatcher watcher,
933 HRegionInfo regionInfo)
934 throws KeeperException {
935 String node = getNodeName(watcher, regionInfo.getEncodedName());
936 ZKUtil.deleteNodeFailSilent(watcher, node);
937 }
938
939 /**
940 * Blocks until there are no node in regions in transition.
941 * <p>
942 * Used in testing only.
943 * @param zkw zk reference
944 * @throws KeeperException
945 * @throws InterruptedException
946 */
947 public static void blockUntilNoRIT(ZooKeeperWatcher zkw)
948 throws KeeperException, InterruptedException {
949 while (ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
950 List<String> znodes =
951 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
952 if (znodes != null && !znodes.isEmpty()) {
953 for (String znode : znodes) {
954 LOG.debug("ZK RIT -> " + znode);
955 }
956 }
957 Thread.sleep(100);
958 }
959 }
960
961 /**
962 * Blocks until there is at least one node in regions in transition.
963 * <p>
964 * Used in testing only.
965 * @param zkw zk reference
966 * @throws KeeperException
967 * @throws InterruptedException
968 */
969 public static void blockUntilRIT(ZooKeeperWatcher zkw)
970 throws KeeperException, InterruptedException {
971 while (!ZKUtil.nodeHasChildren(zkw, zkw.assignmentZNode)) {
972 List<String> znodes =
973 ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.assignmentZNode);
974 if (znodes == null || znodes.isEmpty()) {
975 LOG.debug("No RIT in ZK");
976 }
977 Thread.sleep(100);
978 }
979 }
980
981 /**
982 * Verifies that the specified region is in the specified state in ZooKeeper.
983 * <p>
984 * Returns true if region is in transition and in the specified state in
985 * ZooKeeper. Returns false if the region does not exist in ZK or is in
986 * a different state.
987 * <p>
988 * Method synchronizes() with ZK so will yield an up-to-date result but is
989 * a slow read.
990 * @param zkw
991 * @param region
992 * @param expectedState
993 * @return true if region exists and is in expected state
994 */
995 public static boolean verifyRegionState(ZooKeeperWatcher zkw,
996 HRegionInfo region, EventType expectedState)
997 throws KeeperException {
998 String encoded = region.getEncodedName();
999
1000 String node = getNodeName(zkw, encoded);
1001 zkw.sync(node);
1002
1003 // Read existing data of the node
1004 byte [] existingBytes = null;
1005 try {
1006 existingBytes = ZKUtil.getDataAndWatch(zkw, node);
1007 } catch (KeeperException.NoNodeException nne) {
1008 return false;
1009 } catch (KeeperException e) {
1010 throw e;
1011 }
1012 if (existingBytes == null) return false;
1013 RegionTransitionData existingData =
1014 RegionTransitionData.fromBytes(existingBytes);
1015 if (existingData.getEventType() == expectedState){
1016 return true;
1017 }
1018 return false;
1019 }
1020 }