1   /**
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.resetCounters;
23  import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_failed_to_grab_task_lost_race;
24  import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_failed_to_grab_task_owned;
25  import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_preempt_task;
26  import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_acquired;
27  import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.tot_wkr_task_acquired_rescan;
28  import static org.junit.Assert.assertEquals;
29  import static org.junit.Assert.assertTrue;
30  
31  import java.util.List;
32  import java.util.concurrent.atomic.AtomicLong;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.HBaseTestingUtility;
37  import org.apache.hadoop.hbase.util.Bytes;
38  import org.apache.hadoop.hbase.util.CancelableProgressable;
39  import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
40  import org.apache.hadoop.hbase.zookeeper.ZKSplitLog.TaskState;
41  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
42  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
43  import org.apache.log4j.Level;
44  import org.apache.log4j.Logger;
45  import org.apache.zookeeper.CreateMode;
46  import org.apache.zookeeper.ZooDefs.Ids;
47  import org.junit.After;
48  import org.junit.Before;
49  import org.junit.Test;
50  
51  
52  
53  public class TestSplitLogWorker {
54    private static final Log LOG = LogFactory.getLog(TestSplitLogWorker.class);
55    static {
56      Logger.getLogger("org.apache.hadoop.hbase").setLevel(Level.DEBUG);
57    }
58    private final static HBaseTestingUtility TEST_UTIL =
59      new HBaseTestingUtility();
60    private ZooKeeperWatcher zkw;
61    private SplitLogWorker slw;
62  
63    private void waitForCounter(AtomicLong ctr, long oldval, long newval,
64        long timems) {
65      assertTrue("ctr=" + ctr.get() + ", oldval=" + oldval + ", newval=" + newval,
66        waitForCounterBoolean(ctr, oldval, newval, timems));
67    }
68  
69    private boolean waitForCounterBoolean(AtomicLong ctr, long oldval, long newval,
70        long timems) {
71      long curt = System.currentTimeMillis();
72      long endt = curt + timems;
73      while (curt < endt) {
74        if (ctr.get() == oldval) {
75          try {
76            Thread.sleep(10);
77          } catch (InterruptedException e) {
78          }
79          curt = System.currentTimeMillis();
80        } else {
81          assertEquals(newval, ctr.get());
82          return true;
83        }
84      }
85      return false;
86    }
87  
88    @Before
89    public void setup() throws Exception {
90      TEST_UTIL.startMiniZKCluster();
91      zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(),
92          "split-log-worker-tests", null);
93      ZKUtil.deleteChildrenRecursively(zkw, zkw.baseZNode);
94      ZKUtil.createAndFailSilent(zkw, zkw.baseZNode);
95      assertTrue(ZKUtil.checkExists(zkw, zkw.baseZNode) != -1);
96      LOG.debug(zkw.baseZNode + " created");
97      ZKUtil.createAndFailSilent(zkw, zkw.splitLogZNode);
98      assertTrue(ZKUtil.checkExists(zkw, zkw.splitLogZNode) != -1);
99      LOG.debug(zkw.splitLogZNode + " created");
100     resetCounters();
101 
102   }
103 
104   @After
105   public void teardown() throws Exception {
106     TEST_UTIL.shutdownMiniZKCluster();
107   }
108 
109   SplitLogWorker.TaskExecutor neverEndingTask =
110     new SplitLogWorker.TaskExecutor() {
111 
112       @Override
113       public Status exec(String name, CancelableProgressable p) {
114         while (true) {
115           try {
116             Thread.sleep(1000);
117           } catch (InterruptedException e) {
118             return Status.PREEMPTED;
119           }
120           if (!p.progress()) {
121             return Status.PREEMPTED;
122           }
123         }
124       }
125 
126   };
127 
128   @Test
129   public void testAcquireTaskAtStartup() throws Exception {
130     LOG.info("testAcquireTaskAtStartup");
131     ZKSplitLog.Counters.resetCounters();
132 
133     zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "tatas"),
134         TaskState.TASK_UNASSIGNED.get("mgr"), Ids.OPEN_ACL_UNSAFE,
135         CreateMode.PERSISTENT);
136 
137     SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(),
138       "rs", neverEndingTask);
139     slw.start();
140     try {
141       waitForCounter(tot_wkr_task_acquired, 0, 1, 100);
142       assertTrue(TaskState.TASK_OWNED.equals(ZKUtil.getData(zkw,
143         ZKSplitLog.getEncodedNodeName(zkw, "tatas")), "rs"));
144     } finally {
145       stopSplitLogWorker(slw);
146     }
147   }
148 
149   private void stopSplitLogWorker(final SplitLogWorker slw)
150   throws InterruptedException {
151     if (slw != null) {
152       slw.stop();
153       slw.worker.join(3000);
154       if (slw.worker.isAlive()) {
155         assertTrue(("Could not stop the worker thread slw=" + slw) == null);
156       }
157     }
158   }
159 
160   @Test
161   public void testRaceForTask() throws Exception {
162     LOG.info("testRaceForTask");
163     ZKSplitLog.Counters.resetCounters();
164 
165     zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "trft"),
166         TaskState.TASK_UNASSIGNED.get("manager"), Ids.OPEN_ACL_UNSAFE,
167         CreateMode.PERSISTENT);
168 
169     SplitLogWorker slw1 = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(),
170         "svr1", neverEndingTask);
171     SplitLogWorker slw2 = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(),
172         "svr2", neverEndingTask);
173     slw1.start();
174     slw2.start();
175     try {
176       waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
177       // Assert that either the tot_wkr_failed_to_grab_task_owned count was set of if
178       // not it, that we fell through to the next counter in line and it was set.
179       assertTrue(waitForCounterBoolean(tot_wkr_failed_to_grab_task_owned, 0, 1, 1000) ||
180         tot_wkr_failed_to_grab_task_lost_race.get() == 1);
181       assertTrue(TaskState.TASK_OWNED.equals(ZKUtil.getData(zkw,
182         ZKSplitLog.getEncodedNodeName(zkw, "trft")), "svr1") ||
183         TaskState.TASK_OWNED.equals(ZKUtil.getData(zkw,
184             ZKSplitLog.getEncodedNodeName(zkw, "trft")), "svr2"));
185     } finally {
186       stopSplitLogWorker(slw1);
187       stopSplitLogWorker(slw2);
188     }
189   }
190 
191   @Test
192   public void testPreemptTask() throws Exception {
193     LOG.info("testPreemptTask");
194     ZKSplitLog.Counters.resetCounters();
195 
196     SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(),
197         "tpt_svr", neverEndingTask);
198     slw.start();
199     try {
200       Thread.yield(); // let the worker start
201       Thread.sleep(100);
202 
203       // this time create a task node after starting the splitLogWorker
204       zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "tpt_task"),
205         TaskState.TASK_UNASSIGNED.get("manager"), Ids.OPEN_ACL_UNSAFE,
206         CreateMode.PERSISTENT);
207 
208       waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
209       assertEquals(1, slw.taskReadySeq);
210       assertTrue(TaskState.TASK_OWNED.equals(ZKUtil.getData(zkw,
211         ZKSplitLog.getEncodedNodeName(zkw, "tpt_task")), "tpt_svr"));
212 
213       ZKUtil.setData(zkw, ZKSplitLog.getEncodedNodeName(zkw, "tpt_task"),
214         TaskState.TASK_UNASSIGNED.get("manager"));
215       waitForCounter(tot_wkr_preempt_task, 0, 1, 1000);
216     } finally {
217       stopSplitLogWorker(slw);
218     }
219   }
220 
221   @Test
222   public void testMultipleTasks() throws Exception {
223     LOG.info("testMultipleTasks");
224     ZKSplitLog.Counters.resetCounters();
225     SplitLogWorker slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(),
226         "tmt_svr", neverEndingTask);
227     slw.start();
228     try {
229       Thread.yield(); // let the worker start
230       Thread.sleep(100);
231 
232       zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "tmt_task"),
233         TaskState.TASK_UNASSIGNED.get("manager"), Ids.OPEN_ACL_UNSAFE,
234         CreateMode.PERSISTENT);
235 
236       waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
237       // now the worker is busy doing the above task
238 
239       // create another task
240       zkw.getRecoverableZooKeeper().create(ZKSplitLog.getEncodedNodeName(zkw, "tmt_task_2"),
241         TaskState.TASK_UNASSIGNED.get("manager"), Ids.OPEN_ACL_UNSAFE,
242         CreateMode.PERSISTENT);
243 
244       // preempt the first task, have it owned by another worker
245       ZKUtil.setData(zkw, ZKSplitLog.getEncodedNodeName(zkw, "tmt_task"),
246         TaskState.TASK_OWNED.get("another-worker"));
247       waitForCounter(tot_wkr_preempt_task, 0, 1, 1000);
248 
249       waitForCounter(tot_wkr_task_acquired, 1, 2, 1000);
250       assertEquals(2, slw.taskReadySeq);
251       assertTrue(TaskState.TASK_OWNED.equals(ZKUtil.getData(zkw,
252         ZKSplitLog.getEncodedNodeName(zkw, "tmt_task_2")), "tmt_svr"));
253     } finally {
254       stopSplitLogWorker(slw);
255     }
256   }
257 
258   @Test
259   public void testRescan() throws Exception {
260     LOG.info("testRescan");
261     ZKSplitLog.Counters.resetCounters();
262     slw = new SplitLogWorker(zkw, TEST_UTIL.getConfiguration(),
263         "svr", neverEndingTask);
264     slw.start();
265     Thread.yield(); // let the worker start
266     Thread.sleep(100);
267 
268     String task = ZKSplitLog.getEncodedNodeName(zkw, "task");
269     zkw.getRecoverableZooKeeper().create(task,
270       TaskState.TASK_UNASSIGNED.get("manager"), Ids.OPEN_ACL_UNSAFE,
271       CreateMode.PERSISTENT);
272 
273     waitForCounter(tot_wkr_task_acquired, 0, 1, 1000);
274     // now the worker is busy doing the above task
275 
276     // preempt the task, have it owned by another worker
277     ZKUtil.setData(zkw, task, TaskState.TASK_UNASSIGNED.get("manager"));
278     waitForCounter(tot_wkr_preempt_task, 0, 1, 1000);
279 
280     // create a RESCAN node
281     String rescan = ZKSplitLog.getEncodedNodeName(zkw, "RESCAN");
282     rescan = zkw.getRecoverableZooKeeper().create(rescan,
283       TaskState.TASK_UNASSIGNED.get("manager"), Ids.OPEN_ACL_UNSAFE,
284       CreateMode.PERSISTENT_SEQUENTIAL);
285 
286     waitForCounter(tot_wkr_task_acquired, 1, 2, 1000);
287     // RESCAN node might not have been processed if the worker became busy
288     // with the above task. preempt the task again so that now the RESCAN
289     // node is processed
290     ZKUtil.setData(zkw, task, TaskState.TASK_UNASSIGNED.get("manager"));
291     waitForCounter(tot_wkr_preempt_task, 1, 2, 1000);
292     waitForCounter(tot_wkr_task_acquired_rescan, 0, 1, 1000);
293 
294     List<String> nodes = ZKUtil.listChildrenNoWatch(zkw, zkw.splitLogZNode);
295     LOG.debug(nodes);
296     int num = 0;
297     for (String node : nodes) {
298       num++;
299       if (node.startsWith("RESCAN")) {
300         String name = ZKSplitLog.getEncodedNodeName(zkw, node);
301         String fn = ZKSplitLog.getFileName(name);
302         byte [] data = ZKUtil.getData(zkw, ZKUtil.joinZNode(zkw.splitLogZNode, fn));
303         String datastr = Bytes.toString(data);
304         assertTrue("data=" + datastr, TaskState.TASK_DONE.equals(data, "svr"));
305       }
306     }
307     assertEquals(2, num);
308   }
309 }