1   /*
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   * http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.coprocessor;
22  
23  import java.io.IOException;
24  
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.*;
27  import org.apache.hadoop.hbase.client.HTable;
28  import org.apache.hadoop.hbase.client.Put;
29  import org.apache.hadoop.hbase.client.RetriesExhaustedWithDetailsException;
30  import org.apache.hadoop.hbase.regionserver.HRegionServer;
31  import org.apache.hadoop.hbase.util.Bytes;
32  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
33  import org.junit.AfterClass;
34  import org.junit.BeforeClass;
35  import org.junit.Test;
36  
37  import static org.junit.Assert.*;
38  
39  /**
40   * Tests unhandled exceptions thrown by coprocessors running on regionserver.
41   * Expected result is that the master will remove the buggy coprocessor from
42   * its set of coprocessors and throw a org.apache.hadoop.hbase.DoNotRetryIOException
43   * back to the client.
44   * (HBASE-4014).
45   */
46  public class TestRegionServerCoprocessorExceptionWithRemove {
47    public static class BuggyRegionObserver extends SimpleRegionObserver {
48      @SuppressWarnings("null")
49      @Override
50      public void prePut(final ObserverContext<RegionCoprocessorEnvironment> c,
51                         final Put put, final WALEdit edit,
52                         final boolean writeToWAL) {
53        String tableName =
54            c.getEnvironment().getRegion().getRegionInfo().getTableNameAsString();
55        if (tableName.equals("observed_table")) {
56          Integer i = null;
57          i = i + 1;
58        }
59      }
60    }
61  
62    private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
63  
64    @BeforeClass
65    public static void setupBeforeClass() throws Exception {
66      // set configure to indicate which cp should be loaded
67      Configuration conf = TEST_UTIL.getConfiguration();
68      conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
69          BuggyRegionObserver.class.getName());
70      TEST_UTIL.startMiniCluster(2);
71    }
72  
73    @AfterClass
74    public static void teardownAfterClass() throws Exception {
75      TEST_UTIL.shutdownMiniCluster();
76    }
77  
78    @Test(timeout=30000)
79    public void testExceptionFromCoprocessorDuringPut()
80        throws IOException {
81      // Set watches on the zookeeper nodes for all of the regionservers in the
82      // cluster. When we try to write to TEST_TABLE, the buggy coprocessor will
83      // cause a NullPointerException, which will cause the regionserver (which
84      // hosts the region we attempted to write to) to abort. In turn, this will
85      // cause the nodeDeleted() method of the DeadRegionServer tracker to
86      // execute, which will set the rsZKNodeDeleted flag to true, which will
87      // pass this test.
88  
89      byte[] TEST_TABLE = Bytes.toBytes("observed_table");
90      byte[] TEST_FAMILY = Bytes.toBytes("aaa");
91  
92      HTable table = TEST_UTIL.createTable(TEST_TABLE, TEST_FAMILY);
93      TEST_UTIL.waitUntilAllRegionsAssigned(
94          TEST_UTIL.createMultiRegions(table, TEST_FAMILY));
95      // Note which regionServer that should survive the buggy coprocessor's
96      // prePut().
97      HRegionServer regionServer =
98          TEST_UTIL.getRSForFirstRegionInTable(TEST_TABLE);
99  
100     // same logic as {@link TestMasterCoprocessorExceptionWithRemove},
101     // but exception will be RetriesExhaustedWithDetailException rather
102     // than DoNotRetryIOException. The latter exception is what the RegionServer
103     // will have actually thrown, but the client will wrap this in a
104     // RetriesExhaustedWithDetailException.
105     // We will verify that "DoNotRetryIOException" appears in the text of the
106     // the exception's detailMessage.
107     boolean threwDNRE = false;
108     try {
109       final byte[] ROW = Bytes.toBytes("aaa");
110       Put put = new Put(ROW);
111       put.add(TEST_FAMILY, ROW, ROW);
112       table.put(put);
113     } catch (RetriesExhaustedWithDetailsException e) {
114       // below, could call instead :
115       // startsWith("Failed 1 action: DoNotRetryIOException.")
116       // But that might be too brittle if client-side
117       // DoNotRetryIOException-handler changes its message.
118       assertTrue(e.getMessage().contains("DoNotRetryIOException"));
119       threwDNRE = true;
120     } finally {
121       assertTrue(threwDNRE);
122     }
123 
124     // Wait 3 seconds for the regionserver to abort: expected result is that
125     // it will survive and not abort.
126     for (int i = 0; i < 3; i++) {
127       assertFalse(regionServer.isAborted());
128       try {
129         Thread.sleep(1000);
130       } catch (InterruptedException e) {
131         fail("InterruptedException while waiting for regionserver " +
132             "zk node to be deleted.");
133       }
134     }
135   }
136 }