1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver.wal;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  
24  import java.io.IOException;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.commons.logging.impl.Log4JLogger;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.hbase.HBaseTestingUtility;
31  import org.apache.hadoop.hbase.HColumnDescriptor;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.MiniHBaseCluster;
35  import org.apache.hadoop.hbase.client.HBaseAdmin;
36  import org.apache.hadoop.hbase.client.HTable;
37  import org.apache.hadoop.hbase.client.Put;
38  import org.apache.hadoop.hbase.regionserver.HRegion;
39  import org.apache.hadoop.hbase.regionserver.HRegionServer;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.FSUtils;
42  import org.apache.hadoop.hdfs.DFSClient;
43  import org.apache.hadoop.hdfs.MiniDFSCluster;
44  import org.apache.hadoop.hdfs.server.datanode.DataNode;
45  import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
46  import org.apache.log4j.Level;
47  import org.junit.After;
48  import org.junit.Before;
49  import org.junit.BeforeClass;
50  import org.junit.Test;
51  
52  /**
53   * Tests for conditions that should trigger RegionServer aborts when
54   * rolling the current HLog fails.
55   */
56  public class TestLogRollAbort {
57    private static final Log LOG = LogFactory.getLog(TestLogRolling.class);
58    private static MiniDFSCluster dfsCluster;
59    private static HBaseAdmin admin;
60    private static MiniHBaseCluster cluster;
61    private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
62  
63    // verbose logging on classes that are touched in these tests
64    {
65      ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL);
66      ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL);
67      ((Log4JLogger)LogFactory.getLog("org.apache.hadoop.hdfs.server.namenode.FSNamesystem"))
68          .getLogger().setLevel(Level.ALL);
69      ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL);
70      ((Log4JLogger)HRegionServer.LOG).getLogger().setLevel(Level.ALL);
71      ((Log4JLogger)HRegion.LOG).getLogger().setLevel(Level.ALL);
72      ((Log4JLogger)HLog.LOG).getLogger().setLevel(Level.ALL);
73    }
74  
75    // Need to override this setup so we can edit the config before it gets sent
76    // to the HDFS & HBase cluster startup.
77    @BeforeClass
78    public static void setUpBeforeClass() throws Exception {
79      // Tweak default timeout values down for faster recovery
80      TEST_UTIL.getConfiguration().setInt(
81          "hbase.regionserver.logroll.errors.tolerated", 2);
82      TEST_UTIL.getConfiguration().setInt("ipc.ping.interval", 10 * 1000);
83      TEST_UTIL.getConfiguration().setInt("ipc.socket.timeout", 10 * 1000);
84      TEST_UTIL.getConfiguration().setInt("hbase.rpc.timeout", 10 * 1000);
85  
86      // Increase the amount of time between client retries
87      TEST_UTIL.getConfiguration().setLong("hbase.client.pause", 5 * 1000);
88  
89      // make sure log.hflush() calls syncFs() to open a pipeline
90      TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true);
91      // lower the namenode & datanode heartbeat so the namenode
92      // quickly detects datanode failures
93      TEST_UTIL.getConfiguration().setInt("heartbeat.recheck.interval", 5000);
94      TEST_UTIL.getConfiguration().setInt("dfs.heartbeat.interval", 1);
95      // the namenode might still try to choose the recently-dead datanode
96      // for a pipeline, so try to a new pipeline multiple times
97      TEST_UTIL.getConfiguration().setInt("dfs.client.block.write.retries", 10);
98      // set periodic sync to 2 min so it doesn't run during test
99      TEST_UTIL.getConfiguration().setInt("hbase.regionserver.optionallogflushinterval",
100         120 * 1000);
101   }
102 
103   @Before
104   public void setUp() throws Exception {
105     TEST_UTIL.startMiniCluster(2);
106 
107     cluster = TEST_UTIL.getHBaseCluster();
108     dfsCluster = TEST_UTIL.getDFSCluster();
109     admin = TEST_UTIL.getHBaseAdmin();
110 
111     // disable region rebalancing (interferes with log watching)
112     cluster.getMaster().balanceSwitch(false);
113   }
114 
115   @After
116   public void tearDown() throws Exception {
117     TEST_UTIL.shutdownMiniCluster();
118   }
119 
120   /**
121    * Tests that RegionServer aborts if we hit an error closing the WAL when
122    * there are unsynced WAL edits.  See HBASE-4282.
123    */
124   @Test
125   public void testRSAbortWithUnflushedEdits() throws Exception {
126     LOG.info("Starting testRSAbortWithUnflushedEdits()");
127 
128     // When the META table can be opened, the region servers are running
129     new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
130 
131     // Create the test table and open it
132     String tableName = this.getClass().getSimpleName();
133     HTableDescriptor desc = new HTableDescriptor(tableName);
134     desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
135     desc.setDeferredLogFlush(true);
136 
137     admin.createTable(desc);
138     HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
139 
140     HRegionServer server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
141     HLog log = server.getWAL();
142 
143     assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());
144     // don't run this test without append support (HDFS-200 & HDFS-142)
145     assertTrue("Need append support for this test",
146         FSUtils.isAppendSupported(TEST_UTIL.getConfiguration()));
147 
148     Put p = new Put(Bytes.toBytes("row2001"));
149     p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2001));
150     table.put(p);
151 
152     log.sync();
153     assertEquals("Last deferred edit should have been cleared by sync()",
154         log.getLastDeferredSeq(), 0);
155 
156     p = new Put(Bytes.toBytes("row2002"));
157     p.add(HConstants.CATALOG_FAMILY, Bytes.toBytes("col"), Bytes.toBytes(2002));
158     table.put(p);
159 
160     dfsCluster.restartDataNodes();
161     LOG.info("Restarted datanodes");
162 
163     assertTrue("Should have an outstanding WAL edit",
164         log.getLastDeferredSeq() > 0);
165     try {
166       log.rollWriter(true);
167       fail("Log roll should have triggered FailedLogCloseException");
168     } catch (FailedLogCloseException flce) {
169       assertTrue("Should have deferred flush log edits outstanding",
170           log.getLastDeferredSeq() > 0);
171     }
172   }
173 }