1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.mapreduce;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  import static org.mockito.Matchers.anyObject;
24  import static org.mockito.Mockito.doThrow;
25  import static org.mockito.Mockito.mock;
26  
27  import java.io.IOException;
28  import java.nio.ByteBuffer;
29  import java.util.Collection;
30  import java.util.Deque;
31  import java.util.List;
32  import java.util.NavigableMap;
33  import java.util.concurrent.ExecutorService;
34  import java.util.concurrent.atomic.AtomicInteger;
35  
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.Path;
40  import org.apache.hadoop.hbase.HBaseTestingUtility;
41  import org.apache.hadoop.hbase.HColumnDescriptor;
42  import org.apache.hadoop.hbase.HRegionInfo;
43  import org.apache.hadoop.hbase.HTableDescriptor;
44  import org.apache.hadoop.hbase.TableExistsException;
45  import org.apache.hadoop.hbase.client.HBaseAdmin;
46  import org.apache.hadoop.hbase.client.HConnection;
47  import org.apache.hadoop.hbase.client.HTable;
48  import org.apache.hadoop.hbase.client.Result;
49  import org.apache.hadoop.hbase.client.ResultScanner;
50  import org.apache.hadoop.hbase.client.Scan;
51  import org.apache.hadoop.hbase.client.ServerCallable;
52  import org.apache.hadoop.hbase.regionserver.HRegionServer;
53  import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad;
54  import org.apache.hadoop.hbase.util.Bytes;
55  import org.apache.hadoop.hbase.util.Pair;
56  import org.junit.AfterClass;
57  import org.junit.BeforeClass;
58  import org.junit.Test;
59  
60  import com.google.common.collect.Multimap;
61  
62  /**
63   * Test cases for the atomic load error handling of the bulk load functionality.
64   */
65  public class TestLoadIncrementalHFilesSplitRecovery {
66    final static Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
67  
68    private static HBaseTestingUtility util;
69  
70    final static int NUM_CFS = 10;
71    final static byte[] QUAL = Bytes.toBytes("qual");
72    final static int ROWCOUNT = 100;
73  
74    private final static byte[][] families = new byte[NUM_CFS][];
75    static {
76      for (int i = 0; i < NUM_CFS; i++) {
77        families[i] = Bytes.toBytes(family(i));
78      }
79    }
80  
81    static byte[] rowkey(int i) {
82      return Bytes.toBytes(String.format("row_%08d", i));
83    }
84  
85    static String family(int i) {
86      return String.format("family_%04d", i);
87    }
88  
89    static byte[] value(int i) {
90      return Bytes.toBytes(String.format("%010d", i));
91    }
92  
93    public static void buildHFiles(FileSystem fs, Path dir, int value)
94        throws IOException {
95      byte[] val = value(value);
96      for (int i = 0; i < NUM_CFS; i++) {
97        Path testIn = new Path(dir, family(i));
98  
99        TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i),
100           Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT);
101     }
102   }
103 
104   /**
105    * Creates a table with given table name and specified number of column
106    * families if the table does not already exist.
107    */
108   private void setupTable(String table, int cfs) throws IOException {
109     try {
110       LOG.info("Creating table " + table);
111       HTableDescriptor htd = new HTableDescriptor(table);
112       for (int i = 0; i < 10; i++) {
113         htd.addFamily(new HColumnDescriptor(family(i)));
114       }
115 
116       HBaseAdmin admin = util.getHBaseAdmin();
117       admin.createTable(htd);
118     } catch (TableExistsException tee) {
119       LOG.info("Table " + table + " already exists");
120     }
121   }
122 
123   private Path buildBulkFiles(String table, int value) throws Exception {
124     Path dir = util.getDataTestDir(table);
125     Path bulk1 = new Path(dir, table+value);
126     FileSystem fs = util.getTestFileSystem();
127     buildHFiles(fs, bulk1, value);
128     return bulk1;
129   }
130 
131   /**
132    * Populate table with known values.
133    */
134   private void populateTable(String table, int value) throws Exception {
135     // create HFiles for different column families
136     LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
137         util.getConfiguration());
138     Path bulk1 = buildBulkFiles(table, value);
139     HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
140     lih.doBulkLoad(bulk1, t);
141   }
142 
143   /**
144    * Split the known table in half.  (this is hard coded for this test suite)
145    */
146   private void forceSplit(String table) {
147     try {
148       // need to call regions server to by synchronous but isn't visible.
149       HRegionServer hrs = util.getRSForFirstRegionInTable(Bytes
150           .toBytes(table));
151 
152       for (HRegionInfo hri : hrs.getOnlineRegions()) {
153         if (Bytes.equals(hri.getTableName(), Bytes.toBytes(table))) {
154           // splitRegion doesn't work if startkey/endkey are null
155           hrs.splitRegion(hri, rowkey(ROWCOUNT / 2)); // hard code split
156         }
157       }
158 
159       // verify that split completed.
160       int regions;
161       do {
162         regions = 0;
163         for (HRegionInfo hri : hrs.getOnlineRegions()) {
164           if (Bytes.equals(hri.getTableName(), Bytes.toBytes(table))) {
165             regions++;
166           }
167         }
168         if (regions != 2) {
169           LOG.info("Taking some time to complete split...");
170           Thread.sleep(250);
171         }
172       } while (regions != 2);
173     } catch (IOException e) {
174       e.printStackTrace();
175     } catch (InterruptedException e) {
176       e.printStackTrace();
177     }
178   }
179 
180   @BeforeClass
181   public static void setupCluster() throws Exception {
182     util = new HBaseTestingUtility();
183     util.startMiniCluster(1);
184   }
185 
186   @AfterClass
187   public static void teardownCluster() throws Exception {
188     util.shutdownMiniCluster();
189   }
190 
191   /**
192    * Checks that all columns have the expected value and that there is the
193    * expected number of rows.
194    */
195   void assertExpectedTable(String table, int count, int value) {
196     try {
197       assertEquals(util.getHBaseAdmin().listTables(table).length, 1);
198 
199       HTable t = new HTable(util.getConfiguration(), table);
200       Scan s = new Scan();
201       ResultScanner sr = t.getScanner(s);
202       int i = 0;
203       for (Result r : sr) {
204         i++;
205         for (NavigableMap<byte[], byte[]> nm : r.getNoVersionMap().values()) {
206           for (byte[] val : nm.values()) {
207             assertTrue(Bytes.equals(val, value(value)));
208           }
209         }
210       }
211       assertEquals(count, i);
212     } catch (IOException e) {
213       fail("Failed due to exception");
214     }
215   }
216 
217   /**
218    * Test that shows that exception thrown from the RS side will result in an
219    * exception on the LIHFile client.
220    */
221   @Test(expected=IOException.class)
222   public void testBulkLoadPhaseFailure() throws Exception {
223     String table = "bulkLoadPhaseFailure";
224     setupTable(table, 10);
225 
226     final AtomicInteger attmptedCalls = new AtomicInteger();
227     final AtomicInteger failedCalls = new AtomicInteger();
228     LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
229         util.getConfiguration()) {
230 
231       protected List<LoadQueueItem> tryAtomicRegionLoad(final HConnection conn,
232           byte[] tableName, final byte[] first, Collection<LoadQueueItem> lqis) throws IOException {
233         int i = attmptedCalls.incrementAndGet();
234         if (i == 1) {
235           HConnection errConn = mock(HConnection.class);
236           try {
237             doThrow(new IOException("injecting bulk load error")).when(errConn)
238                 .getRegionServerWithRetries((ServerCallable) anyObject());
239           } catch (Exception e) {
240             LOG.fatal("mocking cruft, should never happen", e);
241             throw new RuntimeException("mocking cruft, should never happen");
242           }
243           failedCalls.incrementAndGet();
244           return super.tryAtomicRegionLoad(errConn, tableName, first, lqis);
245         }
246 
247         return super.tryAtomicRegionLoad(conn, tableName, first, lqis);
248       }
249     };
250 
251     // create HFiles for different column families
252     Path dir = buildBulkFiles(table, 1);
253     HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
254     lih.doBulkLoad(dir, t);
255 
256     fail("doBulkLoad should have thrown an exception");
257   }
258 
259   /**
260    * This test exercises the path where there is a split after initial
261    * validation but before the atomic bulk load call. We cannot use presplitting
262    * to test this path, so we actually inject a split just before the atomic
263    * region load.
264    */
265   @Test
266   public void testSplitWhileBulkLoadPhase() throws Exception {
267     final String table = "splitWhileBulkloadPhase";
268     setupTable(table, 10);
269     populateTable(table,1);
270     assertExpectedTable(table, ROWCOUNT, 1);
271 
272     // Now let's cause trouble.  This will occur after checks and cause bulk
273     // files to fail when attempt to atomically import.  This is recoverable.
274     final AtomicInteger attemptedCalls = new AtomicInteger();
275     LoadIncrementalHFiles lih2 = new LoadIncrementalHFiles(
276         util.getConfiguration()) {
277 
278       protected void bulkLoadPhase(final HTable htable, final HConnection conn,
279           ExecutorService pool, Deque<LoadQueueItem> queue,
280           final Multimap<ByteBuffer, LoadQueueItem> regionGroups) throws IOException {
281         int i = attemptedCalls.incrementAndGet();
282         if (i == 1) {
283           // On first attempt force a split.
284           forceSplit(table);
285         }
286 
287         super.bulkLoadPhase(htable, conn, pool, queue, regionGroups);
288       }
289     };
290 
291     // create HFiles for different column families
292     HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
293     Path bulk = buildBulkFiles(table, 2);
294     lih2.doBulkLoad(bulk, t);
295 
296     // check that data was loaded
297     // The three expected attempts are 1) failure because need to split, 2)
298     // load of split top 3) load of split bottom
299     assertEquals(attemptedCalls.get(), 3);
300     assertExpectedTable(table, ROWCOUNT, 2);
301   }
302 
303   /**
304    * This test splits a table and attempts to bulk load.  The bulk import files
305    * should be split before atomically importing.
306    */
307   @Test
308   public void testGroupOrSplitPresplit() throws Exception {
309     final String table = "groupOrSplitPresplit";
310     setupTable(table, 10);
311     populateTable(table, 1);
312     assertExpectedTable(table, ROWCOUNT, 1);
313     forceSplit(table);
314 
315     final AtomicInteger countedLqis= new AtomicInteger();
316     LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
317         util.getConfiguration()) {
318       protected List<LoadQueueItem> groupOrSplit(
319           Multimap<ByteBuffer, LoadQueueItem> regionGroups,
320           final LoadQueueItem item, final HTable htable,
321           final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
322         List<LoadQueueItem> lqis = super.groupOrSplit(regionGroups, item, htable, startEndKeys);
323         if (lqis != null) {
324           countedLqis.addAndGet(lqis.size());
325         }
326         return lqis;
327       }
328     };
329 
330     // create HFiles for different column families
331     Path bulk = buildBulkFiles(table, 2);
332     HTable ht = new HTable(util.getConfiguration(), Bytes.toBytes(table));
333     lih.doBulkLoad(bulk, ht);
334 
335     assertExpectedTable(table, ROWCOUNT, 2);
336     assertEquals(20, countedLqis.get());
337   }
338 
339   /**
340    * This simulates an remote exception which should cause LIHF to exit with an
341    * exception.
342    */
343   @Test(expected = IOException.class)
344   public void testGroupOrSplitFailure() throws Exception {
345     String table = "groupOrSplitFailure";
346     setupTable(table, 10);
347 
348     LoadIncrementalHFiles lih = new LoadIncrementalHFiles(
349         util.getConfiguration()) {
350       int i = 0;
351 
352       protected List<LoadQueueItem> groupOrSplit(
353           Multimap<ByteBuffer, LoadQueueItem> regionGroups,
354           final LoadQueueItem item, final HTable table,
355           final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
356         i++;
357 
358         if (i == 5) {
359           throw new IOException("failure");
360         }
361         return super.groupOrSplit(regionGroups, item, table, startEndKeys);
362       }
363     };
364 
365     // create HFiles for different column families
366     Path dir = buildBulkFiles(table,1);
367     HTable t = new HTable(util.getConfiguration(), Bytes.toBytes(table));
368     lih.doBulkLoad(dir, t);
369 
370     fail("doBulkLoad should have thrown an exception");
371   }
372 }