1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.ArrayList;
22  import java.util.List;
23  import java.util.concurrent.atomic.AtomicLong;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.fs.FileSystem;
29  import org.apache.hadoop.fs.Path;
30  import org.apache.hadoop.hbase.HBaseConfiguration;
31  import org.apache.hadoop.hbase.HBaseTestingUtility;
32  import org.apache.hadoop.hbase.HColumnDescriptor;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.KeyValue;
35  import org.apache.hadoop.hbase.MultithreadedTestUtil.RepeatingTestThread;
36  import org.apache.hadoop.hbase.MultithreadedTestUtil.TestContext;
37  import org.apache.hadoop.hbase.TableExistsException;
38  import org.apache.hadoop.hbase.client.HBaseAdmin;
39  import org.apache.hadoop.hbase.client.HConnection;
40  import org.apache.hadoop.hbase.client.HTable;
41  import org.apache.hadoop.hbase.client.Result;
42  import org.apache.hadoop.hbase.client.ResultScanner;
43  import org.apache.hadoop.hbase.client.Scan;
44  import org.apache.hadoop.hbase.client.ServerCallable;
45  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
46  import org.apache.hadoop.hbase.io.hfile.Compression;
47  import org.apache.hadoop.hbase.io.hfile.HFile;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.Pair;
50  import org.junit.Test;
51  
52  import com.google.common.collect.Lists;
53  
/**
 * Tests bulk loading of HFiles and shows the atomicity or lack of atomicity of
 * the region server's bulkLoad functionality.
 */
58  public class TestHRegionServerBulkLoad {
59    final static Log LOG = LogFactory.getLog(TestHRegionServerBulkLoad.class);
60    private static HBaseTestingUtility UTIL = new HBaseTestingUtility();
61    private final static Configuration conf = UTIL.getConfiguration();
62    private final static byte[] QUAL = Bytes.toBytes("qual");
63    private final static int NUM_CFS = 10;
64    public static int BLOCKSIZE = 64 * 1024;
65    public static String COMPRESSION = Compression.Algorithm.NONE.getName();
66  
67    private final static byte[][] families = new byte[NUM_CFS][];
68    static {
69      for (int i = 0; i < NUM_CFS; i++) {
70        families[i] = Bytes.toBytes(family(i));
71      }
72    }
73  
74    static byte[] rowkey(int i) {
75      return Bytes.toBytes(String.format("row_%08d", i));
76    }
77  
78    static String family(int i) {
79      return String.format("family_%04d", i);
80    }
81  
82    /**
83     * Create an HFile with the given number of rows with a specified value.
84     */
85    public static void createHFile(FileSystem fs, Path path, byte[] family,
86        byte[] qualifier, byte[] value, int numRows) throws IOException {
87      HFile.Writer writer = HFile
88          .getWriterFactory(conf, new CacheConfig(conf))
89          .createWriter(fs, path, BLOCKSIZE, COMPRESSION, KeyValue.KEY_COMPARATOR);
90      long now = System.currentTimeMillis();
91      try {
92        // subtract 2 since iterateOnSplits doesn't include boundary keys
93        for (int i = 0; i < numRows; i++) {
94          KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value);
95          writer.append(kv);
96        }
97      } finally {
98        writer.close();
99      }
100   }
101 
102   /**
103    * Thread that does full scans of the table looking for any partially
104    * completed rows.
105    * 
106    * Each iteration of this loads 10 hdfs files, which occupies 5 file open file
107    * handles. So every 10 iterations (500 file handles) it does a region
108    * compaction to reduce the number of open file handles.
109    */
110   public static class AtomicHFileLoader extends RepeatingTestThread {
111     final AtomicLong numBulkLoads = new AtomicLong();
112     final AtomicLong numCompactions = new AtomicLong();
113     private String tableName;
114 
115     public AtomicHFileLoader(String tableName, TestContext ctx,
116         byte targetFamilies[][]) throws IOException {
117       super(ctx);
118       this.tableName = tableName;
119     }
120 
121     public void doAnAction() throws Exception {
122       long iteration = numBulkLoads.getAndIncrement();
123       Path dir =  UTIL.getDataTestDir(String.format("bulkLoad_%08d",
124           iteration));
125 
126       // create HFiles for different column families
127       FileSystem fs = UTIL.getTestFileSystem();
128       byte[] val = Bytes.toBytes(String.format("%010d", iteration));
129       final List<Pair<byte[], String>> famPaths = new ArrayList<Pair<byte[], String>>(
130           NUM_CFS);
131       for (int i = 0; i < NUM_CFS; i++) {
132         Path hfile = new Path(dir, family(i));
133         byte[] fam = Bytes.toBytes(family(i));
134         createHFile(fs, hfile, fam, QUAL, val, 1000);
135         famPaths.add(new Pair<byte[], String>(fam, hfile.toString()));
136       }
137 
138       // bulk load HFiles
139       HConnection conn = UTIL.getHBaseAdmin().getConnection();
140       byte[] tbl = Bytes.toBytes(tableName);
141       conn.getRegionServerWithRetries(new ServerCallable<Void>(conn, tbl, Bytes
142           .toBytes("aaa")) {
143         @Override
144         public Void call() throws Exception {
145           LOG.debug("Going to connect to server " + location + " for row "
146               + Bytes.toStringBinary(row));
147           byte[] regionName = location.getRegionInfo().getRegionName();
148           server.bulkLoadHFiles(famPaths, regionName);
149           return null;
150         }
151       });
152 
153       // Periodically do compaction to reduce the number of open file handles.
154       if (numBulkLoads.get() % 10 == 0) {
155         // 10 * 50 = 500 open file handles!
156         conn.getRegionServerWithRetries(new ServerCallable<Void>(conn, tbl,
157             Bytes.toBytes("aaa")) {
158           @Override
159           public Void call() throws Exception {
160             LOG.debug("compacting " + location + " for row "
161                 + Bytes.toStringBinary(row));
162             server.compactRegion(location.getRegionInfo(), true);
163             numCompactions.incrementAndGet();
164             return null;
165           }
166         });
167       }
168     }
169   }
170 
171   /**
172    * Thread that does full scans of the table looking for any partially
173    * completed rows.
174    */
175   public static class AtomicScanReader extends RepeatingTestThread {
176     byte targetFamilies[][];
177     HTable table;
178     AtomicLong numScans = new AtomicLong();
179     AtomicLong numRowsScanned = new AtomicLong();
180     String TABLE_NAME;
181 
182     public AtomicScanReader(String TABLE_NAME, TestContext ctx,
183         byte targetFamilies[][]) throws IOException {
184       super(ctx);
185       this.TABLE_NAME = TABLE_NAME;
186       this.targetFamilies = targetFamilies;
187       table = new HTable(conf, TABLE_NAME);
188     }
189 
190     public void doAnAction() throws Exception {
191       Scan s = new Scan();
192       for (byte[] family : targetFamilies) {
193         s.addFamily(family);
194       }
195       ResultScanner scanner = table.getScanner(s);
196 
197       for (Result res : scanner) {
198         byte[] lastRow = null, lastFam = null, lastQual = null;
199         byte[] gotValue = null;
200         for (byte[] family : targetFamilies) {
201           byte qualifier[] = QUAL;
202           byte thisValue[] = res.getValue(family, qualifier);
203           if (gotValue != null && thisValue != null
204               && !Bytes.equals(gotValue, thisValue)) {
205 
206             StringBuilder msg = new StringBuilder();
207             msg.append("Failed on scan ").append(numScans)
208                 .append(" after scanning ").append(numRowsScanned)
209                 .append(" rows!\n");
210             msg.append("Current  was " + Bytes.toString(res.getRow()) + "/"
211                 + Bytes.toString(family) + ":" + Bytes.toString(qualifier)
212                 + " = " + Bytes.toString(thisValue) + "\n");
213             msg.append("Previous  was " + Bytes.toString(lastRow) + "/"
214                 + Bytes.toString(lastFam) + ":" + Bytes.toString(lastQual)
215                 + " = " + Bytes.toString(gotValue));
216             throw new RuntimeException(msg.toString());
217           }
218 
219           lastFam = family;
220           lastQual = qualifier;
221           lastRow = res.getRow();
222           gotValue = thisValue;
223         }
224         numRowsScanned.getAndIncrement();
225       }
226       numScans.getAndIncrement();
227     }
228   }
229 
230   /**
231    * Creates a table with given table name and specified number of column
232    * families if the table does not already exist.
233    */
234   private void setupTable(String table, int cfs) throws IOException {
235     try {
236       LOG.info("Creating table " + table);
237       HTableDescriptor htd = new HTableDescriptor(table);
238       for (int i = 0; i < 10; i++) {
239         htd.addFamily(new HColumnDescriptor(family(i)));
240       }
241 
242       HBaseAdmin admin = UTIL.getHBaseAdmin();
243       admin.createTable(htd);
244     } catch (TableExistsException tee) {
245       LOG.info("Table " + table + " already exists");
246     }
247   }
248 
249   /**
250    * Atomic bulk load.
251    */
252   @Test
253   public void testAtomicBulkLoad() throws Exception {
254     String TABLE_NAME = "atomicBulkLoad";
255 
256     int millisToRun = 30000;
257     int numScanners = 50;
258 
259     UTIL.startMiniCluster(1);
260     try {
261       runAtomicBulkloadTest(TABLE_NAME, millisToRun, numScanners);
262     } finally {
263       UTIL.shutdownMiniCluster();
264     }
265   }
266 
267   void runAtomicBulkloadTest(String tableName, int millisToRun, int numScanners)
268       throws Exception {
269     setupTable(tableName, 10);
270 
271     TestContext ctx = new TestContext(UTIL.getConfiguration());
272 
273     AtomicHFileLoader loader = new AtomicHFileLoader(tableName, ctx, null);
274     ctx.addThread(loader);
275 
276     List<AtomicScanReader> scanners = Lists.newArrayList();
277     for (int i = 0; i < numScanners; i++) {
278       AtomicScanReader scanner = new AtomicScanReader(tableName, ctx, families);
279       scanners.add(scanner);
280       ctx.addThread(scanner);
281     }
282 
283     ctx.startThreads();
284     ctx.waitFor(millisToRun);
285     ctx.stop();
286 
287     LOG.info("Loaders:");
288     LOG.info("  loaded " + loader.numBulkLoads.get());
289     LOG.info("  compations " + loader.numCompactions.get());
290 
291     LOG.info("Scanners:");
292     for (AtomicScanReader scanner : scanners) {
293       LOG.info("  scanned " + scanner.numScans.get());
294       LOG.info("  verified " + scanner.numRowsScanned.get() + " rows");
295     }
296   }
297 
298   /**
299    * Run test on an HBase instance for 5 minutes. This assumes that the table
300    * under test only has a single region.
301    */
302   public static void main(String args[]) throws Exception {
303     try {
304       Configuration c = HBaseConfiguration.create();
305       TestHRegionServerBulkLoad test = new TestHRegionServerBulkLoad();
306       test.setConf(c);
307       test.runAtomicBulkloadTest("atomicTableTest", 5 * 60 * 1000, 50);
308     } finally {
309       System.exit(0); // something hangs (believe it is lru threadpool)
310     }
311   }
312 
313   private void setConf(Configuration c) {
314     UTIL = new HBaseTestingUtility(c);
315   }
316 }