/**
 * Copyright 2007 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.fail;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.spy;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.util.Bytes;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

/**
 * Test compactions
 */
public class TestCompaction extends HBaseTestCase {
  static final Log LOG = LogFactory.getLog(TestCompaction.class.getName());
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  private HRegion r = null;
  private Path compactionDir = null;
  private Path regionCompactionDir = null;
  private static final byte [] COLUMN_FAMILY = fam1;
  private final byte [] STARTROW = Bytes.toBytes(START_KEY);
  private static final byte [] COLUMN_FAMILY_TEXT = COLUMN_FAMILY;
  private int compactionThreshold;
  private byte[] firstRowBytes, secondRowBytes, thirdRowBytes;
  private final byte[] col1, col2;

  /** constructor */
  public TestCompaction() throws Exception {
    super();

    // Set cache flush size to 1MB
    conf.setInt("hbase.hregion.memstore.flush.size", 1024*1024);
    conf.setInt("hbase.hregion.memstore.block.multiplier", 100);
    compactionThreshold = conf.getInt("hbase.hstore.compactionThreshold", 3);

    firstRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
    secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
    // Increment the least significant character so we get to next row.
    secondRowBytes[START_KEY_BYTES.length - 1]++;
    thirdRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
    thirdRowBytes[START_KEY_BYTES.length - 1]++;
    thirdRowBytes[START_KEY_BYTES.length - 1]++;
    col1 = "column1".getBytes(HConstants.UTF8_ENCODING);
    col2 = "column2".getBytes(HConstants.UTF8_ENCODING);
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    HTableDescriptor htd = createTableDescriptor(getName());
    this.r = createNewHRegion(htd, null, null);
  }

  @Override
  public void tearDown() throws Exception {
    HLog hlog = r.getLog();
    this.r.close();
    hlog.closeAndDelete();
    super.tearDown();
  }

  /**
   * Test that on a major compaction, if all cells are expired or deleted, then
   * we'll end up with no product.  Make sure the scanner over the region
   * returns the right answer in this case - and that it just basically works.
   * @throws IOException
   */
  public void testMajorCompactingToNoOutput() throws IOException {
    createStoreFile(r);
    for (int i = 0; i < compactionThreshold; i++) {
      createStoreFile(r);
    }
    // Now delete everything.
    InternalScanner s = r.getScanner(new Scan());
    do {
      List<KeyValue> results = new ArrayList<KeyValue>();
      boolean result = s.next(results);
      if (!results.isEmpty()) {
        r.delete(new Delete(results.get(0).getRow()), null, false);
      }
      if (!result) break;
    } while (true);
    // Flush
    r.flushcache();
    // Major compact.
    r.compactStores(true);
    s = r.getScanner(new Scan());
    int counter = 0;
    do {
      List<KeyValue> results = new ArrayList<KeyValue>();
      boolean result = s.next(results);
      if (!result) break;
      counter++;
    } while (true);
    assertEquals(0, counter);
  }

  /**
   * Run compaction while flushing the memstore.
   * Assert deletes get cleaned up.
   * @throws Exception
   */
  public void testMajorCompaction() throws Exception {
    createStoreFile(r);
    for (int i = 0; i < compactionThreshold; i++) {
      createStoreFile(r);
    }
    // Add more content.
    addContent(new HRegionIncommon(r), Bytes.toString(COLUMN_FAMILY));

    // Now there are about 5 versions of each column.
    // By default only 3 (MAXVERSIONS) versions are allowed per column.
    //
    // Assert == 3 when we ask for versions.
    Result result = r.get(new Get(STARTROW).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());

    // See if CompactionProgress is in place but still null (no compaction yet).
    for (Store store: this.r.stores.values()) {
      assertNull(store.getCompactionProgress());
    }

    r.flushcache();
    r.compactStores(true);

    // See if CompactionProgress has done its thing on at least one store.
    int storeCount = 0;
    for (Store store: this.r.stores.values()) {
      CompactionProgress progress = store.getCompactionProgress();
      if (progress != null) {
        ++storeCount;
        assertTrue(progress.currentCompactedKVs > 0);
        assertTrue(progress.totalCompactingKVs > 0);
      }
    }
    assertTrue(storeCount > 0);

    // Look at the second row.
    // Increment the least significant character so we get to the next row.
    byte [] secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING);
    secondRowBytes[START_KEY_BYTES.length - 1]++;

    // Always 3 versions if that is what max versions is.
    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());

    // Now add deletes to memstore and then flush it.  That will put us over
    // the compaction threshold of 3 store files.  Compacting these store files
    // should result in a compacted store file that has no references to the
    // deleted row.
    Delete delete = new Delete(secondRowBytes, System.currentTimeMillis(), null);
    delete.deleteFamily(COLUMN_FAMILY);
    r.delete(delete, null, true);

    // Assert deleted.
    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
    assertTrue("Second row should have been deleted", result.isEmpty());

    r.flushcache();

    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
    assertTrue("Second row should have been deleted", result.isEmpty());

    // Add a bit of data and flush.  Start adding at 'bbb'.
    createSmallerStoreFile(this.r);
    r.flushcache();
    // Assert that the second row is still deleted.
    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
    assertTrue("Second row should still be deleted", result.isEmpty());

    // Force major compaction.
    r.compactStores(true);
    assertEquals(1, r.getStore(COLUMN_FAMILY_TEXT).getStorefiles().size());

    result = r.get(new Get(secondRowBytes).addFamily(COLUMN_FAMILY_TEXT).setMaxVersions(100), null);
    assertTrue("Second row should still be deleted", result.isEmpty());

    // Make sure the store files do have some 'aaa' keys in them -- exactly 3.
    // Also, that compacted store files do not have any secondRowBytes because
    // they were deleted.
    verifyCounts(3, 0);

    // Multiple versions are allowed for an entry, so the delete isn't enough.
    // Lower the TTL and let it expire to ensure that all our entries have been wiped.
    final int ttlInSeconds = 1;
    for (Store store: this.r.stores.values()) {
      store.ttl = ttlInSeconds * 1000;
    }
    Thread.sleep(ttlInSeconds * 1000);

    r.compactStores(true);
    int count = count();
    assertEquals("Should not see anything after TTL has expired", 0, count);
  }

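  /**
   * Minor compaction after a whole-row delete of the second row: the deleted
   * cells should stay gone while the first row remains intact
   * (see {@link #testMinorCompactionWithDelete(Delete)}).
   */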
  public void testMinorCompactionWithDeleteRow() throws Exception {
    Delete deleteRow = new Delete(secondRowBytes);
    testMinorCompactionWithDelete(deleteRow);
  }

  public void testMinorCompactionWithDeleteColumn1() throws Exception {
    Delete dc = new Delete(secondRowBytes);
    /* delete all timestamps in the column */
    dc.deleteColumns(fam2, col2);
    testMinorCompactionWithDelete(dc);
  }

  public void testMinorCompactionWithDeleteColumn2() throws Exception {
    Delete dc = new Delete(secondRowBytes);
    dc.deleteColumn(fam2, col2);
    /* compactionThreshold is 3. The table has 4 versions: 0, 1, 2, and 3.
     * We only delete the latest version. One might expect to see only
     * versions 1 and 2. HBase differs, and gives us 0, 1 and 2.
     * This is okay as well. Since there was no compaction done before the
     * delete, version 0 seems to stay on.
     */
    //testMinorCompactionWithDelete(dc, 2);
    testMinorCompactionWithDelete(dc, 3);
  }
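
  /**
   * Minor compaction after deleting the entire fam2 column family for the
   * second row: fam2:col2 should come back empty, while fam1:col1 on the
   * first row keeps all of its versions.
   */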
  public void testMinorCompactionWithDeleteColumnFamily() throws Exception {
    Delete deleteCF = new Delete(secondRowBytes);
    deleteCF.deleteFamily(fam2);
    testMinorCompactionWithDelete(deleteCF);
  }

  public void testMinorCompactionWithDeleteVersion1() throws Exception {
    Delete deleteVersion = new Delete(secondRowBytes);
    deleteVersion.deleteColumns(fam2, col2, 2);
    /* compactionThreshold is 3. The table has 4 versions: 0, 1, 2, and 3.
     * We delete versions 0 ... 2. So, we still have one remaining.
     */
    testMinorCompactionWithDelete(deleteVersion, 1);
  }

  public void testMinorCompactionWithDeleteVersion2() throws Exception {
    Delete deleteVersion = new Delete(secondRowBytes);
    deleteVersion.deleteColumn(fam2, col2, 1);
    /*
     * The table has 4 versions: 0, 1, 2, and 3.
     * Version 0 does not count.
     * We delete version 1.
     * Two versions should remain.
     */
    testMinorCompactionWithDelete(deleteVersion, 2);
  }

  /*
   * A helper function to test the minor compaction algorithm. We check that
   * the delete markers are left behind. Takes a delete as an argument, which
   * can be any kind of delete (row, column, column family, etc.) that
   * essentially deletes row2 and column2. row1 and column1 should be left
   * untouched.
   */
  private void testMinorCompactionWithDelete(Delete delete) throws Exception {
    testMinorCompactionWithDelete(delete, 0);
  }

  private void testMinorCompactionWithDelete(Delete delete, int expectedResultsAfterDelete) throws Exception {
    HRegionIncommon loader = new HRegionIncommon(r);
    for (int i = 0; i < compactionThreshold + 1; i++) {
      addContent(loader, Bytes.toString(fam1), Bytes.toString(col1), firstRowBytes, thirdRowBytes, i);
      addContent(loader, Bytes.toString(fam1), Bytes.toString(col2), firstRowBytes, thirdRowBytes, i);
      addContent(loader, Bytes.toString(fam2), Bytes.toString(col1), firstRowBytes, thirdRowBytes, i);
      addContent(loader, Bytes.toString(fam2), Bytes.toString(col2), firstRowBytes, thirdRowBytes, i);
      r.flushcache();
    }

    Result result = r.get(new Get(firstRowBytes).addColumn(fam1, col1).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());
    result = r.get(new Get(secondRowBytes).addColumn(fam2, col2).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());

    // Now add deletes to memstore and then flush it.  That will put us over
    // the compaction threshold of 3 store files.  Compacting these store files
    // should result in a compacted store file that has no references to the
    // deleted row.
    r.delete(delete, null, true);

    // Make sure that we have only deleted family2 from secondRowBytes
    result = r.get(new Get(secondRowBytes).addColumn(fam2, col2).setMaxVersions(100), null);
    assertEquals(expectedResultsAfterDelete, result.size());
    // but we still have firstrow
    result = r.get(new Get(firstRowBytes).addColumn(fam1, col1).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());

    r.flushcache();
    // should not change anything.
    // Let us check again

    // Make sure that we have only deleted family2 from secondRowBytes
    result = r.get(new Get(secondRowBytes).addColumn(fam2, col2).setMaxVersions(100), null);
    assertEquals(expectedResultsAfterDelete, result.size());
    // but we still have firstrow
    result = r.get(new Get(firstRowBytes).addColumn(fam1, col1).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());

    // do a compaction
    Store store2 = this.r.stores.get(fam2);
    int numFiles1 = store2.getStorefiles().size();
    assertTrue("Was expecting to see 4 store files", numFiles1 > compactionThreshold); // > 3
    store2.compactRecent(compactionThreshold);   // = 3
    int numFiles2 = store2.getStorefiles().size();
    // Check that we did compact
    assertTrue("Number of store files should go down", numFiles1 > numFiles2);
    // Check that it was a minor compaction.
    assertTrue("Was not supposed to be a major compaction", numFiles2 > 1);

    // Make sure that we have only deleted family2 from secondRowBytes
    result = r.get(new Get(secondRowBytes).addColumn(fam2, col2).setMaxVersions(100), null);
    assertEquals(expectedResultsAfterDelete, result.size());
    // but we still have firstrow
    result = r.get(new Get(firstRowBytes).addColumn(fam1, col1).setMaxVersions(100), null);
    assertEquals(compactionThreshold, result.size());
  }

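  /**
   * Scan every store file of COLUMN_FAMILY_TEXT and check that exactly
   * countRow1 KeyValues are found for STARTROW and countRow2 for the
   * second row.
   */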
  private void verifyCounts(int countRow1, int countRow2) throws Exception {
    int count1 = 0;
    int count2 = 0;
    for (StoreFile f: this.r.stores.get(COLUMN_FAMILY_TEXT).getStorefiles()) {
      HFileScanner scanner = f.getReader().getScanner(false, false);
      if (!scanner.seekTo()) {
        continue;
      }
      do {
        byte [] row = scanner.getKeyValue().getRow();
        if (Bytes.equals(row, STARTROW)) {
          count1++;
        } else if (Bytes.equals(row, secondRowBytes)) {
          count2++;
        }
      } while (scanner.next());
    }
    assertEquals(countRow1, count1);
    assertEquals(countRow2, count2);
  }

  /**
   * Verify that you can stop a long-running compaction
   * (used during RS shutdown)
   * @throws Exception
   */
  public void testInterruptCompaction() throws Exception {
    assertEquals(0, count());

    // Lower the check interval for this test so the compaction looks for a
    // stop request more often.
    int origWI = Store.closeCheckInterval;
    Store.closeCheckInterval = 10*1000; // 10 KB

    try {
      // Create a couple of store files totaling ~15 KB (over the 10 KB interval).
      int jmax = (int) Math.ceil(15.0/compactionThreshold);
      byte [] pad = new byte[1000]; // 1 KB chunk
      for (int i = 0; i < compactionThreshold; i++) {
        HRegionIncommon loader = new HRegionIncommon(r);
        Put p = new Put(Bytes.add(STARTROW, Bytes.toBytes(i)));
        p.setWriteToWAL(false);
        for (int j = 0; j < jmax; j++) {
          p.add(COLUMN_FAMILY, Bytes.toBytes(j), pad);
        }
        addContent(loader, Bytes.toString(COLUMN_FAMILY));
        loader.put(p);
        loader.flushcache();
      }

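      // Spy the region so that writes get disabled just before compaction prep
      // runs; the in-flight compaction should then notice that writes are
      // disabled (it re-checks roughly every closeCheckInterval bytes) and
      // bail out without producing a new store file.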
      HRegion spyR = spy(r);
      doAnswer(new Answer<Object>() {
        public Object answer(InvocationOnMock invocation) throws Throwable {
          r.writestate.writesEnabled = false;
          return invocation.callRealMethod();
        }
      }).when(spyR).doRegionCompactionPrep();

      // force a minor compaction, but not before requesting a stop
      spyR.compactStores();

      // ensure that the compaction stopped, all old files are intact,
      Store s = r.stores.get(COLUMN_FAMILY);
      assertEquals(compactionThreshold, s.getStorefilesCount());
      assertTrue(s.getStorefilesSize() > 15*1000);
      // and no new store files persisted past compactStores()
      FileStatus[] ls = FileSystem.get(conf).listStatus(r.getTmpDir());
      assertEquals(0, ls.length);

    } finally {
      // don't mess up future tests
      r.writestate.writesEnabled = true;
      Store.closeCheckInterval = origWI;

      // Delete all Store information once done using
      for (int i = 0; i < compactionThreshold; i++) {
        Delete delete = new Delete(Bytes.add(STARTROW, Bytes.toBytes(i)));
        delete.deleteFamily(COLUMN_FAMILY);
        r.delete(delete, null, true);
      }
      r.flushcache();

      // Multiple versions allowed for an entry, so the delete isn't enough
      // Lower TTL and expire to ensure that all our entries have been wiped
      final int ttlInSeconds = 1;
      for (Store store: this.r.stores.values()) {
        store.ttl = ttlInSeconds * 1000;
      }
      Thread.sleep(ttlInSeconds * 1000);

      r.compactStores(true);
      assertEquals(0, count());
    }
  }

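  /**
   * @return the total number of KeyValues across all store files of
   * COLUMN_FAMILY_TEXT.
   */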
  private int count() throws IOException {
    int count = 0;
    for (StoreFile f: this.r.stores.get(COLUMN_FAMILY_TEXT).getStorefiles()) {
      HFileScanner scanner = f.getReader().getScanner(false, false);
      if (!scanner.seekTo()) {
        continue;
      }
      do {
        count++;
      } while (scanner.next());
    }
    return count;
  }

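  /**
   * Fill the region with the standard test content and flush, creating one
   * new store file.
   */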
  private void createStoreFile(final HRegion region) throws IOException {
    HRegionIncommon loader = new HRegionIncommon(region);
    addContent(loader, Bytes.toString(COLUMN_FAMILY));
    loader.flushcache();
  }

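  /**
   * Like {@link #createStoreFile(HRegion)}, but only loads rows starting at
   * 'bbb', producing a smaller store file.
   */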
  private void createSmallerStoreFile(final HRegion region) throws IOException {
    HRegionIncommon loader = new HRegionIncommon(region);
    addContent(loader, Bytes.toString(COLUMN_FAMILY), Bytes.toBytes("bbb"), null);
    loader.flushcache();
  }

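  /**
   * Compact a set of store files, then overwrite the compaction output with
   * garbage before completing the compaction. completeCompaction() should
   * fail and leave the corrupt file in the 'tmp' directory rather than moving
   * it into the store.
   * @throws Exception
   */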
  public void testCompactionWithCorruptResult() throws Exception {
    int nfiles = 10;
    for (int i = 0; i < nfiles; i++) {
      createStoreFile(r);
    }
    Store store = r.getStore(COLUMN_FAMILY);

    List<StoreFile> storeFiles = store.getStorefiles();
    long maxId = StoreFile.getMaxSequenceIdInList(storeFiles);

    StoreFile.Writer compactedFile = store.compactStore(storeFiles, false, maxId);

    // Now let's corrupt the compacted file.
    FileSystem fs = FileSystem.get(conf);
    Path origPath = compactedFile.getPath();
    Path homedir = store.getHomedir();
    Path dstPath = new Path(homedir, origPath.getName());
    FSDataOutputStream stream = fs.create(origPath, null, true, 512, (short) 3,
        (long) 1024, null);
    stream.writeChars("CORRUPT FILE!!!!");
    stream.close();

    try {
      store.completeCompaction(storeFiles, compactedFile);
    } catch (Exception e) {
      // The complete compaction should fail and the corrupt file should remain
      // in the 'tmp' directory.
      assertTrue(fs.exists(origPath));
      assertFalse(fs.exists(dstPath));
      System.out.println("testCompactionWithCorruptResult Passed");
      return;
    }
    fail("testCompactionWithCorruptResult failed since no exception was " +
        "thrown while completing a corrupt file");
  }
}