1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.regionserver.wal;
21
22 import java.io.DataInput;
23 import java.io.DataOutput;
24 import java.io.FileNotFoundException;
25 import java.io.IOException;
26 import java.io.OutputStream;
27 import java.io.UnsupportedEncodingException;
28 import java.lang.reflect.InvocationTargetException;
29 import java.lang.reflect.Method;
30 import java.net.URLEncoder;
31 import java.util.ArrayList;
32 import java.util.Arrays;
33 import java.util.Collections;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.NavigableSet;
37 import java.util.SortedMap;
38 import java.util.TreeMap;
39 import java.util.TreeSet;
40 import java.util.UUID;
41 import java.util.concurrent.ConcurrentSkipListMap;
42 import java.util.concurrent.CopyOnWriteArrayList;
43 import java.util.concurrent.atomic.AtomicInteger;
44 import java.util.concurrent.atomic.AtomicLong;
45 import java.util.concurrent.locks.Lock;
46 import java.util.concurrent.locks.ReentrantLock;
47 import java.util.regex.Matcher;
48 import java.util.regex.Pattern;
49
50 import org.apache.commons.logging.Log;
51 import org.apache.commons.logging.LogFactory;
52 import org.apache.hadoop.conf.Configuration;
53 import org.apache.hadoop.fs.FSDataOutputStream;
54 import org.apache.hadoop.fs.FileStatus;
55 import org.apache.hadoop.fs.FileSystem;
56 import org.apache.hadoop.fs.Path;
57 import org.apache.hadoop.fs.PathFilter;
58 import org.apache.hadoop.fs.Syncable;
59 import org.apache.hadoop.hbase.HBaseConfiguration;
60 import org.apache.hadoop.hbase.HConstants;
61 import org.apache.hadoop.hbase.HRegionInfo;
62 import org.apache.hadoop.hbase.HTableDescriptor;
63 import org.apache.hadoop.hbase.KeyValue;
64 import org.apache.hadoop.hbase.ServerName;
65 import org.apache.hadoop.hbase.util.Bytes;
66 import org.apache.hadoop.hbase.util.ClassSize;
67 import org.apache.hadoop.hbase.util.FSUtils;
68 import org.apache.hadoop.hbase.util.HasThread;
69 import org.apache.hadoop.hbase.util.Threads;
70 import org.apache.hadoop.io.Writable;
71 import org.apache.hadoop.util.StringUtils;
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/**
 * Write-ahead log (WAL) for a region server. Edits are appended to a single,
 * periodically rolled log file; old files are archived once all regions with
 * edits in them have flushed. Implements {@link Syncable} so callers can
 * force edits to the filesystem.
 */
public class HLog implements Syncable {
  static final Log LOG = LogFactory.getLog(HLog.class);
  /** Column family name reserved for meta entries the WAL itself writes. */
  public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY");
  static final byte [] METAROW = Bytes.toBytes("METAROW");

  /** Suffix appended to a server's log directory while it is being split. */
  public static final String SPLITTING_EXT = "-splitting";
  public static final boolean SPLIT_SKIP_ERRORS_DEFAULT = false;

  /** Directory (under a region dir) where recovered edits from log splitting
   * are placed, and the pattern/suffix of the files written there. */
  private static final String RECOVERED_EDITS_DIR = "recovered.edits";
  private static final Pattern EDITFILES_NAME_PATTERN =
    Pattern.compile("-?[0-9]+");
  static final String RECOVERED_LOG_TMPFILE_SUFFIX = ".temp";

  private final FileSystem fs;
  private final Path dir;
  private final Configuration conf;
  // Listeners notified of rolls, closes and appends; copy-on-write so
  // iteration during notification needs no locking.
  private List<WALActionsListener> listeners =
    new CopyOnWriteArrayList<WALActionsListener>();
  private final long optionalFlushInterval;
  private final long blocksize;
  private final String prefix;
  private final Path oldLogDir;
  private boolean logRollRunning;

  // Reader/writer implementation classes, resolved lazily from configuration
  // and cached statically (first configuration seen wins).
  private static Class<? extends Writer> logWriterClass;
  private static Class<? extends Reader> logReaderClass;

  private WALCoprocessorHost coprocessorHost;

  /** Clears the cached reader class; for tests that change configuration. */
  static void resetLogReaderClass() {
    HLog.logReaderClass = null;
  }

  // Underlying HDFS output stream of the current writer, kept so replication
  // can be queried via reflection (see getGetNumCurrentReplicas).
  private FSDataOutputStream hdfs_out;


  private int minTolerableReplication;
  private Method getNumCurrentReplicas;
  final static Object [] NO_ARGS = new Object []{};
157
  /** Sequential reader over a single WAL file. */
  public interface Reader {
    /** Opens the reader on the given path; call before any other method. */
    void init(FileSystem fs, Path path, Configuration c) throws IOException;
    void close() throws IOException;
    /** @return the next entry, or null at end of file. */
    Entry next() throws IOException;
    /** Like {@link #next()} but may reuse the passed entry to avoid allocation. */
    Entry next(Entry reuse) throws IOException;
    void seek(long pos) throws IOException;
    long getPosition() throws IOException;
  }
166
  /** Appending writer over a single WAL file. */
  public interface Writer {
    /** Creates/opens the file at the given path; call before any other method. */
    void init(FileSystem fs, Path path, Configuration c) throws IOException;
    void close() throws IOException;
    /** Flushes appended entries to the filesystem. */
    void sync() throws IOException;
    void append(Entry entry) throws IOException;
    long getLength() throws IOException;
  }
174
175
176
177
  // The writer for the current (newest) log file; swapped under updateLock
  // by rollWriter.
  Writer writer;

  // Completed (rolled) log files, keyed by the highest sequence id written
  // before the roll. Used to decide which files can be archived.
  final SortedMap<Long, Path> outputfiles =
    Collections.synchronizedSortedMap(new TreeMap<Long, Path>());

  // Per-region (encoded name) oldest UNFLUSHED edit sequence id. An entry is
  // put on first append after a flush (putIfAbsent) and removed/parked by the
  // cache-flush protocol (startCacheFlush/completeCacheFlush/abortCacheFlush).
  private final ConcurrentSkipListMap<byte [], Long> lastSeqWritten =
    new ConcurrentSkipListMap<byte [], Long>(Bytes.BYTES_COMPARATOR);

  private boolean closed = false;

  // Monotonically increasing WAL sequence id generator.
  private final AtomicLong logSeqNum = new AtomicLong(0);

  // Creation timestamp of the current log file; -1 until the first roll.
  private volatile long filenum = -1;

  // Number of entries appended to the current log file.
  private final AtomicInteger numEntries = new AtomicInteger(0);

  // Sequence id of the newest deferred-flush edit not yet known durable;
  // 0 when all deferred edits have been synced.
  private long lastDeferredSeq;

  // Consecutive low-replication-triggered roll requests; used to stop
  // requesting rolls when the cluster simply has too few datanodes.
  private volatile int consecutiveLogRolls = 0;
  private final int lowReplicationRollLimit;

  // Whether low-replication detection may still request rolls; disabled after
  // lowReplicationRollLimit consecutive futile requests.
  private volatile boolean lowReplicationRollEnabled = true;

  // Size at which the current log is rolled (a fraction of the block size).
  private final long logrollsize;

  // Held (exclusively) for the whole duration of a cache flush and of a log
  // roll, serializing those two operations against each other.
  private final Lock cacheFlushLock = new ReentrantLock();

  // Guards writer swaps and appends so an append never straddles a roll.
  private final Object updateLock = new Object();

  private final boolean enabled;

  // When the number of un-archived logs exceeds this, regions are asked to
  // flush so old logs can be freed.
  private final int maxLogs;

  // Background thread that syncs every optionalFlushInterval ms.
  private final LogSyncer logSyncerThread;

  // How many consecutive writer-close failures to ride over before aborting.
  private final int closeErrorsTolerated;

  private final AtomicInteger closeErrorCount = new AtomicInteger();

  // Matches WAL file names: anything ending in ".<digits>" (see
  // validateHLogFilename).
  private static final Pattern pattern = Pattern.compile(".*\\.\\d*");
252
  /** Value written with the meta edit that marks a completed cache flush. */
  static byte [] COMPLETE_CACHE_FLUSH;
  static {
    try {
      COMPLETE_CACHE_FLUSH =
        "HBASE::CACHEFLUSH".getBytes(HConstants.UTF8_ENCODING);
    } catch (UnsupportedEncodingException e) {
      // UTF-8 support is mandated by the JVM spec; this cannot happen.
      assert(false);
    }
  }
262
263
  // Accumulated write/sync metrics. NOTE(review): the getters below read and
  // reset non-atomically, so updates racing with a getter can be lost —
  // presumably acceptable for approximate metrics; confirm before relying on
  // exact counts.
  private static volatile long writeOps;
  private static volatile long writeTime;
  // Sync latency accounting.
  private static volatile long syncOps;
  private static volatile long syncTime;

  /** @return write-op count since the last call; resets the counter. */
  public static long getWriteOps() {
    long ret = writeOps;
    writeOps = 0;
    return ret;
  }

  /** @return accumulated write time (ms) since the last call; resets it. */
  public static long getWriteTime() {
    long ret = writeTime;
    writeTime = 0;
    return ret;
  }

  /** @return sync-op count since the last call; resets the counter. */
  public static long getSyncOps() {
    long ret = syncOps;
    syncOps = 0;
    return ret;
  }

  /** @return accumulated sync time (ms) since the last call; resets it. */
  public static long getSyncTime() {
    long ret = syncTime;
    syncTime = 0;
    return ret;
  }
293
294
295
296
297
298
299
300
301
302
  /**
   * Convenience constructor: no listeners, default "hlog" prefix, and fail
   * if the log directory already exists.
   * @throws IOException if the directories cannot be created or already exist
   */
  public HLog(final FileSystem fs, final Path dir, final Path oldLogDir,
              final Configuration conf)
  throws IOException {
    this(fs, dir, oldLogDir, conf, null, true, null);
  }
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
  /**
   * Convenience constructor that fails if the log directory already exists.
   * @param listeners observers of WAL events, may be null
   * @param prefix log file name prefix, null/empty means "hlog"
   * @throws IOException if the directories cannot be created or already exist
   */
  public HLog(final FileSystem fs, final Path dir, final Path oldLogDir,
              final Configuration conf, final List<WALActionsListener> listeners,
              final String prefix) throws IOException {
    this(fs, dir, oldLogDir, conf, listeners, true, prefix);
  }
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
  /**
   * Full constructor. Creates the log and archive directories, rolls the
   * first writer, probes for HDFS-826 replication support, and starts the
   * background sync thread.
   * @param fs filesystem the log lives on
   * @param dir directory for live log files (created here)
   * @param oldLogDir directory rolled logs are archived into
   * @param conf configuration to read tunables from
   * @param listeners observers of WAL events, may be null
   * @param failIfLogDirExists if true, an existing {@code dir} is an error
   * @param prefix log file name prefix (URL-encoded), null/empty means "hlog"
   * @throws IOException on directory-creation or first-roll failure
   */
  public HLog(final FileSystem fs, final Path dir, final Path oldLogDir,
              final Configuration conf, final List<WALActionsListener> listeners,
              final boolean failIfLogDirExists, final String prefix)
  throws IOException {
    super();
    this.fs = fs;
    this.dir = dir;
    this.conf = conf;
    if (listeners != null) {
      for (WALActionsListener i: listeners) {
        registerWALActionsListener(i);
      }
    }
    this.blocksize = conf.getLong("hbase.regionserver.hlog.blocksize",
      this.fs.getDefaultBlockSize());
    // Roll at a fraction of the block size so a log rarely spans blocks.
    float multi = conf.getFloat("hbase.regionserver.logroll.multiplier", 0.95f);
    this.logrollsize = (long)(this.blocksize * multi);
    this.optionalFlushInterval =
      conf.getLong("hbase.regionserver.optionallogflushinterval", 1 * 1000);
    if (failIfLogDirExists && fs.exists(dir)) {
      throw new IOException("Target HLog directory already exists: " + dir);
    }
    if (!fs.mkdirs(dir)) {
      throw new IOException("Unable to mkdir " + dir);
    }
    this.oldLogDir = oldLogDir;
    if (!fs.exists(oldLogDir)) {
      if (!fs.mkdirs(this.oldLogDir)) {
        throw new IOException("Unable to mkdir " + this.oldLogDir);
      }
    }
    this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
    this.minTolerableReplication = conf.getInt(
        "hbase.regionserver.hlog.tolerable.lowreplication",
        this.fs.getDefaultReplication());
    this.lowReplicationRollLimit = conf.getInt(
        "hbase.regionserver.hlog.lowreplication.rolllimit", 5);
    this.enabled = conf.getBoolean("hbase.regionserver.hlog.enabled", true);
    this.closeErrorsTolerated = conf.getInt(
        "hbase.regionserver.logroll.errors.tolerated", 0);

    LOG.info("HLog configuration: blocksize=" +
      StringUtils.byteDesc(this.blocksize) +
      ", rollsize=" + StringUtils.byteDesc(this.logrollsize) +
      ", enabled=" + this.enabled +
      ", optionallogflushinternal=" + this.optionalFlushInterval + "ms");
    // Encode the prefix so it is filesystem-safe.
    this.prefix = prefix == null || prefix.isEmpty() ?
        "hlog" : URLEncoder.encode(prefix, "UTF8");
    // Open the first log file; also initializes hdfs_out.
    rollWriter();

    // Probe (via reflection) for getNumCurrentReplicas — HDFS-826.
    this.getNumCurrentReplicas = getGetNumCurrentReplicas(this.hdfs_out);

    logSyncerThread = new LogSyncer(this.optionalFlushInterval);
    Threads.setDaemonThreadRunning(logSyncerThread.getThread(),
        Thread.currentThread().getName() + ".logSyncer");
    coprocessorHost = new WALCoprocessorHost(this, conf);
  }
415
416
417
418
419
420 private Method getGetNumCurrentReplicas(final FSDataOutputStream os) {
421 Method m = null;
422 Exception exception = null;
423 if (os != null) {
424 try {
425 m = os.getWrappedStream().getClass().
426 getMethod("getNumCurrentReplicas", new Class<?> []{});
427 m.setAccessible(true);
428 } catch (NoSuchMethodException e) {
429
430 exception = e;
431 } catch (SecurityException e) {
432
433 exception = e;
434 m = null;
435 }
436 }
437 if (m != null) {
438 LOG.info("Using getNumCurrentReplicas--HDFS-826");
439 } else {
440 LOG.info("getNumCurrentReplicas--HDFS-826 not available; hdfs_out=" +
441 os + ", exception=" + exception.getMessage());
442 }
443 return m;
444 }
445
  /** Registers a listener to be notified of WAL events. */
  public void registerWALActionsListener(final WALActionsListener listener) {
    this.listeners.add(listener);
  }

  /** @return true if the listener was registered and has been removed. */
  public boolean unregisterWALActionsListener(final WALActionsListener listener) {
    return this.listeners.remove(listener);
  }
453
454
455
456
  /** @return file number (creation timestamp) of the current log, -1 if none. */
  public long getFilenum() {
    return this.filenum;
  }

  /**
   * Raise the log sequence number to {@code newvalue} if it is currently
   * lower; used after log replay so new edits sort after replayed ones.
   * Loops with compareAndSet until the value is at least {@code newvalue}.
   */
  public void setSequenceNumber(final long newvalue) {
    for (long id = this.logSeqNum.get(); id < newvalue &&
        !this.logSeqNum.compareAndSet(id, newvalue); id = this.logSeqNum.get()) {
      // CAS lost the race; log and retry with the freshly read value.
      LOG.debug("Changed sequenceid from " + logSeqNum + " to " + newvalue);
    }
  }

  /** @return the current log sequence number. */
  public long getSequenceNumber() {
    return logSeqNum.get();
  }

  /**
   * Underlying raw output stream of the current writer; exposed for tests
   * and for the reflective replication query (see getLogReplication).
   */
  OutputStream getOutputStream() {
    return this.hdfs_out.getWrappedStream();
  }
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
  /**
   * Roll the log writer without forcing (no-op if the current log is empty).
   * @return regions whose memstores should be flushed, or null
   * @throws FailedLogCloseException if the old writer cannot be closed
   */
  public byte [][] rollWriter() throws FailedLogCloseException, IOException {
    return rollWriter(false);
  }
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
  /**
   * Roll the log: close the current writer, open a new file, archive logs no
   * region still needs, and report regions that should flush to free more.
   * Takes cacheFlushLock for the whole roll (mutual exclusion with cache
   * flushes) and updateLock while swapping writers (mutual exclusion with
   * appends).
   * @param force roll even if the current log has no entries
   * @return regions whose memstores should be flushed so old logs can be
   *   archived, or null if none
   * @throws FailedLogCloseException if the old writer cannot be closed
   */
  public byte [][] rollWriter(boolean force)
  throws FailedLogCloseException, IOException {
    // Nothing to roll if the log is empty and we are not forced.
    if (!force && this.writer != null && this.numEntries.get() <= 0) {
      return null;
    }
    byte [][] regionsToFlush = null;
    this.cacheFlushLock.lock();
    this.logRollRunning = true;
    try {
      // New file number is the roll time; the old number names the file we
      // are about to close.
      long currentFilenum = this.filenum;
      this.filenum = System.currentTimeMillis();
      Path newPath = computeFilename();
      HLog.Writer nextWriter = this.createWriterInstance(fs, newPath, conf);

      // Capture the new writer's underlying HDFS stream for replication
      // checks, when the implementation exposes it.
      FSDataOutputStream nextHdfsOut = null;
      if (nextWriter instanceof SequenceFileLogWriter) {
        nextHdfsOut = ((SequenceFileLogWriter)nextWriter).getWriterFSDataOutputStream();
      }
      // Notify listeners before the swap takes effect.
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.logRolled(newPath);
        }
      }

      synchronized (updateLock) {
        if (closed) {
          LOG.debug("HLog closed. Skipping rolling of writer");
          nextWriter.close();
          return regionsToFlush;
        }
        // Close the old writer (recording it in outputfiles) and install
        // the new one atomically w.r.t. appenders.
        Path oldFile = cleanupCurrentWriter(currentFilenum);
        this.writer = nextWriter;
        this.hdfs_out = nextHdfsOut;

        LOG.info((oldFile != null?
            "Roll " + FSUtils.getPath(oldFile) + ", entries=" +
            this.numEntries.get() +
            ", filesize=" +
            this.fs.getFileStatus(oldFile).getLen() + ". ": "") +
          " for " + FSUtils.getPath(newPath));
        this.numEntries.set(0);
      }
      // Can we delete any of the old log files?
      if (this.outputfiles.size() > 0) {
        if (this.lastSeqWritten.isEmpty()) {
          LOG.debug("Last sequenceid written is empty. Deleting all old hlogs");
          // No region has unflushed edits, so every rolled log is obsolete.
          for (Map.Entry<Long, Path> e : this.outputfiles.entrySet()) {
            archiveLogFile(e.getValue(), e.getKey());
          }
          this.outputfiles.clear();
        } else {
          regionsToFlush = cleanOldLogs();
        }
      }
    } finally {
      this.logRollRunning = false;
      this.cacheFlushLock.unlock();
    }
    return regionsToFlush;
  }
612
613
614
615
616
617
618
619
620
621
622
  /**
   * Hook for subclasses/tests to substitute the writer created on each roll;
   * default delegates to {@link #createWriter(FileSystem, Path, Configuration)}.
   */
  protected Writer createWriterInstance(final FileSystem fs, final Path path,
      final Configuration conf) throws IOException {
    return createWriter(fs, path, conf);
  }
627
628
629
630
631
632
633
634
635
  /**
   * Create and initialize a WAL reader for the given path.
   * The implementation class comes from "hbase.regionserver.hlog.reader.impl"
   * (default SequenceFileLogReader) and is cached in a static field — the
   * first configuration seen wins (see resetLogReaderClass for tests).
   * NOTE(review): the lazy init is unsynchronized; a race can resolve the
   * class twice but both results are identical, so this is benign.
   * @throws IOException if the reader cannot be created or initialized
   */
  public static Reader getReader(final FileSystem fs,
    final Path path, Configuration conf)
  throws IOException {
    try {

      if (logReaderClass == null) {

        logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
            SequenceFileLogReader.class, Reader.class);
      }


      HLog.Reader reader = logReaderClass.newInstance();
      reader.init(fs, path, conf);
      return reader;
    } catch (IOException e) {
      // Pass IOExceptions through unwrapped; only wrap other failures.
      throw e;
    }
    catch (Exception e) {
      throw new IOException("Cannot get log reader", e);
    }
  }
658
659
660
661
662
663
664
665
666 public static Writer createWriter(final FileSystem fs,
667 final Path path, Configuration conf)
668 throws IOException {
669 try {
670 if (logWriterClass == null) {
671 logWriterClass = conf.getClass("hbase.regionserver.hlog.writer.impl",
672 SequenceFileLogWriter.class, Writer.class);
673 }
674 HLog.Writer writer = (HLog.Writer) logWriterClass.newInstance();
675 writer.init(fs, path, conf);
676 return writer;
677 } catch (Exception e) {
678 IOException ie = new IOException("cannot get log writer");
679 ie.initCause(e);
680 throw ie;
681 }
682 }
683
684
685
686
687
688
689
690
  /**
   * Archive rolled logs whose edits have all been flushed, and if too many
   * logs remain, pick regions whose flushing would free the oldest one.
   * @return regions to flush so more logs can be archived, or null
   */
  private byte [][] cleanOldLogs() throws IOException {
    Long oldestOutstandingSeqNum = getOldestOutstandingSeqNum();
    // Logs keyed strictly below the oldest outstanding seqid contain only
    // flushed edits and are safe to archive.
    TreeSet<Long> sequenceNumbers =
      new TreeSet<Long>(this.outputfiles.headMap(
        (Long.valueOf(oldestOutstandingSeqNum.longValue()))).keySet());
    // Now remove old logs based on sequence numbers.
    int logsToRemove = sequenceNumbers.size();
    if (logsToRemove > 0) {
      if (LOG.isDebugEnabled()) {
        // Find the region holding back the oldest log, for the debug line.
        byte [] oldestRegion = getOldestRegion(oldestOutstandingSeqNum);
        LOG.debug("Found " + logsToRemove + " hlogs to remove" +
          " out of total " + this.outputfiles.size() + ";" +
          " oldest outstanding sequenceid is " + oldestOutstandingSeqNum +
          " from region " + Bytes.toStringBinary(oldestRegion));
      }
      for (Long seq : sequenceNumbers) {
        archiveLogFile(this.outputfiles.remove(seq), seq);
      }
    }

    // If we still have too many log files, ask for the memstores pinning the
    // oldest remaining log to be flushed.
    byte [][] regions = null;
    int logCount = this.outputfiles == null? 0: this.outputfiles.size();
    if (logCount > this.maxLogs && logCount > 0) {
      // This is an array of encoded region names.
      regions = findMemstoresWithEditsEqualOrOlderThan(this.outputfiles.firstKey(),
        this.lastSeqWritten);
      if (regions != null) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < regions.length; i++) {
          if (i > 0) sb.append(", ");
          sb.append(Bytes.toStringBinary(regions[i]));
        }
        LOG.info("Too many hlogs: logs=" + logCount + ", maxlogs=" +
           this.maxLogs + "; forcing flush of " + regions.length + " regions(s): " +
           sb.toString());
      }
    }
    return regions;
  }
735
736
737
738
739
740
741
742
743
744 static byte [][] findMemstoresWithEditsEqualOrOlderThan(final long oldestWALseqid,
745 final Map<byte [], Long> regionsToSeqids) {
746
747 List<byte []> regions = null;
748 for (Map.Entry<byte [], Long> e: regionsToSeqids.entrySet()) {
749 if (e.getValue().longValue() <= oldestWALseqid) {
750 if (regions == null) regions = new ArrayList<byte []>();
751
752 regions.add(e.getKey());
753 }
754 }
755 return regions == null?
756 null: regions.toArray(new byte [][] {HConstants.EMPTY_BYTE_ARRAY});
757 }
758
759
760
761
  /**
   * @return the oldest unflushed edit sequence id across all regions.
   * NOTE(review): throws NoSuchElementException if lastSeqWritten is empty;
   * callers (rollWriter) check isEmpty first — confirm before reusing.
   */
  private Long getOldestOutstandingSeqNum() {
    return Collections.min(this.lastSeqWritten.values());
  }
765
766
767
768
769
770 private byte [] getOldestRegion(final Long oldestOutstandingSeqNum) {
771 byte [] oldestRegion = null;
772 for (Map.Entry<byte [], Long> e: this.lastSeqWritten.entrySet()) {
773 if (e.getValue().longValue() == oldestOutstandingSeqNum.longValue()) {
774
775 oldestRegion = e.getKey();
776 break;
777 }
778 }
779 return oldestRegion;
780 }
781
782
783
784
785
786
787
  /**
   * Close the current writer (if any) and record its file in outputfiles
   * keyed by the current log sequence number. Close failures are tolerated
   * up to closeErrorsTolerated times, but only when no deferred edits could
   * be lost.
   * @param currentfilenum file number of the log being closed; negative if
   *   no file has been opened yet
   * @return path of the closed file, or null if there was no writer
   * @throws FailedLogCloseException when the close failure cannot be ridden over
   */
  Path cleanupCurrentWriter(final long currentfilenum) throws IOException {
    Path oldFile = null;
    if (this.writer != null) {
      // Close the current writer, get a new one.
      try {
        this.writer.close();
        this.writer = null;
        closeErrorCount.set(0);
      } catch (IOException e) {
        LOG.error("Failed close of HLog writer", e);
        int errors = closeErrorCount.incrementAndGet();
        if (errors <= closeErrorsTolerated && lastDeferredSeq == 0) {
          // Safe to continue: nothing deferred is at risk of loss.
          LOG.warn("Riding over HLog close failure! error count="+errors);
        } else {
          if (lastDeferredSeq > 0) {
            LOG.error("Aborting due to unflushed edits in HLog");
          }
          // Escalate: the caller treats this as fatal for the server.
          FailedLogCloseException flce =
            new FailedLogCloseException("#" + currentfilenum);
          flce.initCause(e);
          throw flce;
        }
      }
      if (currentfilenum >= 0) {
        oldFile = computeFilename(currentfilenum);
        // Key by the highest seqid that can be in this file.
        this.outputfiles.put(Long.valueOf(this.logSeqNum.get()), oldFile);
      }
    }
    return oldFile;
  }
821
  /**
   * Move a rolled log file into the archive (old-log) directory.
   * @param p file to move
   * @param seqno highest sequence id written to the file (for logging)
   * @throws IOException if the rename fails
   */
  private void archiveLogFile(final Path p, final Long seqno) throws IOException {
    Path newPath = getHLogArchivePath(this.oldLogDir, p);
    LOG.info("moving old hlog file " + FSUtils.getPath(p) +
      " whose highest sequenceid is " + seqno + " to " +
      FSUtils.getPath(newPath));
    if (!this.fs.rename(p, newPath)) {
      throw new IOException("Unable to rename " + p + " to " + newPath);
    }
  }
831
832
833
834
835
836
  /** @return path of the current log file (prefix + "." + current filenum). */
  protected Path computeFilename() {
    return computeFilename(this.filenum);
  }
840
841
842
843
844
845
846
847 protected Path computeFilename(long filenum) {
848 if (filenum < 0) {
849 throw new RuntimeException("hlog file number can't be < 0");
850 }
851 return new Path(dir, prefix + "." + filenum);
852 }
853
854
855
856
857
858
  /**
   * Close the log, archive every remaining log file into the old-log
   * directory, then delete the (now empty) log directory.
   * @throws IOException on close or rename failure
   */
  public void closeAndDelete() throws IOException {
    close();
    if (!fs.exists(this.dir)) return;
    FileStatus[] files = fs.listStatus(this.dir);
    for(FileStatus file : files) {
      Path p = getHLogArchivePath(this.oldLogDir, file.getPath());
      if (!fs.rename(file.getPath(),p)) {
        throw new IOException("Unable to rename " + file.getPath() + " to " + p);
      }
    }
    LOG.debug("Moved " + files.length + " log files to " +
      FSUtils.getPath(this.oldLogDir));
    // Deletion failure is only logged: the directory should be empty here.
    if (!fs.delete(dir, true)) {
      LOG.info("Unable to delete " + dir);
    }
  }
875
876
877
878
879
880
  /**
   * Shut the log down: stop the sync thread, notify listeners, and close the
   * current writer. All subsequent appends will fail.
   * @throws IOException if the writer close fails
   */
  public void close() throws IOException {
    try {
      logSyncerThread.interrupt();
      // Make sure we synced everything; wait a bounded time for the thread.
      logSyncerThread.join(this.optionalFlushInterval*2);
    } catch (InterruptedException e) {
      // NOTE(review): interrupt status is not restored here — confirm that
      // is intended for this shutdown path.
      LOG.error("Exception while waiting for syncer thread to die", e);
    }

    cacheFlushLock.lock();
    try {
      // Tell our listeners that the log is closing.
      if (!this.listeners.isEmpty()) {
        for (WALActionsListener i : this.listeners) {
          i.logCloseRequested();
        }
      }
      synchronized (updateLock) {
        this.closed = true;
        if (LOG.isDebugEnabled()) {
          LOG.debug("closing hlog writer in " + this.dir.toString());
        }
        if (this.writer != null) {
          this.writer.close();
        }
      }
    } finally {
      cacheFlushLock.unlock();
    }
  }
911
912
913
914
915
916
917
918
  /**
   * Factory for log keys; subclasses may override to supply a custom
   * {@link HLogKey} implementation.
   */
  protected HLogKey makeKey(byte[] regionName, byte[] tableName, long seqnum,
      long now, UUID clusterId) {
    return new HLogKey(regionName, tableName, seqnum, now, clusterId);
  }
923
924
925
926
927
928
929
930
931
  /**
   * Append an edit with a caller-supplied key. Assigns the next sequence id,
   * writes under updateLock, then syncs outside the lock unless the table
   * uses deferred log flush (meta regions always sync).
   * @throws IOException if the log is closed or the write fails
   */
  public void append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit,
    HTableDescriptor htd)
  throws IOException {
    synchronized (updateLock) {
      if (this.closed) {
        throw new IOException("Cannot append; log is closed");
      }
      long seqNum = obtainSeqNum();
      logKey.setLogSeqNum(seqNum);
      // putIfAbsent: keep the OLDEST outstanding seqid for the region — the
      // entry is only absent right after a flush, so the first append after
      // a flush establishes the region's oldest unflushed edit.
      this.lastSeqWritten.putIfAbsent(regionInfo.getEncodedNameAsBytes(),
        Long.valueOf(seqNum));
      doWrite(regionInfo, logKey, logEdit, htd);
      this.numEntries.incrementAndGet();
      if (htd.isDeferredLogFlush()) {
        lastDeferredSeq = seqNum;
      }
    }
    // Sync outside updateLock so other appenders are not blocked on HDFS.
    if (regionInfo.isMetaRegion() ||
        !htd.isDeferredLogFlush()) {
      // sync txn to file system
      this.sync();
    }
  }
963
964
965
966
967
968
969
970
971
972
973
  /**
   * Append an edit using the default cluster id; see the clusterId overload
   * for the full behavior.
   */
  public void append(HRegionInfo info, byte [] tableName, WALEdit edits,
    final long now, HTableDescriptor htd)
  throws IOException {
    append(info, tableName, edits, HConstants.DEFAULT_CLUSTER_ID, now, htd);
  }
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
  /**
   * Append an edit for the given region/table, building the key here.
   * Empty edits are silently dropped. Assigns the next sequence id, writes
   * under updateLock, then syncs outside the lock unless the table uses
   * deferred log flush (meta regions always sync).
   * @param clusterId id of the cluster that originated the edit
   * @throws IOException if the log is closed or the write fails
   */
  public void append(HRegionInfo info, byte [] tableName, WALEdit edits, UUID clusterId,
    final long now, HTableDescriptor htd)
  throws IOException {
    if (edits.isEmpty()) return;
    synchronized (this.updateLock) {
      if (this.closed) {
        throw new IOException("Cannot append; log is closed");
      }
      long seqNum = obtainSeqNum();
      // putIfAbsent: keep the OLDEST outstanding seqid for the region — the
      // entry is only absent right after a flush, so the first append after
      // a flush establishes the region's oldest unflushed edit.
      byte [] encodedRegionName = info.getEncodedNameAsBytes();
      this.lastSeqWritten.putIfAbsent(encodedRegionName, seqNum);
      HLogKey logKey = makeKey(encodedRegionName, tableName, seqNum, now, clusterId);
      doWrite(info, logKey, edits, htd);
      this.numEntries.incrementAndGet();
      if (htd.isDeferredLogFlush()) {
        lastDeferredSeq = seqNum;
      }
    }
    // Sync outside updateLock so other appenders are not blocked on HDFS.
    if (info.isMetaRegion() ||
        !htd.isDeferredLogFlush()) {
      // sync txn to file system
      this.sync();
    }
  }
1037
1038
1039
1040
1041
  /**
   * Background thread that syncs the WAL every {@code optionalFlushInterval}
   * ms, bounding how long deferred-flush edits can sit unsynced.
   */
  class LogSyncer extends HasThread {

    private final long optionalFlushInterval;

    LogSyncer(long optionalFlushInterval) {
      this.optionalFlushInterval = optionalFlushInterval;
    }

    @Override
    public void run() {
      try {
        // Loop until close() interrupts us out of the sleep.
        while(!this.isInterrupted()) {

          try {
            Thread.sleep(this.optionalFlushInterval);
            sync();
          } catch (IOException e) {
            // A failed sync usually means a bad writer; ask for a roll and
            // keep the thread alive for the next interval.
            LOG.error("Error while syncing, requesting close of hlog ", e);
            requestLogRoll();
          }
        }
      } catch (InterruptedException e) {
        // Normal shutdown: close() interrupts the sleep.
        LOG.debug(getName() + " interrupted while waiting for sync requests");
      } finally {
        LOG.info(getName() + " exiting");
      }
    }
  }
1072
  /**
   * Sync the current writer to the filesystem. The first attempt runs
   * without updateLock; if it fails (e.g. the writer was swapped by a
   * concurrent roll) it is retried once under updateLock. On success,
   * lastDeferredSeq is cleared if no new deferred edit arrived meanwhile,
   * and a roll may be requested when the log is too large or
   * under-replicated.
   * @throws IOException if the locked retry also fails
   */
  private void syncer() throws IOException {
    long currentDeferredSeq;
    synchronized (this.updateLock) {
      if (this.closed) {
        return;
      }
      currentDeferredSeq = lastDeferredSeq;
    }
    try {
      long now = System.currentTimeMillis();
      // Lock-free first attempt; a concurrent roll can make it fail.
      boolean syncSuccessful = true;
      try {
        this.writer.sync();
      } catch(IOException io) {
        syncSuccessful = false;
      }
      synchronized (this.updateLock) {
        if (!syncSuccessful) {
          // Retry against the (possibly replaced) writer; a failure here
          // propagates to the outer catch.
          this.writer.sync();
        }
        if (currentDeferredSeq == lastDeferredSeq) {
          // No deferred edit arrived during the sync: all are durable now.
          lastDeferredSeq = 0;
        }
        syncTime += System.currentTimeMillis() - now;
        syncOps++;
        if (!this.logRollRunning) {
          checkLowReplication();
          if (this.writer.getLength() > this.logrollsize) {
            requestLogRoll();
          }
        }
      }

    } catch (IOException e) {
      LOG.fatal("Could not sync. Requesting close of hlog", e);
      requestLogRoll();
      throw e;
    }
  }
1114
  /**
   * If the current log's replication has fallen below
   * minTolerableReplication, request a roll — but give up (disable the
   * low-replication roller) after lowReplicationRollLimit consecutive
   * requests, since that pattern indicates the cluster simply has too few
   * live datanodes. Re-enables the roller once replication recovers and the
   * new log has received some entries.
   */
  private void checkLowReplication() {
    // If the number of replicas in HDFS has fallen below the tolerable
    // level, roll the log so a fresh pipeline is created.
    try {
      int numCurrentReplicas = getLogReplication();
      if (numCurrentReplicas != 0
          && numCurrentReplicas < this.minTolerableReplication) {
        if (this.lowReplicationRollEnabled) {
          if (this.consecutiveLogRolls < this.lowReplicationRollLimit) {
            LOG.warn("HDFS pipeline error detected. " + "Found "
                + numCurrentReplicas + " replicas but expecting no less than "
                + this.minTolerableReplication + " replicas. "
                + " Requesting close of hlog.");
            requestLogRoll();
            // If rollWriter is requested, increase consecutiveLogRolls.
            // The count is reset (elsewhere) once replication recovers.
            this.consecutiveLogRolls++;
          } else {
            LOG.warn("Too many consecutive RollWriter requests, it's a sign of "
                + "the total number of live datanodes is lower than the tolerable replicas.");
            this.consecutiveLogRolls = 0;
            this.lowReplicationRollEnabled = false;
          }
        }
      } else if (numCurrentReplicas >= this.minTolerableReplication) {
        // Replication is healthy again.
        if (!this.lowReplicationRollEnabled) {
          // The new writer's log has now received some entries, so the
          // pipeline is demonstrably working; re-enable the roller.
          if (this.numEntries.get() <= 1) {
            return;
          }

          this.lowReplicationRollEnabled = true;
          LOG.info("LowReplication-Roller was enabled.");
        }
      }
    } catch (Exception e) {
      // Best-effort check: reflection failures must not break syncing.
      LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas" + e +
          " still proceeding ahead...");
    }
  }
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
  /**
   * Query the current log file's replica count via the reflected
   * DFSOutputStream.getNumCurrentReplicas (HDFS-826).
   * @return current replica count, or 0 when the method or stream is
   *   unavailable (callers treat 0 as "unknown", not "unreplicated")
   */
  int getLogReplication()
  throws IllegalArgumentException, IllegalAccessException, InvocationTargetException {
    if (this.getNumCurrentReplicas != null && this.hdfs_out != null) {
      Object repl = this.getNumCurrentReplicas.invoke(getOutputStream(), NO_ARGS);
      if (repl instanceof Integer) {
        return ((Integer)repl).intValue();
      }
    }
    return 0;
  }
1183
  /** @return true if the reflective replication query is available. */
  boolean canGetCurReplicas() {
    return this.getNumCurrentReplicas != null;
  }

  /** {@link Syncable} hsync; this implementation delegates to one sync path. */
  public void hsync() throws IOException {
    syncer();
  }

  /** {@link Syncable} hflush; this implementation delegates to one sync path. */
  public void hflush() throws IOException {
    syncer();
  }

  /** Force appended edits to the filesystem. */
  public void sync() throws IOException {
    syncer();
  }

  /** Ask listeners (the log roller) to roll this log soon. */
  private void requestLogRoll() {
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i: this.listeners) {
        i.logRollRequested();
      }
    }
  }
1207
  /**
   * Write one entry to the current log file (no sync). Notifies listeners,
   * runs WAL coprocessor pre/post hooks (a pre-hook returning true bypasses
   * the actual write), updates metrics, and warns on slow appends.
   * No-op when the WAL is disabled by configuration.
   * @throws IOException on append failure (a roll is requested first)
   */
  protected void doWrite(HRegionInfo info, HLogKey logKey, WALEdit logEdit,
    HTableDescriptor htd)
  throws IOException {
    if (!this.enabled) {
      return;
    }
    if (!this.listeners.isEmpty()) {
      for (WALActionsListener i: this.listeners) {
        i.visitLogEntryBeforeWrite(htd, logKey, logEdit);
      }
    }
    try {
      long now = System.currentTimeMillis();
      // Coprocessor hook: a "bypass" result skips the physical append.
      if (!coprocessorHost.preWALWrite(info, logKey, logEdit)) {
        // write to our buffer for the Hlog file.
        this.writer.append(new HLog.Entry(logKey, logEdit));
      }
      long took = System.currentTimeMillis() - now;
      coprocessorHost.postWALWrite(info, logKey, logEdit);
      writeTime += took;
      writeOps++;
      // Warn on appends slower than a second — a sign of a sick pipeline.
      if (took > 1000) {
        long len = 0;
        for(KeyValue kv : logEdit.getKeyValues()) {
          len += kv.getLength();
        }
        LOG.warn(String.format(
          "%s took %d ms appending an edit to hlog; editcount=%d, len~=%s",
          Thread.currentThread().getName(), took, this.numEntries.get(),
          StringUtils.humanReadableInt(len)));
      }
    } catch (IOException e) {
      LOG.fatal("Could not append. Requesting close of hlog", e);
      requestLogRoll();
      throw e;
    }
  }
1246
1247
1248
  /** @return number of entries in the current log file. */
  int getNumEntries() {
    return numEntries.get();
  }

  /** @return the next WAL sequence id (increments the generator). */
  private long obtainSeqNum() {
    return this.logSeqNum.incrementAndGet();
  }

  /** @return number of rolled, not-yet-archived log files. */
  int getNumLogFiles() {
    return outputfiles.size();
  }
1264
1265 private byte[] getSnapshotName(byte[] encodedRegionName) {
1266 byte snp[] = new byte[encodedRegionName.length + 3];
1267
1268
1269
1270 snp[0] = 's'; snp[1] = 'n'; snp[2] = 'p';
1271 for (int i = 0; i < encodedRegionName.length; i++) {
1272 snp[i+3] = encodedRegionName[i];
1273 }
1274 return snp;
1275 }
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
  /**
   * Called at the start of a memstore flush. Acquires cacheFlushLock (which
   * stays HELD until completeCacheFlush or abortCacheFlush releases it),
   * parks the region's oldest-unflushed seqid under a "snp"-prefixed
   * snapshot key so abortCacheFlush can restore it, and returns a fresh
   * sequence id marking the flush point.
   * @return sequence id to record with the flush
   */
  public long startCacheFlush(final byte[] encodedRegionName) {
    this.cacheFlushLock.lock();
    Long seq = this.lastSeqWritten.remove(encodedRegionName);
    // seq is null when no edits arrived since the last flush; nothing to
    // park in that case.
    if (seq != null) {
      // Move the seqid under the snapshot key. If a previous flush left its
      // snapshot entry behind, log-reclaim accounting is broken beyond
      // repair — halt the process rather than silently lose edits.
      Long oldseq =
        lastSeqWritten.put(getSnapshotName(encodedRegionName), seq);
      if (oldseq != null) {
        LOG.error("Logic Error Snapshot seq id from earlier flush still" +
          " present! for region " + Bytes.toString(encodedRegionName) +
          " overwritten oldseq=" + oldseq + "with new seq=" + seq);
        Runtime.getRuntime().halt(1);
      }
    }
    return obtainSeqNum();
  }
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
  /**
   * Called when a memstore flush has succeeded: writes a "complete cache
   * flush" meta edit, syncs it, removes the region's parked snapshot entry,
   * and releases the cacheFlushLock taken by startCacheFlush. The cleanup
   * runs in a finally block so the lock is released even if the write fails.
   * @param logSeqId sequence id returned by startCacheFlush for this flush
   * @throws IOException if writing the marker edit fails
   */
  public void completeCacheFlush(final byte [] encodedRegionName,
      final byte [] tableName, final long logSeqId, final boolean isMetaRegion)
  throws IOException {
    try {
      synchronized (updateLock) {
        if (this.closed) {
          return;
        }
        long now = System.currentTimeMillis();
        WALEdit edit = completeCacheFlushLogEdit();
        HLogKey key = makeKey(encodedRegionName, tableName, logSeqId,
            System.currentTimeMillis(), HConstants.DEFAULT_CLUSTER_ID);
        this.writer.append(new Entry(key, edit));
        writeTime += System.currentTimeMillis() - now;
        writeOps++;
        this.numEntries.incrementAndGet();
      }
      // Sync outside updateLock, as elsewhere.
      this.sync();

    } finally {
      // Discard the parked snapshot seqid (the snapshot is durable now) and
      // release the lock held since startCacheFlush.
      this.lastSeqWritten.remove(getSnapshotName(encodedRegionName));
      this.cacheFlushLock.unlock();
    }
  }
1366
1367 private WALEdit completeCacheFlushLogEdit() {
1368 KeyValue kv = new KeyValue(METAROW, METAFAMILY, null,
1369 System.currentTimeMillis(), COMPLETE_CACHE_FLUSH);
1370 WALEdit e = new WALEdit();
1371 e.add(kv);
1372 return e;
1373 }
1374
1375
1376
1377
1378
1379
1380
  /**
   * Called when a memstore flush is abandoned: restores the region's parked
   * oldest-unflushed seqid from the snapshot key (so the corresponding logs
   * stay retained) and releases the cacheFlushLock taken by startCacheFlush.
   */
  public void abortCacheFlush(byte[] encodedRegionName) {
    Long snapshot_seq =
      this.lastSeqWritten.remove(getSnapshotName(encodedRegionName));
    if (snapshot_seq != null) {
      // Restore the parked seqid. Any entry added for the region while the
      // flush was in flight MUST be newer than the snapshot's — if not,
      // sequence-id accounting is corrupt and we halt rather than risk
      // losing edits.
      Long current_memstore_earliest_seq =
        this.lastSeqWritten.put(encodedRegionName, snapshot_seq);
      if (current_memstore_earliest_seq != null &&
          (current_memstore_earliest_seq.longValue() <=
            snapshot_seq.longValue())) {
        LOG.error("Logic Error region " + Bytes.toString(encodedRegionName) +
            "acquired edits out of order current memstore seq=" +
            current_memstore_earliest_seq + " snapshot seq=" + snapshot_seq);
        Runtime.getRuntime().halt(1);
      }
    }
    this.cacheFlushLock.unlock();
  }
1402
1403
1404
1405
1406
  /** @return true if the given family is the WAL's reserved meta family. */
  public static boolean isMetaFamily(byte [] family) {
    return Bytes.equals(METAFAMILY, family);
  }

  /**
   * @return whether the low-replication log roller is currently enabled
   *   (exposed for tests/metrics).
   */
  public boolean isLowReplicationRollEnabled() {
    return lowReplicationRollEnabled;
  }
1419
  /**
   * @return the configured {@link HLogKey} implementation class
   *   ("hbase.regionserver.hlog.keyclass", default HLogKey).
   */
  @SuppressWarnings("unchecked")
  public static Class<? extends HLogKey> getKeyClass(Configuration conf) {
    return (Class<? extends HLogKey>)
      conf.getClass("hbase.regionserver.hlog.keyclass", HLogKey.class);
  }
1425
1426 public static HLogKey newKey(Configuration conf) throws IOException {
1427 Class<? extends HLogKey> keyClass = getKeyClass(conf);
1428 try {
1429 return keyClass.newInstance();
1430 } catch (InstantiationException e) {
1431 throw new IOException("cannot create hlog key");
1432 } catch (IllegalAccessException e) {
1433 throw new IOException("cannot create hlog key");
1434 }
1435 }
1436
1437
1438
1439
1440
  /**
   * One WAL record: a key (region, table, seqid, time, cluster) plus the
   * edit payload. Writable: the serialized form is key then edit, and that
   * order is the on-disk wire format — do not reorder.
   */
  public static class Entry implements Writable {
    private WALEdit edit;
    private HLogKey key;

    /** No-arg constructor for Writable deserialization. */
    public Entry() {
      edit = new WALEdit();
      key = new HLogKey();
    }

    /**
     * Construct a fully-formed entry.
     * @param key the entry's key
     * @param edit the entry's edit payload
     */
    public Entry(HLogKey key, WALEdit edit) {
      super();
      this.key = key;
      this.edit = edit;
    }

    /** @return the edit payload. */
    public WALEdit getEdit() {
      return edit;
    }

    /** @return the key. */
    public HLogKey getKey() {
      return key;
    }

    @Override
    public String toString() {
      return this.key + "=" + this.edit;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
      // Wire format: key first, then edit (must mirror readFields).
      this.key.write(dataOutput);
      this.edit.write(dataOutput);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
      this.key.readFields(dataInput);
      this.edit.readFields(dataInput);
    }
  }
1492
1493
1494
1495
1496
1497
1498
1499 public static String getHLogDirectoryName(final String serverName) {
1500 StringBuilder dirName = new StringBuilder(HConstants.HREGION_LOGDIR_NAME);
1501 dirName.append("/");
1502 dirName.append(serverName);
1503 return dirName.toString();
1504 }
1505
1506
1507
1508
1509
1510
1511 protected Path getDir() {
1512 return dir;
1513 }
1514
1515 public static boolean validateHLogFilename(String filename) {
1516 return pattern.matcher(filename).matches();
1517 }
1518
1519 static Path getHLogArchivePath(Path oldLogDir, Path p) {
1520 return new Path(oldLogDir, p.getName());
1521 }
1522
1523 static String formatRecoveredEditsFileName(final long seqid) {
1524 return String.format("%019d", seqid);
1525 }
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535 public static NavigableSet<Path> getSplitEditFilesSorted(final FileSystem fs,
1536 final Path regiondir)
1537 throws IOException {
1538 NavigableSet<Path> filesSorted = new TreeSet<Path>();
1539 Path editsdir = getRegionDirRecoveredEditsDir(regiondir);
1540 if (!fs.exists(editsdir)) return filesSorted;
1541 FileStatus[] files = FSUtils.listStatus(fs, editsdir, new PathFilter() {
1542 @Override
1543 public boolean accept(Path p) {
1544 boolean result = false;
1545 try {
1546
1547
1548
1549
1550 Matcher m = EDITFILES_NAME_PATTERN.matcher(p.getName());
1551 result = fs.isFile(p) && m.matches();
1552
1553
1554 if (p.getName().endsWith(RECOVERED_LOG_TMPFILE_SUFFIX)) {
1555 result = false;
1556 }
1557 } catch (IOException e) {
1558 LOG.warn("Failed isFile check on " + p);
1559 }
1560 return result;
1561 }
1562 });
1563 if (files == null) return filesSorted;
1564 for (FileStatus status: files) {
1565 filesSorted.add(status.getPath());
1566 }
1567 return filesSorted;
1568 }
1569
1570
1571
1572
1573
1574
1575
1576
1577 public static Path moveAsideBadEditsFile(final FileSystem fs,
1578 final Path edits)
1579 throws IOException {
1580 Path moveAsideName = new Path(edits.getParent(), edits.getName() + "." +
1581 System.currentTimeMillis());
1582 if (!fs.rename(edits, moveAsideName)) {
1583 LOG.warn("Rename failed from " + edits + " to " + moveAsideName);
1584 }
1585 return moveAsideName;
1586 }
1587
1588
1589
1590
1591
1592
1593 public static Path getRegionDirRecoveredEditsDir(final Path regiondir) {
1594 return new Path(regiondir, RECOVERED_EDITS_DIR);
1595 }
1596
  // Estimated fixed heap footprint of an HLog instance: object header,
  // 5 references, an AtomicInteger, one int and 3 longs, rounded up by
  // ClassSize.align. NOTE(review): the field counts here must be kept in
  // sync with the fields declared on this class — verify when fields change.
  public static final long FIXED_OVERHEAD = ClassSize.align(
    ClassSize.OBJECT + (5 * ClassSize.REFERENCE) +
    ClassSize.ATOMIC_INTEGER + Bytes.SIZEOF_INT + (3 * Bytes.SIZEOF_LONG));
1600
1601 private static void usage() {
1602 System.err.println("Usage: HLog <ARGS>");
1603 System.err.println("Arguments:");
1604 System.err.println(" --dump Dump textual representation of passed one or more files");
1605 System.err.println(" For example: HLog --dump hdfs://example.com:9000/hbase/.logs/MACHINE/LOGFILE");
1606 System.err.println(" --split Split the passed directory of WAL logs");
1607 System.err.println(" For example: HLog --split hdfs://example.com:9000/hbase/.logs/DIR");
1608 }
1609
1610 private static void split(final Configuration conf, final Path p)
1611 throws IOException {
1612 FileSystem fs = FileSystem.get(conf);
1613 if (!fs.exists(p)) {
1614 throw new FileNotFoundException(p.toString());
1615 }
1616 final Path baseDir = new Path(conf.get(HConstants.HBASE_DIR));
1617 final Path oldLogDir = new Path(baseDir, HConstants.HREGION_OLDLOGDIR_NAME);
1618 if (!fs.getFileStatus(p).isDir()) {
1619 throw new IOException(p + " is not a directory");
1620 }
1621
1622 HLogSplitter logSplitter = HLogSplitter.createLogSplitter(
1623 conf, baseDir, p, oldLogDir, fs);
1624 logSplitter.splitLog();
1625 }
1626
1627
1628
1629
1630 public WALCoprocessorHost getCoprocessorHost() {
1631 return coprocessorHost;
1632 }
1633
1634
1635 long getLastDeferredSeq() {
1636 return lastDeferredSeq;
1637 }
1638
1639
1640
1641
1642
1643
1644
1645
1646 public static void main(String[] args) throws IOException {
1647 if (args.length < 2) {
1648 usage();
1649 System.exit(-1);
1650 }
1651
1652 if (args[0].compareTo("--dump") == 0) {
1653 HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length));
1654 } else if (args[0].compareTo("--split") == 0) {
1655 Configuration conf = HBaseConfiguration.create();
1656 for (int i = 1; i < args.length; i++) {
1657 try {
1658 conf.set("fs.default.name", args[i]);
1659 conf.set("fs.defaultFS", args[i]);
1660 Path logPath = new Path(args[i]);
1661 split(conf, logPath);
1662 } catch (Throwable t) {
1663 t.printStackTrace(System.err);
1664 System.exit(-1);
1665 }
1666 }
1667 } else {
1668 usage();
1669 System.exit(-1);
1670 }
1671 }
1672 }