1 /**
2 * Copyright 2009 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20 package org.apache.hadoop.hbase.regionserver.wal;
21
22 import java.io.DataInput;
23 import java.io.DataOutput;
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.NavigableMap;
28 import java.util.TreeMap;
29
30 import org.apache.hadoop.hbase.io.HeapSize;
31 import org.apache.hadoop.hbase.KeyValue;
32 import org.apache.hadoop.hbase.util.Bytes;
33 import org.apache.hadoop.hbase.util.ClassSize;
34 import org.apache.hadoop.io.Writable;
35
36 /**
37 * WALEdit: Used in HBase's transaction log (WAL) to represent
38 * the collection of edits (KeyValue objects) corresponding to a
39 * single transaction. The class implements "Writable" interface
40 * for serializing/deserializing a set of KeyValue items.
41 *
42 * Previously, if a transaction contains 3 edits to c1, c2, c3 for a row R,
43 * the HLog would have three log entries as follows:
44 *
45 * <logseq1-for-edit1>:<KeyValue-for-edit-c1>
46 * <logseq2-for-edit2>:<KeyValue-for-edit-c2>
47 * <logseq3-for-edit3>:<KeyValue-for-edit-c3>
48 *
49 * This presents problems because row level atomicity of transactions
50 * was not guaranteed. If we crash after few of the above appends make
51 * it, then recovery will restore a partial transaction.
52 *
53 * In the new world, all the edits for a given transaction are written
54 * out as a single record, for example:
55 *
56 * <logseq#-for-entire-txn>:<WALEdit-for-entire-txn>
57 *
58 * where, the WALEdit is serialized as:
59 * <-1, # of edits, <KeyValue>, <KeyValue>, ... >
60 * For example:
61 * <-1, 3, <Keyvalue-for-edit-c1>, <KeyValue-for-edit-c2>, <KeyValue-for-edit-c3>>
62 *
63 * The -1 marker is just a special way of being backward compatible with
64 * an old HLog which would have contained a single <KeyValue>.
65 *
66 * The deserializer for WALEdit backward compatibly detects if the record
67 * is an old style KeyValue or the new style WALEdit.
68 *
69 */
70 public class WALEdit implements Writable, HeapSize {
71
72 private final int VERSION_2 = -1;
73
74 private final ArrayList<KeyValue> kvs = new ArrayList<KeyValue>();
75 private NavigableMap<byte[], Integer> scopes;
76
77 public WALEdit() {
78 }
79
80 public void add(KeyValue kv) {
81 this.kvs.add(kv);
82 }
83
84 public boolean isEmpty() {
85 return kvs.isEmpty();
86 }
87
88 public int size() {
89 return kvs.size();
90 }
91
92 public List<KeyValue> getKeyValues() {
93 return kvs;
94 }
95
96 public NavigableMap<byte[], Integer> getScopes() {
97 return scopes;
98 }
99
100
101 public void setScopes (NavigableMap<byte[], Integer> scopes) {
102 // We currently process the map outside of WALEdit,
103 // TODO revisit when replication is part of core
104 this.scopes = scopes;
105 }
106
107 public void readFields(DataInput in) throws IOException {
108 kvs.clear();
109 if (scopes != null) {
110 scopes.clear();
111 }
112 int versionOrLength = in.readInt();
113 if (versionOrLength == VERSION_2) {
114 // this is new style HLog entry containing multiple KeyValues.
115 int numEdits = in.readInt();
116 for (int idx = 0; idx < numEdits; idx++) {
117 KeyValue kv = new KeyValue();
118 kv.readFields(in);
119 this.add(kv);
120 }
121 int numFamilies = in.readInt();
122 if (numFamilies > 0) {
123 if (scopes == null) {
124 scopes = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
125 }
126 for (int i = 0; i < numFamilies; i++) {
127 byte[] fam = Bytes.readByteArray(in);
128 int scope = in.readInt();
129 scopes.put(fam, scope);
130 }
131 }
132 } else {
133 // this is an old style HLog entry. The int that we just
134 // read is actually the length of a single KeyValue.
135 KeyValue kv = new KeyValue();
136 kv.readFields(versionOrLength, in);
137 this.add(kv);
138 }
139
140 }
141
142 public void write(DataOutput out) throws IOException {
143 out.writeInt(VERSION_2);
144 out.writeInt(kvs.size());
145 // We interleave the two lists for code simplicity
146 for (KeyValue kv : kvs) {
147 kv.write(out);
148 }
149 if (scopes == null) {
150 out.writeInt(0);
151 } else {
152 out.writeInt(scopes.size());
153 for (byte[] key : scopes.keySet()) {
154 Bytes.writeByteArray(out, key);
155 out.writeInt(scopes.get(key));
156 }
157 }
158 }
159
160 public long heapSize() {
161 long ret = 0;
162 for (KeyValue kv : kvs) {
163 ret += kv.heapSize();
164 }
165 if (scopes != null) {
166 ret += ClassSize.TREEMAP;
167 ret += ClassSize.align(scopes.size() * ClassSize.MAP_ENTRY);
168 // TODO this isn't quite right, need help here
169 }
170 return ret;
171 }
172
173 public String toString() {
174 StringBuilder sb = new StringBuilder();
175
176 sb.append("[#edits: " + kvs.size() + " = <");
177 for (KeyValue kv : kvs) {
178 sb.append(kv.toString());
179 sb.append("; ");
180 }
181 if (scopes != null) {
182 sb.append(" scopes: " + scopes.toString());
183 }
184 sb.append(">]");
185 return sb.toString();
186 }
187
188 }