View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.util.LinkedList;
23  
24  import org.apache.hadoop.hbase.util.Bytes;
25  import org.apache.hadoop.hbase.util.ClassSize;
26  
27  /**
28   * Manages the read/write consistency within memstore. This provides
29   * an interface for readers to determine what entries to ignore, and
30   * a mechanism for writers to obtain new write numbers, then "commit"
31   * the new writes for readers to read (thus forming atomic transactions).
32   */
33  public class MultiVersionConsistencyControl {
34    private volatile long memstoreRead = 0;
35    private volatile long memstoreWrite = 0;
36  
37    private final Object readWaiters = new Object();
38  
39    // This is the pending queue of writes.
40    private final LinkedList<WriteEntry> writeQueue =
41        new LinkedList<WriteEntry>();
42  
43    private static final ThreadLocal<Long> perThreadReadPoint =
44        new ThreadLocal<Long>() {
45         @Override
46        protected
47         Long initialValue() {
48           return Long.MAX_VALUE;
49         }
50    };
51  
52    /**
53     * Default constructor. Initializes the memstoreRead/Write points to 0.
54     */
55    public MultiVersionConsistencyControl() {
56      this.memstoreRead = this.memstoreWrite = 0;
57    }
58  
59    /**
60     * Initializes the memstoreRead/Write points appropriately.
61     * @param startPoint
62     */
63    public void initialize(long startPoint) {
64      synchronized (writeQueue) {
65        if (this.memstoreWrite != this.memstoreRead) {
66          throw new RuntimeException("Already used this mvcc. Too late to initialize");
67        }
68  
69        this.memstoreRead = this.memstoreWrite = startPoint;
70      }
71    }
72  
73    /**
74     * Get this thread's read point. Used primarily by the memstore scanner to
75     * know which values to skip (ie: have not been completed/committed to
76     * memstore).
77     */
78    public static long getThreadReadPoint() {
79        return perThreadReadPoint.get();
80    }
81  
82    /**
83     * Set the thread read point to the given value. The thread MVCC
84     * is used by the Memstore scanner so it knows which values to skip.
85     * Give it a value of 0 if you want everything.
86     */
87    public static void setThreadReadPoint(long readPoint) {
88      perThreadReadPoint.set(readPoint);
89    }
90  
91    /**
92     * Set the thread MVCC read point to whatever the current read point is in
93     * this particular instance of MVCC.  Returns the new thread read point value.
94     */
95    public static long resetThreadReadPoint(MultiVersionConsistencyControl mvcc) {
96      perThreadReadPoint.set(mvcc.memstoreReadPoint());
97      return getThreadReadPoint();
98    }
99  
100   /**
101    * Set the thread MVCC read point to 0 (include everything).
102    */
103   public static void resetThreadReadPoint() {
104     perThreadReadPoint.set(0L);
105   }
106 
107   public WriteEntry beginMemstoreInsert() {
108     synchronized (writeQueue) {
109       long nextWriteNumber = ++memstoreWrite;
110       WriteEntry e = new WriteEntry(nextWriteNumber);
111       writeQueue.add(e);
112       return e;
113     }
114   }
115 
116   public void completeMemstoreInsert(WriteEntry e) {
117     advanceMemstore(e);
118     waitForRead(e);
119   }
120 
121   boolean advanceMemstore(WriteEntry e) {
122     synchronized (writeQueue) {
123       e.markCompleted();
124 
125       long nextReadValue = -1;
126       boolean ranOnce=false;
127       while (!writeQueue.isEmpty()) {
128         ranOnce=true;
129         WriteEntry queueFirst = writeQueue.getFirst();
130 
131         if (nextReadValue > 0) {
132           if (nextReadValue+1 != queueFirst.getWriteNumber()) {
133             throw new RuntimeException("invariant in completeMemstoreInsert violated, prev: "
134                 + nextReadValue + " next: " + queueFirst.getWriteNumber());
135           }
136         }
137 
138         if (queueFirst.isCompleted()) {
139           nextReadValue = queueFirst.getWriteNumber();
140           writeQueue.removeFirst();
141         } else {
142           break;
143         }
144       }
145 
146       if (!ranOnce) {
147         throw new RuntimeException("never was a first");
148       }
149 
150       if (nextReadValue > 0) {
151         synchronized (readWaiters) {
152           memstoreRead = nextReadValue;
153           readWaiters.notifyAll();
154         }
155       }
156       if (memstoreRead >= e.getWriteNumber()) {
157         return true;
158       }
159       return false;
160     }
161   }
162 
163   /**
164    * Wait for the global readPoint to advance upto
165    * the specified transaction number.
166    */
167   public void waitForRead(WriteEntry e) {
168     boolean interrupted = false;
169     synchronized (readWaiters) {
170       while (memstoreRead < e.getWriteNumber()) {
171         try {
172           readWaiters.wait(0);
173         } catch (InterruptedException ie) {
174           // We were interrupted... finish the loop -- i.e. cleanup --and then
175           // on our way out, reset the interrupt flag.
176           interrupted = true;
177         }
178       }
179     }
180     if (interrupted) Thread.currentThread().interrupt();
181   }
182 
183   public long memstoreReadPoint() {
184     return memstoreRead;
185   }
186 
187 
188   public static class WriteEntry {
189     private long writeNumber;
190     private boolean completed = false;
191     WriteEntry(long writeNumber) {
192       this.writeNumber = writeNumber;
193     }
194     void markCompleted() {
195       this.completed = true;
196     }
197     boolean isCompleted() {
198       return this.completed;
199     }
200     long getWriteNumber() {
201       return this.writeNumber;
202     }
203   }
204 
205   public static final long FIXED_SIZE = ClassSize.align(
206       ClassSize.OBJECT +
207       2 * Bytes.SIZEOF_LONG +
208       2 * ClassSize.REFERENCE);
209 
210 }