1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.coprocessor;
21
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.List;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.hadoop.hbase.KeyValue;
29 import org.apache.hadoop.hbase.client.Scan;
30 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
31 import org.apache.hadoop.hbase.ipc.ProtocolSignature;
32 import org.apache.hadoop.hbase.regionserver.InternalScanner;
33 import org.apache.hadoop.hbase.util.Pair;
34
35
36
37
38
39 public class AggregateImplementation extends BaseEndpointCoprocessor implements
40 AggregateProtocol {
41 protected static Log log = LogFactory.getLog(AggregateImplementation.class);
42
43 @Override
44 public ProtocolSignature getProtocolSignature(
45 String protocol, long version, int clientMethodsHashCode)
46 throws IOException {
47 if (AggregateProtocol.class.getName().equals(protocol)) {
48 return new ProtocolSignature(AggregateProtocol.VERSION, null);
49 }
50 throw new IOException("Unknown protocol: " + protocol);
51 }
52
53 @Override
54 public <T, S> T getMax(ColumnInterpreter<T, S> ci, Scan scan)
55 throws IOException {
56 T temp;
57 T max = null;
58 InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())
59 .getRegion().getScanner(scan);
60 List<KeyValue> results = new ArrayList<KeyValue>();
61 byte[] colFamily = scan.getFamilies()[0];
62 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();
63
64 try {
65 boolean hasMoreRows = false;
66 do {
67 hasMoreRows = scanner.next(results);
68 for (KeyValue kv : results) {
69 temp = ci.getValue(colFamily, qualifier, kv);
70 max = (max == null || ci.compare(temp, max) > 0) ? temp : max;
71 }
72 results.clear();
73 } while (hasMoreRows);
74 } finally {
75 scanner.close();
76 }
77 log.info("Maximum from this region is "
78 + ((RegionCoprocessorEnvironment) getEnvironment()).getRegion()
79 .getRegionNameAsString() + ": " + max);
80 return max;
81 }
82
83 @Override
84 public <T, S> T getMin(ColumnInterpreter<T, S> ci, Scan scan)
85 throws IOException {
86 T min = null;
87 T temp;
88 InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())
89 .getRegion().getScanner(scan);
90 List<KeyValue> results = new ArrayList<KeyValue>();
91 byte[] colFamily = scan.getFamilies()[0];
92 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();
93 try {
94 boolean hasMoreRows = false;
95 do {
96 hasMoreRows = scanner.next(results);
97 for (KeyValue kv : results) {
98 temp = ci.getValue(colFamily, qualifier, kv);
99 min = (min == null || ci.compare(temp, min) < 0) ? temp : min;
100 }
101 results.clear();
102 } while (hasMoreRows);
103 } finally {
104 scanner.close();
105 }
106 log.info("Minimum from this region is "
107 + ((RegionCoprocessorEnvironment) getEnvironment()).getRegion()
108 .getRegionNameAsString() + ": " + min);
109 return min;
110 }
111
112 @Override
113 public <T, S> S getSum(ColumnInterpreter<T, S> ci, Scan scan)
114 throws IOException {
115 long sum = 0l;
116 S sumVal = null;
117 T temp;
118 InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())
119 .getRegion().getScanner(scan);
120 byte[] colFamily = scan.getFamilies()[0];
121 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();
122 List<KeyValue> results = new ArrayList<KeyValue>();
123 try {
124 boolean hasMoreRows = false;
125 do {
126 hasMoreRows = scanner.next(results);
127 for (KeyValue kv : results) {
128 temp = ci.getValue(colFamily, qualifier, kv);
129 if (temp != null)
130 sumVal = ci.add(sumVal, ci.castToReturnType(temp));
131 }
132 results.clear();
133 } while (hasMoreRows);
134 } finally {
135 scanner.close();
136 }
137 log.debug("Sum from this region is "
138 + ((RegionCoprocessorEnvironment) getEnvironment()).getRegion()
139 .getRegionNameAsString() + ": " + sum);
140 return sumVal;
141 }
142
143 @Override
144 public <T, S> long getRowNum(ColumnInterpreter<T, S> ci, Scan scan)
145 throws IOException {
146 long counter = 0l;
147 List<KeyValue> results = new ArrayList<KeyValue>();
148 byte[] colFamily = scan.getFamilies()[0];
149 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();
150 if (scan.getFilter() == null && qualifier == null)
151 scan.setFilter(new FirstKeyOnlyFilter());
152 InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())
153 .getRegion().getScanner(scan);
154 try {
155 boolean hasMoreRows = false;
156 do {
157 hasMoreRows = scanner.next(results);
158 if (results.size() > 0) {
159 counter++;
160 }
161 results.clear();
162 } while (hasMoreRows);
163 } finally {
164 scanner.close();
165 }
166 log.info("Row counter from this region is "
167 + ((RegionCoprocessorEnvironment) getEnvironment()).getRegion()
168 .getRegionNameAsString() + ": " + counter);
169 return counter;
170 }
171
172 @Override
173 public <T, S> Pair<S, Long> getAvg(ColumnInterpreter<T, S> ci, Scan scan)
174 throws IOException {
175 S sumVal = null;
176 Long rowCountVal = 0l;
177 InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())
178 .getRegion().getScanner(scan);
179 byte[] colFamily = scan.getFamilies()[0];
180 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();
181 List<KeyValue> results = new ArrayList<KeyValue>();
182 boolean hasMoreRows = false;
183 try {
184 do {
185 results.clear();
186 hasMoreRows = scanner.next(results);
187 for (KeyValue kv : results) {
188 sumVal = ci.add(sumVal, ci.castToReturnType(ci.getValue(colFamily,
189 qualifier, kv)));
190 }
191 rowCountVal++;
192 } while (hasMoreRows);
193 } finally {
194 scanner.close();
195 }
196 Pair<S, Long> pair = new Pair<S, Long>(sumVal, rowCountVal);
197 return pair;
198 }
199
200 @Override
201 public <T, S> Pair<List<S>, Long> getStd(ColumnInterpreter<T, S> ci, Scan scan)
202 throws IOException {
203 S sumVal = null, sumSqVal = null, tempVal = null;
204 long rowCountVal = 0l;
205 InternalScanner scanner = ((RegionCoprocessorEnvironment) getEnvironment())
206 .getRegion().getScanner(scan);
207 byte[] colFamily = scan.getFamilies()[0];
208 byte[] qualifier = scan.getFamilyMap().get(colFamily).pollFirst();
209 List<KeyValue> results = new ArrayList<KeyValue>();
210
211 boolean hasMoreRows = false;
212 try {
213 do {
214 tempVal = null;
215 hasMoreRows = scanner.next(results);
216 for (KeyValue kv : results) {
217 tempVal = ci.add(tempVal, ci.castToReturnType(ci.getValue(colFamily,
218 qualifier, kv)));
219 }
220 results.clear();
221 sumVal = ci.add(sumVal, tempVal);
222 sumSqVal = ci.add(sumSqVal, ci.multiply(tempVal, tempVal));
223 rowCountVal++;
224 } while (hasMoreRows);
225 } finally {
226 scanner.close();
227 }
228 List<S> l = new ArrayList<S>();
229 l.add(sumVal);
230 l.add(sumSqVal);
231 Pair<List<S>, Long> p = new Pair<List<S>, Long>(l, rowCountVal);
232 return p;
233 }
234
235 }