1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.apache.hadoop.hbase.client.coprocessor;
22
23 import java.io.IOException;
24 import java.util.ArrayList;
25 import java.util.List;
26 import java.util.concurrent.atomic.AtomicLong;
27
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30 import org.apache.hadoop.conf.Configuration;
31 import org.apache.hadoop.hbase.HConstants;
32 import org.apache.hadoop.hbase.client.HTable;
33 import org.apache.hadoop.hbase.client.Scan;
34 import org.apache.hadoop.hbase.coprocessor.AggregateProtocol;
35 import org.apache.hadoop.hbase.coprocessor.ColumnInterpreter;
36 import org.apache.hadoop.hbase.util.Bytes;
37 import org.apache.hadoop.hbase.util.Pair;
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 public class AggregationClient {
59
60 private static final Log log = LogFactory.getLog(AggregationClient.class);
61 Configuration conf;
62
63
64
65
66
67 public AggregationClient(Configuration cfg) {
68 this.conf = cfg;
69 }
70
71
72
73
74
75
76
77
78
79
80
81
82
83 public <R, S> R max(final byte[] tableName, final ColumnInterpreter<R, S> ci,
84 final Scan scan) throws Throwable {
85 validateParameters(scan);
86 HTable table = new HTable(conf, tableName);
87
88 class MaxCallBack implements Batch.Callback<R> {
89 R max = null;
90
91 R getMax() {
92 return max;
93 }
94
95 @Override
96 public synchronized void update(byte[] region, byte[] row, R result) {
97 max = ci.compare(max, result) < 0 ? result : max;
98 }
99 }
100 MaxCallBack aMaxCallBack = new MaxCallBack();
101 table.coprocessorExec(AggregateProtocol.class, scan.getStartRow(), scan
102 .getStopRow(), new Batch.Call<AggregateProtocol, R>() {
103 @Override
104 public R call(AggregateProtocol instance) throws IOException {
105 return instance.getMax(ci, scan);
106 }
107 }, aMaxCallBack);
108 return aMaxCallBack.getMax();
109 }
110
111 private void validateParameters(Scan scan) throws IOException {
112 if (scan == null
113 || (Bytes.equals(scan.getStartRow(), scan.getStopRow()) && !Bytes
114 .equals(scan.getStartRow(), HConstants.EMPTY_START_ROW))
115 || Bytes.compareTo(scan.getStartRow(), scan.getStopRow()) > 0) {
116 throw new IOException(
117 "Agg client Exception: Startrow should be smaller than Stoprow");
118 } else if (scan.getFamilyMap().size() != 1) {
119 throw new IOException("There must be only one family.");
120 }
121 }
122
123
124
125
126
127
128
129
130
131
132
133 public <R, S> R min(final byte[] tableName, final ColumnInterpreter<R, S> ci,
134 final Scan scan) throws Throwable {
135 validateParameters(scan);
136 class MinCallBack implements Batch.Callback<R> {
137
138 private R min = null;
139
140 public R getMinimum() {
141 return min;
142 }
143
144 @Override
145 public synchronized void update(byte[] region, byte[] row, R result) {
146 min = (min == null || ci.compare(result, min) < 0) ? result : min;
147 }
148 }
149 HTable table = new HTable(conf, tableName);
150 MinCallBack minCallBack = new MinCallBack();
151 table.coprocessorExec(AggregateProtocol.class, scan.getStartRow(), scan
152 .getStopRow(), new Batch.Call<AggregateProtocol, R>() {
153
154 @Override
155 public R call(AggregateProtocol instance) throws IOException {
156 return instance.getMin(ci, scan);
157 }
158 }, minCallBack);
159 log.debug("Min fom all regions is: " + minCallBack.getMinimum());
160 return minCallBack.getMinimum();
161 }
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176 public <R, S> long rowCount(final byte[] tableName,
177 final ColumnInterpreter<R, S> ci, final Scan scan) throws Throwable {
178 validateParameters(scan);
179 class RowNumCallback implements Batch.Callback<Long> {
180 private final AtomicLong rowCountL = new AtomicLong(0);
181
182 public long getRowNumCount() {
183 return rowCountL.get();
184 }
185
186 @Override
187 public void update(byte[] region, byte[] row, Long result) {
188 rowCountL.addAndGet(result.longValue());
189 }
190 }
191 RowNumCallback rowNum = new RowNumCallback();
192 HTable table = new HTable(conf, tableName);
193 table.coprocessorExec(AggregateProtocol.class, scan.getStartRow(), scan
194 .getStopRow(), new Batch.Call<AggregateProtocol, Long>() {
195 @Override
196 public Long call(AggregateProtocol instance) throws IOException {
197 return instance.getRowNum(ci, scan);
198 }
199 }, rowNum);
200 return rowNum.getRowNumCount();
201 }
202
203
204
205
206
207
208
209
210
211
212 public <R, S> S sum(final byte[] tableName, final ColumnInterpreter<R, S> ci,
213 final Scan scan) throws Throwable {
214 validateParameters(scan);
215 class SumCallBack implements Batch.Callback<S> {
216 S sumVal = null;
217
218 public S getSumResult() {
219 return sumVal;
220 }
221
222 @Override
223 public synchronized void update(byte[] region, byte[] row, S result) {
224 sumVal = ci.add(sumVal, result);
225 }
226 }
227 SumCallBack sumCallBack = new SumCallBack();
228 HTable table = new HTable(conf, tableName);
229 table.coprocessorExec(AggregateProtocol.class, scan.getStartRow(), scan
230 .getStopRow(), new Batch.Call<AggregateProtocol, S>() {
231 @Override
232 public S call(AggregateProtocol instance) throws IOException {
233 return instance.getSum(ci, scan);
234 }
235 }, sumCallBack);
236 return sumCallBack.getSumResult();
237 }
238
239
240
241
242
243
244
245
246
247 private <R, S> Pair<S, Long> getAvgArgs(final byte[] tableName,
248 final ColumnInterpreter<R, S> ci, final Scan scan) throws Throwable {
249 validateParameters(scan);
250 class AvgCallBack implements Batch.Callback<Pair<S, Long>> {
251 S sum = null;
252 Long rowCount = 0l;
253
254 public Pair<S, Long> getAvgArgs() {
255 return new Pair<S, Long>(sum, rowCount);
256 }
257
258 @Override
259 public synchronized void update(byte[] region, byte[] row, Pair<S, Long> result) {
260 sum = ci.add(sum, result.getFirst());
261 rowCount += result.getSecond();
262 }
263 }
264 AvgCallBack avgCallBack = new AvgCallBack();
265 HTable table = new HTable(conf, tableName);
266 table.coprocessorExec(AggregateProtocol.class, scan.getStartRow(), scan
267 .getStopRow(), new Batch.Call<AggregateProtocol, Pair<S, Long>>() {
268 @Override
269 public Pair<S, Long> call(AggregateProtocol instance) throws IOException {
270 return instance.getAvg(ci, scan);
271 }
272 }, avgCallBack);
273 return avgCallBack.getAvgArgs();
274 }
275
276
277
278
279
280
281
282
283
284
285
286
287
288 public <R, S> double avg(final byte[] tableName,
289 final ColumnInterpreter<R, S> ci, Scan scan) throws Throwable {
290 Pair<S, Long> p = getAvgArgs(tableName, ci, scan);
291 return ci.divideForAvg(p.getFirst(), p.getSecond());
292 }
293
294
295
296
297
298
299
300
301
302
303
304
305 private <R, S> Pair<List<S>, Long> getStdArgs(final byte[] tableName,
306 final ColumnInterpreter<R, S> ci, final Scan scan) throws Throwable {
307 validateParameters(scan);
308 class StdCallback implements Batch.Callback<Pair<List<S>, Long>> {
309 long rowCountVal = 0l;
310 S sumVal = null, sumSqVal = null;
311
312 public Pair<List<S>, Long> getStdParams() {
313 List<S> l = new ArrayList<S>();
314 l.add(sumVal);
315 l.add(sumSqVal);
316 Pair<List<S>, Long> p = new Pair<List<S>, Long>(l, rowCountVal);
317 return p;
318 }
319
320 @Override
321 public synchronized void update(byte[] region, byte[] row, Pair<List<S>, Long> result) {
322 sumVal = ci.add(sumVal, result.getFirst().get(0));
323 sumSqVal = ci.add(sumSqVal, result.getFirst().get(1));
324 rowCountVal += result.getSecond();
325 }
326 }
327 StdCallback stdCallback = new StdCallback();
328 HTable table = new HTable(conf, tableName);
329 table.coprocessorExec(AggregateProtocol.class, scan.getStartRow(), scan
330 .getStopRow(),
331 new Batch.Call<AggregateProtocol, Pair<List<S>, Long>>() {
332 @Override
333 public Pair<List<S>, Long> call(AggregateProtocol instance)
334 throws IOException {
335 return instance.getStd(ci, scan);
336 }
337
338 }, stdCallback);
339 return stdCallback.getStdParams();
340 }
341
342
343
344
345
346
347
348
349
350
351
352
353
354 public <R, S> double std(final byte[] tableName, ColumnInterpreter<R, S> ci,
355 Scan scan) throws Throwable {
356 Pair<List<S>, Long> p = getStdArgs(tableName, ci, scan);
357 double res = 0d;
358 double avg = ci.divideForAvg(p.getFirst().get(0), p.getSecond());
359 double avgOfSumSq = ci.divideForAvg(p.getFirst().get(1), p.getSecond());
360 res = avgOfSumSq - (avg) * (avg);
361 res = Math.pow(res, 0.5);
362 return res;
363 }
364
365 }