1 /**
2 * Copyright 2011 The Apache Software Foundation
3 *
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21 package org.apache.hadoop.hbase.coprocessor;
22
23 import java.io.IOException;
24 import java.util.List;
25
26 import org.apache.hadoop.hbase.client.Scan;
27 import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
28 import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
29 import org.apache.hadoop.hbase.util.Pair;
30
31 /**
32 * Defines the aggregation functions that are to be supported in this
33 * Coprocessor. For each method, it takes a Scan object and a columnInterpreter.
34 * The scan object should have a column family (else an exception will be
35 * thrown), and an optional column qualifier. In the current implementation
36 * {@link AggregateImplementation}, only one column family and column qualifier
37 * combination is served. In case there are more than one, only first one will
38 * be picked. Refer to {@link AggregationClient} for some general conditions on
39 * input parameters.
40 */
41 public interface AggregateProtocol extends CoprocessorProtocol {
42 public static final long VERSION = 1L;
43
44 /**
45 * Gives the maximum for a given combination of column qualifier and column
46 * family, in the given row range as defined in the Scan object. In its
47 * current implementation, it takes one column family and one column qualifier
48 * (if provided). In case of null column qualifier, maximum value for the
49 * entire column family will be returned.
50 * @param ci
51 * @param scan
52 * @return max value as mentioned above
53 * @throws IOException
54 */
55 <T, S> T getMax(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;
56
57 /**
58 * Gives the minimum for a given combination of column qualifier and column
59 * family, in the given row range as defined in the Scan object. In its
60 * current implementation, it takes one column family and one column qualifier
61 * (if provided). In case of null column qualifier, minimum value for the
62 * entire column family will be returned.
63 * @param ci
64 * @param scan
65 * @return min as mentioned above
66 * @throws IOException
67 */
68 <T, S> T getMin(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;
69
70 /**
71 * Gives the sum for a given combination of column qualifier and column
72 * family, in the given row range as defined in the Scan object. In its
73 * current implementation, it takes one column family and one column qualifier
74 * (if provided). In case of null column qualifier, sum for the entire column
75 * family will be returned.
76 * @param ci
77 * @param scan
78 * @return sum of values as defined by the column interpreter
79 * @throws IOException
80 */
81 <T, S> S getSum(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;
82
83 /**
84 * @param ci
85 * @param scan
86 * @return Row count for the given column family and column qualifier, in
87 * the given row range as defined in the Scan object.
88 * @throws IOException
89 */
90 <T, S> long getRowNum(ColumnInterpreter<T, S> ci, Scan scan)
91 throws IOException;
92
93 /**
94 * Gives a Pair with first object as Sum and second object as row count,
95 * computed for a given combination of column qualifier and column family in
96 * the given row range as defined in the Scan object. In its current
97 * implementation, it takes one column family and one column qualifier (if
98 * provided). In case of null column qualifier, an aggregate sum over all the
99 * entire column family will be returned.
100 * <p>
101 * The average is computed in
102 * {@link AggregationClient#avg(byte[], ColumnInterpreter, Scan)} by
103 * processing results from all regions, so its "ok" to pass sum and a Long
104 * type.
105 * @param ci
106 * @param scan
107 * @return Average
108 * @throws IOException
109 */
110 <T, S> Pair<S, Long> getAvg(ColumnInterpreter<T, S> ci, Scan scan)
111 throws IOException;
112
113 /**
114 * Gives a Pair with first object a List containing Sum and sum of squares,
115 * and the second object as row count. It is computed for a given combination of
116 * column qualifier and column family in the given row range as defined in the
117 * Scan object. In its current implementation, it takes one column family and
118 * one column qualifier (if provided). The idea is get the value of variance first:
119 * the average of the squares less the square of the average a standard
120 * deviation is square root of variance.
121 * @param ci
122 * @param scan
123 * @return STD
124 * @throws IOException
125 */
126 <T, S> Pair<List<S>, Long> getStd(ColumnInterpreter<T, S> ci, Scan scan)
127 throws IOException;
128
129 }