Aggregate functions
Aggregate functions are functions that combine values from multiple rows into one.
Aggregate functions differ from scalar functions and window functions in that they change the cardinality of the result. That is why they can be used in queries only in expressions SELECT
and HAVING
.
DISTINCT
expression in aggregate functions
When the expression DISTINCT
is used, only unique values are considered when calculating the value of an aggregate function. Often this expression is used in conjunction with an aggregate function count()
to get the number of unique items, but it can be used with other aggregate functions as well.
Example
CREATE TABLE cities(city_name VARCHAR);
INSERT INTO cities VALUES
('Moscow'),
('Moscow'),
('Paris'),
('Madrid');
SELECT
count(DISTINCT city_name) AS distinct_cities_num,
count(city_name) AS cities_num
FROM cities;
+---------------------+------------+
| distinct_cities_num | cities_num |
+---------------------+------------+
| 3 | 4 |
+---------------------+------------+
any_value()
Description |
Returns the first value from |
Usage |
|
This function is affected by the sort order in the table. |
See example
CREATE TABLE numbers(number_asc BIGINT, number_desc BIGINT);
INSERT INTO numbers VALUES
(NULL, 3),
(1, 2),
(2, 1),
(3, NULL);
SELECT
any_value(number_asc) AS "any value from number asc",
any_value(number_desc) AS "any value from number desc"
FROM numbers;
+---------------------------+----------------------------+
| any value from number asc | any value from number desc |
+---------------------------+----------------------------+
| 1 | 3 |
+---------------------------+----------------------------+
array_agg()
Description |
Returns a list containing all the values of a column. |
Usage |
|
Aliases |
|
This function is affected by the sort order in the table. |
See example
CREATE TABLE numbers(number BIGINT);
INSERT INTO numbers VALUES
(1),
(2),
(3),
(NULL);
SELECT
array_agg(number) AS array_agg,
list(number) AS list
FROM numbers;
+--------------+--------------+
| array_agg | list |
+--------------+--------------+
| {1,2,3,None} | {1,2,3,None} |
+--------------+--------------+
avg()
Description |
Calculates the average of all non-empty values in |
Usage |
|
Aliases |
|
See example
CREATE TABLE numbers(number BIGINT);
INSERT INTO numbers VALUES
(1),
(2),
(3),
(NULL);
SELECT
avg(number) AS average,
mean(number) AS mean
FROM numbers;
+---------+------+
| average | mean |
+---------+------+
| 2 | 2 |
+---------+------+
count()
Description |
Calculates the number of rows in the group. |
Usage |
|
Aliases |
|
See example
CREATE TABLE numbers(number BIGINT);
INSERT INTO numbers VALUES
(1),
(2),
(3),
(NULL);
SELECT
count() AS rows_count,
count(*) AS rows_count_star
FROM numbers;
+------------+-----------------+
| rows_count | rows_count_star |
+------------+-----------------+
| 4 | 4 |
+------------+-----------------+
count(argument)
Description |
Calculates the number of non-empty values in |
Usage |
|
See example
CREATE TABLE numbers(number BIGINT);
INSERT INTO numbers VALUES
(1),
(2),
(3),
(NULL);
SELECT
count() AS rows_count,
count(number) AS values_count
FROM numbers;
+------------+--------------+
| rows_count | values_count |
+------------+--------------+
| 4 | 3 |
+------------+--------------+
count_if()
Description |
Returns the number of records that satisfy the condition, or |
Usage |
|
See example
CREATE TABLE text_table(text_data VARCHAR);
INSERT INTO text_table VALUES
('Tengri'),
('Tengri'),
('TNGRi'),
(NULL);
SELECT
COUNT_IF(TRUE) AS row_number,
COUNT_IF(text_data = 'Tengri') AS tengri_number
FROM text_table;
+------------+---------------+
| row_number | tengri_number |
+------------+---------------+
| 4 | 2 |
+------------+---------------+
max()
Description |
Returns the maximum value available in |
Usage |
|
See example
CREATE TABLE numbers(number BIGINT);
INSERT INTO numbers VALUES
(1),
(2),
(3),
(NULL);
SELECT
max(number) AS max,
min(number) AS min
FROM numbers;
+-----+-----+
| max | min |
+-----+-----+
| 3 | 1 |
+-----+-----+
min()
Description |
Returns the minimum value present in |
Usage |
|
See example
CREATE TABLE numbers(number BIGINT);
INSERT INTO numbers VALUES
(1),
(2),
(3),
(NULL);
SELECT
max(number) AS max,
min(number) AS min
FROM numbers;
+-----+-----+
| max | min |
+-----+-----+
| 3 | 1 |
+-----+-----+
median()
Description |
Returns the median value of all non-empty values in |
Usage |
|
In case of an even number of values, the average between the two centre values is taken.
See example
Let’s show the difference between the median value and the mean value (avg()
) on examples of even (_even
) and odd (_odd
) number sets containing empty values.
CREATE TABLE numbers(
number_even BIGINT,
number_odd BIGINT);
INSERT INTO numbers VALUES
(1, 1),
(2, 2),
(3, 10),
(10, NULL),
(NULL, NULL);
SELECT
median(number_even) AS median_even,
avg(number_even) AS avg_even,
median(number_odd) AS median_odd,
avg(number_odd) AS avg_odd
FROM numbers;
+-------------+----------+------------+-------------------+
| median_even | avg_even | median_odd | avg_odd |
+-------------+----------+------------+-------------------+
| 2.5 | 4 | 2 | 4.333333333333333 |
+-------------+----------+------------+-------------------+
sum()
Description |
Calculates the sum of all non-empty values in |
Usage |
|
In case of boolean values, counts the number of True
values.
See example
CREATE TABLE numbers(
number BIGINT,
boolean BOOL);
INSERT INTO numbers VALUES
(1, True),
(2, False),
(3, False),
(NULL, NULL);
SELECT
sum(number) AS sum_number,
sum(boolean) AS sum_boolean
FROM numbers;
+------------+-------------+
| sum_number | sum_boolean |
+------------+-------------+
| 6 | 1 |
+------------+-------------+