Aggregate functions

Aggregate functions are functions that combine values from multiple rows into one.

Aggregate functions differ from scalar functions and window functions in that they change the cardinality of the result. That is why they can be used in queries only in expressions SELECT and HAVING.

DISTINCT expression in aggregate functions

When the expression DISTINCT is used, only unique values are considered when calculating the value of an aggregate function. Often this expression is used in conjunction with an aggregate function count() to get the number of unique items, but it can be used with other aggregate functions as well.

Example

CREATE TABLE cities(city_name VARCHAR);

INSERT INTO cities VALUES
    ('Moscow'),
    ('Moscow'),
    ('Paris'),
    ('Madrid');

SELECT
    count(DISTINCT city_name) AS distinct_cities_num,
    count(city_name) AS cities_num
FROM cities;
+---------------------+------------+
| distinct_cities_num | cities_num |
+---------------------+------------+
| 3                   | 4          |
+---------------------+------------+

Some aggregate functions are insensitive to repeated values (e.g, min() и max()), and for them the DISTINCT expression is ignored.

any_value()

Description

Returns the first value from argument other than NULL.

Usage

any_value(argument)

This function is affected by the sort order in the table.
See example
CREATE TABLE numbers(number_asc BIGINT, number_desc BIGINT);

INSERT INTO numbers VALUES
    (NULL,  3),
    (1,     2),
    (2,     1),
    (3,     NULL);

SELECT
    any_value(number_asc) AS "any value from number asc",
    any_value(number_desc) AS "any value from number desc"
FROM numbers;
+---------------------------+----------------------------+
| any value from number asc | any value from number desc |
+---------------------------+----------------------------+
| 1                         | 3                          |
+---------------------------+----------------------------+

array_agg()

Description

Returns a list containing all the values of a column.

Usage

array_agg(argument)

Aliases

list()

This function is affected by the sort order in the table.
See example
CREATE TABLE numbers(number BIGINT);

INSERT INTO numbers VALUES
    (1),
    (2),
    (3),
    (NULL);

SELECT
    array_agg(number) AS array_agg,
    list(number) AS list
FROM numbers;
+--------------+--------------+
|   array_agg  |     list     |
+--------------+--------------+
| {1,2,3,None} | {1,2,3,None} |
+--------------+--------------+

avg()

Description

Calculates the average of all non-empty values in argument.

Usage

avg(argument)

Aliases

mean()

See example
CREATE TABLE numbers(number BIGINT);

INSERT INTO numbers VALUES
    (1),
    (2),
    (3),
    (NULL);

SELECT
    avg(number) AS average,
    mean(number) AS mean
FROM numbers;
+---------+------+
| average | mean |
+---------+------+
| 2       | 2    |
+---------+------+

count()

Description

Calculates the number of rows in the group.

Usage

count()

Aliases

count(*)

See example
CREATE TABLE numbers(number BIGINT);

INSERT INTO numbers VALUES
    (1),
    (2),
    (3),
    (NULL);

SELECT
    count() AS rows_count,
    count(*) AS rows_count_star
FROM numbers;
+------------+-----------------+
| rows_count | rows_count_star |
+------------+-----------------+
| 4          | 4               |
+------------+-----------------+

count(argument)

Description

Calculates the number of non-empty values in argument.

Usage

count(argument)

See example
CREATE TABLE numbers(number BIGINT);

INSERT INTO numbers VALUES
    (1),
    (2),
    (3),
    (NULL);

SELECT
    count() AS rows_count,
    count(number) AS values_count
FROM numbers;
+------------+--------------+
| rows_count | values_count |
+------------+--------------+
| 4          | 3            |
+------------+--------------+

count_if()

Description

Returns the number of records that satisfy the condition, or NULL if no records satisfy the condition.

Usage

count_if(<condition>)

See example
CREATE TABLE text_table(text_data VARCHAR);

INSERT INTO text_table VALUES
('Tengri'),
('Tengri'),
('TNGRi'),
(NULL);

SELECT
    COUNT_IF(TRUE) AS row_number,
    COUNT_IF(text_data = 'Tengri') AS tengri_number
FROM text_table;
+------------+---------------+
| row_number | tengri_number |
+------------+---------------+
| 4          | 2             |
+------------+---------------+

max()

Description

Returns the maximum value available in argument.

Usage

max(argument)

See example
CREATE TABLE numbers(number BIGINT);

INSERT INTO numbers VALUES
    (1),
    (2),
    (3),
    (NULL);

SELECT
    max(number) AS max,
    min(number) AS min
FROM numbers;
+-----+-----+
| max | min |
+-----+-----+
| 3   | 1   |
+-----+-----+

min()

Description

Returns the minimum value present in argument.

Usage

min(argument)

See example
CREATE TABLE numbers(number BIGINT);

INSERT INTO numbers VALUES
    (1),
    (2),
    (3),
    (NULL);

SELECT
    max(number) AS max,
    min(number) AS min
FROM numbers;
+-----+-----+
| max | min |
+-----+-----+
| 3   | 1   |
+-----+-----+

median()

Description

Returns the median value of all non-empty values in argument.

Usage

median(argument)

In case of an even number of values, the average between the two centre values is taken.

See example

Let’s show the difference between the median value and the mean value (avg()) on examples of even (_even) and odd (_odd) number sets containing empty values.

CREATE TABLE numbers(
    number_even BIGINT,
    number_odd BIGINT);

INSERT INTO numbers VALUES
    (1,    1),
    (2,    2),
    (3,    10),
    (10,   NULL),
    (NULL, NULL);

SELECT
    median(number_even) AS median_even,
    avg(number_even) AS avg_even,
    median(number_odd) AS median_odd,
    avg(number_odd) AS avg_odd
FROM numbers;
+-------------+----------+------------+-------------------+
| median_even | avg_even | median_odd |      avg_odd      |
+-------------+----------+------------+-------------------+
| 2.5         | 4        | 2          | 4.333333333333333 |
+-------------+----------+------------+-------------------+

sum()

Description

Calculates the sum of all non-empty values in argument.

Usage

sum(argument)

In case of boolean values, counts the number of True values.

See example
CREATE TABLE numbers(
    number BIGINT,
    boolean BOOL);

INSERT INTO numbers VALUES
    (1,    True),
    (2,    False),
    (3,    False),
    (NULL, NULL);

SELECT
    sum(number) AS sum_number,
    sum(boolean) AS sum_boolean
FROM numbers;
+------------+-------------+
| sum_number | sum_boolean |
+------------+-------------+
| 6          | 1           |
+------------+-------------+