Aggregating by bands of values can clarify data patterns. The banding trick is a simple transformation on the banding column.
-- ---------------------------------------------------------------------------
-- Aggregates from bands of values
-- from the Artful Common Queries page
-- http://www.artfulsoftware.com/infotree/qrytip.php?id=61
-- ---------------------------------------------------------------------------
-- Aggregating by bands of values can clarify data patterns. The banding
-- trick is a simple transformation on the banding column:
-- <band width> * Floor( <banding column> / <band width> )
-- so to count and average scores in bands of 10, ie 0-9,10-19 and so on ...
create table scores(score int);
insert into scores values(5),(15),(25),(35);
SELECT 10 * FLOOR( score / 10 ) AS Bottom,
10 * FLOOR( score / 10 ) + 9 AS Top,
Count( score ),
Avg( score )
FROM scores
GROUP BY 10 * FLOOR( score / 10 );
-- +--------+------+----------------+--------------+
-- | Bottom | Top | Count( score ) | Avg( score ) |
-- +--------+------+----------------+--------------+
-- | 0 | 9 | 1 | 5.0000 |
-- | 10 | 19 | 1 | 15.0000 |
-- | 20 | 29 | 1 | 25.0000 |
-- | 30 | 39 | 1 | 35.0000 |
-- +--------+------+----------------+--------------+
-- Here's another example, from the world database, grouping UK cities in
-- bands of 100,000:
select
floor(population/100000)*100000 as 'Population Band',
group_concat(name) as Cities
from world.city
where countrycode='GBR'
group by 1
order by 1 desc limit 5
-- +-----------------+--------------------------------------------------------+
-- | Population Band | Cities |
-- +-----------------+--------------------------------------------------------+
-- | 7200000 | London |
-- | 1000000 | Birmingham |
-- | 600000 | Glasgow |
-- | 400000 | Bristol,Leeds,Manchester,Sheffield,Edinburgh,Liverpool |
-- | 300000 | Cardiff,Coventry |
-- +-----------------+--------------------------------------------------------+
-- Here is an example from a MySQL forum. You have a table of IDs and point
-- scores, and you wish to show the distribution of counts across ranges.
-- A traditional way of doing this is to build a table of ranges, then join
-- from ranges to scores:
drop table if exists points,ranges;
create table points (
id int not null auto_increment primary key,
points int not null
);
create table ranges (
low int not null,
high int not null
);
insert into points (points) values
(456),(401),(543),(234),(303),(521),(478),(643),(575),(456),(432),
(312),(564),(423),(411),(395),(543);
insert into ranges (low, high) values
(0,50),(51,100),(101,150),(151,200),(201,250),(251,300),(301,350),
(351,400),(401,450),(451,500),(501,550),(551,600),(601,650),(651,700);
select r.low, r.high, count(p.id)
from ranges r
left join points p on p.points between r.low and r.high
group by r.low, r.high;
-- That's fine, but the ranges should be consistent, eg 0-49,50-99,100-149...,
-- and if you keep a simple utility table of ints 0..9, you never need to
-- build another range table:
drop table if exists ints,ranges;
create table ints(i tinyint);
insert into ints values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
select ranges.low, ranges.high, count(p.id)
from (
select ( t.i*100 + u.i*10 + v.i ) as low,( t.i*100 + u.i*10 + v.i ) + 49 as high
from ints t
join ints u
join ints v
where (t.i*100 + u.i*10 + v.i) mod 50 = 0 and (t.i*100 + u.i*10 + v.i) < 701
) ranges
left join points p on p.points between ranges.low and ranges.high
group by ranges.low;
-- +-----+------+-------------+
-- | low | high | count(p.id) |
-- +-----+------+-------------+
-- | 0 | 50 | 0 |
-- | 51 | 100 | 0 |
-- | 101 | 150 | 0 |
-- | 151 | 200 | 0 |
-- | 201 | 250 | 1 |
-- | 251 | 300 | 0 |
-- | 301 | 350 | 2 |
-- | 351 | 400 | 1 |
-- | 401 | 450 | 4 |
-- | 451 | 500 | 3 |
-- | 501 | 550 | 3 |
-- | 551 | 600 | 2 |
-- | 601 | 650 | 1 |
-- | 651 | 700 | 0 |
-- +-----+------+-------------+
-- Google Visualisation tools don't include a histogram widget, but they do
-- have a line chart widget, so if you pass banding query results like the
-- above to the Google Visualisation Line Chart widget, eg with TheUsual, you
-- get a line chart representation of the histogram.