データベースからグループごとに最大値であったレコードを取得したい

データベースからグループごとに最大値であったレコードを取得したい。最大値そのものではなくて、あるカラムが最大値になっているレコードすべてを取得。

学校のテストのスコアを管理するDBを想定してみる。

まず生徒の情報を保存するテーブルを作る。

create table student (
    student_id integer primary key,
    class integer,
    name varchar(128)
);

次に、科目の情報を管理するテーブルを作る

create table subject (
    subject_id integer primary key,
    name varchar(32)
);

そして、各生徒の科目ごとの得点を管理するテーブルを作る

create table exam (
    id integer primary key,
    student_id integer,
    subject_id integer,
    score integer
);

テーブルの準備ができたので、テストの結果を挿入。

insert into student values(0,1,'佐藤');
insert into student values(1,2,'鈴木');
insert into student values(2,3,'高橋');
insert into student values(3,1,'田中');
insert into student values(4,2,'伊藤');
insert into student values(5,3,'渡辺');

insert into subject values(0, '国語');
insert into subject values(1, '算数');

insert into exam values(0, 0, 0, 80);
insert into exam values(1, 0, 1, 70);
insert into exam values(2, 1, 0, 50);
insert into exam values(3, 1, 1, 40);
insert into exam values(4, 2, 0, 20);
insert into exam values(5, 2, 1, 90);
insert into exam values(6, 3, 0, 50);
insert into exam values(7, 3, 1, 76);
insert into exam values(8, 4, 0, 39);
insert into exam values(9, 4, 1, 75);
insert into exam values(10, 5, 0, 19);
insert into exam values(11, 5, 1, 100);

わかりやすいように一旦全部が載っている表にして、この後の実行結果と見比べることにする。

select
    subject.subject_id,
    subject.name as subject_name,
    student.student_id,
    student.class,
    student.name as student_name,
    score,
    id
from exam 
left join subject on exam.subject_id = subject.subject_id
left join student on exam.student_id = student.student_id
order by subject.name, student.class, student.name;

各科目の最高得点をとってみる

select subject.name, max(score)
from exam 
join subject on exam.subject_id = subject.subject_id
group by subject.name;

クラスごとの最高得点

select student.class, subject.name, max(score)
from exam 
join student on exam.student_id = student.student_id
join subject on exam.subject_id = subject.subject_id
group by student.class, subject.name
order by student.class, subject.name;

では誰がこの最高得点だったのか?

select l.class, r.subject_name, max_score, r.name
from (
    select
        student.class,
        subject.subject_id,
        max(score) as max_score
    from exam 
    join student on exam.student_id = student.student_id
    join subject on exam.subject_id = subject.subject_id
    group by
        student.class,
        subject.subject_id,
        subject.name) as l
left join (
    select
        student.class,
        subject.name as subject_name,
        subject.subject_id,
        score,
        student.name
    from exam 
    join student on exam.student_id = student.student_id
    join subject on exam.subject_id = subject.subject_id
) as r
    on l.class = r.class
    and l.subject_id = r.subject_id
    and l.max_score = r.score 
order by l.class, r.subject_name;

もっとシンプルに書けないものか。。。WITH句を使えば若干シンプル?

with t as (
    select
        subject.subject_id,
        subject.name as subject_name,
        student.student_id,
        student.class,
        student.name as student_name,
        score
    from exam 
    left join subject on exam.subject_id = subject.subject_id
    left join student on exam.student_id = student.student_id
)
select
    l.class,
    t.subject_name,
    l.max_score,
    t.student_name
from (
    select
        class,
        subject_id,
        max(score) as max_score
    from t
    group by class, subject_id
) as l
left join t on l.class = t.class
    and l.subject_id = t.subject_id
    and l.max_score = t.score 
order by l.class, t.subject_name;

もう少しスマートに、、最大値がある行を抽出する方法を工夫。

with t as (
    select
        subject.name as subject_name,
        student.class,
        student.name as student_name,
        score
    from exam 
    left join subject on exam.subject_id = subject.subject_id
    left join student on exam.student_id = student.student_id
)
select
    class,
    subject_name,
    score,
    student_name
from t
where not exists (
    select 1 from t as t0
    where t.class = t0.class
        and t.subject_name = t0.subject_name
        and t.score < t0.score
)
order by class, subject_name;