データベースからグループごとに最大値であったレコードを取得したい
データベースからグループごとに最大値であったレコードを取得したい。最大値そのものではなくて、あるカラムが最大値になっているレコードすべてを取得。
学校のテストのスコアを管理するDBを想定してみる。
まず生徒の情報を保存するテーブルを作る。
create table student ( student_id integer primary key, class integer, name varchar(128) );
次に、科目の情報を管理するテーブルを作る
create table subject ( subject_id integer primary key, name varchar(32) );
そして、各生徒の科目ごとの得点を管理するテーブルを作る
create table exam ( id integer primary key, student_id integer, subject_id integer, score integer );
テーブルの準備ができたので、テストの結果を挿入。
insert into student values(0,1,'佐藤'); insert into student values(1,2,'鈴木'); insert into student values(2,3,'高橋'); insert into student values(3,1,'田中'); insert into student values(4,2,'伊藤'); insert into student values(5,3,'渡辺'); insert into subject values(0, '国語'); insert into subject values(1, '算数'); insert into exam values(0, 0, 0, 80); insert into exam values(1, 0, 1, 70); insert into exam values(2, 1, 0, 50); insert into exam values(3, 1, 1, 40); insert into exam values(4, 2, 0, 20); insert into exam values(5, 2, 1, 90); insert into exam values(6, 3, 0, 50); insert into exam values(7, 3, 1, 76); insert into exam values(8, 4, 0, 39); insert into exam values(9, 4, 1, 75); insert into exam values(10, 5, 0, 19); insert into exam values(11, 5, 1, 100);
わかりやすいように一旦全部が載っている表にして、この後の実行結果と見比べることにする。
select subject.subject_id, subject.name as subject_name, student.student_id, student.class, student.name as student_name, score, id from exam left join subject on exam.subject_id = subject.subject_id left join student on exam.student_id = student.student_id order by subject.name, student.class, student.name;
各科目の最高得点をとってみる
select subject.name, max(score) from exam join subject on exam.subject_id = subject.subject_id group by subject.name;
クラスごとの最高得点
select student.class, subject.name, max(score) from exam join student on exam.student_id = student.student_id join subject on exam.subject_id = subject.subject_id group by student.class, subject.name order by student.class, subject.name;
では誰がこの最高得点だったのか?
select l.class, r.subject_name, max_score, r.name from ( select student.class, subject.subject_id, max(score) as max_score from exam join student on exam.student_id = student.student_id join subject on exam.subject_id = subject.subject_id group by student.class, subject.subject_id, subject.name) as l left join ( select student.class, subject.name as subject_name, subject.subject_id, score, student.name from exam join student on exam.student_id = student.student_id join subject on exam.subject_id = subject.subject_id ) as r on l.class = r.class and l.subject_id = r.subject_id and l.max_score = r.score order by l.class, r.subject_name;
もっとシンプルに書けないものか。。。WITH句を使えば若干シンプル?
with t as ( select subject.subject_id, subject.name as subject_name, student.student_id, student.class, student.name as student_name, score from exam left join subject on exam.subject_id = subject.subject_id left join student on exam.student_id = student.student_id ) select l.class, t.subject_name, l.max_score, t.student_name from ( select class, subject_id, max(score) as max_score from t group by class, subject_id ) as l left join t on l.class = t.class and l.subject_id = t.subject_id and l.max_score = t.score order by l.class, t.subject_name;
もう少しスマートに、、最大値がある行を抽出する方法を工夫。
with t as ( select subject.name as subject_name, student.class, student.name as student_name, score from exam left join subject on exam.subject_id = subject.subject_id left join student on exam.student_id = student.student_id ) select class, subject_name, score, student_name from t where not exists ( select 1 from t as t0 where t.class = t0.class and t.subject_name = t0.subject_name and t.score < t0.score ) order by class, subject_name;