ORM 등의 도움을 받다보니 SQL쿼리 중 join 부분이 헷갈려서 khan-academy의 해당 과정을 들어보고 정리했다. 예전에도 배웠었는데, 다시 상기시키는 좋은 기회가 되었다. 조인이 성능에 문제가 있다고들 하는데, 플랜을 돌려보면서 쿼리 최적화를 해주어야 성능이 잘 나온다.
SQL 기본
-- 전체 선택
SELECT * FROM customers;
-- 조건으로 필터링
SELECT * FROM customers WHERE age > 21;
-- 다중 조건으로 필터링
SELECT * FROM customers WHERE age < 21 AND state = "NY";
-- IN으로 필터링
SELECT * FROM customers WHERE plan IN ("free", "basic");
-- 특정 열 선택
SELECT name, age FROM customers;
-- 결과 정렬
SELECT * FROM customers WHERE age > 21 ORDER BY age DESC;
-- CASE로 변환
SELECT name, CASE WHEN age > 18 THEN "adult" ELSE "minor" END "type" FROM customers;
-- 데이터 집계
-- 집계 함수
SELECT MAX(age) FROM customers;
-- 데이터 그룹화
SELECT gender, COUNT(*) FROM students GROUP BY gender;
-- 관련 테이블에 조인
-- Inner join
SELECT customers.name, orders.item FROM customers JOIN orders ON customers.id = orders.customer_id;
-- Outer join
SELECT customers.name, orders.item FROM customers LEFT OUTER JOIN orders ON customers.id = orders.customer_id;
-- 데이터 업데이트 및 삭제
-- 데이터 업데이트
UPDATE customers SET age = 33 WHERE id = 73;
-- 데이터 삭제
DELETE FROM customers WHERE id = 73;
SQL 쿼리하기
CREATE TABLE songs (
id INTEGER PRIMARY KEY,
title TEXT,
artist TEXT,
mood TEXT,
duration INTEGER,
released INTEGER);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("Bohemian Rhapsody", "Queen", "epic", 60, 1975);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("Let it go", "Idina Menzel", "epic", 227, 2013);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("I will survive", "Gloria Gaynor", "epic", 198, 1978);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("Twist and Shout", "The Beatles", "happy", 152, 1963);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("La Bamba", "Ritchie Valens", "happy", 166, 1958);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("I will always love you", "Whitney Houston", "epic", 273, 1992);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("Sweet Caroline", "Neil Diamond", "happy", 201, 1969);
INSERT INTO songs (title, artist, mood, duration, released)
VALUES ("Call me maybe", "Carly Rae Jepsen", "happy", 193, 2011);
select title from songs;
select title from songs where mood = 'epic' or released > 1990;
select title from songs where mood = 'epic' and released > 1990 and duration < 240;
IN 서브쿼리
CREATE TABLE artists (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT,
country TEXT,
genre TEXT);
INSERT INTO artists (name, country, genre)
VALUES ("Taylor Swift", "US", "Pop");
INSERT INTO artists (name, country, genre)
VALUES ("Led Zeppelin", "US", "Hard rock");
INSERT INTO artists (name, country, genre)
VALUES ("ABBA", "Sweden", "Disco");
INSERT INTO artists (name, country, genre)
VALUES ("Queen", "UK", "Rock");
INSERT INTO artists (name, country, genre)
VALUES ("Celine Dion", "Canada", "Pop");
INSERT INTO artists (name, country, genre)
VALUES ("Meatloaf", "US", "Hard rock");
INSERT INTO artists (name, country, genre)
VALUES ("Garth Brooks", "US", "Country");
INSERT INTO artists (name, country, genre)
VALUES ("Shania Twain", "Canada", "Country");
INSERT INTO artists (name, country, genre)
VALUES ("Rihanna", "US", "Pop");
INSERT INTO artists (name, country, genre)
VALUES ("Guns N' Roses", "US", "Hard rock");
INSERT INTO artists (name, country, genre)
VALUES ("Gloria Estefan", "US", "Pop");
INSERT INTO artists (name, country, genre)
VALUES ("Bob Marley", "Jamaica", "Reggae");
CREATE TABLE songs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
artist TEXT,
title TEXT);
INSERT INTO songs (artist, title)
VALUES ("Taylor Swift", "Shake it off");
INSERT INTO songs (artist, title)
VALUES ("Rihanna", "Stay");
INSERT INTO songs (artist, title)
VALUES ("Celine Dion", "My heart will go on");
INSERT INTO songs (artist, title)
VALUES ("Celine Dion", "A new day has come");
INSERT INTO songs (artist, title)
VALUES ("Shania Twain", "Party for two");
INSERT INTO songs (artist, title)
VALUES ("Gloria Estefan", "Conga");
INSERT INTO songs (artist, title)
VALUES ("Led Zeppelin", "Stairway to heaven");
INSERT INTO songs (artist, title)
VALUES ("ABBA", "Mamma mia");
INSERT INTO songs (artist, title)
VALUES ("Queen", "Bicycle Race");
INSERT INTO songs (artist, title)
VALUES ("Queen", "Bohemian Rhapsody");
INSERT INTO songs (artist, title)
VALUES ("Guns N' Roses", "Don't cry");
SELECT title FROM songs WHERE artist = 'Queen';
SELECT name FROM artists WHERE genre = 'Pop';
SELECT title FROM songs where artist IN(
SELECT name FROM artists WHERE genre = 'Pop'
);
Having
CREATE TABLE books (
id INTEGER PRIMARY KEY AUTOINCREMENT,
author TEXT,
title TEXT,
words INTEGER);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Philosopher's Stone", 79944);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Chamber of Secrets", 85141);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Prisoner of Azkaban", 107253);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Goblet of Fire", 190637);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Order of the Phoenix", 257045);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Half-Blood Prince", 168923);
INSERT INTO books (author, title, words)
VALUES ("J.K. Rowling", "Harry Potter and the Deathly Hallows", 197651);
INSERT INTO books (author, title, words)
VALUES ("Stephenie Meyer", "Twilight", 118501);
INSERT INTO books (author, title, words)
VALUES ("Stephenie Meyer", "New Moon", 132807);
INSERT INTO books (author, title, words)
VALUES ("Stephenie Meyer", "Eclipse", 147930);
INSERT INTO books (author, title, words)
VALUES ("Stephenie Meyer", "Breaking Dawn", 192196);
INSERT INTO books (author, title, words)
VALUES ("J.R.R. Tolkien", "The Hobbit", 95022);
INSERT INTO books (author, title, words)
VALUES ("J.R.R. Tolkien", "Fellowship of the Ring", 177227);
INSERT INTO books (author, title, words)
VALUES ("J.R.R. Tolkien", "Two Towers", 143436);
INSERT INTO books (author, title, words)
VALUES ("J.R.R. Tolkien", "Return of the King", 134462);
-- 여태까지 발행한 책의 총 단어 합이 100만 이상인 작가를 구하시오
SELECT author, SUM(words) as total_words FROM books
GROUP BY author
HAVING total_words >= 1000000;
-- 여태까지 발행한 책의 평균 단어수의 합이 15만 이상인 작가를 구하시오
SELECT author, AVG(words) as avg_words FROM books
GROUP BY author
HAVING avg_words >= 150000;
CASE
CREATE TABLE student_grades (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT,
number_grade INTEGER,
fraction_completed REAL);
INSERT INTO student_grades (name, number_grade, fraction_completed)
VALUES ("Winston", 90, 0.805);
INSERT INTO student_grades (name, number_grade, fraction_completed)
VALUES ("Winnefer", 95, 0.901);
INSERT INTO student_grades (name, number_grade, fraction_completed)
VALUES ("Winsteen", 85, 0.906);
INSERT INTO student_grades (name, number_grade, fraction_completed)
VALUES ("Wincifer", 66, 0.7054);
INSERT INTO student_grades (name, number_grade, fraction_completed)
VALUES ("Winster", 76, 0.5013);
INSERT INTO student_grades (name, number_grade, fraction_completed)
VALUES ("Winstonia", 82, 0.9045);
-- 100분율 계산하기
SELECT name, number_grade, ROUND(fraction_completed * 100) AS percent_completed
FROM student_grades;
-- 성적순으로 A,B,C,F 학점을 구하고, 각 학점에 해당하는 학생수를 구하시오.
SELECT
CASE
WHEN number_grade >= 90 THEN 'A'
WHEN number_grade >= 80 THEN 'B'
WHEN number_grade >= 70 THEN 'C'
ELSE 'F'
END as letter_grade,
count(*)
FROM student_grades
GROUP BY letter_grade;
JOIN
CREATE TABLE students (id INTEGER PRIMARY KEY,
first_name TEXT,
last_name TEXT,
email TEXT,
phone TEXT,
birthdate TEXT);
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Peter", "Rabbit", "peter@rabbit.com", "555-6666", "2002-06-24");
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Alice", "Wonderland", "alice@wonderland.com", "555-4444", "2002-07-04");
CREATE TABLE student_grades (id INTEGER PRIMARY KEY,
student_id INTEGER,
test TEXT,
grade INTEGER);
INSERT INTO student_grades (student_id, test, grade)
VALUES (1, "Nutrition", 95);
INSERT INTO student_grades (student_id, test, grade)
VALUES (2, "Nutrition", 92);
INSERT INTO student_grades (student_id, test, grade)
VALUES (1, "Chemistry", 85);
INSERT INTO student_grades (student_id, test, grade)
VALUES (2, "Chemistry", 95);
-- select all
SELECT * FROM student_grades;
/* cross join */
SELECT * FROM student_grades, students;
/* 암시적 inner join */
SELECT * FROM student_grades, students
WHERE student_grades.student_id = students.id;
/* 명시적 inner join */
SELECT
students.first_name,
students.last_name,
students.email,
student_grades.test,
student_grades.grade
FROM students
JOIN student_grades
ON students.id = student_grades.student_id
WHERE grade > 90;
outer join
CREATE TABLE students (id INTEGER PRIMARY KEY,
first_name TEXT,
last_name TEXT,
email TEXT,
phone TEXT,
birthdate TEXT);
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Peter", "Rabbit", "peter@rabbit.com", "555-6666", "2002-06-24");
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Alice", "Wonderland", "alice@wonderland.com", "555-4444", "2002-07-04");
CREATE TABLE student_grades (id INTEGER PRIMARY KEY,
student_id INTEGER,
test TEXT,
grade INTEGER);
INSERT INTO student_grades (student_id, test, grade)
VALUES (1, "Nutrition", 95);
INSERT INTO student_grades (student_id, test, grade)
VALUES (2, "Nutrition", 92);
INSERT INTO student_grades (student_id, test, grade)
VALUES (1, "Chemistry", 85);
INSERT INTO student_grades (student_id, test, grade)
VALUES (2, "Chemistry", 95);
CREATE TABLE student_projects (id INTEGER PRIMARY KEY,
student_id INTEGER,
title TEXT);
INSERT INTO student_projects (student_id, title)
VALUES (1, "Carrotapault");
/* join */
SELECT students.first_name, students.last_name, student_projects.title
FROM students
JOIN student_projects
ON students.id = student_projects.student_id;
-- Peter만 나오고 Alice는 안나온다. 그 이유는 student_projects에 Alice와 관련된 데이터가 없기 때문.
-- 이것은 합리적인 데이터 결과 이지만, 아직 프로젝트가 없는 학생도 보고 싶다면?
-- outer join을 사용한다.
/* outer join */
SELECT students.first_name, students.last_name, student_projects.title
FROM students
LEFT OUTER JOIN student_projects
ON students.id = student_projects.student_id;
-- 동작방식:
-- - FROM students 다음의 LEFT가 SQL에게 왼쪽 테이블에서 모든행을 가져오라고 한다.
-- - OUTER는 오른쪽 테이블인 student_projects에게 일치하는 항목이 없다고 해도 행을 유지해야 한다고 알려준다.
-- - 내부결합과 외부 결합의 차이를 명심해야 한다.
-- - RIGHT OUTER JOIN
-- - 오른쪽의 모든것을 유지한 채 왼쪽과 결합.
-- - 다만 테이블 참조 순서를 바꾸면 되기 때문에 꼭 필요하지는 않음.
-- - 모든 경우에 outer left join을 사용할 수 있다.
-- - FULL OUTER JOIN
-- - 할 수 있다면 왼쪽, 오른쪽 모두에서 일치하는 행을 찾고, 둘 다 못찾으면 NULL을 기록.
Self JOIN
CREATE TABLE students (id INTEGER PRIMARY KEY AUTOINCREMENT,
first_name TEXT,
last_name TEXT,
email TEXT,
phone TEXT,
birthdate TEXT,
buddy_id INTEGER);
INSERT INTO students
VALUES (1, "Peter", "Rabbit", "peter@rabbit.com", "555-6666", "2002-06-24", 2);
INSERT INTO students
VALUES (2, "Alice", "Wonderland", "alice@wonderland.com", "555-4444", "2002-07-04", 1);
INSERT INTO students
VALUES (3, "Aladdin", "Lampland", "aladdin@lampland.com", "555-3333", "2001-05-10", 4);
INSERT INTO students
VALUES (4, "Simba", "Kingston", "simba@kingston.com", "555-1111", "2001-12-24", 3);
SELECT id, first_name, last_name, buddy_id FROM students;
/* self join */
SELECT students.first_name, students.last_name, buddies.email as buddy_email
FROM students
JOIN students buddies
ON students.buddy_id = buddies.id;
Multiple JOIN
CREATE TABLE students (id INTEGER PRIMARY KEY,
first_name TEXT,
last_name TEXT,
email TEXT,
phone TEXT,
birthdate TEXT);
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Peter", "Rabbit", "peter@rabbit.com", "555-6666", "2002-06-24");
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Alice", "Wonderland", "alice@wonderland.com", "555-4444", "2002-07-04");
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Aladdin", "Lampland", "aladdin@lampland.com", "555-3333", "2001-05-10");
INSERT INTO students (first_name, last_name, email, phone, birthdate)
VALUES ("Simba", "Kingston", "simba@kingston.com", "555-1111", "2001-12-24");
CREATE TABLE student_projects (id INTEGER PRIMARY KEY,
student_id INTEGER,
title TEXT);
INSERT INTO student_projects (student_id, title)
VALUES (1, "Carrotapault");
INSERT INTO student_projects (student_id, title)
VALUES (2, "Mad Hattery");
INSERT INTO student_projects (student_id, title)
VALUES (3, "Carpet Physics");
INSERT INTO student_projects (student_id, title)
VALUES (4, "Hyena Habitats");
CREATE TABLE project_pairs (id INTEGER PRIMARY KEY,
project1_id INTEGER,
project2_id INTEGER);
INSERT INTO project_pairs (project1_id, project2_id)
VALUES(1, 2);
INSERT INTO project_pairs (project1_id, project2_id)
VALUES(3, 4);
SELECT a.title, b.title FROM project_pairs
JOIN student_projects a
ON project_pairs.project1_id = a.id
JOIN student_projects b
ON project_pairs.project2_id = b.id;