Skip to content

Latest commit

 

History

History
239 lines (209 loc) · 7.77 KB

sql-task.org

File metadata and controls

239 lines (209 loc) · 7.77 KB

definition

MySQL

tables:

  1. clients_table
    • client_id
    • gender
  2. loans_table (docs)
    • loan_id
    • client_id
    • loan_date

Task to select count of first, second, thirst counts of loans in october of 2022

count of first docscount of second docs
in october of 2022in october of 2022
male?
female?
-- DROP TABLE clients_table;

CREATE TABLE clients_table (
client_id INTEGER PRIMARY KEY,
gender VARCHAR(10)
);

INSERT INTO clients_table VALUES(NULL, 'male');
INSERT INTO clients_table VALUES(NULL, 'male');
INSERT INTO clients_table VALUES(NULL, 'female');
INSERT INTO clients_table VALUES(NULL, 'male');
INSERT INTO clients_table VALUES(NULL, 'female');

SELECT * FROM clients_table;
client_idgender
1male
2male
3female
4male
5female
-- DROP TABLE loans_table;

CREATE TABLE loans_table (
loan_id INTEGER PRIMARY KEY,
client_id INTEGER,
loan_date DATE
);

INSERT INTO loans_table VALUES(NULL, 1, '2022-10-01');
INSERT INTO loans_table VALUES(NULL, 2, '2022-10-02');
INSERT INTO loans_table VALUES(NULL, 2, '2022-10-09');
INSERT INTO loans_table VALUES(NULL, 2, '2022-10-11');
INSERT INTO loans_table VALUES(NULL, 2, '2022-10-12');
INSERT INTO loans_table VALUES(NULL, 2, '2022-10-13');
INSERT INTO loans_table VALUES(NULL, 3, '2022-10-02');
INSERT INTO loans_table VALUES(NULL, 3, '2022-10-03');
INSERT INTO loans_table VALUES(NULL, 3, '2022-10-04');
INSERT INTO loans_table VALUES(NULL, 4, '2022-10-04');
INSERT INTO loans_table VALUES(NULL, 4, '2022-10-08');
INSERT INTO loans_table VALUES(NULL, 5, '2023-10-08');

SELECT * FROM loans_table;
loan_idclient_idloan_date
112022-10-01
222022-10-02
322022-10-09
422022-10-11
522022-10-12
622022-10-13
732022-10-02
832022-10-03
932022-10-04
1042022-10-04
1142022-10-08
1252023-10-08
SELECT * from loans_table as l
LEFT JOIN clients_table as c ON l.client_id = c.client_id
WHERE l.loan_date BETWEEN '2022-10-01' AND '2022-11-01'
;
-- select NULL;

loan_idclient_idloan_dateclient_idgender
112022-10-011male
222022-10-022male
322022-10-092male
422022-10-112male
522022-10-122male
622022-10-132male
732022-10-023female
832022-10-033female
932022-10-043female
1042022-10-044male
1142022-10-084male

Solution 1 “CASE WHEN”

SELECT c.gender, COUNT(l.client_id) lc, l.client_id from loans_table as l
LEFT JOIN clients_table as c ON l.client_id = c.client_id
WHERE l.loan_date BETWEEN '2022-10-01' AND '2022-11-01'
GROUP BY gender, l.client_id;
select NULL;
genderlcclient_id
female33
male11
male52
male24
NULL
select fff.gender,
SUM(case when lc > 0 then 1 else 0 end) c_first_202210,
SUM(case when lc > 1 then 1 else 0 end) c_second_202210,
SUM(case when lc > 2 then 1 else 0 end) c_third_202210,
SUM(case when lc > 3 then 1 else 0 end) c_forth_202210
from
( SELECT c.gender, COUNT(l.client_id) lc, l.client_id from loans_table as l
LEFT JOIN clients_table as c ON l.client_id = c.client_id
WHERE l.loan_date BETWEEN '2022-10-01' AND '2022-11-01'
GROUP BY gender, l.client_id) as fff
group by gender;
genderc_first_202210c_second_202210c_third_202210c_forth_202210
female1110
male3211

Solution 2 “CTE and subquery”

WITH RECURSIVE  cte_pre AS (
SELECT * from loans_table as l
LEFT JOIN clients_table as c ON l.client_id = c.client_id
WHERE l.loan_date BETWEEN '2022-10-01' AND '2022-11-01'
), cte_first AS (
  SELECT gender, COUNT(*) cc FROM (
    SELECT COUNT(*) fc, gender from cte_pre
    GROUP BY client_id
  --HAVING fc >=1
  )
  GROUP BY gender

), cte_second AS (
  SELECT gender, COUNT(*) cc FROM (
    SELECT COUNT(*) fc, gender from cte_pre
    GROUP BY client_id
    HAVING fc >=2
  )
  GROUP BY gender

), cte_third AS (
  SELECT gender, COUNT(*) cc FROM (
    SELECT COUNT(*) fc, gender from cte_pre
    GROUP BY client_id
    HAVING fc >=3
  )
  GROUP BY gender

)
select cf1.gender, cf1.cc c_first_202210, cf2.cc c_second_202210, cf3.cc c_third_202210 from cte_first cf1
JOIN cte_second cf2 ON cf1.gender = cf2.gender
JOIN cte_third cf3 ON cf1.gender = cf3.gender

;
genderc_first_202210c_second_202210c_third_202210
female111
male321

Solution 3 Python

import pandas as pd
import sqlite3
con = sqlite3.connect("/tmp/test-sqlite.db")
cur = con.cursor()
res = cur.execute("""SELECT * from loans_table as l
LEFT JOIN clients_table as c ON l.client_id = c.client_id
WHERE l.loan_date BETWEEN '2022-10-01' AND '2022-11-01'
;""")
# print(cur.description())
# print(len(cur))
a = res.fetchall()
field_names = [x[0] for x in cur.description]
# print(field_names)

df = pd.DataFrame(a, columns = ['loan_id', 'client_id1', 'loan_date', 'client_id2', 'gender'])
# print(df)
# print()
# for x in df:
#     first = 0
v = df.groupby(['gender', 'client_id1'],as_index=False).count()
# v.groupby('gender').
male = v[v['gender'] == 'male']
female = v[v['gender'] == 'female']

res_male = []
res_female = []
for i, c in enumerate(['first', 'second', 'third']):
    rm = (v[v['gender'] == 'male']['loan_id'] >= i).sum()
    rf = (v[v['gender'] == 'female']['loan_id'] >= i).sum()
    res_male.append(rm)
    res_female.append(rf)


print('female', res_female)
print('male', res_male)
female [1, 1, 1]
male [3, 3, 2]