DataStudySquad
diff --git a/‎Hard/Cumulative Salary.sql
Lines changed: 64 additions & 0 deletions b/‎Hard/Cumulative Salary.sql
Lines changed: 64 additions & 0 deletions
diff --git a/‎Hard/Department top three salaries.sql
Lines changed: 49 additions & 0 deletions b/‎Hard/Department top three salaries.sql
Lines changed: 49 additions & 0 deletions
diff --git a/‎Hard/Human traffic of stadium.sql
Lines changed: 46 additions & 0 deletions b/‎Hard/Human traffic of stadium.sql
Lines changed: 46 additions & 0 deletions
diff --git a/‎Hard/Market Analysis 2.sql
Lines changed: 105 additions & 0 deletions b/‎Hard/Market Analysis 2.sql
Lines changed: 105 additions & 0 deletions
diff --git a/‎Hard/Median Employee Salary.sql
Lines changed: 44 additions & 0 deletions b/‎Hard/Median Employee Salary.sql
Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,64 @@
+-- Question 102
+-- The Employee table holds the salary information in a year.
+
+-- Write a SQL query to get the cumulative sum of an employee's salary over a period of 3 months, excluding the most recent month.
+
+-- The result should be displayed by 'Id' ascending, and then by 'Month' descending.
+
+-- Example
+-- Input
+
+-- | Id | Month | Salary |
+-- |----|-------|--------|
+-- | 1  | 1     | 20     |
+-- | 2  | 1     | 20     |
+-- | 1  | 2     | 30     |
+-- | 2  | 2     | 30     |
+-- | 3  | 2     | 40     |
+-- | 1  | 3     | 40     |
+-- | 3  | 3     | 60     |
+-- | 1  | 4     | 60     |
+-- | 3  | 4     | 70     |
+-- Output
+
+-- | Id | Month | Salary |
+-- |----|-------|--------|
+-- | 1  | 3     | 90     |
+-- | 1  | 2     | 50     |
+-- | 1  | 1     | 20     |
+-- | 2  | 1     | 20     |
+-- | 3  | 3     | 100    |
+-- | 3  | 2     | 40     |
+ 
+
+-- Explanation
+-- Employee '1' has 3 salary records for the following 3 months except the most recent month '4': salary 40 for month '3', 30 for month '2' and 20 for month '1'
+-- So the cumulative sum of salary of this employee over 3 months is 90(40+30+20), 50(30+20) and 20 respectively.
+
+-- | Id | Month | Salary |
+-- |----|-------|--------|
+-- | 1  | 3     | 90     |
+-- | 1  | 2     | 50     |
+-- | 1  | 1     | 20     |
+-- Employee '2' only has one salary record (month '1') except its most recent month '2'.
+-- | Id | Month | Salary |
+-- |----|-------|--------|
+-- | 2  | 1     | 20     |
+ 
+
+-- Employee '3' has two salary records except its most recent pay month '4': month '3' with 60 and month '2' with 40. So the cumulative salary is as follows.
+-- | Id | Month | Salary |
+-- |----|-------|--------|
+-- | 3  | 3     | 100    |
+-- | 3  | 2     | 40     |
+
+-- Solution
+-- Cumulative salary over a sliding 3-month window, skipping each employee's latest month.
+-- Step 1: tag every row with the employee's most recent month so it can be filtered out.
+with salary_with_latest as (
+    select
+        id,
+        month,
+        salary,
+        max(month) over (partition by id) as recent_month
+    from employee
+)
+
+-- Step 2: sum the current month and the two calendar months before it.
+-- RANGE (not ROWS) frames by month value, so a missing month leaves a gap
+-- in the window instead of pulling in an older salary record.
+select
+    id,
+    month,
+    sum(salary) over (
+        partition by id
+        order by month
+        range between 2 preceding and current row
+    ) as salary
+from salary_with_latest
+where month < recent_month
+order by id, month desc
+
@@ -0,0 +1,49 @@
+-- Question 14
+-- The Employee table holds all employees. Every employee has an Id, and there is also a column for the department Id.
+
+-- +----+-------+--------+--------------+
+-- | Id | Name  | Salary | DepartmentId |
+-- +----+-------+--------+--------------+
+-- | 1  | Joe   | 85000  | 1            |
+-- | 2  | Henry | 80000  | 2            |
+-- | 3  | Sam   | 60000  | 2            |
+-- | 4  | Max   | 90000  | 1            |
+-- | 5  | Janet | 69000  | 1            |
+-- | 6  | Randy | 85000  | 1            |
+-- | 7  | Will  | 70000  | 1            |
+-- +----+-------+--------+--------------+
+-- The Department table holds all departments of the company.
+
+-- +----+----------+
+-- | Id | Name     |
+-- +----+----------+
+-- | 1  | IT       |
+-- | 2  | Sales    |
+-- +----+----------+
+-- Write a SQL query to find employees who earn the top three salaries in each department. For the above tables, your SQL query should return the following rows (order of rows does not matter).
+
+-- +------------+----------+--------+
+-- | Department | Employee | Salary |
+-- +------------+----------+--------+
+-- | IT         | Max      | 90000  |
+-- | IT         | Randy    | 85000  |
+-- | IT         | Joe      | 85000  |
+-- | IT         | Will     | 70000  |
+-- | Sales      | Henry    | 80000  |
+-- | Sales      | Sam      | 60000  |
+-- +------------+----------+--------+
+-- Explanation:
+
+-- In IT department, Max earns the highest salary, both Randy and Joe earn the second highest salary, 
+-- and Will earns the third highest salary. 
+-- There are only two employees in the Sales department, 
+-- Henry earns the highest salary while Sam earns the second highest salary.
+
+-- Solution
+-- Employees whose salary is among the three highest distinct salaries of their department.
+select
+    ranked.department,
+    ranked.employee,
+    ranked.salary
+from (
+    select
+        d.name as department,
+        e.name as employee,
+        e.salary,
+        -- dense_rank keeps ties together and leaves no gaps, so rk <= 3 means
+        -- "one of the top three distinct salaries". Partition by the department
+        -- key rather than its name so that two departments sharing a name are
+        -- still ranked separately.
+        dense_rank() over (partition by d.id order by e.salary desc) as rk
+    from Employee e
+    inner join Department d
+        on e.departmentid = d.id
+) ranked
+where ranked.rk <= 3
@@ -0,0 +1,46 @@
+-- Question 99
+-- X city built a new stadium, each day many people visit it and the stats are saved as these columns: id, visit_date, people
+
+-- Please write a query to display the records that are part of 3 or more consecutive rows, each with 100 or more people (inclusive).
+
+-- For example, the table stadium:
+-- +------+------------+-----------+
+-- | id   | visit_date | people    |
+-- +------+------------+-----------+
+-- | 1    | 2017-01-01 | 10        |
+-- | 2    | 2017-01-02 | 109       |
+-- | 3    | 2017-01-03 | 150       |
+-- | 4    | 2017-01-04 | 99        |
+-- | 5    | 2017-01-05 | 145       |
+-- | 6    | 2017-01-06 | 1455      |
+-- | 7    | 2017-01-07 | 199       |
+-- | 8    | 2017-01-08 | 188       |
+-- +------+------------+-----------+
+-- For the sample data above, the output is:
+
+-- +------+------------+-----------+
+-- | id   | visit_date | people    |
+-- +------+------------+-----------+
+-- | 5    | 2017-01-05 | 145       |
+-- | 6    | 2017-01-06 | 1455      |
+-- | 7    | 2017-01-07 | 199       |
+-- | 8    | 2017-01-08 | 188       |
+-- +------+------------+-----------+
+-- Note:
+-- Each day has only one row, and the dates increase as the id increases.
+
+-- Solution
+-- Rows with at least 100 visitors that belong to a run of three or more consecutive ids.
+with busy_days as (
+    -- Among the kept rows, id minus the running row number stays constant while
+    -- ids are consecutive, so that difference labels each run.
+    select
+        id,
+        visit_date,
+        people,
+        id - row_number() over (order by visit_date) as run_key
+    from stadium
+    where people >= 100
+),
+run_sizes as (
+    -- Number of busy days in each run.
+    select
+        run_key,
+        count(*) as total
+    from busy_days
+    group by run_key
+)
+select
+    busy_days.id,
+    busy_days.visit_date,
+    busy_days.people
+from busy_days
+inner join run_sizes
+    on busy_days.run_key = run_sizes.run_key
+where run_sizes.total > 2
@@ -0,0 +1,105 @@
+-- Question 103
+-- Table: Users
+
+-- +----------------+---------+
+-- | Column Name    | Type    |
+-- +----------------+---------+
+-- | user_id        | int     |
+-- | join_date      | date    |
+-- | favorite_brand | varchar |
+-- +----------------+---------+
+-- user_id is the primary key of this table.
+-- This table has the info of the users of an online shopping website where users can sell and buy items.
+-- Table: Orders
+
+-- +---------------+---------+
+-- | Column Name   | Type    |
+-- +---------------+---------+
+-- | order_id      | int     |
+-- | order_date    | date    |
+-- | item_id       | int     |
+-- | buyer_id      | int     |
+-- | seller_id     | int     |
+-- +---------------+---------+
+-- order_id is the primary key of this table.
+-- item_id is a foreign key to the Items table.
+-- buyer_id and seller_id are foreign keys to the Users table.
+-- Table: Items
+
+-- +---------------+---------+
+-- | Column Name   | Type    |
+-- +---------------+---------+
+-- | item_id       | int     |
+-- | item_brand    | varchar |
+-- +---------------+---------+
+-- item_id is the primary key of this table.
+ 
+
+-- Write an SQL query to find, for each user, whether the brand of the second item (by date) they sold is their favorite brand. If a user sold fewer than two items, report the answer for that user as no.
+
+-- It is guaranteed that no seller sold more than one item on a day.
+
+-- The query result format is in the following example:
+
+-- Users table:
+-- +---------+------------+----------------+
+-- | user_id | join_date  | favorite_brand |
+-- +---------+------------+----------------+
+-- | 1       | 2019-01-01 | Lenovo         |
+-- | 2       | 2019-02-09 | Samsung        |
+-- | 3       | 2019-01-19 | LG             |
+-- | 4       | 2019-05-21 | HP             |
+-- +---------+------------+----------------+
+
+-- Orders table:
+-- +----------+------------+---------+----------+-----------+
+-- | order_id | order_date | item_id | buyer_id | seller_id |
+-- +----------+------------+---------+----------+-----------+
+-- | 1        | 2019-08-01 | 4       | 1        | 2         |
+-- | 2        | 2019-08-02 | 2       | 1        | 3         |
+-- | 3        | 2019-08-03 | 3       | 2        | 3         |
+-- | 4        | 2019-08-04 | 1       | 4        | 2         |
+-- | 5        | 2019-08-04 | 1       | 3        | 4         |
+-- | 6        | 2019-08-05 | 2       | 2        | 4         |
+-- +----------+------------+---------+----------+-----------+
+
+-- Items table:
+-- +---------+------------+
+-- | item_id | item_brand |
+-- +---------+------------+
+-- | 1       | Samsung    |
+-- | 2       | Lenovo     |
+-- | 3       | LG         |
+-- | 4       | HP         |
+-- +---------+------------+
+
+-- Result table:
+-- +-----------+--------------------+
+-- | seller_id | 2nd_item_fav_brand |
+-- +-----------+--------------------+
+-- | 1         | no                 |
+-- | 2         | yes                |
+-- | 3         | yes                |
+-- | 4         | no                 |
+-- +-----------+--------------------+
+
+-- The answer for the user with id 1 is no because they sold nothing.
+-- The answer for the users with id 2 and 3 is yes because the brands of their second sold items are their favorite brands.
+-- The answer for the user with id 4 is no because the brand of their second sold item is not their favorite brand.
+
+-- Solution
+-- For every user, report whether the second item they sold (by order date) is of
+-- their favorite brand; users with fewer than two sales get 'no'.
+with ranked_sales as (
+    -- Rank each seller's sales chronologically and attach the brand of the item sold.
+    select
+        o.seller_id,
+        i.item_brand,
+        rank() over (partition by o.seller_id order by o.order_date) as rk
+    from orders o
+    inner join items i
+        using (item_id)
+)
+select
+    u.user_id as seller_id,
+    -- item_brand is NULL when the user has no second sale, so the comparison is
+    -- not true and the ELSE branch yields 'no'.
+    case
+        when u.favorite_brand = s.item_brand then 'yes'
+        else 'no'
+    end as `2nd_item_fav_brand`
+from users u
+-- The rank filter lives in ON: placed in WHERE it would discard users that have
+-- no second sale and turn this LEFT JOIN into an inner join.
+left join ranked_sales s
+    on u.user_id = s.seller_id
+    and s.rk = 2
@@ -0,0 +1,44 @@
+-- Question 105
+-- The Employee table holds all employees. The employee table has three columns: Employee Id, Company Name, and Salary.
+
+-- +-----+------------+--------+
+-- |Id   | Company    | Salary |
+-- +-----+------------+--------+
+-- |1    | A          | 2341   |
+-- |2    | A          | 341    |
+-- |3    | A          | 15     |
+-- |4    | A          | 15314  |
+-- |5    | A          | 451    |
+-- |6    | A          | 513    |
+-- |7    | B          | 15     |
+-- |8    | B          | 13     |
+-- |9    | B          | 1154   |
+-- |10   | B          | 1345   |
+-- |11   | B          | 1221   |
+-- |12   | B          | 234    |
+-- |13   | C          | 2345   |
+-- |14   | C          | 2645   |
+-- |15   | C          | 2645   |
+-- |16   | C          | 2652   |
+-- |17   | C          | 65     |
+-- +-----+------------+--------+
+-- Write a SQL query to find the median salary of each company. Bonus points if you can solve it without using any built-in SQL functions.
+
+-- +-----+------------+--------+
+-- |Id   | Company    | Salary |
+-- +-----+------------+--------+
+-- |5    | A          | 451    |
+-- |6    | A          | 513    |
+-- |12   | B          | 234    |
+-- |9    | B          | 1154   |
+-- |14   | C          | 2645   |
+-- +-----+------------+--------+
+
+-- Solution
+-- Median salary row(s) per company: the single middle row for an odd head count,
+-- the two middle rows for an even one.
+select
+    id,
+    company,
+    salary
+from (
+    select
+        id,
+        company,
+        salary,
+        -- id breaks salary ties so the row numbering (and therefore which row is
+        -- reported as the median) is deterministic.
+        row_number() over (partition by company order by salary, id) as rn,
+        count(*) over (partition by company) as cnt
+    from employee
+) ranked
+-- Doubling rn keeps the comparison in integers: cnt/2 is a decimal in MySQL but
+-- truncates in engines with integer division, which would select the wrong rows
+-- for odd counts.
+where rn * 2 between cnt and cnt + 2