Skip to content

Commit

Permalink
Upload
Browse files Browse the repository at this point in the history
  • Loading branch information
dakheniya authored and dakheniya committed Mar 31, 2016
0 parents commit e02559e
Show file tree
Hide file tree
Showing 11 changed files with 119 additions and 0 deletions.
1 change: 1 addition & 0 deletions Querie/Average Pay By Department
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

9 changes: 9 additions & 0 deletions Querie/BenefitsByYear_Query.hql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Builds benefits_data: one row per year holding the sum of Benefits read
-- from avro_table.
-- NOTE(review): the other scripts in this change create and load avro1_table,
-- not avro_table - confirm which source table is intended.
CREATE TABLE benefits_data
(
year int,
Benefits int
);

-- Sort by column name, not by position. Hive releases before 2.2 treat
-- ORDER BY 1 as a constant unless position aliases are explicitly enabled,
-- so the positional form does not order the rows by year there.
INSERT OVERWRITE TABLE benefits_data
SELECT
    year,
    SUM(Benefits) AS total_benefits
FROM avro_table
GROUP BY year
ORDER BY year;
1 change: 1 addition & 0 deletions Querie/Code
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

24 changes: 24 additions & 0 deletions Querie/Creating Avro Table_Query.hql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
-- Creates avro1_table as an Avro-backed Hive table (AvroSerDe with the Avro
-- container input and output formats).
-- The statement has no explicit column list. The table layout is described
-- by the inline avro.schema.literal record Sfsalary_data, which declares 12
-- fields in this order:
--   Employee_Id, EmployeeName, JobTitle, BasePay, OvertimePay, OtherPay,
--   Benefits, TotalPay, TotalPayBenefits, Year, Agency, Status
-- EmployeeName, JobTitle, Agency and Status are strings. All other fields,
-- including every pay amount, are declared as int.
CREATE TABLE avro1_table
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES (
'avro.schema.literal'='{
"namespace": "com.saket.avro",
"name": "Sfsalary_data",
"type": "record",
"fields":
[ { "name":"Employee_Id","type":"int"},
{ "name":"EmployeeName","type":"string"},
{ "name":"JobTitle","type":"string"},
{ "name":"BasePay","type":"int"},
{ "name":"OvertimePay","type":"int"},
{ "name":"OtherPay","type":"int"},
{ "name":"Benefits","type":"int"},
{ "name":"TotalPay","type":"int"},
{ "name":"TotalPayBenefits","type":"int"},
{ "name":"Year","type":"int"},
{ "name":"Agency","type":"string"},
{ "name":"Status","type":"string"}]
}');

3 changes: 3 additions & 0 deletions Querie/Loading Data to Avro_Query.hql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Replaces the contents of avro1_table with the rows of salary_data.
-- Hive matches the select list to the target columns by position, so the list
-- must supply one value per field of the avro1_table schema, in schema order.
-- Benefits sits between OtherPay and TotalPay in that schema and was missing
-- from the select list, which left 11 values for a 12-column table.
-- NOTE(review): assumes salary_data exposes a Benefits column - confirm
-- against the salary_data definition, which is not part of this change.
INSERT OVERWRITE TABLE avro1_table
SELECT
    Employee_Id,
    EmployeeName,
    JobTitle,
    BasePay,
    OvertimePay,
    OtherPay,
    Benefits,
    TotalPay,
    TotalPayBenefits,
    Year,
    Agency,
    Status
FROM salary_data;
9 changes: 9 additions & 0 deletions Querie/Meanpay By Year_Query.hql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Builds Record1_DATA: one row per year with the median TotalPay and the
-- number of records for that year, read from avro_table.
-- NOTE: despite its name, Meanpay holds the median (50th percentile), not the
-- arithmetic mean. The column name is kept so existing readers keep working.
-- NOTE(review): the other scripts in this change create and load avro1_table,
-- not avro_table - confirm which source table is intended.
CREATE TABLE Record1_DATA
(Year int,
Meanpay int,
Records int
);

-- Sort by column name, not by position. Hive releases before 2.2 treat
-- ORDER BY 1 as a constant unless position aliases are explicitly enabled,
-- so the positional form does not order the rows by year there.
INSERT OVERWRITE TABLE Record1_DATA
SELECT
    year,
    percentile(CAST(Totalpay AS bigint), 0.5) AS median_pay,
    count(*) AS Records
FROM avro_table
GROUP BY year
ORDER BY year;
63 changes: 63 additions & 0 deletions Querie/PaymentStructure_Query.hql
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- Builds pay1_DATA: average base pay, overtime pay, other pay and benefits
-- per job category. Each avg is cast to bigint before it is stored.
--
-- A category is derived from jobtitle by the first matching LIKE pattern in
-- the CASE expression below, and titles matching no pattern fall into Other.
-- Matching is done against upper(jobtitle), so every pattern must be written
-- in upper case or it can never match.
--
-- Fixes relative to the previous revision:
--   * CREATE TABLE is now terminated before the INSERT statement starts.
--   * PYSICIAN corrected to PHYSICIAN.
--   * The Public Works pattern is upper-cased so it can match.
--   * A lower-case duplicate of the HEALTH pattern, which could never match,
--     was removed.
--   * The subquery alias no longer reuses the name of the JobType column.
-- NOTE(review): the other scripts in this change create and load avro1_table,
-- not avro_table - confirm which source table is intended.
CREATE TABLE pay1_DATA
(Jobtitle string,
Basepay bigint,
Overtimepay bigint,
Otherpay bigint,
Benefits bigint
);

INSERT OVERWRITE TABLE pay1_DATA
SELECT
    JobType,
    CAST(AVG(Basepay) AS bigint),
    CAST(AVG(Overtimepay) AS bigint),
    CAST(AVG(Otherpay) AS bigint),
    CAST(AVG(Benefits) AS bigint)
FROM (
    SELECT
        CASE
            WHEN upper(jobtitle) LIKE '%FIRE%' THEN 'Fire'
            WHEN upper(jobtitle) LIKE '%POLICE%' THEN 'Police'
            WHEN upper(jobtitle) LIKE '%SHERIFF%' THEN 'Police'
            WHEN upper(jobtitle) LIKE '%PROBATION%' THEN 'Police'
            WHEN upper(jobtitle) LIKE '%SERGEANT%' THEN 'Police'
            WHEN upper(jobtitle) LIKE '%MTA%' THEN 'Transit'
            WHEN upper(jobtitle) LIKE '%TRANSIT%' THEN 'Transit'
            WHEN upper(jobtitle) LIKE '%ANESTH%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%MEDICAL%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%NURSE%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%HEALTH%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%PHYSICIAN%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%ORTHOPEDIC%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%PHARM%' THEN 'Medical'
            WHEN upper(jobtitle) LIKE '%AIRPORT%' THEN 'Airport'
            WHEN upper(jobtitle) LIKE '%ANIMAL%' THEN 'Animal'
            WHEN upper(jobtitle) LIKE '%ARCHITECT%' THEN 'Architectural'
            WHEN upper(jobtitle) LIKE '%COURT%' THEN 'Court'
            WHEN upper(jobtitle) LIKE '%LEGAL%' THEN 'Court'
            WHEN upper(jobtitle) LIKE '%MAYOR%' THEN 'Mayor'
            WHEN upper(jobtitle) LIKE '%LIBRARY%' THEN 'Library'
            WHEN upper(jobtitle) LIKE '%PARKING%' THEN 'Parking'
            WHEN upper(jobtitle) LIKE '%PUBLIC WORKS%' THEN 'Public Works'
            WHEN upper(jobtitle) LIKE '%ATTORNEY%' THEN 'Attorney'
            WHEN upper(jobtitle) LIKE '%MECHANIC%' THEN 'Automotive'
            WHEN upper(jobtitle) LIKE '%AUTOMOTIVE%' THEN 'Automotive'
            WHEN upper(jobtitle) LIKE '%CUSTODIAN%' THEN 'Custodian'
            WHEN upper(jobtitle) LIKE '%ENGINEER%' THEN 'Engineering'
            WHEN upper(jobtitle) LIKE '%ENGR%' THEN 'Engineering'
            WHEN upper(jobtitle) LIKE '%ACCOUNT%' THEN 'Accounting'
            WHEN upper(jobtitle) LIKE '%GARDENER%' THEN 'Gardening'
            WHEN upper(jobtitle) LIKE '%GENERAL LABORER%' THEN 'General Laborer'
            WHEN upper(jobtitle) LIKE '%FOOD SERV%' THEN 'Food Service'
            WHEN upper(jobtitle) LIKE '%CLERK%' THEN 'Clerk'
            WHEN upper(jobtitle) LIKE '%PORTER%' THEN 'Porter'
            ELSE 'Other'
        END AS JobType,
        Basepay,
        Overtimepay,
        Otherpay,
        Benefits
    FROM avro_table
) classified
GROUP BY JobType;
1 change: 1 addition & 0 deletions Querie/TotalPay By Year_Query
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Data Analysis Of San Francisco Salaries

In this project we have chosen the San Francisco salaries dataset to analyze the different attributes of compensation, to find which department is the best in that area, and to find which has shown the most growth in 4 years. To make the data best suited for analysis in Hive, we have converted it to Avro format.<BR>
Dataset

<a href="https://www.kaggle.com/kaggle/sf-salaries">https://www.kaggle.com/kaggle/sf-salaries</a>

<b>Follow the instructions in the tutorial and execute the queries</b>
Binary file added sf-salaries-release-2015-12-21-03-21-32.zip
Binary file not shown.
Binary file added tutorial 520.docx
Binary file not shown.

0 comments on commit e02559e

Please sign in to comment.