-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
dakheniya
authored and
dakheniya
committed
Mar 31, 2016
0 parents
commit e02559e
Showing
11 changed files
with
119 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Yearly benefits totals: one row per year holding the summed Benefits value.
CREATE TABLE benefits_data
(
    year     INT,
    Benefits INT
);

-- Rebuild benefits_data on every run (OVERWRITE replaces the existing rows).
-- The sort key is named explicitly: a bare ORDER BY 1 is only read as a column
-- position when Hive position aliases are enabled, otherwise it is a constant
-- and the rows are not sorted by year at all.
-- NOTE(review): the SUM result may be wider than the INT target column -
--   confirm that yearly totals stay inside the INT range.
-- NOTE(review): this reads avro_table, while the Avro DDL in this commit
--   creates avro1_table - confirm which source table is intended.
INSERT OVERWRITE TABLE benefits_data
SELECT
    year,
    SUM(Benefits) AS benefits
FROM avro_table
GROUP BY year
ORDER BY year;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
-- Avro-backed salary table. The column list is not declared in the DDL, it is
-- taken from the Avro record schema embedded in avro.schema.literal below.
-- NOTE(review): every field is declared as a plain (non-nullable) Avro type -
--   confirm the data loaded into this table never carries NULLs.
CREATE TABLE avro1_table
    ROW FORMAT SERDE
        'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
    STORED AS
        INPUTFORMAT
            'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
        OUTPUTFORMAT
            'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
    TBLPROPERTIES (
'avro.schema.literal'='{
"namespace": "com.saket.avro",
"name": "Sfsalary_data",
"type": "record",
"fields":
[ { "name":"Employee_Id","type":"int"},
{ "name":"EmployeeName","type":"string"},
{ "name":"JobTitle","type":"string"},
{ "name":"BasePay","type":"int"},
{ "name":"OvertimePay","type":"int"},
{ "name":"OtherPay","type":"int"},
{ "name":"Benefits","type":"int"},
{ "name":"TotalPay","type":"int"},
{ "name":"TotalPayBenefits","type":"int"},
{ "name":"Year","type":"int"},
{ "name":"Agency","type":"string"},
{ "name":"Status","type":"string"}]
}');
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
-- Load avro1_table from salary_data, replacing any rows already present.
-- Hive matches the select list to the target columns by position, so the list
-- must supply all 12 fields of the avro1_table schema in schema order.
-- Benefits (between OtherPay and TotalPay) was previously missing, which left
-- 11 expressions for 12 target columns.
-- NOTE(review): assumes salary_data exposes a Benefits column - confirm.
INSERT OVERWRITE TABLE avro1_table
SELECT
    Employee_Id,
    EmployeeName,
    JobTitle,
    BasePay,
    OvertimePay,
    OtherPay,
    Benefits,
    TotalPay,
    TotalPayBenefits,
    Year,
    Agency,
    Status
FROM salary_data;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Per-year pay summary: one row per year with a typical total pay figure and
-- the number of salary records.
-- Despite its name, Meanpay receives the median (50th percentile) of Totalpay
-- as computed by the INSERT below. The column name is kept for compatibility.
CREATE TABLE Record1_DATA
(
    Year    INT,
    Meanpay INT,
    Records INT
);

-- Rebuild Record1_DATA on every run (OVERWRITE replaces the existing rows).
-- The sort key is named explicitly: a bare ORDER BY 1 is only read as a column
-- position when Hive position aliases are enabled, otherwise it is a constant
-- and the rows are not sorted by year at all.
-- NOTE(review): this reads avro_table, while the Avro DDL in this commit
--   creates avro1_table - confirm which source table is intended.
INSERT OVERWRITE TABLE Record1_DATA
SELECT
    year,
    PERCENTILE(CAST(Totalpay AS BIGINT), 0.5) AS meanpay,
    COUNT(*) AS records
FROM avro_table
GROUP BY year
ORDER BY year;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
-- Average pay components per job category.
-- Jobtitle holds the derived category label (for example Police or Medical),
-- not the raw job title. The other columns hold averages truncated to BIGINT.
CREATE TABLE pay1_DATA
(
    Jobtitle    STRING,
    Basepay     BIGINT,
    Overtimepay BIGINT,
    Otherpay    BIGINT,
    Benefits    BIGINT
);

-- Rebuild pay1_DATA on every run (OVERWRITE replaces the existing rows).
-- The inner query maps each raw job title to a category by keyword and the
-- outer query averages the pay columns per category.
-- Fixes relative to the earlier version of this script:
--   * the CREATE TABLE above is now terminated, so the two statements no
--     longer run together
--   * patterns are compared against UPPER(jobtitle), so every pattern must be
--     upper case. The mixed-case Public Works pattern could never match and
--     is now upper case, and the unreachable lower-case health pattern
--     (already covered by HEALTH) is removed
--   * PYSICIAN was a misspelling of PHYSICIAN
-- NOTE(review): this reads avro_table, while the Avro DDL in this commit
--   creates avro1_table - confirm which source table is intended.
INSERT OVERWRITE TABLE pay1_DATA
SELECT
    JobType,
    CAST(AVG(Basepay) AS BIGINT),
    CAST(AVG(Overtimepay) AS BIGINT),
    CAST(AVG(Otherpay) AS BIGINT),
    CAST(AVG(Benefits) AS BIGINT)
FROM
(
    SELECT
        -- Branch order matters: the first matching keyword decides the category.
        CASE
            WHEN UPPER(jobtitle) LIKE '%FIRE%'            THEN 'Fire'

            WHEN UPPER(jobtitle) LIKE '%POLICE%'          THEN 'Police'
            WHEN UPPER(jobtitle) LIKE '%SHERIFF%'         THEN 'Police'
            WHEN UPPER(jobtitle) LIKE '%PROBATION%'       THEN 'Police'
            WHEN UPPER(jobtitle) LIKE '%SERGEANT%'        THEN 'Police'

            WHEN UPPER(jobtitle) LIKE '%MTA%'             THEN 'Transit'
            WHEN UPPER(jobtitle) LIKE '%TRANSIT%'         THEN 'Transit'

            WHEN UPPER(jobtitle) LIKE '%ANESTH%'          THEN 'Medical'
            WHEN UPPER(jobtitle) LIKE '%MEDICAL%'         THEN 'Medical'
            WHEN UPPER(jobtitle) LIKE '%NURSE%'           THEN 'Medical'
            WHEN UPPER(jobtitle) LIKE '%HEALTH%'          THEN 'Medical'
            WHEN UPPER(jobtitle) LIKE '%PHYSICIAN%'       THEN 'Medical'
            WHEN UPPER(jobtitle) LIKE '%ORTHOPEDIC%'      THEN 'Medical'
            WHEN UPPER(jobtitle) LIKE '%PHARM%'           THEN 'Medical'

            WHEN UPPER(jobtitle) LIKE '%AIRPORT%'         THEN 'Airport'

            WHEN UPPER(jobtitle) LIKE '%ANIMAL%'          THEN 'Animal'

            WHEN UPPER(jobtitle) LIKE '%ARCHITECT%'       THEN 'Architectural'

            WHEN UPPER(jobtitle) LIKE '%COURT%'           THEN 'Court'
            WHEN UPPER(jobtitle) LIKE '%LEGAL%'           THEN 'Court'

            WHEN UPPER(jobtitle) LIKE '%MAYOR%'           THEN 'Mayor'

            WHEN UPPER(jobtitle) LIKE '%LIBRARY%'         THEN 'Library'
            WHEN UPPER(jobtitle) LIKE '%PARKING%'         THEN 'Parking'
            WHEN UPPER(jobtitle) LIKE '%PUBLIC WORKS%'    THEN 'Public Works'
            WHEN UPPER(jobtitle) LIKE '%ATTORNEY%'        THEN 'Attorney'
            WHEN UPPER(jobtitle) LIKE '%MECHANIC%'        THEN 'Automotive'
            WHEN UPPER(jobtitle) LIKE '%AUTOMOTIVE%'      THEN 'Automotive'
            WHEN UPPER(jobtitle) LIKE '%CUSTODIAN%'       THEN 'Custodian'
            WHEN UPPER(jobtitle) LIKE '%ENGINEER%'        THEN 'Engineering'
            WHEN UPPER(jobtitle) LIKE '%ENGR%'            THEN 'Engineering'
            WHEN UPPER(jobtitle) LIKE '%ACCOUNT%'         THEN 'Accounting'

            WHEN UPPER(jobtitle) LIKE '%GARDENER%'        THEN 'Gardening'
            WHEN UPPER(jobtitle) LIKE '%GENERAL LABORER%' THEN 'General Laborer'
            WHEN UPPER(jobtitle) LIKE '%FOOD SERV%'       THEN 'Food Service'
            WHEN UPPER(jobtitle) LIKE '%CLERK%'           THEN 'Clerk'
            WHEN UPPER(jobtitle) LIKE '%PORTER%'          THEN 'Porter'

            ELSE 'Other'
        END AS JobType,
        Basepay,
        Overtimepay,
        Otherpay,
        Benefits
    FROM avro_table
) classified
GROUP BY JobType;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Data Analysis Of San Francisco Salaries | ||
|
||
In this project we have chosen the San Francisco salaries dataset to analyze the different attributes of compensation, to find which department is best in that area and which has shown the most growth over 4 years. To make the data best suited for analysis in Hive, we have converted it to Avro format.<BR> | ||
Dataset | ||
|
||
<a href="https://www.kaggle.com/kaggle/sf-salaries">https://www.kaggle.com/kaggle/sf-salaries</a> | ||
|
||
<b>Follow the instructions in the tutorial and execute the queries</b> |
Binary file not shown.
Binary file not shown.