Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
garyelephant committed Aug 5, 2017
0 parents commit 311cdb3
Show file tree
Hide file tree
Showing 26 changed files with 1,574 additions and 0 deletions.
35 changes: 35 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Created by .ignore support plugin (hsz.mobi)
### Java template
*.class

# Mobile Tools for Java (J2ME)
.mtj.tmp/

# Package Files #
*.jar
*.war
*.ear

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

# build targets
target/

# Intellij Idea files
.idea/
*.iml

.DS_Store

metastore_db/

work_dir

*.keytab
/derby.log

dependency-reduced-pom.xml

apidoc
*.tar.gz
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Readme

## Requirements

Building Tool: SBT


## build and package

Build thin jar

```
# cd to project root dir
sbt package
```

Build fat jar(with all dependencies) for Spark Application

```
sbt clean assembly
```
19 changes: 19 additions & 0 deletions bin/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
source /etc/profile

exec /letv/programs/spark-1.6.0-bin-hadoop2.6_security/bin/spark-submit \
--master yarn-clienta \
--num-executors 30 \
--executor-cores 2 \
--executor-memory 2G \
--conf spark.app.name=StreamingETL \
--conf spark.ui.port=13000 \
--conf spark.streaming.blockInterval=1000ms \
--conf spark.streaming.kafka.maxRatePerPartition=30000 \
--conf spark.streaming.kafka.maxRetries=2 \
--conf spark.yarn.jar=hdfs://alluxio-cluster/sparkLib/spark-assembly-1.6.0-hadoop2.6.0.jar \
--conf spark.local.dir=/data/slot6/streamingetl/tmp \
--conf spark.driver.extraJavaOptions=-Dconfig.file=/data/slot6/streamingetl/application.conf \
--class org.interestinglab.waterdrop.WaterdropMain \
/data/slot6/streamingetl/WaterdropMain-assembly-0.1.0.jar

21 changes: 21 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name := "StreamingETL"
version := "0.1.0"
organization := "org.interestinglab.waterdrop"

scalaVersion := "2.10.6"

val sparkVersion = "1.6.0"

libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % sparkVersion % "provided",
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided",
"org.apache.spark" %% "spark-streaming" % sparkVersion % "provided",
"org.apache.spark" %% "spark-streaming-kafka" % sparkVersion
exclude("org.spark-project.spark", "unused"),
"com.typesafe" % "config" % "1.3.1",
"org.json4s" %% "json4s-jackson" % "3.4.2"
)

resolvers += Resolver.mavenLocal

scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature")
47 changes: 47 additions & 0 deletions config/application.conf.example-1
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
input {
kafka {
topics = "gpc.oi_gaoyingju.test1"
consumer.bootstrap.servers = "10.110.94.130:9092,10.110.94.131:9092,10.110.95.50:9092,10.110.95.68:9092,10.110.95.82:9092"
consumer.zookeeper.connect = "10.110.94.130:2181,10.110.94.131:2181,10.110.95.50:2181,10.110.95.68:2181,10.110.95.82:2181"
consumer.group.id = "scala_test_group1"
consumer.num.consumer.fetchers = "4"
consumer.auto.offset.reset = "largest"
}
}

filter {
split {
delimiter = " "
keys = ["a", "b", "c"]
source_field = "raw_message"
target_field = "msg"
}
}

sql {
query {
table_name = "mytable"
sql = "select * from mytable where a = \"value\""
}
}

output {
kafka {
topic = "gpc.oi_wangjie7.test1"
producer.bootstrap.servers = "10.110.94.130:9092,10.110.94.131:9092,10.110.95.50:9092,10.110.95.68:9092,10.110.95.82:9092"
producer.acks = "1"
producer.retries = "2"
producer.retry.backoff.ms = "100"
producer.batch.size = "46384"
producer.linger.ms = "6000"
producer.buffer.memory = "268435456" # 256 MB
producer.key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
producer.value.serializer = "org.apache.kafka.common.serialization.StringSerializer"
producer.compression.type = "snappy"
producer.send.buffer.bytes = "131072"
producer.max.request.size = "1048576"
producer.max.in.flight.requests.per.connection = "2"

}
}

1 change: 1 addition & 0 deletions project/assembly.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.2")
1 change: 1 addition & 0 deletions project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sbt.version=0.13.11
1 change: 1 addition & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.8.0")
117 changes: 117 additions & 0 deletions scalastyle-config.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
<scalastyle>
<name>Scalastyle standard configuration</name>
<check level="warning" class="org.scalastyle.file.FileTabChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.file.FileLengthChecker" enabled="true">
<parameters>
<parameter name="maxFileLength"><![CDATA[800]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.file.HeaderMatchesChecker" enabled="true">
<parameters>
<parameter name="header"><![CDATA[// Copyright (C) 2011-2012 the original author or authors.
// See the LICENCE.txt file distributed with this work for additional
// information regarding copyright ownership.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.SpacesAfterPlusChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.file.WhitespaceEndOfLineChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.SpacesBeforePlusChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.file.FileLineLengthChecker" enabled="true">
<parameters>
<parameter name="maxLineLength"><![CDATA[160]]></parameter>
<parameter name="tabSize"><![CDATA[4]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.ClassNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.ObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[[A-Z][A-Za-z]*]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.PackageObjectNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z]*$]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.EqualsHashCodeChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.IllegalImportsChecker" enabled="true">
<parameters>
<parameter name="illegalImports"><![CDATA[sun._,java.awt._]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.ParameterNumberChecker" enabled="true">
<parameters>
<parameter name="maxParameters"><![CDATA[8]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.MagicNumberChecker" enabled="true">
<parameters>
<parameter name="ignore"><![CDATA[-1,0,1,2,3]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.NoWhitespaceBeforeLeftBracketChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.NoWhitespaceAfterLeftBracketChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.ReturnChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.NullChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.NoCloneChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.NoFinalizeChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.CovariantEqualsChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.StructuralTypeChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.file.RegexChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[println]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.NumberOfTypesChecker" enabled="true">
<parameters>
<parameter name="maxTypes"><![CDATA[30]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.CyclomaticComplexityChecker" enabled="true">
<parameters>
<parameter name="maximum"><![CDATA[10]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.UppercaseLChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.SimplifyBooleanExpressionChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.scalariform.IfBraceChecker" enabled="true">
<parameters>
<parameter name="singleLineAllowed"><![CDATA[true]]></parameter>
<parameter name="doubleLineAllowed"><![CDATA[false]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.MethodLengthChecker" enabled="true">
<parameters>
<parameter name="maxLength"><![CDATA[50]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.MethodNamesChecker" enabled="true">
<parameters>
<parameter name="regex"><![CDATA[^[a-z][A-Za-z0-9]*$]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.NumberOfMethodsInTypeChecker" enabled="true">
<parameters>
<parameter name="maxMethods"><![CDATA[30]]></parameter>
</parameters>
</check>
<check level="warning" class="org.scalastyle.scalariform.PublicMethodsHaveTypeChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.file.NewLineAtEofChecker" enabled="true"></check>
<check level="warning" class="org.scalastyle.file.NoNewLineAtEofChecker" enabled="false"></check>
</scalastyle>
Loading

0 comments on commit 311cdb3

Please sign in to comment.