Skip to content

Commit

Permalink
Merge branch 'master' into als
Browse files Browse the repository at this point in the history
  • Loading branch information
Meng, Peng authored Oct 19, 2017
2 parents 9127833 + f2c2a9b commit 38cd672
Show file tree
Hide file tree
Showing 29 changed files with 1,147 additions and 6 deletions.
19 changes: 19 additions & 0 deletions bin/functions/hibench_prop_env_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@
# For Logistic Regression
NUM_EXAMPLES_LR="hibench.lr.examples",
NUM_FEATURES_LR="hibench.lr.features",
# For SVM
NUM_EXAMPLES_SVM="hibench.svm.examples",
# Feature count must come from its own property; it was previously mapped to
# "hibench.svm.examples", so it silently mirrored the example count.
NUM_FEATURES_SVM="hibench.svm.features",
# For ALS
NUM_USERS_ALS="hibench.als.users",
NUM_PRODUCTS_ALS="hibench.als.products",
Expand All @@ -103,6 +106,12 @@
NUM_ITERATIONS_ALS="hibench.als.num_iterations",
LAMBDA_ALS="hibench.als.Lambda",
KYRO_ALS="hibench.als.kyro",

# For PCA
NUM_EXAMPLES_PCA="hibench.pca.examples",
NUM_FEATURES_PCA="hibench.pca.features",
MAX_RESULT_SIZE_PCA ="hibench.pca.maxresultsize",

# For Gradient Boosting Tree
NUM_EXAMPLES_GBT="hibench.gbt.examples",
NUM_FEATURES_GBT="hibench.gbt.features",
Expand All @@ -115,6 +124,16 @@
NUM_EXAMPLES_SVD="hibench.svd.examples",
NUM_FEATURES_SVD="hibench.svd.features",
MAXRESULTSIZE_SVD="hibench.svd.maxresultsize",
# For Linear Regression
NUM_EXAMPLES_LINEAR="hibench.linear.examples",
NUM_FEATURES_LINEAR="hibench.linear.features",
# For LDA
NUM_DOCUMENTS_LDA="hibench.lda.num_of_documents",
NUM_VOCABULARY_LDA="hibench.lda.num_of_vocabulary",
NUM_TOPICS_LDA="hibench.lda.num_of_topics",
DOC_LEN_MIN_LDA="hibench.lda.doc_len_min",
DOC_LEN_MAX_LDA="hibench.lda.doc_len_max",
MAXRESULTSIZE_LDA="hibench.lda.maxresultsize",
# For Pagerank
PAGERANK_BASE_HDFS="hibench.pagerank.base.hdfs",
PAGERANK_INPUT="hibench.pagerank.dir.name.input",
Expand Down
14 changes: 13 additions & 1 deletion bin/run_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ for benchmark in `cat $root_dir/conf/benchmarks.lst`; do
# Skip the hadoop framework for the ml/als, ml/svm, ml/pca and ml/gbt
# benchmarks: move on to the next iteration of the enclosing loop.
# Expansions are quoted so an empty or whitespace-containing value cannot
# turn the test into a syntax error.
if [ "$framework" = "hadoop" ]; then
    case "$benchmark" in
        ml/als | ml/svm | ml/pca | ml/gbt)
            continue
            ;;
    esac
fi
Expand All @@ -67,8 +73,14 @@ for benchmark in `cat $root_dir/conf/benchmarks.lst`; do
fi
# Skip the hadoop framework for the ml/svd, ml/linear and ml/lda benchmarks:
# move on to the next iteration of the enclosing loop.
# Expansions are quoted so an empty or whitespace-containing value cannot
# turn the test into a syntax error.
if [ "$framework" = "hadoop" ]; then
    case "$benchmark" in
        ml/svd | ml/linear | ml/lda)
            continue
            ;;
    esac
fi

# Announce the benchmark/framework pair and the script about to be executed.
echo -e "${UYellow}${BYellow}Run ${Yellow}${UYellow}${benchmark}/${framework}${Color_Off}"
echo -e "${BCyan}Exec script: ${Cyan}$WORKLOAD/${framework}/run.sh${Color_Off}"
# Quoted so a workload path containing whitespace or glob characters is still
# executed as a single command word.
"$WORKLOAD/${framework}/run.sh"
Expand Down
35 changes: 35 additions & 0 deletions bin/workloads/ml/lda/prepare/prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prepare step for the ml/lda workload: clears $INPUT_HDFS and then runs the
# LDADataGenerator Spark job with the LDA sizing parameters.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/lda.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench LDADataPrepare "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous input; a failed removal is deliberately ignored.
rmr_hdfs "$INPUT_HDFS" || true
# NOTE(review): START_TIME/END_TIME are not read again in this script;
# kept in case the sourced helpers use them -- confirm.
START_TIME=$(timestamp)

run_spark_job com.intel.hibench.sparkbench.ml.LDADataGenerator "$INPUT_HDFS" "$NUM_DOCUMENTS_LDA" "$NUM_VOCABULARY_LDA" "$DOC_LEN_MIN_LDA" "$DOC_LEN_MAX_LDA"

END_TIME=$(timestamp)

show_bannar finish
leave_bench

34 changes: 34 additions & 0 deletions bin/workloads/ml/lda/spark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run step for the ml/lda workload on Spark: clears $OUTPUT_HDFS, runs the
# LDAExample Spark job and reports the elapsed time together with input size.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/lda.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench LDA "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous output; a failed removal is deliberately ignored.
rmr_hdfs "$OUTPUT_HDFS" || true

SIZE=$(dir_size "$INPUT_HDFS")
START_TIME=$(timestamp)
run_spark_job com.intel.hibench.sparkbench.ml.LDAExample "$INPUT_HDFS" "$OUTPUT_HDFS" "$NUM_TOPICS_LDA" "$MAXRESULTSIZE_LDA"
END_TIME=$(timestamp)

# Arguments intentionally left unquoted: the output format of dir_size is not
# visible here, so the original word-splitting behaviour is preserved.
gen_report ${START_TIME} ${END_TIME} ${SIZE}
show_bannar finish
leave_bench
35 changes: 35 additions & 0 deletions bin/workloads/ml/linear/prepare/prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prepare step for the ml/linear workload: clears $INPUT_HDFS and then runs
# the LinearRegressionDataGenerator Spark job with the example/feature counts.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/linear.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench LinearRegressionDataPrepare "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous input; a failed removal is deliberately ignored.
rmr_hdfs "$INPUT_HDFS" || true
# NOTE(review): START_TIME/END_TIME are not read again in this script;
# kept in case the sourced helpers use them -- confirm.
START_TIME=$(timestamp)

run_spark_job com.intel.hibench.sparkbench.ml.LinearRegressionDataGenerator "$INPUT_HDFS" "$NUM_EXAMPLES_LINEAR" "$NUM_FEATURES_LINEAR"

END_TIME=$(timestamp)

show_bannar finish
leave_bench

34 changes: 34 additions & 0 deletions bin/workloads/ml/linear/spark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run step for the ml/linear workload on Spark: clears $OUTPUT_HDFS, runs the
# LinearRegression Spark job on $INPUT_HDFS and reports the elapsed time
# together with input size.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/linear.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench LinearRegression "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous output; a failed removal is deliberately ignored.
rmr_hdfs "$OUTPUT_HDFS" || true

SIZE=$(dir_size "$INPUT_HDFS")
START_TIME=$(timestamp)
run_spark_job com.intel.hibench.sparkbench.ml.LinearRegression "${INPUT_HDFS}"
END_TIME=$(timestamp)

# Arguments intentionally left unquoted: the output format of dir_size is not
# visible here, so the original word-splitting behaviour is preserved.
gen_report ${START_TIME} ${END_TIME} ${SIZE}
show_bannar finish
leave_bench
35 changes: 35 additions & 0 deletions bin/workloads/ml/pca/prepare/prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prepare step for the ml/pca workload: clears $INPUT_HDFS and then runs the
# PCADataGenerator Spark job with the example/feature counts.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/pca.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench PCADataPrepare "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous input; a failed removal is deliberately ignored.
rmr_hdfs "$INPUT_HDFS" || true
# NOTE(review): START_TIME/END_TIME are not read again in this script;
# kept in case the sourced helpers use them -- confirm.
START_TIME=$(timestamp)

run_spark_job com.intel.hibench.sparkbench.ml.PCADataGenerator "$INPUT_HDFS" "$NUM_EXAMPLES_PCA" "$NUM_FEATURES_PCA"

END_TIME=$(timestamp)

show_bannar finish
leave_bench

34 changes: 34 additions & 0 deletions bin/workloads/ml/pca/spark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run step for the ml/pca workload on Spark: clears $OUTPUT_HDFS, runs the
# PCAExample Spark job on $INPUT_HDFS and reports the elapsed time together
# with input size.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/pca.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench PCA "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous output; a failed removal is deliberately ignored.
rmr_hdfs "$OUTPUT_HDFS" || true

SIZE=$(dir_size "$INPUT_HDFS")
START_TIME=$(timestamp)
run_spark_job com.intel.hibench.sparkbench.ml.PCAExample "${INPUT_HDFS}" "${MAX_RESULT_SIZE_PCA}"
END_TIME=$(timestamp)

# Arguments intentionally left unquoted: the output format of dir_size is not
# visible here, so the original word-splitting behaviour is preserved.
gen_report ${START_TIME} ${END_TIME} ${SIZE}
show_bannar finish
leave_bench
35 changes: 35 additions & 0 deletions bin/workloads/ml/svm/prepare/prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prepare step for the ml/svm workload: clears $INPUT_HDFS and then runs the
# SVMDataGenerator Spark job with the example/feature counts.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/svm.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench SVMDataPrepare "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous input; a failed removal is deliberately ignored.
rmr_hdfs "$INPUT_HDFS" || true
# NOTE(review): START_TIME/END_TIME are not read again in this script;
# kept in case the sourced helpers use them -- confirm.
START_TIME=$(timestamp)

run_spark_job com.intel.hibench.sparkbench.ml.SVMDataGenerator "$INPUT_HDFS" "$NUM_EXAMPLES_SVM" "$NUM_FEATURES_SVM"

END_TIME=$(timestamp)

show_bannar finish
leave_bench

34 changes: 34 additions & 0 deletions bin/workloads/ml/svm/spark/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Run step for the ml/svm workload on Spark: clears $OUTPUT_HDFS, runs the
# SVMWithSGDExample Spark job on $INPUT_HDFS and reports the elapsed time
# together with input size.

# Resolve this script's directory to an absolute path. Abort if it cannot be
# entered; otherwise `pwd` would report the caller's working directory and
# root_dir would point at the wrong tree.
current_dir=$(dirname "$0")
current_dir=$(cd "$current_dir" && pwd) || exit 1
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/ml/svm.conf
. "${root_dir}/bin/functions/load_bench_config.sh"

enter_bench SVM "${workload_config}" "${current_dir}"
show_bannar start

# Remove any previous output; a failed removal is deliberately ignored.
rmr_hdfs "$OUTPUT_HDFS" || true

SIZE=$(dir_size "$INPUT_HDFS")
START_TIME=$(timestamp)
run_spark_job com.intel.hibench.sparkbench.ml.SVMWithSGDExample "${INPUT_HDFS}"
END_TIME=$(timestamp)

# Arguments intentionally left unquoted: the output format of dir_size is not
# visible here, so the original word-splitting behaviour is preserved.
gen_report ${START_TIME} ${END_TIME} ${SIZE}
show_bannar finish
leave_bench
6 changes: 5 additions & 1 deletion conf/benchmarks.lst
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ ml.bayes
ml.kmeans
ml.lr
ml.als
ml.pca
ml.gbt
ml.rf
ml.svd
ml.linear
ml.lda
ml.svm

graph.nweight
graph.nweight
5 changes: 2 additions & 3 deletions conf/hibench.conf
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Data scale profile. Available values are tiny, small, large, huge, gigantic and bigdata.
# The definition of these profiles can be found in each workload's conf file, e.g. conf/workloads/micro/wordcount.conf
hibench.scale.profile tiny

hibench.scale.profile tiny
# Mapper number in hadoop, partition number in Spark
hibench.default.map.parallelism 8

# Reducer number in hadoop, shuffle partition number in Spark
hibench.default.shuffle.parallelism 8
hibench.default.shuffle.parallelism 8


#======================================================
Expand Down
Loading

0 comments on commit 38cd672

Please sign in to comment.