forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 4
/
gen-protos.sh
executable file
·127 lines (109 loc) · 3.83 KB
/
gen-protos.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Abort on the first failing command (-e) and trace every command as it runs (-x).
set -ex

# The repository root is the parent of the directory that holds this script; all
# relative paths used later are resolved from there.
# "$0" is quoted so a checkout path containing whitespace still resolves, and `&&`
# prevents `pwd` from reporting an unrelated directory when the `cd` fails (the
# failed substitution then aborts the script through `set -e`).
SPARK_HOME="$(cd "$(dirname "$0")"/.. && pwd)"
cd "$SPARK_HOME"

# Settings filled in by the argument handling further down:
#   OUTPUT_PATH   - directory that receives the generated Python files
#   MODULE        - which proto module is being generated
#   SOURCE_MODULE - Python package name as emitted by the generator
#   TARGET_MODULE - Python package name the generated code is rewritten to use
OUTPUT_PATH=""
MODULE=""
SOURCE_MODULE=""
TARGET_MODULE=""
#######################################
# Print the command-line synopsis and terminate the script.
# Called both for a wrong argument count and for an unrecognised module name.
# Arguments: none
# Outputs:   two diagnostic lines on stderr
# Returns:   never returns; exits with status 255
#######################################
usage() {
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  echo "Illegal number of parameters." >&2
  echo "Usage:./dev/gen-protos.sh [connect|streaming] [output_path]" >&2
  # 255 is what bash turns `exit -1` into; spelled out because a negative
  # exit status is outside the portable 0-255 range.
  exit 255
}
# Exactly one module name is required, optionally followed by an output directory.
if [[ $# -lt 1 || $# -gt 2 ]]; then
  usage
fi

# Per-module settings: the default destination of the generated files, the Python
# package name produced by the generator, the package name it is rewritten to, and
# the source directory (relative to SPARK_HOME) the generator is run from.
case "$1" in
  connect)
    MODULE="connect"
    OUTPUT_PATH="${SPARK_HOME}/python/pyspark/sql/connect/proto/"
    SOURCE_MODULE="spark.connect"
    TARGET_MODULE="pyspark.sql.connect.proto"
    proto_src_dir="sql/connect/common/src/main"
    ;;
  streaming)
    MODULE="streaming"
    OUTPUT_PATH="${SPARK_HOME}/python/pyspark/sql/streaming/proto/"
    SOURCE_MODULE="org.apache.spark.sql.execution.streaming"
    TARGET_MODULE="pyspark.sql.streaming.proto"
    proto_src_dir="sql/core/src/main"
    ;;
  *)
    usage
    ;;
esac

# An explicit output directory replaces the default one. It is wiped and recreated
# first, so it always starts out empty.
if [[ $# -eq 2 ]]; then
  rm -Rf -- "$2"
  mkdir -p -- "$2"
  # Store the absolute form: the working directory changes below, and a relative
  # path would otherwise point somewhere else by the time files are copied into it.
  # CDPATH is cleared so `cd` cannot resolve the name against another directory.
  OUTPUT_PATH="$(CDPATH= cd -- "$2" && pwd)"
fi

# Continue from the module's source tree; the generator is run from this directory.
pushd "$proto_src_dir"
# Apache licence header that gets prepended to every generated Python file.
# The heredoc delimiter is quoted so nothing inside is expanded, and the closing
# `EOF` sits alone on its line (with the `)` of the command substitution on the
# next one) so the heredoc is terminated the way the shell grammar expects rather
# than depending on how a particular bash version treats `EOF)`.
LICENSE=$(
  cat <<'EOF'
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
EOF
)

# Stash the header in a file so it can be concatenated in front of each generated
# file later on. The header is 16 lines long.
# NOTE(review): the fixed path /tmp/tmp_licence is predictable and shared between
# runs; it is kept because the later post-processing step reads exactly this path.
printf '%s\n' "$LICENSE" > /tmp/tmp_licence
# Delete the old generated protobuf files.
rm -Rf gen

# Now, regenerate the new files
buf generate --debug -vvv

# Take a snapshot of the generated files before modifying anything: the loop below
# creates *.tmp and *.bak siblings that would also match "*.py*". Reading the
# NUL-delimited list into an array keeps every path intact regardless of the
# characters it contains.
generated_files=()
while IFS= read -r -d '' generated; do
  generated_files+=("$generated")
done < <(find gen/proto/python -name "*.py*" -print0)

# We need to edit the generate python files to account for the actual package location and not
# the one generated by proto.
# NOTE(review): the dots in SOURCE_MODULE (and the trailing `.` in the .pyi patterns)
# are unescaped, so sed treats them as "any character". The patterns are left
# untouched here because tightening them could change the generated output.
for f in "${generated_files[@]}"; do
  if [[ $f == *_pb2.py || $f == *_pb2_grpc.py ]]; then
    # Fix the imports, then the module name in the serialized descriptor. Both
    # substitutions are applied in order to each line in a single sed pass.
    sed \
      -e "s/from ${SOURCE_MODULE} import/from ${TARGET_MODULE} import/g" \
      -e "s/DESCRIPTOR, '${SOURCE_MODULE}/DESCRIPTOR, '${TARGET_MODULE}/g" \
      "$f" > "$f.tmp"
    mv "$f.tmp" "$f"
  elif [[ $f == *.pyi ]]; then
    # Type stubs: rewrite the package references and drop the
    # @typing_extensions.final decorator lines.
    sed \
      -e "s/import ${SOURCE_MODULE}./import ${TARGET_MODULE}./g" \
      -e "s/${SOURCE_MODULE}./${TARGET_MODULE}./g" \
      -e '/ *@typing_extensions\.final/d' \
      "$f" > "$f.tmp"
    mv "$f.tmp" "$f"
  fi

  # Prepend the Apache licence header to the files.
  cp "$f" "$f.bak"
  cat /tmp/tmp_licence "$f.bak" > "$f"

  # Line count of the file including the header; the arithmetic expansion strips
  # the padding some `wc` implementations put in front of the number.
  LC=$(($(wc -l < "$f")))
  echo "$LC"

  # A *_grpc.py file of exactly 20 lines is removed.
  # NOTE(review): presumably 20 = the 16-line licence header plus the few lines the
  # generator emits for a proto without services - confirm against the generator.
  if [[ $f == *_grpc.py && $LC -eq 20 ]]; then
    rm "$f"
  fi
  rm "$f.bak"
done
# Format the generated sources with the repository's black configuration. The path
# is quoted so a SPARK_HOME containing whitespace is passed as a single argument.
black --config "$SPARK_HOME/dev/pyproject.toml" gen/proto/python

# Last step copy the result files to the destination module.
# The file list is read NUL-delimited and every expansion is quoted, so neither the
# generated paths nor OUTPUT_PATH are subject to word splitting or globbing.
while IFS= read -r -d '' f; do
  cp -- "$f" "$OUTPUT_PATH"
done < <(find gen/proto/python -name "*.py*" -print0)

# Clean up everything.
rm -Rf gen