forked from Unstructured-IO/pipeline-sec-filings
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: sync version (Unstructured-IO#22)
Adds a script to check version in CHANGELOG.md against user specified files. Same make targets are added, and the check is added to the make check target, and consequently becomes part of CI. Files to be synced are specified in the Makefile rather than hardcoded in the script. Co-authored-by: cragwolfe <[email protected]>
- Loading branch information
Showing
8 changed files
with
181 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,7 +41,7 @@ the `sample-sec-docs` folder, run: | |
|
||
``` | ||
curl -X 'POST' \ | ||
'https://api.unstructured.io/sec-filings/v0.0.2/section' \ | ||
'https://api.unstructured.io/sec-filings/v0.1.0/section' \ | ||
-H 'accept: application/json' \ | ||
-H 'Content-Type: multipart/form-data' \ | ||
-F '[email protected]' \ | ||
|
@@ -85,7 +85,7 @@ example, you can run the following command to request the risk factors section: | |
|
||
``` | ||
curl -X 'POST' \ | ||
'http://localhost:8000/sec-filings/v0.0.2/section' \ | ||
'http://localhost:8000/sec-filings/v0.1.0/section' \ | ||
-H 'accept: application/json' \ | ||
-H 'Content-Type: multipart/form-data' \ | ||
-F '[email protected]' \ | ||
|
@@ -121,7 +121,7 @@ You can also use special regex characters in your pattern, as shown in the examp | |
|
||
``` | ||
curl -X 'POST' \ | ||
'http://localhost:8000/sec-filings/v0.0.2/section' \ | ||
'http://localhost:8000/sec-filings/v0.1.0/section' \ | ||
-H 'accept: application/json' \ | ||
-H 'Content-Type: multipart/form-data' \ | ||
-F '[email protected]' \ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -111,6 +111,11 @@ | |
" '0001104659-22-075415': '425',\n", | ||
" '0001104659-22-078942': 'S-4',\n", | ||
" '0001104659-22-086533': '425',\n", | ||
" '0001104659-22-109429': 'S-4/A',\n", | ||
" '0001104659-22-109436': '8-K',\n", | ||
" '0001104659-22-109438': '425',\n", | ||
" '0001104659-22-110203': '8-K',\n", | ||
" '0001104659-22-110205': '425',\n", | ||
" '0001143313-21-000005': 'CERT',\n", | ||
" '0001393825-22-000010': 'SC 13G',\n", | ||
" '0001410578-21-000577': '10-Q/A',\n", | ||
|
@@ -369,13 +374,13 @@ | |
"source": [ | ||
"### To verify API is handling both the 10-Q and 10-Q/A filings saved by this notebook:\n", | ||
"\n", | ||
" curl -X 'POST' 'http://127.0.0.1:8000/sec-filings/v0.0.1/section' \\\n", | ||
" curl -X 'POST' 'http://127.0.0.1:8000/sec-filings/v0.1.0/section' \\\n", | ||
" -H 'accept: application/json' \\\n", | ||
" -H 'Content-Type: multipart/form-data' \\\n", | ||
" -F '[email protected]' \\\n", | ||
" -F section=RISK_FACTORS -F section=MANAGEMENT_DISCUSSION | jq . | grep -A5 -P '(RISK_FACTORS|MANAGEMENT_DISCUSSION)' \n", | ||
" \n", | ||
" curl -X 'POST' 'http://127.0.0.1:8000/sec-filings/v0.0.1/section' \\\n", | ||
" curl -X 'POST' 'http://127.0.0.1:8000/sec-filings/v0.1.0/section' \\\n", | ||
" -H 'accept: application/json' \\\n", | ||
" -H 'Content-Type: multipart/form-data' \\\n", | ||
" -F '[email protected]' \\\n", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
name: sec-filings | ||
version: 0.0.2 | ||
version: 0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
#!/bin/bash | ||
# Print the command-line help text to stdout.
# Arguments: none (the script name is derived from $0).
# Outputs:   synopsis plus one line per option / replacement format.
function usage {
  # The synopsis lists every option accepted by the getopts string ":hcs:f:".
  echo "Usage: $(basename "$0") [-h] [-c] [-s SOURCE_FILE] -f FILE_TO_CHANGE REPLACEMENT_FORMAT [-f FILE_TO_CHANGE REPLACEMENT_FORMAT ...]"
  echo 'Synchronize files to latest version in source file'
  echo ' -h Show this help text and exit'
  echo ' -s Specifies source file for version (default is CHANGELOG.md)'
  echo ' -f Specifies a file to change and the format for searching and replacing versions'
  echo ' FILE_TO_CHANGE is the file to be updated/checked for updates'
  echo ' REPLACEMENT_FORMAT is one of (semver, release, api-release)'
  echo ' semver indicates to look for a full semver version and replace with the latest full version'
  echo ' release indicates to look for a release semver version (x.x.x) and replace with the latest release version'
  echo ' api-release indicates to look for a release semver version in the context of an api route and replace with the latest release version'
  echo ' -c Compare versions and output proposed changes without changing anything.'
}
|
||
# Collect additional (non-option) arguments that follow the option most
# recently parsed by getopts.
# Globals:   OPTARG (converted to an array: element 0 is the argument getopts
#            captured, elements 1..n are the extra arguments found here),
#            OPTIND (advanced past every argument consumed).
# Arguments: the same positional parameters that were given to getopts ("$@").
function getopts-extra () {
  local idx=1
  # Keep only the value getopts just stored; extra elements left in the array
  # by an earlier call must not be counted for this option.
  OPTARG=( "${OPTARG[0]}" )
  # While the next argument exists and does not start with '-', append it.
  while [[ ${OPTIND} -le $# && ${!OPTIND:0:1} != '-' ]]; do
    OPTARG[idx]=${!OPTIND}
    # Explicit arithmetic: '+=' on a variable without the integer attribute
    # concatenates strings (1 -> 11 -> 111) instead of incrementing.
    idx=$((idx + 1))
    OPTIND=$((OPTIND + 1))
  done
}
|
||
# Parse input options.
#   CHECK               1 when -c was given (report differences, change nothing)
#   SOURCE_FILE         file the latest version is read from (-s)
#   FILES_TO_CHECK      files named by each -f
#   REPLACEMENT_FORMATS format named by each -f, index-aligned with FILES_TO_CHECK
declare CHECK=0
declare SOURCE_FILE="CHANGELOG.md"
declare -a FILES_TO_CHECK=()
declare -a REPLACEMENT_FORMATS=()
declare args
declare OPTIND OPTARG opt
# The leading ':' in the option string selects silent error reporting, so a
# missing option argument is delivered as opt=':' and must be handled here.
while getopts ":hcs:f:" opt; do
  case $opt in
    h)
      usage
      exit 0
      ;;
    c)
      CHECK=1
      ;;
    s)
      SOURCE_FILE="$OPTARG"
      ;;
    f)
      # getopts captured FILE_TO_CHANGE; pull in the words that follow it.
      getopts-extra "$@"
      args=( "${OPTARG[@]}" )
      # Drop the array so values from this -f cannot leak into the next option.
      unset OPTARG
      # validate length of args, should be 2
      if [ ${#args[@]} -eq 2 ]; then
        FILES_TO_CHECK+=( "${args[0]}" )
        REPLACEMENT_FORMATS+=( "${args[1]}" )
      else
        echo "Exactly 2 arguments must follow -f option." >&2
        exit 1
      fi
      ;;
    :)
      # Without this arm a trailing "-s" or "-f" would be silently ignored.
      echo "Option -$OPTARG requires an argument." >&2
      usage
      exit 1
      ;;
    \?)
      echo "Invalid option: -$OPTARG." >&2
      usage
      exit 1
      ;;
  esac
done
|
||
# Parse REPLACEMENT_FORMATS
# The patterns are POSIX extended regular expressions (used with grep -E and
# sed -E). ERE has no \d escape, so digits are written as [0-9]; with \d a
# multi-digit component such as the "10" in 0.10.0 does not match.
RE_SEMVER_FULL="(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)(-((0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9][0-9]*|[0-9]*[a-zA-Z-][0-9a-zA-Z-]*))*))?(\+([0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*))?"
RE_RELEASE="(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
RE_API_RELEASE="v(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)"
# Pull out semver appearing earliest in SOURCE_FILE.
# head keeps a single value even if the first matching line holds several.
LAST_VERSION=$(grep -o -m 1 -E "${RE_SEMVER_FULL}" "$SOURCE_FILE" | head -n 1)
# A release is x.y.z NOT followed by a pre-release/build suffix. Digits are
# excluded from the look-ahead character as well, so that "0.1.10-dev1" cannot
# be matched as the truncated release "0.1.1".
LAST_RELEASE=$(grep -o -m 1 -E "${RE_RELEASE}($|[^-+0-9])" "$SOURCE_FILE" | grep -o -m 1 -E "${RE_RELEASE}")
# The API route version is the latest release with a "v" prefix; deriving it
# from LAST_RELEASE keeps the two values in agreement.
LAST_API_RELEASE="v${LAST_RELEASE}"
# Per-file search pattern and replacement value, index-aligned with
# REPLACEMENT_FORMATS.
declare -a RE_SEMVERS=()
declare -a UPDATED_VERSIONS=()
for i in "${!REPLACEMENT_FORMATS[@]}"; do
  REPLACEMENT_FORMAT=${REPLACEMENT_FORMATS[$i]}
  case $REPLACEMENT_FORMAT in
    semver)
      RE_SEMVERS+=( "$RE_SEMVER_FULL" )
      UPDATED_VERSIONS+=( "$LAST_VERSION" )
      ;;
    release)
      RE_SEMVERS+=( "$RE_RELEASE" )
      UPDATED_VERSIONS+=( "$LAST_RELEASE" )
      ;;
    api-release)
      RE_SEMVERS+=( "$RE_API_RELEASE" )
      UPDATED_VERSIONS+=( "$LAST_API_RELEASE" )
      ;;
    *)
      echo "Invalid replacement format: \"${REPLACEMENT_FORMAT}\". Use semver, release, or api-release" >&2
      exit 1
      ;;
  esac
done
|
||
# Abort when SOURCE_FILE produced no semver match: there is no version to
# sync the other files to.
if [[ -z "$LAST_VERSION" ]]; then
  # Diagnostics go to stderr, like the other error messages in this script.
  printf "Error: Unable to find latest version from %s.\n" "$SOURCE_FILE" >&2
  exit 1
fi
|
||
# Search files in FILES_TO_CHECK and change (or get diffs).
# For each file, the first match of its pattern on every line is rewritten to
# the version chosen for it. With CHECK=1 the result is only diffed against
# the file and FAILED_CHECK records whether any file would change.
declare FAILED_CHECK=0

for i in "${!FILES_TO_CHECK[@]}"; do
  FILE_TO_CHANGE=${FILES_TO_CHECK[$i]}
  RE_SEMVER=${RE_SEMVERS[$i]}
  UPDATED_VERSION=${UPDATED_VERSIONS[$i]}

  # Refuse to substitute an empty version (a bare "v" is an empty API
  # release): that would erase the version text in the target file.
  if [[ -z "${UPDATED_VERSION#v}" ]]; then
    printf "Error: No version of the requested format found in %s for file %s.\n" "$SOURCE_FILE" "$FILE_TO_CHANGE" >&2
    exit 1
  fi

  FILE_VERSION=$(grep -o -m 1 -E "${RE_SEMVER}" "$FILE_TO_CHANGE")
  if [[ -z "$FILE_VERSION" ]]; then
    # No match to semver regex in FILE_TO_CHANGE, so nothing to replace
    printf "Error: No semver version found in file %s.\n" "$FILE_TO_CHANGE" >&2
    exit 1
  fi

  # Build the updated copy in a temporary file first.
  TMPFILE=$(mktemp "${TMPDIR:-/tmp}/new_version.XXXXXX") || {
    printf "Error: Unable to create a temporary file.\n" >&2
    exit 1
  }
  # Replace semver in FILE_TO_CHANGE with the version obtained from SOURCE_FILE.
  # A failed sed must never reach the cp below, or the target would be
  # overwritten with truncated content.
  if ! sed -E "s/$RE_SEMVER/$UPDATED_VERSION/" "$FILE_TO_CHANGE" > "$TMPFILE"; then
    rm -f "$TMPFILE"
    printf "Error: Unable to rewrite version in file %s.\n" "$FILE_TO_CHANGE" >&2
    exit 1
  fi

  if [[ "$CHECK" == 1 ]]; then
    DIFF=$(diff "$FILE_TO_CHANGE" "$TMPFILE")
    rm -f "$TMPFILE"
    if [[ -z "$DIFF" ]]; then
      printf "version sync would make no changes to %s.\n" "$FILE_TO_CHANGE"
    else
      FAILED_CHECK=1
      printf "version sync would make the following changes to %s:\n%s\n" "$FILE_TO_CHANGE" "$DIFF"
    fi
  else
    # cp (not mv) so the target keeps its own permissions and ownership.
    if ! cp "$TMPFILE" "$FILE_TO_CHANGE"; then
      rm -f "$TMPFILE"
      printf "Error: Unable to update file %s.\n" "$FILE_TO_CHANGE" >&2
      exit 1
    fi
    rm -f "$TMPFILE"
  fi
done
|
||
# Final status: 1 when a check found a file that would change, 0 otherwise.
if ! [ "${FAILED_CHECK}" -ne 0 ]; then
  exit 0
fi
exit 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters