forked from rancher/cert-manager
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathverify-links.sh
executable file
·109 lines (91 loc) · 3.15 KB
/
verify-links.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/bash
# +skip_license_check
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script will scan all md (markdown) files for bad references.
# It will look for strings of the form [...](...) and make sure that
# the (...) points to either a valid file in the source tree or, in the
# case of it being an http url, it'll make sure we don't get a 404.
#
# Usage: verify-links.sh [ dir | file ... ]
# default arg is root of our source tree
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit
set -o nounset
set -o pipefail

# Root of the source tree: the parent of the directory holding this script.
REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..

# Paths to scan. Each command-line argument stays its own word, so several
# dirs/files (and paths containing blanks) reach find intact. With no
# arguments the whole source tree is scanned.
if [[ -n "$*" ]]; then
  args=("$@")
else
  args=("${REPO_ROOT}")
fi

# Newline-separated list of the markdown files to check, skipping every path
# that mentions "vendor" or "glide". The filtering is done by find itself:
# a "grep -v" that selects nothing exits 1, which under errexit/pipefail
# would abort the script just because there was nothing to scan.
mdFiles=$(find "${args[@]}" -name "*.md" ! -path "*vendor*" ! -path "*glide*")

# Prefix for the scratch files used below (${tmp}1, ${tmp}2, ${tmp}3, ...).
# The trap removes them on every exit path, including errexit aborts.
tmp=$(mktemp)
trap 'rm -f -- "${tmp:?}"*' EXIT
#######################################
# Print the anchor of every heading in a markdown file, one per line.
# A heading is a line starting with optional blanks and a '#'. Its anchor is
# the heading text with the leading #'s and leading/trailing blanks removed,
# lower-cased, blanks turned into '-', and everything except 0-9, a-z and
# '-' dropped.
# Arguments: $1 - markdown file to read
# Outputs:   anchors on stdout (nothing when the file has no headings)
#######################################
list_anchors() {
  # grep exits 1 when a file has no headings at all; that is not an error
  # and must not abort the script under errexit/pipefail.
  { grep "^ *#" < "$1" || true; } \
    | sed -e "s/^ *#* *//" -e "s/ *$//" \
    | tr '[:upper:]' '[:lower:]' \
    | sed "s/ /-/g" \
    | sed "s/[^-a-zA-Z0-9]//g"
}

#######################################
# Check a single href taken from a [..](..) construct.
# Arguments: $1 - markdown file the href was found in (used in messages)
#            $2 - directory of that file (base for relative hrefs)
#            $3 - the href, without the surrounding parens
#            $4 - file listing the anchors of $1, one per line
# Outputs:   a one-line diagnostic on stdout when the href is broken
# Returns:   0 when the href is fine, 1 when it is broken
#######################################
check_ref() {
  local file=$1 dir=$2 ref=$3 anchors=$4
  local target

  # An external href (ie. starts with http).
  if [[ "${ref:0:4}" == "http" ]]; then
    # -o only redirects wget's log; -O is what keeps the fetched document
    # from being saved into the current directory.
    if ! wget --timeout 10 -o /dev/null -O /dev/null "${ref}" \
      < /dev/null > /dev/null 2>&1; then
      printf '%s\n' "${file}: Can't load: url ${ref}"
      return 1
    fi
    return 0
  fi

  # An href to an anchor inside the same file.
  if [[ "${ref:0:1}" == "#" ]]; then
    ref=${ref:1}
    # Whole-line, fixed-string match: characters such as '.' in the href
    # must not be interpreted as regex operators.
    if ! grep -qxF -- "${ref}" "${anchors}" 2> /dev/null; then
      printf '%s\n' "${file}: Can't find anchor '#${ref}'"
      return 1
    fi
    return 0
  fi

  # A local file href: remove everything after # (aka section of page)
  # and make sure the file is there.
  ref=${ref%#*}
  target=${dir}/${ref}
  if [[ ! -e "${target}" ]]; then
    printf '%s\n' "${file}: Can't find: ${target}"
    return 1
  fi
  return 0
}

# Walk the newline-separated list of markdown files. The list is fed in on
# fd 3 so that nothing inside the loop can consume it via stdin.
while IFS= read -r file <&3; do
  [[ -n "${file}" ]] || continue
  dir=$(dirname "${file}")

  # Replace ) with )\n so that each possible href is on its own line.
  # Then only grab lines that have [..](..) in them - put results in tmp file.
  # If the file doesn't have any lines with [..](..) then skip this file.
  sed "s/)/)\n/g" < "${file}" | grep "\[.*\](.*)" > "${tmp}1" || continue

  # This sed will extract the [..](..) construct itself, dropping whatever
  # precedes it on the line.
  sed "s/.*\(\[[^\[\]*\]([^()]*)\)/\1/" < "${tmp}1" > "${tmp}2" || continue

  # Collect what the anchors of this file's headings will look like.
  list_anchors "${file}" > "${tmp}anchors1"

  while IFS= read -r line; do
    # Strip off everything up to the opening paren and from the closing
    # paren on, leaving just the href.
    ref=${line#*(}
    ref=${ref%)*}

    # Report a broken href on stdout and record it in ${tmp}3, whose mere
    # existence marks the run as failed. The file is only touched when there
    # is something to report.
    if ! msg=$(check_ref "${file}" "${dir}" "${ref}" "${tmp}anchors1"); then
      printf '%s\n' "${msg}" | tee -a "${tmp}3"
    fi
  done < "${tmp}2"
done 3<<< "${mdFiles}"
# Exit status of the run: ${tmp}3 only exists when at least one broken
# reference was reported by the scan above.
rc=0
if [[ -e "${tmp:?}3" ]]; then
  rc=1
fi

# Remove every scratch file sharing the ${tmp} prefix. The :? guard aborts
# instead of expanding to a bare "*" should tmp ever be empty or unset.
rm -f -- "${tmp:?}"*
exit "${rc}"