-
Notifications
You must be signed in to change notification settings - Fork 160
/
Copy pathsource.go
181 lines (161 loc) · 5.72 KB
/
source.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package sourcecache provides a cache of code found in Git repositories.
// It talks directly to the Gerrit instance at go.googlesource.com.
// If RegisterGitMirrorDial is called, it will first try to get code from gitmirror before falling back on Gerrit.
package sourcecache
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"log"
"net"
"net/http"
"time"
"golang.org/x/build/internal/lru"
"golang.org/x/build/internal/singleflight"
"golang.org/x/build/internal/spanlog"
)
// processStartTime is captured when the package is initialized. It is used to
// report process uptime in the log line emitted when a gitmirror fetch fails.
var processStartTime = time.Now()
// sourceGroup is the singleflight group through which GetSourceTgz runs its
// fetches, keyed by "repo-rev".
// NOTE(review): presumably this collapses concurrent requests for the same key
// into a single fetch — confirm against internal/singleflight.
var sourceGroup singleflight.Group
// sourceCache holds previously fetched source entries, keyed by "repo-rev".
// NOTE(review): the argument 40 is presumably the maximum number of entries —
// confirm against internal/lru.
var sourceCache = lru.New(40) // repo-rev -> source
// source is the cache entry type for sourceCache.
//
// getSourceTgzFromURL produces one of two shapes: a successful download sets
// Tgz and leaves TooBig false, while a download that exceeded maxSize sets
// TooBig and leaves Tgz nil.
type source struct {
Tgz []byte // Source tarball bytes; nil when TooBig is true.
TooBig bool // Set when the served tarball exceeded maxSize for its repo.
}
// GetSourceTgz returns a Reader that provides a tgz of the requested source revision.
// repo is go.googlesource.com repo ("go", "net", and so on).
// rev is git revision.
//
// Entries are looked up in, and on success stored into, sourceCache under the
// key "repo-rev"; the lookup and fetch run through sourceGroup. When a
// gitmirror client has been registered it is tried first, and Gerrit is used
// if gitmirror fails or no client is registered.
//
// An error of type TooBigError is returned if the compressed tarball exceeds a size that
// on 2021-11-22 was deemed to be enough to meet expected legitimate future needs for a while.
// See golang.org/issue/46379.
func GetSourceTgz(sl spanlog.Logger, repo, rev string) (tgz io.Reader, err error) {
	outer := sl.CreateSpan("get_source", repo+"@"+rev)
	// The named result err is read at return time so the span records the
	// final outcome of the call.
	defer func() { outer.Done(err) }()

	key := fmt.Sprintf("%v-%v", repo, rev)

	// fetch resolves key to a cache entry: cache first, then gitmirror (if
	// registered), then Gerrit.
	fetch := func() (interface{}, error) {
		if cached, hit := sourceCache.Get(key); hit {
			return cached, nil
		}

		if gitMirrorClient != nil {
			mirrorSpan := sl.CreateSpan("get_source_from_gitmirror")
			mirrorSrc, mirrorErr := getSourceTgzFromGitMirror(repo, rev)
			if mirrorErr == nil {
				sourceCache.Add(key, mirrorSrc)
				mirrorSpan.Done(nil)
				return mirrorSrc, nil
			}
			log.Printf("Error fetching source %s/%s from gitmirror (after %v uptime): %v",
				repo, rev, time.Since(processStartTime), mirrorErr)
			// The span is always closed with a generic "timeout" error,
			// whatever the real cause; the actual error is in the log line above.
			mirrorSpan.Done(errors.New("timeout"))
		}

		gerritSpan := sl.CreateSpan("get_source_from_gerrit", fmt.Sprintf("%v from gerrit", key))
		gerritSrc, gerritErr := getSourceTgzFromGerrit(repo, rev)
		gerritSpan.Done(gerritErr)
		if gerritErr == nil {
			// Only successful fetches (including "too big" verdicts) are cached.
			sourceCache.Add(key, gerritSrc)
		}
		return gerritSrc, gerritErr
	}

	v, err, _ := sourceGroup.Do(key, fetch)
	if err != nil {
		return nil, err
	}

	entry := v.(source)
	if entry.TooBig {
		return nil, TooBigError{Repo: repo, Rev: rev, Limit: maxSize(repo)}
	}
	return bytes.NewReader(entry.Tgz), nil
}
// TooBigError is the error returned when the source revision is considered too big.
type TooBigError struct {
	Repo  string // Repository name as passed to GetSourceTgz ("go", "net", and so on).
	Rev   string // Git revision that was requested.
	Limit int64  // Max size in bytes.
}

// Error implements the error interface. The message names the repository and
// revision and reports the limit in whole megabytes (Limit divided by 1024
// twice, truncated).
func (e TooBigError) Error() string {
	const bytesPerMB = 1024 * 1024
	limitMB := e.Limit / bytesPerMB
	return fmt.Sprintf("rejected because compressed tarball of repository go.googlesource.com/%s at revision %s exceeded a limit of %d MB; see golang.org/issue/46379",
		e.Repo, e.Rev, limitMB)
}
// gitMirrorClient is the HTTP client used to fetch tarballs from gitmirror.
// It is nil until RegisterGitMirrorDial is called, in which case GetSourceTgz
// skips gitmirror and goes straight to Gerrit.
var gitMirrorClient *http.Client

// RegisterGitMirrorDial registers a dial function which will be used to reach gitmirror.
// If used, this function must be called before GetSourceTgz.
//
// The installed client uses a 30 second request timeout and a 30 second idle
// connection timeout. Every connection is obtained from dial; the network and
// address of the outgoing request are ignored.
func RegisterGitMirrorDial(dial func(context.Context) (net.Conn, error)) {
	const timeout = 30 * time.Second

	// The request's network and address are deliberately discarded: dial alone
	// decides where the connection goes.
	dialIgnoringAddr := func(ctx context.Context, _, _ string) (net.Conn, error) {
		return dial(ctx)
	}

	transport := &http.Transport{
		IdleConnTimeout: timeout,
		DialContext:     dialIgnoringAddr,
	}
	gitMirrorClient = &http.Client{
		Timeout:   timeout,
		Transport: transport,
	}
}
// gerritHTTPClient is the HTTP client getSourceTgzFromGerrit uses to download
// tarballs from go.googlesource.com. Its Timeout field is set to 30 seconds.
var gerritHTTPClient = &http.Client{
Timeout: 30 * time.Second,
}
// getSourceTgzFromGerrit downloads the tarball for repo at rev from the
// "+archive" URL on go.googlesource.com, using gerritHTTPClient.
// The result and error are those of getSourceTgzFromURL.
func getSourceTgzFromGerrit(repo, rev string) (source, error) {
	archiveURL := "https://go.googlesource.com/" + repo + "/+archive/" + rev + ".tar.gz"
	return getSourceTgzFromURL(gerritHTTPClient, "gerrit", repo, rev, archiveURL)
}
// getSourceTgzFromGitMirror downloads the tarball for repo at rev from
// gitmirror using gitMirrorClient, which must be non-nil.
//
// It makes up to two attempts, sleeping one second before the second. After a
// failed attempt it closes gitMirrorClient's idle connections so that the next
// request has to dial again rather than reuse a kept-alive connection.
// On failure it returns the zero source and the error from the last attempt.
func getSourceTgzFromGitMirror(repo, rev string) (src source, err error) {
	for i := 0; i < 2; i++ { // two tries; different pods maybe?
		if i > 0 {
			time.Sleep(1 * time.Second)
		}
		// The "gitmirror" hostname is unused:
		// the client's DialContext ignores the address and calls the registered dial function.
		src, err = getSourceTgzFromURL(gitMirrorClient, "gitmirror", repo, rev, "http://gitmirror/"+repo+".tar.gz?rev="+rev)
		if err == nil {
			return src, nil
		}
		// gitMirrorClient has its own Transport, so the idle connections must
		// be closed on that client; closing them on http.DefaultTransport
		// would leave the gitmirror connection pool untouched.
		gitMirrorClient.CloseIdleConnections()
	}
	return source{}, err
}
// getSourceTgzFromURL fetches a source tarball from url using hc.
// If url serves more than maxSize bytes, it stops short.
//
// service, repo and rev are used only to build error messages, except that
// repo also selects the size limit via maxSize.
//
// It returns:
//   - source{Tgz: ...} when the body was read in full and is within the limit;
//   - source{TooBig: true} with a nil error when the body exceeds the limit;
//   - the zero source and an error when the request fails, the status is not
//     2xx, or reading the body fails. Request and read errors wrap the
//     underlying error, so errors.Is and errors.As see through them.
func getSourceTgzFromURL(hc *http.Client, service, repo, rev, url string) (source, error) {
	res, err := hc.Get(url)
	if err != nil {
		return source{}, fmt.Errorf("fetching %s/%s from %s: %w", repo, rev, service, err)
	}
	defer res.Body.Close()
	if res.StatusCode/100 != 2 {
		// Best effort: include up to 4 KiB of the response body for
		// diagnostics. A read error here is ignored on purpose, since the
		// status line alone is already being reported.
		slurp, _ := io.ReadAll(io.LimitReader(res.Body, 4<<10))
		return source{}, fmt.Errorf("fetching %s/%s from %s: %v; body: %s", repo, rev, service, res.Status, slurp)
	}
	// See golang.org/issue/11224 for a discussion on tree filtering.
	limit := maxSize(repo)
	// Read at most one byte past the limit: that is enough to tell an
	// oversized body apart from one that is exactly at the limit.
	b, err := io.ReadAll(io.LimitReader(res.Body, limit+1))
	if err != nil {
		return source{}, fmt.Errorf("reading %s/%s from %s: %w", repo, rev, service, err)
	}
	if int64(len(b)) > limit {
		return source{TooBig: true}, nil
	}
	return source{Tgz: b}, nil
}
// maxSize reports the largest compressed source tarball, in bytes, that this
// package is willing to accept for repo. The limits are artificial; it's
// expected humans may need to revisit them every couple of years for the
// evolving needs of the Go project, and ideally not much more often.
//
// repo is a go.googlesource.com repo ("go", "net", and so on).
func maxSize(repo string) int64 {
	const mib = 1 << 20

	if repo == "website" {
		// In 2021, all content in x/blog (52 MB) and x/talks (74 MB) moved
		// to x/website. This makes x/website an outlier, with a compressed
		// tarball size of 135 MB. Give it some room to grow from there.
		return 200 * mib
	}

	// Every other repo shares one limit.
	// As of 2021-11-22, a compressed tarball of Go source is 23 MB,
	// x/net is 1.2 MB,
	// x/build is 1.1 MB,
	// x/tools is 2.9 MB.
	return 100 * mib
}