blob: 4f2b6d2f83cb80556d44bdc211b0a6ed63b14883 [file] [log] [blame]
Jan Schärcc9e4d12025-04-14 10:28:40 +00001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
4// Package registry contains a client and server implementation of the OCI
5// Distribution spec. Both client and server only support pulling. The server is
6// intended for use in tests.
7package registry
8
9import (
10 "context"
11 "encoding/json"
12 "errors"
13 "fmt"
14 "io"
15 "net/http"
16 "net/url"
17 "regexp"
18 "strconv"
19 "strings"
20 "sync"
21 "sync/atomic"
22 "time"
23
24 "github.com/cenkalti/backoff/v4"
25 ocispecv1 "github.com/opencontainers/image-spec/specs-go/v1"
26
27 "source.monogon.dev/osbase/oci"
28)
29
30// Sources for these expressions:
31//
32// - https://github.com/opencontainers/distribution-spec/blob/main/spec.md#pulling-manifests
33// - https://github.com/opencontainers/image-spec/blob/main/descriptor.md#digests
// Unanchored expressions for the individual parts of an image reference.
const repositoryExpr = `[a-z0-9]+(?:(?:\.|_|__|-+)[a-z0-9]+)*(?:\/[a-z0-9]+(?:(?:\.|_|__|-+)[a-z0-9]+)*)*`
const tagExpr = `[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}`
const digestExpr = `[a-z0-9]+(?:[+._-][a-z0-9]+)*:[a-zA-Z0-9=_-]+`

// RepositoryRegexp matches strings which consist entirely of a repository
// name.
var RepositoryRegexp = regexp.MustCompile("^" + repositoryExpr + "$")

// TagRegexp matches strings which consist entirely of a tag.
var TagRegexp = regexp.MustCompile("^" + tagExpr + "$")

// DigestRegexp matches strings which consist entirely of a digest in
// algorithm:encoded form.
var DigestRegexp = regexp.MustCompile("^" + digestExpr + "$")
45
// unknownManifest can be used to parse the media type from a manifest of
// unknown type.
type unknownManifest struct {
	// MediaType is decoded from the top-level "mediaType" JSON field. It is
	// left empty if the field is absent.
	MediaType string `json:"mediaType,omitempty"`
}
51
// Client is an OCI registry client.
type Client struct {
	// Transport will be used to make requests. For example, this allows
	// configuring TLS client and CA certificates.
	// If nil, [http.DefaultTransport] is used.
	Transport http.RoundTripper
	// GetBackOff can be set to make the Client retry HTTP requests. It is
	// called to obtain a backoff policy when a request fails with an error
	// that is not permanent. If nil, requests are not retried.
	GetBackOff func() backoff.BackOff
	// RetryNotify receives errors that trigger a retry, e.g. for logging.
	RetryNotify backoff.Notify
	// UserAgent is used as the User-Agent HTTP header. If empty, the header
	// is not set by the Client.
	UserAgent string

	// Scheme must be either http or https.
	Scheme string
	// Host is the host with optional port.
	Host string
	// Repository is the name of the repository. It is part of the client because
	// bearer tokens are usually scoped to a repository.
	Repository string

	// NOTE(review): authMu presumably guards bearerToken; the code using it
	// is not part of this section - confirm.
	authMu sync.RWMutex
	// bearerToken is a cached token obtained from an authorization service.
	bearerToken string
}
77
Jan Schär2963b682025-07-17 17:03:44 +020078// Read fetches a manifest from the registry and returns an [oci.Ref].
Jan Schärcc9e4d12025-04-14 10:28:40 +000079//
Jan Schär2963b682025-07-17 17:03:44 +020080// The context is used for the manifest request and for all blob and manifest
81// requests made through the Ref.
Jan Schärcc9e4d12025-04-14 10:28:40 +000082//
83// At least one of tag and digest must be set. If only tag is set, then you are
84// trusting the registry to return the right content. Otherwise, the digest is
85// used to verify the manifest. If both tag and digest are set, then the tag is
86// used in the request, and the digest is used to verify the response. The
87// advantage of fetching by tag is that it allows a pull through cache to
88// display tags to a user inspecting the cache contents.
Jan Schär2963b682025-07-17 17:03:44 +020089func (c *Client) Read(ctx context.Context, tag, digest string) (oci.Ref, error) {
Jan Schär62cecde2025-04-16 15:24:04 +000090 if !RepositoryRegexp.MatchString(c.Repository) {
Jan Schärcc9e4d12025-04-14 10:28:40 +000091 return nil, fmt.Errorf("invalid repository %q", c.Repository)
92 }
Jan Schär62cecde2025-04-16 15:24:04 +000093 if tag != "" && !TagRegexp.MatchString(tag) {
Jan Schärcc9e4d12025-04-14 10:28:40 +000094 return nil, fmt.Errorf("invalid tag %q", tag)
95 }
96 if digest != "" {
97 if _, _, err := oci.ParseDigest(digest); err != nil {
98 return nil, err
99 }
100 }
101 var reference string
102 if tag != "" {
103 reference = tag
104 } else if digest != "" {
105 reference = digest
106 } else {
107 return nil, fmt.Errorf("tag and digest cannot both be empty")
108 }
109
110 manifestPath := fmt.Sprintf("/v2/%s/manifests/%s", c.Repository, reference)
Jan Schär2963b682025-07-17 17:03:44 +0200111 var manifestBytes []byte
112 var manifestMediaType string
Jan Schärcc9e4d12025-04-14 10:28:40 +0000113 err := c.retry(ctx, func() error {
114 req, err := c.newGet(manifestPath)
115 if err != nil {
116 return err
117 }
Jan Schär2963b682025-07-17 17:03:44 +0200118 req.Header.Set("Accept", ocispecv1.MediaTypeImageManifest+","+ocispecv1.MediaTypeImageIndex)
Jan Schärcc9e4d12025-04-14 10:28:40 +0000119 resp, err := c.doGet(ctx, req)
120 if err != nil {
121 return err
122 }
123 if resp.StatusCode != http.StatusOK {
124 return readClientError(resp, req)
125 }
126 defer resp.Body.Close()
Jan Schär2963b682025-07-17 17:03:44 +0200127 manifestMediaType = resp.Header.Get("Content-Type")
128 manifestBytes, err = readFullBody(resp, 50*1024*1024)
Jan Schärcc9e4d12025-04-14 10:28:40 +0000129 return err
130 })
131 if err != nil {
132 return nil, err
133 }
134
Jan Schär2963b682025-07-17 17:03:44 +0200135 // Remove any parameters from the Content-Type header.
136 manifestMediaType, _, _ = strings.Cut(manifestMediaType, ";")
137 switch manifestMediaType {
138 case ocispecv1.MediaTypeImageManifest, ocispecv1.MediaTypeImageIndex:
139 // The Content-Type header is valid, use it.
140 default:
141 // We need to parse the manifest to extract the media type, then parse it
142 // again for that media type.
143 var manifest unknownManifest
144 if err := json.Unmarshal(manifestBytes, &manifest); err != nil {
145 return nil, fmt.Errorf("failed to parse manifest: %w", err)
146 }
147 manifestMediaType = manifest.MediaType
148 }
149
Jan Schärcc9e4d12025-04-14 10:28:40 +0000150 blobs := &clientBlobs{
151 ctx: ctx,
152 client: c,
153 }
Jan Schär2963b682025-07-17 17:03:44 +0200154 return oci.NewRef(manifestBytes, manifestMediaType, digest, blobs)
Jan Schärcc9e4d12025-04-14 10:28:40 +0000155}
156
// clientBlobs fetches manifests and blobs from the repository of a Client.
type clientBlobs struct {
	// ctx is used for all requests made through this value.
	ctx context.Context
	// client is the Client used to make requests.
	client *Client
}
161
Jan Schär2963b682025-07-17 17:03:44 +0200162func (r *clientBlobs) Manifest(descriptor *ocispecv1.Descriptor) ([]byte, error) {
163 digest := string(descriptor.Digest)
164 if _, _, err := oci.ParseDigest(digest); err != nil {
165 return nil, err
166 }
167
168 manifestPath := fmt.Sprintf("/v2/%s/manifests/%s", r.client.Repository, digest)
169 var manifestBytes []byte
170 err := r.client.retry(r.ctx, func() error {
171 req, err := r.client.newGet(manifestPath)
172 if err != nil {
173 return err
174 }
175 req.Header.Set("Accept", ocispecv1.MediaTypeImageManifest+","+ocispecv1.MediaTypeImageIndex)
176 resp, err := r.client.doGet(r.ctx, req)
177 if err != nil {
178 return err
179 }
180 if resp.StatusCode != http.StatusOK {
181 return readClientError(resp, req)
182 }
183 defer resp.Body.Close()
184 manifestBytes, err = readKnownSizeBody(resp, int(descriptor.Size))
185 return err
186 })
187 if err != nil {
188 return nil, err
189 }
190 return manifestBytes, nil
191}
192
// Blobs returns the receiver itself, regardless of the descriptor: the same
// clientBlobs is used to fetch blobs for every manifest.
func (r *clientBlobs) Blobs(_ *ocispecv1.Descriptor) (oci.Blobs, error) {
	return r, nil
}
196
Jan Schärcc9e4d12025-04-14 10:28:40 +0000197func (r *clientBlobs) Blob(descriptor *ocispecv1.Descriptor) (io.ReadCloser, error) {
Jan Schär62cecde2025-04-16 15:24:04 +0000198 if !DigestRegexp.MatchString(string(descriptor.Digest)) {
Jan Schärcc9e4d12025-04-14 10:28:40 +0000199 return nil, fmt.Errorf("invalid blob digest %q", descriptor.Digest)
200 }
201 blobPath := fmt.Sprintf("/v2/%s/blobs/%s", r.client.Repository, descriptor.Digest)
202 var resp *http.Response
203 err := r.client.retry(r.ctx, func() error {
204 req, err := r.client.newGet(blobPath)
205 if err != nil {
206 return err
207 }
208 resp, err = r.client.doGet(r.ctx, req)
209 if err != nil {
210 return err
211 }
212 if resp.StatusCode != http.StatusOK {
213 return readClientError(resp, req)
214 }
215 return nil
216 })
217 if err != nil {
218 return nil, err
219 }
220 if r.client.GetBackOff == nil {
221 return resp.Body, nil
222 }
223 ctx, cancel := context.WithCancelCause(r.ctx)
224 reader := &retryReader{
225 ctx: ctx,
226 cancel: cancel,
227 client: r.client,
228 path: blobPath,
229 pos: 0,
230 size: descriptor.Size,
231 }
232 reader.resp.Store(resp)
233 return reader, nil
234}
235
// retryReader reads the content of a blob, and makes a new request for the
// remaining content when reading from the current response fails.
type retryReader struct {
	// ctx is used for all requests made by the reader.
	ctx context.Context
	// cancel cancels ctx. It is called by Close().
	cancel context.CancelCauseFunc
	// client is used to make and retry requests.
	client *Client
	// path is the URL path of the blob.
	path string
	// pos is the number of bytes which have been returned by Read() so far.
	pos int64
	// size is the total number of bytes to read. Read() returns io.EOF once
	// pos has reached size.
	size int64
	// resp is an atomic pointer because it may be concurrently written by Read()
	// and read by Close().
	resp atomic.Pointer[http.Response]
}
247
248func (r *retryReader) Read(p []byte) (n int, err error) {
249 if r.pos >= r.size {
250 return 0, io.EOF
251 }
252 if len(p) == 0 {
253 return 0, nil
254 }
255 if int64(len(p)) > r.size-r.pos {
256 p = p[:r.size-r.pos]
257 }
258 closed := false
259 err = r.client.retry(r.ctx, func() error {
260 if closed {
261 req, err := r.client.newGet(r.path)
262 if err != nil {
263 return err
264 }
265 if r.pos != 0 {
266 req.Header.Set("Range", fmt.Sprintf("bytes=%d-", r.pos))
267 }
268 resp, err := r.client.doGet(r.ctx, req)
269 if err != nil {
270 return err
271 }
272 r.resp.Store(resp)
273 if err := context.Cause(r.ctx); err != nil {
274 resp.Body.Close()
275 return err
276 }
277 switch resp.StatusCode {
278 case http.StatusOK:
279 _, err := io.CopyN(io.Discard, resp.Body, r.pos)
280 if err != nil {
281 return err
282 }
283 case http.StatusPartialContent:
284 if !strings.HasPrefix(resp.Header.Get("Content-Range"), fmt.Sprintf("bytes %d-", r.pos)) {
285 return backoff.Permanent(errors.New("invalid content range"))
286 }
287 default:
288 return readClientError(resp, req)
289 }
290 }
291 var err error
292 n, err = r.resp.Load().Body.Read(p)
293 if n != 0 {
294 r.pos += int64(n)
295 return nil
296 }
297 if err == nil {
298 err = errors.New("read 0 bytes")
299 }
300 closed = true
301 r.resp.Load().Body.Close()
302 return err
303 })
304 if r.pos >= r.size {
305 err = io.EOF
306 } else if err == io.EOF {
307 err = io.ErrUnexpectedEOF
308 }
309 return
310}
311
// Close cancels the context of the reader with the cause "reader closed", and
// then closes the body of the current response.
func (r *retryReader) Close() error {
	// Cancel before loading the response: Read() checks for cancelation after
	// it stores a new response.
	r.cancel(errors.New("reader closed"))
	return r.resp.Load().Body.Close()
}
316
317func (c *Client) retry(ctx context.Context, o func() error) error {
318 if err := ctx.Err(); err != nil {
319 return err
320 }
321 var b backoff.BackOff
322 for {
323 err := o()
324 if err == nil {
325 return nil
326 }
327 var permanent *backoff.PermanentError
328 if errors.As(err, &permanent) {
329 return err
330 }
331 if ctx.Err() != nil {
332 return err
333 }
334 if b == nil {
335 if c.GetBackOff == nil {
336 return err
337 }
338 b = c.GetBackOff()
339 }
340 next := b.NextBackOff()
341 if next == backoff.Stop {
342 return err
343 }
344 var clientErr *ClientError
345 if errors.As(err, &clientErr) && !clientErr.RetryAfter.IsZero() {
346 next = max(next, time.Until(clientErr.RetryAfter))
347 }
348 deadline, hasDeadline := ctx.Deadline()
349 if hasDeadline && time.Until(deadline) < next {
350 return err
351 }
352
353 if c.RetryNotify != nil {
354 c.RetryNotify(err, next)
355 }
356 select {
357 case <-ctx.Done():
358 return ctx.Err()
359 case <-time.After(next):
360 }
361 }
362}
363
364func (c *Client) newGet(path string) (*http.Request, error) {
365 u := url.URL{
366 Scheme: c.Scheme,
367 Host: c.Host,
368 Path: path,
369 }
370 req, err := http.NewRequest("GET", u.String(), nil)
371 if err != nil {
372 return nil, err
373 }
374 if c.UserAgent != "" {
375 req.Header.Set("User-Agent", c.UserAgent)
376 }
377 return req, nil
378}
379
380func (c *Client) doGet(ctx context.Context, req *http.Request) (*http.Response, error) {
381 req = req.WithContext(ctx)
382 c.addAuthorization(req)
383 client := http.Client{Transport: c.Transport}
384 resp, err := client.Do(req)
385 if err != nil {
386 return nil, redactURLError(err)
387 }
388
389 if resp.StatusCode == http.StatusUnauthorized {
390 unauthorizedErr := readClientError(resp, req)
391 retry, err := c.handleUnauthorized(ctx, resp)
392 if err != nil {
393 return nil, err
394 }
395 if !retry {
396 return nil, unauthorizedErr
397 }
398 c.addAuthorization(req)
399 resp, err = client.Do(req)
400 if err != nil {
401 return nil, redactURLError(err)
402 }
403 }
404
405 return resp, nil
406}
407
408func readClientError(resp *http.Response, req *http.Request) error {
409 defer resp.Body.Close()
410 clientErr := &ClientError{
411 StatusCode: resp.StatusCode,
412 }
413 retryAfter := resp.Header.Get("Retry-After")
414 if retryAfter != "" {
415 seconds, err := strconv.ParseInt(retryAfter, 10, 64)
416 if err == nil {
417 clientErr.RetryAfter = time.Now().Add(time.Duration(seconds) * time.Second)
418 } else {
419 clientErr.RetryAfter, _ = http.ParseTime(retryAfter)
420 }
421 }
422 content, err := readFullBody(resp, 2048)
423 if err == nil {
424 clientErr.RawBody = content
425 _ = json.Unmarshal(content, &clientErr.ErrorBody)
426 }
427
428 errReq := resp.Request
429 if errReq == nil {
430 errReq = req
431 }
432 urlErr := &url.Error{
433 Op: errReq.Method,
434 URL: errReq.URL.Redacted(),
435 Err: clientErr,
436 }
437 err = redactURLError(urlErr)
438
439 // Client errors are usually permanent, and server errors are usually
440 // temporary, but there are some exceptions.
441 isTemporary := 500 <= clientErr.StatusCode && clientErr.StatusCode <= 599
442 switch clientErr.StatusCode {
443 case http.StatusRequestTimeout, http.StatusTooEarly,
444 http.StatusTooManyRequests,
445 499: // nginx-specific, client closed request
446 isTemporary = true
447 case http.StatusNotImplemented, http.StatusHTTPVersionNotSupported,
448 http.StatusNetworkAuthenticationRequired:
449 isTemporary = false
450 }
451 if !isTemporary {
452 return backoff.Permanent(err)
453 }
454 return err
455}
456
// ClientError is an HTTP error received from a registry or authorization
// service.
type ClientError struct {
	// ErrorBody contains the errors parsed from the response body, if any.
	ErrorBody
	// StatusCode is the HTTP status code of the response.
	StatusCode int
	// RetryAfter is the time before which the request should not be retried.
	// It is the zero time if unknown.
	RetryAfter time.Time
	// RawBody is the unparsed response body.
	RawBody []byte
}

// ErrorBody is the JSON structure of an error response body.
type ErrorBody struct {
	Errors []ErrorInfo `json:"errors,omitempty"`
}

// ErrorInfo is a single error in an [ErrorBody].
type ErrorInfo struct {
	Code    string `json:"code"`
	Message string `json:"message,omitempty"`
}

// Error implements the error interface. If the body contained a list of
// errors, these are shown after the status code. Otherwise, the status text is
// shown, followed by the quoted raw body if it is not empty.
func (e *ClientError) Error() string {
	if count := len(e.Errors); count > 0 {
		details := make([]string, 0, count)
		for i := range e.Errors {
			info := &e.Errors[i]
			details = append(details, fmt.Sprintf("%s: %s", info.Code, info.Message))
		}
		return fmt.Sprintf("HTTP %d %s", e.StatusCode, strings.Join(details, "; "))
	}

	status := fmt.Sprintf("HTTP %d %s", e.StatusCode, http.StatusText(e.StatusCode))
	if len(e.RawBody) == 0 {
		return status
	}
	return fmt.Sprintf("%s: %q", status, e.RawBody)
}
489
// redactURLError redacts the URL in an [url.Error]. After redirects, the URL
// may contain secrets in query parameter values.
//
// The values of all query parameters other than "scope" and "service" are
// replaced, as is a password in the user info. The [url.Error] found in the
// chain of err is modified in place, and err itself is returned. If err does
// not contain an [url.Error], or its URL cannot be parsed, nothing is changed.
//
// Logic adapted from:
// https://github.com/google/go-containerregistry/blob/v0.20.3/internal/redact/redact.go
func redactURLError(err error) error {
	var target *url.Error
	if !errors.As(err, &target) {
		return err
	}
	parsed, parseErr := url.Parse(target.URL)
	if parseErr != nil {
		return err
	}

	params := parsed.Query()
	for key, values := range params {
		if key != "scope" && key != "service" {
			for i := range values {
				values[i] = "REDACTED"
			}
		}
	}
	parsed.RawQuery = params.Encode()

	target.URL = parsed.Redacted()
	return err
}
517
518func readFullBody(resp *http.Response, limit int) ([]byte, error) {
519 switch {
520 case resp.ContentLength < 0:
521 lr := io.LimitReader(resp.Body, int64(limit)+1)
522 content, err := io.ReadAll(lr)
523 if err != nil {
524 return nil, err
525 }
526 if len(content) > limit {
527 return nil, backoff.Permanent(fmt.Errorf("HTTP response exceeds limit of %d bytes", limit))
528 }
529 return content, nil
530 case resp.ContentLength <= int64(limit):
531 content := make([]byte, resp.ContentLength)
532 _, err := io.ReadFull(resp.Body, content)
533 if err != nil {
534 return nil, err
535 }
536 return content, nil
537 default:
538 return nil, backoff.Permanent(fmt.Errorf("HTTP response of size %d exceeds limit of %d bytes", resp.ContentLength, limit))
539 }
540}
Jan Schär2963b682025-07-17 17:03:44 +0200541
542func readKnownSizeBody(resp *http.Response, size int) ([]byte, error) {
543 if resp.ContentLength >= 0 && resp.ContentLength != int64(size) {
544 return nil, backoff.Permanent(fmt.Errorf("HTTP response has size %d, expected %d bytes", resp.ContentLength, size))
545 }
546 content := make([]byte, size)
547 _, err := io.ReadFull(resp.Body, content)
548 if err != nil {
549 return nil, err
550 }
551 return content, nil
552}