blob: a652ca47c4dcfd32d385384117824f8b7aa1f7da [file] [log] [blame]
Jan Schärb48174d2025-04-14 10:13:02 +00001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
4// Package oci contains tools for handling OCI images.
5package oci
6
7import (
8 "crypto/sha256"
9 "encoding/json"
10 "fmt"
11 "io"
12 "iter"
13 "strings"
14
15 ocispecv1 "github.com/opencontainers/image-spec/specs-go/v1"
16
17 "source.monogon.dev/osbase/structfs"
18)
19
Jan Schär2963b682025-07-17 17:03:44 +020020// Index represents an OCI image index.
21type Index struct {
22 // Manifest contains the parsed index manifest.
23 Manifest *ocispecv1.Index
24 rawManifest []byte
25 digest string
26 blobs Blobs
27}
28
Jan Schärb48174d2025-04-14 10:13:02 +000029// Image represents an OCI image.
30type Image struct {
31 // Manifest contains the parsed image manifest.
Jan Schär2963b682025-07-17 17:03:44 +020032 Manifest *ocispecv1.Manifest
33 rawManifest []byte
34 digest string
35 blobs Blobs
Jan Schärb48174d2025-04-14 10:13:02 +000036}
37
// Ref is a parsed manifest: either an [*Index] or an [*Image].
type Ref interface {
	// RawManifest returns the manifest bytes.
	// The slice is shared with the Ref, so callers must not modify it.
	RawManifest() []byte
	// Digest returns the digest computed over RawManifest using the default
	// digest algorithm. Currently only sha256 is supported.
	Digest() string
	// MediaType returns the manifest's media type.
	MediaType() string
	// isRef is an unexported marker method, which prevents types outside
	// this package from implementing the interface.
	isRef()
}
52
53func (i *Index) RawManifest() []byte { return i.rawManifest }
54func (i *Index) Digest() string { return i.digest }
55func (i *Index) MediaType() string { return ocispecv1.MediaTypeImageIndex }
56func (i *Index) isRef() {}
57
58func (i *Image) RawManifest() []byte { return i.rawManifest }
59func (i *Image) Digest() string { return i.digest }
60func (i *Image) MediaType() string { return ocispecv1.MediaTypeImageManifest }
61func (i *Image) isRef() {}
62
63// Blobs is the interface which image sources implement
64// to retrieve the content of blobs and manifests.
Jan Schärb48174d2025-04-14 10:13:02 +000065type Blobs interface {
66 // Blob returns the contents of a blob from its descriptor.
67 // It does not verify the contents against the digest.
Jan Schär2963b682025-07-17 17:03:44 +020068 //
69 // This is only called on images.
Jan Schärb48174d2025-04-14 10:13:02 +000070 Blob(*ocispecv1.Descriptor) (io.ReadCloser, error)
Jan Schär2963b682025-07-17 17:03:44 +020071 // Manifest returns the contents of a manifest from its descriptor.
72 // It does not verify the contents against the digest.
73 //
74 // This is only called on indexes.
75 Manifest(*ocispecv1.Descriptor) ([]byte, error)
76 // Blobs returns the [Blobs] for the manifest from its descriptor.
77 // Most implementations simply return the receiver itself, but this
78 // allows combining Refs from different sources into an Index.
79 //
80 // This is only called on indexes.
81 Blobs(*ocispecv1.Descriptor) (Blobs, error)
Jan Schärb48174d2025-04-14 10:13:02 +000082}
83
Jan Schär2963b682025-07-17 17:03:44 +020084// NewRef verifies the manifest against the expected digest if not empty,
85// then parses it according to mediaType and returns a [Ref].
86func NewRef(rawManifest []byte, mediaType string, expectedDigest string, blobs Blobs) (Ref, error) {
Jan Schärb48174d2025-04-14 10:13:02 +000087 digest := fmt.Sprintf("sha256:%x", sha256.Sum256(rawManifest))
88 if expectedDigest != "" && expectedDigest != digest {
Jan Schär2963b682025-07-17 17:03:44 +020089 if _, _, err := ParseDigest(expectedDigest); err != nil {
90 return nil, err
91 }
Jan Schärb48174d2025-04-14 10:13:02 +000092 return nil, fmt.Errorf("failed verification of manifest: expected digest %q, computed %q", expectedDigest, digest)
93 }
94
Jan Schär2963b682025-07-17 17:03:44 +020095 switch mediaType {
96 case ocispecv1.MediaTypeImageManifest:
97 manifest := &ocispecv1.Manifest{}
98 if err := json.Unmarshal(rawManifest, manifest); err != nil {
99 return nil, fmt.Errorf("failed to parse image manifest: %w", err)
Jan Schärb48174d2025-04-14 10:13:02 +0000100 }
Jan Schär2963b682025-07-17 17:03:44 +0200101 if manifest.MediaType != ocispecv1.MediaTypeImageManifest {
102 return nil, fmt.Errorf("unexpected manifest media type %q, expected %q", manifest.MediaType, ocispecv1.MediaTypeImageManifest)
103 }
104 image := &Image{
105 Manifest: manifest,
106 rawManifest: rawManifest,
107 digest: digest,
108 blobs: blobs,
109 }
110 for descriptor := range image.Descriptors() {
111 // We validate this here such that StructfsBlob does not need an error return.
112 if descriptor.Size < 0 {
113 return nil, fmt.Errorf("invalid manifest: contains descriptor with negative size")
114 }
115 }
116 return image, nil
117 case ocispecv1.MediaTypeImageIndex:
118 manifest := &ocispecv1.Index{}
119 if err := json.Unmarshal(rawManifest, manifest); err != nil {
120 return nil, fmt.Errorf("failed to parse index manifest: %w", err)
121 }
122 if manifest.MediaType != ocispecv1.MediaTypeImageIndex {
123 return nil, fmt.Errorf("unexpected manifest media type %q, expected %q", manifest.MediaType, ocispecv1.MediaTypeImageIndex)
124 }
125 index := &Index{
126 Manifest: manifest,
127 rawManifest: rawManifest,
128 digest: digest,
129 blobs: blobs,
130 }
131 return index, nil
132 default:
133 return nil, fmt.Errorf("unknown manifest media type %q", mediaType)
134 }
135}
136
137// AsImage can be conveniently wrapped around a call which returns a [Ref] or
138// error, when only [*Image] can be handled.
139func AsImage(ref Ref, err error) (*Image, error) {
140 if err != nil {
141 return nil, err
142 }
143 image, ok := ref.(*Image)
144 if !ok {
145 return nil, fmt.Errorf("unexpected manifest media type %q, only image is supported", ref.MediaType())
146 }
147 return image, nil
148}
149
150// WalkRefs iterates over all Refs reachable from ref in DFS post-order.
151// Each digest is only visited once, even if reachable multiple times.
152//
153// For each Ref, we also pass the digest by which it is referenced. This may be
154// different from ref.Digest() if we ever support multiple digest algorithms.
155func WalkRefs(digest string, ref Ref, fn func(digest string, ref Ref) error) error {
156 visited := make(map[string]bool)
157 return walkRefs(digest, ref, fn, visited)
158}
159
160func walkRefs(digest string, ref Ref, fn func(digest string, ref Ref) error, visited map[string]bool) error {
161 if visited[digest] {
162 return nil
163 }
164 visited[digest] = true
165 switch ref := ref.(type) {
166 case *Image:
167 case *Index:
168 for i := range ref.Manifest.Manifests {
169 descriptor := &ref.Manifest.Manifests[i]
170 childRef, err := ref.Ref(descriptor)
171 if err != nil {
172 return err
173 }
174 err = walkRefs(string(descriptor.Digest), childRef, fn, visited)
175 if err != nil {
176 return err
177 }
178 }
179 default:
180 return fmt.Errorf("unknown manifest media type %q", ref.MediaType())
181 }
182 return fn(digest, ref)
183}
184
185// Ref reads a manifest from its descriptor and wraps it in a [Ref].
186// The manifest is verified against the digest.
187func (i *Index) Ref(descriptor *ocispecv1.Descriptor) (Ref, error) {
188 if descriptor.Size < 0 {
189 return nil, fmt.Errorf("invalid descriptor size %d", descriptor.Size)
190 }
191 if descriptor.Size > 50*1024*1024 {
192 return nil, fmt.Errorf("refusing to read manifest of size %d into memory", descriptor.Size)
193 }
194 switch descriptor.MediaType {
195 case ocispecv1.MediaTypeImageManifest:
196 case ocispecv1.MediaTypeImageIndex:
197 default:
198 return nil, fmt.Errorf("unknown manifest media type %q", descriptor.MediaType)
199 }
200 if descriptor.Digest == "" { // NewRef treats empty digest as unknown.
201 return nil, fmt.Errorf("invalid digest")
Jan Schärb48174d2025-04-14 10:13:02 +0000202 }
203
Jan Schär2963b682025-07-17 17:03:44 +0200204 var rawManifest []byte
205 if int64(len(descriptor.Data)) == descriptor.Size {
206 rawManifest = descriptor.Data
207 } else if len(descriptor.Data) != 0 {
208 return nil, fmt.Errorf("descriptor has embedded data of wrong length")
209 } else {
210 var err error
211 rawManifest, err = i.blobs.Manifest(descriptor)
212 if err != nil {
213 return nil, err
214 }
215 }
216 if int64(len(rawManifest)) != descriptor.Size {
217 return nil, fmt.Errorf("manifest has wrong length, expected %d, got %d bytes", descriptor.Size, len(rawManifest))
218 }
219 blobs, err := i.blobs.Blobs(descriptor)
220 if err != nil {
221 return nil, err
222 }
223 return NewRef(rawManifest, descriptor.MediaType, string(descriptor.Digest), blobs)
Jan Schärb48174d2025-04-14 10:13:02 +0000224}
225
226// Descriptors returns an iterator over all descriptors in the image (config and
227// layers).
228func (i *Image) Descriptors() iter.Seq[*ocispecv1.Descriptor] {
229 return func(yield func(*ocispecv1.Descriptor) bool) {
230 if !yield(&i.Manifest.Config) {
231 return
232 }
233 for l := range i.Manifest.Layers {
234 if !yield(&i.Manifest.Layers[l]) {
235 return
236 }
237 }
238 }
239}
240
241// Blob returns the contents of a blob from its descriptor.
242// It does not verify the contents against the digest.
243func (i *Image) Blob(descriptor *ocispecv1.Descriptor) (io.ReadCloser, error) {
Jan Schär2963b682025-07-17 17:03:44 +0200244 if descriptor.Size < 0 {
245 return nil, fmt.Errorf("invalid descriptor size %d", descriptor.Size)
246 }
Jan Schärb48174d2025-04-14 10:13:02 +0000247 if int64(len(descriptor.Data)) == descriptor.Size {
248 return structfs.Bytes(descriptor.Data).Open()
249 } else if len(descriptor.Data) != 0 {
250 return nil, fmt.Errorf("descriptor has embedded data of wrong length")
251 }
252 return i.blobs.Blob(descriptor)
253}
254
255// ReadBlobVerified reads a blob into a byte slice and verifies it against the
256// digest.
257func (i *Image) ReadBlobVerified(descriptor *ocispecv1.Descriptor) ([]byte, error) {
Jan Schärb48174d2025-04-14 10:13:02 +0000258 if descriptor.Size > 50*1024*1024 {
259 return nil, fmt.Errorf("refusing to read blob of size %d into memory", descriptor.Size)
260 }
261 expectedDigest := string(descriptor.Digest)
262 if _, _, err := ParseDigest(expectedDigest); err != nil {
263 return nil, err
264 }
265 blob, err := i.Blob(descriptor)
266 if err != nil {
267 return nil, err
268 }
269 defer blob.Close()
270 content := make([]byte, descriptor.Size)
271 _, err = io.ReadFull(blob, content)
272 if err != nil {
273 return nil, err
274 }
275 digest := fmt.Sprintf("sha256:%x", sha256.Sum256(content))
276 if expectedDigest != digest {
277 return nil, fmt.Errorf("failed verification of blob: expected digest %q, computed %q", expectedDigest, digest)
278 }
279 return content, nil
280}
281
282// StructfsBlob wraps an image and descriptor into a [structfs.Blob].
283func (i *Image) StructfsBlob(descriptor *ocispecv1.Descriptor) structfs.Blob {
284 return &structfsBlob{
285 image: i,
286 descriptor: descriptor,
287 }
288}
289
290type structfsBlob struct {
291 image *Image
292 descriptor *ocispecv1.Descriptor
293}
294
295func (b *structfsBlob) Open() (io.ReadCloser, error) {
296 return b.image.Blob(b.descriptor)
297}
298
299func (b *structfsBlob) Size() int64 {
300 return b.descriptor.Size
301}
302
// ParseDigest splits a digest of the form "algorithm:encoded" into its
// components, cutting at the first colon.
//
// It returns an error if the digest contains no colon, if the algorithm is
// not supported, or if encoded is not valid for the algorithm. Only sha256 is
// supported; its encoded part must consist of exactly 64 lowercase
// hexadecimal characters. On error, algorithm and encoded are empty.
func ParseDigest(digest string) (algorithm string, encoded string, err error) {
	alg, enc, found := strings.Cut(digest, ":")
	if !found {
		return "", "", fmt.Errorf("invalid digest")
	}
	switch alg {
	case "sha256":
		// Anything left after trimming lowercase hex characters from the
		// start begins with a character which is not allowed.
		if rest := strings.TrimLeft(enc, "0123456789abcdef"); len(rest) != 0 {
			return "", "", fmt.Errorf("invalid character in sha256 digest")
		}
		if len(enc) != sha256.Size*2 {
			return "", "", fmt.Errorf("invalid sha256 digest length")
		}
		return alg, enc, nil
	default:
		return "", "", fmt.Errorf("unknown digest algorithm %q", alg)
	}
}