blob: eee0b57e192444cda71a87879b470f2a59e2eaf5 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Mateusz Zalega6a058e72022-11-30 18:03:07 +01004// Package wrapngo wraps packngo methods providing the following usability
5// enhancements:
6// - API call rate limiting
7// - resource-aware call retries
8// - use of a configurable back-off algorithm implementation
9// - context awareness
10//
11// The implementation is provided with the following caveats:
12//
// At most Opts.Parallelism calls (one if unset) can be in flight at a time.
// Further concurrent calls to API-related methods of the same client will
// block until a slot frees up. Calls returning packngo structs will
15// return nil data when a non-nil error value is returned. An
16// os.ErrDeadlineExceeded will be returned after the underlying API calls time
17// out beyond the chosen back-off algorithm implementation's maximum allowed
18// retry interval. Other errors, excluding context.Canceled and
19// context.DeadlineExceeded, indicate either an error originating at Equinix'
20// API endpoint (which may still stem from invalid call inputs), or a network
21// error.
22//
23// Packngo wrappers included below may return timeout errors even after the
24// wrapped calls succeed in the event server reply could not have been
25// received.
26//
27// This implies that effects of mutating calls can't always be verified
28// atomically, requiring explicit synchronization between API users, regardless
29// of the retry/recovery logic used.
30//
// With that in mind, some call wrappers exposed by this package will attempt
// to recover from this kind of situation by requesting information on any
33// resources created, and retrying the call if needed. This approach assumes
34// any concurrent mutating API users will be synchronized, as it should be in
35// any case.
36//
37// Another way of handling this problem would be to leave it up to the user to
38// retry calls if needed, though this would leak Equinix Metal API, and
39// complicate implementations depending on this package. Due to that, the prior
40// approach was chosen.
41package wrapngo
42
43import (
44 "context"
45 "errors"
46 "flag"
47 "fmt"
48 "net/http"
Serge Bazanskidea7cd02023-04-26 13:58:17 +020049 "sync/atomic"
Mateusz Zalega6a058e72022-11-30 18:03:07 +010050 "time"
51
52 "github.com/cenkalti/backoff/v4"
53 "github.com/google/uuid"
54 "github.com/packethost/packngo"
Serge Bazanskidea7cd02023-04-26 13:58:17 +020055 "github.com/prometheus/client_golang/prometheus"
Mateusz Zalega6a058e72022-11-30 18:03:07 +010056)
57
// Opts conveys configurable Client parameters.
type Opts struct {
	// User and APIKey are the credentials used to authenticate with
	// Metal API.

	User   string
	APIKey string

	// Optional parameters:

	// BackOff controls the client's behavior in the event of API calls failing
	// due to IO timeouts by adjusting the lower bound on time taken between
	// subsequent calls. If nil, the client constructor installs a factory
	// returning backoff.NewExponentialBackOff().
	BackOff func() backoff.BackOff

	// APIRate is the minimum time taken between subsequent API calls. If zero,
	// the client constructor sets it to two seconds.
	APIRate time.Duration

	// Parallelism defines how many calls to the Equinix API will be issued in
	// parallel. When this limit is reached, subsequent attempts to call the API will
	// block. The order of serving of pending calls is currently undefined.
	//
	// If not defined (ie. 0), defaults to 1.
	Parallelism int

	// MetricsRegistry, if non-nil, is the Prometheus registry with which the
	// client constructor registers the client's in-flight, waiting and request
	// latency collectors. If nil, no metrics are set up.
	MetricsRegistry *prometheus.Registry
}
85
86func (o *Opts) RegisterFlags() {
87 flag.StringVar(&o.User, "equinix_api_username", "", "Username for Equinix API")
88 flag.StringVar(&o.APIKey, "equinix_api_key", "", "Key/token/password for Equinix API")
Serge Bazanskiafd3cf82023-04-19 17:43:46 +020089 flag.IntVar(&o.Parallelism, "equinix_parallelism", 3, "How many parallel connections to the Equinix API will be allowed")
Mateusz Zalega6a058e72022-11-30 18:03:07 +010090}
91
// Client is a limited interface of methods that the Shepherd uses on Equinix. It
// is provided to allow for dependency injection of a fake equinix API for tests.
type Client interface {
	// GetDevice wraps packngo's cl.Devices.Get.
	//
	// TODO(q3k): remove unused pid parameter.
	GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error)
	// ListDevices wraps packngo's cl.Devices.List for project pid.
	ListDevices(ctx context.Context, pid string) ([]packngo.Device, error)
	// CreateDevice attempts to create a new device according to the provided
	// request. The request _must_ configure a HardwareReservationID. This call
	// attempts to be as idempotent as possible, and will return ErrRaceLost if a
	// retry was needed but in the meantime the requested hardware reservation from
	// which this machine was requested got lost.
	CreateDevice(ctx context.Context, request *packngo.DeviceCreateRequest) (*packngo.Device, error)

	// UpdateDevice wraps packngo's cl.Devices.Update for the device identified
	// by id.
	UpdateDevice(ctx context.Context, id string, request *packngo.DeviceUpdateRequest) (*packngo.Device, error)
	// RebootDevice wraps packngo's cl.Devices.Reboot for the device identified
	// by did.
	RebootDevice(ctx context.Context, did string) error
	// DeleteDevice wraps packngo's cl.Devices.Delete for the device identified
	// by id.
	DeleteDevice(ctx context.Context, id string) error

	// ListReservations returns a complete list of hardware reservations associated
	// with project pid. This is an expensive method that takes a while to execute,
	// handle with care.
	ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error)

	// ListOrganizationReservations wraps packngo's
	// cl.Organizations.ListHardwareReservations for organization oid.
	ListOrganizationReservations(ctx context.Context, oid string) ([]packngo.HardwareReservation, error)

	// MoveReservation moves a reserved device to the given project.
	MoveReservation(ctx context.Context, hardwareReservationDID, projectID string) (*packngo.HardwareReservation, error)

	// ListSSHKeys wraps packngo's cl.Keys.List.
	ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error)
	// CreateSSHKey is idempotent - the key label can be used only once. Further
	// calls referring to the same label and key will not yield errors. See the
	// package comment for more info on this method's behavior and returned error
	// values.
	CreateSSHKey(ctx context.Context, req *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error)
	// UpdateSSHKey is idempotent - values included in r can be applied only once,
	// while subsequent updates using the same data don't produce errors. See the
	// package comment for information on this method's behavior and returned error
	// values.
	UpdateSSHKey(ctx context.Context, kid string, req *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error)

	// Close releases resources held by the client. The implementation in this
	// package stops its rate-limiting ticker.
	Close()
}
137
// client implements the Client interface.
type client struct {
	// username and token are copied from Opts.User and Opts.APIKey when the
	// client is constructed.
	username string
	token    string
	// o is the Opts the client was built from, after defaults were applied.
	o *Opts
	// rlt is a ticker created with a period of Opts.APIRate; it is stopped by
	// Close.
	// NOTE(review): presumably consumed by wrap to rate-limit API calls — confirm.
	rlt *time.Ticker

	// serializer limits the number of concurrent API calls; its capacity is
	// Opts.Parallelism.
	serializer *serializer
	// metrics is nil unless Opts.MetricsRegistry was set at construction.
	metrics *metricsSet
}
148
// serializer is an N-semaphore channel (configured by opts.Parallelism) which is
// used to limit the number of concurrent calls to the Equinix API.
//
// In addition, it implements some simple waiting/usage statistics for
// metrics/introspection.
type serializer struct {
	// usage is the number of slots currently held and waiting is the number of
	// callers currently blocked in up. Both are accessed exclusively through
	// the 64-bit sync/atomic functions and are therefore kept as the first
	// fields of the struct: on 32-bit platforms only the first word of an
	// allocated struct is guaranteed to be 64-bit aligned, and a misaligned
	// 64-bit atomic access panics there.
	usage   int64
	waiting int64

	// sem holds one element per slot currently taken; its capacity is the
	// concurrency limit.
	sem chan struct{}
}

// up blocks until the serializer has at least one available concurrent call
// slot. If the given context expires before such a slot is available, the
// context error is returned and no slot is taken.
//
// While blocked, the caller is counted in the waiting statistic; once a slot is
// acquired it is counted in the usage statistic until down is called.
func (s *serializer) up(ctx context.Context) error {
	atomic.AddInt64(&s.waiting, 1)

	var err error
	select {
	case s.sem <- struct{}{}:
	case <-ctx.Done():
		err = ctx.Err()
	}

	// Whatever the outcome, this caller is no longer waiting.
	atomic.AddInt64(&s.waiting, -1)
	if err != nil {
		return err
	}
	atomic.AddInt64(&s.usage, 1)
	return nil
}

// down releases a previously acquired concurrent call slot. It must be called
// exactly once for every successful call to up; calling it without holding a
// slot blocks until some other caller acquires one.
func (s *serializer) down() {
	atomic.AddInt64(&s.usage, -1)
	<-s.sem
}

// stats returns the number of in-flight and waiting-for-semaphore requests.
// The two values are loaded independently, so they are not a consistent
// snapshot under concurrent use.
func (s *serializer) stats() (usage, waiting int64) {
	return atomic.LoadInt64(&s.usage), atomic.LoadInt64(&s.waiting)
}
188
// New creates a Client instance based on Opts. PACKNGO_DEBUG environment
// variable can be set prior to the below call to enable verbose packngo
// debug logs.
//
// opts must be non-nil. Unset optional fields of opts are overwritten in place
// with their defaults, so the caller's Opts value is modified. The returned
// Client holds a running ticker; call Close to stop it.
func New(opts *Opts) Client {
	return newClient(opts)
}
195
Tim Windelschmidt38105672024-04-11 01:37:29 +0200196func newClient(opts *Opts) *client {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100197 // Apply the defaults.
198 if opts.APIRate == 0 {
199 opts.APIRate = 2 * time.Second
200 }
201 if opts.BackOff == nil {
202 opts.BackOff = func() backoff.BackOff {
203 return backoff.NewExponentialBackOff()
204 }
205 }
Serge Bazanski7448f282023-02-20 14:15:51 +0100206 if opts.Parallelism == 0 {
207 opts.Parallelism = 1
208 }
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100209
Serge Bazanskidea7cd02023-04-26 13:58:17 +0200210 cl := &client{
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100211 username: opts.User,
212 token: opts.APIKey,
213 o: opts,
214 rlt: time.NewTicker(opts.APIRate),
215
Serge Bazanskidea7cd02023-04-26 13:58:17 +0200216 serializer: &serializer{
217 sem: make(chan struct{}, opts.Parallelism),
218 },
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100219 }
Serge Bazanskidea7cd02023-04-26 13:58:17 +0200220 if opts.MetricsRegistry != nil {
221 ms := newMetricsSet(cl.serializer)
222 opts.MetricsRegistry.MustRegister(ms.inFlight, ms.waiting, ms.requestLatencies)
223 cl.metrics = ms
224 }
225 return cl
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100226}
227
// Close stops the client's ticker. It does nothing else: calls that are still
// in flight are neither waited for nor cancelled.
func (c *client) Close() {
	c.rlt.Stop()
}
231
var (
	// ErrRaceLost is returned by CreateDevice when the requested hardware
	// reservation already holds a device that does not carry the witness tag
	// generated for this call.
	ErrRaceLost = errors.New("race lost with another API user")
	// ErrNoReservationProvided is returned by CreateDevice when the request
	// has an empty HardwareReservationID.
	ErrNoReservationProvided = errors.New("hardware reservation must be set")
)
236
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200237func (c *client) PowerOffDevice(ctx context.Context, pid string) error {
238 _, err := wrap(ctx, c, func(p *packngo.Client) (*packngo.Response, error) {
Tim Windelschmidt8180de92023-05-11 19:45:37 +0200239 r, err := p.Devices.PowerOff(pid)
240 if err != nil {
241 return nil, fmt.Errorf("Devices.PowerOff: %w", err)
242 }
243 return r, nil
244 })
245 return err
246}
247
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200248func (c *client) PowerOnDevice(ctx context.Context, pid string) error {
249 _, err := wrap(ctx, c, func(p *packngo.Client) (*packngo.Response, error) {
Tim Windelschmidt8180de92023-05-11 19:45:37 +0200250 r, err := p.Devices.PowerOn(pid)
251 if err != nil {
252 return nil, fmt.Errorf("Devices.PowerOn: %w", err)
253 }
254 return r, nil
255 })
256 return err
257}
258
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200259func (c *client) DeleteDevice(ctx context.Context, id string) error {
260 _, err := wrap(ctx, c, func(p *packngo.Client) (*packngo.Response, error) {
Tim Windelschmidt8180de92023-05-11 19:45:37 +0200261 r, err := p.Devices.Delete(id, false)
262 if err != nil {
263 return nil, fmt.Errorf("Devices.Delete: %w", err)
264 }
265 return r, nil
266 })
267 return err
268}
269
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200270func (c *client) CreateDevice(ctx context.Context, r *packngo.DeviceCreateRequest) (*packngo.Device, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100271 if r.HardwareReservationID == "" {
272 return nil, ErrNoReservationProvided
273 }
274 // Add a tag to the request to detect if someone snatches a hardware reservation
275 // from under us.
276 witnessTag := fmt.Sprintf("wrapngo-idempotency-%s", uuid.New().String())
277 r.Tags = append(r.Tags, witnessTag)
278
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200279 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.Device, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100280 //Does the device already exist?
281 res, _, err := cl.HardwareReservations.Get(r.HardwareReservationID, nil)
282 if err != nil {
283 return nil, fmt.Errorf("couldn't check if device already exists: %w", err)
284 }
285 if res == nil {
286 return nil, fmt.Errorf("unexpected nil response")
287 }
288 if res.Device != nil {
289 // Check if we lost the race for this hardware reservation.
290 tags := make(map[string]bool)
291 for _, tag := range res.Device.Tags {
292 tags[tag] = true
293 }
294 if !tags[witnessTag] {
295 return nil, ErrRaceLost
296 }
297 return res.Device, nil
298 }
299
300 // No device yet. Try to create it.
301 dev, _, err := cl.Devices.Create(r)
302 if err == nil {
303 return dev, nil
304 }
305 // In case of a transient failure (eg. network issue), we retry the whole
306 // operation, which means we first check again if the device already exists. If
307 // it's a permanent error from the API, the backoff logic will fail immediately.
308 return nil, fmt.Errorf("couldn't create device: %w", err)
309 })
310}
311
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200312func (c *client) UpdateDevice(ctx context.Context, id string, r *packngo.DeviceUpdateRequest) (*packngo.Device, error) {
313 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.Device, error) {
Tim Windelschmidt72a903f2023-06-27 15:49:36 +0200314 dev, _, err := cl.Devices.Update(id, r)
315 return dev, err
316 })
317}
318
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200319func (c *client) ListDevices(ctx context.Context, pid string) ([]packngo.Device, error) {
320 return wrap(ctx, c, func(cl *packngo.Client) ([]packngo.Device, error) {
Tim Windelschmidtd1b17472023-04-18 03:49:12 +0200321 // to increase the chances of a stable pagination, we sort the devices by hostname
322 res, _, err := cl.Devices.List(pid, &packngo.GetOptions{SortBy: "hostname"})
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100323 return res, err
324 })
325}
326
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200327func (c *client) GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error) {
328 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.Device, error) {
Serge Bazanski4969fd72023-04-19 17:43:12 +0200329 d, _, err := cl.Devices.Get(did, opts)
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100330 return d, err
331 })
332}
333
334// Currently unexported, only used in tests.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200335func (c *client) deleteDevice(ctx context.Context, did string) error {
336 _, err := wrap(ctx, c, func(cl *packngo.Client) (*struct{}, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100337 _, err := cl.Devices.Delete(did, false)
338 if httpStatusCode(err) == http.StatusNotFound {
339 // 404s may pop up as an after effect of running the back-off
340 // algorithm, and as such should not be propagated.
341 return nil, nil
342 }
343 return nil, err
344 })
345 return err
346}
347
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200348func (c *client) ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) {
349 return wrap(ctx, c, func(cl *packngo.Client) ([]packngo.HardwareReservation, error) {
Tim Windelschmidt8180de92023-05-11 19:45:37 +0200350 res, _, err := cl.HardwareReservations.List(pid, &packngo.ListOptions{Includes: []string{"facility", "device"}})
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100351 return res, err
352 })
353}
354
Tim Windelschmidtde12c7e2024-04-25 18:00:40 +0200355func (c *client) ListOrganizationReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) {
356 return wrap(ctx, c, func(cl *packngo.Client) ([]packngo.HardwareReservation, error) {
357 res, _, err := cl.Organizations.ListHardwareReservations(pid, &packngo.ListOptions{Includes: []string{"facility", "device", "project"}})
358 return res, err
359 })
360}
361
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200362func (c *client) MoveReservation(ctx context.Context, hardwareReservationDID, projectID string) (*packngo.HardwareReservation, error) {
363 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.HardwareReservation, error) {
Tim Windelschmidt8180de92023-05-11 19:45:37 +0200364 hr, _, err := cl.HardwareReservations.Move(hardwareReservationDID, projectID)
365 if err != nil {
366 return nil, fmt.Errorf("HardwareReservations.Move: %w", err)
367 }
368 return hr, err
369 })
370}
371
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200372func (c *client) CreateSSHKey(ctx context.Context, r *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error) {
373 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.SSHKey, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100374 // Does the key already exist?
375 ks, _, err := cl.SSHKeys.List()
376 if err != nil {
377 return nil, fmt.Errorf("SSHKeys.List: %w", err)
378 }
379 for _, k := range ks {
380 if k.Label == r.Label {
381 if k.Key != r.Key {
382 return nil, fmt.Errorf("key label already in use for a different key")
383 }
384 return &k, nil
385 }
386 }
387
388 // No key yet. Try to create it.
389 k, _, err := cl.SSHKeys.Create(r)
390 if err != nil {
391 return nil, fmt.Errorf("SSHKeys.Create: %w", err)
392 }
393 return k, nil
394 })
395}
396
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200397func (c *client) UpdateSSHKey(ctx context.Context, id string, r *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error) {
398 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.SSHKey, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100399 k, _, err := cl.SSHKeys.Update(id, r)
400 if err != nil {
401 return nil, fmt.Errorf("SSHKeys.Update: %w", err)
402 }
403 return k, err
404 })
405}
406
407// Currently unexported, only used in tests.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200408func (c *client) deleteSSHKey(ctx context.Context, id string) error {
409 _, err := wrap(ctx, c, func(cl *packngo.Client) (struct{}, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100410 _, err := cl.SSHKeys.Delete(id)
411 if err != nil {
412 return struct{}{}, fmt.Errorf("SSHKeys.Delete: %w", err)
413 }
414 return struct{}{}, err
415 })
416 return err
417}
418
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200419func (c *client) ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error) {
420 return wrap(ctx, c, func(cl *packngo.Client) ([]packngo.SSHKey, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100421 ks, _, err := cl.SSHKeys.List()
422 if err != nil {
423 return nil, fmt.Errorf("SSHKeys.List: %w", err)
424 }
425 return ks, nil
426 })
427}
428
429// Currently unexported, only used in tests.
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200430func (c *client) getSSHKey(ctx context.Context, id string) (*packngo.SSHKey, error) {
431 return wrap(ctx, c, func(cl *packngo.Client) (*packngo.SSHKey, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100432 k, _, err := cl.SSHKeys.Get(id, nil)
433 if err != nil {
434 return nil, fmt.Errorf("SSHKeys.Get: %w", err)
435 }
436 return k, nil
437 })
438}
Serge Bazanskiae004682023-04-18 13:28:48 +0200439
Tim Windelschmidt0c57d342024-04-11 01:38:47 +0200440func (c *client) RebootDevice(ctx context.Context, did string) error {
441 _, err := wrap(ctx, c, func(cl *packngo.Client) (struct{}, error) {
Serge Bazanskiae004682023-04-18 13:28:48 +0200442 _, err := cl.Devices.Reboot(did)
443 return struct{}{}, err
444 })
445 return err
446}