blob: 4bdc314b770fabbce9e0d452242815130678913c [file] [log] [blame]
Mateusz Zalega6a058e72022-11-30 18:03:07 +01001// Package wrapngo wraps packngo methods providing the following usability
2// enhancements:
3// - API call rate limiting
4// - resource-aware call retries
5// - use of a configurable back-off algorithm implementation
6// - context awareness
7//
8// The implementation is provided with the following caveats:
9//
// At most Opts.Parallelism calls (one by default) can be in flight at a time.
// Concurrent calls to API-related methods of the same client beyond that
// limit will block. Calls returning packngo structs will
// return nil data when a non-nil error value is returned. An
13// os.ErrDeadlineExceeded will be returned after the underlying API calls time
14// out beyond the chosen back-off algorithm implementation's maximum allowed
15// retry interval. Other errors, excluding context.Canceled and
16// context.DeadlineExceeded, indicate either an error originating at Equinix'
17// API endpoint (which may still stem from invalid call inputs), or a network
18// error.
19//
// Packngo wrappers included below may return timeout errors even after the
// wrapped calls succeed, in the event that the server's reply could not be
// received.
23//
24// This implies that effects of mutating calls can't always be verified
25// atomically, requiring explicit synchronization between API users, regardless
26// of the retry/recovery logic used.
27//
// With that in mind, some call wrappers exposed by this package will attempt
// to recover from this kind of situation by requesting information on any
// resources created, and retrying the call if needed. This approach assumes
// any concurrent mutating API users will be synchronized, as they should be in
// any case.
33//
// Another way of handling this problem would be to leave it up to the user to
// retry calls if needed, though this would leak Equinix Metal API details, and
// complicate implementations depending on this package. Due to that, the
// former approach was chosen.
38package wrapngo
39
40import (
41 "context"
42 "errors"
43 "flag"
44 "fmt"
45 "net/http"
46 "time"
47
48 "github.com/cenkalti/backoff/v4"
49 "github.com/google/uuid"
50 "github.com/packethost/packngo"
51)
52
53// Opts conveys configurable Client parameters.
54type Opts struct {
55 // User and APIKey are the credentials used to authenticate with
56 // Metal API.
57
58 User string
59 APIKey string
60
61 // Optional parameters:
62
63 // BackOff controls the client's behavior in the event of API calls failing
64 // due to IO timeouts by adjusting the lower bound on time taken between
65 // subsequent calls.
66 BackOff func() backoff.BackOff
67
68 // APIRate is the minimum time taken between subsequent API calls.
69 APIRate time.Duration
Serge Bazanski7448f282023-02-20 14:15:51 +010070
71 // Parallelism defines how many calls to the Equinix API will be issued in
72 // parallel. When this limit is reached, subsequent attmepts to call the API will
73 // block. The order of serving of pending calls is currently undefined.
74 //
75 // If not defined (ie. 0), defaults to 1.
76 Parallelism int
Mateusz Zalega6a058e72022-11-30 18:03:07 +010077}
78
79func (o *Opts) RegisterFlags() {
80 flag.StringVar(&o.User, "equinix_api_username", "", "Username for Equinix API")
81 flag.StringVar(&o.APIKey, "equinix_api_key", "", "Key/token/password for Equinix API")
Serge Bazanski7448f282023-02-20 14:15:51 +010082 flag.IntVar(&o.Parallelism, "equinix_parallelism", 1, "How many parallel connections to the Equinix API will be allowed")
Mateusz Zalega6a058e72022-11-30 18:03:07 +010083}
84
85// Client is a limited interface of methods that the Shepherd uses on Equinix. It
86// is provided to allow for dependency injection of a fake equinix API for tests.
87type Client interface {
88 // GetDevice wraps packngo's cl.Devices.Get.
Serge Bazanski4969fd72023-04-19 17:43:12 +020089 //
90 // TODO(q3k): remove unused pid parameter.
91 GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error)
Mateusz Zalega6a058e72022-11-30 18:03:07 +010092 // ListDevices wraps packngo's cl.Device.List.
93 ListDevices(ctx context.Context, pid string) ([]packngo.Device, error)
94 // CreateDevice attempts to create a new device according to the provided
95 // request. The request _must_ configure a HardwareReservationID. This call
96 // attempts to be as idempotent as possible, and will return ErrRaceLost if a
97 // retry was needed but in the meantime the requested hardware reservation from
98 // which this machine was requested got lost.
99 CreateDevice(ctx context.Context, request *packngo.DeviceCreateRequest) (*packngo.Device, error)
100
101 // ListReservations returns a complete list of hardware reservations associated
102 // with project pid. This is an expensive method that takes a while to execute,
103 // handle with care.
104 ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error)
105
106 // ListSSHKeys wraps packngo's cl.Keys.List.
107 ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error)
108 // CreateSSHKey is idempotent - the key label can be used only once. Further
109 // calls referring to the same label and key will not yield errors. See the
110 // package comment for more info on this method's behavior and returned error
111 // values.
112 CreateSSHKey(ctx context.Context, req *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error)
113 // UpdateSSHKey is idempotent - values included in r can be applied only once,
114 // while subsequent updates using the same data don't produce errors. See the
115 // package comment for information on this method's behavior and returned error
116 // values.
117 UpdateSSHKey(ctx context.Context, kid string, req *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error)
Serge Bazanskiae004682023-04-18 13:28:48 +0200118 RebootDevice(ctx context.Context, did string) error
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100119
120 Close()
121}
122
123// client implements the Client interface.
124type client struct {
125 username string
126 token string
127 o *Opts
128 rlt *time.Ticker
129
Serge Bazanski7448f282023-02-20 14:15:51 +0100130 // serializer is a N-semaphore channel (configured by opts.Parallelism) which is
131 // used to limit the number of concurrent calls to the Equinix API.
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100132 serializer chan (struct{})
133}
134
135// New creates a Client instance based on Opts. PACKNGO_DEBUG environment
136// variable can be set prior to the below call to enable verbose packngo
137// debug logs.
138func New(opts *Opts) Client {
139 return new(opts)
140}
141
142func new(opts *Opts) *client {
143 // Apply the defaults.
144 if opts.APIRate == 0 {
145 opts.APIRate = 2 * time.Second
146 }
147 if opts.BackOff == nil {
148 opts.BackOff = func() backoff.BackOff {
149 return backoff.NewExponentialBackOff()
150 }
151 }
Serge Bazanski7448f282023-02-20 14:15:51 +0100152 if opts.Parallelism == 0 {
153 opts.Parallelism = 1
154 }
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100155
156 return &client{
157 username: opts.User,
158 token: opts.APIKey,
159 o: opts,
160 rlt: time.NewTicker(opts.APIRate),
161
Serge Bazanski7448f282023-02-20 14:15:51 +0100162 serializer: make(chan struct{}, opts.Parallelism),
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100163 }
164}
165
166func (c *client) Close() {
167 c.rlt.Stop()
168}
169
var (
	// ErrRaceLost is returned by CreateDevice when the requested hardware
	// reservation turns out to hold a device that does not carry this call's
	// witness tag.
	ErrRaceLost = errors.New("race lost with another API user")
	// ErrNoReservationProvided is returned by CreateDevice when the request
	// does not set a HardwareReservationID.
	ErrNoReservationProvided = errors.New("hardware reservation must be set")
)
174
175func (e *client) CreateDevice(ctx context.Context, r *packngo.DeviceCreateRequest) (*packngo.Device, error) {
176 if r.HardwareReservationID == "" {
177 return nil, ErrNoReservationProvided
178 }
179 // Add a tag to the request to detect if someone snatches a hardware reservation
180 // from under us.
181 witnessTag := fmt.Sprintf("wrapngo-idempotency-%s", uuid.New().String())
182 r.Tags = append(r.Tags, witnessTag)
183
184 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
185 //Does the device already exist?
186 res, _, err := cl.HardwareReservations.Get(r.HardwareReservationID, nil)
187 if err != nil {
188 return nil, fmt.Errorf("couldn't check if device already exists: %w", err)
189 }
190 if res == nil {
191 return nil, fmt.Errorf("unexpected nil response")
192 }
193 if res.Device != nil {
194 // Check if we lost the race for this hardware reservation.
195 tags := make(map[string]bool)
196 for _, tag := range res.Device.Tags {
197 tags[tag] = true
198 }
199 if !tags[witnessTag] {
200 return nil, ErrRaceLost
201 }
202 return res.Device, nil
203 }
204
205 // No device yet. Try to create it.
206 dev, _, err := cl.Devices.Create(r)
207 if err == nil {
208 return dev, nil
209 }
210 // In case of a transient failure (eg. network issue), we retry the whole
211 // operation, which means we first check again if the device already exists. If
212 // it's a permanent error from the API, the backoff logic will fail immediately.
213 return nil, fmt.Errorf("couldn't create device: %w", err)
214 })
215}
216
217func (e *client) ListDevices(ctx context.Context, pid string) ([]packngo.Device, error) {
218 return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.Device, error) {
Tim Windelschmidtd1b17472023-04-18 03:49:12 +0200219 // to increase the chances of a stable pagination, we sort the devices by hostname
220 res, _, err := cl.Devices.List(pid, &packngo.GetOptions{SortBy: "hostname"})
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100221 return res, err
222 })
223}
224
Serge Bazanski4969fd72023-04-19 17:43:12 +0200225func (e *client) GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error) {
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100226 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
Serge Bazanski4969fd72023-04-19 17:43:12 +0200227 d, _, err := cl.Devices.Get(did, opts)
Mateusz Zalega6a058e72022-11-30 18:03:07 +0100228 return d, err
229 })
230}
231
232// Currently unexported, only used in tests.
233func (e *client) deleteDevice(ctx context.Context, did string) error {
234 _, err := wrap(ctx, e, func(cl *packngo.Client) (*struct{}, error) {
235 _, err := cl.Devices.Delete(did, false)
236 if httpStatusCode(err) == http.StatusNotFound {
237 // 404s may pop up as an after effect of running the back-off
238 // algorithm, and as such should not be propagated.
239 return nil, nil
240 }
241 return nil, err
242 })
243 return err
244}
245
246func (e *client) ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) {
247 return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.HardwareReservation, error) {
248 res, _, err := cl.HardwareReservations.List(pid, nil)
249 return res, err
250 })
251}
252
253func (e *client) CreateSSHKey(ctx context.Context, r *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error) {
254 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
255 // Does the key already exist?
256 ks, _, err := cl.SSHKeys.List()
257 if err != nil {
258 return nil, fmt.Errorf("SSHKeys.List: %w", err)
259 }
260 for _, k := range ks {
261 if k.Label == r.Label {
262 if k.Key != r.Key {
263 return nil, fmt.Errorf("key label already in use for a different key")
264 }
265 return &k, nil
266 }
267 }
268
269 // No key yet. Try to create it.
270 k, _, err := cl.SSHKeys.Create(r)
271 if err != nil {
272 return nil, fmt.Errorf("SSHKeys.Create: %w", err)
273 }
274 return k, nil
275 })
276}
277
278func (e *client) UpdateSSHKey(ctx context.Context, id string, r *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error) {
279 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
280 k, _, err := cl.SSHKeys.Update(id, r)
281 if err != nil {
282 return nil, fmt.Errorf("SSHKeys.Update: %w", err)
283 }
284 return k, err
285 })
286}
287
288// Currently unexported, only used in tests.
289func (e *client) deleteSSHKey(ctx context.Context, id string) error {
290 _, err := wrap(ctx, e, func(cl *packngo.Client) (struct{}, error) {
291 _, err := cl.SSHKeys.Delete(id)
292 if err != nil {
293 return struct{}{}, fmt.Errorf("SSHKeys.Delete: %w", err)
294 }
295 return struct{}{}, err
296 })
297 return err
298}
299
300func (e *client) ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error) {
301 return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.SSHKey, error) {
302 ks, _, err := cl.SSHKeys.List()
303 if err != nil {
304 return nil, fmt.Errorf("SSHKeys.List: %w", err)
305 }
306 return ks, nil
307 })
308}
309
310// Currently unexported, only used in tests.
311func (e *client) getSSHKey(ctx context.Context, id string) (*packngo.SSHKey, error) {
312 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
313 k, _, err := cl.SSHKeys.Get(id, nil)
314 if err != nil {
315 return nil, fmt.Errorf("SSHKeys.Get: %w", err)
316 }
317 return k, nil
318 })
319}
Serge Bazanskiae004682023-04-18 13:28:48 +0200320
321func (e *client) RebootDevice(ctx context.Context, did string) error {
322 _, err := wrap(ctx, e, func(cl *packngo.Client) (struct{}, error) {
323 _, err := cl.Devices.Reboot(did)
324 return struct{}{}, err
325 })
326 return err
327}