blob: 16478d57d26fc9efc1d7d9f4261175be28283d3e [file] [log] [blame]
// Package wrapngo wraps packngo methods providing the following usability
// enhancements:
// - API call rate limiting
// - resource-aware call retries
// - use of a configurable back-off algorithm implementation
// - context awareness
//
// The implementation is provided with the following caveats:
//
// There can be only one call in flight. Concurrent calls to API-related
// methods of the same client will block. Calls returning packngo structs will
// return nil data when a non-nil error value is returned. An
// os.ErrDeadlineExceeded will be returned after the underlying API calls time
// out beyond the chosen back-off algorithm implementation's maximum allowed
// retry interval. Other errors, excluding context.Canceled and
// context.DeadlineExceeded, indicate either an error originating at Equinix's
// API endpoint (which may still stem from invalid call inputs), or a network
// error.
//
// Packngo wrappers included below may return timeout errors even after the
// wrapped calls succeed, in the event that the server's reply could not be
// received.
//
// This implies that effects of mutating calls can't always be verified
// atomically, requiring explicit synchronization between API users, regardless
// of the retry/recovery logic used.
//
// With that in mind, some call wrappers exposed by this package will attempt
// to recover from this kind of situation by requesting information on any
// resources created, and retrying the call if needed. This approach assumes
// any concurrent mutating API users will be synchronized, as they should be in
// any case.
//
// Another way of handling this problem would be to leave it up to the user to
// retry calls if needed, though this would leak the Equinix Metal API, and
// complicate implementations depending on this package. Due to that, the
// former approach was chosen.
38package wrapngo
39
40import (
41 "context"
42 "errors"
43 "flag"
44 "fmt"
45 "net/http"
46 "time"
47
48 "github.com/cenkalti/backoff/v4"
49 "github.com/google/uuid"
50 "github.com/packethost/packngo"
51)
52
53// Opts conveys configurable Client parameters.
54type Opts struct {
55 // User and APIKey are the credentials used to authenticate with
56 // Metal API.
57
58 User string
59 APIKey string
60
61 // Optional parameters:
62
63 // BackOff controls the client's behavior in the event of API calls failing
64 // due to IO timeouts by adjusting the lower bound on time taken between
65 // subsequent calls.
66 BackOff func() backoff.BackOff
67
68 // APIRate is the minimum time taken between subsequent API calls.
69 APIRate time.Duration
70}
71
72func (o *Opts) RegisterFlags() {
73 flag.StringVar(&o.User, "equinix_api_username", "", "Username for Equinix API")
74 flag.StringVar(&o.APIKey, "equinix_api_key", "", "Key/token/password for Equinix API")
75}
76
77// Client is a limited interface of methods that the Shepherd uses on Equinix. It
78// is provided to allow for dependency injection of a fake equinix API for tests.
79type Client interface {
80 // GetDevice wraps packngo's cl.Devices.Get.
81 GetDevice(ctx context.Context, pid, did string) (*packngo.Device, error)
82 // ListDevices wraps packngo's cl.Device.List.
83 ListDevices(ctx context.Context, pid string) ([]packngo.Device, error)
84 // CreateDevice attempts to create a new device according to the provided
85 // request. The request _must_ configure a HardwareReservationID. This call
86 // attempts to be as idempotent as possible, and will return ErrRaceLost if a
87 // retry was needed but in the meantime the requested hardware reservation from
88 // which this machine was requested got lost.
89 CreateDevice(ctx context.Context, request *packngo.DeviceCreateRequest) (*packngo.Device, error)
90
91 // ListReservations returns a complete list of hardware reservations associated
92 // with project pid. This is an expensive method that takes a while to execute,
93 // handle with care.
94 ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error)
95
96 // ListSSHKeys wraps packngo's cl.Keys.List.
97 ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error)
98 // CreateSSHKey is idempotent - the key label can be used only once. Further
99 // calls referring to the same label and key will not yield errors. See the
100 // package comment for more info on this method's behavior and returned error
101 // values.
102 CreateSSHKey(ctx context.Context, req *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error)
103 // UpdateSSHKey is idempotent - values included in r can be applied only once,
104 // while subsequent updates using the same data don't produce errors. See the
105 // package comment for information on this method's behavior and returned error
106 // values.
107 UpdateSSHKey(ctx context.Context, kid string, req *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error)
108
109 Close()
110}
111
112// client implements the Client interface.
113type client struct {
114 username string
115 token string
116 o *Opts
117 rlt *time.Ticker
118
119 // serializer is a 1-semaphore channel (effectively a mutex) which is used to
120 // limit the number of concurrent calls to the Equinix API.
121 serializer chan (struct{})
122}
123
124// New creates a Client instance based on Opts. PACKNGO_DEBUG environment
125// variable can be set prior to the below call to enable verbose packngo
126// debug logs.
127func New(opts *Opts) Client {
128 return new(opts)
129}
130
131func new(opts *Opts) *client {
132 // Apply the defaults.
133 if opts.APIRate == 0 {
134 opts.APIRate = 2 * time.Second
135 }
136 if opts.BackOff == nil {
137 opts.BackOff = func() backoff.BackOff {
138 return backoff.NewExponentialBackOff()
139 }
140 }
141
142 return &client{
143 username: opts.User,
144 token: opts.APIKey,
145 o: opts,
146 rlt: time.NewTicker(opts.APIRate),
147
148 serializer: make(chan struct{}, 1),
149 }
150}
151
152func (c *client) Close() {
153 c.rlt.Stop()
154}
155
// Sentinel errors returned by CreateDevice.
var (
	// ErrRaceLost is returned when the requested hardware reservation turns
	// out to hold a device that was not created by the current call.
	ErrRaceLost = errors.New("race lost with another API user")
	// ErrNoReservationProvided is returned when the create request carries an
	// empty HardwareReservationID.
	ErrNoReservationProvided = errors.New("hardware reservation must be set")
)
160
161func (e *client) CreateDevice(ctx context.Context, r *packngo.DeviceCreateRequest) (*packngo.Device, error) {
162 if r.HardwareReservationID == "" {
163 return nil, ErrNoReservationProvided
164 }
165 // Add a tag to the request to detect if someone snatches a hardware reservation
166 // from under us.
167 witnessTag := fmt.Sprintf("wrapngo-idempotency-%s", uuid.New().String())
168 r.Tags = append(r.Tags, witnessTag)
169
170 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
171 //Does the device already exist?
172 res, _, err := cl.HardwareReservations.Get(r.HardwareReservationID, nil)
173 if err != nil {
174 return nil, fmt.Errorf("couldn't check if device already exists: %w", err)
175 }
176 if res == nil {
177 return nil, fmt.Errorf("unexpected nil response")
178 }
179 if res.Device != nil {
180 // Check if we lost the race for this hardware reservation.
181 tags := make(map[string]bool)
182 for _, tag := range res.Device.Tags {
183 tags[tag] = true
184 }
185 if !tags[witnessTag] {
186 return nil, ErrRaceLost
187 }
188 return res.Device, nil
189 }
190
191 // No device yet. Try to create it.
192 dev, _, err := cl.Devices.Create(r)
193 if err == nil {
194 return dev, nil
195 }
196 // In case of a transient failure (eg. network issue), we retry the whole
197 // operation, which means we first check again if the device already exists. If
198 // it's a permanent error from the API, the backoff logic will fail immediately.
199 return nil, fmt.Errorf("couldn't create device: %w", err)
200 })
201}
202
203func (e *client) ListDevices(ctx context.Context, pid string) ([]packngo.Device, error) {
204 return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.Device, error) {
205 res, _, err := cl.Devices.List(pid, nil)
206 return res, err
207 })
208}
209
210func (e *client) GetDevice(ctx context.Context, pid, did string) (*packngo.Device, error) {
211 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
212 d, _, err := cl.Devices.Get(did, nil)
213 return d, err
214 })
215}
216
217// Currently unexported, only used in tests.
218func (e *client) deleteDevice(ctx context.Context, did string) error {
219 _, err := wrap(ctx, e, func(cl *packngo.Client) (*struct{}, error) {
220 _, err := cl.Devices.Delete(did, false)
221 if httpStatusCode(err) == http.StatusNotFound {
222 // 404s may pop up as an after effect of running the back-off
223 // algorithm, and as such should not be propagated.
224 return nil, nil
225 }
226 return nil, err
227 })
228 return err
229}
230
231func (e *client) ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) {
232 return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.HardwareReservation, error) {
233 res, _, err := cl.HardwareReservations.List(pid, nil)
234 return res, err
235 })
236}
237
238func (e *client) CreateSSHKey(ctx context.Context, r *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error) {
239 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
240 // Does the key already exist?
241 ks, _, err := cl.SSHKeys.List()
242 if err != nil {
243 return nil, fmt.Errorf("SSHKeys.List: %w", err)
244 }
245 for _, k := range ks {
246 if k.Label == r.Label {
247 if k.Key != r.Key {
248 return nil, fmt.Errorf("key label already in use for a different key")
249 }
250 return &k, nil
251 }
252 }
253
254 // No key yet. Try to create it.
255 k, _, err := cl.SSHKeys.Create(r)
256 if err != nil {
257 return nil, fmt.Errorf("SSHKeys.Create: %w", err)
258 }
259 return k, nil
260 })
261}
262
263func (e *client) UpdateSSHKey(ctx context.Context, id string, r *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error) {
264 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
265 k, _, err := cl.SSHKeys.Update(id, r)
266 if err != nil {
267 return nil, fmt.Errorf("SSHKeys.Update: %w", err)
268 }
269 return k, err
270 })
271}
272
273// Currently unexported, only used in tests.
274func (e *client) deleteSSHKey(ctx context.Context, id string) error {
275 _, err := wrap(ctx, e, func(cl *packngo.Client) (struct{}, error) {
276 _, err := cl.SSHKeys.Delete(id)
277 if err != nil {
278 return struct{}{}, fmt.Errorf("SSHKeys.Delete: %w", err)
279 }
280 return struct{}{}, err
281 })
282 return err
283}
284
285func (e *client) ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error) {
286 return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.SSHKey, error) {
287 ks, _, err := cl.SSHKeys.List()
288 if err != nil {
289 return nil, fmt.Errorf("SSHKeys.List: %w", err)
290 }
291 return ks, nil
292 })
293}
294
295// Currently unexported, only used in tests.
296func (e *client) getSSHKey(ctx context.Context, id string) (*packngo.SSHKey, error) {
297 return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
298 k, _, err := cl.SSHKeys.Get(id, nil)
299 if err != nil {
300 return nil, fmt.Errorf("SSHKeys.Get: %w", err)
301 }
302 return k, nil
303 })
304}