// Package wrapngo wraps packngo methods providing the following usability
// enhancements:
//   - API call rate limiting
//   - resource-aware call retries
//   - use of a configurable back-off algorithm implementation
//   - context awareness
//
// The implementation is provided with the following caveats:
//
// There can be only one call in flight. Concurrent calls to API-related
// methods of the same client will block. Calls returning packngo structs will
// return nil data when a non-nil error value is returned. An
// os.ErrDeadlineExceeded will be returned after the underlying API calls time
// out beyond the chosen back-off algorithm implementation's maximum allowed
// retry interval. Other errors, excluding context.Canceled and
// context.DeadlineExceeded, indicate either an error originating at Equinix's
// API endpoint (which may still stem from invalid call inputs), or a network
// error.
//
// Packngo wrappers included below may return timeout errors even after the
// wrapped calls succeed, in the event that the server reply could not be
// received.
//
// This implies that effects of mutating calls can't always be verified
// atomically, requiring explicit synchronization between API users, regardless
// of the retry/recovery logic used.
//
// With that in mind, some call wrappers exposed by this package will attempt
// to recover from these kinds of situations by requesting information on any
// resources created, and retrying the call if needed. This approach assumes
// any concurrent mutating API users will be synchronized, as they should be in
// any case.
//
// Another way of handling this problem would be to leave it up to the user to
// retry calls if needed, though this would leak the Equinix Metal API, and
// complicate implementations depending on this package. Due to that, the
// former approach was chosen.
| 38 | package wrapngo |
| 39 | |
| 40 | import ( |
| 41 | "context" |
| 42 | "errors" |
| 43 | "flag" |
| 44 | "fmt" |
| 45 | "net/http" |
| 46 | "time" |
| 47 | |
| 48 | "github.com/cenkalti/backoff/v4" |
| 49 | "github.com/google/uuid" |
| 50 | "github.com/packethost/packngo" |
| 51 | ) |
| 52 | |
| 53 | // Opts conveys configurable Client parameters. |
| 54 | type Opts struct { |
| 55 | // User and APIKey are the credentials used to authenticate with |
| 56 | // Metal API. |
| 57 | |
| 58 | User string |
| 59 | APIKey string |
| 60 | |
| 61 | // Optional parameters: |
| 62 | |
| 63 | // BackOff controls the client's behavior in the event of API calls failing |
| 64 | // due to IO timeouts by adjusting the lower bound on time taken between |
| 65 | // subsequent calls. |
| 66 | BackOff func() backoff.BackOff |
| 67 | |
| 68 | // APIRate is the minimum time taken between subsequent API calls. |
| 69 | APIRate time.Duration |
| 70 | } |
| 71 | |
| 72 | func (o *Opts) RegisterFlags() { |
| 73 | flag.StringVar(&o.User, "equinix_api_username", "", "Username for Equinix API") |
| 74 | flag.StringVar(&o.APIKey, "equinix_api_key", "", "Key/token/password for Equinix API") |
| 75 | } |
| 76 | |
// Client is a limited interface of methods that the Shepherd uses on Equinix. It
// is provided to allow for dependency injection of a fake equinix API for tests.
type Client interface {
	// GetDevice wraps packngo's cl.Devices.Get.
	GetDevice(ctx context.Context, pid, did string) (*packngo.Device, error)
	// ListDevices wraps packngo's cl.Devices.List.
	ListDevices(ctx context.Context, pid string) ([]packngo.Device, error)
	// CreateDevice attempts to create a new device according to the provided
	// request. The request _must_ configure a HardwareReservationID. This call
	// attempts to be as idempotent as possible, and will return ErrRaceLost if a
	// retry was needed but in the meantime the requested hardware reservation from
	// which this machine was requested got lost.
	CreateDevice(ctx context.Context, request *packngo.DeviceCreateRequest) (*packngo.Device, error)

	// ListReservations returns a complete list of hardware reservations associated
	// with project pid. This is an expensive method that takes a while to execute,
	// handle with care.
	ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error)

	// ListSSHKeys wraps packngo's cl.SSHKeys.List.
	ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error)
	// CreateSSHKey is idempotent - the key label can be used only once. Further
	// calls referring to the same label and key will not yield errors. See the
	// package comment for more info on this method's behavior and returned error
	// values.
	CreateSSHKey(ctx context.Context, req *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error)
	// UpdateSSHKey is idempotent - values included in req can be applied only
	// once, while subsequent updates using the same data don't produce errors.
	// See the package comment for information on this method's behavior and
	// returned error values.
	UpdateSSHKey(ctx context.Context, kid string, req *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error)

	// Close releases the resources held by the Client. The implementation in
	// this package stops its rate-limiting ticker.
	Close()
}
| 111 | |
| 112 | // client implements the Client interface. |
| 113 | type client struct { |
| 114 | username string |
| 115 | token string |
| 116 | o *Opts |
| 117 | rlt *time.Ticker |
| 118 | |
| 119 | // serializer is a 1-semaphore channel (effectively a mutex) which is used to |
| 120 | // limit the number of concurrent calls to the Equinix API. |
| 121 | serializer chan (struct{}) |
| 122 | } |
| 123 | |
| 124 | // New creates a Client instance based on Opts. PACKNGO_DEBUG environment |
| 125 | // variable can be set prior to the below call to enable verbose packngo |
| 126 | // debug logs. |
| 127 | func New(opts *Opts) Client { |
| 128 | return new(opts) |
| 129 | } |
| 130 | |
| 131 | func new(opts *Opts) *client { |
| 132 | // Apply the defaults. |
| 133 | if opts.APIRate == 0 { |
| 134 | opts.APIRate = 2 * time.Second |
| 135 | } |
| 136 | if opts.BackOff == nil { |
| 137 | opts.BackOff = func() backoff.BackOff { |
| 138 | return backoff.NewExponentialBackOff() |
| 139 | } |
| 140 | } |
| 141 | |
| 142 | return &client{ |
| 143 | username: opts.User, |
| 144 | token: opts.APIKey, |
| 145 | o: opts, |
| 146 | rlt: time.NewTicker(opts.APIRate), |
| 147 | |
| 148 | serializer: make(chan struct{}, 1), |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | func (c *client) Close() { |
| 153 | c.rlt.Stop() |
| 154 | } |
| 155 | |
// Sentinel errors returned by CreateDevice.
var (
	// ErrRaceLost is returned by CreateDevice when the requested hardware
	// reservation already carries a device that was not created by this
	// call, i.e. another API user claimed the reservation first.
	ErrRaceLost = errors.New("race lost with another API user")
	// ErrNoReservationProvided is returned by CreateDevice when the request
	// does not name a hardware reservation.
	ErrNoReservationProvided = errors.New("hardware reservation must be set")
)
| 160 | |
| 161 | func (e *client) CreateDevice(ctx context.Context, r *packngo.DeviceCreateRequest) (*packngo.Device, error) { |
| 162 | if r.HardwareReservationID == "" { |
| 163 | return nil, ErrNoReservationProvided |
| 164 | } |
| 165 | // Add a tag to the request to detect if someone snatches a hardware reservation |
| 166 | // from under us. |
| 167 | witnessTag := fmt.Sprintf("wrapngo-idempotency-%s", uuid.New().String()) |
| 168 | r.Tags = append(r.Tags, witnessTag) |
| 169 | |
| 170 | return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) { |
| 171 | //Does the device already exist? |
| 172 | res, _, err := cl.HardwareReservations.Get(r.HardwareReservationID, nil) |
| 173 | if err != nil { |
| 174 | return nil, fmt.Errorf("couldn't check if device already exists: %w", err) |
| 175 | } |
| 176 | if res == nil { |
| 177 | return nil, fmt.Errorf("unexpected nil response") |
| 178 | } |
| 179 | if res.Device != nil { |
| 180 | // Check if we lost the race for this hardware reservation. |
| 181 | tags := make(map[string]bool) |
| 182 | for _, tag := range res.Device.Tags { |
| 183 | tags[tag] = true |
| 184 | } |
| 185 | if !tags[witnessTag] { |
| 186 | return nil, ErrRaceLost |
| 187 | } |
| 188 | return res.Device, nil |
| 189 | } |
| 190 | |
| 191 | // No device yet. Try to create it. |
| 192 | dev, _, err := cl.Devices.Create(r) |
| 193 | if err == nil { |
| 194 | return dev, nil |
| 195 | } |
| 196 | // In case of a transient failure (eg. network issue), we retry the whole |
| 197 | // operation, which means we first check again if the device already exists. If |
| 198 | // it's a permanent error from the API, the backoff logic will fail immediately. |
| 199 | return nil, fmt.Errorf("couldn't create device: %w", err) |
| 200 | }) |
| 201 | } |
| 202 | |
| 203 | func (e *client) ListDevices(ctx context.Context, pid string) ([]packngo.Device, error) { |
| 204 | return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.Device, error) { |
| 205 | res, _, err := cl.Devices.List(pid, nil) |
| 206 | return res, err |
| 207 | }) |
| 208 | } |
| 209 | |
| 210 | func (e *client) GetDevice(ctx context.Context, pid, did string) (*packngo.Device, error) { |
| 211 | return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) { |
| 212 | d, _, err := cl.Devices.Get(did, nil) |
| 213 | return d, err |
| 214 | }) |
| 215 | } |
| 216 | |
| 217 | // Currently unexported, only used in tests. |
| 218 | func (e *client) deleteDevice(ctx context.Context, did string) error { |
| 219 | _, err := wrap(ctx, e, func(cl *packngo.Client) (*struct{}, error) { |
| 220 | _, err := cl.Devices.Delete(did, false) |
| 221 | if httpStatusCode(err) == http.StatusNotFound { |
| 222 | // 404s may pop up as an after effect of running the back-off |
| 223 | // algorithm, and as such should not be propagated. |
| 224 | return nil, nil |
| 225 | } |
| 226 | return nil, err |
| 227 | }) |
| 228 | return err |
| 229 | } |
| 230 | |
| 231 | func (e *client) ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) { |
| 232 | return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.HardwareReservation, error) { |
| 233 | res, _, err := cl.HardwareReservations.List(pid, nil) |
| 234 | return res, err |
| 235 | }) |
| 236 | } |
| 237 | |
| 238 | func (e *client) CreateSSHKey(ctx context.Context, r *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error) { |
| 239 | return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) { |
| 240 | // Does the key already exist? |
| 241 | ks, _, err := cl.SSHKeys.List() |
| 242 | if err != nil { |
| 243 | return nil, fmt.Errorf("SSHKeys.List: %w", err) |
| 244 | } |
| 245 | for _, k := range ks { |
| 246 | if k.Label == r.Label { |
| 247 | if k.Key != r.Key { |
| 248 | return nil, fmt.Errorf("key label already in use for a different key") |
| 249 | } |
| 250 | return &k, nil |
| 251 | } |
| 252 | } |
| 253 | |
| 254 | // No key yet. Try to create it. |
| 255 | k, _, err := cl.SSHKeys.Create(r) |
| 256 | if err != nil { |
| 257 | return nil, fmt.Errorf("SSHKeys.Create: %w", err) |
| 258 | } |
| 259 | return k, nil |
| 260 | }) |
| 261 | } |
| 262 | |
| 263 | func (e *client) UpdateSSHKey(ctx context.Context, id string, r *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error) { |
| 264 | return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) { |
| 265 | k, _, err := cl.SSHKeys.Update(id, r) |
| 266 | if err != nil { |
| 267 | return nil, fmt.Errorf("SSHKeys.Update: %w", err) |
| 268 | } |
| 269 | return k, err |
| 270 | }) |
| 271 | } |
| 272 | |
| 273 | // Currently unexported, only used in tests. |
| 274 | func (e *client) deleteSSHKey(ctx context.Context, id string) error { |
| 275 | _, err := wrap(ctx, e, func(cl *packngo.Client) (struct{}, error) { |
| 276 | _, err := cl.SSHKeys.Delete(id) |
| 277 | if err != nil { |
| 278 | return struct{}{}, fmt.Errorf("SSHKeys.Delete: %w", err) |
| 279 | } |
| 280 | return struct{}{}, err |
| 281 | }) |
| 282 | return err |
| 283 | } |
| 284 | |
| 285 | func (e *client) ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error) { |
| 286 | return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.SSHKey, error) { |
| 287 | ks, _, err := cl.SSHKeys.List() |
| 288 | if err != nil { |
| 289 | return nil, fmt.Errorf("SSHKeys.List: %w", err) |
| 290 | } |
| 291 | return ks, nil |
| 292 | }) |
| 293 | } |
| 294 | |
| 295 | // Currently unexported, only used in tests. |
| 296 | func (e *client) getSSHKey(ctx context.Context, id string) (*packngo.SSHKey, error) { |
| 297 | return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) { |
| 298 | k, _, err := cl.SSHKeys.Get(id, nil) |
| 299 | if err != nil { |
| 300 | return nil, fmt.Errorf("SSHKeys.Get: %w", err) |
| 301 | } |
| 302 | return k, nil |
| 303 | }) |
| 304 | } |