cloud: split shepherd up
Change-Id: I8e386d9eaaf17543743e1e8a37a8d71426910d59
Reviewed-on: https://review.monogon.dev/c/monogon/+/2213
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/equinix/cli/BUILD.bazel b/cloud/equinix/cli/BUILD.bazel
new file mode 100644
index 0000000..da94d95
--- /dev/null
+++ b/cloud/equinix/cli/BUILD.bazel
@@ -0,0 +1,27 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+# Library target holding the sources of the Equinix CLI. It is private to this
+# package and only meant to be embedded into the :cli binary below.
+go_library(
+ name = "cli_lib",
+ srcs = [
+ "cmd_delete.go",
+ "cmd_move.go",
+ "cmd_reboot.go",
+ "cmd_yoink.go",
+ "main.go",
+ ],
+ importpath = "source.monogon.dev/cloud/equinix/cli",
+ visibility = ["//visibility:private"],
+ deps = [
+ "//cloud/equinix/wrapngo",
+ "//metropolis/cli/pkg/context",
+ "@com_github_packethost_packngo//:packngo",
+ "@com_github_spf13_cobra//:cobra",
+ "@io_k8s_klog_v2//:klog",
+ ],
+)
+
+# Publicly visible executable built from the :cli_lib sources.
+go_binary(
+ name = "cli",
+ embed = [":cli_lib"],
+ visibility = ["//visibility:public"],
+)
diff --git a/cloud/equinix/cli/cmd_delete.go b/cloud/equinix/cli/cmd_delete.go
new file mode 100644
index 0000000..056956e
--- /dev/null
+++ b/cloud/equinix/cli/cmd_delete.go
@@ -0,0 +1,61 @@
+package main
+
+import (
+ "context"
+ "time"
+
+ "github.com/packethost/packngo"
+ "github.com/spf13/cobra"
+ "k8s.io/klog/v2"
+
+ "source.monogon.dev/cloud/equinix/wrapngo"
+ clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+// deleteCmd is the "delete" subcommand. It requires exactly one argument,
+// which doDelete uses as the project whose devices get deleted.
+var deleteCmd = &cobra.Command{
+ Use: "delete [target]",
+ Short: "Delete all devices from one project",
+ Args: cobra.ExactArgs(1),
+ Run: doDelete,
+}
+
+// init registers deleteCmd as a subcommand of rootCmd.
+func init() {
+ rootCmd.AddCommand(deleteCmd)
+}
+
+// doDelete implements the "delete" subcommand. It lists all devices of the
+// project given in args[0] and, after a five second grace period, renames
+// each device to "deleted-<hostname>" and then deletes it.
+//
+// A failure on one device is logged and that device is skipped; the remaining
+// devices are still processed. A device whose rename fails is not deleted.
+func doDelete(cmd *cobra.Command, args []string) {
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+	project := args[0]
+
+	klog.Infof("Listing devices for %q", project)
+
+	devices, err := api.ListDevices(ctx, project)
+	if err != nil {
+		klog.Exitf("failed listing devices: %v", err)
+	}
+
+	if len(devices) == 0 {
+		klog.Infof("No devices found in %s", project)
+		return
+	}
+
+	klog.Infof("Deleting %d Devices in %s. THIS WILL DELETE SERVERS! You have five seconds to cancel!", len(devices), project)
+	// Wait out the grace period, but stop right away if the context is
+	// canceled in the meantime instead of sleeping on and then issuing API
+	// calls that can only fail.
+	select {
+	case <-time.After(5 * time.Second):
+	case <-ctx.Done():
+		klog.Exitf("canceled before any device was deleted: %v", ctx.Err())
+	}
+
+	for _, d := range devices {
+		renamed := "deleted-" + d.Hostname
+		_, err := api.UpdateDevice(ctx, d.ID, &packngo.DeviceUpdateRequest{
+			Hostname: &renamed,
+		})
+		if err != nil {
+			klog.Errorf("failed updating device %s (%s): %v", d.ID, d.Hostname, err)
+			continue
+		}
+
+		klog.Infof("deleting %s (%s)...", d.ID, d.Hostname)
+		if err := api.DeleteDevice(ctx, d.ID); err != nil {
+			klog.Errorf("failed deleting device %s (%s): %v", d.ID, d.Hostname, err)
+		}
+	}
+}
diff --git a/cloud/equinix/cli/cmd_move.go b/cloud/equinix/cli/cmd_move.go
new file mode 100644
index 0000000..770e480
--- /dev/null
+++ b/cloud/equinix/cli/cmd_move.go
@@ -0,0 +1,43 @@
+package main
+
+import (
+ "context"
+
+ "github.com/spf13/cobra"
+ "k8s.io/klog/v2"
+
+ "source.monogon.dev/cloud/equinix/wrapngo"
+ clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+// moveCmd is the "move" subcommand. It requires exactly two arguments: the
+// source project and the target project, in that order (see doMove).
+var moveCmd = &cobra.Command{
+ Use: "move [source] [target]",
+ Short: "Move all reserved hardware from one to another project",
+ Args: cobra.ExactArgs(2),
+ Run: doMove,
+}
+
+// init registers moveCmd as a subcommand of rootCmd.
+func init() {
+ rootCmd.AddCommand(moveCmd)
+}
+
+// doMove implements the "move" subcommand: every hardware reservation listed
+// for the source project (args[0]) is moved to the target project (args[1]).
+// A reservation that fails to move is logged and skipped.
+func doMove(cmd *cobra.Command, args []string) {
+	srcProject, dstProject := args[0], args[1]
+
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+
+	klog.Infof("Listing reservations for %q", srcProject)
+	reservations, err := api.ListReservations(ctx, srcProject)
+	if err != nil {
+		klog.Exitf("failed listing reservations: %v", err)
+	}
+
+	klog.Infof("Got %d reservations. Moving machines", len(reservations))
+	for i := range reservations {
+		id := reservations[i].ID
+		if _, moveErr := api.MoveReservation(ctx, id, dstProject); moveErr != nil {
+			klog.Errorf("failed moving reservation: %v", moveErr)
+			continue
+		}
+		klog.Infof("Moved Device %s", id)
+	}
+}
diff --git a/cloud/equinix/cli/cmd_reboot.go b/cloud/equinix/cli/cmd_reboot.go
new file mode 100644
index 0000000..7fcd35c
--- /dev/null
+++ b/cloud/equinix/cli/cmd_reboot.go
@@ -0,0 +1,46 @@
+package main
+
+import (
+ "context"
+
+ "github.com/spf13/cobra"
+ "k8s.io/klog/v2"
+
+ "source.monogon.dev/cloud/equinix/wrapngo"
+ clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+// rebootCmd is the "reboot" subcommand. The first argument is the project
+// whose devices are listed; the optional second argument restricts the reboot
+// to the device with that ID.
+var rebootCmd = &cobra.Command{
+	Use:   "reboot [project] [id]",
+	Short: "Reboots all or one specific node",
+	// doReboot always reads args[0] and optionally args[1], so one or two
+	// arguments must be accepted (and zero rejected).
+	Args: cobra.RangeArgs(1, 2),
+	Run:  doReboot,
+}
+
+// init registers rebootCmd as a subcommand of rootCmd.
+func init() {
+	rootCmd.AddCommand(rebootCmd)
+}
+
+// doReboot implements the "reboot" subcommand. It lists the devices of the
+// project given in args[0] and reboots all of them, or only the device whose
+// ID equals args[1] when a second argument is present.
+//
+// A failure to reboot one device is logged and the remaining devices are
+// still processed.
+func doReboot(cmd *cobra.Command, args []string) {
+	// Guard against being invoked without the project argument, which would
+	// otherwise panic on args[0].
+	if len(args) == 0 {
+		klog.Exitf("missing project argument")
+	}
+	project := args[0]
+	filterByID := len(args) >= 2
+
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+
+	klog.Infof("Requesting device list...")
+	devices, err := api.ListDevices(ctx, project)
+	if err != nil {
+		klog.Exitf("failed listing devices: %v", err)
+	}
+
+	matched := 0
+	for _, d := range devices {
+		if filterByID && args[1] != d.ID {
+			continue
+		}
+		matched++
+
+		if err := api.RebootDevice(ctx, d.ID); err != nil {
+			klog.Error(err)
+			continue
+		}
+		klog.Infof("rebooted %s", d.ID)
+	}
+
+	// Make a mistyped device ID visible instead of silently doing nothing.
+	if filterByID && matched == 0 {
+		klog.Warningf("device %s not found in project %s", args[1], project)
+	}
+}
diff --git a/cloud/equinix/cli/cmd_yoink.go b/cloud/equinix/cli/cmd_yoink.go
new file mode 100644
index 0000000..bda9e82
--- /dev/null
+++ b/cloud/equinix/cli/cmd_yoink.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+ "bufio"
+ "context"
+ "os"
+ "sort"
+ "strconv"
+ "strings"
+
+ "github.com/packethost/packngo"
+ "github.com/spf13/cobra"
+ "k8s.io/klog/v2"
+
+ "source.monogon.dev/cloud/equinix/wrapngo"
+ clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+// yoinkCmd is the "yoink" subcommand. It takes no positional arguments; all
+// of its inputs are the flags registered in init below.
+//
+// The help texts describe what doYoink does: reservations of the source
+// project are filtered by spec and metro, and the selected ones are moved to
+// the target project.
+var yoinkCmd = &cobra.Command{
+	Use: "yoink",
+	Long: `This moves a specified amount of servers that match the given spec and metro to a different project.
+While spec is an easy to find argument that matches the equinix system spec e.g. w3amd.75xx24c.512.8160.x86,
+metro does not represent the public facing name. Instead it is the actual datacenter name e.g. fr2`,
+	Short: "Move a server based on the spec from one to another project",
+	Args:  cobra.NoArgs,
+	Run:   doYoink,
+}
+
+// init declares the yoink flags and registers yoinkCmd as a subcommand of
+// rootCmd.
+func init() {
+	yoinkCmd.Flags().Int("count", 1, "how many machines should be moved")
+	yoinkCmd.Flags().String("equinix_source_project", "", "from which project should the machine be yoinked")
+	yoinkCmd.Flags().String("equinix_target_project", "", "to which project should the machine be moved")
+	yoinkCmd.Flags().String("spec", "", "which device spec should be moved")
+	yoinkCmd.Flags().String("metro", "", "in which metro the machines to be moved are located")
+	rootCmd.AddCommand(yoinkCmd)
+}
+
+// doYoink implements the "yoink" subcommand. It moves --count hardware
+// reservations matching --spec and --metro from --equinix_source_project to
+// --equinix_target_project.
+//
+// Reservations without a deployed device are preferred. If deployed
+// reservations have to be used, their devices are deleted before the move,
+// and the operator has to confirm this by typing the number of hosts that
+// will be deleted.
+func doYoink(cmd *cobra.Command, args []string) {
+	srcProject, err := cmd.Flags().GetString("equinix_source_project")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+
+	dstProject, err := cmd.Flags().GetString("equinix_target_project")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+
+	if srcProject == "" || dstProject == "" {
+		klog.Exitf("missing project flags")
+	}
+
+	count, err := cmd.Flags().GetInt("count")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+
+	// A non-positive count would either move nothing or panic when slicing
+	// the matching reservations below.
+	if count < 1 {
+		klog.Exitf("count must be at least 1, got %d", count)
+	}
+
+	spec, err := cmd.Flags().GetString("spec")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+
+	if spec == "" {
+		klog.Exitf("missing spec flag")
+	}
+
+	metro, err := cmd.Flags().GetString("metro")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+
+	if metro == "" {
+		klog.Exitf("missing metro flag")
+	}
+
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+
+	klog.Infof("Listing reservations for %q", srcProject)
+	reservations, err := api.ListReservations(ctx, srcProject)
+	if err != nil {
+		klog.Exitf("Failed to list reservations: %v", err)
+	}
+
+	// configDC identifies a machine type: a plan name in a given metro, both
+	// lowercased so that the comparison is case-insensitive.
+	type configDC struct {
+		config string
+		dc     string
+	}
+	mtypes := make(map[configDC]int)
+
+	var matchingReservations []packngo.HardwareReservation
+	reqType := configDC{config: strings.ToLower(spec), dc: strings.ToLower(metro)}
+
+	klog.Infof("Got %d reservations", len(reservations))
+	for _, r := range reservations {
+		curType := configDC{config: strings.ToLower(r.Plan.Name), dc: strings.ToLower(r.Facility.Metro.Code)}
+
+		mtypes[curType]++
+		if curType == reqType {
+			matchingReservations = append(matchingReservations, r)
+		}
+	}
+
+	klog.Infof("Found the following configurations:")
+	for mtype, n := range mtypes {
+		klog.Infof("%s | %s | %d", mtype.dc, mtype.config, n)
+	}
+
+	if len(matchingReservations) == 0 {
+		klog.Exitf("Configuration not found: %s - %s", reqType.dc, reqType.config)
+	}
+
+	if len(matchingReservations) < count {
+		klog.Exitf("Not enough machines with matching configuration found ")
+	}
+
+	// prefer hosts that are not deployed
+	sort.Slice(matchingReservations, func(i, j int) bool {
+		return matchingReservations[i].Device == nil && matchingReservations[j].Device != nil
+	})
+
+	toMove := matchingReservations[:count]
+	var toDelete []string
+	for _, r := range toMove {
+		if r.Device != nil {
+			toDelete = append(toDelete, r.Device.Hostname)
+		}
+	}
+
+	klog.Infof("Will move %d machines with spec %s in %s from %s to %s.", count, spec, metro, srcProject, dstProject)
+	if len(toDelete) > 0 {
+		klog.Warningf("Not enough free machines found. This will delete %d provisioned hosts! Hosts scheduled for deletion: ", len(toDelete))
+		klog.Warningf("%s", strings.Join(toDelete, ", "))
+		// The number asked for is the one checked below: the hosts that get
+		// deleted, which can be fewer than the machines that get moved.
+		klog.Warningf("Please confirm by inputting the number of hosts that will be deleted.")
+
+		read, err := bufio.NewReader(os.Stdin).ReadString('\n')
+		if err != nil {
+			klog.Exitf("failed reading input: %v", err)
+		}
+
+		confirmed, err := strconv.Atoi(strings.TrimSpace(read))
+		if err != nil {
+			klog.Exitf("failed parsing number: %v", err)
+		}
+
+		if confirmed != len(toDelete) {
+			klog.Exitf("Confirmation failed! Wanted \"%d\" got \"%d\"", len(toDelete), confirmed)
+		}
+		klog.Infof("Thanks for the confirmation! continuing...")
+	}
+
+	klog.Infof("Note: It can be normal for a device move to fail for project validation issues. This is a known issue and can be ignored")
+	for _, r := range toMove {
+		if r.Device != nil {
+			klog.Warningf("Deleting server %s (%s) on %s", r.Device.ID, r.Device.Hostname, r.ID)
+
+			// A reservation whose device could not be deleted is not moved.
+			if err := api.DeleteDevice(ctx, r.Device.ID); err != nil {
+				klog.Errorf("failed deleting device %s (%s): %v", r.Device.ID, r.Device.Hostname, err)
+				continue
+			}
+		}
+
+		if _, err := api.MoveReservation(ctx, r.ID, dstProject); err != nil {
+			klog.Errorf("failed moving device %s: %v", r.ID, err)
+		}
+	}
+}
diff --git a/cloud/equinix/cli/main.go b/cloud/equinix/cli/main.go
new file mode 100644
index 0000000..a85c0d0
--- /dev/null
+++ b/cloud/equinix/cli/main.go
@@ -0,0 +1,32 @@
+package main
+
+import (
+ "flag"
+
+ "github.com/spf13/cobra"
+
+ "k8s.io/klog/v2"
+
+ "source.monogon.dev/cloud/equinix/wrapngo"
+)
+
+// rootCmd represents the base command when called without any subcommands.
+//
+// Its PersistentPreRunE hook checks that both Equinix credentials in c are
+// non-empty and terminates the process via klog.Exitf otherwise; it never
+// returns a non-nil error itself.
+var rootCmd = &cobra.Command{
+ PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
+ if c.APIKey == "" || c.User == "" {
+ klog.Exitf("-equinix_api_username and -equinix_api_key must be set")
+ }
+ return nil
+ },
+}
+
+// c holds the Equinix API options shared by all subcommands. Its fields are
+// bound to command line flags in init.
+var c wrapngo.Opts
+
+// init registers the wrapngo flags on the standard library flag set and
+// exposes that whole flag set as persistent flags of rootCmd.
+func init() {
+ c.RegisterFlags()
+ rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine)
+}
+
+// main executes rootCmd and hands any resulting error to cobra.CheckErr.
+func main() {
+ cobra.CheckErr(rootCmd.Execute())
+}
diff --git a/cloud/equinix/wrapngo/BUILD.bazel b/cloud/equinix/wrapngo/BUILD.bazel
new file mode 100644
index 0000000..1574a6a
--- /dev/null
+++ b/cloud/equinix/wrapngo/BUILD.bazel
@@ -0,0 +1,31 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+# Publicly visible library wrapping packngo (see the package comment in
+# wrapn.go).
+go_library(
+ name = "wrapngo",
+ srcs = [
+ "duct_tape.go",
+ "metrics.go",
+ "wrapn.go",
+ ],
+ importpath = "source.monogon.dev/cloud/equinix/wrapngo",
+ visibility = ["//visibility:public"],
+ deps = [
+ "@com_github_cenkalti_backoff_v4//:backoff",
+ "@com_github_google_uuid//:uuid",
+ "@com_github_packethost_packngo//:packngo",
+ "@com_github_prometheus_client_golang//prometheus",
+ "@io_k8s_klog_v2//:klog",
+ ],
+)
+
+# Tests from wrapngo_live_test.go, compiled into the library package (embed),
+# run verbosely (-test.v) and with the "eternal" timeout setting.
+go_test(
+ name = "wrapngo_test",
+ timeout = "eternal",
+ srcs = ["wrapngo_live_test.go"],
+ args = ["-test.v"],
+ embed = [":wrapngo"],
+ deps = [
+ "@com_github_packethost_packngo//:packngo",
+ "@org_golang_x_crypto//ssh",
+ ],
+)
diff --git a/cloud/equinix/wrapngo/duct_tape.go b/cloud/equinix/wrapngo/duct_tape.go
new file mode 100644
index 0000000..d5dab7c
--- /dev/null
+++ b/cloud/equinix/wrapngo/duct_tape.go
@@ -0,0 +1,126 @@
+package wrapngo
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "net/http"
+ "time"
+
+ "github.com/cenkalti/backoff/v4"
+ "github.com/packethost/packngo"
+ "k8s.io/klog/v2"
+)
+
+// wrap a given fn in some reliability-increasing duct tape: context support and
+// exponential backoff retries for intermittent connectivity issues. This allows
+// us to use packngo code instead of writing our own API stub for Equinix Metal.
+//
+// The given fn will be retried until it returns a 'permanent' Equinix error (see
+// isPermanentEquinixError) or the given context expires. Additionally, fn will
+// be called with a brand new packngo client tied to the context of the wrap
+// call. Finally, the given client will also have some logging middleware
+// attached to it which can be activated by setting verbosity 5 (or greater) on
+// this file.
+//
+// The wrapped fn can be either just a plain packngo method or some complicated
+// idempotent logic, as long as it cooperates with the above contract.
+//
+// A concurrency slot of cl.serializer is held for the whole duration of the
+// call, including all retries. On any error the zero value of U is returned
+// alongside it.
+func wrap[U any](ctx context.Context, cl *client, fn func(*packngo.Client) (U, error)) (U, error) {
+	var zero U
+	if err := cl.serializer.up(ctx); err != nil {
+		return zero, err
+	}
+	defer cl.serializer.down()
+
+	pngo, err := cl.clientForContext(ctx)
+	if err != nil {
+		// Generally this shouldn't happen other than with programming errors, so we
+		// don't back this off.
+		return zero, fmt.Errorf("could not create equinix client: %w", err)
+	}
+
+	var res U
+	attempt := func() error {
+		// Use an attempt-local error so that the closure only ever writes to
+		// res, never to variables of the enclosing function.
+		var attemptErr error
+		res, attemptErr = fn(pngo)
+		if isPermanentEquinixError(attemptErr) {
+			return backoff.Permanent(attemptErr)
+		}
+		return attemptErr
+	}
+	if err := backoff.Retry(attempt, backoff.WithContext(cl.o.BackOff(), ctx)); err != nil {
+		return zero, err
+	}
+	return res, nil
+}
+
+// injectContextRoundTripper is an http.RoundTripper which sends every request
+// through original with ctx attached to it, and reports each finished request
+// to metrics.
+type injectContextRoundTripper struct {
+ ctx context.Context
+ original http.RoundTripper
+ metrics *metricsSet
+}
+
+// RoundTrip implements http.RoundTripper. The request is re-issued through
+// the original transport with r.ctx attached, its latency and outcome are
+// handed to the metrics set, and both the request and its result are logged
+// at verbosity 5.
+func (r *injectContextRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	klog.V(5).Infof("Request -> %v", req.URL.String())
+
+	began := time.Now()
+	resp, rtErr := r.original.RoundTrip(req.WithContext(r.ctx))
+	elapsed := time.Since(began)
+	r.metrics.onAPIRequestDone(req, resp, rtErr, elapsed)
+
+	if rtErr == nil {
+		klog.V(5).Infof("Response <- %v", resp.Status)
+		return resp, nil
+	}
+	klog.V(5).Infof("HTTP error <- %v", rtErr)
+	return resp, rtErr
+}
+
+// clientForContext builds a packngo client authenticated with the client's
+// username and token. Its HTTP transport wraps http.DefaultTransport in an
+// injectContextRoundTripper bound to ctx and to the client's metrics.
+func (c *client) clientForContext(ctx context.Context) (*packngo.Client, error) {
+	transport := &injectContextRoundTripper{
+		ctx:      ctx,
+		original: http.DefaultTransport,
+		metrics:  c.metrics,
+	}
+	return packngo.NewClient(
+		packngo.WithAuth(c.username, c.token),
+		packngo.WithHTTPClient(&http.Client{Transport: transport}),
+	)
+}
+
+// httpStatusCode extracts the status code from error values returned by
+// packngo methods.
+//
+// It returns -1 if err is nil, if err does not wrap a *packngo.ErrorResponse,
+// or if that error response carries no HTTP response.
+func httpStatusCode(err error) int {
+	var er *packngo.ErrorResponse
+	// errors.As reports false for a nil err, so no separate nil check is
+	// needed. The Response pointer is checked to avoid a nil dereference.
+	if errors.As(err, &er) && er != nil && er.Response != nil {
+		return er.Response.StatusCode
+	}
+	return -1
+}
+
+// IsNotFound returns true if the given error is an Equinix packngo/wrapngo 'not
+// found' error, ie. one for which httpStatusCode yields HTTP 404.
+func IsNotFound(err error) bool {
+ return httpStatusCode(err) == http.StatusNotFound
+}
+
+// isPermanentEquinixError reports whether err should stop the retry loop in
+// wrap. That is the case for the wrapngo sentinel errors ErrRaceLost and
+// ErrNoReservationProvided, and for Equinix responses with status 401, 403,
+// 404 or 422.
+func isPermanentEquinixError(err error) bool {
+	// Invalid argument/state errors from wrapping.
+	if errors.Is(err, ErrRaceLost) || errors.Is(err, ErrNoReservationProvided) {
+		return true
+	}
+	// Real errors returned from equinix.
+	switch httpStatusCode(err) {
+	case http.StatusUnauthorized,
+		http.StatusForbidden,
+		http.StatusNotFound,
+		http.StatusUnprocessableEntity:
+		return true
+	default:
+		return false
+	}
+}
diff --git a/cloud/equinix/wrapngo/metrics.go b/cloud/equinix/wrapngo/metrics.go
new file mode 100644
index 0000000..fef506b
--- /dev/null
+++ b/cloud/equinix/wrapngo/metrics.go
@@ -0,0 +1,129 @@
+package wrapngo
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "net/http"
+ "regexp"
+ "strings"
+ "time"
+
+ "github.com/prometheus/client_golang/prometheus"
+ "k8s.io/klog/v2"
+)
+
+// metricsSet contains all the Prometheus metrics collected by wrapngo.
+type metricsSet struct {
+ // requestLatencies observes API request latency in seconds, labelled by
+ // endpoint and status code.
+ requestLatencies *prometheus.HistogramVec
+ // waiting reports the number of requests waiting on the serializer.
+ waiting prometheus.GaugeFunc
+ // inFlight reports the number of requests holding a serializer slot.
+ inFlight prometheus.GaugeFunc
+}
+
+// newMetricsSet creates the wrapngo metrics. The two gauges read their values
+// from the statistics of the given serializer each time they are collected.
+// The returned metrics are not registered anywhere yet.
+func newMetricsSet(ser *serializer) *metricsSet {
+	latencies := prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "equinix_api_latency",
+			Help: "Equinix API request latency in seconds, partitioned by endpoint status code",
+		},
+		[]string{"endpoint", "status_code"},
+	)
+
+	waitingGauge := prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Name: "equinix_api_waiting",
+			Help: "Number of API requests pending to be sent to Equinix but waiting on semaphore",
+		},
+		func() float64 {
+			_, pending := ser.stats()
+			return float64(pending)
+		},
+	)
+
+	inFlightGauge := prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Name: "equinix_api_in_flight",
+			Help: "Number of API requests currently being processed by Equinix",
+		},
+		func() float64 {
+			active, _ := ser.stats()
+			return float64(active)
+		},
+	)
+
+	return &metricsSet{
+		requestLatencies: latencies,
+		waiting:          waitingGauge,
+		inFlight:         inFlightGauge,
+	}
+}
+
+// getEndpointForPath maps an Equinix API method and path (eg.
+// /metal/v1/devices/deadbeef) to an 'endpoint' name, which is an imaginary,
+// Monogon-specific name for the API endpoint accessed by this call. The
+// /metal/v1 prefix is stripped before matching against endpointNames.
+//
+// If no entry matches, 'Unknown' is returned and a warning is logged.
+//
+// This function is used to partition request statistics per API 'endpoint'.
+// Recording high-level packngo function names instead is not an option, as a
+// single packngo call might emit multiple HTTP API requests - so statistics
+// are gathered from the low-level requests.
+func getEndpointForPath(method, path string) string {
+	trimmed := strings.TrimPrefix(path, "/metal/v1")
+	for endpoint, rm := range endpointNames {
+		if !rm.matches(method, trimmed) {
+			continue
+		}
+		return endpoint
+	}
+	klog.Warningf("Unknown Equinix API %s %s - cannot determine metric endpoint name", method, trimmed)
+	return "Unknown"
+}
+
+// requestMatch is used to match a HTTP request method/path.
+type requestMatch struct {
+ // method is the HTTP method, compared for exact equality.
+ method string
+ // regexp is matched against the request path.
+ regexp *regexp.Regexp
+}
+
+// matches reports whether the given method equals r.method and the given path
+// matches r.regexp.
+func (r *requestMatch) matches(method, path string) bool {
+	return r.method == method && r.regexp.MatchString(path)
+}
+
+var (
+	// endpointNames maps metric endpoint names to the HTTP method and path
+	// pattern identifying them. Paths are matched with the /metal/v1 prefix
+	// already removed (see getEndpointForPath).
+	endpointNames = map[string]requestMatch{
+		"GetDevice":        {"GET", regexp.MustCompile(`^/devices/[^/]+$`)},
+		"ListDevices":      {"GET", regexp.MustCompile(`^/(organizations|projects)/[^/]+/devices$`)},
+		"CreateDevice":     {"POST", regexp.MustCompile(`^/projects/[^/]+/devices$`)},
+		"ListReservations": {"GET", regexp.MustCompile(`^/projects/[^/]+/hardware-reservations$`)},
+		"ListSSHKeys":      {"GET", regexp.MustCompile(`^/ssh-keys$`)},
+		// packngo's SSHKeys.Create posts to /ssh-keys, or to
+		// /projects/<id>/ssh-keys when the request names a project.
+		"CreateSSHKey":        {"POST", regexp.MustCompile(`^(/projects/[^/]+)?/ssh-keys$`)},
+		"GetSSHKey":           {"GET", regexp.MustCompile(`^/ssh-keys/[^/]+$`)},
+		"UpdateSSHKey":        {"PATCH", regexp.MustCompile(`^/ssh-keys/[^/]+$`)},
+		"PerformDeviceAction": {"POST", regexp.MustCompile(`^/devices/[^/]+/actions$`)},
+	}
+)
+
+// onAPIRequestDone is called by the wrapngo code on every API response from
+// Equinix, and records the given parameters into metrics.
+func (m *metricsSet) onAPIRequestDone(req *http.Request, res *http.Response, err error, latency time.Duration) {
+ if m == nil {
+ return
+ }
+
+ code := "unknown"
+ if err == nil {
+ code = fmt.Sprintf("%d", res.StatusCode)
+ } else {
+ switch {
+ case errors.Is(err, context.Canceled):
+ code = "ctx canceled"
+ case errors.Is(err, context.DeadlineExceeded):
+ code = "deadline exceeded"
+ }
+ }
+ if code == "unknown" {
+ klog.Warningf("Unexpected HTTP result: req %s %s, error: %v", req.Method, req.URL.Path, res)
+ }
+
+ endpoint := getEndpointForPath(req.Method, req.URL.Path)
+ m.requestLatencies.With(prometheus.Labels{"endpoint": endpoint, "status_code": code}).Observe(latency.Seconds())
+}
diff --git a/cloud/equinix/wrapngo/wrapn.go b/cloud/equinix/wrapngo/wrapn.go
new file mode 100644
index 0000000..7bd4522
--- /dev/null
+++ b/cloud/equinix/wrapngo/wrapn.go
@@ -0,0 +1,433 @@
+// Package wrapngo wraps packngo methods providing the following usability
+// enhancements:
+// - API call rate limiting
+// - resource-aware call retries
+// - use of a configurable back-off algorithm implementation
+// - context awareness
+//
+// The implementation is provided with the following caveats:
+//
+// There can be only one call in flight. Concurrent calls to API-related
+// methods of the same client will block. Calls returning packngo structs will
+// return nil data when a non-nil error value is returned. An
+// os.ErrDeadlineExceeded will be returned after the underlying API calls time
+// out beyond the chosen back-off algorithm implementation's maximum allowed
+// retry interval. Other errors, excluding context.Canceled and
+// context.DeadlineExceeded, indicate either an error originating at Equinix'
+// API endpoint (which may still stem from invalid call inputs), or a network
+// error.
+//
+// Packngo wrappers included below may return timeout errors even after the
+// wrapped calls succeed in the event server reply could not have been
+// received.
+//
+// This implies that effects of mutating calls can't always be verified
+// atomically, requiring explicit synchronization between API users, regardless
+// of the retry/recovery logic used.
+//
+// Having that in mind, some call wrappers exposed by this package will attempt
+// to recover from this kind of situations by requesting information on any
+// resources created, and retrying the call if needed. This approach assumes
+// any concurrent mutating API users will be synchronized, as it should be in
+// any case.
+//
+// Another way of handling this problem would be to leave it up to the user to
+// retry calls if needed, though this would leak Equinix Metal API, and
+// complicate implementations depending on this package. Due to that, the prior
+// approach was chosen.
+package wrapngo
+
+import (
+ "context"
+ "errors"
+ "flag"
+ "fmt"
+ "net/http"
+ "sync/atomic"
+ "time"
+
+ "github.com/cenkalti/backoff/v4"
+ "github.com/google/uuid"
+ "github.com/packethost/packngo"
+ "github.com/prometheus/client_golang/prometheus"
+)
+
+// Opts conveys configurable Client parameters.
+type Opts struct {
+ // User and APIKey are the credentials used to authenticate with
+ // Metal API.
+
+ User string
+ APIKey string
+
+ // Optional parameters:
+
+ // BackOff controls the client's behavior in the event of API calls failing
+ // due to IO timeouts by adjusting the lower bound on time taken between
+ // subsequent calls. If nil, an exponential back-off is used.
+ BackOff func() backoff.BackOff
+
+ // APIRate is the minimum time taken between subsequent API calls. If zero,
+ // it defaults to two seconds.
+ APIRate time.Duration
+
+ // Parallelism defines how many calls to the Equinix API will be issued in
+ // parallel. When this limit is reached, subsequent attempts to call the API will
+ // block. The order of serving of pending calls is currently undefined.
+ //
+ // If not defined (ie. 0), defaults to 1.
+ Parallelism int
+
+ // MetricsRegistry, if non-nil, is the Prometheus registry into which the
+ // client registers its metrics on creation.
+ MetricsRegistry *prometheus.Registry
+}
+
+// RegisterFlags binds User, APIKey and Parallelism to flags on the standard
+// library's default flag set. Note that the flag default for Parallelism is 3.
+func (o *Opts) RegisterFlags() {
+ flag.StringVar(&o.User, "equinix_api_username", "", "Username for Equinix API")
+ flag.StringVar(&o.APIKey, "equinix_api_key", "", "Key/token/password for Equinix API")
+ flag.IntVar(&o.Parallelism, "equinix_parallelism", 3, "How many parallel connections to the Equinix API will be allowed")
+}
+
+// Client is a limited interface of methods that the Shepherd uses on Equinix. It
+// is provided to allow for dependency injection of a fake equinix API for tests.
+type Client interface {
+ // GetDevice wraps packngo's cl.Devices.Get.
+ //
+ // TODO(q3k): remove unused pid parameter.
+ GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error)
+ // ListDevices wraps packngo's cl.Devices.List.
+ ListDevices(ctx context.Context, pid string) ([]packngo.Device, error)
+ // CreateDevice attempts to create a new device according to the provided
+ // request. The request _must_ configure a HardwareReservationID. This call
+ // attempts to be as idempotent as possible, and will return ErrRaceLost if a
+ // retry was needed but in the meantime the requested hardware reservation from
+ // which this machine was requested got lost.
+ CreateDevice(ctx context.Context, request *packngo.DeviceCreateRequest) (*packngo.Device, error)
+
+ // UpdateDevice wraps packngo's cl.Devices.Update.
+ UpdateDevice(ctx context.Context, id string, request *packngo.DeviceUpdateRequest) (*packngo.Device, error)
+ // RebootDevice wraps packngo's cl.Devices.Reboot.
+ RebootDevice(ctx context.Context, did string) error
+ // DeleteDevice wraps packngo's cl.Devices.Delete.
+ DeleteDevice(ctx context.Context, id string) error
+
+ // ListReservations returns a complete list of hardware reservations associated
+ // with project pid. This is an expensive method that takes a while to execute,
+ // handle with care.
+ ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error)
+ // MoveReservation moves a reserved device to the given project.
+ MoveReservation(ctx context.Context, hardwareReservationDID, projectID string) (*packngo.HardwareReservation, error)
+
+ // ListSSHKeys wraps packngo's cl.Keys.List.
+ ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error)
+ // CreateSSHKey is idempotent - the key label can be used only once. Further
+ // calls referring to the same label and key will not yield errors. See the
+ // package comment for more info on this method's behavior and returned error
+ // values.
+ CreateSSHKey(ctx context.Context, req *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error)
+ // UpdateSSHKey is idempotent - values included in r can be applied only once,
+ // while subsequent updates using the same data don't produce errors. See the
+ // package comment for information on this method's behavior and returned error
+ // values.
+ UpdateSSHKey(ctx context.Context, kid string, req *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error)
+
+ // Close releases resources held by the client.
+ Close()
+}
+
+// client implements the Client interface.
+type client struct {
+ // username and token are the credentials passed to packngo.
+ username string
+ token string
+ // o is the Opts the client was created with, including applied defaults.
+ o *Opts
+ // rlt is a ticker with a period of o.APIRate. It is stopped by Close.
+ // NOTE(review): no code in view reads from this ticker - confirm whether
+ // rate limiting is actually enforced.
+ rlt *time.Ticker
+
+ // serializer limits the number of concurrent API calls.
+ serializer *serializer
+ // metrics is nil unless a metrics registry was configured.
+ metrics *metricsSet
+}
+
+// serializer is an N-semaphore channel (configured by opts.Parallelism) which is
+// used to limit the number of concurrent calls to the Equinix API.
+//
+// In addition, it implements some simple waiting/usage statistics for
+// metrics/introspection.
+type serializer struct {
+ // sem is the semaphore: a slot is taken by sending and freed by receiving.
+ sem chan struct{}
+ // usage counts currently held slots. Accessed atomically.
+ usage int64
+ // waiting counts callers currently blocked in up. Accessed atomically.
+ waiting int64
+}
+
+// up blocks until the serializer has at least one available concurrent call
+// slot. If the given context expires before such a slot is available, the
+// context error is returned.
+//
+// While blocked, the caller is counted in the waiting statistic; once a slot
+// is acquired it is counted in the usage statistic instead.
+func (s *serializer) up(ctx context.Context) error {
+ atomic.AddInt64(&s.waiting, 1)
+ select {
+ case s.sem <- struct{}{}:
+ atomic.AddInt64(&s.waiting, -1)
+ atomic.AddInt64(&s.usage, 1)
+ return nil
+ case <-ctx.Done():
+ atomic.AddInt64(&s.waiting, -1)
+ return ctx.Err()
+ }
+}
+
+// down releases a previously acquired concurrent call slot and decrements the
+// usage statistic. It must only be called after a successful up.
+func (s *serializer) down() {
+ atomic.AddInt64(&s.usage, -1)
+ <-s.sem
+}
+
+// stats reports how many requests currently hold a slot (usage) and how many
+// are blocked waiting for one (waiting). The two counters are loaded one
+// after the other, not as a single snapshot.
+func (s *serializer) stats() (usage, waiting int64) {
+	return atomic.LoadInt64(&s.usage), atomic.LoadInt64(&s.waiting)
+}
+
+// New creates a Client instance based on Opts. PACKNGO_DEBUG environment
+// variable can be set prior to the below call to enable verbose packngo
+// debug logs.
+//
+// Unset optional fields of opts are overwritten with their defaults.
+func New(opts *Opts) Client {
+ return new(opts)
+}
+
+// new builds the concrete client behind New. Zero-valued optional fields of
+// opts are replaced in place with defaults (2s API rate, exponential
+// back-off, parallelism of 1). If a metrics registry is configured, the
+// client's metrics are created and registered with it.
+func new(opts *Opts) *client {
+	// Apply the defaults.
+	if opts.APIRate == 0 {
+		opts.APIRate = 2 * time.Second
+	}
+	if opts.BackOff == nil {
+		opts.BackOff = func() backoff.BackOff { return backoff.NewExponentialBackOff() }
+	}
+	if opts.Parallelism == 0 {
+		opts.Parallelism = 1
+	}
+
+	cl := &client{
+		username:   opts.User,
+		token:      opts.APIKey,
+		o:          opts,
+		rlt:        time.NewTicker(opts.APIRate),
+		serializer: &serializer{sem: make(chan struct{}, opts.Parallelism)},
+	}
+
+	if registry := opts.MetricsRegistry; registry != nil {
+		ms := newMetricsSet(cl.serializer)
+		registry.MustRegister(ms.inFlight, ms.waiting, ms.requestLatencies)
+		cl.metrics = ms
+	}
+	return cl
+}
+
+// Close stops the client's rate ticker.
+func (c *client) Close() {
+ c.rlt.Stop()
+}
+
+var (
+ // ErrRaceLost is returned by CreateDevice when the hardware reservation
+ // turns out to hold a device that was not created by this call.
+ ErrRaceLost = errors.New("race lost with another API user")
+ // ErrNoReservationProvided is returned by CreateDevice when the request
+ // has no HardwareReservationID.
+ ErrNoReservationProvided = errors.New("hardware reservation must be set")
+)
+
+// PowerOffDevice wraps packngo's Devices.PowerOff. Despite its name, pid is
+// passed on as the ID of the device to power off.
+func (e *client) PowerOffDevice(ctx context.Context, pid string) error {
+	powerOff := func(p *packngo.Client) (*packngo.Response, error) {
+		resp, callErr := p.Devices.PowerOff(pid)
+		if callErr != nil {
+			return nil, fmt.Errorf("Devices.PowerOff: %w", callErr)
+		}
+		return resp, nil
+	}
+	_, err := wrap(ctx, e, powerOff)
+	return err
+}
+
+// PowerOnDevice wraps packngo's Devices.PowerOn. Despite its name, pid is
+// passed on as the ID of the device to power on.
+func (e *client) PowerOnDevice(ctx context.Context, pid string) error {
+	powerOn := func(p *packngo.Client) (*packngo.Response, error) {
+		resp, callErr := p.Devices.PowerOn(pid)
+		if callErr != nil {
+			return nil, fmt.Errorf("Devices.PowerOn: %w", callErr)
+		}
+		return resp, nil
+	}
+	_, err := wrap(ctx, e, powerOn)
+	return err
+}
+
+// DeleteDevice wraps packngo's Devices.Delete (without the force flag) for the
+// device with the given ID. Unlike the unexported deleteDevice, a 'not found'
+// response is returned to the caller as an error.
+func (e *client) DeleteDevice(ctx context.Context, id string) error {
+	remove := func(p *packngo.Client) (*packngo.Response, error) {
+		resp, callErr := p.Devices.Delete(id, false)
+		if callErr != nil {
+			return nil, fmt.Errorf("Devices.Delete: %w", callErr)
+		}
+		return resp, nil
+	}
+	_, err := wrap(ctx, e, remove)
+	return err
+}
+
+// CreateDevice creates a device on the hardware reservation named in r. It
+// returns ErrNoReservationProvided if r has no HardwareReservationID.
+//
+// To make retries safe, a unique witness tag is appended to r.Tags (mutating
+// the caller's request). Every attempt first checks whether the reservation
+// already holds a device: a device carrying the witness tag is returned as the
+// result of an earlier attempt, any other device yields ErrRaceLost.
+func (e *client) CreateDevice(ctx context.Context, r *packngo.DeviceCreateRequest) (*packngo.Device, error) {
+ if r.HardwareReservationID == "" {
+ return nil, ErrNoReservationProvided
+ }
+ // Add a tag to the request to detect if someone snatches a hardware reservation
+ // from under us.
+ witnessTag := fmt.Sprintf("wrapngo-idempotency-%s", uuid.New().String())
+ r.Tags = append(r.Tags, witnessTag)
+
+ return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
+ // Does the device already exist?
+ res, _, err := cl.HardwareReservations.Get(r.HardwareReservationID, nil)
+ if err != nil {
+ return nil, fmt.Errorf("couldn't check if device already exists: %w", err)
+ }
+ if res == nil {
+ return nil, fmt.Errorf("unexpected nil response")
+ }
+ if res.Device != nil {
+ // Check if we lost the race for this hardware reservation.
+ tags := make(map[string]bool)
+ for _, tag := range res.Device.Tags {
+ tags[tag] = true
+ }
+ if !tags[witnessTag] {
+ return nil, ErrRaceLost
+ }
+ return res.Device, nil
+ }
+
+ // No device yet. Try to create it.
+ dev, _, err := cl.Devices.Create(r)
+ if err == nil {
+ return dev, nil
+ }
+ // In case of a transient failure (eg. network issue), we retry the whole
+ // operation, which means we first check again if the device already exists. If
+ // it's a permanent error from the API, the backoff logic will fail immediately.
+ return nil, fmt.Errorf("couldn't create device: %w", err)
+ })
+}
+
+// UpdateDevice wraps packngo's Devices.Update for the device with the given
+// ID. Errors are prefixed with the packngo call name, like in the other
+// wrappers, and keep the underlying error reachable via errors.Is/As.
+func (e *client) UpdateDevice(ctx context.Context, id string, r *packngo.DeviceUpdateRequest) (*packngo.Device, error) {
+	return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
+		dev, _, err := cl.Devices.Update(id, r)
+		if err != nil {
+			return nil, fmt.Errorf("Devices.Update: %w", err)
+		}
+		return dev, nil
+	})
+}
+
+// ListDevices wraps packngo's Devices.List for the project pid. Errors are
+// prefixed with the packngo call name, like in the other wrappers, and keep
+// the underlying error reachable via errors.Is/As.
+func (e *client) ListDevices(ctx context.Context, pid string) ([]packngo.Device, error) {
+	return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.Device, error) {
+		// to increase the chances of a stable pagination, we sort the devices by hostname
+		res, _, err := cl.Devices.List(pid, &packngo.GetOptions{SortBy: "hostname"})
+		if err != nil {
+			return nil, fmt.Errorf("Devices.List: %w", err)
+		}
+		return res, nil
+	})
+}
+
+// GetDevice wraps packngo's Devices.Get for the device did. The pid parameter
+// is not used. Errors are prefixed with the packngo call name, like in the
+// other wrappers, and keep the underlying error reachable via errors.Is/As.
+func (e *client) GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error) {
+	return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
+		d, _, err := cl.Devices.Get(did, opts)
+		if err != nil {
+			return nil, fmt.Errorf("Devices.Get: %w", err)
+		}
+		return d, nil
+	})
+}
+
+// deleteDevice deletes the device did, treating a 'not found' response as
+// success.
+//
+// Currently unexported, only used in tests.
+func (e *client) deleteDevice(ctx context.Context, did string) error {
+ _, err := wrap(ctx, e, func(cl *packngo.Client) (*struct{}, error) {
+ _, err := cl.Devices.Delete(did, false)
+ if httpStatusCode(err) == http.StatusNotFound {
+ // 404s may pop up as an after effect of running the back-off
+ // algorithm, and as such should not be propagated.
+ return nil, nil
+ }
+ return nil, err
+ })
+ return err
+}
+
+// ListReservations wraps packngo's HardwareReservations.List for the project
+// pid, asking for the facility and device of each reservation to be included.
+// Errors are prefixed with the packngo call name, like in the other wrappers,
+// and keep the underlying error reachable via errors.Is/As.
+func (e *client) ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) {
+	return wrap(ctx, e, func(cl *packngo.Client) ([]packngo.HardwareReservation, error) {
+		res, _, err := cl.HardwareReservations.List(pid, &packngo.ListOptions{Includes: []string{"facility", "device"}})
+		if err != nil {
+			return nil, fmt.Errorf("HardwareReservations.List: %w", err)
+		}
+		return res, nil
+	})
+}
+
+// MoveReservation wraps packngo's HardwareReservations.Move, moving the given
+// hardware reservation to the project projectID.
+func (e *client) MoveReservation(ctx context.Context, hardwareReservationDID, projectID string) (*packngo.HardwareReservation, error) {
+	move := func(cl *packngo.Client) (*packngo.HardwareReservation, error) {
+		moved, _, callErr := cl.HardwareReservations.Move(hardwareReservationDID, projectID)
+		if callErr != nil {
+			return nil, fmt.Errorf("HardwareReservations.Move: %w", callErr)
+		}
+		return moved, nil
+	}
+	return wrap(ctx, e, move)
+}
+
+// CreateSSHKey creates the SSH key described by r, or returns the existing
+// key if one with the same label and the same key material is already
+// present. Every attempt first lists the existing keys, which is what makes
+// retries safe.
+//
+// If the label is already used by a different key, an error is returned
+// immediately without further retries.
+func (e *client) CreateSSHKey(ctx context.Context, r *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error) {
+	return wrap(ctx, e, func(cl *packngo.Client) (*packngo.SSHKey, error) {
+		// Does the key already exist?
+		ks, _, err := cl.SSHKeys.List()
+		if err != nil {
+			return nil, fmt.Errorf("SSHKeys.List: %w", err)
+		}
+		for i := range ks {
+			if ks[i].Label != r.Label {
+				continue
+			}
+			if ks[i].Key != r.Key {
+				// Retrying cannot resolve a label conflict, so mark the
+				// error as permanent to stop the back-off loop right away.
+				return nil, backoff.Permanent(fmt.Errorf("key label already in use for a different key"))
+			}
+			return &ks[i], nil
+		}
+
+		// No key yet. Try to create it.
+		k, _, err := cl.SSHKeys.Create(r)
+		if err != nil {
+			return nil, fmt.Errorf("SSHKeys.Create: %w", err)
+		}
+		return k, nil
+	})
+}
+
+// UpdateSSHKey wraps packngo's SSHKeys.Update for the key with the given ID.
+func (e *client) UpdateSSHKey(ctx context.Context, id string, r *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error) {
+	update := func(cl *packngo.Client) (*packngo.SSHKey, error) {
+		updated, _, callErr := cl.SSHKeys.Update(id, r)
+		if callErr != nil {
+			return nil, fmt.Errorf("SSHKeys.Update: %w", callErr)
+		}
+		return updated, nil
+	}
+	return wrap(ctx, e, update)
+}
+
+// deleteSSHKey wraps packngo's SSHKeys.Delete for the key with the given ID.
+//
+// Currently unexported, only used in tests.
+func (e *client) deleteSSHKey(ctx context.Context, id string) error {
+	remove := func(cl *packngo.Client) (struct{}, error) {
+		if _, callErr := cl.SSHKeys.Delete(id); callErr != nil {
+			return struct{}{}, fmt.Errorf("SSHKeys.Delete: %w", callErr)
+		}
+		return struct{}{}, nil
+	}
+	_, err := wrap(ctx, e, remove)
+	return err
+}
+
+// ListSSHKeys wraps packngo's SSHKeys.List.
+func (e *client) ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error) {
+	list := func(cl *packngo.Client) ([]packngo.SSHKey, error) {
+		keys, _, callErr := cl.SSHKeys.List()
+		if callErr != nil {
+			return nil, fmt.Errorf("SSHKeys.List: %w", callErr)
+		}
+		return keys, nil
+	}
+	return wrap(ctx, e, list)
+}
+
+// getSSHKey wraps packngo's SSHKeys.Get for the key with the given ID.
+//
+// Currently unexported, only used in tests.
+func (e *client) getSSHKey(ctx context.Context, id string) (*packngo.SSHKey, error) {
+	get := func(cl *packngo.Client) (*packngo.SSHKey, error) {
+		key, _, callErr := cl.SSHKeys.Get(id, nil)
+		if callErr != nil {
+			return nil, fmt.Errorf("SSHKeys.Get: %w", callErr)
+		}
+		return key, nil
+	}
+	return wrap(ctx, e, get)
+}
+
+// RebootDevice wraps packngo's Devices.Reboot for the device did. Errors are
+// prefixed with the packngo call name, like in the other wrappers, and keep
+// the underlying error reachable via errors.Is/As.
+func (e *client) RebootDevice(ctx context.Context, did string) error {
+	_, err := wrap(ctx, e, func(cl *packngo.Client) (struct{}, error) {
+		if _, err := cl.Devices.Reboot(did); err != nil {
+			return struct{}{}, fmt.Errorf("Devices.Reboot: %w", err)
+		}
+		return struct{}{}, nil
+	})
+	return err
+}
diff --git a/cloud/equinix/wrapngo/wrapngo_live_test.go b/cloud/equinix/wrapngo/wrapngo_live_test.go
new file mode 100644
index 0000000..549071a
--- /dev/null
+++ b/cloud/equinix/wrapngo/wrapngo_live_test.go
@@ -0,0 +1,344 @@
+package wrapngo
+
+import (
+ "context"
+ "crypto/ed25519"
+ "crypto/rand"
+ "errors"
+ "fmt"
+ "log"
+ "os"
+ "testing"
+ "time"
+
+ "github.com/packethost/packngo"
+ "golang.org/x/crypto/ssh"
+)
+
+// liveTestClient bundles the wrapngo client under test together with the
+// parameters the live API tests need. It is built by newLiveTestClient.
+type liveTestClient struct {
+ // cl is the client whose methods are exercised by the tests.
+ cl *client
+ // ctx is passed to every API call made by the tests. newLiveTestClient
+ // registers its cancel function as a test cleanup.
+ ctx context.Context
+
+ // apipid is the project ID, taken from EQUINIX_PROJECT_ID.
+ apipid string
+ // apios is the OS used when creating the test device, taken from
+ // EQUINIX_DEVICE_OS.
+ apios string
+
+ // sshKeyLabel is the label of the SSH key created by the tests; cleanup
+ // deletes any key carrying this label.
+ sshKeyLabel string
+ // testDeviceHostname is the hostname of the device created by the tests;
+ // cleanup deletes the first listed device carrying this hostname.
+ testDeviceHostname string
+}
+
+// newLiveTestClient builds a liveTestClient from the environment. The
+// variables EQUINIX_USER, EQUINIX_APIKEY, EQUINIX_PROJECT_ID and
+// EQUINIX_DEVICE_OS must all be non-empty; the first one found missing (checked
+// in that order) causes the calling test to be skipped. The context stored in
+// the returned client is cancelled when the test finishes.
+func newLiveTestClient(t *testing.T) *liveTestClient {
+	t.Helper()
+
+	required := []string{
+		"EQUINIX_USER",
+		"EQUINIX_APIKEY",
+		"EQUINIX_PROJECT_ID",
+		"EQUINIX_DEVICE_OS",
+	}
+	// Read everything up front, then validate in declaration order.
+	env := make(map[string]string, len(required))
+	for _, name := range required {
+		env[name] = os.Getenv(name)
+	}
+	for _, name := range required {
+		if env[name] == "" {
+			t.Skip(name + " must be set.")
+		}
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+
+	opts := &Opts{
+		User:   env["EQUINIX_USER"],
+		APIKey: env["EQUINIX_APIKEY"],
+	}
+	ltc := &liveTestClient{
+		cl:  new(opts),
+		ctx: ctx,
+
+		apipid: env["EQUINIX_PROJECT_ID"],
+		apios:  env["EQUINIX_DEVICE_OS"],
+
+		sshKeyLabel:        "shepherd-livetest-client",
+		testDeviceHostname: "shepherd-livetest-device",
+	}
+	return ltc
+}
+
+// awaitDeviceState returns nil after device matching the id reaches one of the
+// provided states. It will return a non-nil value in case of an API error, and
+// particularly if there exists no device matching id.
+//
+// The device is polled roughly once per second. A poll that fails with
+// os.ErrDeadlineExceeded is retried after the same pause rather than
+// immediately, so a persistently timing-out API is not hammered in a tight
+// loop. If the client's context is cancelled while waiting, the context's error
+// is returned.
+func (l *liveTestClient) awaitDeviceState(t *testing.T, id string, states ...string) error {
+	t.Helper()
+
+	for {
+		d, err := l.cl.GetDevice(l.ctx, l.apipid, id, nil)
+		switch {
+		case err == nil:
+			if d == nil {
+				return fmt.Errorf("expected the test device (ID: %s) to exist.", id)
+			}
+			for _, s := range states {
+				if d.State == s {
+					return nil
+				}
+			}
+			t.Logf("Waiting for device to be provisioned (ID: %s, current state: %q)", id, d.State)
+		case errors.Is(err, os.ErrDeadlineExceeded):
+			// Transient timeout: fall through to the pause below and retry.
+			t.Logf("Timed out fetching device info (ID: %s), retrying: %v", id, err)
+		default:
+			return fmt.Errorf("while fetching device info: %w", err)
+		}
+
+		// Pause between polls, but give up promptly on cancellation.
+		select {
+		case <-l.ctx.Done():
+			return fmt.Errorf("while waiting for device state: %w", l.ctx.Err())
+		case <-time.After(time.Second):
+		}
+	}
+}
+
+// cleanup ensures both the test device and the test key are deleted at
+// Equinix.
+//
+// The first listed device whose hostname equals testDeviceHostname is waited
+// on until it is "active" or "failed" and then deleted. Every SSH key whose
+// label equals sshKeyLabel is deleted. Failures abort the calling test.
+func (l *liveTestClient) cleanup(t *testing.T) {
+	t.Helper()
+
+	t.Logf("Cleaning up.")
+
+	// Ensure the device matching testDeviceHostname is deleted.
+	ds, err := l.cl.ListDevices(l.ctx, l.apipid)
+	if err != nil {
+		// NOTE(review): log.Fatalf exits the whole test binary, unlike the
+		// t.Fatalf calls below. It is kept because it is the only use of the
+		// file's "log" import; switch to t.Fatalf together with dropping that
+		// import.
+		log.Fatalf("while listing devices: %v", err)
+	}
+	var td *packngo.Device
+	for i := range ds {
+		if ds[i].Hostname == l.testDeviceHostname {
+			// Point at the slice element rather than at a loop variable.
+			td = &ds[i]
+			break
+		}
+	}
+	if td != nil {
+		t.Logf("Found a test device (ID: %s) that needs to be deleted before progressing further.", td.ID)
+
+		// Devices currently being provisioned can't be deleted. After it's
+		// provisioned, device's state will match either "active", or "failed".
+		// The device ID must be passed explicitly: awaitDeviceState's second
+		// argument is the ID, the remaining ones are the accepted states.
+		if err := l.awaitDeviceState(t, td.ID, "active", "failed"); err != nil {
+			t.Fatalf("while waiting for device to be provisioned: %v", err)
+		}
+		if err := l.cl.deleteDevice(l.ctx, td.ID); err != nil {
+			t.Fatalf("while deleting test device: %v", err)
+		}
+	}
+
+	// Ensure the key matching sshKeyLabel is deleted.
+	ks, err := l.cl.ListSSHKeys(l.ctx)
+	if err != nil {
+		t.Fatalf("while listing SSH keys: %v", err)
+	}
+	for _, k := range ks {
+		if k.Label != l.sshKeyLabel {
+			continue
+		}
+		t.Logf("Found a SSH test key (ID: %s) - deleting...", k.ID)
+		if err := l.cl.deleteSSHKey(l.ctx, k.ID); err != nil {
+			t.Fatalf("while deleting an SSH key: %v", err)
+		}
+		t.Logf("Deleted a SSH test key (ID: %s).", k.ID)
+	}
+}
+
+// createSSHAuthKey returns an SSH public key in OpenSSH authorized_keys
+// format.
+//
+// A fresh ed25519 key pair is generated on every call; the private half is
+// discarded. Any failure aborts the calling test immediately, so that a nil key
+// is never handed on to the conversion or marshalling step.
+func createSSHAuthKey(t *testing.T) string {
+	t.Helper()
+	pub, _, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatalf("while generating SSH key: %v", err)
+	}
+
+	sshpub, err := ssh.NewPublicKey(pub)
+	if err != nil {
+		t.Fatalf("while generating SSH public key: %v", err)
+	}
+	return string(ssh.MarshalAuthorizedKey(sshpub))
+}
+
+// TestLiveAPI performs smoke tests of wrapngo against the real Equinix API. See
+// newLiveTestClient to see which environment variables need to be provided in
+// order for this test to run.
+//
+// The subtests run sequentially and share state: the SSH key ID and the test
+// device produced by the Create* subtests are consumed by the later ones, which
+// skip themselves when the corresponding resource is missing. cleanup is run
+// both before and after the subtests to remove leftover test resources.
+func TestLiveAPI(t *testing.T) {
+	ltc := newLiveTestClient(t)
+	ltc.cleanup(t)
+
+	cl := ltc.cl
+	ctx := ltc.ctx
+
+	t.Run("ListReservations", func(t *testing.T) {
+		_, err := cl.ListReservations(ctx, ltc.apipid)
+		if err != nil {
+			t.Errorf("while listing hardware reservations: %v", err)
+		}
+	})
+
+	// sshKeyID stays empty if key creation fails; dependent subtests skip.
+	var sshKeyID string
+	t.Run("CreateSSHKey", func(t *testing.T) {
+		nk, err := cl.CreateSSHKey(ctx, &packngo.SSHKeyCreateRequest{
+			Label:     ltc.sshKeyLabel,
+			Key:       createSSHAuthKey(t),
+			ProjectID: ltc.apipid,
+		})
+		if err != nil {
+			t.Fatalf("while creating an SSH key: %v", err)
+		}
+		if nk.Label != ltc.sshKeyLabel {
+			t.Errorf("key labels don't match.")
+		}
+		t.Logf("Created an SSH key (ID: %s)", nk.ID)
+		sshKeyID = nk.ID
+	})
+
+	// dummySSHPK2 holds the replacement key contents set by UpdateSSHKey and
+	// verified again by GetSSHKey.
+	var dummySSHPK2 string
+	t.Run("UpdateSSHKey", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		dummySSHPK2 = createSSHAuthKey(t)
+		k, err := cl.UpdateSSHKey(ctx, sshKeyID, &packngo.SSHKeyUpdateRequest{
+			Key: &dummySSHPK2,
+		})
+		if err != nil {
+			t.Fatalf("while updating an SSH key: %v", err)
+		}
+		if k.Key != dummySSHPK2 {
+			t.Errorf("updated SSH key doesn't match the original.")
+		}
+	})
+	t.Run("GetSSHKey", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		k, err := cl.getSSHKey(ctx, sshKeyID)
+		if err != nil {
+			t.Fatalf("while getting an SSH key: %v", err)
+		}
+		if k.Key != dummySSHPK2 {
+			t.Errorf("got key contents that don't match the original.")
+		}
+	})
+	t.Run("ListSSHKeys", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		ks, err := cl.ListSSHKeys(ctx)
+		if err != nil {
+			t.Fatalf("while listing SSH keys: %v", err)
+		}
+
+		// Check that our key is part of the list.
+		found := false
+		for _, k := range ks {
+			if k.ID == sshKeyID {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Errorf("SSH key not listed.")
+		}
+	})
+
+	// testDevice stays nil if device creation fails or is skipped; dependent
+	// subtests skip.
+	var testDevice *packngo.Device
+	t.Run("CreateDevice", func(t *testing.T) {
+		// Find a provisionable hardware reservation the device will be created with.
+		rvs, err := cl.ListReservations(ctx, ltc.apipid)
+		if err != nil {
+			// Stop here: continuing with an empty list would only add a
+			// misleading "no provisionable reservation" skip message.
+			t.Fatalf("while listing hardware reservations: %v", err)
+		}
+		var rv *packngo.HardwareReservation
+		for i := range rvs {
+			if rvs[i].Provisionable {
+				rv = &rvs[i]
+				break
+			}
+		}
+		if rv == nil {
+			t.Skip("could not find a provisionable hardware reservation - skipping...")
+		}
+
+		d, err := cl.CreateDevice(ctx, &packngo.DeviceCreateRequest{
+			Hostname:              ltc.testDeviceHostname,
+			OS:                    ltc.apios,
+			Plan:                  rv.Plan.Slug,
+			HardwareReservationID: rv.ID,
+			ProjectID:             ltc.apipid,
+		})
+		if err != nil {
+			t.Fatalf("while creating a device: %v", err)
+		}
+		t.Logf("Created a new test device (ID: %s)", d.ID)
+		testDevice = d
+	})
+	t.Run("GetDevice", func(t *testing.T) {
+		if testDevice == nil {
+			t.Skip("the test device couldn't have been created - skipping...")
+		}
+
+		d, err := cl.GetDevice(ctx, ltc.apipid, testDevice.ID, nil)
+		if err != nil {
+			t.Fatalf("while fetching device info: %v", err)
+		}
+		if d == nil {
+			t.Fatalf("expected the test device (ID: %s) to exist.", testDevice.ID)
+		}
+		if d.ID != testDevice.ID {
+			t.Errorf("got device ID that doesn't match the original.")
+		}
+	})
+	t.Run("ListDevices", func(t *testing.T) {
+		if testDevice == nil {
+			t.Skip("the test device couldn't have been created - skipping...")
+		}
+
+		ds, err := cl.ListDevices(ctx, ltc.apipid)
+		if err != nil {
+			// Stop here: the length check below is meaningless after a
+			// failed listing and would only add a second, spurious error.
+			t.Fatalf("while listing devices: %v", err)
+		}
+		if len(ds) == 0 {
+			t.Errorf("expected at least one device.")
+		}
+	})
+	t.Run("DeleteDevice", func(t *testing.T) {
+		if testDevice == nil {
+			t.Skip("the test device couldn't have been created - skipping...")
+		}
+
+		// Devices currently being provisioned can't be deleted. After it's
+		// provisioned, device's state will match either "active", or "failed".
+		if err := ltc.awaitDeviceState(t, testDevice.ID, "active", "failed"); err != nil {
+			t.Fatalf("while waiting for device to be provisioned: %v", err)
+		}
+		t.Logf("Deleting the test device (ID: %s)", testDevice.ID)
+		if err := cl.deleteDevice(ctx, testDevice.ID); err != nil {
+			t.Fatalf("while deleting a device: %v", err)
+		}
+		// A not-found error is the expected outcome after deletion.
+		d, err := cl.GetDevice(ctx, ltc.apipid, testDevice.ID, nil)
+		if err != nil && !IsNotFound(err) {
+			t.Fatalf("while fetching device info: %v", err)
+		}
+		if d != nil {
+			t.Fatalf("device should not exist.")
+		}
+		t.Logf("Deleted the test device (ID: %s)", testDevice.ID)
+	})
+	t.Run("DeleteSSHKey", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		t.Logf("Deleting the test SSH key (ID: %s)", sshKeyID)
+		if err := cl.deleteSSHKey(ctx, sshKeyID); err != nil {
+			t.Fatalf("couldn't delete an SSH key: %v", err)
+		}
+		// Any error on lookup is taken as proof that the key is gone.
+		_, err := cl.getSSHKey(ctx, sshKeyID)
+		if err == nil {
+			t.Fatalf("SSH key should not exist")
+		}
+		t.Logf("Deleted the test SSH key (ID: %s)", sshKeyID)
+	})
+
+	ltc.cleanup(t)
+}