cloud: split shepherd up

Change-Id: I8e386d9eaaf17543743e1e8a37a8d71426910d59
Reviewed-on: https://review.monogon.dev/c/monogon/+/2213
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/equinix/cli/BUILD.bazel b/cloud/equinix/cli/BUILD.bazel
new file mode 100644
index 0000000..da94d95
--- /dev/null
+++ b/cloud/equinix/cli/BUILD.bazel
@@ -0,0 +1,27 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "cli_lib",
+    srcs = [
+        "cmd_delete.go",
+        "cmd_move.go",
+        "cmd_reboot.go",
+        "cmd_yoink.go",
+        "main.go",
+    ],
+    importpath = "source.monogon.dev/cloud/equinix/cli",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//cloud/equinix/wrapngo",
+        "//metropolis/cli/pkg/context",
+        "@com_github_packethost_packngo//:packngo",
+        "@com_github_spf13_cobra//:cobra",
+        "@io_k8s_klog_v2//:klog",
+    ],
+)
+
+go_binary(
+    name = "cli",
+    embed = [":cli_lib"],
+    visibility = ["//visibility:public"],
+)
diff --git a/cloud/equinix/cli/cmd_delete.go b/cloud/equinix/cli/cmd_delete.go
new file mode 100644
index 0000000..056956e
--- /dev/null
+++ b/cloud/equinix/cli/cmd_delete.go
@@ -0,0 +1,61 @@
+package main
+
+import (
+	"context"
+	"time"
+
+	"github.com/packethost/packngo"
+	"github.com/spf13/cobra"
+	"k8s.io/klog/v2"
+
+	"source.monogon.dev/cloud/equinix/wrapngo"
+	clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+var deleteCmd = &cobra.Command{
+	Use:   "delete [target]",
+	Short: "Delete all devices from one project",
+	Args:  cobra.ExactArgs(1), // the single argument is passed to ListDevices as the project
+	Run:   doDelete,
+}
+
+func init() {
+	rootCmd.AddCommand(deleteCmd) // expose "delete" as a subcommand of rootCmd
+}
+
+// doDelete renames every device of project args[0] to "deleted-<hostname>" and
+// then deletes it; per-device failures are logged and the device is skipped.
+func doDelete(cmd *cobra.Command, args []string) {
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+	klog.Infof("Listing devices for %q", args[0])
+	devices, err := api.ListDevices(ctx, args[0])
+	if err != nil {
+		klog.Exitf("failed listing devices: %v", err)
+	}
+
+	if len(devices) == 0 {
+		klog.Infof("No devices found in %s", args[0])
+		return
+	}
+
+	klog.Infof("Deleting %d Devices in %s. THIS WILL DELETE SERVERS! You have five seconds to cancel!", len(devices), args[0])
+	// Grace period; unlike time.Sleep it ends early when ctx is cancelled.
+	select {
+	case <-ctx.Done():
+		klog.Exitf("cancelled, no devices were deleted: %v", ctx.Err())
+	case <-time.After(5 * time.Second):
+	}
+
+	for _, d := range devices {
+		h := "deleted-" + d.Hostname
+		if _, err := api.UpdateDevice(ctx, d.ID, &packngo.DeviceUpdateRequest{Hostname: &h}); err != nil {
+			klog.Errorf("failed updating device %s (%s): %v", d.ID, d.Hostname, err)
+			continue
+		}
+		klog.Infof("deleting %s (%s)...", d.ID, d.Hostname)
+		if err := api.DeleteDevice(ctx, d.ID); err != nil {
+			klog.Errorf("failed deleting device %s (%s): %v", d.ID, d.Hostname, err)
+		}
+	}
+}
diff --git a/cloud/equinix/cli/cmd_move.go b/cloud/equinix/cli/cmd_move.go
new file mode 100644
index 0000000..770e480
--- /dev/null
+++ b/cloud/equinix/cli/cmd_move.go
@@ -0,0 +1,43 @@
+package main
+
+import (
+	"context"
+
+	"github.com/spf13/cobra"
+	"k8s.io/klog/v2"
+
+	"source.monogon.dev/cloud/equinix/wrapngo"
+	clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+var moveCmd = &cobra.Command{
+	Use:   "move [source] [target]",
+	Short: "Move all reserved hardware from one to another project",
+	Args:  cobra.ExactArgs(2), // args[0]: project whose reservations are listed, args[1]: destination project
+	Run:   doMove,
+}
+
+func init() {
+	rootCmd.AddCommand(moveCmd) // expose "move" as a subcommand of rootCmd
+}
+
+// doMove moves every hardware reservation of project args[0] into project
+// args[1], logging (and skipping) reservations that fail to move.
+func doMove(cmd *cobra.Command, args []string) {
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+	klog.Infof("Listing reservations for %q", args[0])
+	reservations, err := api.ListReservations(ctx, args[0])
+	if err != nil {
+		klog.Exitf("failed listing reservations: %v", err)
+	}
+
+	klog.Infof("Got %d reservations. Moving machines", len(reservations))
+	for _, r := range reservations {
+		if _, err := api.MoveReservation(ctx, r.ID, args[1]); err != nil {
+			klog.Errorf("failed moving reservation %s: %v", r.ID, err)
+			continue
+		}
+		klog.Infof("Moved Device %s", r.ID)
+	}
+}
diff --git a/cloud/equinix/cli/cmd_reboot.go b/cloud/equinix/cli/cmd_reboot.go
new file mode 100644
index 0000000..7fcd35c
--- /dev/null
+++ b/cloud/equinix/cli/cmd_reboot.go
@@ -0,0 +1,46 @@
+package main
+
+import (
+	"context"
+
+	"github.com/spf13/cobra"
+	"k8s.io/klog/v2"
+
+	"source.monogon.dev/cloud/equinix/wrapngo"
+	clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+var rebootCmd = &cobra.Command{
+	Use:   "reboot [project] [id]",
+	Short: "Reboots all or one specific node",
+	Args:  cobra.RangeArgs(1, 2), // doReboot always reads args[0] (project) and optionally args[1] (device ID)
+	Run:   doReboot,
+}
+
+func init() {
+	rootCmd.AddCommand(rebootCmd) // expose "reboot" as a subcommand of rootCmd
+}
+
+// doReboot reboots the devices listed for project args[0]; when a second
+// argument is present only the device with that ID is rebooted.
+func doReboot(cmd *cobra.Command, args []string) {
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+	klog.Infof("Requesting device list...")
+	devices, err := api.ListDevices(ctx, args[0])
+	if err != nil {
+		klog.Fatal(err)
+	}
+
+	singleTarget := len(args) == 2
+	for _, dev := range devices {
+		if singleTarget && dev.ID != args[1] {
+			continue
+		}
+		if err := api.RebootDevice(ctx, dev.ID); err != nil {
+			klog.Error(err)
+			continue
+		}
+		klog.Infof("rebooted %s", dev.ID)
+	}
+}
diff --git a/cloud/equinix/cli/cmd_yoink.go b/cloud/equinix/cli/cmd_yoink.go
new file mode 100644
index 0000000..bda9e82
--- /dev/null
+++ b/cloud/equinix/cli/cmd_yoink.go
@@ -0,0 +1,170 @@
+package main
+
+import (
+	"bufio"
+	"context"
+	"os"
+	"sort"
+	"strconv"
+	"strings"
+
+	"github.com/packethost/packngo"
+	"github.com/spf13/cobra"
+	"k8s.io/klog/v2"
+
+	"source.monogon.dev/cloud/equinix/wrapngo"
+	clicontext "source.monogon.dev/metropolis/cli/pkg/context"
+)
+
+var yoinkCmd = &cobra.Command{
+	Use: "yoink",
+	Long: `This moves a specified amount of servers that match the given spec and metro to a different project.
+While spec is an easy to find argument that matches the equinix system spec e.g. w3amd.75xx24c.512.8160.x86,
+metro does not represent the public facing name. Instead it is the actual datacenter name e.g. fr2`,
+	Short: "Move servers based on the spec from one to another project",
+	Args:  cobra.NoArgs, // everything is passed through the flags registered in init
+	Run:   doYoink,
+}
+
+func init() {
+	yoinkCmd.Flags().Int("count", 1, "how many machines should be moved")
+	yoinkCmd.Flags().String("equinix_source_project", "", "from which project should the machine be yoinked")
+	yoinkCmd.Flags().String("equinix_target_project", "", "to which project should the machine be moved")
+	yoinkCmd.Flags().String("spec", "", "which device spec should be moved")
+	yoinkCmd.Flags().String("metro", "", "in which metro the machines to be moved must be located")
+	rootCmd.AddCommand(yoinkCmd)
+}
+
+// doYoink moves --count reservations matching --spec and --metro from the
+// source project to the target project, deleting deployed devices if needed.
+func doYoink(cmd *cobra.Command, args []string) {
+	srcProject, err := cmd.Flags().GetString("equinix_source_project")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+	dstProject, err := cmd.Flags().GetString("equinix_target_project")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+	if srcProject == "" || dstProject == "" {
+		klog.Exitf("missing project flags")
+	}
+
+	count, err := cmd.Flags().GetInt("count")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+	// A non-positive count would panic when slicing the matches below.
+	if count < 1 {
+		klog.Exitf("count must be at least 1, got %d", count)
+	}
+
+	spec, err := cmd.Flags().GetString("spec")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+	if spec == "" {
+		klog.Exitf("missing spec flag")
+	}
+
+	metro, err := cmd.Flags().GetString("metro")
+	if err != nil {
+		klog.Exitf("flag: %v", err)
+	}
+	if metro == "" {
+		klog.Exitf("missing metro flag")
+	}
+
+	ctx := clicontext.WithInterrupt(context.Background())
+	api := wrapngo.New(&c)
+
+	klog.Infof("Listing reservations for %q", srcProject)
+	reservations, err := api.ListReservations(ctx, srcProject)
+	if err != nil {
+		klog.Exitf("Failed to list reservations: %v", err)
+	}
+
+	type configDC struct {
+		config string
+		dc     string
+	}
+	mtypes := make(map[configDC]int)
+
+	var matchingReservations []packngo.HardwareReservation
+	reqType := configDC{config: strings.ToLower(spec), dc: strings.ToLower(metro)}
+
+	klog.Infof("Got %d reservations", len(reservations))
+	for _, r := range reservations {
+		curType := configDC{config: strings.ToLower(r.Plan.Name), dc: strings.ToLower(r.Facility.Metro.Code)}
+
+		mtypes[curType]++
+		if curType == reqType {
+			matchingReservations = append(matchingReservations, r)
+		}
+	}
+
+	klog.Infof("Found the following configurations:")
+	for dc, n := range mtypes {
+		klog.Infof("%s | %s | %d", dc.dc, dc.config, n)
+	}
+
+	if len(matchingReservations) == 0 {
+		klog.Exitf("Configuration not found: %s - %s", reqType.dc, reqType.config)
+	}
+
+	if count > len(matchingReservations) {
+		klog.Exitf("Not enough machines with matching configuration found ")
+	}
+
+	// prefer hosts that are not deployed
+	sort.Slice(matchingReservations, func(i, j int) bool {
+		return matchingReservations[i].Device == nil && matchingReservations[j].Device != nil
+	})
+
+	toMove := matchingReservations[:count]
+	var toDelete []string
+	for _, r := range toMove {
+		if r.Device != nil {
+			toDelete = append(toDelete, r.Device.Hostname)
+		}
+	}
+
+	stdInReader := bufio.NewReader(os.Stdin)
+	klog.Infof("Will move %d machines with spec %s in %s from %s to %s.", count, spec, metro, srcProject, dstProject)
+	if len(toDelete) > 0 {
+		klog.Warningf("Not enough free machines found. This will delete %d provisioned hosts! Hosts scheduled for deletion: ", len(toDelete))
+		klog.Warningf("%s", strings.Join(toDelete, ", "))
+		klog.Warningf("Please confirm by inputting in the number of hosts that will be deleted.")
+
+		read, err := stdInReader.ReadString('\n')
+		if err != nil {
+			klog.Exitf("failed reading input: %v", err)
+		}
+
+		atoi, err := strconv.Atoi(strings.TrimSpace(read))
+		if err != nil {
+			klog.Exitf("failed parsing number: %v", err)
+		}
+
+		if atoi != len(toDelete) {
+			klog.Exitf("Confirmation failed! Wanted \"%d\" got \"%d\"", len(toDelete), atoi)
+		}
+		klog.Infof("Thanks for the confirmation! continuing...")
+	}
+
+	klog.Infof("Note: It can be normal for a device move to fail for project validation issues. This is a known issue and can be ignored")
+	for _, r := range toMove {
+		if r.Device != nil {
+			klog.Warningf("Deleting server %s (%s) on %s", r.Device.ID, r.Device.Hostname, r.ID)
+
+			if err := api.DeleteDevice(ctx, r.Device.ID); err != nil {
+				klog.Errorf("failed deleting device %s (%s): %v", r.Device.ID, r.Device.Hostname, err)
+				continue
+			}
+		}
+
+		if _, err := api.MoveReservation(ctx, r.ID, dstProject); err != nil {
+			klog.Errorf("failed moving device %s: %v", r.ID, err)
+		}
+	}
+}
diff --git a/cloud/equinix/cli/main.go b/cloud/equinix/cli/main.go
new file mode 100644
index 0000000..a85c0d0
--- /dev/null
+++ b/cloud/equinix/cli/main.go
@@ -0,0 +1,32 @@
+package main
+
+import (
+	"flag"
+
+	"github.com/spf13/cobra"
+
+	"k8s.io/klog/v2"
+
+	"source.monogon.dev/cloud/equinix/wrapngo"
+)
+
+// rootCmd is the base command; its pre-run hook exits unless both API credentials are set.
+var rootCmd = &cobra.Command{
+	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
+		if c.APIKey == "" || c.User == "" {
+			klog.Exitf("-equinix_api_username and -equinix_api_key must be set")
+		}
+		return nil
+	},
+}
+
+var c wrapngo.Opts // client options shared by all subcommands, filled from the flags registered below
+
+func init() {
+	c.RegisterFlags() // registers the equinix_* flags on flag.CommandLine
+	rootCmd.PersistentFlags().AddGoFlagSet(flag.CommandLine)
+}
+
+func main() {
+	cobra.CheckErr(rootCmd.Execute()) // CheckErr prints a non-nil error and exits with status 1
+}
diff --git a/cloud/equinix/wrapngo/BUILD.bazel b/cloud/equinix/wrapngo/BUILD.bazel
new file mode 100644
index 0000000..1574a6a
--- /dev/null
+++ b/cloud/equinix/wrapngo/BUILD.bazel
@@ -0,0 +1,31 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")
+
+go_library(
+    name = "wrapngo",
+    srcs = [
+        "duct_tape.go",
+        "metrics.go",
+        "wrapn.go",
+    ],
+    importpath = "source.monogon.dev/cloud/equinix/wrapngo",
+    visibility = ["//visibility:public"],
+    deps = [
+        "@com_github_cenkalti_backoff_v4//:backoff",
+        "@com_github_google_uuid//:uuid",
+        "@com_github_packethost_packngo//:packngo",
+        "@com_github_prometheus_client_golang//prometheus",
+        "@io_k8s_klog_v2//:klog",
+    ],
+)
+
+go_test(
+    name = "wrapngo_test",
+    timeout = "eternal",
+    srcs = ["wrapngo_live_test.go"],
+    args = ["-test.v"],
+    embed = [":wrapngo"],
+    deps = [
+        "@com_github_packethost_packngo//:packngo",
+        "@org_golang_x_crypto//ssh",
+    ],
+)
diff --git a/cloud/equinix/wrapngo/duct_tape.go b/cloud/equinix/wrapngo/duct_tape.go
new file mode 100644
index 0000000..d5dab7c
--- /dev/null
+++ b/cloud/equinix/wrapngo/duct_tape.go
@@ -0,0 +1,126 @@
+package wrapngo
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"time"
+
+	"github.com/cenkalti/backoff/v4"
+	"github.com/packethost/packngo"
+	"k8s.io/klog/v2"
+)
+
+// wrap a given fn in some reliability-increasing duct tape: context support and
+// exponential backoff retries for intermittent connectivity issues. This allows
+// us to use packngo code instead of writing our own API stub for Equinix Metal.
+//
+// The given fn will be retried until it returns a 'permanent' Equinix error (see
+// isPermanentEquinixError) or the given context expires. Additionally, fn will
+// be called with a brand new packngo client tied to the context of the wrap
+// call. Finally, the given client will also have some logging middleware
+// attached to it which can be activated by setting verbosity 5 (or greater) on
+// this file.
+//
+// The wrapped fn can be either just a plain packngo method or some complicated
+// idempotent logic, as long as it cooperates with the above contract.
+func wrap[U any](ctx context.Context, cl *client, fn func(*packngo.Client) (U, error)) (U, error) {
+	var zero U
+	if err := cl.serializer.up(ctx); err != nil {
+		return zero, err
+	}
+	defer cl.serializer.down()
+
+	bc := backoff.WithContext(cl.o.BackOff(), ctx)
+	pngo, err := cl.clientForContext(ctx)
+	if err != nil {
+		// Generally this shouldn't happen other than with programming errors, so we
+		// don't back this off.
+		return zero, fmt.Errorf("could not create equinix client: %w", err)
+	}
+
+	var res U
+	err = backoff.Retry(func() error {
+		res, err = fn(pngo)
+		if isPermanentEquinixError(err) {
+			return backoff.Permanent(err)
+		}
+		return err
+	}, bc)
+	if err != nil {
+		return zero, err
+	}
+	return res, nil
+}
+
+type injectContextRoundTripper struct {
+	ctx      context.Context   // attached to every request passed to RoundTrip
+	original http.RoundTripper // transport that actually performs the request
+	metrics  *metricsSet       // receives onAPIRequestDone for every round trip
+}
+
+// RoundTrip forwards req to the wrapped transport bound to r.ctx, reports the
+// outcome and latency to the metrics set and logs both directions at V(5).
+func (r *injectContextRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
+	klog.V(5).Infof("Request -> %v", req.URL.String())
+	began := time.Now()
+	resp, rtErr := r.original.RoundTrip(req.WithContext(r.ctx))
+	r.metrics.onAPIRequestDone(req, resp, rtErr, time.Since(began))
+	if rtErr != nil {
+		klog.V(5).Infof("HTTP error <- %v", rtErr)
+		return resp, rtErr
+	}
+	klog.V(5).Infof("Response <- %v", resp.Status)
+	return resp, nil
+}
+
+// clientForContext builds a packngo client using c's credentials whose HTTP
+// requests are bound to ctx and reported to c.metrics.
+func (c *client) clientForContext(ctx context.Context) (*packngo.Client, error) {
+	rt := &injectContextRoundTripper{ctx: ctx, original: http.DefaultTransport, metrics: c.metrics}
+	httpcl := &http.Client{Transport: rt}
+	return packngo.NewClient(
+		packngo.WithAuth(c.username, c.token),
+		packngo.WithHTTPClient(httpcl),
+	)
+}
+
+// httpStatusCode extracts the status code from error values returned by
+// packngo methods, or returns -1 if err carries no HTTP response.
+func httpStatusCode(err error) int {
+	var er *packngo.ErrorResponse
+	if errors.As(err, &er) && er.Response != nil {
+		return er.Response.StatusCode
+	}
+	return -1
+}
+
+// IsNotFound returns true if the given error is an Equinix packngo/wrapngo 'not
+// found' error, ie. httpStatusCode reports HTTP 404 for it.
+func IsNotFound(err error) bool {
+	return httpStatusCode(err) == http.StatusNotFound
+}
+
+// isPermanentEquinixError reports whether err must not be retried by wrap:
+// either one of wrapngo's own sentinel errors, or an HTTP 401, 403, 404 or 422
+// reply from Equinix. Everything else is considered transient.
+func isPermanentEquinixError(err error) bool {
+	// Invalid argument/state errors from wrapping.
+	for _, sentinel := range []error{ErrRaceLost, ErrNoReservationProvided} {
+		if errors.Is(err, sentinel) {
+			return true
+		}
+	}
+	// Real errors returned from equinix.
+	switch httpStatusCode(err) {
+	case http.StatusUnauthorized,
+		http.StatusForbidden,
+		http.StatusNotFound,
+		http.StatusUnprocessableEntity:
+		return true
+	default:
+		return false
+	}
+}
+
diff --git a/cloud/equinix/wrapngo/metrics.go b/cloud/equinix/wrapngo/metrics.go
new file mode 100644
index 0000000..fef506b
--- /dev/null
+++ b/cloud/equinix/wrapngo/metrics.go
@@ -0,0 +1,129 @@
+package wrapngo
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"net/http"
+	"regexp"
+	"strings"
+	"time"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/klog/v2"
+)
+
+// metricsSet contains all the Prometheus metrics collected by wrapngo.
+type metricsSet struct {
+	requestLatencies *prometheus.HistogramVec // labelled by "endpoint" and "status_code"
+	waiting          prometheus.GaugeFunc     // second value returned by serializer.stats()
+	inFlight         prometheus.GaugeFunc     // first value returned by serializer.stats()
+}
+
+// newMetricsSet creates the wrapngo collectors; the two gauges read ser.stats().
+func newMetricsSet(ser *serializer) *metricsSet {
+	latencies := prometheus.NewHistogramVec(
+		prometheus.HistogramOpts{
+			Name: "equinix_api_latency",
+			Help: "Equinix API request latency in seconds, partitioned by endpoint status code",
+		},
+		[]string{"endpoint", "status_code"},
+	)
+	waiting := prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Name: "equinix_api_waiting",
+			Help: "Number of API requests pending to be sent to Equinix but waiting on semaphore",
+		},
+		func() float64 {
+			_, queued := ser.stats()
+			return float64(queued)
+		},
+	)
+	inFlight := prometheus.NewGaugeFunc(
+		prometheus.GaugeOpts{
+			Name: "equinix_api_in_flight",
+			Help: "Number of API requests currently being processed by Equinix",
+		},
+		func() float64 {
+			active, _ := ser.stats()
+			return float64(active)
+		},
+	)
+	return &metricsSet{requestLatencies: latencies, waiting: waiting, inFlight: inFlight}
+}
+
+// getEndpointForPath maps an Equinix API method and path (eg.
+// /metal/v1/devices/deadbeef) to an 'endpoint' name, which is an imaginary,
+// Monogon-specific name for the API endpoint accessed by this call.
+//
+// The /metal/v1 prefix is stripped before matching against endpointNames. If
+// no entry matches, 'Unknown' is returned and a warning is logged.
+//
+// We use this function to partition request statistics per API 'endpoint'. An
+// alternative to this would be to record high-level packngo function names, but
+// one packngo function call might actually emit multiple HTTP API requests - so
+// we're stuck recording the low-level requests and gathering statistics from
+// there instead.
+func getEndpointForPath(method, path string) string {
+	trimmed := strings.TrimPrefix(path, "/metal/v1")
+	for endpoint, rm := range endpointNames {
+		if rm.matches(method, trimmed) {
+			return endpoint
+		}
+	}
+	klog.Warningf("Unknown Equinix API %s %s - cannot determine metric endpoint name", method, trimmed)
+	return "Unknown"
+}
+
+// requestMatch is used to match a HTTP request method/path.
+type requestMatch struct {
+	method string         // compared for equality with the request method
+	regexp *regexp.Regexp // matched against the path given to matches
+}
+
+// matches reports whether method equals r.method and path matches r.regexp.
+// The regexp is only evaluated when the method already agrees.
+//
+func (r *requestMatch) matches(method, path string) bool {
+	return r.method == method && r.regexp.MatchString(path)
+}
+
+var (
+	endpointNames = map[string]requestMatch{
+		"GetDevice":           {"GET", regexp.MustCompile(`^/devices/[^/]+$`)},
+		"ListDevices":         {"GET", regexp.MustCompile(`^/(organizations|projects)/[^/]+/devices$`)},
+		"CreateDevice":        {"POST", regexp.MustCompile(`^/projects/[^/]+/devices$`)},
+		"ListReservations":    {"GET", regexp.MustCompile(`^/projects/[^/]+/hardware-reservations$`)},
+		"ListSSHKeys":         {"GET", regexp.MustCompile(`^/ssh-keys$`)},
+		"CreateSSHKey":        {"POST", regexp.MustCompile(`^(/projects/[^/]+)?/ssh-keys$`)}, // keys are created under /ssh-keys or /projects/<id>/ssh-keys
+		"GetSSHKey":           {"GET", regexp.MustCompile(`^/ssh-keys/[^/]+$`)},
+		"UpdateSSHKey":        {"PATCH", regexp.MustCompile(`^/ssh-keys/[^/]+$`)},
+		"PerformDeviceAction": {"POST", regexp.MustCompile(`^/devices/[^/]+/actions$`)},
+	}
+)
+
+// onAPIRequestDone is called by the wrapngo code on every API response from
+// Equinix, and records the given parameters into metrics. It does nothing when
+// called on a nil metricsSet.
+func (m *metricsSet) onAPIRequestDone(req *http.Request, res *http.Response, err error, latency time.Duration) {
+	if m == nil {
+		return
+	}
+
+	code := "unknown"
+	switch {
+	case err == nil:
+		code = fmt.Sprintf("%d", res.StatusCode)
+	case errors.Is(err, context.Canceled):
+		code = "ctx canceled"
+	case errors.Is(err, context.DeadlineExceeded):
+		code = "deadline exceeded"
+	default:
+		// Any other failure keeps the "unknown" code. Log err itself: res is
+		// expected to be nil when the round trip failed.
+		klog.Warningf("Unexpected HTTP result: req %s %s, error: %v", req.Method, req.URL.Path, err)
+	}
+
+	endpoint := getEndpointForPath(req.Method, req.URL.Path)
+	m.requestLatencies.With(prometheus.Labels{"endpoint": endpoint, "status_code": code}).Observe(latency.Seconds())
+}
diff --git a/cloud/equinix/wrapngo/wrapn.go b/cloud/equinix/wrapngo/wrapn.go
new file mode 100644
index 0000000..7bd4522
--- /dev/null
+++ b/cloud/equinix/wrapngo/wrapn.go
@@ -0,0 +1,433 @@
+// Package wrapngo wraps packngo methods providing the following usability
+// enhancements:
+// - API call rate limiting
+// - resource-aware call retries
+// - use of a configurable back-off algorithm implementation
+// - context awareness
+//
+// The implementation is provided with the following caveats:
+//
+// At most Opts.Parallelism calls can be in flight. Further concurrent calls to
+// API-related methods of the same client will block. Calls returning packngo
+// structs will return nil data when a non-nil error value is returned. An
+// os.ErrDeadlineExceeded will be returned after the underlying API calls time
+// out beyond the chosen back-off algorithm implementation's maximum allowed
+// retry interval. Other errors, excluding context.Canceled and
+// context.DeadlineExceeded, indicate either an error originating at Equinix'
+// API endpoint (which may still stem from invalid call inputs), or a network
+// error.
+//
+// Packngo wrappers included below may return timeout errors even after the
+// wrapped calls succeed in the event server reply could not have been
+// received.
+//
+// This implies that effects of mutating calls can't always be verified
+// atomically, requiring explicit synchronization between API users, regardless
+// of the retry/recovery logic used.
+//
+// Having that in mind, some call wrappers exposed by this package will attempt
+// to recover from this kind of situations by requesting information on any
+// resources created, and retrying the call if needed. This approach assumes
+// any concurrent mutating API users will be synchronized, as it should be in
+// any case.
+//
+// Another way of handling this problem would be to leave it up to the user to
+// retry calls if needed, though this would leak Equinix Metal API, and
+// complicate implementations depending on this package. Due to that, the prior
+// approach was chosen.
+package wrapngo
+
+import (
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"net/http"
+	"sync/atomic"
+	"time"
+
+	"github.com/cenkalti/backoff/v4"
+	"github.com/google/uuid"
+	"github.com/packethost/packngo"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+// Opts conveys configurable Client parameters.
+type Opts struct {
+	// User and APIKey are the credentials used to authenticate with
+	// Metal API.
+	User   string
+	APIKey string
+
+	// Optional parameters:
+
+	// BackOff controls the client's behavior in the event of API calls failing
+	// due to IO timeouts by adjusting the lower bound on time taken between
+	// subsequent calls. If nil, New installs backoff.NewExponentialBackOff.
+	BackOff func() backoff.BackOff
+
+	// APIRate is the minimum time taken between subsequent API calls.
+	APIRate time.Duration
+
+	// Parallelism defines how many calls to the Equinix API will be issued in
+	// parallel. When this limit is reached, subsequent attempts to call the API will
+	// block. The order of serving of pending calls is currently undefined.
+	//
+	// If not defined (ie. 0), defaults to 1.
+	Parallelism int
+
+	// MetricsRegistry, if non-nil, is where New registers the client's metrics.
+	MetricsRegistry *prometheus.Registry
+}
+
+func (o *Opts) RegisterFlags() { // binds User, APIKey and Parallelism to flags on the global flag.CommandLine
+	flag.StringVar(&o.User, "equinix_api_username", "", "Username for Equinix API")
+	flag.StringVar(&o.APIKey, "equinix_api_key", "", "Key/token/password for Equinix API")
+	flag.IntVar(&o.Parallelism, "equinix_parallelism", 3, "How many parallel connections to the Equinix API will be allowed")
+}
+
+// Client is a limited interface of methods that the Shepherd uses on Equinix. It
+// is provided to allow for dependency injection of a fake equinix API for tests.
+type Client interface {
+	// GetDevice wraps packngo's cl.Devices.Get.
+	//
+	// TODO(q3k): remove unused pid parameter.
+	GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error)
+	// ListDevices wraps packngo's cl.Device.List.
+	ListDevices(ctx context.Context, pid string) ([]packngo.Device, error)
+	// CreateDevice attempts to create a new device according to the provided
+	// request. The request _must_ configure a HardwareReservationID. This call
+	// attempts to be as idempotent as possible, and will return ErrRaceLost if a
+	// retry was needed but in the meantime the requested hardware reservation from
+	// which this machine was requested got lost.
+	CreateDevice(ctx context.Context, request *packngo.DeviceCreateRequest) (*packngo.Device, error)
+	// UpdateDevice wraps packngo's cl.Devices.Update.
+	UpdateDevice(ctx context.Context, id string, request *packngo.DeviceUpdateRequest) (*packngo.Device, error)
+	RebootDevice(ctx context.Context, did string) error
+	DeleteDevice(ctx context.Context, id string) error // wraps packngo's cl.Devices.Delete
+
+	// ListReservations returns a complete list of hardware reservations associated
+	// with project pid. This is an expensive method that takes a while to execute,
+	// handle with care.
+	ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error)
+	// MoveReservation moves a reserved device to the given project.
+	MoveReservation(ctx context.Context, hardwareReservationDID, projectID string) (*packngo.HardwareReservation, error)
+
+	// ListSSHKeys wraps packngo's cl.Keys.List.
+	ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error)
+	// CreateSSHKey is idempotent - the key label can be used only once. Further
+	// calls referring to the same label and key will not yield errors. See the
+	// package comment for more info on this method's behavior and returned error
+	// values.
+	CreateSSHKey(ctx context.Context, req *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error)
+	// UpdateSSHKey is idempotent - values included in r can be applied only once,
+	// while subsequent updates using the same data don't produce errors. See the
+	// package comment for information on this method's behavior and returned error
+	// values.
+	UpdateSSHKey(ctx context.Context, kid string, req *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error)
+	// Close releases resources held by the client.
+	Close()
+}
+
+// client implements the Client interface.
+type client struct {
+	username string       // copied from Opts.User
+	token    string       // copied from Opts.APIKey
+	o        *Opts        // options as passed to New, with defaults filled in
+	rlt      *time.Ticker // ticks every Opts.APIRate; stopped by Close
+
+	serializer *serializer // bounds the number of concurrent wrap calls
+	metrics    *metricsSet // nil unless Opts.MetricsRegistry was set
+}
+
+// serializer is an N-semaphore channel (configured by opts.Parallelism) which is
+// used to limit the number of concurrent calls to the Equinix API.
+//
+// In addition, it implements some simple waiting/usage statistics for
+// metrics/introspection.
+type serializer struct {
+	sem     chan struct{} // one buffered element per held slot
+	usage   int64         // slots currently held; accessed atomically
+	waiting int64         // callers currently blocked in up; accessed atomically
+}
+
+// up blocks until the serializer has at least one available concurrent call
+// slot. If the given context expires before such a slot is available, the
+// context error is returned.
+func (s *serializer) up(ctx context.Context) error {
+	atomic.AddInt64(&s.waiting, 1) // counted as waiting until the select below resolves
+	select {
+	case s.sem <- struct{}{}: // slot acquired: move from waiting to in-use
+		atomic.AddInt64(&s.waiting, -1)
+		atomic.AddInt64(&s.usage, 1)
+		return nil
+	case <-ctx.Done(): // gave up: no slot is held, so the caller must not call down
+		atomic.AddInt64(&s.waiting, -1)
+		return ctx.Err()
+	}
+}
+
+// down releases a previously acquired concurrent call slot.
+func (s *serializer) down() {
+	atomic.AddInt64(&s.usage, -1)
+	<-s.sem // frees one buffer element so that a blocked up can proceed
+}
+
+// stats returns the number of in-flight and waiting-for-semaphore requests.
+// The two counters are loaded one after the other, so the pair is not a single
+// consistent snapshot.
+func (s *serializer) stats() (usage, waiting int64) {
+	return atomic.LoadInt64(&s.usage), atomic.LoadInt64(&s.waiting)
+}
+
+// New creates a Client instance based on Opts, filling unset optional fields
+// of opts in place with their defaults. Set the PACKNGO_DEBUG environment
+// variable before calling New to enable verbose packngo debug logs.
+func New(opts *Opts) Client {
+	return new(opts)
+}
+
+func new(opts *Opts) *client { // note: shadows the builtin new within this package
+	// Apply the defaults. This writes to the caller's Opts.
+	if opts.APIRate == 0 {
+		opts.APIRate = 2 * time.Second
+	}
+	if opts.BackOff == nil {
+		opts.BackOff = func() backoff.BackOff {
+			return backoff.NewExponentialBackOff()
+		}
+	}
+	if opts.Parallelism == 0 {
+		opts.Parallelism = 1
+	}
+
+	cl := &client{
+		username: opts.User,
+		token:    opts.APIKey,
+		o:        opts,
+		rlt:      time.NewTicker(opts.APIRate),
+
+		serializer: &serializer{
+			sem: make(chan struct{}, opts.Parallelism), // capacity is the number of concurrent slots
+		},
+	}
+	if opts.MetricsRegistry != nil { // metrics are optional; cl.metrics stays nil otherwise
+		ms := newMetricsSet(cl.serializer)
+		opts.MetricsRegistry.MustRegister(ms.inFlight, ms.waiting, ms.requestLatencies)
+		cl.metrics = ms
+	}
+	return cl
+}
+
+func (c *client) Close() {
+	c.rlt.Stop() // stops the ticker created in new
+}
+
+var (
+	ErrRaceLost              = errors.New("race lost with another API user") // CreateDevice: reservation holds a device without our witness tag
+	ErrNoReservationProvided = errors.New("hardware reservation must be set") // CreateDevice: request has an empty HardwareReservationID
+)
+
+func (e *client) PowerOffDevice(ctx context.Context, pid string) error {
+	_, wrapErr := wrap(ctx, e, func(pc *packngo.Client) (*packngo.Response, error) {
+		resp, callErr := pc.Devices.PowerOff(pid) // pid is passed as the device ID
+		if callErr != nil {
+			return nil, fmt.Errorf("Devices.PowerOff: %w", callErr)
+		}
+		return resp, nil
+	})
+	return wrapErr
+}
+
+func (e *client) PowerOnDevice(ctx context.Context, pid string) error {
+	_, wrapErr := wrap(ctx, e, func(pc *packngo.Client) (*packngo.Response, error) {
+		resp, callErr := pc.Devices.PowerOn(pid) // pid is passed as the device ID
+		if callErr != nil {
+			return nil, fmt.Errorf("Devices.PowerOn: %w", callErr)
+		}
+		return resp, nil
+	})
+	return wrapErr
+}
+
+func (e *client) DeleteDevice(ctx context.Context, id string) error {
+	_, wrapErr := wrap(ctx, e, func(pc *packngo.Client) (*packngo.Response, error) {
+		resp, callErr := pc.Devices.Delete(id, false) // unlike deleteDevice, a 404 is returned as an error
+		if callErr != nil {
+			return nil, fmt.Errorf("Devices.Delete: %w", callErr)
+		}
+		return resp, nil
+	})
+	return wrapErr
+}
+
+func (e *client) CreateDevice(ctx context.Context, r *packngo.DeviceCreateRequest) (*packngo.Device, error) {
+	if r.HardwareReservationID == "" {
+		return nil, ErrNoReservationProvided
+	}
+	// Add a tag to the request to detect if someone snatches a hardware reservation
+	// from under us.
+	witnessTag := fmt.Sprintf("wrapngo-idempotency-%s", uuid.New().String())
+	r.Tags = append(r.Tags, witnessTag) // note: this modifies the caller's request
+
+	return wrap(ctx, e, func(cl *packngo.Client) (*packngo.Device, error) {
+		// Does the device already exist?
+		res, _, err := cl.HardwareReservations.Get(r.HardwareReservationID, nil)
+		if err != nil {
+			return nil, fmt.Errorf("couldn't check if device already exists: %w", err)
+		}
+		if res == nil {
+			return nil, fmt.Errorf("unexpected nil response")
+		}
+		if res.Device != nil {
+			// Check if we lost the race for this hardware reservation.
+			tags := make(map[string]bool)
+			for _, tag := range res.Device.Tags {
+				tags[tag] = true
+			}
+			if !tags[witnessTag] {
+				return nil, ErrRaceLost
+			}
+			return res.Device, nil // the device carries our tag, so an earlier attempt created it
+		}
+
+		// No device yet. Try to create it.
+		dev, _, err := cl.Devices.Create(r)
+		if err == nil {
+			return dev, nil
+		}
+		// In case of a transient failure (eg. network issue), we retry the whole
+		// operation, which means we first check again if the device already exists. If
+		// it's a permanent error from the API, the backoff logic will fail immediately.
+		return nil, fmt.Errorf("couldn't create device: %w", err)
+	})
+}
+
+func (e *client) UpdateDevice(ctx context.Context, id string, r *packngo.DeviceUpdateRequest) (*packngo.Device, error) {
+	return wrap(ctx, e, func(pc *packngo.Client) (*packngo.Device, error) {
+		updated, _, updErr := pc.Devices.Update(id, r) // the *packngo.Response is not needed
+		return updated, updErr
+	})
+}
+
+func (e *client) ListDevices(ctx context.Context, pid string) ([]packngo.Device, error) {
+	// Sorting the devices by hostname increases the chances of a stable pagination.
+	listOpts := &packngo.GetOptions{SortBy: "hostname"}
+	return wrap(ctx, e, func(pc *packngo.Client) ([]packngo.Device, error) {
+		devs, _, listErr := pc.Devices.List(pid, listOpts)
+		return devs, listErr
+	})
+
+// GetDevice fetches the device identified by did, forwarding opts to
+// Devices.Get through wrap. The pid argument is accepted but is not used by
+// the lookup.
+func (e *client) GetDevice(ctx context.Context, pid, did string, opts *packngo.ListOptions) (*packngo.Device, error) {
+	get := func(cl *packngo.Client) (*packngo.Device, error) {
+		device, _, err := cl.Devices.Get(did, opts)
+		return device, err
+	}
+	return wrap(ctx, e, get)
+}
+
+// deleteDevice deletes the device identified by did (without forcing the
+// deletion). A 404 answer is treated as success. Currently unexported, only
+// used in tests.
+func (e *client) deleteDevice(ctx context.Context, did string) error {
+	remove := func(cl *packngo.Client) (*struct{}, error) {
+		_, delErr := cl.Devices.Delete(did, false)
+		if code := httpStatusCode(delErr); code != http.StatusNotFound {
+			return nil, delErr
+		}
+		// 404s may pop up as an after effect of running the back-off
+		// algorithm, and as such should not be propagated.
+		return nil, nil
+	}
+	_, err := wrap(ctx, e, remove)
+	return err
+}
+
+// ListReservations returns the hardware reservations of the project identified
+// by pid. The request asks the API to include the "facility" and "device"
+// relations of every reservation.
+func (e *client) ListReservations(ctx context.Context, pid string) ([]packngo.HardwareReservation, error) {
+	list := func(cl *packngo.Client) ([]packngo.HardwareReservation, error) {
+		withRelations := &packngo.ListOptions{Includes: []string{"facility", "device"}}
+		reservations, _, err := cl.HardwareReservations.List(pid, withRelations)
+		return reservations, err
+	}
+	return wrap(ctx, e, list)
+}
+
+// MoveReservation moves the hardware reservation identified by
+// hardwareReservationDID into the project identified by projectID and returns
+// the reservation as reported by HardwareReservations.Move.
+func (e *client) MoveReservation(ctx context.Context, hardwareReservationDID, projectID string) (*packngo.HardwareReservation, error) {
+	move := func(cl *packngo.Client) (*packngo.HardwareReservation, error) {
+		moved, _, err := cl.HardwareReservations.Move(hardwareReservationDID, projectID)
+		if err != nil {
+			return nil, fmt.Errorf("HardwareReservations.Move: %w", err)
+		}
+		return moved, nil
+	}
+	return wrap(ctx, e, move)
+}
+
+// CreateSSHKey makes sure an SSH key matching r exists. The existing keys are
+// listed first: a key with the same label and the same key material is
+// returned as is, while a key with the same label but different key material
+// is an error. Only when no key carries the label is SSHKeys.Create called.
+func (e *client) CreateSSHKey(ctx context.Context, r *packngo.SSHKeyCreateRequest) (*packngo.SSHKey, error) {
+	ensure := func(cl *packngo.Client) (*packngo.SSHKey, error) {
+		// Look for a key that already uses the requested label.
+		existing, _, err := cl.SSHKeys.List()
+		if err != nil {
+			return nil, fmt.Errorf("SSHKeys.List: %w", err)
+		}
+		for i := range existing {
+			if existing[i].Label != r.Label {
+				continue
+			}
+			if existing[i].Key != r.Key {
+				return nil, fmt.Errorf("key label already in use for a different key")
+			}
+			// Hand out a copy rather than a pointer into the listing.
+			match := existing[i]
+			return &match, nil
+		}
+
+		// The label is unused, so the key has to be created.
+		created, _, err := cl.SSHKeys.Create(r)
+		if err != nil {
+			return nil, fmt.Errorf("SSHKeys.Create: %w", err)
+		}
+		return created, nil
+	}
+	return wrap(ctx, e, ensure)
+}
+
+// UpdateSSHKey sends the update request r for the SSH key identified by id and
+// returns the key as reported by SSHKeys.Update.
+func (e *client) UpdateSSHKey(ctx context.Context, id string, r *packngo.SSHKeyUpdateRequest) (*packngo.SSHKey, error) {
+	update := func(cl *packngo.Client) (*packngo.SSHKey, error) {
+		updated, _, err := cl.SSHKeys.Update(id, r)
+		if err != nil {
+			return nil, fmt.Errorf("SSHKeys.Update: %w", err)
+		}
+		return updated, nil
+	}
+	return wrap(ctx, e, update)
+}
+
+// deleteSSHKey deletes the SSH key identified by id via SSHKeys.Delete.
+// Currently unexported, only used in tests.
+func (e *client) deleteSSHKey(ctx context.Context, id string) error {
+	remove := func(cl *packngo.Client) (struct{}, error) {
+		if _, err := cl.SSHKeys.Delete(id); err != nil {
+			return struct{}{}, fmt.Errorf("SSHKeys.Delete: %w", err)
+		}
+		return struct{}{}, nil
+	}
+	_, err := wrap(ctx, e, remove)
+	return err
+}
+
+// ListSSHKeys returns the SSH keys reported by SSHKeys.List.
+func (e *client) ListSSHKeys(ctx context.Context) ([]packngo.SSHKey, error) {
+	list := func(cl *packngo.Client) ([]packngo.SSHKey, error) {
+		keys, _, err := cl.SSHKeys.List()
+		if err == nil {
+			return keys, nil
+		}
+		return nil, fmt.Errorf("SSHKeys.List: %w", err)
+	}
+	return wrap(ctx, e, list)
+}
+
+// getSSHKey fetches the SSH key identified by id via SSHKeys.Get, passing no
+// extra options. Currently unexported, only used in tests.
+func (e *client) getSSHKey(ctx context.Context, id string) (*packngo.SSHKey, error) {
+	get := func(cl *packngo.Client) (*packngo.SSHKey, error) {
+		key, _, err := cl.SSHKeys.Get(id, nil)
+		if err == nil {
+			return key, nil
+		}
+		return nil, fmt.Errorf("SSHKeys.Get: %w", err)
+	}
+	return wrap(ctx, e, get)
+}
+
+// RebootDevice requests a reboot of the device identified by did via
+// Devices.Reboot and reports only whether that request failed.
+func (e *client) RebootDevice(ctx context.Context, did string) error {
+	reboot := func(cl *packngo.Client) (struct{}, error) {
+		_, err := cl.Devices.Reboot(did)
+		return struct{}{}, err
+	}
+	_, err := wrap(ctx, e, reboot)
+	return err
+}
diff --git a/cloud/equinix/wrapngo/wrapngo_live_test.go b/cloud/equinix/wrapngo/wrapngo_live_test.go
new file mode 100644
index 0000000..549071a
--- /dev/null
+++ b/cloud/equinix/wrapngo/wrapngo_live_test.go
@@ -0,0 +1,344 @@
+package wrapngo
+
+import (
+	"context"
+	"crypto/ed25519"
+	"crypto/rand"
+	"errors"
+	"fmt"
+	"log"
+	"os"
+	"testing"
+	"time"
+
+	"github.com/packethost/packngo"
+	"golang.org/x/crypto/ssh"
+)
+
+// liveTestClient bundles what the live API tests need: the wrapngo client
+// under test, the context used for its calls, the settings read from the
+// environment and the names given to the test resources.
+type liveTestClient struct {
+	// cl is the client under test.
+	cl *client
+	// ctx is passed to the client calls made by the test helpers.
+	ctx context.Context
+	// apipid holds the value of EQUINIX_PROJECT_ID.
+	apipid string
+	// apios holds the value of EQUINIX_DEVICE_OS.
+	apios string
+	// sshKeyLabel is the label of the SSH key created and deleted by the tests.
+	sshKeyLabel string
+	// testDeviceHostname is the hostname of the device created and deleted by
+	// the tests.
+	testDeviceHostname string
+}
+
+// newLiveTestClient builds a liveTestClient from the environment variables
+// EQUINIX_USER, EQUINIX_APIKEY, EQUINIX_PROJECT_ID and EQUINIX_DEVICE_OS. If
+// any of them is empty the calling test is skipped. The context of the
+// returned client is cancelled when the test finishes.
+func newLiveTestClient(t *testing.T) *liveTestClient {
+	t.Helper()
+
+	// All variables are mandatory; they are checked in this order and the first
+	// empty one skips the test.
+	required := []string{
+		"EQUINIX_USER",
+		"EQUINIX_APIKEY",
+		"EQUINIX_PROJECT_ID",
+		"EQUINIX_DEVICE_OS",
+	}
+	env := make(map[string]string, len(required))
+	for _, name := range required {
+		env[name] = os.Getenv(name)
+	}
+	for _, name := range required {
+		if env[name] == "" {
+			t.Skip(name + " must be set.")
+		}
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
+
+	ltc := &liveTestClient{
+		ctx: ctx,
+
+		apipid: env["EQUINIX_PROJECT_ID"],
+		apios:  env["EQUINIX_DEVICE_OS"],
+
+		sshKeyLabel:        "shepherd-livetest-client",
+		testDeviceHostname: "shepherd-livetest-device",
+	}
+	ltc.cl = new(&Opts{
+		User:   env["EQUINIX_USER"],
+		APIKey: env["EQUINIX_APIKEY"],
+	})
+	return ltc
+}
+
+// awaitDeviceState polls the device matching id, pausing one second between
+// polls, and returns nil once the device reaches one of the provided states.
+// It returns a non-nil error in case of an API error (particularly if there
+// exists no device matching id), or if the test context is done while waiting.
+// Requests failing with os.ErrDeadlineExceeded are retried after the same
+// pause.
+func (l *liveTestClient) awaitDeviceState(t *testing.T, id string, states ...string) error {
+	t.Helper()
+
+	// pause waits before the next poll, giving up early if the test context is
+	// done.
+	pause := func() error {
+		select {
+		case <-l.ctx.Done():
+			return fmt.Errorf("while waiting for device (ID: %s): %w", id, l.ctx.Err())
+		case <-time.After(time.Second):
+			return nil
+		}
+	}
+
+	for {
+		d, err := l.cl.GetDevice(l.ctx, l.apipid, id, nil)
+		switch {
+		case errors.Is(err, os.ErrDeadlineExceeded):
+			// A timed-out request is retried, but only after a pause, so that a
+			// persistently slow API is not hammered in a tight loop.
+			if err := pause(); err != nil {
+				return err
+			}
+			continue
+		case err != nil:
+			return fmt.Errorf("while fetching device info: %w", err)
+		case d == nil:
+			return fmt.Errorf("expected the test device (ID: %s) to exist.", id)
+		}
+
+		for _, s := range states {
+			if d.State == s {
+				return nil
+			}
+		}
+		t.Logf("Waiting for device to be provisioned (ID: %s, current state: %q)", id, d.State)
+		if err := pause(); err != nil {
+			return err
+		}
+	}
+}
+
+// cleanup ensures both the test device and the test key are deleted at
+// Equinix. The device is looked up by testDeviceHostname and the key by
+// sshKeyLabel; any failure aborts the test.
+func (l *liveTestClient) cleanup(t *testing.T) {
+	t.Helper()
+
+	t.Logf("Cleaning up.")
+
+	// Ensure the device matching testDeviceHostname is deleted.
+	ds, err := l.cl.ListDevices(l.ctx, l.apipid)
+	if err != nil {
+		// NOTE(review): t.Fatalf would match the rest of this function, but this
+		// is the file's only use of the log import, so switching also requires
+		// dropping that import.
+		log.Fatalf("while listing devices: %v", err)
+	}
+	var td *packngo.Device
+	for i := range ds {
+		if ds[i].Hostname == l.testDeviceHostname {
+			td = &ds[i]
+			break
+		}
+	}
+	if td != nil {
+		t.Logf("Found a test device (ID: %s) that needs to be deleted before progressing further.", td.ID)
+
+		// Devices currently being provisioned can't be deleted. After it's
+		// provisioned, device's state will match either "active", or "failed".
+		// The device ID must be passed explicitly: awaitDeviceState is variadic
+		// in its states, so omitting the ID silently turns "active" into the ID.
+		if err := l.awaitDeviceState(t, td.ID, "active", "failed"); err != nil {
+			t.Fatalf("while waiting for device to be provisioned: %v", err)
+		}
+		if err := l.cl.deleteDevice(l.ctx, td.ID); err != nil {
+			t.Fatalf("while deleting test device: %v", err)
+		}
+	}
+
+	// Ensure the key matching sshKeyLabel is deleted. Every key carrying the
+	// label is removed, not only the first one.
+	ks, err := l.cl.ListSSHKeys(l.ctx)
+	if err != nil {
+		t.Fatalf("while listing SSH keys: %v", err)
+	}
+	for _, k := range ks {
+		if k.Label != l.sshKeyLabel {
+			continue
+		}
+		t.Logf("Found a SSH test key (ID: %s) - deleting...", k.ID)
+		if err := l.cl.deleteSSHKey(l.ctx, k.ID); err != nil {
+			t.Fatalf("while deleting an SSH key: %v", err)
+		}
+		t.Logf("Deleted a SSH test key (ID: %s).", k.ID)
+	}
+}
+
+// createSSHAuthKey generates a fresh ed25519 key pair and returns its public
+// key in OpenSSH authorized_keys format. The private key is discarded. Any
+// failure aborts the calling test immediately, as there is no usable key to
+// continue with.
+func createSSHAuthKey(t *testing.T) string {
+	t.Helper()
+	pub, _, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatalf("while generating SSH key: %v", err)
+	}
+
+	sshpub, err := ssh.NewPublicKey(pub)
+	if err != nil {
+		// Must not fall through: marshalling a nil public key would panic.
+		t.Fatalf("while generating SSH public key: %v", err)
+	}
+	return string(ssh.MarshalAuthorizedKey(sshpub))
+}
+
+// TestLiveAPI performs smoke tests of wrapngo against the real Equinix API. See
+// newLiveTestClient to see which environment variables need to be provided in
+// order for this test to run.
+//
+// The subtests run one after another and depend on each other: the SSH key
+// subtests share sshKeyID and dummySSHPK2, and the device subtests share
+// testDevice. A subtest whose prerequisite was not created skips itself.
+// cleanup is run both before the first and after the last subtest.
+func TestLiveAPI(t *testing.T) {
+	ltc := newLiveTestClient(t)
+	// Remove whatever test device or test key is still present.
+	ltc.cleanup(t)
+
+	cl := ltc.cl
+	ctx := ltc.ctx
+
+	t.Run("ListReservations", func(t *testing.T) {
+		_, err := cl.ListReservations(ctx, ltc.apipid)
+		if err != nil {
+			t.Errorf("while listing hardware reservations: %v", err)
+		}
+	})
+
+	// sshKeyID stays empty unless CreateSSHKey succeeds; the later SSH key
+	// subtests check it to decide whether to skip.
+	var sshKeyID string
+	t.Run("CreateSSHKey", func(t *testing.T) {
+		nk, err := cl.CreateSSHKey(ctx, &packngo.SSHKeyCreateRequest{
+			Label:     ltc.sshKeyLabel,
+			Key:       createSSHAuthKey(t),
+			ProjectID: ltc.apipid,
+		})
+		if err != nil {
+			t.Fatalf("while creating an SSH key: %v", err)
+		}
+		if nk.Label != ltc.sshKeyLabel {
+			t.Errorf("key labels don't match.")
+		}
+		t.Logf("Created an SSH key (ID: %s)", nk.ID)
+		sshKeyID = nk.ID
+	})
+
+	// dummySSHPK2 is the replacement key material set by UpdateSSHKey; GetSSHKey
+	// compares the fetched key against it.
+	var dummySSHPK2 string
+	t.Run("UpdateSSHKey", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		dummySSHPK2 = createSSHAuthKey(t)
+		k, err := cl.UpdateSSHKey(ctx, sshKeyID, &packngo.SSHKeyUpdateRequest{
+			Key: &dummySSHPK2,
+		})
+		if err != nil {
+			t.Fatalf("while updating an SSH key: %v", err)
+		}
+		if k.Key != dummySSHPK2 {
+			t.Errorf("updated SSH key doesn't match the original.")
+		}
+	})
+	t.Run("GetSSHKey", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		k, err := cl.getSSHKey(ctx, sshKeyID)
+		if err != nil {
+			t.Fatalf("while getting an SSH key: %v", err)
+		}
+		// The key is expected to hold the material written by UpdateSSHKey.
+		if k.Key != dummySSHPK2 {
+			t.Errorf("got key contents that don't match the original.")
+		}
+	})
+	t.Run("ListSSHKeys", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		ks, err := cl.ListSSHKeys(ctx)
+		if err != nil {
+			t.Fatalf("while listing SSH keys: %v", err)
+		}
+
+		// Check that our key is part of the list.
+		found := false
+		for _, k := range ks {
+			if k.ID == sshKeyID {
+				found = true
+				break
+			}
+		}
+		if !found {
+			t.Errorf("SSH key not listed.")
+		}
+	})
+
+	// testDevice stays nil unless CreateDevice succeeds; the later device
+	// subtests check it to decide whether to skip.
+	var testDevice *packngo.Device
+	t.Run("CreateDevice", func(t *testing.T) {
+		// Find a provisionable hardware reservation the device will be created with.
+		rvs, err := cl.ListReservations(ctx, ltc.apipid)
+		if err != nil {
+			t.Errorf("while listing hardware reservations: %v", err)
+		}
+		// The first provisionable reservation is used.
+		var rv *packngo.HardwareReservation
+		for _, r := range rvs {
+			if r.Provisionable {
+				rv = &r
+				break
+			}
+		}
+		if rv == nil {
+			t.Skip("could not find a provisionable hardware reservation - skipping...")
+		}
+
+		d, err := cl.CreateDevice(ctx, &packngo.DeviceCreateRequest{
+			Hostname:              ltc.testDeviceHostname,
+			OS:                    ltc.apios,
+			Plan:                  rv.Plan.Slug,
+			HardwareReservationID: rv.ID,
+			ProjectID:             ltc.apipid,
+		})
+		if err != nil {
+			t.Fatalf("while creating a device: %v", err)
+		}
+		t.Logf("Created a new test device (ID: %s)", d.ID)
+		testDevice = d
+	})
+	t.Run("GetDevice", func(t *testing.T) {
+		if testDevice == nil {
+			t.Skip("the test device couldn't have been created - skipping...")
+		}
+
+		d, err := cl.GetDevice(ctx, ltc.apipid, testDevice.ID, nil)
+		if err != nil {
+			t.Fatalf("while fetching device info: %v", err)
+		}
+		if d == nil {
+			t.Fatalf("expected the test device (ID: %s) to exist.", testDevice.ID)
+		}
+		if d.ID != testDevice.ID {
+			t.Errorf("got device ID that doesn't match the original.")
+		}
+	})
+	t.Run("ListDevices", func(t *testing.T) {
+		if testDevice == nil {
+			t.Skip("the test device couldn't have been created - skipping...")
+		}
+
+		// Only checks that the listing is non-empty, not that it contains the
+		// test device.
+		ds, err := cl.ListDevices(ctx, ltc.apipid)
+		if err != nil {
+			t.Errorf("while listing devices: %v", err)
+		}
+		if len(ds) == 0 {
+			t.Errorf("expected at least one device.")
+		}
+	})
+	t.Run("DeleteDevice", func(t *testing.T) {
+		if testDevice == nil {
+			t.Skip("the test device couldn't have been created - skipping...")
+		}
+
+		// Devices currently being provisioned can't be deleted. After it's
+		// provisioned, device's state will match either "active", or "failed".
+		if err := ltc.awaitDeviceState(t, testDevice.ID, "active", "failed"); err != nil {
+			t.Fatalf("while waiting for device to be provisioned: %v", err)
+		}
+		t.Logf("Deleting the test device (ID: %s)", testDevice.ID)
+		if err := cl.deleteDevice(ctx, testDevice.ID); err != nil {
+			t.Fatalf("while deleting a device: %v", err)
+		}
+		// After deletion a not-found error is the expected outcome of the lookup;
+		// any other error, or a device being returned, fails the subtest.
+		d, err := cl.GetDevice(ctx, ltc.apipid, testDevice.ID, nil)
+		if err != nil && !IsNotFound(err) {
+			t.Fatalf("while fetching device info: %v", err)
+		}
+		if d != nil {
+			t.Fatalf("device should not exist.")
+		}
+		t.Logf("Deleted the test device (ID: %s)", testDevice.ID)
+	})
+	t.Run("DeleteSSHKey", func(t *testing.T) {
+		if sshKeyID == "" {
+			t.Skip("SSH key couldn't have been created - skipping...")
+		}
+
+		t.Logf("Deleting the test SSH key (ID: %s)", sshKeyID)
+		if err := cl.deleteSSHKey(ctx, sshKeyID); err != nil {
+			t.Fatalf("couldn't delete an SSH key: %v", err)
+		}
+		// Any error from the lookup is taken as proof that the key is gone.
+		_, err := cl.getSSHKey(ctx, sshKeyID)
+		if err == nil {
+			t.Fatalf("SSH key should not exist")
+		}
+		t.Logf("Deleted the test SSH key (ID: %s)", sshKeyID)
+	})
+
+	// Remove anything a failed or skipped subtest may have left behind.
+	ltc.cleanup(t)
+}