osbase/blockdev: implement copy_file_range optimization

This change enables the use of the copy_file_range syscall on Linux when
copying from an os.File to a blockdev.File. This speeds up building of
system images, especially with a file system which supports reflinks.

The implementation is partially based on the implementation in the Go
standard library for copy_file_range between two os.File in
src/os/zero_copy_linux.go and src/internal/poll/copy_file_range_unix.go.
We can't use that implementation, because it only supports using the
file offset for both source and destination, but we want to provide the
destination offset as an argument. To support this, the ReaderFromAt
interface is introduced.

With these changes, copy_file_range is now used when building system
images, for both the rootfs and files on the FAT32 boot partition. If
the file system supports it (e.g. XFS), reflinks will be used for the
rootfs, which means no data is copied. For files on the FAT32 partition,
reflinks probably can't be used, because file data there is only aligned
to 512 bytes, while reflinking requires 4096-byte alignment on my
system.

Change-Id: Ie42b5834e6d3f63a5cc1f347d2681d8a6bb5c006
Reviewed-on: https://review.monogon.dev/c/monogon/+/4293
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/osbase/blockdev/blockdev.go b/osbase/blockdev/blockdev.go
index 5eb7fe8..9877186 100644
--- a/osbase/blockdev/blockdev.go
+++ b/osbase/blockdev/blockdev.go
@@ -97,6 +97,25 @@
 	Sync() error
 }
 
+// ReaderFromAt is similar to [io.ReaderFrom], except that the write starts at
+// the explicit offset off instead of at the destination's file offset.
+type ReaderFromAt interface {
+	ReadFromAt(r io.Reader, off int64) (n int64, err error)
+}
+
+// writerOnly wraps an [io.Writer] and hides all its other methods (such as
+// ReadFrom), so that [io.Copy] cannot dispatch back into ReadFrom.
+type writerOnly struct {
+	io.Writer
+}
+
+// genericReadFromAt copies r to b through a ReadWriteSeeker positioned at off.
+// It does not use b.ReadFromAt, which prevents recursive calls.
+func genericReadFromAt(b BlockDev, r io.Reader, off int64) (int64, error) {
+	w := &writerOnly{Writer: &ReadWriteSeeker{b: b, currPos: off}}
+	return io.Copy(w, r)
+}
+
 func NewRWS(b BlockDev) *ReadWriteSeeker {
 	return &ReadWriteSeeker{b: b}
 }
@@ -120,6 +139,17 @@
 	return
 }
 
+func (s *ReadWriteSeeker) ReadFrom(r io.Reader) (n int64, err error) {
+	rfa, rfaOK := s.b.(ReaderFromAt) // use the device's offset-based copy if it has one
+	if !rfaOK {
+		w := &writerOnly{Writer: s} // hide ReadFrom so io.Copy does not recurse into it
+		return io.Copy(w, r)
+	}
+	n, err = rfa.ReadFromAt(r, s.currPos)
+	s.currPos += n // advance by the reported byte count, also when err is non-nil
+	return
+}
+
 func (s *ReadWriteSeeker) Seek(offset int64, whence int) (int64, error) {
 	switch whence {
 	default:
@@ -203,6 +233,40 @@
 	return s.b.WriteAt(p, bOff)
 }
 
+func (s *Section) ReadFromAt(r io.Reader, off int64) (n int64, err error) {
+	rfa, rfaOK := s.b.(ReaderFromAt)
+	if !rfaOK {
+		return genericReadFromAt(s, r, off) // fallback: generic copy through s itself
+	}
+	bOff := off + (s.startBlock * s.b.BlockSize())      // off translated to a byte offset in s.b
+	bytesToEnd := (s.endBlock * s.b.BlockSize()) - bOff // bytes between bOff and the section end
+	if off < 0 || bytesToEnd < 0 {
+		return 0, ErrOutOfBounds
+	}
+	ur := r
+	lr, lrOK := r.(*io.LimitedReader)
+	if lrOK {
+		if bytesToEnd >= lr.N {
+			return rfa.ReadFromAt(r, bOff) // r's own limit already fits in the section
+		}
+		ur = lr.R // read the underlying reader; the tighter bytesToEnd limit is applied below
+	}
+	n, err = rfa.ReadFromAt(io.LimitReader(ur, bytesToEnd), bOff)
+	if lrOK {
+		lr.N -= n // lr was bypassed above, so account for the bytes taken from lr.R
+	}
+	if err == nil && n == bytesToEnd {
+		// The section is now full: probe r for one more byte, which would not fit.
+		moreN, moreErr := io.CopyN(io.Discard, r, 1)
+		if moreN != 0 {
+			err = ErrOutOfBounds
+		} else if moreErr != io.EOF {
+			err = moreErr // the probe failed for a reason other than end of input
+		}
+	}
+	return
+}
+
 func (s *Section) BlockCount() int64 {
 	return s.endBlock - s.startBlock
 }