// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package main
18
19import (
20 "fmt"
21 "io"
Lorenz Brunc88c82d2020-05-08 14:35:04 +020022 "io/ioutil"
Serge Bazanski581b0bd2020-03-12 13:36:43 +010023 "os"
24 "path/filepath"
25 "strings"
26 "syscall"
27
28 "go.uber.org/zap"
29 "golang.org/x/sys/unix"
30)
31
32// switchRoot moves the root from initramfs into a tmpfs
33// This is necessary because you cannot pivot_root from a initramfs (and runsc wants to do that).
34// In the future, we should instead use something like squashfs instead of an initramfs and just nuke this.
35func switchRoot(log *zap.Logger) error {
36 // We detect the need to remount to tmpfs over env vars.
37 // The first run of /init (from initramfs) will not have this var, and will be re-exec'd from a new tmpfs root with
38 // that variable set.
39 witness := "SIGNOS_REMOUNTED"
40
41 // If the witness env var is found in the environment, it means we are ready to go.
42 environ := os.Environ()
43 for _, env := range environ {
44 if strings.HasPrefix(env, witness+"=") {
45 log.Info("Smalltown running in tmpfs root")
46 return nil
47 }
48 }
49
50 // Otherwise, we need to remount to a tmpfs.
51 environ = append(environ, witness+"=yes")
52 log.Info("Smalltown running in initramfs, remounting to tmpfs...")
53
54 // Make note of all directories we have to make and files that we have to copy.
55 paths := []string{}
56 dirs := []string{}
57 err := filepath.Walk("/", func(path string, info os.FileInfo, err error) error {
58 if err != nil {
59 return err
60 }
61 if path == "/" {
62 return nil
63 }
64 // /dev is prepopulated by the initramfs, skip that. The target root uses devtmpfs.
65 if path == "/dev" || strings.HasPrefix(path, "/dev/") {
66 return nil
67 }
68
69 if info.IsDir() {
70 dirs = append(dirs, path)
71 } else {
72 paths = append(paths, path)
73 }
74
75 return nil
76 })
77 if err != nil {
78 return fmt.Errorf("could not list root files: %w", err)
79 }
80
81 log.Info("Copying to tmpfs", zap.Strings("paths", paths), zap.Strings("dirs", dirs))
82
83 // Make new root at /mnt
84 if err := os.Mkdir("/mnt", 0755); err != nil {
85 return fmt.Errorf("could not make /mnt: %w", err)
86 }
87 // And mount a tmpfs on it
88 if err := unix.Mount("tmpfs", "/mnt", "tmpfs", 0, ""); err != nil {
89 return fmt.Errorf("could not mount tmpfs on /mnt: %w", err)
90 }
91
92 // Make all directories. Since filepath.Walk is lexicographically ordered, we don't need to ensure that the parent
93 // exists.
94 for _, src := range dirs {
95 stat, err := os.Stat(src)
96 if err != nil {
97 return fmt.Errorf("Stat(%q): %w", src, err)
98 }
99 dst := "/mnt" + src
100 err = os.Mkdir(dst, stat.Mode())
101 if err != nil {
102 return fmt.Errorf("Mkdir(%q): %w", dst, err)
103 }
104 }
105
106 // Move all files over. Parent directories will exist by now.
107 for _, src := range paths {
108 stat, err := os.Stat(src)
109 if err != nil {
110 return fmt.Errorf("Stat(%q): %w", src, err)
111 }
112 dst := "/mnt" + src
113
114 // Copy file.
115 sfd, err := os.Open(src)
116 if err != nil {
117 return fmt.Errorf("Open(%q): %w", src, err)
118 }
119 dfd, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, stat.Mode())
120 if err != nil {
121 sfd.Close()
122 return fmt.Errorf("OpenFile(%q): %w", dst, err)
123 }
124 _, err = io.Copy(dfd, sfd)
125
126 sfd.Close()
127 dfd.Close()
128 if err != nil {
129 return fmt.Errorf("Copying %q failed: %w", src, err)
130 }
131
132 // Remove the old file.
133 err = unix.Unlink(src)
134 if err != nil {
135 return fmt.Errorf("Unlink(%q): %w", src, err)
136 }
137 }
138
139 // Set up target filesystems.
140 for _, el := range []struct {
141 dir string
142 fs string
143 flags uintptr
144 }{
145 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
146 {"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV},
147 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID},
148 {"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID},
149 } {
150 if err := os.Mkdir("/mnt"+el.dir, 0755); err != nil {
151 return fmt.Errorf("could not make /mnt%s: %w", el.dir, err)
152 }
153 if err := unix.Mount(el.fs, "/mnt"+el.dir, el.fs, el.flags, ""); err != nil {
154 return fmt.Errorf("could not mount %s on /mnt%s: %w", el.fs, el.dir, err)
155 }
156 }
157
Lorenz Brunc88c82d2020-05-08 14:35:04 +0200158 // Mount all available CGroups for v1 (v2 uses a single unified hierarchy and is not supported by our runtimes yet)
159 if unix.Mount("tmpfs", "/mnt/sys/fs/cgroup", "tmpfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, ""); err != nil {
160 panic(err)
161 }
162 cgroupsRaw, err := ioutil.ReadFile("/mnt/proc/cgroups")
163 if err != nil {
164 panic(err)
165 }
166
167 cgroupLines := strings.Split(string(cgroupsRaw), "\n")
168 for _, cgroupLine := range cgroupLines {
169 if cgroupLine == "" || strings.HasPrefix(cgroupLine, "#") {
170 continue
171 }
172 cgroupParts := strings.Split(cgroupLine, "\t")
173 cgroupName := cgroupParts[0]
174 if err := os.Mkdir("/mnt/sys/fs/cgroup/"+cgroupName, 0755); err != nil {
175 panic(err)
176 }
177 if err := unix.Mount("cgroup", "/mnt/sys/fs/cgroup/"+cgroupName, "cgroup", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, cgroupName); err != nil {
178 panic(err)
179 }
180 }
181
182 // Enable hierarchical memory accounting
183 useMemoryHierarchy, err := os.OpenFile("/mnt/sys/fs/cgroup/memory/memory.use_hierarchy", os.O_RDWR, 0)
184 if err != nil {
185 panic(err)
186 }
187 if _, err := useMemoryHierarchy.WriteString("1"); err != nil {
188 panic(err)
189 }
190 useMemoryHierarchy.Close()
191
Serge Bazanski581b0bd2020-03-12 13:36:43 +0100192 // Chroot to new root.
193 // This is adapted from util-linux's switch_root.
194 err = os.Chdir("/mnt")
195 if err != nil {
196 return fmt.Errorf("could not chdir to /mnt: %w", err)
197 }
198 err = syscall.Mount("/mnt", "/", "", syscall.MS_MOVE, "")
199 if err != nil {
200 return fmt.Errorf("could not remount /mnt to /: %w", err)
201 }
202 err = syscall.Chroot(".")
203 if err != nil {
204 return fmt.Errorf("could not chroot to new root: %w", err)
205 }
206
207 // Re-exec into new init with new environment
208 return unix.Exec("/init", os.Args, environ)
209}