// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
// bazel_cc_fix rewrites include directives in C and C++ code. It rewrites all includes in the target workspace to be
// workspace-relative and additionally supports rewriting includes via a prototxt-based spec file, for example to
// fix up includes for external libraries.
// The rewritten code can then be used in Bazel intra- and inter-workspace without dealing with any copts or
// include-related attributes.
// To know where an include would resolve to, it expects a compilation database (see
// https://clang.llvm.org/docs/JSONCompilationDatabase.html) as an input. It looks at all files in that database and
// their transitive dependencies and rewrites all of them according to the include paths specified in the compilation
// command from the database.
// The compilation database itself is either generated by the original build system or by using intercept-build, which
// intercepts calls to the compiler and records them into a compilation database.
28package main
29
import (
	"encoding/json"
	"flag"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"sort"
	"strings"

	"github.com/golang/protobuf/proto"
	"github.com/mattn/go-shellwords"

	"source.monogon.dev/build/bazel_cc_fix/ccfixspec"
)
46
// compilationDBEntry describes one compiler invocation on a C/C++ source file as recorded in the compilation
// database: the compiler's working directory, its command line and the files involved.
type compilationDBEntry struct {
	// Directory is the working directory of the compiler; relative paths of the entry are resolved against it.
	Directory string `json:"directory"`
	// Command is the compiler invocation as a single shell-style string. It is only consulted when Arguments
	// is empty.
	Command string `json:"command"`
	// Arguments is the compiler invocation as an already-split argument list.
	Arguments []string `json:"arguments"`
	// File is the path of the source file being compiled, either absolute or relative to Directory.
	File string `json:"file"`
	// Output is decoded from the "output" key of the entry.
	Output string `json:"output"`
}
56
57// compilationDB is a collection of compilationDBEntries usually stored in a big JSON-serialized document.
58// https://clang.llvm.org/docs/JSONCompilationDatabase.html
59type compilationDB []compilationDBEntry
60
// rewrites represents a set of include rewrites with the key being the original include directive
// (like "#include <xyz.h>", with whitespace trimmed on both sides) and the value being the directive
// that replaces it.
type rewrites map[string]string

// replacer returns a strings.Replacer which performs all recorded replacements in a single pass.
//
// strings.Replacer resolves several candidates matching at the same position by argument order, so the pairs are
// handed over sorted (longest directive first, ties broken lexicographically) instead of in random map iteration
// order. This keeps the output deterministic and makes the most specific directive win when one directive is a
// prefix of another.
func (r rewrites) replacer() *strings.Replacer {
	directives := make([]string, 0, len(r))
	for from := range r {
		directives = append(directives, from)
	}
	sort.Slice(directives, func(i, j int) bool {
		if len(directives[i]) != len(directives[j]) {
			return len(directives[i]) > len(directives[j])
		}
		return directives[i] < directives[j]
	})

	replacerArgs := make([]string, 0, 2*len(directives))
	for _, from := range directives {
		replacerArgs = append(replacerArgs, from, r[from])
	}
	return strings.NewReplacer(replacerArgs...)
}

// addWorkspace records a rewrite of oldDirective (surrounding whitespace is ignored) to a quote include of
// workspaceRelativePath. The first rewrite recorded for a directive wins; a later, conflicting rewrite for the same
// directive is only reported as a warning and otherwise ignored.
func (r rewrites) addWorkspace(oldDirective, workspaceRelativePath string) {
	normalizedDirective := strings.TrimSpace(oldDirective)
	replacementDirective := fmt.Sprintf("#include \"%s\"", workspaceRelativePath)

	oldRewrite, ok := r[normalizedDirective]
	if !ok {
		r[normalizedDirective] = replacementDirective
		return
	}
	if oldRewrite != replacementDirective {
		log.Printf("WARNING: inconsistent rewrite detected: %s => %s | %s", normalizedDirective, oldRewrite, replacementDirective)
	}
}
85
86// Type rewriteMetadata is a map of a file path to rewrite metadata for that file
87type rewriteMetadata map[string]rewriteMetadataFile
88
89type rewriteMetadataFile struct {
90 rewrites rewrites
91 source string
92}
93
// Command line flags.
var (
	// compilationDBPath is the location of the JSON compilation database to analyze.
	compilationDBPath = flag.String("compilation_db", "", "Path to the compilation_database.json file for the project")
	// workspacePath is the root of the workspace; rewritten includes are made relative to it.
	workspacePath = flag.String("workspace", "", "Path to the workspace root")
	// specPath is the location of the text-format ccfixspec.CCFixSpec.
	specPath = flag.String("spec", "", "Path to the spec (ccfixspec.CCFixSpec)")
)
99
// Patterns recognizing the compiler flags which add include search directories. In each of them the first capture
// group is the directory when it is attached to the flag; it is empty when the directory is passed as the next
// argument instead.
var (
	// reGlobalInclude matches -I flags.
	reGlobalInclude = regexp.MustCompile(`^-I(.*)`)
	// reSystemInclude matches -isystem flags.
	reSystemInclude = regexp.MustCompile(`^-isystem(.*)`)
	// reQuoteInclude matches -iquote flags.
	reQuoteInclude = regexp.MustCompile(`^-iquote(.*)`)
)
105
var (
	// reIncludeDirective matches a C preprocessor include directive at the start of a line. Capture group 1 is the
	// opening delimiter (< or "), group 2 the included path and group 3 the closing delimiter.
	// The path is matched up to the first closing delimiter on the same line, so a trailing comment which itself
	// contains quotes or angle brackets (`#include <a.h> // see <b.h>`) is neither captured as part of the path nor
	// as part of the directive.
	reIncludeDirective = regexp.MustCompile(`(?m:^\s*#\s*include\s*([<"])([^>"\n]*)([>"]))`)
)
109
110// applyReplaceDirectives applies all directives of the given replaceType in directives to originalPath and returns the
111// resulting string. If returnUnmodified is unset, it returns an empty string when no replacements were performed,
112// otherwise it returns the unmodified originalPath.
113// The first rewrite wins, it does not do any recursive processing.
114func applyReplaceDirectives(directives []*ccfixspec.Replace, replaceType ccfixspec.Replace_Type, originalPath string, returnUnmodified bool) string {
115 for _, d := range directives {
116 if d.Type != replaceType {
117 continue
118 }
119 if d.From == originalPath {
120 return d.To
121 } else if strings.HasSuffix(d.From, "/") && strings.HasPrefix(originalPath, d.From) {
122 return d.To + strings.TrimPrefix(originalPath, d.From)
123 }
124 }
125 if returnUnmodified {
126 return originalPath
127 }
128 return ""
129}
130
131// findFileInWorkspace takes a path from a C include directive and uses the given search path to find its absolute
132// path. If that absolute path is outside the workspace, it returns an empty string, otherwise it returns the path
133// of the file relative to the workspace. It pretends that all files in isGeneratedFile exist on the filesystem.
134func findFileInWorkspace(searchPath []string, inclFile string, isGeneratedFile map[string]bool) string {
135 var inclPath string
136 for _, path := range searchPath {
137 inclPathTry := filepath.Join(path, inclFile)
138 if isGeneratedFile[inclPathTry] {
139 inclPath = inclPathTry
140 break
141 }
142 if _, err := os.Stat(inclPathTry); err == nil {
143 inclPath = inclPathTry
144 break
145 }
146 }
147 if inclPath == "" {
148 // We haven't found the included file. This can happen for system includes (<stdio.h>) or includes from
149 // other operating systems.
150 return ""
151 }
152
153 // Ignore all include directives that don't resolve into our workspace after processing
154 if !filepath.HasPrefix(inclPath, *workspacePath) {
155 return ""
156 }
157
158 workspaceRelativeFilePath, err := filepath.Rel(*workspacePath, inclPath)
159 if err != nil {
160 panic(err)
161 }
162 return workspaceRelativeFilePath
163}
164
165// fixIncludesAndGetRefs opens a file, looks at all its includes, records rewriting data into rewriteMetadata and
166// returns all files included by the file for further analysis.
167func (m rewriteMetadata) fixIncludesAndGetRefs(filePath string, quoteIncludes, systemIncludes []string, spec *ccfixspec.CCFixSpec, isGeneratedFile map[string]bool) []string {
168 meta, ok := m[filePath]
169 if !ok {
170 cSourceRaw, err := ioutil.ReadFile(filePath)
171 if err != nil {
172 log.Printf("failed to open source file: %v", err)
173 return nil
174 }
175 cSource := string(cSourceRaw)
176 m[filePath] = rewriteMetadataFile{
177 rewrites: make(rewrites),
178 source: cSource,
179 }
180 meta = m[filePath]
181 }
182 var includeFiles []string
183 // Find all include directives
184 out := reIncludeDirective.FindAllStringSubmatch(meta.source, -1)
185 for _, incl := range out {
186 inclDirective := incl[0]
187 inclType := incl[1]
188 inclFile := incl[2]
189 var workspaceRelativeFilePath string
190 var searchPath []string
191 if inclType == "\"" {
192 searchPath = quoteIncludes
193 } else if inclType == "<" {
194 searchPath = systemIncludes
195 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_SYSTEM, inclFile, false)
196 }
197 if workspaceRelativeFilePath == "" {
198 workspaceRelativeFilePath = findFileInWorkspace(searchPath, inclFile, isGeneratedFile)
199 }
200 workspaceRelativeFilePath = applyReplaceDirectives(spec.Replace, ccfixspec.Replace_WORKSPACE, workspaceRelativeFilePath, true)
201
202 // Mark generated files as generated
203 foundGenerated := isGeneratedFile[filepath.Join(*workspacePath, workspaceRelativeFilePath)]
204
205 if !foundGenerated {
206 includeFiles = append(includeFiles, filepath.Join(*workspacePath, workspaceRelativeFilePath))
207 }
208
209 // Pretend that a generated file exists at the given path when stripping the BuildDir prefix. This is
210 // generally true for all out-of-tree build systems and saves the user from needing to manually specify
211 // lots of GeneratedFiles.
212 if spec.BuildDir != "" && filepath.HasPrefix(workspaceRelativeFilePath, spec.BuildDir+"/") {
213 workspaceRelativeFilePath = filepath.Clean(strings.TrimPrefix(workspaceRelativeFilePath, spec.BuildDir+"/"))
214 foundGenerated = true
215 }
216
217 // Shorten include paths when both files are in the same directory except when a generated file is involved
218 // as these end up in physically different locations and need to be referenced using a full workspace-
219 // relative path
220 if !foundGenerated && filepath.Dir(filePath) == filepath.Dir(filepath.Join(*workspacePath, workspaceRelativeFilePath)) {
221 workspaceRelativeFilePath = filepath.Base(workspaceRelativeFilePath)
222 }
223 // Don't perform rewrites when both include directives are semantically equivalent
224 if workspaceRelativeFilePath == inclFile && inclType == "\"" {
225 continue
226 }
227 meta.rewrites.addWorkspace(inclDirective, workspaceRelativeFilePath)
228 }
229 return includeFiles
230}
231
232// getIncludeDirs takes a compilation database entry and returns the search paths for both system and quote includes
233func getIncludeDirs(entry compilationDBEntry) (quoteIncludes []string, systemIncludes []string, err error) {
234 // Normalize arguments
235 if len(entry.Arguments) == 0 {
236 commandArgs, err := shellwords.Parse(entry.Command)
237 if err != nil {
238 return []string{}, []string{}, fmt.Errorf("failed to parse command: %w", err)
239 }
240 entry.Arguments = commandArgs
241 }
242
243 // Parse out and generate include search paths
244 var preSystemIncludes []string
245 var systemIncludesRaw []string
246 var quoteIncludesRaw []string
247 filePath := entry.File
248 if !filepath.IsAbs(entry.File) {
249 filePath = filepath.Join(entry.Directory, entry.File)
250 }
251 quoteIncludesRaw = append(quoteIncludesRaw, filepath.Dir(filePath))
252 for i, arg := range entry.Arguments {
253 includeMatch := reGlobalInclude.FindStringSubmatch(arg)
254 if len(includeMatch) > 0 {
255 if len(includeMatch[1]) == 0 {
256 preSystemIncludes = append(preSystemIncludes, entry.Arguments[i+1])
257 } else {
258 preSystemIncludes = append(preSystemIncludes, includeMatch[1])
259 }
260 }
261 includeMatch = reSystemInclude.FindStringSubmatch(arg)
262 if len(includeMatch) > 0 {
263 if len(includeMatch[1]) == 0 {
264 systemIncludesRaw = append(systemIncludesRaw, entry.Arguments[i+1])
265 } else {
266 systemIncludesRaw = append(systemIncludesRaw, includeMatch[1])
267 }
268 }
269 includeMatch = reQuoteInclude.FindStringSubmatch(arg)
270 if len(includeMatch) > 0 {
271 if len(includeMatch[1]) == 0 {
272 quoteIncludesRaw = append(quoteIncludesRaw, entry.Arguments[i+1])
273 } else {
274 quoteIncludesRaw = append(quoteIncludesRaw, includeMatch[1])
275 }
276 }
277 }
278 systemIncludesRaw = append(preSystemIncludes, systemIncludesRaw...)
279 quoteIncludesRaw = append(quoteIncludesRaw, systemIncludesRaw...)
280
281 // Deduplicate and keep the first one
282 systemIncludeSeen := make(map[string]bool)
283 quoteIncludeSeen := make(map[string]bool)
284 for _, systemInclude := range systemIncludesRaw {
285 if !filepath.IsAbs(systemInclude) {
286 systemInclude = filepath.Join(entry.Directory, systemInclude)
287 }
288 if !systemIncludeSeen[systemInclude] {
289 systemIncludeSeen[systemInclude] = true
290 systemIncludes = append(systemIncludes, systemInclude)
291 }
292 }
293 for _, quoteInclude := range quoteIncludesRaw {
294 if !filepath.IsAbs(quoteInclude) {
295 quoteInclude = filepath.Join(entry.Directory, quoteInclude)
296 }
297 if !quoteIncludeSeen[quoteInclude] {
298 quoteIncludeSeen[quoteInclude] = true
299 quoteIncludes = append(quoteIncludes, quoteInclude)
300 }
301 }
302 return
303}
304
305func main() {
306 flag.Parse()
307 compilationDBFile, err := os.Open(*compilationDBPath)
308 if err != nil {
309 log.Fatalf("failed to open compilation db: %v", err)
310 }
311 var compilationDB compilationDB
312 if err := json.NewDecoder(compilationDBFile).Decode(&compilationDB); err != nil {
313 log.Fatalf("failed to read compilation db: %v", err)
314 }
315 specRaw, err := ioutil.ReadFile(*specPath)
316 var spec ccfixspec.CCFixSpec
317 if err := proto.UnmarshalText(string(specRaw), &spec); err != nil {
318 log.Fatalf("failed to load spec: %v", err)
319 }
320
321 isGeneratedFile := make(map[string]bool)
322 for _, entry := range spec.GeneratedFile {
323 isGeneratedFile[filepath.Join(*workspacePath, entry.Path)] = true
324 }
325
326 rewriteMetadata := make(rewriteMetadata)
327
328 // Iterate over all source files in the compilation database and analyze them one-by-one
329 for _, entry := range compilationDB {
330 quoteIncludes, systemIncludes, err := getIncludeDirs(entry)
331 if err != nil {
332 log.Println(err)
333 continue
334 }
335 filePath := entry.File
336 if !filepath.IsAbs(entry.File) {
337 filePath = filepath.Join(entry.Directory, entry.File)
338 }
339 includedFiles := rewriteMetadata.fixIncludesAndGetRefs(filePath, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
340
341 // seen stores the path of already-visited files, similar to #pragma once
342 seen := make(map[string]bool)
343 // rec recursively resolves includes and records rewrites
344 var rec func([]string)
345 rec = func(files []string) {
346 for _, f := range files {
347 if seen[f] {
348 continue
349 }
350 seen[f] = true
351 icf2 := rewriteMetadata.fixIncludesAndGetRefs(f, quoteIncludes, systemIncludes, &spec, isGeneratedFile)
352 rec(icf2)
353 }
354 }
355 rec(includedFiles)
356 }
357
358 // Perform all recorded rewrites on the actual files
359 for file, rew := range rewriteMetadata {
360 outFile, err := os.Create(file)
361 if err != nil {
362 log.Fatalf("failed to open file for writing output: %v", err)
363 }
364 defer outFile.Close()
365 if _, err := rew.rewrites.replacer().WriteString(outFile, rew.source); err != nil {
366 log.Fatalf("failed to write file %v: %v", file, err)
367 }
368 }
369}