| Tim Windelschmidt | 6d33a43 | 2025-02-04 14:34:25 +0100 | [diff] [blame] | 1 | // Copyright The Monogon Project Authors. |
| 2 | // SPDX-License-Identifier: Apache-2.0 |
| 3 | |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 4 | package fat32 |
| 5 | |
| 6 | import ( |
| 7 | "errors" |
| 8 | "fmt" |
| 9 | "math" |
| 10 | "regexp" |
| 11 | "strings" |
| 12 | ) |
| 13 | |
| 14 | // By default, DOS names would be encoded as what Microsoft calls the OEM |
| 15 | // code page. This is however dependent on the code page settings of the |
| 16 | // OS reading the file name as it's not mentioned in FAT32 metadata. |
| 17 | // To get maximum compatibility and make it easy to read in hex editors |
| 18 | // this only encodes ASCII characters and not any specific code page. |
| 19 | // This can still result in garbled data when using a non-latin code page, |
| 20 | // but this is unavoidable. |
| 21 | // This is legal as there are no specific requirements for generating these |
| 22 | // DOS names and any semi-modern system should use the unicode filenames |
| 23 | // anyways. |
| 24 | |
// invalidDOSNameChar matches every single character which is not allowed in
// a generated DOS 8.3 name, i.e. everything except upper-case ASCII letters,
// digits and the punctuation characters !#$%&'()@^_`{}~-.
// The expression is intentionally not anchored: it is used with
// ReplaceAllString to strip all such characters from name parts of arbitrary
// length. An anchored expression would only ever match a part consisting of
// exactly one invalid character and leave everything else untouched.
var invalidDOSNameChar = regexp.MustCompile("[^A-Z0-9!#$%&'()@^_\x60{}~-]")
| 26 | |
// validDOSName matches (upper-cased) names which can be used as DOS 8.3 file
// names without any modification and which consist only of ASCII characters:
// up to eight allowed characters, optionally followed by a dot and one to
// three allowed characters.
// Capture group 1 is the base name, capture group 2 is the extension
// including its leading dot (empty if there is no extension).
// Both parts use the same character class. The dash is placed last in the
// class so that it is a literal dash and does not form a character range.
var validDOSName = regexp.MustCompile(`^([A-Z0-9!#$%&'()@^_\x60{}~-]{0,8})(\.[A-Z0-9!#$%&'()@^_\x60{}~-]{1,3})?$`)
| 30 | |
| Jan Schär | c1b6df4 | 2025-03-20 08:52:18 +0000 | [diff] [blame^] | 31 | func makeUniqueDOSNames(nodes []*node) error { |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 32 | taken := make(map[[11]byte]bool) |
| Jan Schär | c1b6df4 | 2025-03-20 08:52:18 +0000 | [diff] [blame^] | 33 | var lossyNameNodes []*node |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 34 | // Make two passes to ensure that names can always be passed through even |
| 35 | // if they would conflict with a generated name. |
| Jan Schär | c1b6df4 | 2025-03-20 08:52:18 +0000 | [diff] [blame^] | 36 | for _, i := range nodes { |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 37 | for j := range i.dosName { |
| 38 | i.dosName[j] = ' ' |
| 39 | } |
| 40 | nameUpper := strings.ToUpper(i.Name) |
| 41 | dosParts := validDOSName.FindStringSubmatch(nameUpper) |
| 42 | if dosParts != nil { |
| 43 | // Name is pass-through |
| Tim Windelschmidt | 5e460a9 | 2024-04-11 01:33:09 +0200 | [diff] [blame] | 44 | copy(i.dosName[:8], dosParts[1]) |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 45 | if len(dosParts[2]) > 0 { |
| 46 | // Skip the dot, it is implicit |
| Tim Windelschmidt | 5e460a9 | 2024-04-11 01:33:09 +0200 | [diff] [blame] | 47 | copy(i.dosName[8:], dosParts[2][1:]) |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 48 | } |
| 49 | if taken[i.dosName] { |
| 50 | // Mapping is unique, complain about the actual file name, not |
| 51 | // the 8.3 one |
| 52 | return fmt.Errorf("name %q occurs more than once in the same directory", i.Name) |
| 53 | } |
| 54 | taken[i.dosName] = true |
| 55 | continue |
| 56 | } |
| Jan Schär | c1b6df4 | 2025-03-20 08:52:18 +0000 | [diff] [blame^] | 57 | lossyNameNodes = append(lossyNameNodes, i) |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 58 | } |
| 59 | // Willfully ignore the recommended short name generation algorithm as it |
| 60 | // requires tons of bookkeeping and doesn't result in stable names so |
| 61 | // cannot be relied on anyway. |
| 62 | // A FAT32 directory is limited to 2^16 entries (in practice less than half |
| 63 | // of that because of long file name entries), so 4 hex characters |
| 64 | // guarantee uniqueness, regardless of the rest of name. |
| 65 | var nameIdx int |
| Jan Schär | c1b6df4 | 2025-03-20 08:52:18 +0000 | [diff] [blame^] | 66 | for _, i := range lossyNameNodes { |
| Lorenz Brun | bd2ce6d | 2022-07-22 00:00:13 +0000 | [diff] [blame] | 67 | nameUpper := strings.ToUpper(i.Name) |
| 68 | dotParts := strings.Split(nameUpper, ".") |
| 69 | for j := range dotParts { |
| 70 | // Remove all invalid chars |
| 71 | dotParts[j] = invalidDOSNameChar.ReplaceAllString(dotParts[j], "") |
| 72 | } |
| 73 | var fileName string |
| 74 | lastDotPart := dotParts[len(dotParts)-1] |
| 75 | if len(dotParts) > 1 && len(dotParts[0]) > 0 && len(lastDotPart) > 0 { |
| 76 | // We have a valid 8.3 extension |
| 77 | copy(i.dosName[8:], lastDotPart) |
| 78 | fileName = strings.Join(dotParts[:len(dotParts)-1], "") |
| 79 | } else { |
| 80 | fileName = strings.Join(dotParts[:], "") |
| 81 | } |
| 82 | copy(i.dosName[:4], fileName) |
| 83 | |
| 84 | for { |
| 85 | copy(i.dosName[4:], fmt.Sprintf("%04X", nameIdx)) |
| 86 | nameIdx++ |
| 87 | if nameIdx >= math.MaxUint16 { |
| 88 | return errors.New("invariant violated: unable to find unique name with 16 bit counter in 16 bit space") |
| 89 | } |
| 90 | if !taken[i.dosName] { |
| 91 | break |
| 92 | } |
| 93 | } |
| 94 | } |
| 95 | return nil |
| 96 | } |