blob: d3532a339565ad72adf3829a0dc73fe9ed34231c [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Mateusz Zalega3ccf6962023-01-23 17:01:40 +00004// Package psample provides a receiver for sampled network packets using the
5// Netlink psample interface.
6package psample
7
8import (
9 "fmt"
10
11 "github.com/mdlayher/genetlink"
12 "github.com/mdlayher/netlink"
13)
14
// attrId is the numeric identifier of a psample netlink message attribute.
// The attribute type reported by the netlink attribute decoder is converted
// to this type before being matched against the known identifiers.
//
// Identifier numbers are based on psample kernel module sources:
// https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/include/uapi/linux/psample.h?h=v5.15.89#n5
type attrId uint16
19
// Known psample attribute identifiers, numbered consecutively from zero.
// The values are derived from each constant's position in this block, so
// entries must not be reordered, removed or inserted in the middle. Not all
// of them are consumed by decode; the remaining ones are still declared so
// that the numbering of the following identifiers stays correct.
//
// The trailing comment on a constant, where present, names the payload type
// of the attribute.
const (
	aIIfIndex      attrId = iota // u16
	aOIfIndex                    // u16
	aOrigSize                    // u32
	aSampleGroup                 // u32
	aGroupSeq                    // u32
	aSampleRate                  // u32
	aData                        // []byte
	aGroupRefcount               // u32
	aTunnel

	aPad
	aOutTC     // u16
	aOutTCOCC  // u64
	aLatency   // u64, nanoseconds
	aTimestamp // u64, nanoseconds
	aProto     // u16
)
38
// Packet contains the sampled packet in its raw form, along with its
// 'psample' metadata.
//
// Each field corresponds to one psample netlink attribute. A field whose
// attribute was not present in the received message keeps its zero value.
type Packet struct {
	// IncomingInterfaceIndex is the incoming interface index of the packet or 0
	// if not applicable.
	IncomingInterfaceIndex uint16
	// OutgoingInterfaceIndex is the outgoing interface index of the packet or 0
	// if not applicable.
	OutgoingInterfaceIndex uint16
	// OriginalSize is the packet's original size in bytes without any
	// truncation. It may therefore be larger than len(Data).
	OriginalSize uint32
	// SampleGroup is the sample group to which this packet belongs. This is set
	// by the sampling action and can be used to differentiate different
	// sampling streams.
	SampleGroup uint32
	// GroupSequence is a monotonically-increasing counter of packets sampled
	// for each sample group.
	GroupSequence uint32
	// SampleRate is the sampling rate (1 in SampleRate packets) used to capture
	// this packet.
	SampleRate uint32
	// Data contains the packet data up to the specified size for truncation.
	Data []byte

	// The following attributes are only available on kernel versions 5.13+

	// Latency is the sampled packet's latency as indicated by psample. It's
	// expressed in nanoseconds.
	Latency uint64
	// Timestamp marks time of the packet's sampling. It's set by the kernel, and
	// expressed in Unix nanoseconds.
	Timestamp uint64
}
73
74// decode converts raw generic netlink message attributes into a Packet. In
75// cases where some of the known psample attributes were left unspecified in
76// the message, appropriate Packet member variables will be left with their
77// zero values.
78func decode(b []byte) (*Packet, error) {
79 ad, err := netlink.NewAttributeDecoder(b)
80 if err != nil {
81 return nil, err
82 }
83
84 var p Packet
85 for ad.Next() {
86 switch attrId(ad.Type()) {
87 case aIIfIndex:
88 p.IncomingInterfaceIndex = ad.Uint16()
89 case aOIfIndex:
90 p.OutgoingInterfaceIndex = ad.Uint16()
91 case aOrigSize:
92 p.OriginalSize = ad.Uint32()
93 case aSampleGroup:
94 p.SampleGroup = ad.Uint32()
95 case aGroupSeq:
96 p.GroupSequence = ad.Uint32()
97 case aSampleRate:
98 p.SampleRate = ad.Uint32()
99 case aData:
100 p.Data = ad.Bytes()
101 case aLatency:
102 p.Latency = ad.Uint64()
103 case aTimestamp:
104 p.Timestamp = ad.Uint64()
Tim Windelschmidt9b2c1562024-04-11 01:39:25 +0200105 default:
Mateusz Zalega3ccf6962023-01-23 17:01:40 +0000106 }
107 }
108 return &p, nil
109}
110
111// Subscribe returns a NetlinkSocket that's already subscribed to "packets"
112// psample multicast group, which makes it ready to receive packet samples.
113// Close should be called on the returned socket.
114func Subscribe() (*genetlink.Conn, error) {
115 // Create a netlink socket.
116 c, err := genetlink.Dial(nil)
117 if err != nil {
118 return nil, fmt.Errorf("while dialing netlink socket: %w", err)
119 }
120
121 // Lookup the netlink family id associated with psample kernel module.
122 f, err := c.GetFamily("psample")
123 if err != nil {
124 c.Close()
125 return nil, fmt.Errorf("couldn't lookup \"psample\" netlink family: %w", err)
126 }
127
128 // Lookup psample's packet sampling netlink multicast group.
129 var pktGrpId uint32
130 for _, mgrp := range f.Groups {
131 if mgrp.Name == "packets" {
132 pktGrpId = mgrp.ID
133 break
134 }
135 }
136 if pktGrpId == 0 {
137 c.Close()
138 return nil, fmt.Errorf("packets multicast group not found")
139 }
140
141 // Subscribe to 'packets' multicast group in order to receive packet
142 // samples.
143 if err := c.JoinGroup(pktGrpId); err != nil {
144 c.Close()
145 return nil, fmt.Errorf("couldn't join multicast group: %w", err)
146 }
147 return c, nil
148}
149
150// Receive returns one or more of the sampled packets as soon as they're
151// available. It may return a syscall.ENOBUFS error which indicates that the
152// kernel-side buffer of the netlink connection has overflowed and lost
153// packets. This is a transient error, calling Receive again will retrieve
154// future packet samples.
155func Receive(c *genetlink.Conn) ([]Packet, error) {
156 // Wait for the samples to arrive over generic netlink connection c.
157 gnms, nms, err := c.Receive()
158 if err != nil {
159 return nil, fmt.Errorf("while receiving netlink notifications: %w", err)
160 }
161
162 var pkts []Packet
163 for i := 0; i < len(nms); i++ {
164 // Only process multicast notifications.
165 if nms[i].Header.PID != 0 {
166 continue
167 }
168
169 // PSAMPLE_CMD_SAMPLE should be zero in multicast notifications.
170 if gnms[i].Header.Command != 0 {
171 continue
172 }
173
174 // Iterate over the Generic Netlink attributes present in the message,
175 // extracting any relating to the sampled packet.
176 pkt, err := decode(gnms[i].Data)
177 if err != nil {
178 return nil, fmt.Errorf("while decoding netlink notification: %w", err)
179 }
180 pkts = append(pkts, *pkt)
181 }
182 return pkts, nil
183}