Skip to content

Commit

Permalink
netdev-tc-offloads: Don't offload header modification on ip fragments.
Browse files Browse the repository at this point in the history
While offloading header modifications to TC, OVS is using {TCA_PEDIT} +
{TCA_CSUM} combination as that is the only way to represent header
rewrite.  However, {TCA_CSUM} is unable to calculate L4 checksums for
IP fragments.

Since TC already applies fragmentation bit masking, this patch simply
needs to prevent these packets from being processed through TC.

Reported-at: https://issues.redhat.com/browse/FDP-545
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
  • Loading branch information
chaudron committed Dec 18, 2024
1 parent 86829af commit 64364f3
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 1 deletion.
39 changes: 39 additions & 0 deletions lib/netdev-offload-tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1488,6 +1488,31 @@ parse_put_flow_ct_action(struct tc_flower *flower,
return 0;
}

/* Reports whether the tc layer is going to attach an L4 checksum update to
 * the set action of the given 'type' for the flow described by 'flower'.
 *
 * The answer is true only when 'type' is one of OVS_KEY_ATTR_IPV4,
 * OVS_KEY_ATTR_IPV6, OVS_KEY_ATTR_TCP or OVS_KEY_ATTR_UDP and, at the same
 * time, the IP protocol of the flow key is TCP, UDP, ICMPv6 or UDP-Lite.
 * Every other combination yields false.
 *
 * The detailed logic this pre-check follows lives in csum_update_flag(). */
static bool
tc_will_add_l4_checksum(struct tc_flower *flower, int type)
{
    if (type != OVS_KEY_ATTR_IPV4 && type != OVS_KEY_ATTR_IPV6
        && type != OVS_KEY_ATTR_TCP && type != OVS_KEY_ATTR_UDP) {
        /* Not one of the set actions covered by this check; the flow key is
         * not consulted at all in this case. */
        return false;
    }

    switch (flower->key.ip_proto) {
    case IPPROTO_TCP:
    case IPPROTO_UDP:
    case IPPROTO_ICMPV6:
    case IPPROTO_UDPLITE:
        return true;
    default:
        return false;
    }
}

static int
parse_put_flow_set_masked_action(struct tc_flower *flower,
struct tc_action *action,
Expand Down Expand Up @@ -1520,6 +1545,14 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
return EOPNOTSUPP;
}

if (flower->key.flags & TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT
&& tc_will_add_l4_checksum(flower, type)) {
VLOG_DBG_RL(&rl, "set action type %d not supported on fragments "
"due to checksum limitation", type);
ofpbuf_uninit(&set_buf);
return EOPNOTSUPP;
}

for (i = 0; i < ARRAY_SIZE(set_flower_map[type]); i++) {
struct netlink_field *f = &set_flower_map[type][i];

Expand Down Expand Up @@ -2445,6 +2478,12 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
}

mask->nw_frag = 0;
} else {
/* This scenario should not occur. Currently, all installed IP DP
* flows perform a fully masked match on the fragmentation bits.
* However, since TC depends on this behavior, we return EOPNOTSUPP
* for now in case this behavior changes in the future. */
return EOPNOTSUPP;
}

if (key->nw_proto == IPPROTO_TCP) {
Expand Down
5 changes: 4 additions & 1 deletion lib/tc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2958,7 +2958,10 @@ csum_update_flag(struct tc_flower *flower,
* eth(dst=<mac>),eth_type(0x0800) actions=set(ipv4(src=<new_ip>))
* we need to force a more specific flow as this can, for example,
* need a recalculation of icmp checksum if the packet that passes
* is ICMPv6 and tcp checksum if its tcp. */
* is ICMPv6 and tcp checksum if its tcp.
*
* This section of the code must be kept in sync with the pre-check
* function in netdev-offload-tc.c, tc_will_add_l4_checksum(). */

switch (htype) {
case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
Expand Down
92 changes: 92 additions & 0 deletions tests/system-traffic.at
Original file line number Diff line number Diff line change
Expand Up @@ -2402,6 +2402,98 @@ recirc_id(<recirc>),in_port(2),eth_type(0x86dd),ipv6(proto=58,frag=no),icmpv6(ty
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

dnl Sends an IPv4 and an IPv6 first fragment carrying UDP through flows that
dnl rewrite the IP source address, and verifies that the packets captured on
dnl the far side carry the expected address and checksum bytes.
AT_SETUP([datapath - mod_nw_src/set_field on IP fragments])
AT_SKIP_IF([test $HAVE_TCPDUMP = no])
OVS_TRAFFIC_VSWITCHD_START()

ADD_NAMESPACES(at_ns0, at_ns1)

ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", 36:b1:ee:7c:01:03)
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", 36:b1:ee:7c:01:02)

dnl Rewrite the source address of IPv4 and IPv6 traffic received on ovs-p0
dnl and forward it to ovs-p1.
AT_DATA([flows.txt], [dnl
in_port=ovs-p0,ip,nw_src=10.1.1.1 actions=mod_nw_src=11.1.1.1,ovs-p1
in_port=ovs-p0,ipv6,ipv6_src=fc00::1 actions=set_field:fc00::100->ipv6_src,ovs-p1
])

AT_CHECK([ovs-ofctl del-flows br0])
AT_CHECK([ovs-ofctl -Oopenflow13 add-flows br0 flows.txt])

dnl Capture the traffic seen on p1 inside at_ns1 into p1.pcap so that the
dnl forwarded packets can be compared against the expected bytes below.
NETNS_DAEMONIZE([at_ns1],
[tcpdump -l -nn -xx -U -i p1 -w p1.pcap 2> tcpdump.err],
[tcpdump.pid])
OVS_WAIT_UNTIL([grep "listening" tcpdump.err])

dnl IPv4 Packet content:
dnl Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
dnl Type: IPv4 (0x0800)
dnl Internet Protocol Version 4, Src: 10.1.1.1, Dst: 10.1.1.2
dnl 0100 .... = Version: 4
dnl .... 0101 = Header Length: 20 bytes (5)
dnl Differentiated Services Field: 0x00 (DSCP: CS0, ECN: Not-ECT)
dnl Total Length: 38
dnl Identification: 0x0001 (1)
dnl 001. .... = Flags: 0x1, More fragments
dnl 0... .... = Reserved bit: Not set
dnl .0.. .... = Don't fragment: Not set
dnl ..1. .... = More fragments: Set
dnl ...0 0000 0000 0000 = Fragment Offset: 0
dnl Time to Live: 64
dnl Protocol: UDP (17)
dnl Header Checksum: 0x44c2
dnl Data (18 bytes)
eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 08 00"
ip="45 00 00 26 00 01 20 00 40 11 44 c2 0a 01 01 01 0a 01 01 02"
data="0b c4 08 84 00 26 e9 64 01 02 03 04 05 06 07 08 09 0a"
packet="${eth} ${ip} ${data}"

dnl We send each packet multiple times, one for learning, which will go to
dnl ovs-vswitchd, and the others will go through the actual datapath.
for i in 1 2 3 4 5; do
NS_CHECK_EXEC([at_ns0],
[$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
done

dnl Update source address and checksums in original packet for comparison.
dnl The spaces are stripped first so the string can be matched with grep
dnl against the ovs-pcap output in the wait below.
packet=$(echo "$packet" | sed -e 's/ //g' \
-e 's/0a010101/0b010101/g' -e 's/44c2/43c2/g' -e 's/e964/e864/g')
dnl All five copies must show up in the capture with the rewritten bytes.
OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])

dnl Repeat similar test with IPv6.
dnl Packet content:
dnl Ethernet II, Src: 36:b1:ee:7c:01:03, Dst: 36:b1:ee:7c:01:02
dnl Type: IPv6 (0x86dd)
dnl Internet Protocol Version 6, Src: fc00::1, Dst: fc00::2
dnl Payload Length: 24
dnl Next Header: Fragment Header for IPv6 (44)
dnl Hop Limit: 64
dnl Fragment Header for IPv6
dnl Next header: UDP (17)
dnl Reserved octet: 0x00
dnl 0000 0000 0000 0... = Offset: 0 (0 bytes)
dnl .... .... .... .00. = Reserved bits: 0
dnl .... .... .... ...1 = More Fragments: Yes
dnl Identification: 0x2316ab36
dnl Data (16 bytes)
eth="36 b1 ee 7c 01 02 36 b1 ee 7c 01 03 86 dd"
ip="60 00 00 00 00 18 2c 40 fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 \
fc 00 00 00 00 00 00 00 00 00 00 00 00 00 00 02 11 00 00 01 23 16 ab 36"
data="0b c4 08 84 00 26 07 65 01 02 03 04 05 06 07 08"
packet="${eth} ${ip} ${data}"

for i in 1 2 3 4 5; do
NS_CHECK_EXEC([at_ns0],
[$PYTHON3 $srcdir/sendpkt.py p0 ${packet} > /dev/null])
done

dnl Update checksum and source address in original packet for comparison.
packet=$(echo "$packet" | sed -e 's/ //g' -e 's/0765/0666/g' -e \
's/fc000000000000000000000000000001/fc000000000000000000000000000100/g')
dnl As for IPv4, all five copies must show up with the rewritten bytes.
OVS_WAIT_UNTIL([test $(ovs-pcap p1.pcap | grep -c "${packet}") -eq 5])

OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

AT_BANNER([MPLS])

AT_SETUP([mpls - encap header dp-support])
Expand Down

0 comments on commit 64364f3

Please sign in to comment.