Skip to content

Commit

Permalink
ipv4: reset rt_iif for recirculated mcast/bcast out pkts
Browse files Browse the repository at this point in the history
Multicast or broadcast egress packets have rt_iif set to the oif. These
packets might be recirculated back as input and lookup to the raw
sockets may fail because they are bound to the incoming interface
(skb_iif). If rt_iif is not zero, during the lookup, inet_iif() function
returns rt_iif instead of skb_iif. Hence, the lookup fails.

v2: Make it non vrf specific (David Ahern). Reword the changelog to
    reflect it.
Signed-off-by: Stephen Suryaputra <[email protected]>
Reviewed-by: David Ahern <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
ssuryaextr authored and davem330 committed Jun 26, 2019
1 parent ee42974 commit 5b18f12
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
bool nopolicy, bool noxfrm, bool will_cache);
struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);

struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);
Expand Down
12 changes: 12 additions & 0 deletions net/ipv4/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
static int ip_mc_finish_output(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct rtable *new_rt;
int ret;

ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
Expand All @@ -326,6 +327,17 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
return ret;
}

/* Reset rt_iif so that inet_iif() will return skb->skb_iif. Setting
* this to non-zero causes ipi_ifindex in in_pktinfo to be overwritten,
* see ipv4_pktinfo_prepare().
*/
new_rt = rt_dst_clone(net->loopback_dev, skb_rtable(skb));
if (new_rt) {
new_rt->rt_iif = 0;
skb_dst_drop(skb);
skb_dst_set(skb, &new_rt->dst);
}

return dev_loopback_xmit(net, sk, skb);
}

Expand Down
33 changes: 33 additions & 0 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
}
EXPORT_SYMBOL(rt_dst_alloc);

struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
{
struct rtable *new_rt;

new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
rt->dst.flags);

if (new_rt) {
new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
new_rt->rt_flags = rt->rt_flags;
new_rt->rt_type = rt->rt_type;
new_rt->rt_is_input = rt->rt_is_input;
new_rt->rt_iif = rt->rt_iif;
new_rt->rt_pmtu = rt->rt_pmtu;
new_rt->rt_mtu_locked = rt->rt_mtu_locked;
new_rt->rt_gw_family = rt->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
new_rt->rt_gw4 = rt->rt_gw4;
else if (rt->rt_gw_family == AF_INET6)
new_rt->rt_gw6 = rt->rt_gw6;
INIT_LIST_HEAD(&new_rt->rt_uncached);

new_rt->dst.flags |= DST_HOST;
new_rt->dst.input = rt->dst.input;
new_rt->dst.output = rt->dst.output;
new_rt->dst.error = rt->dst.error;
new_rt->dst.lastuse = jiffies;
new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
}
return new_rt;
}
EXPORT_SYMBOL(rt_dst_clone);

/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
Expand Down

0 comments on commit 5b18f12

Please sign in to comment.