Skip to content

Commit

Permalink
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Browse files Browse the repository at this point in the history
Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next:

1) Add support to specify a stateful expression in set definitions,
   this allows users to specify e.g. counters per set elements.

2) Flowtable software counter support.

3) Flowtable hardware offload counter support, from wenxu.

4) Parallelize flowtable hardware offload requests, from Paul Blakey.
   This includes a patch to add one work entry per offload command.

5) Several patches to rework nf_queue refcount handling, from Florian
   Westphal.

6) A few fixes for the flowtable tunnel offload: Fix crash if tunneling
   information is missing and set up indirect flow block as TC_SETUP_FT,
   patch from wenxu.

7) Stricter netlink attribute sanity check on filters, from Romain Bellan
   and Florent Fourcot.

8) Annotations to make sparse happy, from Jules Irenge.

9) Improve icmp errors in debugging information, from Haishuang Yan.

10) Fix warning in IPVS icmp error debugging, from Haishuang Yan.

11) Fix endianness issue in tcp extension header, from Sergey Marinkevich.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
davem330 committed Mar 31, 2020
2 parents 6fe9a94 + e19680f commit d9679cd
Show file tree
Hide file tree
Showing 21 changed files with 280 additions and 198 deletions.
3 changes: 2 additions & 1 deletion include/net/flow_offload.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ void flow_indr_block_cb_unregister(struct net_device *dev,

void flow_indr_block_call(struct net_device *dev,
struct flow_block_offload *bo,
enum flow_block_command command);
enum flow_block_command command,
enum tc_setup_type type);

#endif /* _NET_FLOW_OFFLOAD_H */
11 changes: 11 additions & 0 deletions include/net/netfilter/nf_conntrack_acct.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
#endif
}

/* Add @packets and @bytes to the accounting counters of @ct for direction
 * @dir. Does nothing when @ct has no accounting extension, i.e. when
 * nf_conn_acct_find() returns NULL.
 */
void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
unsigned int bytes);

/* Account one packet of @bytes bytes on @ct in direction @dir by calling
 * nf_ct_acct_add() with a packet count of 1. The call is only compiled in
 * when CONFIG_NF_CONNTRACK is enabled; otherwise this is an empty function.
 */
static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
unsigned int bytes)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_ct_acct_add(ct, dir, 1, bytes);
#endif
}

void nf_conntrack_acct_pernet_init(struct net *net);

int nf_conntrack_acct_init(void);
Expand Down
5 changes: 3 additions & 2 deletions include/net/netfilter/nf_flow_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct nf_flowtable_type {
};

enum nf_flowtable_flags {
NF_FLOWTABLE_HW_OFFLOAD = 0x1,
NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
};

struct nf_flowtable {
Expand All @@ -73,7 +74,7 @@ struct nf_flowtable {
struct delayed_work gc_work;
unsigned int flags;
struct flow_block flow_block;
struct mutex flow_block_lock; /* Guards flow_block */
struct rw_semaphore flow_block_lock; /* Guards flow_block */
possible_net_t net;
};

Expand Down
7 changes: 5 additions & 2 deletions include/net/netfilter/nf_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ struct nf_queue_entry {
struct sk_buff *skb;
unsigned int id;
unsigned int hook_index; /* index in hook_entries->hook[] */

#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct net_device *physin;
struct net_device *physout;
#endif
struct nf_hook_state state;
u16 size; /* sizeof(entry) + saved route keys */

Expand All @@ -35,7 +38,7 @@ void nf_unregister_queue_handler(struct net *net);
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);

void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
void nf_queue_entry_free(struct nf_queue_entry *entry);

static inline void init_hashrandom(u32 *jhash_initval)
{
Expand Down
5 changes: 5 additions & 0 deletions include/net/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,15 @@ struct nft_set_iter {
* @size: number of set elements
* @field_len: length of each field in concatenation, bytes
* @field_count: number of concatenated fields in element
* @expr: set must support expressions
*/
struct nft_set_desc {
unsigned int klen;
unsigned int dlen;
unsigned int size;
u8 field_len[NFT_REG32_COUNT];
u8 field_count;
bool expr;
};

/**
Expand Down Expand Up @@ -416,6 +418,7 @@ struct nft_set_type {
* @policy: set parameterization (see enum nft_set_policies)
* @udlen: user data length
* @udata: user data
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
* @genmask: generation mask
Expand Down Expand Up @@ -444,6 +447,7 @@ struct nft_set {
u16 policy;
u16 udlen;
unsigned char *udata;
struct nft_expr *expr;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
u16 flags:14,
Expand Down Expand Up @@ -846,6 +850,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
return (void *)expr->data;
}

int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
const struct nft_expr *expr);
Expand Down
15 changes: 15 additions & 0 deletions include/uapi/linux/netfilter/nf_tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ enum nft_set_field_attributes {
* @NFTA_SET_USERDATA: user data (NLA_BINARY)
* @NFTA_SET_OBJ_TYPE: stateful object type (NLA_U32: NFT_OBJECT_*)
* @NFTA_SET_HANDLE: set handle (NLA_U64)
* @NFTA_SET_EXPR: set expression (NLA_NESTED: nft_expr_attributes)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
Expand All @@ -361,6 +362,7 @@ enum nft_set_attributes {
NFTA_SET_PAD,
NFTA_SET_OBJ_TYPE,
NFTA_SET_HANDLE,
NFTA_SET_EXPR,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
Expand Down Expand Up @@ -1551,6 +1553,19 @@ enum nft_object_attributes {
};
#define NFTA_OBJ_MAX (__NFTA_OBJ_MAX - 1)

/**
* enum nft_flowtable_flags - nf_tables flowtable flags
*
* @NFT_FLOWTABLE_HW_OFFLOAD: flowtable hardware offload is enabled
* @NFT_FLOWTABLE_COUNTER: enable flow counters
* @NFT_FLOWTABLE_MASK: bitwise OR of all flags defined above
*/
enum nft_flowtable_flags {
NFT_FLOWTABLE_HW_OFFLOAD = 0x1,
NFT_FLOWTABLE_COUNTER = 0x2,
NFT_FLOWTABLE_MASK = (NFT_FLOWTABLE_HW_OFFLOAD |
NFT_FLOWTABLE_COUNTER)
};

/**
* enum nft_flowtable_attributes - nf_tables flow table netlink attributes
*
Expand Down
6 changes: 3 additions & 3 deletions net/core/flow_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,8 @@ EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);

void flow_indr_block_call(struct net_device *dev,
struct flow_block_offload *bo,
enum flow_block_command command)
enum flow_block_command command,
enum tc_setup_type type)
{
struct flow_indr_block_cb *indr_block_cb;
struct flow_indr_block_dev *indr_dev;
Expand All @@ -521,8 +522,7 @@ void flow_indr_block_call(struct net_device *dev,
return;

list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
bo);
indr_block_cb->cb(dev, indr_block_cb->cb_priv, type, bo);
}
EXPORT_SYMBOL_GPL(flow_indr_block_call);

Expand Down
45 changes: 25 additions & 20 deletions net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1661,8 +1661,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
bool ipip, new_cp = false;
bool tunnel, new_cp = false;
union nf_inet_addr *raddr;
char *outer_proto = "IPIP";

*related = 1;

Expand Down Expand Up @@ -1703,8 +1704,8 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
return NF_ACCEPT; /* The packet looks wrong, ignore */
raddr = (union nf_inet_addr *)&cih->daddr;

/* Special case for errors for IPIP packets */
ipip = false;
/* Special case for errors for IPIP/UDP/GRE tunnel packets */
tunnel = false;
if (cih->protocol == IPPROTO_IPIP) {
struct ip_vs_dest *dest;

Expand All @@ -1721,24 +1722,27 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true;
tunnel = true;
} else if ((cih->protocol == IPPROTO_UDP || /* Can be UDP encap */
cih->protocol == IPPROTO_GRE) && /* Can be GRE encap */
/* Error for our tunnel must arrive at LOCAL_IN */
(skb_rtable(skb)->rt_flags & RTCF_LOCAL)) {
__u8 iproto;
int ulen;

/* Non-first fragment has no UDP header */
/* Non-first fragment has no UDP/GRE header */
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
offset2 = offset + cih->ihl * 4;
if (cih->protocol == IPPROTO_UDP)
if (cih->protocol == IPPROTO_UDP) {
ulen = ipvs_udp_decap(ipvs, skb, offset2, AF_INET,
raddr, &iproto);
else
outer_proto = "UDP";
} else {
ulen = ipvs_gre_decap(ipvs, skb, offset2, AF_INET,
raddr, &iproto);
outer_proto = "GRE";
}
if (ulen > 0) {
/* Skip IP and UDP/GRE tunnel headers */
offset = offset2 + ulen;
Expand All @@ -1747,7 +1751,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
&_ciph);
if (cih && cih->version == 4 && cih->ihl >= 5 &&
iproto == IPPROTO_IPIP)
ipip = true;
tunnel = true;
else
return NF_ACCEPT;
}
Expand All @@ -1767,19 +1771,19 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
"Checking incoming ICMP for");

offset2 = offset;
ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !tunnel, &ciph);
offset = ciph.len;

/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
* For IPIP/UDP/GRE tunnel this is error for request, not for reply.
*/
cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
ipvs, AF_INET, skb, &ciph);

if (!cp) {
int v;

if (ipip || !sysctl_schedule_icmp(ipvs))
if (tunnel || !sysctl_schedule_icmp(ipvs))
return NF_ACCEPT;

if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
Expand All @@ -1797,7 +1801,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
goto out;
}

if (ipip) {
if (tunnel) {
__be32 info = ic->un.gateway;
__u8 type = ic->type;
__u8 code = ic->code;
Expand All @@ -1809,17 +1813,18 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
u32 mtu = ntohs(ic->un.frag.mtu);
__be16 frag_off = cih->frag_off;

/* Strip outer IP and ICMP, go to IPIP header */
/* Strip outer IP and ICMP, go to IPIP/UDP/GRE header */
if (pskb_pull(skb, ihl + sizeof(_icmph)) == NULL)
goto ignore_ipip;
goto ignore_tunnel;
offset2 -= ihl + sizeof(_icmph);
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
IP_VS_DBG(12, "ICMP for %s %pI4->%pI4: mtu=%u\n",
outer_proto, &ip_hdr(skb)->saddr,
&ip_hdr(skb)->daddr, mtu);
ipv4_update_pmtu(skb, ipvs->net, mtu, 0, 0);
/* Client uses PMTUD? */
if (!(frag_off & htons(IP_DF)))
goto ignore_ipip;
goto ignore_tunnel;
/* Prefer the resulting PMTU */
if (dest) {
struct ip_vs_dest_dst *dest_dst;
Expand All @@ -1832,11 +1837,11 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
mtu -= sizeof(struct iphdr);
info = htonl(mtu);
}
/* Strip outer IP, ICMP and IPIP, go to IP header of
/* Strip outer IP, ICMP and IPIP/UDP/GRE, go to IP header of
* original request.
*/
if (pskb_pull(skb, offset2) == NULL)
goto ignore_ipip;
goto ignore_tunnel;
skb_reset_network_header(skb);
IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
Expand All @@ -1845,7 +1850,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
/* ICMP can be shorter but anyways, account it */
ip_vs_out_stats(cp, skb);

ignore_ipip:
ignore_tunnel:
consume_skb(skb);
verdict = NF_STOLEN;
goto out;
Expand Down
18 changes: 10 additions & 8 deletions net/netfilter/nf_conntrack_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
}

static void nf_conntrack_all_lock(void)
__acquires(&nf_conntrack_locks_all_lock)
{
int i;

Expand All @@ -162,6 +163,7 @@ static void nf_conntrack_all_lock(void)
}

static void nf_conntrack_all_unlock(void)
__releases(&nf_conntrack_locks_all_lock)
{
/* All prior stores must be complete before we clear
* 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
Expand Down Expand Up @@ -863,20 +865,20 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);

static inline void nf_ct_acct_update(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int len)
void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
unsigned int bytes)
{
struct nf_conn_acct *acct;

acct = nf_conn_acct_find(ct);
if (acct) {
struct nf_conn_counter *counter = acct->counter;

atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
atomic64_add(packets, &counter[dir].packets);
atomic64_add(bytes, &counter[dir].bytes);
}
}
EXPORT_SYMBOL_GPL(nf_ct_acct_add);

static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
const struct nf_conn *loser_ct)
Expand All @@ -890,7 +892,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,

/* u32 should be fine since we must have seen one packet. */
bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
nf_ct_acct_update(ct, ctinfo, bytes);
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
}
}

Expand Down Expand Up @@ -1931,15 +1933,15 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
WRITE_ONCE(ct->timeout, extra_jiffies);
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);

bool nf_ct_kill_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb)
{
nf_ct_acct_update(ct, ctinfo, skb->len);
nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);

return nf_ct_delete(ct, 0, 0);
}
Expand Down
3 changes: 2 additions & 1 deletion net/netfilter/nf_conntrack_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
struct ctnetlink_filter *filter;

#ifndef CONFIG_NF_CONNTRACK_MARK
if (cda[CTA_MARK] && cda[CTA_MARK_MASK])
if (cda[CTA_MARK] || cda[CTA_MARK_MASK])
return ERR_PTR(-EOPNOTSUPP);
#endif

Expand Down Expand Up @@ -1533,6 +1533,7 @@ static int
ctnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr)
__must_hold(RCU)
{
struct nf_nat_hook *nat_hook;
int err;
Expand Down
Loading

0 comments on commit d9679cd

Please sign in to comment.