diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 34 | ||||
-rw-r--r-- | net/core/devlink.c | 10 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 18 | ||||
-rw-r--r-- | net/core/skbuff.c | 18 | ||||
-rw-r--r-- | net/core/skmsg.c | 55 |
5 files changed, 92 insertions, 43 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 64b21f0a2048..8f1a47ad6781 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -131,6 +131,7 @@ #include <trace/events/napi.h> #include <trace/events/net.h> #include <trace/events/skb.h> +#include <trace/events/qdisc.h> #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> @@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) } } +static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, + struct sk_buff **to_free, + struct netdev_queue *txq) +{ + int rc; + + rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK; + if (rc == NET_XMIT_SUCCESS) + trace_qdisc_enqueue(q, txq, skb); + return rc; +} + static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) @@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * of q->seqlock to protect from racing with requeuing. */ if (unlikely(!nolock_qdisc_is_empty(q))) { - rc = q->enqueue(skb, q, &to_free) & - NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); __qdisc_run(q); qdisc_run_end(q); @@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, return NET_XMIT_SUCCESS; } - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); qdisc_run(q); no_lock_out: @@ -3923,7 +3935,7 @@ no_lock_out: qdisc_run_end(q); rc = NET_XMIT_SUCCESS; } else { - rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; + rc = dev_qdisc_enqueue(skb, q, &to_free, txq); if (qdisc_run_begin(q)) { if (unlikely(contended)) { spin_unlock(&q->busylock); @@ -9700,14 +9712,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) struct net_device *dev; int err, fd; + rtnl_lock(); dev = dev_get_by_index(net, attr->link_create.target_ifindex); - if (!dev) + if (!dev) { + rtnl_unlock(); return -EINVAL; + } link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; - goto out_put_dev; + goto unlock; } bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog); @@ -9717,14 +9732,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); - goto out_put_dev; + goto unlock; } - rtnl_lock(); err = dev_xdp_attach_link(dev, NULL, link); rtnl_unlock(); if (err) { + link->dev = NULL; bpf_link_cleanup(&link_primer); goto out_put_dev; } @@ -9734,6 +9749,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) dev_put(dev); return fd; +unlock: + rtnl_unlock(); + out_put_dev: dev_put(dev); return err; diff --git a/net/core/devlink.c b/net/core/devlink.c index 8fdd04f00fd7..85032626de24 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -9328,18 +9328,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: n = snprintf(name, len, "p%u", attrs->phys.port_number); if (n < len && attrs->split) n += snprintf(name + n, len - n, "s%u", attrs->phys.split_subport_number); - if (!attrs->split) - n = snprintf(name, len, "p%u", attrs->phys.port_number); - else - n = snprintf(name, len, "p%us%u", - attrs->phys.port_number, - attrs->phys.split_subport_number); - break; case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -9381,6 +9373,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2aadbfc5193b..4b2415d34873 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1504,7 +1504,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow) } EXPORT_SYMBOL(flow_get_u32_dst); -/* Sort the source and destination IP (and the ports if the IP are the same), +/* Sort the source and destination IP and the ports, * to have consistent hash within the two directions */ static inline void __flow_hash_consistentify(struct flow_keys *keys) @@ -1515,11 +1515,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) case FLOW_DISSECTOR_KEY_IPV4_ADDRS: addr_diff = (__force u32)keys->addrs.v4addrs.dst - (__force u32)keys->addrs.v4addrs.src; - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; @@ -1527,13 +1527,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) addr_diff = memcmp(&keys->addrs.v6addrs.dst, &keys->addrs.v6addrs.src, sizeof(keys->addrs.v6addrs.dst)); - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) { for (i = 0; i < 4; i++) swap(keys->addrs.v6addrs.src.s6_addr32[i], keys->addrs.v6addrs.dst.s6_addr32[i]); + } + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f63de967ac25..fc7942c0dddc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -663,7 +663,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb->cloned && atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1, &shinfo->dataref)) - return; + goto exit; skb_zcopy_clear(skb, true); @@ -674,6 +674,17 @@ static void skb_release_data(struct sk_buff *skb) kfree_skb_list(shinfo->frag_list); skb_free_head(skb); +exit: + /* When we clone an SKB we copy the reycling bit. The pp_recycle + * bit is only set on the head though, so in order to avoid races + * while trying to recycle fragments on __skb_frag_unref() we need + * to make one SKB responsible for triggering the recycle path. + * So disable the recycling bit if an SKB is cloned and we have + * additional references to to the fragmented part of the SKB. + * Eventually the last SKB will have the recycling bit set and it's + * dataref set to 0, which will trigger the recycling + */ + skb->pp_recycle = 0; } /* @@ -3011,8 +3022,11 @@ skb_zerocopy_headlen(const struct sk_buff *from) if (!from->head_frag || skb_headlen(from) < L1_CACHE_BYTES || - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) { hlen = skb_headlen(from); + if (!hlen) + hlen = from->len; + } if (skb_has_frag_list(from)) hlen = from->len; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9b6160a191f8..2d6249b28928 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -508,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, if (skb_linearize(skb)) return -EAGAIN; num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len); - if (unlikely(num_sge < 0)) { - kfree(msg); + if (unlikely(num_sge < 0)) return num_sge; - } copied = skb->len; msg->sg.start = 0; @@ -530,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) { struct sock *sk = psock->sk; struct sk_msg *msg; + int err; /* If we are receiving on the same sock skb->sk is already assigned, * skip memory accounting and owner transition seeing it already set @@ -548,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb) * into user buffers. */ skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } /* Puts an skb on the ingress queue of the socket already assigned to the @@ -559,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb { struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC); struct sock *sk = psock->sk; + int err; if (unlikely(!msg)) return -EAGAIN; sk_msg_init(msg); skb_set_owner_r(skb, sk); - return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg); + if (err < 0) + kfree(msg); + return err; } static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, @@ -578,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return sk_psock_skb_ingress(psock, skb); } -static void sock_drop(struct sock *sk, struct sk_buff *skb) +static void sk_psock_skb_state(struct sk_psock *psock, + struct sk_psock_work_state *state, + struct sk_buff *skb, + int len, int off) { - sk_drops_add(sk, skb); - kfree_skb(skb); + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + state->skb = skb; + state->len = len; + state->off = off; + } else { + sock_drop(psock->sk, skb); + } + spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; - struct sk_buff *skb; + struct sk_buff *skb = NULL; bool ingress; u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (state->skb) { + if (unlikely(state->skb)) { + spin_lock_bh(&psock->ingress_lock); skb = state->skb; len = state->len; off = state->off; state->skb = NULL; - goto start; + spin_unlock_bh(&psock->ingress_lock); } + if (skb) + goto start; while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; @@ -615,9 +634,8 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - state->skb = skb; - state->len = len; - state->off = off; + sk_psock_skb_state(psock, state, skb, + len, off); goto end; } /* Hard errors break pipe and stop xmit. */ @@ -716,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } + kfree_skb(psock->work_state.skb); + /* We null the skb here to ensure that calls to sk_psock_backlog + * do not pick up the free'd skb. + */ + psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } @@ -767,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work) void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_stop(psock, false); - write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); @@ -778,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); + sk_psock_stop(psock, false); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); } |