Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c            | 34
-rw-r--r--  net/core/devlink.c        | 10
-rw-r--r--  net/core/flow_dissector.c | 18
-rw-r--r--  net/core/skbuff.c         | 18
-rw-r--r--  net/core/skmsg.c          | 55
5 files changed, 92 insertions, 43 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 64b21f0a2048..8f1a47ad6781 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
+#include <trace/events/qdisc.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
@@ -3844,6 +3845,18 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
}
}
+static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q,
+ struct sk_buff **to_free,
+ struct netdev_queue *txq)
+{
+ int rc;
+
+ rc = q->enqueue(skb, q, to_free) & NET_XMIT_MASK;
+ if (rc == NET_XMIT_SUCCESS)
+ trace_qdisc_enqueue(q, txq, skb);
+ return rc;
+}
+
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
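
[Editor's note: the new dev_qdisc_enqueue() helper above exists so the qdisc_enqueue tracepoint fires in exactly one place, and only when the enqueue actually succeeds; the hunks below rewrite all three q->enqueue() call sites to use it. A minimal userspace sketch of the same pattern, with hypothetical names standing in for the kernel symbols, not kernel code:

#include <stdio.h>

#define NET_XMIT_SUCCESS	0
#define NET_XMIT_DROP		1

static int enqueue(int pkt)
{
	return pkt >= 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}

static void trace_enqueue(int pkt)
{
	printf("qdisc_enqueue pkt=%d\n", pkt);
}

static int dev_qdisc_enqueue(int pkt)
{
	int rc = enqueue(pkt);

	if (rc == NET_XMIT_SUCCESS)
		trace_enqueue(pkt);	/* single trace site for every caller */
	return rc;
}

int main(void)
{
	dev_qdisc_enqueue(1);	/* succeeds and is traced */
	dev_qdisc_enqueue(-1);	/* dropped, not traced */
	return 0;
}

Funneling the call sites through one helper keeps tracing behaviour identical no matter which enqueue path is taken.]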
@@ -3862,8 +3875,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
* of q->seqlock to protect from racing with requeuing.
*/
if (unlikely(!nolock_qdisc_is_empty(q))) {
- rc = q->enqueue(skb, q, &to_free) &
- NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
__qdisc_run(q);
qdisc_run_end(q);
@@ -3879,7 +3891,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
return NET_XMIT_SUCCESS;
}
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
qdisc_run(q);
no_lock_out:
@@ -3923,7 +3935,7 @@ no_lock_out:
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -9700,14 +9712,17 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
struct net_device *dev;
int err, fd;
+ rtnl_lock();
dev = dev_get_by_index(net, attr->link_create.target_ifindex);
- if (!dev)
+ if (!dev) {
+ rtnl_unlock();
return -EINVAL;
+ }
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
- goto out_put_dev;
+ goto unlock;
}
bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
@@ -9717,14 +9732,14 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
- goto out_put_dev;
+ goto unlock;
}
- rtnl_lock();
err = dev_xdp_attach_link(dev, NULL, link);
rtnl_unlock();
if (err) {
+ link->dev = NULL;
bpf_link_cleanup(&link_primer);
goto out_put_dev;
}
@@ -9734,6 +9749,9 @@ int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
dev_put(dev);
return fd;
+unlock:
+ rtnl_unlock();
+
out_put_dev:
dev_put(dev);
return err;
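
[Editor's note: the bpf_xdp_link_attach() change takes rtnl_lock() before dev_get_by_index() rather than only around dev_xdp_attach_link(), so the device cannot be unregistered between lookup and attach; it also clears link->dev before bpf_link_cleanup() so the link release path does not touch the device on that failure path. A condensed userspace model of the lock-before-lookup rule, with a pthread mutex and assumed names standing in for rtnl_lock(), dev_get_by_index() and dev_xdp_attach_link():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;
static int device_present = 1;	/* stands in for the ifindex table */

static int lookup_dev(void)
{
	return device_present;
}

static int attach_link(void)
{
	return device_present ? 0 : -1;
}

static int link_attach(void)
{
	int err;

	/* Lock taken before the lookup, as in the fixed function, so the
	 * device cannot go away between lookup and attach.
	 */
	pthread_mutex_lock(&registry_lock);
	if (!lookup_dev()) {
		pthread_mutex_unlock(&registry_lock);
		return -1;	/* -EINVAL in the kernel code */
	}
	err = attach_link();
	pthread_mutex_unlock(&registry_lock);
	return err;
}

int main(void)
{
	printf("attach: %d\n", link_attach());
	return 0;
}
]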
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 8fdd04f00fd7..85032626de24 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -9328,18 +9328,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
switch (attrs->flavour) {
case DEVLINK_PORT_FLAVOUR_PHYSICAL:
- case DEVLINK_PORT_FLAVOUR_VIRTUAL:
n = snprintf(name, len, "p%u", attrs->phys.port_number);
if (n < len && attrs->split)
n += snprintf(name + n, len - n, "s%u",
attrs->phys.split_subport_number);
- if (!attrs->split)
- n = snprintf(name, len, "p%u", attrs->phys.port_number);
- else
- n = snprintf(name, len, "p%us%u",
- attrs->phys.port_number,
- attrs->phys.split_subport_number);
-
break;
case DEVLINK_PORT_FLAVOUR_CPU:
case DEVLINK_PORT_FLAVOUR_DSA:
@@ -9381,6 +9373,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port,
n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf,
attrs->pci_sf.sf);
break;
+ case DEVLINK_PORT_FLAVOUR_VIRTUAL:
+ return -EOPNOTSUPP;
}
if (n >= len)
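
[Editor's note: after this change only PHYSICAL ports get a phys_port_name of the form "p<port>", with an "s<subport>" suffix for split ports, while VIRTUAL ports return -EOPNOTSUPP instead of claiming a physical-style name. A standalone sketch of the surviving format logic, in userspace C with an assumed helper name:

#include <stdio.h>

static int phys_port_name(char *name, size_t len, unsigned int port,
			  int split, unsigned int subport)
{
	int n = snprintf(name, len, "p%u", port);

	if ((size_t)n < len && split)
		n += snprintf(name + n, len - n, "s%u", subport);
	return (size_t)n >= len ? -1 : 0;	/* mirrors the n >= len check */
}

int main(void)
{
	char buf[16];

	if (!phys_port_name(buf, sizeof(buf), 0, 0, 0))
		printf("%s\n", buf);	/* p0 */
	if (!phys_port_name(buf, sizeof(buf), 0, 1, 1))
		printf("%s\n", buf);	/* p0s1 */
	return 0;
}
]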
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2aadbfc5193b..4b2415d34873 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1504,7 +1504,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow)
}
EXPORT_SYMBOL(flow_get_u32_dst);
-/* Sort the source and destination IP (and the ports if the IP are the same),
+/* Sort the source and destination IP and the ports,
* to have consistent hash within the two directions
*/
static inline void __flow_hash_consistentify(struct flow_keys *keys)
@@ -1515,11 +1515,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
addr_diff = (__force u32)keys->addrs.v4addrs.dst -
(__force u32)keys->addrs.v4addrs.src;
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0)
swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst);
+
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
@@ -1527,13 +1527,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
addr_diff = memcmp(&keys->addrs.v6addrs.dst,
&keys->addrs.v6addrs.src,
sizeof(keys->addrs.v6addrs.dst));
- if ((addr_diff < 0) ||
- (addr_diff == 0 &&
- ((__force u16)keys->ports.dst <
- (__force u16)keys->ports.src))) {
+ if (addr_diff < 0) {
for (i = 0; i < 4; i++)
swap(keys->addrs.v6addrs.src.s6_addr32[i],
keys->addrs.v6addrs.dst.s6_addr32[i]);
+ }
+ if ((__force u16)keys->ports.dst <
+ (__force u16)keys->ports.src) {
swap(keys->ports.src, keys->ports.dst);
}
break;
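
[Editor's note: the rewritten __flow_hash_consistentify() orders the port pair independently of the address pair, instead of only when the addresses compare equal, so a flow and its reverse direction always canonicalize to the same tuple and hash symmetrically. A simplified userspace model of the IPv4 case, with assumed types and names:

#include <stdint.h>
#include <stdio.h>

struct keys {
	uint32_t src, dst;
	uint16_t sport, dport;
};

static void consistentify(struct keys *k)
{
	/* Address order first... */
	if ((int32_t)(k->dst - k->src) < 0) {
		uint32_t t = k->src;

		k->src = k->dst;
		k->dst = t;
	}
	/* ...then port order, now unconditional (the old code only sorted
	 * the ports when the addresses compared equal).
	 */
	if (k->dport < k->sport) {
		uint16_t t = k->sport;

		k->sport = k->dport;
		k->dport = t;
	}
}

int main(void)
{
	struct keys a = { 1, 2, 80, 4242 };
	struct keys b = { 2, 1, 4242, 80 };	/* the reverse direction */

	consistentify(&a);
	consistentify(&b);
	printf("%u %u %u %u\n", a.src, a.dst, a.sport, a.dport);
	printf("%u %u %u %u\n", b.src, b.dst, b.sport, b.dport);
	return 0;
}

Both directions print the same canonical tuple, which is what gives the hash its symmetry.]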
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f63de967ac25..fc7942c0dddc 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -663,7 +663,7 @@ static void skb_release_data(struct sk_buff *skb)
if (skb->cloned &&
atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,
&shinfo->dataref))
- return;
+ goto exit;
skb_zcopy_clear(skb, true);
@@ -674,6 +674,17 @@ static void skb_release_data(struct sk_buff *skb)
kfree_skb_list(shinfo->frag_list);
skb_free_head(skb);
+exit:
+	/* When we clone an SKB we copy the recycling bit. The pp_recycle
+	 * bit is only set on the head though, so in order to avoid races
+	 * while trying to recycle fragments on __skb_frag_unref() we need
+	 * to make one SKB responsible for triggering the recycle path.
+	 * So disable the recycling bit if an SKB is cloned and we have
+	 * additional references to the fragmented part of the SKB.
+	 * Eventually the last SKB will have the recycling bit set and its
+	 * dataref set to 0, which will trigger the recycling.
+	 */
+ skb->pp_recycle = 0;
}
/*
@@ -3011,8 +3022,11 @@ skb_zerocopy_headlen(const struct sk_buff *from)
if (!from->head_frag ||
skb_headlen(from) < L1_CACHE_BYTES ||
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
hlen = skb_headlen(from);
+ if (!hlen)
+ hlen = from->len;
+ }
if (skb_has_frag_list(from))
hlen = from->len;
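
[Editor's note: the skb_release_data() change above clears pp_recycle on any skb whose release does not drop the shared dataref to zero, leaving exactly one owner able to hand page_pool pages back. A toy model of that rule, in userspace C with assumed structure names, not the real skb layout:

#include <stdio.h>

struct shinfo {
	int dataref;
};

struct skb {
	struct shinfo *sh;
	int pp_recycle;
};

static void release_data(struct skb *s)
{
	if (--s->sh->dataref > 0) {
		/* Other clones still reference the frags: this skb must
		 * never trigger the recycle path later.
		 */
		s->pp_recycle = 0;
		return;
	}
	if (s->pp_recycle)
		printf("recycled exactly once\n");
}

int main(void)
{
	struct shinfo sh = { .dataref = 2 };
	struct skb orig = { &sh, 1 };
	struct skb clone = { &sh, 1 };

	release_data(&clone);	/* clears the clone's flag, no recycle */
	release_data(&orig);	/* last reference: recycles */
	return 0;
}
]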
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9b6160a191f8..2d6249b28928 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -508,10 +508,8 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
if (skb_linearize(skb))
return -EAGAIN;
num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
- if (unlikely(num_sge < 0)) {
- kfree(msg);
+ if (unlikely(num_sge < 0))
return num_sge;
- }
copied = skb->len;
msg->sg.start = 0;
@@ -530,6 +528,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
{
struct sock *sk = psock->sk;
struct sk_msg *msg;
+ int err;
/* If we are receiving on the same sock skb->sk is already assigned,
* skip memory accounting and owner transition seeing it already set
@@ -548,7 +547,10 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
* into user buffers.
*/
skb_set_owner_r(skb, sk);
- return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
}
/* Puts an skb on the ingress queue of the socket already assigned to the
@@ -559,12 +561,16 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
{
struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
struct sock *sk = psock->sk;
+ int err;
if (unlikely(!msg))
return -EAGAIN;
sk_msg_init(msg);
skb_set_owner_r(skb, sk);
- return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+ if (err < 0)
+ kfree(msg);
+ return err;
}
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
@@ -578,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
return sk_psock_skb_ingress(psock, skb);
}
-static void sock_drop(struct sock *sk, struct sk_buff *skb)
+static void sk_psock_skb_state(struct sk_psock *psock,
+ struct sk_psock_work_state *state,
+ struct sk_buff *skb,
+ int len, int off)
{
- sk_drops_add(sk, skb);
- kfree_skb(skb);
+ spin_lock_bh(&psock->ingress_lock);
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+ state->skb = skb;
+ state->len = len;
+ state->off = off;
+ } else {
+ sock_drop(psock->sk, skb);
+ }
+ spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
struct sk_psock *psock = container_of(work, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
bool ingress;
u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
- if (state->skb) {
+ if (unlikely(state->skb)) {
+ spin_lock_bh(&psock->ingress_lock);
skb = state->skb;
len = state->len;
off = state->off;
state->skb = NULL;
- goto start;
+ spin_unlock_bh(&psock->ingress_lock);
}
+ if (skb)
+ goto start;
while ((skb = skb_dequeue(&psock->ingress_skb))) {
len = skb->len;
@@ -615,9 +634,8 @@ start:
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
- state->skb = skb;
- state->len = len;
- state->off = off;
+ sk_psock_skb_state(psock, state, skb,
+ len, off);
goto end;
}
/* Hard errors break pipe and stop xmit. */
@@ -716,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
+ kfree_skb(psock->work_state.skb);
+ /* We null the skb here to ensure that calls to sk_psock_backlog
+	 * do not pick up the freed skb.
+ */
+ psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
@@ -767,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work)
void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
{
- sk_psock_stop(psock, false);
-
write_lock_bh(&sk->sk_callback_lock);
sk_psock_restore_proto(sk, psock);
rcu_assign_sk_user_data(sk, NULL);
@@ -778,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
+ sk_psock_stop(psock, false);
+
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
queue_rcu_work(system_wq, &psock->rwork);
}
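
[Editor's note: the skmsg changes close two races around the deferred backlog skb: sk_psock_skb_state() only parks the skb in work_state while SK_PSOCK_TX_ENABLED is still set, dropping it otherwise, and both parking and un-parking happen under ingress_lock, the same state that __sk_psock_zap_ingress() frees and NULLs. A condensed userspace model of the save/restore protocol, using pthreads and assumed names:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct state {
	pthread_mutex_t lock;
	int tx_enabled;
	char *skb;
};

static void save_state(struct state *st, char *skb)
{
	pthread_mutex_lock(&st->lock);
	if (st->tx_enabled)
		st->skb = skb;	/* park for the next backlog run */
	else
		free(skb);	/* teardown already ran: drop, don't leak */
	pthread_mutex_unlock(&st->lock);
}

static char *restore_state(struct state *st)
{
	char *skb;

	pthread_mutex_lock(&st->lock);
	skb = st->skb;		/* never observes a half-freed pointer */
	st->skb = NULL;
	pthread_mutex_unlock(&st->lock);
	return skb;
}

int main(void)
{
	struct state st = { PTHREAD_MUTEX_INITIALIZER, 1, NULL };
	char *skb;

	save_state(&st, strdup("deferred skb"));
	skb = restore_state(&st);
	printf("%s\n", skb ? skb : "(dropped)");
	free(skb);
	return 0;
}

Taking the same lock on both sides is what lets the teardown path free work_state.skb without a backlog run picking up the stale pointer.]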