Diffstat (limited to 'net')
-rw-r--r--  net/batman-adv/bridge_loop_avoidance.c | 8
-rw-r--r--  net/batman-adv/main.c | 56
-rw-r--r--  net/batman-adv/network-coding.c | 4
-rw-r--r--  net/batman-adv/translation-table.c | 4
-rw-r--r--  net/bpf/test_run.c | 14
-rw-r--r--  net/bridge/br_multicast.c | 6
-rw-r--r--  net/bridge/br_netlink.c | 3
-rw-r--r--  net/bridge/br_private.h | 6
-rw-r--r--  net/bridge/netfilter/ebtables.c | 4
-rw-r--r--  net/caif/chnl_net.c | 19
-rw-r--r--  net/can/isotp.c | 51
-rw-r--r--  net/can/j1939/j1939-priv.h | 1
-rw-r--r--  net/can/j1939/main.c | 7
-rw-r--r--  net/can/j1939/transport.c | 14
-rw-r--r--  net/core/dev.c | 25
-rw-r--r--  net/core/dev_addr_lists.c | 6
-rw-r--r--  net/core/net-procfs.c | 24
-rw-r--r--  net/core/net-sysfs.c | 4
-rw-r--r--  net/core/netclassid_cgroup.c | 7
-rw-r--r--  net/core/netprio_cgroup.c | 10
-rw-r--r--  net/core/rtnetlink.c | 2
-rw-r--r--  net/core/skbuff.c | 36
-rw-r--r--  net/core/skmsg.c | 14
-rw-r--r--  net/core/sock.c | 83
-rw-r--r--  net/core/sock_destructor.h | 12
-rw-r--r--  net/core/sysctl_net_core.c | 2
-rw-r--r--  net/dccp/minisocks.c | 2
-rw-r--r--  net/dsa/Kconfig | 5
-rw-r--r--  net/dsa/dsa.c | 5
-rw-r--r--  net/dsa/dsa2.c | 173
-rw-r--r--  net/dsa/dsa_priv.h | 1
-rw-r--r--  net/dsa/slave.c | 12
-rw-r--r--  net/dsa/switch.c | 2
-rw-r--r--  net/dsa/tag_dsa.c | 30
-rw-r--r--  net/dsa/tag_ocelot.c | 3
-rw-r--r--  net/dsa/tag_ocelot_8021q.c | 42
-rw-r--r--  net/dsa/tag_sja1105.c | 43
-rw-r--r--  net/ipv4/fib_semantics.c | 16
-rw-r--r--  net/ipv4/icmp.c | 23
-rw-r--r--  net/ipv4/inet_hashtables.c | 4
-rw-r--r--  net/ipv4/netfilter/iptable_raw.c | 2
-rw-r--r--  net/ipv4/netfilter/nf_defrag_ipv4.c | 30
-rw-r--r--  net/ipv4/nexthop.c | 21
-rw-r--r--  net/ipv4/tcp.c | 5
-rw-r--r--  net/ipv4/tcp_bpf.c | 27
-rw-r--r--  net/ipv4/tcp_input.c | 2
-rw-r--r--  net/ipv4/tcp_ipv4.c | 45
-rw-r--r--  net/ipv4/udp.c | 16
-rw-r--r--  net/ipv4/udp_bpf.c | 1
-rw-r--r--  net/ipv4/udp_tunnel_nic.c | 2
-rw-r--r--  net/ipv6/inet6_hashtables.c | 2
-rw-r--r--  net/ipv6/ioam6.c | 70
-rw-r--r--  net/ipv6/ioam6_iptunnel.c | 6
-rw-r--r--  net/ipv6/ip6_fib.c | 3
-rw-r--r--  net/ipv6/ip6_output.c | 3
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6t_rt.c | 48
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 2
-rw-r--r--  net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 25
-rw-r--r--  net/ipv6/route.c | 5
-rw-r--r--  net/ipv6/tcp_ipv6.c | 15
-rw-r--r--  net/ipv6/udp.c | 5
-rw-r--r--  net/l2tp/l2tp_core.c | 4
-rw-r--r--  net/mac80211/mesh.c | 9
-rw-r--r--  net/mac80211/mesh_pathtbl.c | 5
-rw-r--r--  net/mac80211/mesh_ps.c | 3
-rw-r--r--  net/mac80211/rate.c | 4
-rw-r--r--  net/mac80211/rx.c | 3
-rw-r--r--  net/mac80211/tx.c | 12
-rw-r--r--  net/mac80211/wpa.c | 6
-rw-r--r--  net/mctp/route.c | 2
-rw-r--r--  net/mptcp/mptcp_diag.c | 2
-rw-r--r--  net/mptcp/options.c | 39
-rw-r--r--  net/mptcp/pm_netlink.c | 4
-rw-r--r--  net/mptcp/protocol.c | 61
-rw-r--r--  net/mptcp/protocol.h | 2
-rw-r--r--  net/mptcp/subflow.c | 2
-rw-r--r--  net/mptcp/syncookies.c | 13
-rw-r--r--  net/mptcp/token.c | 11
-rw-r--r--  net/mptcp/token_test.c | 14
-rw-r--r--  net/netfilter/Kconfig | 2
-rw-r--r--  net/netfilter/ipset/ip_set_hash_gen.h | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 5
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 154
-rw-r--r--  net/netfilter/nf_nat_core.c | 17
-rw-r--r--  net/netfilter/nf_nat_masquerade.c | 168
-rw-r--r--  net/netfilter/nf_tables_api.c | 121
-rw-r--r--  net/netfilter/nft_chain_filter.c | 9
-rw-r--r--  net/netfilter/nft_compat.c | 17
-rw-r--r--  net/netfilter/nft_quota.c | 2
-rw-r--r--  net/netfilter/xt_IDLETIMER.c | 2
-rw-r--r--  net/netfilter/xt_LOG.c | 10
-rw-r--r--  net/netfilter/xt_NFLOG.c | 10
-rw-r--r--  net/netlink/af_netlink.c | 14
-rw-r--r--  net/nfc/af_nfc.c | 3
-rw-r--r--  net/nfc/digital_core.c | 9
-rw-r--r--  net/nfc/digital_technology.c | 8
-rw-r--r--  net/nfc/nci/rsp.c | 2
-rw-r--r--  net/packet/af_packet.c | 2
-rw-r--r--  net/sched/act_ct.c | 2
-rw-r--r--  net/sched/cls_flower.c | 6
-rw-r--r--  net/sched/sch_api.c | 6
-rw-r--r--  net/sched/sch_fifo.c | 3
-rw-r--r--  net/sched/sch_mqprio.c | 30
-rw-r--r--  net/sched/sch_taprio.c | 4
-rw-r--r--  net/sctp/input.c | 2
-rw-r--r--  net/sctp/sm_make_chunk.c | 2
-rw-r--r--  net/sctp/sm_statefuns.c | 139
-rw-r--r--  net/smc/af_smc.c | 2
-rw-r--r--  net/smc/smc_cdc.c | 7
-rw-r--r--  net/smc/smc_clc.c | 3
-rw-r--r--  net/smc/smc_core.c | 22
-rw-r--r--  net/smc/smc_llc.c | 65
-rw-r--r--  net/smc/smc_tx.c | 22
-rw-r--r--  net/smc/smc_wr.h | 14
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 2
-rw-r--r--  net/tipc/crypto.c | 32
-rw-r--r--  net/tipc/socket.c | 2
-rw-r--r--  net/tls/tls_main.c | 4
-rw-r--r--  net/tls/tls_sw.c | 21
-rw-r--r--  net/unix/af_unix.c | 100
-rw-r--r--  net/unix/unix_bpf.c | 2
-rw-r--r--  net/wireless/core.c | 2
-rw-r--r--  net/wireless/core.h | 2
-rw-r--r--  net/wireless/mlme.c | 26
-rw-r--r--  net/wireless/scan.c | 7
-rw-r--r--  net/wireless/util.c | 14
-rw-r--r--  net/xfrm/xfrm_user.c | 67
129 files changed, 1602 insertions, 886 deletions
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 1669744304c5..17687848daec 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1560,10 +1560,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
return 0;
bat_priv->bla.claim_hash = batadv_hash_new(128);
- bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.claim_hash)
+ return -ENOMEM;
- if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.backbone_hash) {
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
return -ENOMEM;
+ }
batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 3ddd66e4c29e..5207cd8d6ad8 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -190,29 +190,41 @@ int batadv_mesh_init(struct net_device *soft_iface)
bat_priv->gw.generation = 0;
- ret = batadv_v_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
-
ret = batadv_originator_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_orig;
+ }
ret = batadv_tt_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_tt;
+ }
+
+ ret = batadv_v_mesh_init(bat_priv);
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_v;
+ }
ret = batadv_bla_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_bla;
+ }
ret = batadv_dat_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_dat;
+ }
ret = batadv_nc_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_nc;
+ }
batadv_gw_init(bat_priv);
batadv_mcast_init(bat_priv);
@@ -222,8 +234,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
return 0;
-err:
- batadv_mesh_free(soft_iface);
+err_nc:
+ batadv_dat_free(bat_priv);
+err_dat:
+ batadv_bla_free(bat_priv);
+err_bla:
+ batadv_v_mesh_free(bat_priv);
+err_v:
+ batadv_tt_free(bat_priv);
+err_tt:
+ batadv_originator_free(bat_priv);
+err_orig:
+ batadv_purge_outstanding_packets(bat_priv, NULL);
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
return ret;
}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 9f06132e007d..0a7f1d36a6a8 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -152,8 +152,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
&batadv_nc_coding_hash_lock_class_key);
bat_priv->nc.decoding_hash = batadv_hash_new(128);
- if (!bat_priv->nc.decoding_hash)
+ if (!bat_priv->nc.decoding_hash) {
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
goto err;
+ }
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index e0b3dace2020..4b7ad6684bc4 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4162,8 +4162,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
return ret;
ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
+ if (ret < 0) {
+ batadv_tt_local_table_free(bat_priv);
return ret;
+ }
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
batadv_tt_tvlv_unicast_handler_v1,
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2eb0e55ef54d..b5f4ef35357c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -552,6 +552,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
__skb->gso_segs = skb_shinfo(skb)->gso_segs;
}
+static struct proto bpf_dummy_proto = {
+ .name = "bpf_dummy",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct sock),
+};
+
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
@@ -596,20 +602,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
break;
}
- sk = kzalloc(sizeof(struct sock), GFP_USER);
+ sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
if (!sk) {
kfree(data);
kfree(ctx);
return -ENOMEM;
}
- sock_net_set(sk, net);
sock_init_data(NULL, sk);
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
kfree(ctx);
- kfree(sk);
+ sk_free(sk);
return -ENOMEM;
}
skb->sk = sk;
@@ -682,8 +687,7 @@ out:
if (dev && dev != net->loopback_dev)
dev_put(dev);
kfree_skb(skb);
- bpf_sk_storage_free(sk);
- kfree(sk);
+ sk_free(sk);
kfree(ctx);
return ret;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3523c8c7068f..f3d751105343 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1677,8 +1677,6 @@ static void br_multicast_update_querier(struct net_bridge_mcast *brmctx,
int ifindex,
struct br_ip *saddr)
{
- lockdep_assert_held_once(&brmctx->br->multicast_lock);
-
write_seqcount_begin(&querier->seq);
querier->port_ifidx = ifindex;
memcpy(&querier->addr, saddr, sizeof(*saddr));
@@ -3867,13 +3865,13 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
- seqcount_init(&brmctx->ip4_querier.seq);
+ seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
- seqcount_init(&brmctx->ip6_querier.seq);
+ seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
timer_setup(&brmctx->ip4_mc_router_timer,
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 6c58fc14d2cb..5c6c4305ed23 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1666,7 +1666,8 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
}
return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
- nla_total_size(sizeof(struct br_mcast_stats)) +
+ nla_total_size_64bit(sizeof(struct br_mcast_stats)) +
+ (p ? nla_total_size_64bit(sizeof(p->stp_xstats)) : 0) +
nla_total_size(0);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b4cef3a97f12..37ca76406f1e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -82,7 +82,7 @@ struct bridge_mcast_other_query {
struct bridge_mcast_querier {
struct br_ip addr;
int port_ifidx;
- seqcount_t seq;
+ seqcount_spinlock_t seq;
};
/* IGMP/MLD statistics */
@@ -1125,9 +1125,7 @@ static inline unsigned long br_multicast_lmqt(const struct net_bridge_mcast *brm
static inline unsigned long br_multicast_gmi(const struct net_bridge_mcast *brmctx)
{
- /* use the RFC default of 2 for QRV */
- return 2 * brmctx->multicast_query_interval +
- brmctx->multicast_query_response_interval;
+ return brmctx->multicast_membership_interval;
}
static inline bool
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 83d1798dfbb4..ba045f35114d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -926,7 +926,9 @@ static int translate_table(struct net *net, const char *name,
return -ENOMEM;
for_each_possible_cpu(i) {
newinfo->chainstack[i] =
- vmalloc(array_size(udc_cnt, sizeof(*(newinfo->chainstack[0]))));
+ vmalloc_node(array_size(udc_cnt,
+ sizeof(*(newinfo->chainstack[0]))),
+ cpu_to_node(i));
if (!newinfo->chainstack[i]) {
while (i)
vfree(newinfo->chainstack[--i]);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 37b67194c0df..414dc5671c45 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -53,20 +53,6 @@ struct chnl_net {
enum caif_states state;
};
-static void robust_list_del(struct list_head *delete_node)
-{
- struct list_head *list_node;
- struct list_head *n;
- ASSERT_RTNL();
- list_for_each_safe(list_node, n, &chnl_net_list) {
- if (list_node == delete_node) {
- list_del(list_node);
- return;
- }
- }
- WARN_ON(1);
-}
-
static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
{
struct sk_buff *skb;
@@ -364,6 +350,7 @@ static int chnl_net_init(struct net_device *dev)
ASSERT_RTNL();
priv = netdev_priv(dev);
strncpy(priv->name, dev->name, sizeof(priv->name));
+ INIT_LIST_HEAD(&priv->list_field);
return 0;
}
@@ -372,7 +359,7 @@ static void chnl_net_uninit(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- robust_list_del(&priv->list_field);
+ list_del_init(&priv->list_field);
}
static const struct net_device_ops netdev_ops = {
@@ -537,7 +524,7 @@ static void __exit chnl_exit_module(void)
rtnl_lock();
list_for_each_safe(list_node, _tmp, &chnl_net_list) {
dev = list_entry(list_node, struct chnl_net, list_field);
- list_del(list_node);
+ list_del_init(list_node);
delete_device(dev);
}
rtnl_unlock();
diff --git a/net/can/isotp.c b/net/can/isotp.c
index caaa532ece94..df6968b28bf4 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -121,7 +121,7 @@ enum {
struct tpcon {
int idx;
int len;
- u8 state;
+ u32 state;
u8 bs;
u8 sn;
u8 ll_dl;
@@ -848,6 +848,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
struct isotp_sock *so = isotp_sk(sk);
+ u32 old_state = so->tx.state;
struct sk_buff *skb;
struct net_device *dev;
struct canfd_frame *cf;
@@ -860,45 +861,55 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
return -EADDRNOTAVAIL;
/* we do not support multiple buffers - for now */
- if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) {
- if (msg->msg_flags & MSG_DONTWAIT)
- return -EAGAIN;
+ if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE ||
+ wq_has_sleeper(&so->wait)) {
+ if (msg->msg_flags & MSG_DONTWAIT) {
+ err = -EAGAIN;
+ goto err_out;
+ }
/* wait for complete transmission of current pdu */
- wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ if (err)
+ goto err_out;
}
- if (!size || size > MAX_MSG_LENGTH)
- return -EINVAL;
+ if (!size || size > MAX_MSG_LENGTH) {
+ err = -EINVAL;
+ goto err_out;
+ }
/* take care of a potential SF_DL ESC offset for TX_DL > 8 */
off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0;
/* does the given data fit into a single frame for SF_BROADCAST? */
if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) &&
- (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off))
- return -EINVAL;
+ (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) {
+ err = -EINVAL;
+ goto err_out;
+ }
err = memcpy_from_msg(so->tx.buf, msg, size);
if (err < 0)
- return err;
+ goto err_out;
dev = dev_get_by_index(sock_net(sk), so->ifindex);
- if (!dev)
- return -ENXIO;
+ if (!dev) {
+ err = -ENXIO;
+ goto err_out;
+ }
skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv),
msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb) {
dev_put(dev);
- return err;
+ goto err_out;
}
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = dev->ifindex;
can_skb_prv(skb)->skbcnt = 0;
- so->tx.state = ISOTP_SENDING;
so->tx.len = size;
so->tx.idx = 0;
@@ -954,15 +965,25 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err) {
pr_notice_once("can-isotp: %s: can_send_ret %pe\n",
__func__, ERR_PTR(err));
- return err;
+ goto err_out;
}
if (wait_tx_done) {
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+
+ if (sk->sk_err)
+ return -sk->sk_err;
}
return size;
+
+err_out:
+ so->tx.state = old_state;
+ if (so->tx.state == ISOTP_IDLE)
+ wake_up_interruptible(&so->wait);
+
+ return err;
}
static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index f6df20808f5e..16af1a7f80f6 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -330,6 +330,7 @@ int j1939_session_activate(struct j1939_session *session);
void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec);
void j1939_session_timers_cancel(struct j1939_session *session);
+#define J1939_MIN_TP_PACKET_SIZE 9
#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff)
#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff)
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index 08c8606cfd9c..9bc55ecb37f9 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -249,11 +249,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
struct j1939_priv *priv, *priv_new;
int ret;
- priv = j1939_priv_get_by_ndev(ndev);
+ spin_lock(&j1939_netdev_lock);
+ priv = j1939_priv_get_by_ndev_locked(ndev);
if (priv) {
kref_get(&priv->rx_kref);
+ spin_unlock(&j1939_netdev_lock);
return priv;
}
+ spin_unlock(&j1939_netdev_lock);
priv = j1939_priv_create(ndev);
if (!priv)
@@ -269,10 +272,10 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
/* Someone was faster than us, use their priv and roll
* back our's.
*/
+ kref_get(&priv_new->rx_kref);
spin_unlock(&j1939_netdev_lock);
dev_put(ndev);
kfree(priv);
- kref_get(&priv_new->rx_kref);
return priv_new;
}
j1939_priv_set(ndev, priv);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index bb5c4b8979be..6c0a0ebdd024 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1237,12 +1237,11 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
session->err = -ETIME;
j1939_session_deactivate(session);
} else {
- netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
- __func__, session);
-
j1939_session_list_lock(session->priv);
if (session->state >= J1939_SESSION_ACTIVE &&
session->state < J1939_SESSION_ACTIVE_MAX) {
+ netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n",
+ __func__, session);
j1939_session_get(session);
hrtimer_start(&session->rxtimer,
ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
@@ -1609,6 +1608,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv,
abort = J1939_XTP_ABORT_FAULT;
else if (len > priv->tp_max_packet_size)
abort = J1939_XTP_ABORT_RESOURCE;
+ else if (len < J1939_MIN_TP_PACKET_SIZE)
+ abort = J1939_XTP_ABORT_FAULT;
}
if (abort != J1939_XTP_NO_ABORT) {
@@ -1789,6 +1790,7 @@ static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb,
static void j1939_xtp_rx_dat_one(struct j1939_session *session,
struct sk_buff *skb)
{
+ enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT;
struct j1939_priv *priv = session->priv;
struct j1939_sk_buff_cb *skcb, *se_skcb;
struct sk_buff *se_skb = NULL;
@@ -1803,9 +1805,11 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
skcb = j1939_skb_to_cb(skb);
dat = skb->data;
- if (skb->len <= 1)
+ if (skb->len != 8) {
/* makes no sense */
+ abort = J1939_XTP_ABORT_UNEXPECTED_DATA;
goto out_session_cancel;
+ }
switch (session->last_cmd) {
case 0xff:
@@ -1904,7 +1908,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
out_session_cancel:
kfree_skb(se_skb);
j1939_session_timers_cancel(session);
- j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
+ j1939_session_cancel(session, abort);
j1939_session_put(session);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26dd..eb3a366bf212 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3163,6 +3163,12 @@ static u16 skb_tx_hash(const struct net_device *dev,
qoffset = sb_dev->tc_to_txq[tc].offset;
qcount = sb_dev->tc_to_txq[tc].count;
+ if (unlikely(!qcount)) {
+ net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
+ sb_dev->name, qoffset, tc);
+ qoffset = 0;
+ qcount = dev->real_num_tx_queues;
+ }
}
if (skb_rx_queue_recorded(skb)) {
@@ -3906,7 +3912,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
skb->pkt_type = PACKET_LOOPBACK;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (skb->ip_summed == CHECKSUM_NONE)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
WARN_ON(!skb_dst(skb));
skb_dst_force(skb);
netif_rx_ni(skb);
@@ -6923,12 +6930,16 @@ EXPORT_SYMBOL(napi_disable);
*/
void napi_enable(struct napi_struct *n)
{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
- if (n->dev->threaded && n->thread)
- set_bit(NAPI_STATE_THREADED, &n->state);
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
+
+ new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
+ if (n->dev->threaded && n->thread)
+ new |= NAPIF_STATE_THREADED;
+ } while (cmpxchg(&n->state, val, new) != val);
}
EXPORT_SYMBOL(napi_enable);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 8c39283c26ae..f0cb38344126 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -50,6 +50,11 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
if (addr_len > MAX_ADDR_LEN)
return -EINVAL;
+ ha = list_first_entry(&list->list, struct netdev_hw_addr, list);
+ if (ha && !memcmp(addr, ha->addr, addr_len) &&
+ (!addr_type || addr_type == ha->type))
+ goto found_it;
+
while (*ins_point) {
int diff;
@@ -64,6 +69,7 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
} else if (diff > 0) {
ins_point = &parent->rb_right;
} else {
+found_it:
if (exclusive)
return -EEXIST;
if (global) {
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index eab5fc88a002..d8b9dbabd4a4 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -77,8 +77,8 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
- seq_printf(seq, "%9s: %16llu %12llu %4llu %6llu %4llu %5llu %10llu %9llu "
- "%16llu %12llu %4llu %6llu %4llu %5llu %7llu %10llu\n",
+ seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
+ "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
dev->name, stats->rx_bytes, stats->rx_packets,
stats->rx_errors,
stats->rx_dropped + stats->rx_missed_errors,
@@ -103,11 +103,11 @@ static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
static int dev_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Interface| Receive "
- " | Transmit\n"
- " | bytes packets errs drop fifo frame "
- "compressed multicast| bytes packets errs "
- " drop fifo colls carrier compressed\n");
+ seq_puts(seq, "Inter-| Receive "
+ " | Transmit\n"
+ " face |bytes packets errs drop fifo frame "
+ "compressed multicast|bytes packets errs "
+ "drop fifo colls carrier compressed\n");
else
dev_seq_printf_stats(seq, v);
return 0;
@@ -259,14 +259,14 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
struct packet_type *pt = v;
if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Type Device Function\n");
+ seq_puts(seq, "Type Device Function\n");
else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-9s %ps\n",
+ seq_printf(seq, " %-8s %ps\n",
pt->dev ? pt->dev->name : "", pt->func);
}
@@ -327,14 +327,12 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct netdev_hw_addr *ha;
struct net_device *dev = v;
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Ifindex Interface Refcount Global_use Address\n");
+ if (v == SEQ_START_TOKEN)
return 0;
- }
netif_addr_lock_bh(dev);
netdev_for_each_mc_addr(ha, dev) {
- seq_printf(seq, "%-7d %-9s %-8d %-10d %*phN\n",
+ seq_printf(seq, "%-4d %-15s %-5d %-5d %*phN\n",
dev->ifindex, dev->name,
ha->refcount, ha->global_use,
(int)dev->addr_len, ha->addr);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6197774048b..b2e49eb7001d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1973,9 +1973,9 @@ int netdev_register_kobject(struct net_device *ndev)
int netdev_change_owner(struct net_device *ndev, const struct net *net_old,
const struct net *net_new)
{
+ kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID;
+ kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID;
struct device *dev = &ndev->dev;
- kuid_t old_uid, new_uid;
- kgid_t old_gid, new_gid;
int error;
net_ns_get_ownership(net_old, &old_uid, &old_gid);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index b49c57d35a88..1a6a86693b74 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file);
- if (sock) {
- spin_lock(&cgroup_sk_update_lock);
+ if (sock)
sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
- spin_unlock(&cgroup_sk_update_lock);
- }
if (--ctx->batch == 0) {
ctx->batch = UPDATE_CLASSID_BATCH;
return n + 1;
@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
struct css_task_iter it;
struct task_struct *p;
- cgroup_sk_alloc_disable();
-
cs->classid = (u32)value;
css_task_iter_start(css, 0, &it);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 99a431c56f23..8456dfbe2eb4 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev)
return -ENODEV;
- cgroup_sk_alloc_disable();
-
rtnl_lock();
ret = netprio_set_prio(of_css(of), dev, prio);
@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
static int update_netprio(const void *v, struct file *file, unsigned n)
{
struct socket *sock = sock_from_file(file);
- if (sock) {
- spin_lock(&cgroup_sk_update_lock);
+
+ if (sock)
sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
(unsigned long)v);
- spin_unlock(&cgroup_sk_update_lock);
- }
return 0;
}
@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;
- cgroup_sk_alloc_disable();
-
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 972c8cb303a5..8ccce85562a1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -5262,7 +5262,7 @@ nla_put_failure:
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
- size_t size = 0;
+ size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2170bea2c7de..fe9358437380 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,6 +80,7 @@
#include <linux/indirect_call_wrapper.h>
#include "datagram.h"
+#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
@@ -1804,30 +1805,39 @@ EXPORT_SYMBOL(skb_realloc_headroom);
struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
{
int delta = headroom - skb_headroom(skb);
+ int osize = skb_end_offset(skb);
+ struct sock *sk = skb->sk;
if (WARN_ONCE(delta <= 0,
"%s is expecting an increase in the headroom", __func__))
return skb;
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
+ delta = SKB_DATA_ALIGN(delta);
+ /* pskb_expand_head() might crash, if skb is shared. */
+ if (skb_shared(skb) || !is_skb_wmem(skb)) {
struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
+ if (unlikely(!nskb))
+ goto fail;
+
+ if (sk)
+ skb_set_owner_w(nskb, sk);
+ consume_skb(skb);
skb = nskb;
}
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
+ if (pskb_expand_head(skb, delta, 0, GFP_ATOMIC))
+ goto fail;
+
+ if (sk && is_skb_wmem(skb)) {
+ delta = skb_end_offset(skb) - osize;
+ refcount_add(delta, &sk->sk_wmem_alloc);
+ skb->truesize += delta;
}
return skb;
+
+fail:
+ kfree_skb(skb);
+ return NULL;
}
EXPORT_SYMBOL(skb_expand_head);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..a86ef7e844f8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+bool sk_msg_is_readable(struct sock *sk)
+{
+ struct sk_psock *psock;
+ bool empty = true;
+
+ rcu_read_lock();
+ psock = sk_psock(sk);
+ if (likely(psock))
+ empty = list_empty(&psock->ingress_msg);
+ rcu_read_unlock();
+ return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
struct sk_buff *skb)
{
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..c1601f75ec4b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1376,6 +1376,16 @@ set_sndbuf:
}
EXPORT_SYMBOL(sock_setsockopt);
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+ const struct cred *cred;
+
+ spin_lock(&sk->sk_peer_lock);
+ cred = get_cred(sk->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ return cred;
+}
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
@@ -1552,7 +1562,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
+
+ spin_lock(&sk->sk_peer_lock);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
@@ -1560,20 +1574,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_PEERGROUPS:
{
+ const struct cred *cred;
int ret, n;
- if (!sk->sk_peer_cred)
+ cred = sk_get_peer_cred(sk);
+ if (!cred)
return -ENODATA;
- n = sk->sk_peer_cred->group_info->ngroups;
+ n = cred->group_info->ngroups;
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
+ put_cred(cred);
return put_user(len, optlen) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval,
- sk->sk_peer_cred->group_info);
+ ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ put_cred(cred);
if (ret)
return ret;
goto lenout;
@@ -1935,9 +1952,10 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_frag.page = NULL;
}
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+ put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
+
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
@@ -3145,6 +3163,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
+ spin_lock_init(&sk->sk_peer_lock);
+
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -3179,17 +3199,15 @@ EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
@@ -3212,42 +3230,37 @@ void release_sock(struct sock *sk)
}
EXPORT_SYMBOL(release_sock);
-/**
- * lock_sock_fast - fast version of lock_sock
- * @sk: socket
- *
- * This version should be used for very small section, where process wont block
- * return false if fast path is taken:
- *
- * sk_lock.slock locked, owned = 0, BH disabled
- *
- * return true if slow path is taken:
- *
- * sk_lock.slock unlocked, owned = 1, BH enabled
- */
-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
+bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (!sk->sk_lock.owned)
+ if (!sk->sk_lock.owned) {
/*
- * Note : We must disable BH
+ * Fast path return with bottom halves disabled and
+ * sock::sk_lock.slock held.
+ *
+ * The 'mutex' is not contended and holding
+ * sock::sk_lock.slock prevents all other lockers to
+ * proceed so the corresponding unlock_sock_fast() can
+ * avoid the slow path of release_sock() completely and
+ * just release slock.
+ *
+ * From a semantical POV this is equivalent to 'acquiring'
+ * the 'mutex', hence the corresponding lockdep
+ * mutex_release() has to happen in the fast path of
+ * unlock_sock_fast().
*/
return false;
+ }
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
__acquire(&sk->sk_lock.slock);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
-EXPORT_SYMBOL(lock_sock_fast);
+EXPORT_SYMBOL(__lock_sock_fast);
int sock_gettstamp(struct socket *sock, void __user *userstamp,
bool timeval, bool time32)
diff --git a/net/core/sock_destructor.h b/net/core/sock_destructor.h
new file mode 100644
index 000000000000..2f396e6bfba5
--- /dev/null
+++ b/net/core/sock_destructor.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
+#define _NET_CORE_SOCK_DESTRUCTOR_H
+#include <net/tcp.h>
+
+static inline bool is_skb_wmem(const struct sk_buff *skb)
+{
+ return skb->destructor == sock_wfree ||
+ skb->destructor == __sock_wfree ||
+ (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+#endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..5f88526ad61c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
.extra1 = &long_one,
- .extra2 = &long_max,
+ .extra2 = &bpf_jit_limit_max,
},
#endif
{
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index c5c74a34d139..91e7a2202697 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
newdp->dccps_service_list = NULL;
+ newdp->dccps_hc_rx_ccid = NULL;
+ newdp->dccps_hc_tx_ccid = NULL;
newdp->dccps_service = dreq->dreq_service;
newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 548285539752..d8ee15f1c7a9 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -101,8 +101,6 @@ config NET_DSA_TAG_RTL4_A
config NET_DSA_TAG_OCELOT
tristate "Tag driver for Ocelot family of switches, using NPI port"
- depends on MSCC_OCELOT_SWITCH_LIB || \
- (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
select PACKING
help
Say Y or M if you want to enable NPI tagging for the Ocelot switches
@@ -114,8 +112,6 @@ config NET_DSA_TAG_OCELOT
config NET_DSA_TAG_OCELOT_8021Q
tristate "Tag driver for Ocelot family of switches, using VLAN"
- depends on MSCC_OCELOT_SWITCH_LIB || \
- (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
help
Say Y or M if you want to enable support for tagging frames with a
custom VLAN-based header. Frames that require timestamping, such as
@@ -138,7 +134,6 @@ config NET_DSA_TAG_LAN9303
config NET_DSA_TAG_SJA1105
tristate "Tag driver for NXP SJA1105 switches"
- depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
select PACKING
help
Say Y or M if you want to enable support for tagging frames with the
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1dc45e40f961..41f36ad8b0ec 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -345,6 +345,11 @@ bool dsa_schedule_work(struct work_struct *work)
return queue_work(dsa_owq, work);
}
+void dsa_flush_workqueue(void)
+{
+ flush_workqueue(dsa_owq);
+}
+
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 1b2b25d7bd02..e9911b18bdbf 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -170,7 +170,7 @@ void dsa_bridge_num_put(const struct net_device *bridge_dev, int bridge_num)
/* Check if the bridge is still in use, otherwise it is time
* to clean it up so we can reuse this bridge_num later.
*/
- if (!dsa_bridge_num_find(bridge_dev))
+ if (dsa_bridge_num_find(bridge_dev) < 0)
clear_bit(bridge_num, &dsa_fwd_offloading_bridges);
}
@@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
bool dsa_port_link_registered = false;
+ struct dsa_switch *ds = dp->ds;
bool dsa_port_enabled = false;
int err = 0;
@@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp)
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);
+ if (ds->ops->port_setup) {
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
dsa_port_link_unregister_of(dp);
- if (err)
+ if (err) {
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
return err;
+ }
dp->setup = true;
@@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a, *tmp;
if (!dp->setup)
return;
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+
devlink_port_type_clear(dlp);
switch (dp->type) {
@@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp)
dp->devlink_port_setup = false;
}
+/* Destroy the current devlink port, and create a new one which has the UNUSED
+ * flavour. At this point, any call to ds->ops->port_setup has been already
+ * balanced out by a call to ds->ops->port_teardown, so we know that any
+ * devlink port regions the driver had are now unregistered. We then call its
+ * ds->ops->port_setup again, in order for the driver to re-create them on the
+ * new devlink port.
+ */
+static int dsa_port_reinit_as_unused(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ dsa_port_devlink_teardown(dp);
+ dp->type = DSA_PORT_TYPE_UNUSED;
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
+
+ if (ds->ops->port_setup) {
+ /* On error, leave the devlink port registered,
+ * dsa_switch_teardown will clean it up later.
+ */
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int dsa_devlink_info_get(struct devlink *dl,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
@@ -767,7 +811,9 @@ static int dsa_switch_setup_tag_protocol(struct dsa_switch *ds)
if (!dsa_is_cpu_port(ds, port))
continue;
+ rtnl_lock();
err = ds->ops->change_tag_protocol(ds, port, tag_ops->proto);
+ rtnl_unlock();
if (err) {
dev_err(ds->dev, "Unable to use tag protocol \"%s\": %pe\n",
tag_ops->name, ERR_PTR(err));
@@ -836,7 +882,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
devlink_params_publish(ds->devlink);
if (!ds->slave_mii_bus && ds->ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ ds->slave_mii_bus = mdiobus_alloc();
if (!ds->slave_mii_bus) {
err = -ENOMEM;
goto teardown;
@@ -846,13 +892,16 @@ static int dsa_switch_setup(struct dsa_switch *ds)
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- goto teardown;
+ goto free_slave_mii_bus;
}
ds->setup = true;
return 0;
+free_slave_mii_bus:
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->slave_mii_bus);
teardown:
if (ds->ops->teardown)
ds->ops->teardown(ds);
@@ -877,8 +926,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
if (!ds->setup)
return;
- if (ds->slave_mii_bus && ds->ops->phy_read)
+ if (ds->slave_mii_bus && ds->ops->phy_read) {
mdiobus_unregister(ds->slave_mii_bus);
+ mdiobus_free(ds->slave_mii_bus);
+ ds->slave_mii_bus = NULL;
+ }
dsa_switch_unregister_notifier(ds);
@@ -897,6 +949,33 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
ds->setup = false;
}
+/* First tear down the non-shared, then the shared ports. This ensures that
+ * all work items scheduled by our switchdev handlers for user ports have
+ * completed before we destroy the refcounting kept on the shared ports.
+ */
+static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp))
+ dsa_port_teardown(dp);
+
+ dsa_flush_workqueue();
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
+ dsa_port_teardown(dp);
+}
+
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
+}
+
static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -911,38 +990,22 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
err = dsa_port_setup(dp);
if (err) {
- dsa_port_devlink_teardown(dp);
- dp->type = DSA_PORT_TYPE_UNUSED;
- err = dsa_port_devlink_setup(dp);
+ err = dsa_port_reinit_as_unused(dp);
if (err)
goto teardown;
- continue;
}
}
return 0;
teardown:
- list_for_each_entry(dp, &dst->ports, list)
- dsa_port_teardown(dp);
+ dsa_tree_teardown_ports(dst);
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
+ dsa_tree_teardown_switches(dst);
return err;
}
-static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_port_teardown(dp);
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
-}
-
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -1034,6 +1097,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
teardown_master:
dsa_tree_teardown_master(dst);
teardown_switches:
+ dsa_tree_teardown_ports(dst);
dsa_tree_teardown_switches(dst);
teardown_cpu_ports:
dsa_tree_teardown_cpu_ports(dst);
@@ -1052,6 +1116,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_master(dst);
+ dsa_tree_teardown_ports(dst);
+
dsa_tree_teardown_switches(dst);
dsa_tree_teardown_cpu_ports(dst);
@@ -1308,12 +1374,15 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
for_each_available_child_of_node(ports, port) {
err = of_property_read_u32(port, "reg", &reg);
- if (err)
+ if (err) {
+ of_node_put(port);
goto out_put_node;
+ }
if (reg >= ds->num_ports) {
dev_err(ds->dev, "port %pOF index %u exceeds num_ports (%zu)\n",
port, reg, ds->num_ports);
+ of_node_put(port);
err = -EINVAL;
goto out_put_node;
}
@@ -1321,8 +1390,10 @@ static int dsa_switch_parse_ports_of(struct dsa_switch *ds,
dp = dsa_to_port(ds, reg);
err = dsa_port_parse_of(dp, port);
- if (err)
+ if (err) {
+ of_node_put(port);
goto out_put_node;
+ }
}
out_put_node:
@@ -1546,3 +1617,53 @@ void dsa_unregister_switch(struct dsa_switch *ds)
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
+
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
+{
+ struct net_device *master, *slave_dev;
+ LIST_HEAD(unregister_list);
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
+ rtnl_lock();
+
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+ if (dp->ds != ds)
+ continue;
+
+ if (!dsa_port_is_user(dp))
+ continue;
+
+ master = dp->cpu_dp->master;
+ slave_dev = dp->slave;
+
+ netdev_upper_dev_unlink(master, slave_dev);
+ /* Just unlinking ourselves as uppers of the master is not
+ * sufficient. When the master net device unregisters, that will
+ * also call dev_close, which we will catch as NETDEV_GOING_DOWN
+ * and trigger a dev_close on our own devices (dsa_slave_close).
+ * In turn, that will call dev_mc_unsync on the master's net
+ * device. If the master is also a DSA switch port, this will
+ * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
+ * its own master. Lockdep will complain about the fact that
+ * all cascaded masters have the same dsa_master_addr_list_lock_key,
+ * which it normally would not do if the cascaded masters would
+ * be in a proper upper/lower relationship, which we've just
+ * destroyed.
+ * To suppress the lockdep warnings, let's actually unregister
+ * the DSA slave interfaces too, to avoid the nonsensical
+ * multicast address list synchronization on shutdown.
+ */
+ unregister_netdevice_queue(slave_dev, &unregister_list);
+ }
+ unregister_netdevice_many(&unregister_list);
+
+ rtnl_unlock();
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 33ab7d7af9eb..a5c9bc7b66c6 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -170,6 +170,7 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
+void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 662ff531d4e2..a2bf2d8ac65b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1854,13 +1854,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
* use the switch internal MDIO bus instead
*/
ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags);
- if (ret) {
- netdev_err(slave_dev,
- "failed to connect to port %d: %d\n",
- dp->index, ret);
- phylink_destroy(dp->pl);
- return ret;
- }
+ }
+ if (ret) {
+ netdev_err(slave_dev, "failed to connect to PHY: %pe\n",
+ ERR_PTR(ret));
+ phylink_destroy(dp->pl);
}
return ret;
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 1c797ec8e2c2..6466d0539af9 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -168,7 +168,7 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
if (extack._msg)
dev_err(ds->dev, "port %d: %s\n", info->port,
extack._msg);
- if (err && err != EOPNOTSUPP)
+ if (err && err != -EOPNOTSUPP)
return err;
}
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 77d0ce89ab77..b3da4b2ea11c 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -45,6 +45,7 @@
* 6 6 2 2 4 2 N
*/
+#include <linux/dsa/mv88e6xxx.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <linux/slab.h>
@@ -129,12 +130,9 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
u8 tag_dev, tag_port;
enum dsa_cmd cmd;
u8 *dsa_header;
- u16 pvid = 0;
- int err;
if (skb->offload_fwd_mark) {
struct dsa_switch_tree *dst = dp->ds->dst;
- struct net_device *br = dp->bridge_dev;
cmd = DSA_CMD_FORWARD;
@@ -144,19 +142,6 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
*/
tag_dev = dst->last_switch + 1 + dp->bridge_num;
tag_port = 0;
-
- /* If we are offloading forwarding for a VLAN-unaware bridge,
- * inject packets to hardware using the bridge's pvid, since
- * that's where the packets ingressed from.
- */
- if (!br_vlan_enabled(br)) {
- /* Safe because __dev_queue_xmit() runs under
- * rcu_read_lock_bh()
- */
- err = br_vlan_get_pvid_rcu(br, &pvid);
- if (err)
- return NULL;
- }
} else {
cmd = DSA_CMD_FROM_CPU;
tag_dev = dp->ds->index;
@@ -180,16 +165,21 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
dsa_header[2] &= ~0x10;
}
} else {
+ struct net_device *br = dp->bridge_dev;
+ u16 vid;
+
+ vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
+
skb_push(skb, DSA_HLEN + extra);
dsa_alloc_etype_header(skb, DSA_HLEN + extra);
- /* Construct untagged DSA tag. */
+ /* Construct DSA header from untagged frame. */
dsa_header = dsa_etype_header_pos_tx(skb) + extra;
dsa_header[0] = (cmd << 6) | tag_dev;
dsa_header[1] = tag_port << 3;
- dsa_header[2] = pvid >> 8;
- dsa_header[3] = pvid & 0xff;
+ dsa_header[2] = vid >> 8;
+ dsa_header[3] = vid & 0xff;
}
return skb;
@@ -210,7 +200,7 @@ static struct sk_buff *dsa_rcv_ll(struct sk_buff *skb, struct net_device *dev,
cmd = dsa_header[0] >> 6;
switch (cmd) {
case DSA_CMD_FORWARD:
- trunk = !!(dsa_header[1] & 7);
+ trunk = !!(dsa_header[1] & 4);
break;
case DSA_CMD_TO_CPU:
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index d37ab98e7fe1..605b51ca6921 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
*/
#include <linux/dsa/ocelot.h>
-#include <soc/mscc/ocelot.h>
#include "dsa_priv.h"
static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3038a257ba05..3412051981d7 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
*
* An implementation of the software-defined tag_8021q.c tagger format, which
* also preserves full functionality under a vlan_filtering bridge. It does
@@ -9,10 +9,32 @@
* that on egress
*/
#include <linux/dsa/8021q.h>
-#include <soc/mscc/ocelot.h>
-#include <soc/mscc/ocelot_ptp.h>
+#include <linux/dsa/ocelot.h>
#include "dsa_priv.h"
+static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
+ struct sk_buff *skb)
+{
+ struct felix_deferred_xmit_work *xmit_work;
+ struct felix_port *felix_port = dp->priv;
+
+ xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
+ if (!xmit_work)
+ return NULL;
+
+ /* Calls felix_port_deferred_xmit in felix.c */
+ kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn);
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ xmit_work->dp = dp;
+ xmit_work->skb = skb_get(skb);
+
+ kthread_queue_work(felix_port->xmit_worker, &xmit_work->work);
+
+ return NULL;
+}
+
static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
@@ -20,18 +42,10 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
- struct ocelot *ocelot = dp->ds->priv;
- int port = dp->index;
- u32 rew_op = 0;
+ struct ethhdr *hdr = eth_hdr(skb);
- rew_op = ocelot_ptp_rew_op(skb);
- if (rew_op) {
- if (!ocelot_can_inject(ocelot, 0))
- return NULL;
-
- ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
- return NULL;
- }
+ if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest))
+ return ocelot_defer_xmit(dp, skb);
return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
((pcp << VLAN_PRIO_SHIFT) | tx_vid));
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index c054f48541c8..2edede9ddac9 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -4,6 +4,7 @@
#include <linux/if_vlan.h>
#include <linux/dsa/sja1105.h>
#include <linux/dsa/8021q.h>
+#include <linux/skbuff.h>
#include <linux/packing.h>
#include "dsa_priv.h"
@@ -53,6 +54,11 @@
#define SJA1110_TX_TRAILER_LEN 4
#define SJA1110_MAX_PADDING_LEN 15
+enum sja1110_meta_tstamp {
+ SJA1110_META_TSTAMP_TX = 0,
+ SJA1110_META_TSTAMP_RX = 1,
+};
+
/* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
static inline bool sja1105_is_link_local(const struct sk_buff *skb)
{
@@ -520,6 +526,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
is_meta);
}
+static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
+ u8 ts_id, enum sja1110_meta_tstamp dir,
+ u64 tstamp)
+{
+ struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct skb_shared_hwtstamps shwt = {0};
+ struct sja1105_port *sp = dp->priv;
+
+ if (!dsa_port_is_sja1105(dp))
+ return;
+
+ /* We don't care about RX timestamps on the CPU port */
+ if (dir == SJA1110_META_TSTAMP_RX)
+ return;
+
+ spin_lock(&sp->data->skb_txtstamp_queue.lock);
+
+ skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) {
+ if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
+ continue;
+
+ __skb_unlink(skb, &sp->data->skb_txtstamp_queue);
+ skb_match = skb;
+
+ break;
+ }
+
+ spin_unlock(&sp->data->skb_txtstamp_queue.lock);
+
+ if (WARN_ON(!skb_match))
+ return;
+
+ shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
+ skb_complete_tx_timestamp(skb_match, &shwt);
+}
+
static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
{
u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b42c429cebbe..3364cb9c67e0 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1661,7 +1661,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info);
#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
- int nh_weight, u8 rt_family)
+ int nh_weight, u8 rt_family, u32 nh_tclassid)
{
const struct net_device *dev = nhc->nhc_dev;
struct rtnexthop *rtnh;
@@ -1679,6 +1679,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
rtnh->rtnh_flags = flags;
+ if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
+ goto nla_put_failure;
+
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
@@ -1706,14 +1709,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
}
for_nexthops(fi) {
- if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
- AF_INET) < 0)
- goto nla_put_failure;
+ u32 nh_tclassid = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
- if (nh->nh_tclassid &&
- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
- goto nla_put_failure;
+ nh_tclassid = nh->nh_tclassid;
#endif
+ if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
+ AF_INET, nh_tclassid) < 0)
+ goto nla_put_failure;
} endfor_nexthops(fi);
mp_end:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 8b30cadff708..b7e277d8a84d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1054,14 +1054,19 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr), &_iio);
if (!ext_hdr || !iio)
goto send_mal_query;
- if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr))
+ if (ntohs(iio->extobj_hdr.length) <= sizeof(iio->extobj_hdr) ||
+ ntohs(iio->extobj_hdr.length) > sizeof(_iio))
goto send_mal_query;
ident_len = ntohs(iio->extobj_hdr.length) - sizeof(iio->extobj_hdr);
+ iio = skb_header_pointer(skb, sizeof(_ext_hdr),
+ sizeof(iio->extobj_hdr) + ident_len, &_iio);
+ if (!iio)
+ goto send_mal_query;
+
status = 0;
dev = NULL;
switch (iio->extobj_hdr.class_type) {
case ICMP_EXT_ECHO_CTYPE_NAME:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
if (ident_len >= IFNAMSIZ)
goto send_mal_query;
memset(buff, 0, sizeof(buff));
@@ -1069,30 +1074,24 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
dev = dev_get_by_name(net, buff);
break;
case ICMP_EXT_ECHO_CTYPE_INDEX:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(iio->ident.ifindex), &_iio);
if (ident_len != sizeof(iio->ident.ifindex))
goto send_mal_query;
dev = dev_get_by_index(net, ntohl(iio->ident.ifindex));
break;
case ICMP_EXT_ECHO_CTYPE_ADDR:
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
+ if (ident_len < sizeof(iio->ident.addr.ctype3_hdr) ||
+ ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
iio->ident.addr.ctype3_hdr.addrlen)
goto send_mal_query;
switch (ntohs(iio->ident.addr.ctype3_hdr.afi)) {
case ICMP_AFI_IP:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(iio->extobj_hdr) +
- sizeof(struct in_addr), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in_addr))
goto send_mal_query;
dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr);
break;
#if IS_ENABLED(CONFIG_IPV6)
case ICMP_AFI_IP6:
- iio = skb_header_pointer(skb, sizeof(_ext_hdr), sizeof(_iio), &_iio);
- if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) +
- sizeof(struct in6_addr))
+ if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr))
goto send_mal_query;
dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev);
dev_hold(dev);
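The icmp.c hunk above hardens icmp_build_probe(): the identification-object length is now checked against both the object-header size and the local copy buffer, and the header pointer is re-read once with the full identification length before the class_type switch instead of per case. A minimal userspace sketch of that bounds pattern, with hypothetical struct and function names and host byte order for brevity (not the kernel code):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* stand-in for the ICMP extended-echo identification object header */
struct ext_obj_hdr {
	uint16_t length;	/* total object length, header included */
	uint8_t  class_num;
	uint8_t  class_type;
};

/* Returns the payload length on success, -1 if the object is malformed. */
static int parse_ident_object(const uint8_t *buf, size_t buflen)
{
	struct ext_obj_hdr hdr;

	if (buflen < sizeof(hdr))
		return -1;
	memcpy(&hdr, buf, sizeof(hdr));

	/* must describe at least one payload byte ... */
	if (hdr.length <= sizeof(hdr))
		return -1;
	/* ... and must not claim more data than we actually hold */
	if (hdr.length > buflen)
		return -1;

	return hdr.length - sizeof(hdr);
}

int main(void)
{
	uint8_t pkt[8] = { 0 };
	struct ext_obj_hdr hdr = { .length = 6, .class_type = 1 };

	memcpy(pkt, &hdr, sizeof(hdr));
	printf("ident_len = %d\n", parse_ident_object(pkt, sizeof(pkt)));
	return 0;
}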
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 80aeaf9e6e16..bfb522e51346 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -242,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
+ score = sk->sk_bound_dev_if ? 2 : 1;
- score = sk->sk_family == PF_INET ? 2 : 1;
+ if (sk->sk_family == PF_INET)
+ score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
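This compute_score() change (and the matching udp.c, inet6_hashtables.c and ipv6/udp.c hunks below) gives a socket bound to a specific device a higher base score, so it is preferred over wildcard-device listeners; address family and CPU affinity then act as tie-breakers. A standalone sketch of that ordering, using made-up types:

#include <stdbool.h>
#include <stdio.h>

struct fake_sk {
	int  bound_dev_if;	/* 0 means "any device" */
	bool is_inet;		/* AF_INET socket vs dual-stack listener */
	bool on_this_cpu;
};

static int compute_score(const struct fake_sk *sk)
{
	int score = sk->bound_dev_if ? 2 : 1;	/* device-bound wins first */

	if (sk->is_inet)
		score++;
	if (sk->on_this_cpu)
		score++;
	return score;
}

int main(void)
{
	struct fake_sk bound   = { .bound_dev_if = 3, .is_inet = true };
	struct fake_sk unbound = { .bound_dev_if = 0, .is_inet = true };

	printf("bound=%d unbound=%d\n",
	       compute_score(&bound), compute_score(&unbound));
	return 0;
}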
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index b88e0f36cd05..8265c6765705 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -42,7 +42,7 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init iptable_raw_table_init(struct net *net)
+static int iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
const struct xt_table *table = &packet_raw;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 613432a36f0a..e61ea428ea18 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -20,13 +20,8 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-static unsigned int defrag4_pernet_id __read_mostly;
static DEFINE_MUTEX(defrag4_mutex);
-struct defrag4_pernet {
- unsigned int users;
-};
-
static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
u_int32_t user)
{
@@ -111,19 +106,15 @@ static const struct nf_hook_ops ipv4_defrag_ops[] = {
static void __net_exit defrag4_net_exit(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
- if (nf_defrag->users) {
+ if (net->nf.defrag_ipv4_users) {
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
- nf_defrag->users = 0;
+ net->nf.defrag_ipv4_users = 0;
}
}
static struct pernet_operations defrag4_net_ops = {
.exit = defrag4_net_exit,
- .id = &defrag4_pernet_id,
- .size = sizeof(struct defrag4_pernet),
};
static int __init nf_defrag_init(void)
@@ -138,24 +129,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv4_enable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
int err = 0;
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv4_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_defrag->users) {
- nf_defrag->users++;
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
if (err == 0)
- nf_defrag->users = 1;
+ net->nf.defrag_ipv4_users = 1;
out_unlock:
mutex_unlock(&defrag4_mutex);
@@ -165,12 +155,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
void nf_defrag_ipv4_disable(struct net *net)
{
- struct defrag4_pernet *nf_defrag = net_generic(net, defrag4_pernet_id);
-
mutex_lock(&defrag4_mutex);
- if (nf_defrag->users) {
- nf_defrag->users--;
- if (nf_defrag->users == 0)
+ if (net->nf.defrag_ipv4_users) {
+ net->nf.defrag_ipv4_users--;
+ if (net->nf.defrag_ipv4_users == 0)
nf_unregister_net_hooks(net, ipv4_defrag_ops,
ARRAY_SIZE(ipv4_defrag_ops));
}
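The nf_defrag_ipv4.c change drops the net_generic() per-netns allocation in favour of a plain use counter in net->nf, but the enable/disable pattern itself is unchanged: a mutex-protected count where the first user registers the defrag hooks and the last one unregisters them. A rough userspace sketch of that pattern; register_hooks()/unregister_hooks() stand in for nf_register_net_hooks()/nf_unregister_net_hooks():

#include <limits.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t defrag_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int defrag_users;

static int register_hooks(void)    { puts("hooks registered");   return 0; }
static void unregister_hooks(void) { puts("hooks unregistered"); }

static int defrag_enable(void)
{
	int err = 0;

	pthread_mutex_lock(&defrag_mutex);
	if (defrag_users == UINT_MAX)
		err = -1;			/* counter would overflow */
	else if (defrag_users)
		defrag_users++;			/* hooks already registered */
	else if ((err = register_hooks()) == 0)
		defrag_users = 1;		/* first user registers */
	pthread_mutex_unlock(&defrag_mutex);
	return err;
}

static void defrag_disable(void)
{
	pthread_mutex_lock(&defrag_mutex);
	if (defrag_users && --defrag_users == 0)
		unregister_hooks();		/* last user unregisters */
	pthread_mutex_unlock(&defrag_mutex);
}

int main(void)
{
	defrag_enable();
	defrag_enable();
	defrag_disable();
	defrag_disable();
	return 0;
}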
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 75ca4b6e484f..9e8100728d46 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
rcu_assign_pointer(old->nh_grp, newg);
if (newg->resilient) {
+ /* Make sure concurrent readers are not using 'oldg' anymore. */
+ synchronize_net();
rcu_assign_pointer(oldg->res_table, tmp_table);
rcu_assign_pointer(oldg->spare->res_table, tmp_table);
}
@@ -3565,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = {
};
static int nexthops_dump(struct net *net, struct notifier_block *nb,
+ enum nexthop_event_type event_type,
struct netlink_ext_ack *extack)
{
struct rb_root *root = &net->nexthop.rb_root;
@@ -3575,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb,
struct nexthop *nh;
nh = rb_entry(node, struct nexthop, rb_node);
- err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
- extack);
+ err = call_nexthop_notifier(nb, net, event_type, nh, extack);
if (err)
break;
}
@@ -3590,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
int err;
rtnl_lock();
- err = nexthops_dump(net, nb, extack);
+ err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack);
if (err)
goto unlock;
err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
@@ -3603,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
- return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
- nb);
+ int err;
+
+ rtnl_lock();
+ err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
+ nb);
+ if (err)
+ goto unlock;
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+unlock:
+ rtnl_unlock();
+ return err;
}
EXPORT_SYMBOL(unregister_nexthop_notifier);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e8b48df73c85..f5c336f8b0c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -486,10 +486,7 @@ static bool tcp_stream_is_readable(struct sock *sk, int target)
{
if (tcp_epollin_ready(sk, target))
return true;
-
- if (sk->sk_prot->stream_memory_read)
- return sk->sk_prot->stream_memory_read(sk);
- return false;
+ return sk_is_readable(sk);
}
/*
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index d3e9386b493e..5f4d6f45d87f 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
#ifdef CONFIG_BPF_SYSCALL
-static bool tcp_bpf_stream_read(const struct sock *sk)
-{
- struct sk_psock *psock;
- bool empty = true;
-
- rcu_read_lock();
- psock = sk_psock(sk);
- if (likely(psock))
- empty = list_empty(&psock->ingress_msg);
- rcu_read_unlock();
- return !empty;
-}
-
static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
long timeo)
{
@@ -232,6 +219,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
bool cork = false, enospc = sk_msg_full(msg);
struct sock *sk_redir;
u32 tosend, delta = 0;
+ u32 eval = __SK_NONE;
int ret;
more_data:
@@ -275,13 +263,24 @@ more_data:
case __SK_REDIRECT:
sk_redir = psock->sk_redir;
sk_msg_apply_bytes(psock, tosend);
+ if (!psock->apply_bytes) {
+ /* Clean up before releasing the sock lock. */
+ eval = psock->eval;
+ psock->eval = __SK_NONE;
+ psock->sk_redir = NULL;
+ }
if (psock->cork) {
cork = true;
psock->cork = NULL;
}
sk_msg_return(sk, msg, tosend);
release_sock(sk);
+
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+
+ if (eval == __SK_REDIRECT)
+ sock_put(sk_redir);
+
lock_sock(sk);
if (unlikely(ret < 0)) {
int free = sk_msg_free_nocharge(sk, msg);
@@ -479,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
prot[TCP_BPF_BASE].unhash = sock_map_unhash;
prot[TCP_BPF_BASE].close = sock_map_close;
prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg;
- prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
+ prot[TCP_BPF_BASE].sock_is_readable = sk_msg_is_readable;
prot[TCP_BPF_TX] = prot[TCP_BPF_BASE];
prot[TCP_BPF_TX].sendmsg = tcp_bpf_sendmsg;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f7bd7ae7d7a..141e85e6422b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1346,7 +1346,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
if ((sacked & TCPCB_SACKED_ACKED) &&
before(start_seq, state->reord))
state->reord = start_seq;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2e62e0d6373a..5b8ce65dfc06 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1037,6 +1037,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
+static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new)
+{
+ if (!old)
+ return true;
+
+ /* l3index always overrides non-l3index */
+ if (old->l3index && new->l3index == 0)
+ return false;
+ if (old->l3index == 0 && new->l3index)
+ return true;
+
+ return old->prefixlen < new->prefixlen;
+}
+
/* Find the Key structure for an address. */
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
@@ -1059,7 +1073,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->l3index && key->l3index != l3index)
+ if (key->flags & TCP_MD5SIG_FLAG_IFINDEX && key->l3index != l3index)
continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
@@ -1074,8 +1088,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
match = false;
}
- if (match && (!best_match ||
- key->prefixlen > best_match->prefixlen))
+ if (match && better_md5_match(best_match, key))
best_match = key;
}
return best_match;
@@ -1085,7 +1098,7 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
int family, u8 prefixlen,
- int l3index)
+ int l3index, u8 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1105,7 +1118,9 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
lockdep_sock_is_held(sk)) {
if (key->family != family)
continue;
- if (key->l3index && key->l3index != l3index)
+ if ((key->flags & TCP_MD5SIG_FLAG_IFINDEX) != (flags & TCP_MD5SIG_FLAG_IFINDEX))
+ continue;
+ if (key->l3index != l3index)
continue;
if (!memcmp(&key->addr, addr, size) &&
key->prefixlen == prefixlen)
@@ -1129,7 +1144,7 @@ EXPORT_SYMBOL(tcp_v4_md5_lookup);
/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, int l3index,
+ int family, u8 prefixlen, int l3index, u8 flags,
const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
@@ -1137,7 +1152,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
if (key) {
/* Pre-existing entry - just update that one.
* Note that the key might be used concurrently.
@@ -1182,6 +1197,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->family = family;
key->prefixlen = prefixlen;
key->l3index = l3index;
+ key->flags = flags;
memcpy(&key->addr, addr,
(family == AF_INET6) ? sizeof(struct in6_addr) :
sizeof(struct in_addr));
@@ -1191,11 +1207,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen, int l3index)
+ u8 prefixlen, int l3index, u8 flags)
{
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index, flags);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -1229,6 +1245,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
const union tcp_md5_addr *addr;
u8 prefixlen = 32;
int l3index = 0;
+ u8 flags;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -1239,6 +1256,8 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
if (sin->sin_family != AF_INET)
return -EINVAL;
+ flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
@@ -1246,7 +1265,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
}
- if (optname == TCP_MD5SIG_EXT &&
+ if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
@@ -1267,12 +1286,12 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);
+ return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index, flags);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index,
+ return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
@@ -1596,7 +1615,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index,
+ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index, key->flags,
key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
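better_md5_match(), added above, changes how __tcp_md5_do_lookup() selects among matching keys: a key bound to an L3 device (l3index != 0) always beats an unbound key, and prefix length only decides between keys of the same kind. A standalone sketch of that preference rule; the structure and values are illustrative, not the kernel types:

#include <stdbool.h>
#include <stdio.h>

struct md5_key {
	int l3index;			/* 0 = not bound to a device */
	unsigned char prefixlen;
	const char *name;
};

static bool better_md5_match(const struct md5_key *old, const struct md5_key *new)
{
	if (!old)
		return true;
	/* l3index always overrides non-l3index */
	if (old->l3index && new->l3index == 0)
		return false;
	if (old->l3index == 0 && new->l3index)
		return true;
	return old->prefixlen < new->prefixlen;
}

int main(void)
{
	struct md5_key keys[] = {
		{ .l3index = 0, .prefixlen = 32, .name = "unbound /32" },
		{ .l3index = 5, .prefixlen = 24, .name = "vrf-bound /24" },
	};
	const struct md5_key *best = NULL;
	unsigned int i;

	for (i = 0; i < 2; i++)
		if (better_md5_match(best, &keys[i]))
			best = &keys[i];

	printf("best match: %s\n", best->name);	/* vrf-bound /24 */
	return 0;
}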
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8851c9463b4b..2fffcf2b54f3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -390,7 +390,8 @@ static int compute_score(struct sock *sk, struct net *net,
dif, sdif);
if (!dev_match)
return -1;
- score += 4;
+ if (sk->sk_bound_dev_if)
+ score += 4;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -1053,7 +1054,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
__be16 dport;
u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
struct ip_options_data opt_copy;
@@ -1361,7 +1362,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
}
up->len += size;
- if (!(up->corkflag || (flags&MSG_MORE)))
+ if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
@@ -2662,9 +2663,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
if (val != 0) {
- up->corkflag = 1;
+ WRITE_ONCE(up->corkflag, 1);
} else {
- up->corkflag = 0;
+ WRITE_ONCE(up->corkflag, 0);
lock_sock(sk);
push_pending_frames(sk);
release_sock(sk);
@@ -2787,7 +2788,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
- val = up->corkflag;
+ val = READ_ONCE(up->corkflag);
break;
case UDP_ENCAP:
@@ -2866,6 +2867,9 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
+ /* psock ingress_msg queue should not contain any bad checksum frames */
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
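The corkflag accesses in this udp.c hunk (and the matching ipv6/udp.c one below) are marked READ_ONCE()/WRITE_ONCE() because the flag is read in udp_sendmsg()/udp_sendpage()/getsockopt() without the socket lock while udp_lib_setsockopt() updates it. A loose userspace analogue using C11 relaxed atomics; the kernel macros are not C11 atomics, this only illustrates the single-racy-flag idea:

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned char corkflag;

static void set_cork(int on)
{
	/* plays the role of WRITE_ONCE(up->corkflag, ...) */
	atomic_store_explicit(&corkflag, on ? 1 : 0, memory_order_relaxed);
}

static int sendmsg_would_cork(int msg_more)
{
	/* one marked read; no lock taken just to sample the flag */
	return atomic_load_explicit(&corkflag, memory_order_relaxed) || msg_more;
}

int main(void)
{
	set_cork(1);
	printf("corked: %d\n", sendmsg_would_cork(0));
	set_cork(0);
	printf("corked: %d\n", sendmsg_would_cork(0));
	return 0;
}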
diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c
index 7a1d5f473878..bbe6569c9ad3 100644
--- a/net/ipv4/udp_bpf.c
+++ b/net/ipv4/udp_bpf.c
@@ -114,6 +114,7 @@ static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = udp_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void udp_bpf_check_v6_needs_rebuild(struct proto *ops)
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 0d122edc368d..b91003538d87 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_module(void)
{
int err;
- udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
+ udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
if (!udp_tunnel_nic_workqueue)
return -ENOMEM;
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 55c290d55605..67c9114835c8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -106,7 +106,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
return -1;
- score = 1;
+ score = sk->sk_bound_dev_if ? 2 : 1;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
index 5e8961004832..d128172bb549 100644
--- a/net/ipv6/ioam6.c
+++ b/net/ipv6/ioam6.c
@@ -770,6 +770,66 @@ static void __ioam6_fill_trace_data(struct sk_buff *skb,
data += sizeof(__be32);
}
+ /* bit12 undefined: filled with empty value */
+ if (trace->type.bit12) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit13 undefined: filled with empty value */
+ if (trace->type.bit13) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit14 undefined: filled with empty value */
+ if (trace->type.bit14) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit15 undefined: filled with empty value */
+ if (trace->type.bit15) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit16 undefined: filled with empty value */
+ if (trace->type.bit16) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit17 undefined: filled with empty value */
+ if (trace->type.bit17) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit18 undefined: filled with empty value */
+ if (trace->type.bit18) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit19 undefined: filled with empty value */
+ if (trace->type.bit19) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit20 undefined: filled with empty value */
+ if (trace->type.bit20) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* bit21 undefined: filled with empty value */
+ if (trace->type.bit21) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
/* opaque state snapshot */
if (trace->type.bit22) {
if (!sc) {
@@ -791,16 +851,10 @@ void ioam6_fill_trace_data(struct sk_buff *skb,
struct ioam6_schema *sc;
u8 sclen = 0;
- /* Skip if Overflow flag is set OR
- * if an unknown type (bit 12-21) is set
+ /* Skip if Overflow flag is set
*/
- if (trace->overflow ||
- trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
- trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
- trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
- trace->type.bit21) {
+ if (trace->overflow)
return;
- }
/* NodeLen does not include Opaque State Snapshot length. We need to
* take it into account if the corresponding bit is set (bit 22) and
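With the ioam6.c hunks above, a trace type with any of the undefined bits 12-21 set is no longer skipped in __ioam6_fill_trace_data(); each such bit now emits the 4-byte IOAM6_U32_UNAVAILABLE placeholder, and rejecting those bits moves to ioam6_validate_trace_hdr() in ioam6_iptunnel.c below. A sketch of the fill loop; LSB-first bit numbering and the constant value are chosen purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define U32_UNAVAILABLE 0xffffffffu

static size_t fill_trace(uint32_t type_bits, uint32_t *out)
{
	size_t n = 0;
	int bit;

	for (bit = 0; bit < 22; bit++) {
		if (!(type_bits & (1u << bit)))
			continue;
		/* bits 0-11 would carry real node data; 12-21 are undefined */
		out[n++] = (bit >= 12) ? U32_UNAVAILABLE : 0;
	}
	return n;
}

int main(void)
{
	uint32_t data[22];
	size_t n = fill_trace((1u << 0) | (1u << 13), data);

	printf("%zu fields, last = 0x%08x\n", n, (unsigned int)data[n - 1]);
	return 0;
}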
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index f9ee04541c17..9b7b726f8f45 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -75,7 +75,11 @@ static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
u32 fields;
if (!trace->type_be32 || !trace->remlen ||
- trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4 ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21)
return false;
trace->nodelen = 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bec5b22f80d..0371d2c14145 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1378,7 +1378,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
- int sernum = fib6_new_sernum(info->nl_net);
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1478,7 +1477,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (!err) {
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
- __fib6_update_sernum_upto_root(rt, sernum);
+ __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
fib6_start_gc(info->nl_net, rt);
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 12f985f43bcc..2f044a49afa8 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -464,13 +464,14 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
int ip6_forward(struct sk_buff *skb)
{
- struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct inet6_skb_parm *opt = IP6CB(skb);
struct net *net = dev_net(dst->dev);
+ struct inet6_dev *idev;
u32 mtu;
+ idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
if (net->ipv6.devconf_all->forwarding == 0)
goto error;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index de2cf3943b91..a579ea14a69b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
+ acpar.fragoff = 0;
acpar.hotdrop = false;
acpar.state = state;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 733c83d38b30..4ad8b2032f1f 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
static inline bool
segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
{
- bool r;
- pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
- invert ? '!' : ' ', min, id, max);
- r = (id >= min && id <= max) ^ invert;
- pr_debug(" result %s\n", r ? "PASS" : "FAILED");
- return r;
+ return (id >= min && id <= max) ^ invert;
}
static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
@@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
- pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen);
- pr_debug("TYPE %04X ", rh->type);
- pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left);
-
- pr_debug("IPv6 RT segsleft %02X ",
- segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
- rh->segments_left,
- !!(rtinfo->invflags & IP6T_RT_INV_SGS)));
- pr_debug("type %02X %02X %02X ",
- rtinfo->rt_type, rh->type,
- (!(rtinfo->flags & IP6T_RT_TYP) ||
- ((rtinfo->rt_type == rh->type) ^
- !!(rtinfo->invflags & IP6T_RT_INV_TYP))));
- pr_debug("len %02X %04X %02X ",
- rtinfo->hdrlen, hdrlen,
- !(rtinfo->flags & IP6T_RT_LEN) ||
- ((rtinfo->hdrlen == hdrlen) ^
- !!(rtinfo->invflags & IP6T_RT_INV_LEN)));
- pr_debug("res %02X %02X %02X ",
- rtinfo->flags & IP6T_RT_RES,
- ((const struct rt0_hdr *)rh)->reserved,
- !((rtinfo->flags & IP6T_RT_RES) &&
- (((const struct rt0_hdr *)rh)->reserved)));
-
ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1],
rh->segments_left,
!!(rtinfo->invflags & IP6T_RT_INV_SGS))) &&
@@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
reserved),
sizeof(_reserved),
&_reserved);
+ if (!rp) {
+ par->hotdrop = true;
+ return false;
+ }
ret = (*rp == 0);
}
- pr_debug("#%d ", rtinfo->addrnr);
if (!(rtinfo->flags & IP6T_RT_FST)) {
return ret;
} else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) {
- pr_debug("Not strict ");
if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
- pr_debug("There isn't enough space\n");
return false;
} else {
unsigned int i = 0;
- pr_debug("#%d ", rtinfo->addrnr);
for (temp = 0;
temp < (unsigned int)((hdrlen - 8) / 16);
temp++) {
@@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
- if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) {
- pr_debug("i=%d temp=%d;\n", i, temp);
+ if (ipv6_addr_equal(ap, &rtinfo->addrs[i]))
i++;
- }
if (i == rtinfo->addrnr)
break;
}
- pr_debug("i=%d #%d\n", i, rtinfo->addrnr);
if (i == rtinfo->addrnr)
return ret;
else
return false;
}
} else {
- pr_debug("Strict ");
if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) {
- pr_debug("There isn't enough space\n");
return false;
} else {
- pr_debug("#%d ", rtinfo->addrnr);
for (temp = 0; temp < rtinfo->addrnr; temp++) {
ap = skb_header_pointer(skb,
ptr
@@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp]))
break;
}
- pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr);
if (temp == rtinfo->addrnr &&
temp == (unsigned int)((hdrlen - 8) / 16))
return ret;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index a0108415275f..5c47be29b9ee 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -33,7 +33,7 @@
static const char nf_frags_cache_name[] = "nf-frags";
-unsigned int nf_frag_pernet_id __read_mostly;
+static unsigned int nf_frag_pernet_id __read_mostly;
static struct inet_frags nf_frags;
static struct nft_ct_frag6_pernet *nf_frag_pernet(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index e8a59d8bf2ad..cb4eb1d2c620 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -25,8 +25,6 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
-extern unsigned int nf_frag_pernet_id;
-
static DEFINE_MUTEX(defrag6_mutex);
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -91,12 +89,10 @@ static const struct nf_hook_ops ipv6_defrag_ops[] = {
static void __net_exit defrag6_net_exit(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
-
- if (nf_frag->users) {
+ if (net->nf.defrag_ipv6_users) {
nf_unregister_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
- nf_frag->users = 0;
+ net->nf.defrag_ipv6_users = 0;
}
}
@@ -134,24 +130,23 @@ static void __exit nf_defrag_fini(void)
int nf_defrag_ipv6_enable(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
int err = 0;
mutex_lock(&defrag6_mutex);
- if (nf_frag->users == UINT_MAX) {
+ if (net->nf.defrag_ipv6_users == UINT_MAX) {
err = -EOVERFLOW;
goto out_unlock;
}
- if (nf_frag->users) {
- nf_frag->users++;
+ if (net->nf.defrag_ipv6_users) {
+ net->nf.defrag_ipv6_users++;
goto out_unlock;
}
err = nf_register_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
if (err == 0)
- nf_frag->users = 1;
+ net->nf.defrag_ipv6_users = 1;
out_unlock:
mutex_unlock(&defrag6_mutex);
@@ -161,12 +156,10 @@ EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
void nf_defrag_ipv6_disable(struct net *net)
{
- struct nft_ct_frag6_pernet *nf_frag = net_generic(net, nf_frag_pernet_id);
-
mutex_lock(&defrag6_mutex);
- if (nf_frag->users) {
- nf_frag->users--;
- if (nf_frag->users == 0)
+ if (net->nf.defrag_ipv6_users) {
+ net->nf.defrag_ipv6_users--;
+ if (net->nf.defrag_ipv6_users == 0)
nf_unregister_net_hooks(net, ipv6_defrag_ops,
ARRAY_SIZE(ipv6_defrag_ops));
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dbc224023977..9b9ef09382ab 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5681,14 +5681,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
- rt->fib6_nh->fib_nh_weight, AF_INET6) < 0)
+ rt->fib6_nh->fib_nh_weight, AF_INET6,
+ 0) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
sibling->fib6_nh->fib_nh_weight,
- AF_INET6) < 0)
+ AF_INET6, 0) < 0)
goto nla_put_failure;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0ce52d46e4f8..b03dd02c9f13 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -599,6 +599,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
int l3index = 0;
u8 prefixlen;
+ u8 flags;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -609,6 +610,8 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (sin6->sin6_family != AF_INET6)
return -EINVAL;
+ flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
@@ -619,7 +622,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
- if (optname == TCP_MD5SIG_EXT &&
+ if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
@@ -640,9 +643,9 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
AF_INET, prefixlen,
- l3index);
+ l3index, flags);
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index);
+ AF_INET6, prefixlen, l3index, flags);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
@@ -650,12 +653,12 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, l3index,
+ AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen,
GFP_KERNEL);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index,
+ AF_INET6, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
@@ -1404,7 +1407,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, l3index, key->key, key->keylen,
+ AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ea53847b5b7e..8d785232b479 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net,
dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif);
if (!dev_match)
return -1;
- score++;
+ if (sk->sk_bound_dev_if)
+ score++;
if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
@@ -1303,7 +1304,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 53486b162f01..93271a2632b8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -869,8 +869,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
}
if (tunnel->version == L2TP_HDR_VER_3 &&
- l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
+ l2tp_session_dec_refcount(session);
goto invalid;
+ }
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
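The l2tp_core.c fix adds the l2tp_session_dec_refcount() call that the invalid-header path was missing, so the session reference held across the header checks is dropped on that path as well, not only after l2tp_recv_common(). The general shape of that rule, as a tiny single-threaded userspace sketch with a plain integer refcount:

#include <stdio.h>
#include <stdlib.h>

struct session {
	int refcount;
};

static void session_put(struct session *s)
{
	if (--s->refcount == 0) {
		puts("session freed");
		free(s);
	}
}

static int recv_one(struct session *s, int header_ok)
{
	s->refcount++;			/* reference held for this packet */

	if (!header_ok) {
		session_put(s);		/* the put the error path was missing */
		return -1;		/* "goto invalid" equivalent */
	}

	puts("packet delivered");
	session_put(s);
	return 0;
}

int main(void)
{
	struct session *s = calloc(1, sizeof(*s));

	if (!s)
		return 1;
	s->refcount = 1;		/* owner's reference */
	recv_one(s, 0);
	session_put(s);			/* drop the owner's reference */
	return 0;
}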
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 97095b7c9c64..5dcfd53a4ab6 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -672,7 +672,7 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
u8 *ie, u8 ie_len)
{
struct ieee80211_supported_band *sband;
- const u8 *cap;
+ const struct element *cap;
const struct ieee80211_he_operation *he_oper = NULL;
sband = ieee80211_get_sband(sdata);
@@ -687,9 +687,10 @@ ieee80211_mesh_update_bss_params(struct ieee80211_sub_if_data *sdata,
sdata->vif.bss_conf.he_support = true;
- cap = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ie, ie_len);
- if (cap && cap[1] >= ieee80211_he_oper_size(&cap[3]))
- he_oper = (void *)(cap + 3);
+ cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ie_len);
+ if (cap && cap->datalen >= 1 + sizeof(*he_oper) &&
+ cap->datalen >= 1 + ieee80211_he_oper_size(cap->data + 1))
+ he_oper = (void *)(cap->data + 1);
if (he_oper)
sdata->vif.bss_conf.he_oper.params =
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index efbefcbac3ac..7cab1cf09bf1 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void)
atomic_set(&newtbl->entries, 0);
spin_lock_init(&newtbl->gates_lock);
spin_lock_init(&newtbl->walk_lock);
- rhashtable_init(&newtbl->rhead, &mesh_rht_params);
+ if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) {
+ kfree(newtbl);
+ return NULL;
+ }
return newtbl;
}
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 204830a55240..3fbd0b9ff913 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -2,6 +2,7 @@
/*
* Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
* Copyright 2012-2013, cozybit Inc.
+ * Copyright (C) 2021 Intel Corporation
*/
#include "mesh.h"
@@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta,
/* only transmit to PS STA with announced, non-zero awake window */
if (test_sta_flag(sta, WLAN_STA_PS_STA) &&
- (!elems->awake_window || !le16_to_cpu(*elems->awake_window)))
+ (!elems->awake_window || !get_unaligned_le16(elems->awake_window)))
return;
if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER))
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index e5935e3d7a07..8c6416129d5b 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -392,10 +392,6 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta,
int mcast_rate;
bool use_basicrate = false;
- if (ieee80211_is_tx_data(txrc->skb) &&
- info->flags & IEEE80211_TX_CTL_NO_ACK)
- return false;
-
if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) {
__rate_control_send_low(txrc->hw, sband, pubsta, info,
txrc->rate_idx_mask);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 99ed68f7dc36..c4071b015c18 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4131,7 +4131,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
- ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+ !is_valid_ether_addr(hdr->addr2))
return false;
if (ieee80211_is_beacon(hdr->frame_control))
return true;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2d1193ed3eb5..8921088a5df6 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2209,7 +2209,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb,
}
vht_mcs = iterator.this_arg[4] >> 4;
+ if (vht_mcs > 11)
+ vht_mcs = 0;
vht_nss = iterator.this_arg[4] & 0xF;
+ if (!vht_nss || vht_nss > 8)
+ vht_nss = 1;
break;
/*
@@ -3380,6 +3384,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
goto out;
+ /* If n == 2, the "while (*frag_tail)" loop above didn't execute
+ * and frag_tail should be &skb_shinfo(head)->frag_list.
+ * However, ieee80211_amsdu_prepare_head() can reallocate it.
+ * Reload frag_tail to have it pointing to the correct place.
+ */
+ if (n == 2)
+ frag_tail = &skb_shinfo(head)->frag_list;
+
/*
* Pad out the previous subframe to a multiple of 4 by adding the
* padding to the next one, that's being added. Note that head->len
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index bca47fad5a16..4eed23e27610 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
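Both wpa.c hunks re-load hdr from rx->skb->data because, as the added comment notes, the skb might have been reallocated, leaving the previously computed header pointer stale. The same discipline applies to any pointer derived from a buffer that can move; a minimal userspace illustration with realloc():

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pkt {
	char *data;
	size_t len;
};

/* may move pkt->data, like helpers that expand an skb's head */
static int pkt_grow(struct pkt *p, size_t extra)
{
	char *n = realloc(p->data, p->len + extra);

	if (!n)
		return -1;
	memset(n + p->len, 0, extra);
	p->data = n;
	p->len += extra;
	return 0;
}

int main(void)
{
	struct pkt p = { .data = strdup("hdr+payload"), .len = 12 };
	char *hdr = p.data;		/* derived pointer */

	if (!p.data)
		return 1;
	if (pkt_grow(&p, 64) == 0)
		hdr = p.data;		/* reload: the old value may be stale */

	printf("%s\n", hdr);
	free(p.data);
	return 0;
}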
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 5265525011ad..5ca186d53cb0 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -1083,8 +1083,10 @@ static void __net_exit mctp_routes_net_exit(struct net *net)
{
struct mctp_route *rt;
+ rcu_read_lock();
list_for_each_entry_rcu(rt, &net->mctp.routes, list)
mctp_route_release(rt);
+ rcu_read_unlock();
}
static struct pernet_operations mctp_net_ops = {
diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c
index f48eb6315bbb..292374fb0779 100644
--- a/net/mptcp/mptcp_diag.c
+++ b/net/mptcp/mptcp_diag.c
@@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb,
struct sock *sk;
net = sock_net(in_skb->sk);
- msk = mptcp_token_get_sock(req->id.idiag_cookie[0]);
+ msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]);
if (!msk)
goto out_nosk;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c41273cefc51..f0f22eb4fd5f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -485,11 +485,11 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
mpext = mptcp_get_ext(skb);
data_len = mpext ? mpext->data_len : 0;
- /* we will check ext_copy.data_len in mptcp_write_options() to
+ /* we will check ops->data_len in mptcp_write_options() to
* discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
* TCPOLEN_MPTCP_MPC_ACK
*/
- opts->ext_copy.data_len = data_len;
+ opts->data_len = data_len;
opts->suboptions = OPTION_MPTCP_MPC_ACK;
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
@@ -505,9 +505,9 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
if (opts->csum_reqd) {
/* we need to propagate more info to csum the pseudo hdr */
- opts->ext_copy.data_seq = mpext->data_seq;
- opts->ext_copy.subflow_seq = mpext->subflow_seq;
- opts->ext_copy.csum = mpext->csum;
+ opts->data_seq = mpext->data_seq;
+ opts->subflow_seq = mpext->subflow_seq;
+ opts->csum = mpext->csum;
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
}
*size = ALIGN(len, 4);
@@ -1227,7 +1227,7 @@ static void mptcp_set_rwin(const struct tcp_sock *tp)
WRITE_ONCE(msk->rcv_wnd_sent, ack_seq);
}
-static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+static u16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __sum16 sum)
{
struct csum_pseudo_header header;
__wsum csum;
@@ -1237,15 +1237,21 @@ static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
* always the 64-bit value, irrespective of what length is used in the
* DSS option itself.
*/
- header.data_seq = cpu_to_be64(mpext->data_seq);
- header.subflow_seq = htonl(mpext->subflow_seq);
- header.data_len = htons(mpext->data_len);
+ header.data_seq = cpu_to_be64(data_seq);
+ header.subflow_seq = htonl(subflow_seq);
+ header.data_len = htons(data_len);
header.csum = 0;
- csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum));
+ csum = csum_partial(&header, sizeof(header), ~csum_unfold(sum));
return (__force u16)csum_fold(csum);
}
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext)
+{
+ return __mptcp_make_csum(mpext->data_seq, mpext->subflow_seq, mpext->data_len,
+ mpext->csum);
+}
+
void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
struct mptcp_out_options *opts)
{
@@ -1337,7 +1343,7 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
len = TCPOLEN_MPTCP_MPC_SYN;
} else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) {
len = TCPOLEN_MPTCP_MPC_SYNACK;
- } else if (opts->ext_copy.data_len) {
+ } else if (opts->data_len) {
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
if (opts->csum_reqd)
len += TCPOLEN_MPTCP_DSS_CHECKSUM;
@@ -1366,14 +1372,17 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
put_unaligned_be64(opts->rcvr_key, ptr);
ptr += 2;
- if (!opts->ext_copy.data_len)
+ if (!opts->data_len)
goto mp_capable_done;
if (opts->csum_reqd) {
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
- mptcp_make_csum(&opts->ext_copy), ptr);
+ put_unaligned_be32(opts->data_len << 16 |
+ __mptcp_make_csum(opts->data_seq,
+ opts->subflow_seq,
+ opts->data_len,
+ opts->csum), ptr);
} else {
- put_unaligned_be32(opts->ext_copy.data_len << 16 |
+ put_unaligned_be32(opts->data_len << 16 |
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
}
ptr += 1;
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index c4f9a5ce3815..050eea231528 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -1718,9 +1718,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
list_for_each_entry(entry, &pernet->local_addr_list, list) {
if (addresses_equal(&entry->addr, &addr.addr, true)) {
- ret = mptcp_nl_addr_backup(net, &entry->addr, bkup);
- if (ret)
- return ret;
+ mptcp_nl_addr_backup(net, &entry->addr, bkup);
if (bkup)
entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2602f1386160..d073b2111382 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -528,7 +528,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
sk->sk_shutdown |= RCV_SHUTDOWN;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
@@ -742,10 +741,9 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
/* Wake-up the reader only for in-sequence data */
mptcp_data_lock(sk);
- if (move_skbs_to_msk(msk, ssk)) {
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ if (move_skbs_to_msk(msk, ssk))
sk->sk_data_ready(sk);
- }
+
mptcp_data_unlock(sk);
}
@@ -847,7 +845,6 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
sk->sk_shutdown |= RCV_SHUTDOWN;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
sk->sk_data_ready(sk);
}
@@ -1316,7 +1313,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
goto alloc_skb;
}
- must_collapse = (info->size_goal - skb->len > 0) &&
+ must_collapse = (info->size_goal > skb->len) &&
(skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
if (must_collapse) {
size_bias = skb->len;
@@ -1325,7 +1322,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
}
alloc_skb:
- if (!must_collapse && !ssk->sk_tx_skb_cache &&
+ if (!must_collapse &&
!mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
return 0;
@@ -1759,21 +1756,6 @@ out:
return copied ? : ret;
}
-static void mptcp_wait_data(struct sock *sk, long *timeo)
-{
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-
- sk_wait_event(sk, timeo,
- test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
-
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
-}
-
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
size_t len, int flags,
@@ -2077,19 +2059,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld", timeo);
- mptcp_wait_data(sk, &timeo);
- }
-
- if (skb_queue_empty_lockless(&sk->sk_receive_queue) &&
- skb_queue_empty(&msk->receive_queue)) {
- /* entire backlog drained, clear DATA_READY. */
- clear_bit(MPTCP_DATA_READY, &msk->flags);
-
- /* .. race-breaker: ssk might have gotten new data
- * after last __mptcp_move_skbs() returned false.
- */
- if (unlikely(__mptcp_move_skbs(msk)))
- set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk_wait_data(sk, &timeo, NULL);
}
out_err:
@@ -2098,9 +2068,9 @@ out_err:
tcp_recv_timestamp(msg, sk, &tss);
}
- pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
- msk, test_bit(MPTCP_DATA_READY, &msk->flags),
- skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
+ pr_debug("msk=%p rx queue empty=%d:%d copied=%d",
+ msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
+ skb_queue_empty(&msk->receive_queue), copied);
if (!(flags & MSG_PEEK))
mptcp_rcv_space_adjust(msk, copied);
@@ -2368,7 +2338,6 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
inet_sk_state_store(sk, TCP_CLOSE);
sk->sk_shutdown = SHUTDOWN_MASK;
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
mptcp_close_wake_up(sk);
@@ -2735,7 +2704,7 @@ cleanup:
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow = lock_sock_fast(ssk);
+ bool slow = lock_sock_fast_nested(ssk);
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
@@ -3385,8 +3354,14 @@ unlock_fail:
static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
{
- return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM :
- 0;
+ /* Concurrent splices from sk_receive_queue into receive_queue will
+ * always show at least one non-empty queue when checked in this order.
+ */
+ if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
+ skb_queue_empty_lockless(&msk->receive_queue))
+ return 0;
+
+ return EPOLLIN | EPOLLRDNORM;
}
static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
@@ -3421,7 +3396,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN)
- return mptcp_check_readable(msk);
+ return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : 0;
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3e6fd1615f1..dc984676c5eb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -709,7 +709,7 @@ int mptcp_token_new_connect(struct sock *sk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
-struct mptcp_sock *mptcp_token_get_sock(u32 token);
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token);
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
long *s_num);
void mptcp_token_destroy(struct mptcp_sock *msk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 1de7ce883c37..6172f380dfb7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -86,7 +86,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
struct mptcp_sock *msk;
int local_id;
- msk = mptcp_token_get_sock(subflow_req->token);
+ msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token);
if (!msk) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
return NULL;
diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c
index 37127781aee9..7f22526346a7 100644
--- a/net/mptcp/syncookies.c
+++ b/net/mptcp/syncookies.c
@@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
e->valid = 0;
- msk = mptcp_token_get_sock(e->token);
+ msk = mptcp_token_get_sock(net, e->token);
if (!msk) {
spin_unlock_bh(&join_entry_locks[i]);
return false;
}
- /* If this fails, the token got re-used in the mean time by another
- * mptcp socket in a different netns, i.e. entry is outdated.
- */
- if (!net_eq(sock_net((struct sock *)msk), net))
- goto err_put;
-
subflow_req->remote_nonce = e->remote_nonce;
subflow_req->local_nonce = e->local_nonce;
subflow_req->backup = e->backup;
@@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl
subflow_req->msk = msk;
spin_unlock_bh(&join_entry_locks[i]);
return true;
-
-err_put:
- spin_unlock_bh(&join_entry_locks[i]);
- sock_put((struct sock *)msk);
- return false;
}
void __init mptcp_join_cookie_init(void)
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index a98e554b034f..e581b341c5be 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -231,6 +231,7 @@ found:
/**
* mptcp_token_get_sock - retrieve mptcp connection sock using its token
+ * @net: restrict to this namespace
* @token: token of the mptcp connection to retrieve
*
* This function returns the mptcp connection structure with the given token.
@@ -238,7 +239,7 @@ found:
*
* returns NULL if no connection with the given token value exists.
*/
-struct mptcp_sock *mptcp_token_get_sock(u32 token)
+struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token)
{
struct hlist_nulls_node *pos;
struct token_bucket *bucket;
@@ -251,11 +252,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token)
again:
sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
msk = mptcp_sk(sk);
- if (READ_ONCE(msk->token) != token)
+ if (READ_ONCE(msk->token) != token ||
+ !net_eq(sock_net(sk), net))
continue;
+
if (!refcount_inc_not_zero(&sk->sk_refcnt))
goto not_found;
- if (READ_ONCE(msk->token) != token) {
+
+ if (READ_ONCE(msk->token) != token ||
+ !net_eq(sock_net(sk), net)) {
sock_put(sk);
goto again;
}
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index e1bd6f0a0676..5d984bec1cd8 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test)
GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req);
mptcp_token_init_request((struct request_sock *)req);
+ sock_net_set((struct sock *)req, &init_net);
return req;
}
@@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test)
KUNIT_ASSERT_EQ(test, 0,
mptcp_token_new_request((struct request_sock *)req));
KUNIT_EXPECT_NE(test, 0, (int)req->token);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token));
/* cleanup */
mptcp_token_destroy_request((struct request_sock *)req);
@@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test)
msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER);
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
+ sock_net_set((struct sock *)msk, &init_net);
return msk;
}
@@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test)
mptcp_token_new_connect((struct sock *)icsk));
KUNIT_EXPECT_NE(test, 0, (int)ctx->token);
KUNIT_EXPECT_EQ(test, ctx->token, msk->token);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token));
KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt));
mptcp_token_destroy(msk);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token));
}
static void mptcp_token_test_accept(struct kunit *test)
@@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test)
mptcp_token_new_request((struct request_sock *)req));
msk->token = req->token;
mptcp_token_accept(req, msk);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
/* this is now a no-op */
mptcp_token_destroy_request((struct request_sock *)req);
- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token));
/* cleanup */
mptcp_token_destroy(msk);
@@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test)
/* simulate race on removal */
refcount_set(&sk->sk_refcnt, 0);
- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token));
+ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token));
/* cleanup */
mptcp_token_destroy(msk);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54395266339d..92a747896f80 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -109,7 +109,7 @@ config NF_CONNTRACK_MARK
config NF_CONNTRACK_SECMARK
bool 'Connection tracking security mark support'
depends on NETWORK_SECMARK
- default m if NETFILTER_ADVANCED=n
+ default y if NETFILTER_ADVANCED=n
help
This option enables security markings to be applied to
connections. Typically they are copied to connections from
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 6186358eac7c..6e391308431d 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -130,11 +130,11 @@ htable_size(u8 hbits)
{
size_t hsize;
- /* We must fit both into u32 in jhash and size_t */
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index c100c6b112c8..2c467c422dc6 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void)
int idx;
/* Compute size and mask */
+ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+ pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c25097092a06..29ec3ef63edc 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -4090,6 +4090,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Global sysctls must be ro in non-init netns */
+ if (!net_eq(net, &init_net))
+ tbl[idx++].mode = 0444;
+#endif
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 94e18fb9690d..770a63103c7a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
-#define MAX_CHAINLEN 64u
+#define MIN_CHAINLEN 8u
+#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
static struct conntrack_gc_work conntrack_gc_work;
@@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
const struct net *net)
{
struct {
struct nf_conntrack_man src;
union nf_inet_addr dst_addr;
+ unsigned int zone;
u32 net_mix;
u16 dport;
u16 proto;
@@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
/* The direction must be ignored, so handle usable members manually. */
combined.src = tuple->src;
combined.dst_addr = tuple->dst.u3;
+ combined.zone = zoneid;
combined.net_mix = net_hash_mix(net);
combined.dport = (__force __u16)tuple->dst.u.all;
combined.proto = tuple->dst.protonum;
@@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)
static u32 __hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
unsigned int size)
{
- return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+ return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
}
static u32 hash_conntrack(const struct net *net,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid)
{
- return scale_hash(hash_conntrack_raw(tuple, net));
+ return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
}
static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
clean_from_lists(ct);
@@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
- return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, net));
+ unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ struct nf_conntrack_tuple_hash *thash;
+
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone_id, net));
+
+ if (thash)
+ return thash;
+
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (rid != zone_id)
+ return __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+ return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
+ unsigned int max_chainlen;
unsigned int chainlen = 0;
unsigned int sequence;
int err = -EEXIST;
@@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1103,8 +1131,8 @@ drop:
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
+ unsigned int chainlen = 0, sequence, max_chainlen;
const struct nf_conntrack_zone *zone;
- unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
@@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN) {
+ if (chainlen++ > max_chainlen) {
chaintoolong:
nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
rcu_read_lock();
begin:
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = __hash_conntrack(net, tuple, hsize);
+ hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
+ u32 hash, zone_id, rid;
struct nf_conn *ct;
- u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple, state->net);
+
+ zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ hash = hash_conntrack_raw(&tuple, zone_id, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+ if (!h) {
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (zone_id != rid) {
+ u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+ }
+ }
+
if (!h) {
h = init_conntrack(state->net, tmpl, &tuple,
skb, dataoff, hash);
@@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
spinlock_t *lockp;
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+ if (hlist_nulls_empty(hslot))
+ continue;
+
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
- continue;
- /* All nf_conn objects are added to hash table twice, one
- * for original direction tuple, once for the reply tuple.
- *
- * Exception: In the IPS_NAT_CLASH case, only the reply
- * tuple is added (the original tuple already existed for
- * a different object).
- *
- * We only need to call the iterator once for each
- * conntrack, so we just use the 'reply' direction
- * tuple while iterating.
- */
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- goto found;
- }
+ hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+ continue;
+ /* All nf_conn objects are added to hash table twice, one
+ * for original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
}
spin_unlock(lockp);
local_bh_enable();
@@ -2264,26 +2308,20 @@ found:
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
- unsigned int bucket = 0, sequence;
+ unsigned int bucket = 0;
struct nf_conn *ct;
might_sleep();
- for (;;) {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
-
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
- /* Time to push up daises... */
+ mutex_lock(&nf_conntrack_mutex);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ /* Time to push up daises... */
- nf_ct_delete(ct, portid, report);
- nf_ct_put(ct);
- cond_resched();
- }
-
- if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
- break;
- bucket = 0;
+ nf_ct_delete(ct, portid, report);
+ nf_ct_put(ct);
+ cond_resched();
}
+ mutex_unlock(&nf_conntrack_mutex);
}
struct iter_data {
@@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
if (!hash)
return -ENOMEM;
+ mutex_lock(&nf_conntrack_mutex);
old_size = nf_conntrack_htable_size;
if (old_size == hashsize) {
+ mutex_unlock(&nf_conntrack_mutex);
kvfree(hash);
return 0;
}
@@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
for (i = 0; i < nf_conntrack_htable_size; i++) {
while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ unsigned int zone_id;
+
h = hlist_nulls_entry(nf_conntrack_hash[i].first,
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
bucket = __hash_conntrack(nf_ct_net(ct),
- &h->tuple, hashsize);
+ &h->tuple, zone_id, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
@@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
nf_conntrack_all_unlock();
local_bh_enable();
+ mutex_unlock(&nf_conntrack_mutex);
+
synchronize_net();
kvfree(old_hash);
return 0;
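Among other things, the conntrack changes above replace the fixed 64-entry bucket limit with a per-insert limit drawn from [MIN_CHAINLEN, MIN_CHAINLEN + MAX_CHAINLEN), so a flooder cannot predict the exact cut-off. A rough userspace sketch of that idea, using rand() in place of prandom_u32_max():

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define MIN_CHAINLEN 8u
    #define MAX_CHAINLEN (32u - MIN_CHAINLEN)

    /* Pick a per-insert limit in [MIN_CHAINLEN, MIN_CHAINLEN + MAX_CHAINLEN). */
    static unsigned int random_max_chainlen(void)
    {
        return MIN_CHAINLEN + (unsigned int)(rand() % MAX_CHAINLEN);
    }

    int main(void)
    {
        srand((unsigned int)time(NULL));
        for (int i = 0; i < 4; i++)
            printf("limit for insert %d: %u\n", i, random_max_chainlen());
        return 0;
    }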
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7008961f5cb0..273117683922 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -150,13 +150,16 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
/* We keep an extra hash for each conntrack, for fast searching. */
static unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
struct {
struct nf_conntrack_man src;
u32 net_mix;
u32 protonum;
+ u32 zone;
} __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
@@ -165,9 +168,13 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
/* Original src, to ensure we map it consistently if poss. */
combined.src = tuple->src;
- combined.net_mix = net_hash_mix(n);
+ combined.net_mix = net_hash_mix(net);
combined.protonum = tuple->dst.protonum;
+ /* Zone ID can be used provided it's valid for both directions */
+ if (zone->dir == NF_CT_DEFAULT_ZONE_DIR)
+ combined.zone = zone->id;
+
hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
return reciprocal_scale(hash, nf_nat_htable_size);
@@ -272,7 +279,7 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range2 *range)
{
- unsigned int h = hash_by_src(net, tuple);
+ unsigned int h = hash_by_src(net, zone, tuple);
const struct nf_conn *ct;
hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
@@ -619,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
unsigned int srchash;
spinlock_t *lock;
- srchash = hash_by_src(net,
+ srchash = hash_by_src(net, nf_ct_zone(ct),
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS];
spin_lock_bh(lock);
@@ -788,7 +795,7 @@ static void __nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
unsigned int h;
- h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ h = hash_by_src(nf_ct_net(ct), nf_ct_zone(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]);
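The nf_nat_core.c hunks fold the conntrack zone id into the by-source hash, but only when the zone applies to both directions. A self-contained sketch of the same keying idea follows; FNV-1a stands in for siphash and the field layout is illustrative only.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define ZONE_DIR_BOTH 0   /* stand-in for NF_CT_DEFAULT_ZONE_DIR */

    struct zone { uint16_t id; uint8_t dir; };

    /* Mix the zone id into the key only when the zone is valid for both
     * directions, mirroring the hash_by_src() change above. */
    static uint32_t hash_by_src(uint32_t saddr, uint16_t sport, uint8_t proto,
                                const struct zone *zone)
    {
        struct {
            uint32_t saddr;
            uint16_t sport;
            uint8_t  proto;
            uint8_t  pad;
            uint32_t zone;
        } key;
        const uint8_t *p = (const uint8_t *)&key;
        uint32_t h = 2166136261u;

        memset(&key, 0, sizeof(key));           /* zero padding bytes too */
        key.saddr = saddr;
        key.sport = sport;
        key.proto = proto;
        if (zone->dir == ZONE_DIR_BOTH)
            key.zone = zone->id;

        for (size_t i = 0; i < sizeof(key); i++)
            h = (h ^ p[i]) * 16777619u;         /* FNV-1a, not siphash */
        return h;
    }

    int main(void)
    {
        struct zone z0 = { .id = 0, .dir = ZONE_DIR_BOTH };
        struct zone z5 = { .id = 5, .dir = ZONE_DIR_BOTH };

        printf("zone 0: %#x\n", (unsigned int)hash_by_src(0x0a000001, 4242, 6, &z0));
        printf("zone 5: %#x\n", (unsigned int)hash_by_src(0x0a000001, 4242, 6, &z5));
        return 0;
    }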
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
index 8e8a65d46345..acd73f717a08 100644
--- a/net/netfilter/nf_nat_masquerade.c
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -9,8 +9,19 @@
#include <net/netfilter/nf_nat_masquerade.h>
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ union nf_inet_addr addr;
+ int ifindex;
+ int (*iter)(struct nf_conn *i, void *data);
+};
+
+#define MAX_MASQ_WORKER_COUNT 16
+
static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
+static atomic_t masq_worker_count __read_mostly;
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
@@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
-static int device_cmp(struct nf_conn *i, void *ifindex)
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&masq_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* Iterate conntrack table in the background and remove conntrack entries
+ * that use the device/address being removed.
+ *
+ * In case too many work items have been queued already or memory allocation
+ * fails iteration is skipped, conntrack entries will time out eventually.
+ */
+static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
+ int ifindex,
+ int (*iter)(struct nf_conn *i, void *data),
+ gfp_t gfp_flags)
+{
+ struct masq_dev_work *w;
+
+ if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
+ return;
+
+ net = maybe_get_net(net);
+ if (!net)
+ return;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kzalloc(sizeof(*w), gfp_flags);
+ if (w) {
+ /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
+ atomic_inc(&masq_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = ifindex;
+ w->net = net;
+ w->iter = iter;
+ if (addr)
+ w->addr = *addr;
+ schedule_work(&w->work);
+ return;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+}
+
+static int device_cmp(struct nf_conn *i, void *arg)
{
const struct nf_conn_nat *nat = nfct_nat(i);
+ const struct masq_dev_work *w = arg;
if (!nat)
return 0;
- return nat->masq_index == (int)(long)ifindex;
+ return nat->masq_index == w->ifindex;
}
static int masq_device_event(struct notifier_block *this,
@@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this,
* and forget them.
*/
- nf_ct_iterate_cleanup_net(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
+ nf_nat_masq_schedule(net, NULL, dev->ifindex,
+ device_cmp, GFP_KERNEL);
}
return NOTIFY_DONE;
@@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this,
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
struct nf_conntrack_tuple *tuple;
+ struct masq_dev_work *w = ptr;
- if (!device_cmp(ct, (void *)(long)dev->ifindex))
+ if (!device_cmp(ct, ptr))
return 0;
tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- return ifa->ifa_address == tuple->dst.u3.ip;
+ return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}
static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
- struct net *net = dev_net(idev->dev);
+ const struct in_ifaddr *ifa = ptr;
+ const struct in_device *idev;
+ const struct net_device *dev;
+ union nf_inet_addr addr;
+
+ if (event != NETDEV_DOWN)
+ return NOTIFY_DONE;
/* The masq_dev_notifier will catch the case of the device going
* down. So if the inetdev is dead and being destroyed we have
* no work to do. Otherwise this is an individual address removal
* and we have to perform the flush.
*/
+ idev = ifa->ifa_dev;
if (idev->dead)
return NOTIFY_DONE;
- if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+ memset(&addr, 0, sizeof(addr));
+
+ addr.ip = ifa->ifa_address;
+
+ dev = idev->dev;
+ nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
+ inet_cmp, GFP_KERNEL);
return NOTIFY_DONE;
}
@@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = {
};
#if IS_ENABLED(CONFIG_IPV6)
-static atomic_t v6_worker_count __read_mostly;
-
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
const struct in6_addr *daddr, unsigned int srcprefs,
@@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
-struct masq_dev_work {
- struct work_struct work;
- struct net *net;
- struct in6_addr addr;
- int ifindex;
-};
-
-static int inet6_cmp(struct nf_conn *ct, void *work)
-{
- struct masq_dev_work *w = (struct masq_dev_work *)work;
- struct nf_conntrack_tuple *tuple;
-
- if (!device_cmp(ct, (void *)(long)w->ifindex))
- return 0;
-
- tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
- return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
- struct masq_dev_work *w;
-
- w = container_of(work, struct masq_dev_work, work);
-
- nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
-
- put_net(w->net);
- kfree(w);
- atomic_dec(&v6_worker_count);
- module_put(THIS_MODULE);
-}
-
/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
*
* Defer it to the system workqueue.
@@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this,
{
struct inet6_ifaddr *ifa = ptr;
const struct net_device *dev;
- struct masq_dev_work *w;
- struct net *net;
+ union nf_inet_addr addr;
- if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+ if (event != NETDEV_DOWN)
return NOTIFY_DONE;
dev = ifa->idev->dev;
- net = maybe_get_net(dev_net(dev));
- if (!net)
- return NOTIFY_DONE;
- if (!try_module_get(THIS_MODULE))
- goto err_module;
+ memset(&addr, 0, sizeof(addr));
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- atomic_inc(&v6_worker_count);
-
- INIT_WORK(&w->work, iterate_cleanup_work);
- w->ifindex = dev->ifindex;
- w->net = net;
- w->addr = ifa->addr;
- schedule_work(&w->work);
+ addr.in6 = ifa->addr;
- return NOTIFY_DONE;
- }
-
- module_put(THIS_MODULE);
- err_module:
- put_net(net);
+ nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
+ GFP_ATOMIC);
return NOTIFY_DONE;
}
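The masquerade rework above moves the conntrack table walk off the notifier path onto a bounded, best-effort work item: if too many jobs are pending or allocation fails, the walk is skipped and stale entries simply time out. A compact userspace model of that pattern (synchronous here for brevity; schedule_work() is replaced by a direct call):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define MAX_WORKERS 16

    struct cleanup_work {
        int ifindex;
        unsigned char addr[16];
        void (*fn)(struct cleanup_work *w);
    };

    static int worker_count;

    static void run_later(struct cleanup_work *w)   /* stands in for schedule_work() */
    {
        w->fn(w);
        free(w);
        worker_count--;
    }

    static void drop_conns(struct cleanup_work *w)
    {
        printf("dropping conntrack entries for ifindex %d\n", w->ifindex);
    }

    /* Best-effort: if too many jobs are pending or allocation fails, skip the
     * walk entirely; stale entries expire on their own later. */
    static void schedule_cleanup(int ifindex, const unsigned char *addr)
    {
        struct cleanup_work *w;

        if (worker_count > MAX_WORKERS)
            return;
        w = calloc(1, sizeof(*w));
        if (!w)
            return;
        worker_count++;
        w->ifindex = ifindex;
        if (addr)
            memcpy(w->addr, addr, sizeof(w->addr));
        w->fn = drop_conns;
        run_later(w);
    }

    int main(void)
    {
        schedule_cleanup(3, NULL);
        return 0;
    }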
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 081437dd75b7..c0851fec11d4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -780,6 +780,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -790,8 +791,11 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_table_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table);
+ event, flags, ctx->family, ctx->table);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -1563,6 +1567,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct nftables_pernet *nft_net;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -1573,8 +1578,11 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_chain_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table,
+ event, flags, ctx->family, ctx->table,
ctx->chain);
if (err < 0) {
kfree_skb(skb);
@@ -2866,8 +2874,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
u32 flags, int family,
const struct nft_table *table,
const struct nft_chain *chain,
- const struct nft_rule *rule,
- const struct nft_rule *prule)
+ const struct nft_rule *rule, u64 handle)
{
struct nlmsghdr *nlh;
const struct nft_expr *expr, *next;
@@ -2887,9 +2894,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
NFTA_RULE_PAD))
goto nla_put_failure;
- if (event != NFT_MSG_DELRULE && prule) {
- if (nla_put_be64(skb, NFTA_RULE_POSITION,
- cpu_to_be64(prule->handle),
+ if (event != NFT_MSG_DELRULE && handle) {
+ if (nla_put_be64(skb, NFTA_RULE_POSITION, cpu_to_be64(handle),
NFTA_RULE_PAD))
goto nla_put_failure;
}
@@ -2925,7 +2931,10 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
const struct nft_rule *rule, int event)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
+ const struct nft_rule *prule;
struct sk_buff *skb;
+ u64 handle = 0;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -2936,9 +2945,20 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (event == NFT_MSG_NEWRULE &&
+ !list_is_first(&rule->list, &ctx->chain->rules) &&
+ !list_is_last(&rule->list, &ctx->chain->rules)) {
+ prule = list_prev_entry(rule, list);
+ handle = prule->handle;
+ }
+ if (ctx->flags & (NLM_F_APPEND | NLM_F_REPLACE))
+ flags |= NLM_F_APPEND;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq,
- event, 0, ctx->family, ctx->table,
- ctx->chain, rule, NULL);
+ event, flags, ctx->family, ctx->table,
+ ctx->chain, rule, handle);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -2964,6 +2984,7 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
const struct nft_rule *rule, *prule;
unsigned int s_idx = cb->args[0];
+ u64 handle;
prule = NULL;
list_for_each_entry_rcu(rule, &chain->rules, list) {
@@ -2975,12 +2996,17 @@ static int __nf_tables_dump_rules(struct sk_buff *skb,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
}
+ if (prule)
+ handle = prule->handle;
+ else
+ handle = 0;
+
if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFT_MSG_NEWRULE,
NLM_F_MULTI | NLM_F_APPEND,
table->family,
- table, chain, rule, prule) < 0)
+ table, chain, rule, handle) < 0)
return 1;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -3143,7 +3169,7 @@ static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info,
err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid,
info->nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0,
- family, table, chain, rule, NULL);
+ family, table, chain, rule, 0);
if (err < 0)
goto err_fill_rule_info;
@@ -3403,17 +3429,15 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
+ err = nft_delrule(&ctx, old_rule);
+ if (err < 0)
+ goto err_destroy_flow_rule;
+
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
goto err_destroy_flow_rule;
}
- err = nft_delrule(&ctx, old_rule);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_destroy_flow_rule;
- }
-
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
@@ -3943,8 +3967,9 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
gfp_t gfp_flags)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
- struct sk_buff *skb;
u32 portid = ctx->portid;
+ struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -3955,7 +3980,10 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
- err = nf_tables_fill_set(skb, ctx, set, event, 0);
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
+ err = nf_tables_fill_set(skb, ctx, set, event, flags);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -4336,7 +4364,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
alloc_size = sizeof(*set) + size + udlen;
- if (alloc_size < size)
+ if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
@@ -5231,12 +5259,13 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
const struct nft_set *set,
const struct nft_set_elem *elem,
- int event, u16 flags)
+ int event)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
u32 portid = ctx->portid;
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
@@ -5246,6 +5275,9 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
set, elem);
if (err < 0) {
@@ -6921,7 +6953,7 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
- int family, int report, gfp_t gfp)
+ u16 flags, int family, int report, gfp_t gfp)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct sk_buff *skb;
@@ -6946,8 +6978,9 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
if (skb == NULL)
goto err;
- err = nf_tables_fill_obj_info(skb, net, portid, seq, event, 0, family,
- table, obj, false);
+ err = nf_tables_fill_obj_info(skb, net, portid, seq, event,
+ flags & (NLM_F_CREATE | NLM_F_EXCL),
+ family, table, obj, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -6964,7 +6997,7 @@ static void nf_tables_obj_notify(const struct nft_ctx *ctx,
struct nft_object *obj, int event)
{
nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
- ctx->family, ctx->report, GFP_KERNEL);
+ ctx->flags, ctx->family, ctx->report, GFP_KERNEL);
}
/*
@@ -7745,6 +7778,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
struct sk_buff *skb;
+ u16 flags = 0;
int err;
if (!ctx->report &&
@@ -7755,8 +7789,11 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
if (skb == NULL)
goto err;
+ if (ctx->flags & (NLM_F_CREATE | NLM_F_EXCL))
+ flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
+
err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
- ctx->seq, event, 0,
+ ctx->seq, event, flags,
ctx->family, flowtable, hook_list);
if (err < 0) {
kfree_skb(skb);
@@ -8634,7 +8671,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_setelem_activate(net, te->set, &te->elem);
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_NEWSETELEM, 0);
+ NFT_MSG_NEWSETELEM);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
@@ -8642,7 +8679,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_DELSETELEM, 0);
+ NFT_MSG_DELSETELEM);
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem)) {
atomic_dec(&te->set->nelems);
@@ -9599,7 +9636,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
table->use--;
nf_tables_chain_destroy(&ctx);
}
- list_del(&table->list);
nf_tables_table_destroy(&ctx);
}
@@ -9612,6 +9648,8 @@ static void __nft_release_tables(struct net *net)
if (nft_table_has_owner(table))
continue;
+ list_del(&table->list);
+
__nft_release_table(net, table);
}
}
@@ -9619,31 +9657,38 @@ static void __nft_release_tables(struct net *net)
static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
+ struct nft_table *table, *to_delete[8];
struct nftables_pernet *nft_net;
struct netlink_notify *n = ptr;
- struct nft_table *table, *nt;
struct net *net = n->net;
- bool release = false;
+ unsigned int deleted;
+ bool restart = false;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
nft_net = nft_pernet(net);
+ deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
n->portid == table->nlpid) {
__nft_release_hook(net, table);
- release = true;
+ list_del_rcu(&table->list);
+ to_delete[deleted++] = table;
+ if (deleted >= ARRAY_SIZE(to_delete))
+ break;
}
}
- if (release) {
+ if (deleted) {
+ restart = deleted >= ARRAY_SIZE(to_delete);
synchronize_rcu();
- list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
- if (nft_table_has_owner(table) &&
- n->portid == table->nlpid)
- __nft_release_table(net, table);
- }
+ while (deleted)
+ __nft_release_table(net, to_delete[--deleted]);
+
+ if (restart)
+ goto again;
}
mutex_unlock(&nft_net->commit_mutex);
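Several of the nf_tables notification paths above start echoing the request's NLM_F_CREATE/NLM_F_EXCL (and append/replace) flags back in the event message, so listeners can tell "created" from "updated". A tiny sketch of that flag-masking step, using the standard netlink flag values:

    #include <stdint.h>
    #include <stdio.h>

    #define NLM_F_REPLACE 0x100
    #define NLM_F_EXCL    0x200
    #define NLM_F_CREATE  0x400
    #define NLM_F_APPEND  0x800

    /* Echo only the creation-related request flags back in the event. */
    static uint16_t notify_flags(uint16_t request_flags)
    {
        uint16_t flags = 0;

        if (request_flags & (NLM_F_APPEND | NLM_F_REPLACE))
            flags |= NLM_F_APPEND;
        flags |= request_flags & (NLM_F_CREATE | NLM_F_EXCL);
        return flags;
    }

    int main(void)
    {
        printf("%#x\n", (unsigned int)notify_flags(NLM_F_CREATE | NLM_F_EXCL | NLM_F_REPLACE));
        return 0;
    }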
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 5b02408a920b..3ced0eb6b7c3 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -342,12 +342,6 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
return;
}
- /* UNREGISTER events are also happening on netns exit.
- *
- * Although nf_tables core releases all tables/chains, only this event
- * handler provides guarantee that hook->ops.dev is still accessible,
- * so we cannot skip exiting net namespaces.
- */
__nft_release_basechain(ctx);
}
@@ -366,6 +360,9 @@ static int nf_tables_netdev_event(struct notifier_block *this,
event != NETDEV_CHANGENAME)
return NOTIFY_DONE;
+ if (!check_net(ctx.net))
+ return NOTIFY_DONE;
+
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 272bcdb1392d..f69cc73c5813 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,6 +19,7 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_log.h>
/* Used for matches where *info is larger than X byte */
#define NFT_MATCH_LARGE_THRESH 192
@@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_compat_wait_for_destructors();
ret = xt_check_target(&par, size, proto, inv);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ const char *modname = NULL;
+
+ if (strcmp(target->name, "LOG") == 0)
+ modname = "nf_log_syslog";
+ else if (strcmp(target->name, "NFLOG") == 0)
+ modname = "nfnetlink_log";
+
+ if (modname &&
+ nft_request_module(ctx->net, "%s", modname) == -EAGAIN)
+ return -EAGAIN;
+ }
+
return ret;
+ }
/* The standard target cannot be used */
if (!target->target)
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0363f533a42b..c4d1389f7185 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -60,7 +60,7 @@ static void nft_quota_obj_eval(struct nft_object *obj,
if (overquota &&
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
- NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
+ NFT_MSG_NEWOBJ, 0, nft_pf(pkt), 0, GFP_ATOMIC);
}
static int nft_quota_do_init(const struct nlattr * const tb[],
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index 7b2f359bfce4..2f7cf5ecebf4 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -137,7 +137,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info)
{
int ret;
- info->timer = kmalloc(sizeof(*info->timer), GFP_KERNEL);
+ info->timer = kzalloc(sizeof(*info->timer), GFP_KERNEL);
if (!info->timer) {
ret = -ENOMEM;
goto out;
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index 2ff75f7637b0..f39244f9c0ed 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int log_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
+ int ret;
if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6)
return -EINVAL;
@@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nf_log_syslog");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG);
+ }
+
+ return ret;
}
static void log_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index fb5793208059..e660c3710a10 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
static int nflog_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
+ int ret;
if (info->flags & ~XT_NFLOG_MASK)
return -EINVAL;
if (info->prefix[sizeof(info->prefix) - 1] != '\0')
return -EINVAL;
- return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ if (ret != 0 && !par->nft_compat) {
+ request_module("%s", "nfnetlink_log");
+
+ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG);
+ }
+
+ return ret;
}
static void nflog_tg_destroy(const struct xt_tgdtor_param *par)
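Both xt_LOG and xt_NFLOG now fall back to loading the logger backend on demand and retrying. A toy model of that try/request_module/retry sequence; the registry here is a single flag, not the kernel's nf_log infrastructure:

    #include <stdio.h>
    #include <string.h>

    /* Toy registry: a logger becomes available only after its module loads. */
    static int syslog_logger_loaded;

    static int logger_find_get(const char *type)
    {
        if (strcmp(type, "LOG") == 0 && syslog_logger_loaded)
            return 0;
        return -1;          /* -ENOENT in the kernel */
    }

    static void request_module(const char *name)
    {
        if (strcmp(name, "nf_log_syslog") == 0)
            syslog_logger_loaded = 1;
    }

    /* Try once, load the backend on demand, then try again before giving up. */
    static int log_tg_check(void)
    {
        int ret = logger_find_get("LOG");

        if (ret != 0) {
            request_module("nf_log_syslog");
            ret = logger_find_get("LOG");
        }
        return ret;
    }

    int main(void)
    {
        printf("check: %d\n", log_tg_check());
        return 0;
    }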
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 24b7cf447bc5..ada47e59647a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -594,7 +594,10 @@ static int netlink_insert(struct sock *sk, u32 portid)
/* We need to ensure that the socket is hashed and visible. */
smp_wmb();
- nlk_sk(sk)->bound = portid;
+ /* Paired with lockless reads from netlink_bind(),
+ * netlink_connect() and netlink_sendmsg().
+ */
+ WRITE_ONCE(nlk_sk(sk)->bound, portid);
err:
release_sock(sk);
@@ -1012,7 +1015,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (nlk->ngroups < BITS_PER_LONG)
groups &= (1UL << nlk->ngroups) - 1;
- bound = nlk->bound;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ bound = READ_ONCE(nlk->bound);
if (bound) {
/* Ensure nlk->portid is up-to-date. */
smp_rmb();
@@ -1098,8 +1102,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
/* No need for barriers here as we return to user-space without
* using any of the bound attributes.
+ * Paired with WRITE_ONCE() in netlink_insert().
*/
- if (!nlk->bound)
+ if (!READ_ONCE(nlk->bound))
err = netlink_autobind(sock);
if (err == 0) {
@@ -1888,7 +1893,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dst_group = nlk->dst_group;
}
- if (!nlk->bound) {
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ if (!READ_ONCE(nlk->bound)) {
err = netlink_autobind(sock);
if (err)
goto out;
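The af_netlink change pairs a WRITE_ONCE() publish of nlk->bound with READ_ONCE() on the lockless readers, on top of the existing smp_wmb()/smp_rmb(). In portable C the same publish/observe ordering can be modelled with release/acquire atomics, as in this sketch (a simplification of the kernel's memory-ordering primitives, not a drop-in equivalent):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Writer makes the payload (portid) visible before publishing 'bound';
     * readers check 'bound' without the socket lock, then read the payload. */
    struct nl_sock {
        unsigned int portid;
        atomic_uint  bound;
    };

    static void publish_bind(struct nl_sock *sk, unsigned int portid)
    {
        sk->portid = portid;
        /* release store plays the role of smp_wmb() + WRITE_ONCE() */
        atomic_store_explicit(&sk->bound, portid, memory_order_release);
    }

    static int is_bound(struct nl_sock *sk)
    {
        /* acquire load plays the role of READ_ONCE() + smp_rmb() */
        return atomic_load_explicit(&sk->bound, memory_order_acquire) != 0;
    }

    int main(void)
    {
        struct nl_sock sk = { 0, 0 };

        publish_bind(&sk, 1234);
        if (is_bound(&sk))
            printf("bound to portid %u\n", sk.portid);
        return 0;
    }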
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index 6024fad905ff..dda323e0a473 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -60,6 +60,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto)
proto_tab[nfc_proto->id] = nfc_proto;
write_unlock(&proto_tab_lock);
+ if (rc)
+ proto_unregister(nfc_proto->proto);
+
return rc;
}
EXPORT_SYMBOL(nfc_proto_register);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index fefc03674f4f..d63d2e5dc60c 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -277,6 +277,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param)
static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
{
struct digital_tg_mdaa_params *params;
+ int rc;
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
@@ -291,8 +292,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2);
params->sc = DIGITAL_SENSF_FELICA_SC;
- return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
- 500, digital_tg_recv_atr_req, NULL);
+ rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
+ 500, digital_tg_recv_atr_req, NULL);
+ if (rc)
+ kfree(params);
+
+ return rc;
}
static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech)
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 84d2345c75a3..3adf4589852a 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -465,8 +465,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
skb_put_u8(skb, sel_cmd);
skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR);
- return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
- target);
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
}
static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg,
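The two nfc fixes above share a shape: when the asynchronous send helper fails, ownership of the buffer stays with the caller, which must free it. A minimal illustration of that ownership rule, with made-up helper names:

    #include <stdio.h>
    #include <stdlib.h>

    /* Pretend transport: consumes the buffer on success, fails when full. */
    static int send_async(void *buf, int room)
    {
        if (!room)
            return -1;
        free(buf);          /* transport now owns and releases the buffer */
        return 0;
    }

    /* On failure ownership stays with the caller, which must free the buffer
     * itself; forgetting that is the leak the patches above close. */
    static int send_cmd(int room)
    {
        void *buf = malloc(64);
        int rc;

        if (!buf)
            return -1;
        rc = send_async(buf, room);
        if (rc)
            free(buf);
        return rc;
    }

    int main(void)
    {
        printf("%d %d\n", send_cmd(1), send_cmd(0));
        return 0;
    }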
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index a2e72c003805..b911ab78bed9 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -334,6 +334,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
ndev->cur_conn_id);
if (conn_info) {
list_del(&conn_info->list);
+ if (conn_info == ndev->rf_conn_info)
+ ndev->rf_conn_info = NULL;
devm_kfree(&ndev->nfc_dev->dev, conn_info);
}
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 543365f58e97..2a2bc64f75cf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -46,6 +46,8 @@
* Copyright (C) 2011, <lokec@ccs.neu.edu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/mm.h>
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index ad9df0cb4b98..90866ae45573 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -960,6 +960,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
tmpl = p->tmpl;
tcf_lastuse_update(&c->tcf_tm);
+ tcf_action_update_bstats(&c->common, skb);
if (clear) {
qdisc_skb_cb(skb)->post_ct = false;
@@ -1049,7 +1050,6 @@ out_push:
qdisc_skb_cb(skb)->post_ct = true;
out_clear:
- tcf_action_update_bstats(&c->common, skb);
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 23b21253b3c3..eb6345a027e1 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2188,18 +2188,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
arg->count = arg->skip;
+ rcu_read_lock();
idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) {
/* don't return filters that are being deleted */
if (!refcount_inc_not_zero(&f->refcnt))
continue;
+ rcu_read_unlock();
+
if (arg->fn(tp, f, arg) < 0) {
__fl_put(f);
arg->stop = 1;
+ rcu_read_lock();
break;
}
__fl_put(f);
arg->count++;
+ rcu_read_lock();
}
+ rcu_read_unlock();
arg->cookie = id;
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 5e90e9b160e3..12f39a2dffd4 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -513,6 +513,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
return stab;
}
+ if (s->size_log > STAB_SIZE_LOG_MAX ||
+ s->cell_log > STAB_SIZE_LOG_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+ return ERR_PTR(-EINVAL);
+ }
+
stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index a579a4131d22..e1040421b797 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -233,6 +233,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
+ if (!q->ops->change)
+ return 0;
+
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 8766ab5b8788..5eb3b1b7ae5e 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -529,22 +529,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
spin_lock_bh(qdisc_lock(qdisc));
+
if (qdisc_is_percpu_stats(qdisc)) {
- cpu_bstats = qdisc->cpu_bstats;
- cpu_qstats = qdisc->cpu_qstats;
+ qlen = qdisc_qlen_sum(qdisc);
+
+ __gnet_stats_copy_basic(NULL, &bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats,
+ qlen);
+ } else {
+ qlen += qdisc->q.qlen;
+ bstats.bytes += qdisc->bstats.bytes;
+ bstats.packets += qdisc->bstats.packets;
+ qstats.backlog += qdisc->qstats.backlog;
+ qstats.drops += qdisc->qstats.drops;
+ qstats.requeues += qdisc->qstats.requeues;
+ qstats.overlimits += qdisc->qstats.overlimits;
}
-
- qlen = qdisc_qlen_sum(qdisc);
- __gnet_stats_copy_basic(NULL, &sch->bstats,
- cpu_bstats, &qdisc->bstats);
- __gnet_stats_copy_queue(&sch->qstats,
- cpu_qstats,
- &qdisc->qstats,
- qlen);
spin_unlock_bh(qdisc_lock(qdisc));
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 1ab2fc933a21..b9fd18d98646 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1641,6 +1641,10 @@ static void taprio_destroy(struct Qdisc *sch)
list_del(&q->taprio_list);
spin_unlock(&taprio_list_lock);
+ /* Note that taprio_reset() might not be called if an error
+ * happens in qdisc_create(), after taprio_init() has been called.
+ */
+ hrtimer_cancel(&q->advance_timer);
taprio_disable_offload(dev, q, NULL);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5ef86fdb1176..1f1786021d9c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -702,7 +702,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
/* Break out if chunk length is less then minimal. */
- if (ntohs(ch->length) < sizeof(_ch))
+ if (!ch || ntohs(ch->length) < sizeof(_ch))
break;
ch_end = offset + SCTP_PAD4(ntohs(ch->length));
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index b8fa8f1a7277..c7503fd64915 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -3697,7 +3697,7 @@ struct sctp_chunk *sctp_make_strreset_req(
outlen = (sizeof(outreq) + stream_len) * out;
inlen = (sizeof(inreq) + stream_len) * in;
- retval = sctp_make_reconf(asoc, outlen + inlen);
+ retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen));
if (!retval)
return NULL;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 32df65f68c12..fb3da4d8f4a3 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -156,6 +156,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
void *arg,
struct sctp_cmd_seq *commands);
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands);
+
/* Small helper function that checks if the chunk length
* is of the appropriate length. The 'required_length' argument
* is set to be the size of a specific chunk we are testing.
@@ -337,6 +343,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (!chunk->singleton)
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Make sure that the INIT chunk has a valid length.
+ * Normally, this would cause an ABORT with a Protocol Violation
+ * error, but since we don't have an association, we'll
+ * just discard the packet.
+ */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
@@ -351,14 +365,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (chunk->sctp_hdr->vtag != 0)
return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
- /* Make sure that the INIT chunk has a valid length.
- * Normally, this would cause an ABORT with a Protocol Violation
- * error, but since we don't have an association, we'll
- * just discard the packet.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* If the INIT is coming toward a closing socket, we'll send back
* and ABORT. Essentially, this catches the race of INIT being
* backloged to the socket at the same time as the user issues close().
@@ -704,6 +710,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
struct sock *sk;
int error = 0;
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
@@ -718,7 +727,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
* in sctp_unpack_cookie().
*/
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
/* If the endpoint is not listening or if the number of associations
* on the TCP-style socket exceed the max backlog, respond with an
@@ -1524,20 +1534,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
if (!chunk->singleton)
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Make sure that the INIT chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
- /* Make sure that the INIT chunk has a valid length.
- * In this case, we generate a protocol violation since we have
- * an association established.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
-
if (SCTP_INPUT_CB(chunk->skb)->encap_port != chunk->transport->encap_port)
return sctp_sf_new_encap_port(net, ep, asoc, type, arg, commands);
@@ -1882,9 +1888,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
* its peer.
*/
if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
- disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
- SCTP_ST_CHUNK(chunk->chunk_hdr->type),
- chunk, commands);
+ disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc,
+ SCTP_ST_CHUNK(chunk->chunk_hdr->type),
+ chunk, commands);
if (SCTP_DISPOSITION_NOMEM == disposition)
goto nomem;
@@ -2202,9 +2208,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
* enough for the chunk header. Cookie length verification is
* done later.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) {
+ if (!sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands);
+ }
/* "Decode" the chunk. We have no optional parameters so we
* are in good shape.
@@ -2341,7 +2349,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2387,7 +2395,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2657,7 +2665,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2970,13 +2978,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-enum sctp_disposition sctp_sf_do_9_2_reshutack(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const union sctp_subtype type,
- void *arg,
- struct sctp_cmd_seq *commands)
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
@@ -3010,6 +3016,26 @@ nomem:
return SCTP_DISPOSITION_NOMEM;
}
+enum sctp_disposition
+sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_chunk *chunk = arg;
+
+ if (!chunk->singleton)
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+ if (chunk->sctp_hdr->vtag != 0)
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
+
+ return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands);
+}
+
/*
* sctp_sf_do_ecn_cwr
*
@@ -3662,6 +3688,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+
ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
do {
/* Report violation if the chunk is less then minimal */
@@ -3777,12 +3806,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5(
SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
- /* If the chunk length is invalid, we don't want to process
- * the reset of the packet.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* We need to discard the rest of the packet to prevent
* potential boomming attacks from additional bundled chunks.
* This is documented in SCTP Threats ID.
@@ -3810,6 +3833,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -3845,6 +3871,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
+ /* Make sure that the ASCONF ADDIP chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* ADD-IP: Section 4.1.1
* This chunk MUST be sent in an authenticated way by using
* the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3853,13 +3884,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
*/
if (!asoc->peer.asconf_capable ||
(!net->sctp.addip_noauth && !chunk->auth))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
- commands);
-
- /* Make sure that the ASCONF ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
hdr = (struct sctp_addiphdr *)chunk->skb->data;
serial = ntohl(hdr->serial);
@@ -3988,6 +4013,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
+ /* Make sure that the ADDIP chunk has a valid length. */
+ if (!sctp_chunk_length_valid(asconf_ack,
+ sizeof(struct sctp_addip_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* ADD-IP, Section 4.1.2:
* This chunk MUST be sent in an authenticated way by using
* the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3996,14 +4027,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
*/
if (!asoc->peer.asconf_capable ||
(!net->sctp.addip_noauth && !asconf_ack->auth))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
- commands);
-
- /* Make sure that the ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(asconf_ack,
- sizeof(struct sctp_addip_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
rcvd_serial = ntohl(addip_hdr->serial);
@@ -4575,6 +4599,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length.
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
@@ -4642,6 +4669,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -6348,6 +6378,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(
* yet.
*/
switch (chunk->chunk_hdr->type) {
+ case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
{
struct sctp_initack_chunk *initack;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c038efc23ce3..78b663dbfa1f 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1057,7 +1057,7 @@ static void smc_connect_work(struct work_struct *work)
if (smc->clcsock->sk->sk_err) {
smc->sk.sk_err = smc->clcsock->sk->sk_err;
} else if ((1 << smc->clcsock->sk->sk_state) &
- (TCPF_SYN_SENT | TCP_SYN_RECV)) {
+ (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
if ((rc == -EPIPE) &&
((1 << smc->clcsock->sk->sk_state) &
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index f23f558054a7..99acd337ba90 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -150,9 +150,11 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
again:
link = conn->lnk;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
if (rc)
- return rc;
+ goto put_out;
spin_lock_bh(&conn->send_lock);
if (link != conn->lnk) {
@@ -160,6 +162,7 @@ again:
spin_unlock_bh(&conn->send_lock);
smc_wr_tx_put_slot(link,
(struct smc_wr_tx_pend_priv *)pend);
+ smc_wr_tx_link_put(link);
if (again)
return -ENOLINK;
again = true;
@@ -167,6 +170,8 @@ again:
}
rc = smc_cdc_msg_send(conn, wr_buf, pend);
spin_unlock_bh(&conn->send_lock);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
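smc_cdc.c (and the smc_llc.c hunks further down) wrap each slot acquisition in smc_wr_tx_link_hold()/smc_wr_tx_link_put(), so every exit path drops the reference it took. A stripped-down sketch of that hold/put discipline, without the kernel's locking or error codes:

    #include <stdio.h>

    struct link { int refcnt; int alive; };

    /* Take a reference only if the link is still usable. */
    static int link_hold(struct link *l)
    {
        if (!l->alive)
            return 0;
        l->refcnt++;
        return 1;
    }

    static void link_put(struct link *l)
    {
        l->refcnt--;
    }

    /* Every exit path, including early errors, must balance the hold. */
    static int send_on_link(struct link *l)
    {
        int rc;

        if (!link_hold(l))
            return -1;      /* -ENOLINK */
        rc = 0;             /* ... get slot, send message ... */
        link_put(l);
        return rc;
    }

    int main(void)
    {
        struct link l = { .refcnt = 0, .alive = 1 };
        int rc = send_on_link(&l);

        printf("rc=%d refcnt=%d\n", rc, l.refcnt);
        return 0;
    }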
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e286dafd6e88..6ec1ebe878ae 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -230,7 +230,8 @@ static int smc_clc_prfx_set(struct socket *clcsock,
goto out_rel;
}
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
+ goto out_rel;
/* analyze IP specific data of net_device belonging to TCP socket */
addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index af227b65669e..d2206743dc71 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -949,7 +949,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
to_lnk = &lgr->lnk[i];
break;
}
- if (!to_lnk) {
+ if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
smc_lgr_terminate_sched(lgr);
return NULL;
}
@@ -981,24 +981,26 @@ again:
read_unlock_bh(&lgr->conns_lock);
/* pre-fetch buffer outside of send_lock, might sleep */
rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
/* avoid race with smcr_tx_sndbuf_nonempty() */
spin_lock_bh(&conn->send_lock);
smc_switch_link_and_count(conn, to_lnk);
rc = smc_switch_cursor(smc, pend, wr_buf);
spin_unlock_bh(&conn->send_lock);
sock_put(&smc->sk);
- if (rc) {
- smcr_link_down_cond_sched(to_lnk);
- return NULL;
- }
+ if (rc)
+ goto err_out;
goto again;
}
read_unlock_bh(&lgr->conns_lock);
+ smc_wr_tx_link_put(to_lnk);
return to_lnk;
+
+err_out:
+ smcr_link_down_cond_sched(to_lnk);
+ smc_wr_tx_link_put(to_lnk);
+ return NULL;
}
static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
@@ -1474,7 +1476,9 @@ static void smc_conn_abort_work(struct work_struct *work)
abort_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ lock_sock(&smc->sk);
smc_conn_kill(conn, true);
+ release_sock(&smc->sk);
sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 2e7560eba981..f1d323439a2a 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -383,9 +383,11 @@ int smc_llc_send_confirm_link(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
confllc = (struct smc_llc_msg_confirm_link *)wr_buf;
memset(confllc, 0, sizeof(*confllc));
confllc->hd.common.type = SMC_LLC_CONFIRM_LINK;
@@ -402,6 +404,8 @@ int smc_llc_send_confirm_link(struct smc_link *link,
confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -415,9 +419,11 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
struct smc_link *link;
int i, rc, rtok_ix;
+ if (!smc_wr_tx_link_hold(send_link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
@@ -444,6 +450,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
(u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
/* send llc message */
rc = smc_wr_tx_send(send_link, pend);
+put_out:
+ smc_wr_tx_link_put(send_link);
return rc;
}
@@ -456,9 +464,11 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf;
memset(rkeyllc, 0, sizeof(*rkeyllc));
rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
@@ -467,6 +477,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -480,9 +492,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addllc = (struct smc_llc_msg_add_link *)wr_buf;
memset(addllc, 0, sizeof(*addllc));
@@ -504,6 +518,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
}
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -517,9 +533,11 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
delllc = (struct smc_llc_msg_del_link *)wr_buf;
memset(delllc, 0, sizeof(*delllc));
@@ -536,6 +554,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
delllc->reason = htonl(reason);
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -547,9 +567,11 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
struct smc_wr_buf *wr_buf;
int rc;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
testllc = (struct smc_llc_msg_test_link *)wr_buf;
memset(testllc, 0, sizeof(*testllc));
testllc->hd.common.type = SMC_LLC_TEST_LINK;
@@ -557,6 +579,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
memcpy(testllc->user_data, user_data, sizeof(testllc->user_data));
/* send llc message */
rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
return rc;
}
@@ -567,13 +591,16 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/* schedule an llc send on link, may wait for buffers,
@@ -586,13 +613,16 @@ static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
struct smc_wr_buf *wr_buf;
int rc;
- if (!smc_link_usable(link))
+ if (!smc_wr_tx_link_hold(link))
return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
- return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+ rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
/********************************* receive ***********************************/
@@ -672,9 +702,11 @@ static int smc_llc_add_link_cont(struct smc_link *link,
struct smc_buf_desc *rmb;
u8 n;
+ if (!smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
if (rc)
- return rc;
+ goto put_out;
addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
memset(addc_llc, 0, sizeof(*addc_llc));
@@ -706,7 +738,10 @@ static int smc_llc_add_link_cont(struct smc_link *link,
addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
if (lgr->role == SMC_CLNT)
addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
- return smc_wr_tx_send(link, pend);
+ rc = smc_wr_tx_send(link, pend);
+put_out:
+ smc_wr_tx_link_put(link);
+ return rc;
}
static int smc_llc_cli_rkey_exchange(struct smc_link *link,
@@ -1787,7 +1822,7 @@ void smc_llc_link_active(struct smc_link *link)
link->smcibdev->ibdev->name, link->ibport);
link->state = SMC_LNK_ACTIVE;
if (link->lgr->llc_testlink_time) {
- link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
+ link->llc_testlink_time = link->lgr->llc_testlink_time;
schedule_delayed_work(&link->llc_testlink_wrk,
link->llc_testlink_time);
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index c79361dfcdfb..738a4a99c827 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
/* Wakeup sndbuf consumers from any context (IRQ or process)
* since there is more data to transmit; usable snd_wnd as max transmit
*/
-static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
+static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
struct smc_link *link = conn->lnk;
@@ -505,8 +505,11 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
+ if (!link || !smc_wr_tx_link_hold(link))
+ return -ENOLINK;
rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
if (rc < 0) {
+ smc_wr_tx_link_put(link);
if (rc == -EBUSY) {
struct smc_sock *smc =
container_of(conn, struct smc_sock, conn);
@@ -547,22 +550,7 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
out_unlock:
spin_unlock_bh(&conn->send_lock);
- return rc;
-}
-
-static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- struct smc_link *link = conn->lnk;
- int rc = -ENOLINK;
-
- if (!link)
- return rc;
-
- atomic_inc(&link->wr_tx_refcnt);
- if (smc_link_usable(link))
- rc = _smcr_tx_sndbuf_nonempty(conn);
- if (atomic_dec_and_test(&link->wr_tx_refcnt))
- wake_up_all(&link->wr_tx_wait);
+ smc_wr_tx_link_put(link);
return rc;
}
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 423b8709f1c9..2bc626f230a5 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -60,6 +60,20 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
atomic_long_set(wr_tx_id, val);
}
+static inline bool smc_wr_tx_link_hold(struct smc_link *link)
+{
+ if (!smc_link_usable(link))
+ return false;
+ atomic_inc(&link->wr_tx_refcnt);
+ return true;
+}
+
+static inline void smc_wr_tx_link_put(struct smc_link *link)
+{
+ if (atomic_dec_and_test(&link->wr_tx_refcnt))
+ wake_up_all(&link->wr_tx_wait);
+}
+
static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk)
{
wake_up_all(&lnk->wr_tx_wait);
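Note: the new smc_wr_tx_link_hold()/smc_wr_tx_link_put() helpers are what every SMC send path above is converted to use: take a reference on the link before allocating a send slot, and release it on every exit path (the "goto put_out" pattern), with the last put waking waiters. A simplified userspace analogue of that refcount bracket, assuming stdatomic in place of the kernel's atomic_t and a printf in place of wake_up_all():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified analogue of smc_wr_tx_link_hold()/smc_wr_tx_link_put(); the real
 * helpers check smc_link_usable() and wake wr_tx_wait waiters on the last put. */
struct link {
	bool usable;
	atomic_int wr_tx_refcnt;
};

static bool link_hold(struct link *l)
{
	if (!l->usable)
		return false;
	atomic_fetch_add(&l->wr_tx_refcnt, 1);
	return true;
}

static void link_put(struct link *l)
{
	if (atomic_fetch_sub(&l->wr_tx_refcnt, 1) == 1)
		printf("last user gone, wake waiters\n");
}

static int get_free_slot(void) { return 0; }	/* pretend the slot allocation succeeds */

static int send_on_link(struct link *l)
{
	int rc;

	if (!link_hold(l))
		return -1;			/* -ENOLINK in the kernel code */
	rc = get_free_slot();
	if (rc)
		goto put_out;			/* error paths still drop the reference */
	/* ... build and post the message ... */
put_out:
	link_put(l);
	return rc;
}

int main(void)
{
	struct link l = { .usable = true };
	return send_on_link(&l);
}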
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 3e776e3dff91..1f2817195549 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -645,7 +645,7 @@ static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci,
}
__set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
goto ok;
- } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
+ } else if (seq_num + GSS_SEQ_WIN <= sd->sd_max) {
goto toolow;
}
if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
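Note: the svcauth_gss change rewrites "seq_num <= sd->sd_max - GSS_SEQ_WIN" as "seq_num + GSS_SEQ_WIN <= sd->sd_max". With unsigned arithmetic the subtraction wraps around while sd_max is still smaller than the window, so early requests were wrongly treated as below the window. A standalone demonstration; the window size and types are illustrative:

#include <stdbool.h>
#include <stdio.h>

#define SEQ_WIN 128u	/* stand-in for GSS_SEQ_WIN */

/* Old form: 100u - 128u wraps to a huge value, so fresh sequence numbers look "too low". */
static bool too_low_buggy(unsigned int seq, unsigned int max)
{
	return seq <= max - SEQ_WIN;
}

/* Fixed form: the addition keeps the comparison on the non-wrapping side. */
static bool too_low_fixed(unsigned int seq, unsigned int max)
{
	return seq + SEQ_WIN <= max;
}

int main(void)
{
	/* Early in a context's life: highest seen seq is 100, a request arrives with seq 50. */
	printf("buggy=%d fixed=%d\n", too_low_buggy(50, 100), too_low_fixed(50, 100));
	/* prints buggy=1 fixed=0: the old test would drop a perfectly good request */
	return 0;
}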
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index c9391d38de85..dc60c32bb70d 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -2285,43 +2285,53 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr)
u16 key_gen = msg_key_gen(hdr);
u16 size = msg_data_sz(hdr);
u8 *data = msg_data(hdr);
+ unsigned int keylen;
+
+ /* Verify whether the size can exist in the packet */
+ if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) {
+ pr_debug("%s: message data size is too small\n", rx->name);
+ goto exit;
+ }
+
+ keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+
+ /* Verify the supplied size values */
+ if (unlikely(size != keylen + sizeof(struct tipc_aead_key) ||
+ keylen > TIPC_AEAD_KEY_SIZE_MAX)) {
+ pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name);
+ goto exit;
+ }
spin_lock(&rx->lock);
if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) {
pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name,
rx->skey, key_gen, rx->key_gen);
- goto exit;
+ goto exit_unlock;
}
/* Allocate memory for the key */
skey = kmalloc(size, GFP_ATOMIC);
if (unlikely(!skey)) {
pr_err("%s: unable to allocate memory for skey\n", rx->name);
- goto exit;
+ goto exit_unlock;
}
/* Copy key from msg data */
- skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME)));
+ skey->keylen = keylen;
memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME);
memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32),
skey->keylen);
- /* Sanity check */
- if (unlikely(size != tipc_aead_key_size(skey))) {
- kfree(skey);
- skey = NULL;
- goto exit;
- }
-
rx->key_gen = key_gen;
rx->skey_mode = msg_key_mode(hdr);
rx->skey = skey;
rx->nokey = 0;
mb(); /* for nokey flag */
-exit:
+exit_unlock:
spin_unlock(&rx->lock);
+exit:
/* Schedule the key attaching on this crypto */
if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0)))
return true;
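Note: the reworked tipc_crypto_key_rcv() validates the attacker-controlled key length against the message size before taking rx->lock and before kmalloc(), instead of allocating first and sanity-checking afterwards. A rough userspace sketch of that validate-then-allocate shape; the sizes, field layout, and names are simplified and not the kernel's:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define ALG_NAME_LEN 32		/* stand-in for TIPC_AEAD_ALG_NAME */
#define KEY_SIZE_MAX 64		/* stand-in for TIPC_AEAD_KEY_SIZE_MAX */

struct aead_key {
	char alg_name[ALG_NAME_LEN];
	uint32_t keylen;
	uint8_t key[];
};

/* Every length is checked against the received buffer before anything is
 * allocated or copied, mirroring the reordered tipc_crypto_key_rcv(). */
static struct aead_key *parse_key(const uint8_t *data, size_t size)
{
	uint32_t keylen;

	if (size < sizeof(struct aead_key))
		return NULL;				/* too small to even hold the header */
	memcpy(&keylen, data + ALG_NAME_LEN, sizeof(keylen));
	keylen = ntohl(keylen);
	if (keylen > KEY_SIZE_MAX || size != sizeof(struct aead_key) + keylen)
		return NULL;				/* inconsistent or oversized length field */

	struct aead_key *k = malloc(sizeof(*k) + keylen);
	if (!k)
		return NULL;
	k->keylen = keylen;
	memcpy(k->alg_name, data, ALG_NAME_LEN);
	memcpy(k->key, data + ALG_NAME_LEN + sizeof(uint32_t), keylen);
	return k;
}

int main(void)
{
	uint8_t bogus[40] = { 0 };
	bogus[ALG_NAME_LEN] = 0xff;			/* advertises an absurd key length */
	if (!parse_key(bogus, sizeof(bogus)))
		puts("rejected before allocation");
	return 0;
}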
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index a0a27d87f631..ad570c2450be 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
{
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index fde56ff49163..9ab81db8a654 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -681,12 +681,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
prot[TLS_BASE][TLS_SW].recvmsg = tls_sw_recvmsg;
- prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
+ prot[TLS_BASE][TLS_SW].sock_is_readable = tls_sw_sock_is_readable;
prot[TLS_BASE][TLS_SW].close = tls_sk_proto_close;
prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
prot[TLS_SW][TLS_SW].recvmsg = tls_sw_recvmsg;
- prot[TLS_SW][TLS_SW].stream_memory_read = tls_sw_stream_read;
+ prot[TLS_SW][TLS_SW].sock_is_readable = tls_sw_sock_is_readable;
prot[TLS_SW][TLS_SW].close = tls_sk_proto_close;
#ifdef CONFIG_TLS_DEVICE
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 4feb95e34b64..1b08b877a890 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -35,6 +35,7 @@
* SOFTWARE.
*/
+#include <linux/bug.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/splice.h>
@@ -43,6 +44,14 @@
#include <net/strparser.h>
#include <net/tls.h>
+noinline void tls_err_abort(struct sock *sk, int err)
+{
+ WARN_ON_ONCE(err >= 0);
+ /* sk->sk_err should contain a positive error code. */
+ sk->sk_err = -err;
+ sk_error_report(sk);
+}
+
static int __skb_nsg(struct sk_buff *skb, int offset, int len,
unsigned int recursion_level)
{
@@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags)
tx_err:
if (rc < 0 && rc != -EAGAIN)
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
return rc;
}
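Note: tls_err_abort() is now an out-of-line helper that warns unless it is handed a negative errno and stores the positive value in sk->sk_err; the call sites in these hunks are converted from EBADMSG to -EBADMSG to match. A tiny userspace stand-in for that sign convention (not the kernel helper):

#include <assert.h>
#include <errno.h>
#include <stdio.h>

struct sock { int sk_err; };

/* Callers pass a negative errno; sk_err ends up holding the positive value,
 * mirroring the new tls_err_abort() contract (which WARNs on err >= 0). */
static void err_abort(struct sock *sk, int err)
{
	assert(err < 0);
	sk->sk_err = -err;
}

int main(void)
{
	struct sock sk = { 0 };
	err_abort(&sk, -EBADMSG);
	printf("sk_err = %d\n", sk.sk_err);	/* positive EBADMSG value */
	return 0;
}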
@@ -450,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
/* If err is already set on socket, return the same code */
if (sk->sk_err) {
- ctx->async_wait.err = sk->sk_err;
+ ctx->async_wait.err = -sk->sk_err;
} else {
ctx->async_wait.err = err;
tls_err_abort(sk, err);
@@ -763,7 +772,7 @@ static int tls_push_record(struct sock *sk, int flags,
msg_pl->sg.size + prot->tail_size, i);
if (rc < 0) {
if (rc != -EINPROGRESS) {
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
if (split) {
tls_ctx->pending_open_record_frags = true;
tls_merge_open_record(sk, rec, tmp, orig_end);
@@ -1827,7 +1836,7 @@ int tls_sw_recvmsg(struct sock *sk,
err = decrypt_skb_update(sk, skb, &msg->msg_iter,
&chunk, &zc, async_capable);
if (err < 0 && err != -EINPROGRESS) {
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
goto recv_end;
}
@@ -2007,7 +2016,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
}
if (err < 0) {
- tls_err_abort(sk, EBADMSG);
+ tls_err_abort(sk, -EBADMSG);
goto splice_read_end;
}
ctx->decrypted = 1;
@@ -2026,7 +2035,7 @@ splice_read_end:
return copied ? : err;
}
-bool tls_sw_stream_read(const struct sock *sk)
+bool tls_sw_sock_is_readable(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eb47b9de2380..78e08e82c08c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -608,20 +608,42 @@ static void unix_release_sock(struct sock *sk, int embrion)
static void init_peercred(struct sock *sk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(task_tgid(current));
sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
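Note: init_peercred() and copy_peercred() above now swap the peer credentials under sk_peer_lock, and copy_peercred() takes the two locks in address order (with spin_lock_nested() as the lockdep annotation for the inner one), so two sockets copying from each other concurrently cannot deadlock. A pthread-based analogue of that address-ordered locking, purely for illustration:

#include <pthread.h>
#include <stdio.h>

struct sock {
	pthread_mutex_t peer_lock;
	int peer_pid;
};

/* Take both locks in a globally consistent (address) order, as the reworked
 * copy_peercred() does with sk_peer_lock. */
static void copy_peer(struct sock *sk, struct sock *peersk)
{
	if (sk < peersk) {
		pthread_mutex_lock(&sk->peer_lock);
		pthread_mutex_lock(&peersk->peer_lock);
	} else {
		pthread_mutex_lock(&peersk->peer_lock);
		pthread_mutex_lock(&sk->peer_lock);
	}
	sk->peer_pid = peersk->peer_pid;
	pthread_mutex_unlock(&sk->peer_lock);
	pthread_mutex_unlock(&peersk->peer_lock);
}

int main(void)
{
	struct sock a = { PTHREAD_MUTEX_INITIALIZER, 1 };
	struct sock b = { PTHREAD_MUTEX_INITIALIZER, 42 };
	copy_peer(&a, &b);
	printf("%d\n", a.peer_pid);	/* 42 */
	return 0;
}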
@@ -806,7 +828,7 @@ static void unix_unhash(struct sock *sk)
}
struct proto unix_dgram_proto = {
- .name = "UNIX-DGRAM",
+ .name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
@@ -828,20 +850,25 @@ struct proto unix_stream_proto = {
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
- struct sock *sk = NULL;
struct unix_sock *u;
+ struct sock *sk;
+ int err;
atomic_long_inc(&unix_nr_socks);
- if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
- goto out;
+ if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) {
+ err = -ENFILE;
+ goto err;
+ }
if (type == SOCK_STREAM)
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
else /*dgram and seqpacket */
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);
- if (!sk)
- goto out;
+ if (!sk) {
+ err = -ENOMEM;
+ goto err;
+ }
sock_init_data(sock, sk);
@@ -861,20 +888,23 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
unix_insert_socket(unix_sockets_unbound(sk), sk);
-out:
- if (sk == NULL)
- atomic_long_dec(&unix_nr_socks);
- else {
- local_bh_disable();
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- local_bh_enable();
- }
+
+ local_bh_disable();
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ local_bh_enable();
+
return sk;
+
+err:
+ atomic_long_dec(&unix_nr_socks);
+ return ERR_PTR(err);
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ struct sock *sk;
+
if (protocol && protocol != PF_UNIX)
return -EPROTONOSUPPORT;
@@ -901,7 +931,11 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
return -ESOCKTNOSUPPORT;
}
- return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
+ sk = unix_create1(net, sock, kern, sock->type);
+ if (IS_ERR(sk))
+ return PTR_ERR(sk);
+
+ return 0;
}
static int unix_release(struct socket *sock)
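Note: unix_create1() now reports failures as ERR_PTR(-ENFILE) / ERR_PTR(-ENOMEM) instead of returning NULL, so unix_create() and unix_stream_connect() can propagate the exact error rather than collapsing everything to -ENOMEM. A minimal userspace re-implementation of the ERR_PTR()/IS_ERR()/PTR_ERR() convention, assuming an LP64 target; the 4095 bound mirrors the kernel's MAX_ERRNO:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace re-implementation of the kernel's error-pointer convention. */
static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p) { return (unsigned long)p >= (unsigned long)-4095; }

static void *create_obj(int fail)
{
	if (fail)
		return ERR_PTR(-ENFILE);	/* the precise error travels in the pointer */
	return malloc(16);
}

int main(void)
{
	void *obj = create_obj(1);

	if (IS_ERR(obj)) {
		printf("create failed: %ld\n", PTR_ERR(obj));	/* -ENFILE, not a blanket -ENOMEM */
		return 1;
	}
	free(obj);
	return 0;
}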
@@ -1314,12 +1348,15 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
we will have to recheck all again in any case.
*/
- err = -ENOMEM;
-
/* create new sock for complete connection */
newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
- if (newsk == NULL)
+ if (IS_ERR(newsk)) {
+ err = PTR_ERR(newsk);
+ newsk = NULL;
goto out;
+ }
+
+ err = -ENOMEM;
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
@@ -2845,6 +2882,9 @@ static int unix_shutdown(struct socket *sock, int mode)
unix_state_lock(sk);
sk->sk_shutdown |= mode;
+ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
+ mode == SHUTDOWN_MASK)
+ sk->sk_state = TCP_CLOSE;
other = unix_peer(sk);
if (other)
sock_hold(other);
@@ -2867,12 +2907,10 @@ static int unix_shutdown(struct socket *sock, int mode)
other->sk_shutdown |= peer_mode;
unix_state_unlock(other);
other->sk_state_change(other);
- if (peer_mode == SHUTDOWN_MASK) {
+ if (peer_mode == SHUTDOWN_MASK)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
- other->sk_state = TCP_CLOSE;
- } else if (peer_mode & RCV_SHUTDOWN) {
+ else if (peer_mode & RCV_SHUTDOWN)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
- }
}
if (other)
sock_put(other);
@@ -3014,6 +3052,8 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
@@ -3053,6 +3093,8 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
+ if (sk_is_readable(sk))
+ mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
if (sk->sk_type == SOCK_SEQPACKET) {
@@ -3073,7 +3115,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
other = unix_peer(sk);
if (other && unix_peer(other) != sk &&
- unix_recvq_full(other) &&
+ unix_recvq_full_lockless(other) &&
unix_dgram_peer_wake_me(sk, other))
writable = 0;
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index b927e2baae50..452376c6f419 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -102,6 +102,7 @@ static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = unix_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
}
static void unix_stream_bpf_rebuild_protos(struct proto *prot,
@@ -110,6 +111,7 @@ static void unix_stream_bpf_rebuild_protos(struct proto *prot,
*prot = *base;
prot->close = sock_map_close;
prot->recvmsg = unix_bpf_recvmsg;
+ prot->sock_is_readable = sk_msg_is_readable;
prot->unhash = sock_map_unhash;
}
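Note: the poll hunks above make unix_poll()/unix_dgram_poll() also report EPOLLIN when sk_is_readable() says the protocol holds readable data outside sk_receive_queue, and unix_bpf wires prot->sock_is_readable to sk_msg_is_readable so data parked in a sockmap psock counts. A small sketch of that two-source readability check; struct layout and names are illustrative:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct sock {
	int queue_len;					/* stand-in for sk_receive_queue */
	bool (*sock_is_readable)(struct sock *sk);	/* optional protocol hook */
};

/* Readable if the receive queue has data OR the protocol hook (e.g.
 * sk_msg_is_readable for sockmap-redirected data) says so. */
static bool poll_readable(struct sock *sk)
{
	if (sk->queue_len > 0)
		return true;
	return sk->sock_is_readable && sk->sock_is_readable(sk);
}

static bool psock_has_data(struct sock *sk) { (void)sk; return true; }

int main(void)
{
	struct sock plain  = { .queue_len = 0, .sock_is_readable = NULL };
	struct sock mapped = { .queue_len = 0, .sock_is_readable = psock_has_data };
	printf("%d %d\n", poll_readable(&plain), poll_readable(&mapped));	/* 0 1 */
	return 0;
}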
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 03323121ca50..aaba847d79eb 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -524,6 +524,7 @@ use_default_name:
INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
INIT_WORK(&rdev->mgmt_registrations_update_wk,
cfg80211_mgmt_registrations_update_wk);
+ spin_lock_init(&rdev->mgmt_registrations_lock);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -1279,7 +1280,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev)
INIT_LIST_HEAD(&wdev->event_list);
spin_lock_init(&wdev->event_lock);
INIT_LIST_HEAD(&wdev->mgmt_registrations);
- spin_lock_init(&wdev->mgmt_registrations_lock);
INIT_LIST_HEAD(&wdev->pmsr_list);
spin_lock_init(&wdev->pmsr_lock);
INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b35d0db12f1d..1720abf36f92 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -100,6 +100,8 @@ struct cfg80211_registered_device {
struct work_struct propagate_cac_done_wk;
struct work_struct mgmt_registrations_update_wk;
+ /* lock for all wdev lists */
+ spinlock_t mgmt_registrations_lock;
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 3aa69b375a10..783acd2c4211 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -452,9 +452,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
lockdep_assert_held(&rdev->wiphy.mtx);
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
if (!wdev->mgmt_registrations_need_update) {
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
return;
}
@@ -479,7 +479,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
rcu_read_unlock();
wdev->mgmt_registrations_need_update = 0;
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
}
@@ -503,6 +503,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
int match_len, bool multicast_rx,
struct netlink_ext_ack *extack)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_mgmt_registration *reg, *nreg;
int err = 0;
u16 mgmt_type;
@@ -548,7 +549,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
if (!nreg)
return -ENOMEM;
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
int mlen = min(match_len, reg->match_len);
@@ -583,7 +584,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
list_add(&nreg->list, &wdev->mgmt_registrations);
}
wdev->mgmt_registrations_need_update = 1;
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
@@ -591,7 +592,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
out:
kfree(nreg);
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
return err;
}
@@ -602,7 +603,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_mgmt_registration *reg, *tmp;
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
if (reg->nlportid != nlportid)
@@ -615,7 +616,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
schedule_work(&rdev->mgmt_registrations_update_wk);
}
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
if (nlportid && rdev->crit_proto_nlportid == nlportid) {
rdev->crit_proto_nlportid = 0;
@@ -628,15 +629,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
struct cfg80211_mgmt_registration *reg, *tmp;
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
list_del(&reg->list);
kfree(reg);
}
wdev->mgmt_registrations_need_update = 1;
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
}
@@ -784,7 +786,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
data = buf + ieee80211_hdrlen(mgmt->frame_control);
data_len = len - ieee80211_hdrlen(mgmt->frame_control);
- spin_lock_bh(&wdev->mgmt_registrations_lock);
+ spin_lock_bh(&rdev->mgmt_registrations_lock);
list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
if (reg->frame_type != ftype)
@@ -808,7 +810,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
break;
}
- spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ spin_unlock_bh(&rdev->mgmt_registrations_lock);
trace_cfg80211_return_bool(result);
return result;
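Note: the cfg80211 hunks move the management-registration lock from each wireless_dev to the registered device (initialized in core.c above), since the registration update walks registrations belonging to all wdevs of the wiphy and a per-wdev lock cannot serialize that walk. A toy illustration of one container-level lock covering a walk over several members, assuming pthreads; the structure names are stand-ins:

#include <pthread.h>
#include <stdio.h>

#define MAX_WDEV 2

struct wdev { int registrations; };

/* One lock at the device level covers the registration state of every wdev,
 * so a walk over all of them is serialized against concurrent changes. */
struct rdev {
	pthread_mutex_t registrations_lock;
	struct wdev wdevs[MAX_WDEV];
};

static int count_all_registrations(struct rdev *rdev)
{
	int total = 0;

	pthread_mutex_lock(&rdev->registrations_lock);
	for (int i = 0; i < MAX_WDEV; i++)
		total += rdev->wdevs[i].registrations;
	pthread_mutex_unlock(&rdev->registrations_lock);
	return total;
}

int main(void)
{
	struct rdev rdev = { PTHREAD_MUTEX_INITIALIZER, { { 1 }, { 2 } } };
	printf("%d\n", count_all_registrations(&rdev));	/* 3 */
	return 0;
}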
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 11c68b159324..adc0d14cfd86 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss,
}
ssid_len = ssid[1];
ssid = ssid + 2;
- rcu_read_unlock();
/* check if nontrans_bss is in the list */
list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) {
- if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len))
+ if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) {
+ rcu_read_unlock();
return 0;
+ }
}
+ rcu_read_unlock();
+
/* add to the list */
list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list);
return 0;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 18dba3d7c638..a1a99a574984 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1028,14 +1028,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
!(rdev->wiphy.interface_modes & (1 << ntype)))
return -EOPNOTSUPP;
- /* if it's part of a bridge, reject changing type to station/ibss */
- if (netif_is_bridge_port(dev) &&
- (ntype == NL80211_IFTYPE_ADHOC ||
- ntype == NL80211_IFTYPE_STATION ||
- ntype == NL80211_IFTYPE_P2P_CLIENT))
- return -EBUSY;
-
if (ntype != otype) {
+ /* if it's part of a bridge, reject changing type to station/ibss */
+ if (netif_is_bridge_port(dev) &&
+ (ntype == NL80211_IFTYPE_ADHOC ||
+ ntype == NL80211_IFTYPE_STATION ||
+ ntype == NL80211_IFTYPE_P2P_CLIENT))
+ return -EBUSY;
+
dev->ieee80211_ptr->use_4addr = false;
dev->ieee80211_ptr->mesh_id_up_len = 0;
wdev_lock(dev->ieee80211_ptr);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 03b66d154b2b..3a3cb09eec12 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1961,24 +1961,65 @@ static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
return skb;
}
+static int xfrm_notify_userpolicy(struct net *net)
+{
+ struct xfrm_userpolicy_default *up;
+ int len = NLMSG_ALIGN(sizeof(*up));
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ int err;
+
+ skb = nlmsg_new(len, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_GETDEFAULT, sizeof(*up), 0);
+ if (nlh == NULL) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ up = nlmsg_data(nlh);
+ up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+
+ nlmsg_end(skb, nlh);
+
+ rcu_read_lock();
+ err = xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY);
+ rcu_read_unlock();
+
+ return err;
+}
+
static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh,
struct nlattr **attrs)
{
struct net *net = sock_net(skb->sk);
struct xfrm_userpolicy_default *up = nlmsg_data(nlh);
- u8 dirmask;
- u8 old_default = net->xfrm.policy_default;
- if (up->dirmask >= XFRM_USERPOLICY_DIRMASK_MAX)
- return -EINVAL;
+ if (up->in == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_IN;
+ else if (up->in == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_IN;
- dirmask = (1 << up->dirmask) & XFRM_POL_DEFAULT_MASK;
+ if (up->fwd == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_FWD;
+ else if (up->fwd == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_FWD;
- net->xfrm.policy_default = (old_default & (0xff ^ dirmask))
- | (up->action << up->dirmask);
+ if (up->out == XFRM_USERPOLICY_BLOCK)
+ net->xfrm.policy_default |= XFRM_POL_DEFAULT_OUT;
+ else if (up->out == XFRM_USERPOLICY_ACCEPT)
+ net->xfrm.policy_default &= ~XFRM_POL_DEFAULT_OUT;
rt_genid_bump_all(net);
+ xfrm_notify_userpolicy(net);
return 0;
}
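Note: xfrm_set_default() now takes independent in/fwd/out values and maps each to its own XFRM_POL_DEFAULT_* bit in net->xfrm.policy_default, replacing the earlier dirmask/action encoding; xfrm_get_default() and the new xfrm_notify_userpolicy() read the same bits back. A standalone sketch of that per-direction flag mapping; the constants below are local stand-ins, not the uapi definitions:

#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the XFRM_POL_DEFAULT_* / XFRM_USERPOLICY_* constants. */
#define POL_DEFAULT_IN	0x1
#define POL_DEFAULT_FWD	0x2
#define POL_DEFAULT_OUT	0x4

enum { USERPOLICY_ACCEPT = 0, USERPOLICY_BLOCK = 1 };

static void set_dir(uint8_t *policy_default, uint8_t val, uint8_t bit)
{
	if (val == USERPOLICY_BLOCK)
		*policy_default |= bit;
	else if (val == USERPOLICY_ACCEPT)
		*policy_default &= ~bit;
	/* any other value leaves that direction untouched */
}

static int get_dir(uint8_t policy_default, uint8_t bit)
{
	return (policy_default & bit) ? USERPOLICY_BLOCK : USERPOLICY_ACCEPT;
}

int main(void)
{
	uint8_t policy_default = 0;

	set_dir(&policy_default, USERPOLICY_BLOCK, POL_DEFAULT_IN);
	set_dir(&policy_default, USERPOLICY_ACCEPT, POL_DEFAULT_OUT);
	printf("in=%d fwd=%d out=%d\n",
	       get_dir(policy_default, POL_DEFAULT_IN),
	       get_dir(policy_default, POL_DEFAULT_FWD),
	       get_dir(policy_default, POL_DEFAULT_OUT));	/* in=1 fwd=0 out=0 */
	return 0;
}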
@@ -1988,13 +2029,11 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
struct sk_buff *r_skb;
struct nlmsghdr *r_nlh;
struct net *net = sock_net(skb->sk);
- struct xfrm_userpolicy_default *r_up, *up;
+ struct xfrm_userpolicy_default *r_up;
int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default));
u32 portid = NETLINK_CB(skb).portid;
u32 seq = nlh->nlmsg_seq;
- up = nlmsg_data(nlh);
-
r_skb = nlmsg_new(len, GFP_ATOMIC);
if (!r_skb)
return -ENOMEM;
@@ -2007,8 +2046,12 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh,
r_up = nlmsg_data(r_nlh);
- r_up->action = ((net->xfrm.policy_default & (1 << up->dirmask)) >> up->dirmask);
- r_up->dirmask = up->dirmask;
+ r_up->in = net->xfrm.policy_default & XFRM_POL_DEFAULT_IN ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->fwd = net->xfrm.policy_default & XFRM_POL_DEFAULT_FWD ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
+ r_up->out = net->xfrm.policy_default & XFRM_POL_DEFAULT_OUT ?
+ XFRM_USERPOLICY_BLOCK : XFRM_USERPOLICY_ACCEPT;
nlmsg_end(r_skb, r_nlh);
return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid);