diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/devinet.c | 35 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 21 | ||||
-rw-r--r-- | net/ipv4/netfilter/nft_fib_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 42 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 5 | ||||
-rw-r--r-- | net/ipv4/udp.c | 4 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 40 |
7 files changed, 102 insertions, 49 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index ab76744383cf..7cf5f7d0d0de 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -298,17 +298,19 @@ static struct in_device *inetdev_init(struct net_device *dev) /* Account for reference dev->ip_ptr (below) */ refcount_set(&in_dev->refcnt, 1); - err = devinet_sysctl_register(in_dev); - if (err) { - in_dev->dead = 1; - neigh_parms_release(&arp_tbl, in_dev->arp_parms); - in_dev_put(in_dev); - in_dev = NULL; - goto out; + if (dev != blackhole_netdev) { + err = devinet_sysctl_register(in_dev); + if (err) { + in_dev->dead = 1; + neigh_parms_release(&arp_tbl, in_dev->arp_parms); + in_dev_put(in_dev); + in_dev = NULL; + goto out; + } + ip_mc_init_dev(in_dev); + if (dev->flags & IFF_UP) + ip_mc_up(in_dev); } - ip_mc_init_dev(in_dev); - if (dev->flags & IFF_UP) - ip_mc_up(in_dev); /* we can receive as soon as ip_ptr is set -- do this last */ rcu_assign_pointer(dev->ip_ptr, in_dev); @@ -347,6 +349,19 @@ static void inetdev_destroy(struct in_device *in_dev) in_dev_put(in_dev); } +static int __init inet_blackhole_dev_init(void) +{ + int err = 0; + + rtnl_lock(); + if (!inetdev_init(blackhole_netdev)) + err = -ENOMEM; + rtnl_unlock(); + + return err; +} +late_initcall(inet_blackhole_dev_init); + int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) { const struct in_ifaddr *ifa; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 2c5632d4fddb..2b698f8419fe 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1045,21 +1045,31 @@ static bool reqsk_queue_unlink(struct request_sock *req) found = __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); } - if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) - reqsk_put(req); + return found; } -bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +static bool __inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + bool from_timer) { bool unlinked = reqsk_queue_unlink(req); + if (!from_timer && timer_delete_sync(&req->rsk_timer)) + reqsk_put(req); + if (unlinked) { reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); reqsk_put(req); } + return unlinked; } + +bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + return __inet_csk_reqsk_queue_drop(sk, req, false); +} EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) @@ -1152,7 +1162,7 @@ static void reqsk_timer_handler(struct timer_list *t) if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) { /* delete timer */ - inet_csk_reqsk_queue_drop(sk_listener, nreq); + __inet_csk_reqsk_queue_drop(sk_listener, nreq, true); goto no_ownership; } @@ -1178,7 +1188,8 @@ no_ownership: } drop: - inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq); + __inet_csk_reqsk_queue_drop(sk_listener, oreq, true); + reqsk_put(req); } static bool reqsk_queue_hash_req(struct request_sock *req, diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 00da1332bbf1..09fff5d424ef 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -65,6 +65,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, .flowi4_scope = RT_SCOPE_UNIVERSE, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_uid = sock_net_uid(nft_net(pkt), NULL), + .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)), }; const struct net_device *oif; const struct net_device *found; @@ -83,9 +84,6 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; - if (priv->flags & NFTA_FIB_F_IIF) - fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif); - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { nft_fib_store_result(dest, priv, nft_in(pkt)); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc05ec1faac8..2d844e1f867f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2473,8 +2473,22 @@ static bool tcp_skb_spurious_retrans(const struct tcp_sock *tp, */ static inline bool tcp_packet_delayed(const struct tcp_sock *tp) { - return tp->retrans_stamp && - tcp_tsopt_ecr_before(tp, tp->retrans_stamp); + const struct sock *sk = (const struct sock *)tp; + + if (tp->retrans_stamp && + tcp_tsopt_ecr_before(tp, tp->retrans_stamp)) + return true; /* got echoed TS before first retransmission */ + + /* Check if nothing was retransmitted (retrans_stamp==0), which may + * happen in fast recovery due to TSQ. But we ignore zero retrans_stamp + * in TCP_SYN_SENT, since when we set FLAG_SYN_ACKED we also clear + * retrans_stamp even if we had retransmitted the SYN. + */ + if (!tp->retrans_stamp && /* no record of a retransmit/SYN? */ + sk->sk_state != TCP_SYN_SENT) /* not the FLAG_SYN_ACKED case? */ + return true; /* nothing was retransmitted */ + + return false; } /* Undo procedures. */ @@ -2508,6 +2522,16 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } +/* If loss recovery is finished and there are no retransmits out in the + * network, then we clear retrans_stamp so that upon the next loss recovery + * retransmits_timed_out() and timestamp-undo are using the correct value. + */ +static void tcp_retrans_stamp_cleanup(struct sock *sk) +{ + if (!tcp_any_retrans_done(sk)) + tcp_sk(sk)->retrans_stamp = 0; +} + static void DBGUNDO(struct sock *sk, const char *msg) { #if FASTRETRANS_DEBUG > 1 @@ -2875,6 +2899,9 @@ void tcp_enter_recovery(struct sock *sk, bool ece_ack) struct tcp_sock *tp = tcp_sk(sk); int mib_idx; + /* Start the clock with our fast retransmit, for undo and ETIMEDOUT. */ + tcp_retrans_stamp_cleanup(sk); + if (tcp_is_reno(tp)) mib_idx = LINUX_MIB_TCPRENORECOVERY; else @@ -6657,10 +6684,17 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk) if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss && !tp->packets_out) tcp_try_undo_recovery(sk); - /* Reset rtx states to prevent spurious retransmits_timed_out() */ tcp_update_rto_time(tp); - tp->retrans_stamp = 0; inet_csk(sk)->icsk_retransmits = 0; + /* In tcp_fastopen_synack_timer() on the first SYNACK RTO we set + * retrans_stamp but don't enter CA_Loss, so in case that happened we + * need to zero retrans_stamp here to prevent spurious + * retransmits_timed_out(). However, if the ACK of our SYNACK caused us + * to enter CA_Recovery then we need to leave retrans_stamp as it was + * set entering CA_Recovery, for correct retransmits_timed_out() and + * undo behavior. + */ + tcp_retrans_stamp_cleanup(sk); /* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1, * we no longer need req so release it. diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4fd746bd4d54..68804fd01daf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2342,10 +2342,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor) || - tcp_has_tx_tstamp(skb) || - !skb_pure_zcopy_same(skb, next) || - skb_frags_readable(skb) != skb_frags_readable(next)) + if (tcp_has_tx_tstamp(skb) || !tcp_skb_can_collapse(skb, next)) return false; len -= skb->len; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8accbf4cb295..2849b273b131 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -951,8 +951,10 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(datalen, cork->gso_size); + + /* Don't checksum the payload, skb will get segmented */ + goto csum_partial; } - goto csum_partial; } if (is_udplite) /* UDP-Lite */ diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 0294fef577fa..7e1c2faed1ff 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,47 +17,43 @@ #include <net/ip.h> #include <net/l3mdev.h> -static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *__xfrm4_dst_lookup(struct flowi4 *fl4, + const struct xfrm_dst_lookup_params *params) { struct rtable *rt; memset(fl4, 0, sizeof(*fl4)); - fl4->daddr = daddr->a4; - fl4->flowi4_tos = tos; - fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); - fl4->flowi4_mark = mark; - if (saddr) - fl4->saddr = saddr->a4; - - rt = __ip_route_output_key(net, fl4); + fl4->daddr = params->daddr->a4; + fl4->flowi4_tos = params->tos; + fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(params->net, + params->oif); + fl4->flowi4_mark = params->mark; + if (params->saddr) + fl4->saddr = params->saddr->a4; + fl4->flowi4_proto = params->ipproto; + fl4->uli = params->uli; + + rt = __ip_route_output_key(params->net, fl4); if (!IS_ERR(rt)) return &rt->dst; return ERR_CAST(rt); } -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - u32 mark) +static struct dst_entry *xfrm4_dst_lookup(const struct xfrm_dst_lookup_params *params) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); + return __xfrm4_dst_lookup(&fl4, params); } -static int xfrm4_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr, - u32 mark) +static int xfrm4_get_saddr(xfrm_address_t *saddr, + const struct xfrm_dst_lookup_params *params) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); + dst = __xfrm4_dst_lookup(&fl4, params); if (IS_ERR(dst)) return -EHOSTUNREACH; |