diff options
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r-- | net/mptcp/subflow.c | 317 |
1 files changed, 209 insertions, 108 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index a5ede357cfbc..d55f4ef736a5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -108,6 +108,8 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); + subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@ -150,7 +152,7 @@ static int subflow_check_req(struct request_sock *req, return -EINVAL; #endif - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@ -162,7 +164,7 @@ static int subflow_check_req(struct request_sock *req, } if (mp_opt.mp_capable && listener->request_mptcp) { - int err, retries = 4; + int err, retries = MPTCP_TOKEN_MAX_RETRIES; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; again: @@ -247,7 +249,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req, int err; subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt); if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@ -394,7 +396,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@ -404,6 +406,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto fallback; } + if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); + if (mp_opt.deny_join_id0) + WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; @@ -430,15 +436,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) goto do_reset; } + if (!mptcp_finish_join(sk)) + goto do_reset; + subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, hmac); memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); - if (!mptcp_finish_join(sk)) - goto do_reset; - subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); @@ -630,26 +636,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, /* if the sk is MP_CAPABLE, we try to fetch the client key */ if (subflow_req->mp_capable) { - if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) { - /* here we can receive and accept an in-window, - * out-of-order pkt, which will not carry the MP_CAPABLE - * opt even on mptcp enabled paths - */ - goto create_msk; - } - - mptcp_get_options(skb, &mp_opt); + /* we can receive and accept an in-window, out-of-order pkt, + * which may not carry the MP_CAPABLE opt even on mptcp enabled + * paths: always try to extract the peer key, and fallback + * for packets missing it. + * Even OoO DSS packets coming legitly after dropped or + * reordered MPC will cause fallback, but we don't have other + * options. + */ + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; } -create_msk: new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); @@ -785,10 +790,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); } -static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) +static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d", - ssn, subflow->map_subflow_seq, subflow->map_data_len); + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + ssn, subflow->map_subflow_seq, subflow->map_data_len); } static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) @@ -813,22 +818,104 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb) /* Mapping covers data later in the subflow stream, * currently unsupported. */ - warn_bad_map(subflow, ssn); + dbg_bad_map(subflow, ssn); return false; } if (unlikely(!before(ssn, subflow->map_subflow_seq + subflow->map_data_len))) { /* Mapping does covers past subflow data, invalid */ - warn_bad_map(subflow, ssn + skb->len); + dbg_bad_map(subflow, ssn); return false; } return true; } +static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data, does + * not include the eventual seq increment due to the data fin, + * while the pseudo header requires the original DSS data len, + * including that + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + } + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -867,7 +954,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, data_len = mpext->data_len; if (data_len == 0) { - pr_err("Infinite mapping not handled"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); return MAPPING_INVALID; } @@ -922,9 +1008,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk, /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; } /* If this skb data are fully covered by the current mapping, @@ -936,27 +1023,41 @@ static enum mapping_status get_mapping_status(struct sock *ssk, } /* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; } subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum); validate_seq: /* we revalidate valid mapping on new skb, because we must ensure * the current skb is completely covered by the available mapping */ - if (!validate_mapping(ssk, skb)) + if (!validate_mapping(ssk, skb)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH); return MAPPING_INVALID; + } skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); } static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, @@ -1002,7 +1103,7 @@ static bool subflow_check_data_avail(struct sock *ssk) struct sk_buff *skb; if (!skb_peek(&ssk->sk_receive_queue)) - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); if (subflow->data_avail) return true; @@ -1013,21 +1114,11 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (status == MAPPING_INVALID) { - ssk->sk_err = EBADMSG; - goto fatal; - } - if (status == MAPPING_DUMMY) { - __mptcp_do_fallback(msk); - skb = skb_peek(&ssk->sk_receive_queue); - subflow->map_valid = 1; - subflow->map_seq = READ_ONCE(msk->ack_seq); - subflow->map_data_len = skb->len; - subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - return true; - } + if (unlikely(status == MAPPING_INVALID)) + goto fallback; + + if (unlikely(status == MAPPING_DUMMY)) + goto fallback; if (status != MAPPING_OK) goto no_data; @@ -1040,10 +1131,8 @@ static bool subflow_check_data_avail(struct sock *ssk) * MP_CAPABLE-based mapping */ if (unlikely(!READ_ONCE(msk->can_ack))) { - if (!subflow->mpc_map) { - ssk->sk_err = EBADMSG; - goto fatal; - } + if (!subflow->mpc_map) + goto fallback; WRITE_ONCE(msk->remote_key, subflow->remote_key); WRITE_ONCE(msk->ack_seq, subflow->map_seq); WRITE_ONCE(msk->can_ack, true); @@ -1053,35 +1142,43 @@ static bool subflow_check_data_avail(struct sock *ssk) ack_seq = mptcp_subflow_get_mapped_dsn(subflow); pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, ack_seq); - if (ack_seq == old_ack) { - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - break; - } else if (after64(ack_seq, old_ack)) { - subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; - break; + if (unlikely(before64(ack_seq, old_ack))) { + mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + continue; } - /* only accept in-sequence mapping. Old values are spurious - * retransmission - */ - mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + break; } return true; no_data: subflow_sched_work_if_closed(msk, ssk); return false; -fatal: - /* fatal protocol error, close the socket */ - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - ssk->sk_error_report(ssk); - tcp_set_state(ssk, TCP_CLOSE); - subflow->reset_transient = 0; - subflow->reset_reason = MPTCP_RST_EMPTCP; - tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; - return false; + +fallback: + /* RFC 8684 section 3.7. */ + if (subflow->mp_join || subflow->fully_established) { + /* fatal protocol error, close the socket. + * subflow_error_report() will introduce the appropriate barriers + */ + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + subflow->reset_transient = 0; + subflow->reset_reason = MPTCP_RST_EMPTCP; + tcp_send_active_reset(ssk, GFP_ATOMIC); + WRITE_ONCE(subflow->data_avail, 0); + return false; + } + + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + return true; } bool mptcp_subflow_data_available(struct sock *sk) @@ -1092,7 +1189,7 @@ bool mptcp_subflow_data_available(struct sock *sk) if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@ -1120,41 +1217,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space) *full_space = tcp_full_space(sk); } -static void subflow_data_ready(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - u16 state = 1 << inet_sk_state_load(sk); - struct sock *parent = subflow->conn; - struct mptcp_sock *msk; - - msk = mptcp_sk(parent); - if (state & TCPF_LISTEN) { - /* MPJ subflow are removed from accept queue before reaching here, - * avoid stray wakeups - */ - if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) - return; - - set_bit(MPTCP_DATA_READY, &msk->flags); - parent->sk_data_ready(parent); - return; - } - - WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && - !subflow->mp_join && !(state & TCPF_CLOSE)); - - if (mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent, sk); -} - -static void subflow_write_space(struct sock *ssk) -{ - struct sock *sk = mptcp_subflow_ctx(ssk)->conn; - - mptcp_propagate_sndbuf(sk, ssk); - mptcp_write_space(sk); -} - void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -1195,6 +1257,43 @@ static void subflow_error_report(struct sock *ssk) mptcp_data_unlock(sk); } +static void subflow_data_ready(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + u16 state = 1 << inet_sk_state_load(sk); + struct sock *parent = subflow->conn; + struct mptcp_sock *msk; + + msk = mptcp_sk(parent); + if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + + set_bit(MPTCP_DATA_READY, &msk->flags); + parent->sk_data_ready(parent); + return; + } + + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join && !(state & TCPF_CLOSE)); + + if (mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); +} + +static void subflow_write_space(struct sock *ssk) +{ + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); +} + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@ -1505,6 +1604,8 @@ static void subflow_state_change(struct sock *sk) */ if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); subflow_sched_work_if_closed(mptcp_sk(parent), sk); |