aboutsummaryrefslogtreecommitdiff
path: root/net/mptcp/subflow.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r--net/mptcp/subflow.c317
1 files changed, 209 insertions, 108 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a5ede357cfbc..d55f4ef736a5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -108,6 +108,8 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
+ subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@@ -150,7 +152,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -162,7 +164,7 @@ static int subflow_check_req(struct request_sock *req,
}
if (mp_opt.mp_capable && listener->request_mptcp) {
- int err, retries = 4;
+ int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
@@ -247,7 +249,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
return -EINVAL;
@@ -394,7 +396,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (subflow->request_mptcp) {
if (!mp_opt.mp_capable) {
MPTCP_INC_STATS(sock_net(sk),
@@ -404,6 +406,10 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto fallback;
}
+ if (mp_opt.csum_reqd)
+ WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
+ if (mp_opt.deny_join_id0)
+ WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;
@@ -430,15 +436,15 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
goto do_reset;
}
+ if (!mptcp_finish_join(sk))
+ goto do_reset;
+
subflow_generate_hmac(subflow->local_key, subflow->remote_key,
subflow->local_nonce,
subflow->remote_nonce,
hmac);
memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (!mptcp_finish_join(sk))
- goto do_reset;
-
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
@@ -630,26 +636,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
/* if the sk is MP_CAPABLE, we try to fetch the client key */
if (subflow_req->mp_capable) {
- if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
- /* here we can receive and accept an in-window,
- * out-of-order pkt, which will not carry the MP_CAPABLE
- * opt even on mptcp enabled paths
- */
- goto create_msk;
- }
-
- mptcp_get_options(skb, &mp_opt);
+ /* we can receive and accept an in-window, out-of-order pkt,
+ * which may not carry the MP_CAPABLE opt even on mptcp enabled
+ * paths: always try to extract the peer key, and fallback
+ * for packets missing it.
+ * Even OoO DSS packets coming legitly after dropped or
+ * reordered MPC will cause fallback, but we don't have other
+ * options.
+ */
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_capable) {
fallback = true;
goto create_child;
}
-create_msk:
new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(skb, &mp_opt);
+ mptcp_get_options(sk, skb, &mp_opt);
if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -785,10 +790,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
- WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
- ssn, subflow->map_subflow_seq, subflow->map_data_len);
+ pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+ ssn, subflow->map_subflow_seq, subflow->map_data_len);
}
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -813,22 +818,104 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
/* Mapping covers data later in the subflow stream,
* currently unsupported.
*/
- warn_bad_map(subflow, ssn);
+ dbg_bad_map(subflow, ssn);
return false;
}
if (unlikely(!before(ssn, subflow->map_subflow_seq +
subflow->map_data_len))) {
/* Mapping does covers past subflow data, invalid */
- warn_bad_map(subflow, ssn + skb->len);
+ dbg_bad_map(subflow, ssn);
return false;
}
return true;
}
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb,
+ bool csum_reqd)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct csum_pseudo_header header;
+ u32 offset, seq, delta;
+ __wsum csum;
+ int len;
+
+ if (!csum_reqd)
+ return MAPPING_OK;
+
+ /* mapping already validated on previous traversal */
+ if (subflow->map_csum_len == subflow->map_data_len)
+ return MAPPING_OK;
+
+ /* traverse the receive queue, ensuring it contains a full
+ * DSS mapping and accumulating the related csum.
+ * Preserve the accoumlate csum across multiple calls, to compute
+ * the csum only once
+ */
+ delta = subflow->map_data_len - subflow->map_csum_len;
+ for (;;) {
+ seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len;
+ offset = seq - TCP_SKB_CB(skb)->seq;
+
+ /* if the current skb has not been accounted yet, csum its contents
+ * up to the amount covered by the current DSS
+ */
+ if (offset < skb->len) {
+ __wsum csum;
+
+ len = min(skb->len - offset, delta);
+ csum = skb_checksum(skb, offset, len, 0);
+ subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum,
+ subflow->map_csum_len);
+
+ delta -= len;
+ subflow->map_csum_len += len;
+ }
+ if (delta == 0)
+ break;
+
+ if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) {
+ /* if this subflow is closed, the partial mapping
+ * will be never completed; flush the pending skbs, so
+ * that subflow_sched_work_if_closed() can kick in
+ */
+ if (unlikely(ssk->sk_state == TCP_CLOSE))
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
+
+ /* not enough data to validate the csum */
+ return MAPPING_EMPTY;
+ }
+
+ /* the DSS mapping for next skbs will be validated later,
+ * when a get_mapping_status call will process such skb
+ */
+ skb = skb->next;
+ }
+
+ /* note that 'map_data_len' accounts only for the carried data, does
+ * not include the eventual seq increment due to the data fin,
+ * while the pseudo header requires the original DSS data len,
+ * including that
+ */
+ header.data_seq = cpu_to_be64(subflow->map_seq);
+ header.subflow_seq = htonl(subflow->map_subflow_seq);
+ header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
+ header.csum = 0;
+
+ csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
+ if (unlikely(csum_fold(csum))) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
+ return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ }
+
+ return MAPPING_OK;
+}
+
static enum mapping_status get_mapping_status(struct sock *ssk,
struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ bool csum_reqd = READ_ONCE(msk->csum_enabled);
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
@@ -867,7 +954,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
- pr_err("Infinite mapping not handled");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
return MAPPING_INVALID;
}
@@ -922,9 +1008,10 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
/* Allow replacing only with an identical map */
if (subflow->map_seq == map_seq &&
subflow->map_subflow_seq == mpext->subflow_seq &&
- subflow->map_data_len == data_len) {
+ subflow->map_data_len == data_len &&
+ subflow->map_csum_reqd == mpext->csum_reqd) {
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+ goto validate_csum;
}
/* If this skb data are fully covered by the current mapping,
@@ -936,27 +1023,41 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
}
/* will validate the next map after consuming the current one */
- return MAPPING_OK;
+ goto validate_csum;
}
subflow->map_seq = map_seq;
subflow->map_subflow_seq = mpext->subflow_seq;
subflow->map_data_len = data_len;
subflow->map_valid = 1;
+ subflow->map_data_fin = mpext->data_fin;
subflow->mpc_map = mpext->mpc_map;
- pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
+ subflow->map_csum_reqd = mpext->csum_reqd;
+ subflow->map_csum_len = 0;
+ subflow->map_data_csum = csum_unfold(mpext->csum);
+
+ /* Cfr RFC 8684 Section 3.3.0 */
+ if (unlikely(subflow->map_csum_reqd != csum_reqd))
+ return MAPPING_INVALID;
+
+ pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u",
subflow->map_seq, subflow->map_subflow_seq,
- subflow->map_data_len);
+ subflow->map_data_len, subflow->map_csum_reqd,
+ subflow->map_data_csum);
validate_seq:
/* we revalidate valid mapping on new skb, because we must ensure
* the current skb is completely covered by the available mapping
*/
- if (!validate_mapping(ssk, skb))
+ if (!validate_mapping(ssk, skb)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSTCPMISMATCH);
return MAPPING_INVALID;
+ }
skb_ext_del(skb, SKB_EXT_MPTCP);
- return MAPPING_OK;
+
+validate_csum:
+ return validate_data_csum(ssk, skb, csum_reqd);
}
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
@@ -1002,7 +1103,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
if (subflow->data_avail)
return true;
@@ -1013,21 +1114,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (status == MAPPING_INVALID) {
- ssk->sk_err = EBADMSG;
- goto fatal;
- }
- if (status == MAPPING_DUMMY) {
- __mptcp_do_fallback(msk);
- skb = skb_peek(&ssk->sk_receive_queue);
- subflow->map_valid = 1;
- subflow->map_seq = READ_ONCE(msk->ack_seq);
- subflow->map_data_len = skb->len;
- subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
- subflow->ssn_offset;
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
- return true;
- }
+ if (unlikely(status == MAPPING_INVALID))
+ goto fallback;
+
+ if (unlikely(status == MAPPING_DUMMY))
+ goto fallback;
if (status != MAPPING_OK)
goto no_data;
@@ -1040,10 +1131,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
* MP_CAPABLE-based mapping
*/
if (unlikely(!READ_ONCE(msk->can_ack))) {
- if (!subflow->mpc_map) {
- ssk->sk_err = EBADMSG;
- goto fatal;
- }
+ if (!subflow->mpc_map)
+ goto fallback;
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->ack_seq, subflow->map_seq);
WRITE_ONCE(msk->can_ack, true);
@@ -1053,35 +1142,43 @@ static bool subflow_check_data_avail(struct sock *ssk)
ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
ack_seq);
- if (ack_seq == old_ack) {
- subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
- break;
- } else if (after64(ack_seq, old_ack)) {
- subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
- break;
+ if (unlikely(before64(ack_seq, old_ack))) {
+ mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+ continue;
}
- /* only accept in-sequence mapping. Old values are spurious
- * retransmission
- */
- mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ break;
}
return true;
no_data:
subflow_sched_work_if_closed(msk, ssk);
return false;
-fatal:
- /* fatal protocol error, close the socket */
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
- ssk->sk_error_report(ssk);
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- subflow->data_avail = 0;
- return false;
+
+fallback:
+ /* RFC 8684 section 3.7. */
+ if (subflow->mp_join || subflow->fully_established) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, 0);
+ return false;
+ }
+
+ __mptcp_do_fallback(msk);
+ skb = skb_peek(&ssk->sk_receive_queue);
+ subflow->map_valid = 1;
+ subflow->map_seq = READ_ONCE(msk->ack_seq);
+ subflow->map_data_len = skb->len;
+ subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ return true;
}
bool mptcp_subflow_data_available(struct sock *sk)
@@ -1092,7 +1189,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- subflow->data_avail = 0;
+ WRITE_ONCE(subflow->data_avail, 0);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1120,41 +1217,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
*full_space = tcp_full_space(sk);
}
-static void subflow_data_ready(struct sock *sk)
-{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- u16 state = 1 << inet_sk_state_load(sk);
- struct sock *parent = subflow->conn;
- struct mptcp_sock *msk;
-
- msk = mptcp_sk(parent);
- if (state & TCPF_LISTEN) {
- /* MPJ subflow are removed from accept queue before reaching here,
- * avoid stray wakeups
- */
- if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
- return;
-
- set_bit(MPTCP_DATA_READY, &msk->flags);
- parent->sk_data_ready(parent);
- return;
- }
-
- WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
- !subflow->mp_join && !(state & TCPF_CLOSE));
-
- if (mptcp_subflow_data_available(sk))
- mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
- struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
- mptcp_propagate_sndbuf(sk, ssk);
- mptcp_write_space(sk);
-}
-
void __mptcp_error_report(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -1195,6 +1257,43 @@ static void subflow_error_report(struct sock *ssk)
mptcp_data_unlock(sk);
}
+static void subflow_data_ready(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ u16 state = 1 << inet_sk_state_load(sk);
+ struct sock *parent = subflow->conn;
+ struct mptcp_sock *msk;
+
+ msk = mptcp_sk(parent);
+ if (state & TCPF_LISTEN) {
+ /* MPJ subflow are removed from accept queue before reaching here,
+ * avoid stray wakeups
+ */
+ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+ return;
+
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+ parent->sk_data_ready(parent);
+ return;
+ }
+
+ WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+ !subflow->mp_join && !(state & TCPF_CLOSE));
+
+ if (mptcp_subflow_data_available(sk))
+ mptcp_data_ready(parent, sk);
+ else if (unlikely(sk->sk_err))
+ subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_propagate_sndbuf(sk, ssk);
+ mptcp_write_space(sk);
+}
+
static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
@@ -1505,6 +1604,8 @@ static void subflow_state_change(struct sock *sk)
*/
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
+ else if (unlikely(sk->sk_err))
+ subflow_error_report(sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);