about summary refs log tree commit diff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- net/mptcp/protocol.c 312
1 files changed, 146 insertions, 166 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 29a2d690d8d5..7bb82424e551 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -39,10 +39,15 @@ struct mptcp_skb_cb {
u64 map_seq;
u64 end_seq;
u32 offset;
+ u8 has_rxtstamp:1;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+enum {
+ MPTCP_CMSG_TS = BIT(0),
+};
+
static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk);
@@ -272,6 +277,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = (struct sock *)msk;
struct sk_buff *tail;
+ bool has_rxtstamp;
__skb_unlink(skb, &ssk->sk_receive_queue);
@@ -280,13 +286,17 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
/* try to fetch required memory from subflow */
if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- if (ssk->sk_forward_alloc < skb->truesize)
- goto drop;
- __sk_mem_reclaim(ssk, skb->truesize);
- if (!sk_rmem_schedule(sk, skb, skb->truesize))
+ int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+ if (ssk->sk_forward_alloc < amount)
goto drop;
+
+ ssk->sk_forward_alloc -= amount;
+ sk->sk_forward_alloc += amount;
}
+ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+
/* the skb map_seq accounts for the skb offset:
* mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq
* value
@@ -294,6 +304,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow);
MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
MPTCP_SKB_CB(skb)->offset = offset;
+ MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
@@ -422,56 +433,55 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
- lock_sock(ssk);
+ slow = lock_sock_fast(ssk);
if (tcp_can_send_ack(ssk))
tcp_send_ack(ssk);
- release_sock(ssk);
+ unlock_sock_fast(ssk, slow);
}
}
-static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
+static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
- int ret;
+ bool slow;
- lock_sock(ssk);
- ret = tcp_can_send_ack(ssk);
- if (ret)
+ slow = lock_sock_fast(ssk);
+ if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
- release_sock(ssk);
- return ret;
+ unlock_sock_fast(ssk, slow);
+}
+
+static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
+{
+ const struct inet_connection_sock *icsk = inet_csk(ssk);
+ u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending);
+ const struct tcp_sock *tp = tcp_sk(ssk);
+
+ return (ack_pending & ICSK_ACK_SCHED) &&
+ ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
+ READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
+ (rx_empty && ack_pending &
+ (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
- struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
- bool cleanup;
+ int space = __mptcp_space(sk);
+ bool cleanup, rx_empty;
- /* this is a simple superset of what tcp_cleanup_rbuf() implements
- * so that we don't have to acquire the ssk socket lock most of the time
- * to do actually nothing
- */
- cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
- if (!cleanup)
- return;
+ cleanup = (space > 0) && (space >= (old_space << 1));
+ rx_empty = !atomic_read(&sk->sk_rmem_alloc);
- /* if the hinted ssk is still active, try to use it */
- if (likely(ack_hint)) {
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
- return;
- }
+ if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
+ mptcp_subflow_cleanup_rbuf(ssk);
}
-
- /* otherwise pick the first active subflow */
- mptcp_for_each_subflow(msk, subflow)
- if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
- return;
}
static bool mptcp_check_data_fin(struct sock *sk)
@@ -616,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
- WRITE_ONCE(msk->ack_hint, ssk);
*bytes += moved;
return done;
@@ -668,18 +677,19 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
- if (inet_sk_state_load(sk) == TCP_CLOSE)
- return;
-
- mptcp_data_lock(sk);
-
__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
__mptcp_ofo_queue(msk);
+ if (unlikely(ssk->sk_err)) {
+ if (!sock_owned_by_user(sk))
+ __mptcp_error_report(sk);
+ else
+ set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+ }
/* If the moves have caught up with the DATA_FIN sequence number
* it's time to ack the DATA_FIN and change socket state, but
@@ -688,7 +698,7 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
*/
if (mptcp_pending_data_fin(sk, NULL))
mptcp_schedule_work(sk);
- mptcp_data_unlock(sk);
+ return moved > 0;
}
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
@@ -696,7 +706,6 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(sk);
int sk_rbuf, ssk_rbuf;
- bool wake;
/* The peer can send data while we are shutting down this
* subflow at msk destruction time, but we must avoid enqueuing
@@ -705,28 +714,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
if (unlikely(subflow->disposable))
return;
- /* move_skbs_to_msk below can legitly clear the data_avail flag,
- * but we will need later to properly woke the reader, cache its
- * value
- */
- wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
- if (wake)
- set_bit(MPTCP_DATA_READY, &msk->flags);
-
ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
if (unlikely(ssk_rbuf > sk_rbuf))
sk_rbuf = ssk_rbuf;
- /* over limit? can't append more skbs to msk */
+ /* over limit? can't append more skbs to msk. Also, no need to wake up */
if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
- goto wake;
-
- move_skbs_to_msk(msk, ssk);
+ return;
-wake:
- if (wake)
+ /* Wake up the reader only for in-sequence data */
+ mptcp_data_lock(sk);
+ if (move_skbs_to_msk(msk, ssk)) {
+ set_bit(MPTCP_DATA_READY, &msk->flags);
sk->sk_data_ready(sk);
+ }
+ mptcp_data_unlock(sk);
}
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
@@ -858,7 +861,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
sock_owned_by_me(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->data_avail)
+ if (READ_ONCE(subflow->data_avail))
return mptcp_subflow_tcp_sock(subflow);
}
@@ -879,31 +882,29 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq,
!mpext->frozen;
}
+/* we can append data to the given data frag if:
+ * - there is space available in the backing page_frag
+ * - the data frag tail matches the current page_frag free offset
+ * - the data frag end sequence number matches the current write seq
+ */
static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
const struct page_frag *pfrag,
const struct mptcp_data_frag *df)
{
return df && pfrag->page == df->page &&
pfrag->size - pfrag->offset > 0 &&
+ pfrag->offset == (df->offset + df->data_len) &&
df->data_seq + df->data_len == msk->write_seq;
}
-static int mptcp_wmem_with_overhead(struct sock *sk, int size)
+static int mptcp_wmem_with_overhead(int size)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- int ret, skbs;
-
- ret = size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
- skbs = (msk->tx_pending_data + size) / msk->size_goal_cache;
- if (skbs < msk->skb_tx_cache.qlen)
- return ret;
-
- return ret + (skbs - msk->skb_tx_cache.qlen) * SKB_TRUESIZE(MAX_TCP_HEADER);
+ return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
}
static void __mptcp_wmem_reserve(struct sock *sk, int size)
{
- int amount = mptcp_wmem_with_overhead(sk, size);
+ int amount = mptcp_wmem_with_overhead(size);
struct mptcp_sock *msk = mptcp_sk(sk);
WARN_ON_ONCE(msk->wmem_reserved);
@@ -941,6 +942,10 @@ static void __mptcp_update_wmem(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
+
if (!msk->wmem_reserved)
return;
@@ -1079,10 +1084,20 @@ out:
static void __mptcp_clean_una_wakeup(struct sock *sk)
{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(!lockdep_is_held(&sk->sk_lock.slock));
+#endif
__mptcp_clean_una(sk);
mptcp_write_space(sk);
}
+static void mptcp_clean_una_wakeup(struct sock *sk)
+{
+ mptcp_data_lock(sk);
+ __mptcp_clean_una_wakeup(sk);
+ mptcp_data_unlock(sk);
+}
+
static void mptcp_enter_memory_pressure(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -1184,49 +1199,8 @@ static struct sk_buff *__mptcp_do_alloc_tx_skb(struct sock *sk, gfp_t gfp)
return NULL;
}
-static bool mptcp_tx_cache_refill(struct sock *sk, int size,
- struct sk_buff_head *skbs, int *total_ts)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct sk_buff *skb;
- int space_needed;
-
- if (unlikely(tcp_under_memory_pressure(sk))) {
- mptcp_mem_reclaim_partial(sk);
-
- /* under pressure pre-allocate at most a single skb */
- if (msk->skb_tx_cache.qlen)
- return true;
- space_needed = msk->size_goal_cache;
- } else {
- space_needed = msk->tx_pending_data + size -
- msk->skb_tx_cache.qlen * msk->size_goal_cache;
- }
-
- while (space_needed > 0) {
- skb = __mptcp_do_alloc_tx_skb(sk, sk->sk_allocation);
- if (unlikely(!skb)) {
- /* under memory pressure, try to pass the caller a
- * single skb to allow forward progress
- */
- while (skbs->qlen > 1) {
- skb = __skb_dequeue_tail(skbs);
- *total_ts -= skb->truesize;
- __kfree_skb(skb);
- }
- return skbs->qlen > 0;
- }
-
- *total_ts += skb->truesize;
- __skb_queue_tail(skbs, skb);
- space_needed -= msk->size_goal_cache;
- }
- return true;
-}
-
static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
struct sk_buff *skb;
if (ssk->sk_tx_skb_cache) {
@@ -1237,22 +1211,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
return true;
}
- skb = skb_peek(&msk->skb_tx_cache);
- if (skb) {
- if (likely(sk_wmem_schedule(ssk, skb->truesize))) {
- skb = __skb_dequeue(&msk->skb_tx_cache);
- if (WARN_ON_ONCE(!skb))
- return false;
-
- mptcp_wmem_uncharge(sk, skb->truesize);
- ssk->sk_tx_skb_cache = skb;
- return true;
- }
-
- /* over memory limit, no point to try to allocate a new skb */
- return false;
- }
-
skb = __mptcp_do_alloc_tx_skb(sk, gfp);
if (!skb)
return false;
@@ -1268,7 +1226,6 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
{
return !ssk->sk_tx_skb_cache &&
- !skb_peek(&mptcp_sk(sk)->skb_tx_cache) &&
tcp_under_memory_pressure(sk);
}
@@ -1279,6 +1236,18 @@ static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
}
+/* note: this always recomputes the csum on the whole skb, even
+ * if we just appended a single frag. More status info needed
+ */
+static void mptcp_update_data_checksum(struct sk_buff *skb, int added)
+{
+ struct mptcp_ext *mpext = mptcp_get_ext(skb);
+ __wsum csum = ~csum_unfold(mpext->csum);
+ int offset = skb->len - added;
+
+ mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset));
+}
+
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
struct mptcp_data_frag *dfrag,
struct mptcp_sendmsg_info *info)
@@ -1299,7 +1268,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
/* compute send limit */
info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags);
avail_size = info->size_goal;
- msk->size_goal_cache = info->size_goal;
skb = tcp_write_queue_tail(ssk);
if (skb) {
/* Limit the write to the size available in the
@@ -1373,10 +1341,14 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (zero_window_probe) {
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
mpext->frozen = 1;
- ret = 0;
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
tcp_push_pending_frames(ssk);
+ return 0;
}
out:
+ if (READ_ONCE(msk->csum_enabled))
+ mptcp_update_data_checksum(tail, ret);
mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
return ret;
}
@@ -1644,7 +1616,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
while (msg_data_left(msg)) {
int total_ts, frag_truesize = 0;
struct mptcp_data_frag *dfrag;
- struct sk_buff_head skbs;
bool dfrag_collapsed;
size_t psize, offset;
@@ -1677,16 +1648,10 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
psize = pfrag->size - offset;
psize = min_t(size_t, psize, msg_data_left(msg));
total_ts = psize + frag_truesize;
- __skb_queue_head_init(&skbs);
- if (!mptcp_tx_cache_refill(sk, psize, &skbs, &total_ts))
- goto wait_for_memory;
- if (!mptcp_wmem_alloc(sk, total_ts)) {
- __skb_queue_purge(&skbs);
+ if (!mptcp_wmem_alloc(sk, total_ts))
goto wait_for_memory;
- }
- skb_queue_splice_tail(&skbs, &msk->skb_tx_cache);
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
mptcp_wmem_uncharge(sk, psize + frag_truesize);
@@ -1743,7 +1708,7 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
sk_wait_event(sk, timeo,
- test_and_clear_bit(MPTCP_DATA_READY, &msk->flags), &wait);
+ test_bit(MPTCP_DATA_READY, &msk->flags), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
@@ -1751,7 +1716,9 @@ static void mptcp_wait_data(struct sock *sk, long *timeo)
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
struct msghdr *msg,
- size_t len, int flags)
+ size_t len, int flags,
+ struct scm_timestamping_internal *tss,
+ int *cmsg_flags)
{
struct sk_buff *skb, *tmp;
int copied = 0;
@@ -1771,6 +1738,11 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
}
}
+ if (MPTCP_SKB_CB(skb)->has_rxtstamp) {
+ tcp_update_recv_tstamps(skb, tss);
+ *cmsg_flags |= MPTCP_CMSG_TS;
+ }
+
copied += count;
if (count < data_len) {
@@ -1934,7 +1906,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
- tcp_cleanup_rbuf(ssk, moved);
+
+ if (unlikely(ssk->sk_err))
+ __mptcp_error_report(sk);
unlock_sock_fast(ssk, slowpath);
} while (!done);
@@ -1947,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
- mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@@ -1958,7 +1931,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- int copied = 0;
+ struct scm_timestamping_internal tss;
+ int copied = 0, cmsg_flags = 0;
int target;
long timeo;
@@ -1980,7 +1954,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
while (copied < len) {
int bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags);
+ bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
if (!copied)
copied = bytes_read;
@@ -2056,11 +2030,14 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
*/
if (unlikely(__mptcp_move_skbs(msk)))
set_bit(MPTCP_DATA_READY, &msk->flags);
- } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
- /* data to read but mptcp_wait_data() cleared DATA_READY */
- set_bit(MPTCP_DATA_READY, &msk->flags);
}
+
out_err:
+ if (cmsg_flags && copied >= 0) {
+ if (cmsg_flags & MPTCP_CMSG_TS)
+ tcp_recv_timestamp(msg, sk, &tss);
+ }
+
pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
msk, test_bit(MPTCP_DATA_READY, &msk->flags),
skb_queue_empty_lockless(&sk->sk_receive_queue), copied);
@@ -2192,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;
- if (ssk == msk->ack_hint)
- msk->ack_hint = NULL;
-
if (ssk == msk->first)
msk->first = NULL;
@@ -2266,13 +2240,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
+ bool slow;
- lock_sock(tcp_sk);
+ slow = lock_sock_fast(tcp_sk);
if (tcp_sk->sk_state != TCP_CLOSE) {
tcp_send_active_reset(tcp_sk, GFP_ATOMIC);
tcp_set_state(tcp_sk, TCP_CLOSE);
}
- release_sock(tcp_sk);
+ unlock_sock_fast(tcp_sk, slow);
}
inet_sk_state_store(sk, TCP_CLOSE);
@@ -2293,7 +2268,7 @@ static void __mptcp_retrans(struct sock *sk)
struct sock *ssk;
int ret;
- __mptcp_clean_una_wakeup(sk);
+ mptcp_clean_una_wakeup(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag) {
if (mptcp_data_fin_enabled(msk)) {
@@ -2317,8 +2292,8 @@ static void __mptcp_retrans(struct sock *sk)
/* limit retransmission to the bytes already sent on some subflows */
info.sent = 0;
- info.limit = dfrag->already_sent;
- while (info.sent < dfrag->already_sent) {
+ info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
+ while (info.sent < info.limit) {
if (!mptcp_alloc_tx_skb(sk, ssk))
break;
@@ -2330,9 +2305,11 @@ static void __mptcp_retrans(struct sock *sk)
copied += ret;
info.sent += ret;
}
- if (copied)
+ if (copied) {
+ dfrag->already_sent = max(dfrag->already_sent, info.sent);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
+ }
mptcp_set_timeout(sk, ssk);
release_sock(ssk);
@@ -2400,17 +2377,15 @@ static int __mptcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&msk->rtx_queue);
INIT_WORK(&msk->work, mptcp_worker);
__skb_queue_head_init(&msk->receive_queue);
- __skb_queue_head_init(&msk->skb_tx_cache);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
msk->wmem_reserved = 0;
msk->rmem_released = 0;
msk->tx_pending_data = 0;
- msk->size_goal_cache = TCP_BASE_MSS;
- msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
+ WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk);
@@ -2418,13 +2393,12 @@ static int __mptcp_init_sock(struct sock *sk)
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
- tcp_assign_congestion_control(sk);
-
return 0;
}
static int mptcp_init_sock(struct sock *sk)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
int ret;
@@ -2442,6 +2416,16 @@ static int mptcp_init_sock(struct sock *sk)
if (ret)
return ret;
+ /* fetch the ca name; do it outside __mptcp_init_sock(), so that clone will
+ * propagate the correct value
+ */
+ tcp_assign_congestion_control(sk);
+ strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name);
+
+ /* no need to keep a reference to the ops, the name will suffice */
+ tcp_cleanup_congestion_control(sk);
+ icsk->icsk_ca_ops = NULL;
+
sk_sockets_allocated_inc(sk);
sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
@@ -2453,15 +2437,10 @@ static void __mptcp_clear_xmit(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct mptcp_data_frag *dtmp, *dfrag;
- struct sk_buff *skb;
WRITE_ONCE(msk->first_pending, NULL);
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list)
dfrag_clear(sk, dfrag);
- while ((skb = __skb_dequeue(&msk->skb_tx_cache)) != NULL) {
- sk->sk_forward_alloc += skb->truesize;
- kfree_skb(skb);
- }
}
static void mptcp_cancel_work(struct sock *sk)
@@ -2616,7 +2595,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
- tcp_cleanup_congestion_control(sk);
sk_refcnt_debug_release(sk);
mptcp_dispose_initial_subflow(msk);
sock_put(sk);
@@ -2743,6 +2721,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->token = subflow_req->token;
msk->subflow = NULL;
WRITE_ONCE(msk->fully_established, false);
+ if (mp_opt->csum_reqd)
+ WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;