aboutsummaryrefslogtreecommitdiff
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2024-06-25 11:10:20 +0200
committerPaolo Abeni <pabeni@redhat.com>2024-06-25 11:10:21 +0200
commit7e7c714a36a5b10e391168e7e8145060e041ea12 (patch)
tree1fb44e4934b36a9e237bb5da8f98fe2d61869d6f /net/unix/af_unix.c
parentbf2468f9afba8001c7432d104756a5dd3537bc76 (diff)
parent22e5751b0524fedd4f345412d8d3394387471ab7 (diff)
downloadlinux-7e7c714a36a5b10e391168e7e8145060e041ea12.tar.gz
linux-7e7c714a36a5b10e391168e7e8145060e041ea12.tar.bz2
linux-7e7c714a36a5b10e391168e7e8145060e041ea12.zip
Merge branch 'af_unix-remove-spin_lock_nested-and-convert-to-lock_cmp_fn'
Kuniyuki Iwashima says: ==================== af_unix: Remove spin_lock_nested() and convert to lock_cmp_fn. This series removes spin_lock_nested() in AF_UNIX and instead defines the locking orders as functions tied to each lock by lockdep_set_lock_cmp_fn(). When the defined function returns a negative value, lockdep considers it will not cause deadlock. (See ->cmp_fn() in check_deadlock() and check_prev_add().) When we cannot define the total ordering, we return -1 for the allowed ordering and otherwise 0 as undefined. [0] [0]: https://lore.kernel.org/netdev/thzkgbuwuo3knevpipu4rzsh5qgmwhklihypdgziiruabvh46f@uwdkpcfxgloo/ Changes: v4: * Patch 4 * Make unix_state_lock_cmp_fn() symmetric. v3: https://lore.kernel.org/netdev/20240614200715.93150-1-kuniyu@amazon.com/ * Patch 3 * Cache sk->sk_state * s/unix_state_lock()/unix_state_unlock()/ * Patch 8 * Add embryo -> listener locking order v2: https://lore.kernel.org/netdev/20240611222905.34695-1-kuniyu@amazon.com/ * Patch 1 & 2 * Use (((l) > (r)) - ((l) < (r))) for comparison v1: https://lore.kernel.org/netdev/20240610223501.73191-1-kuniyu@amazon.com/ ==================== Link: https://lore.kernel.org/r/20240620205623.60139-1-kuniyu@amazon.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c151
1 files changed, 103 insertions, 48 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e9c941e6a464..103a7909cb1a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -126,6 +126,81 @@ static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2];
* hash table is protected with spinlock.
* each socket state is protected by separate spinlock.
*/
+#ifdef CONFIG_PROVE_LOCKING
+#define cmp_ptr(l, r) (((l) > (r)) - ((l) < (r)))
+
+static int unix_table_lock_cmp_fn(const struct lockdep_map *a,
+ const struct lockdep_map *b)
+{
+ return cmp_ptr(a, b);
+}
+
+static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
+ const struct lockdep_map *_b)
+{
+ const struct unix_sock *a, *b;
+
+ a = container_of(_a, struct unix_sock, lock.dep_map);
+ b = container_of(_b, struct unix_sock, lock.dep_map);
+
+ if (a->sk.sk_state == TCP_LISTEN) {
+ /* unix_stream_connect(): Before the 2nd unix_state_lock(),
+ *
+ * 1. a is TCP_LISTEN.
+ * 2. b is not a.
+ * 3. concurrent connect(b -> a) must fail.
+ *
+ * Except for 2. & 3., the b's state can be any possible
+ * value due to concurrent connect() or listen().
+ *
+ * 2. is detected in debug_spin_lock_before(), and 3. cannot
+ * be expressed as lock_cmp_fn.
+ */
+ switch (b->sk.sk_state) {
+ case TCP_CLOSE:
+ case TCP_ESTABLISHED:
+ case TCP_LISTEN:
+ return -1;
+ default:
+ /* Invalid case. */
+ return 0;
+ }
+ }
+
+ /* Should never happen. Just to be symmetric. */
+ if (b->sk.sk_state == TCP_LISTEN) {
+ switch (b->sk.sk_state) {
+ case TCP_CLOSE:
+ case TCP_ESTABLISHED:
+ return 1;
+ default:
+ return 0;
+ }
+ }
+
+ /* unix_state_double_lock(): ascending address order. */
+ return cmp_ptr(a, b);
+}
+
+static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
+ const struct lockdep_map *_b)
+{
+ const struct sock *a, *b;
+
+ a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
+ b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
+
+ /* unix_collect_skb(): listener -> embryo order. */
+ if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
+ return -1;
+
+ /* Should never happen. Just to be symmetric. */
+ if (b->sk_state == TCP_LISTEN && unix_sk(a)->listener == b)
+ return 1;
+
+ return 0;
+}
+#endif
static unsigned int unix_unbound_hash(struct sock *sk)
{
@@ -168,7 +243,7 @@ static void unix_table_double_lock(struct net *net,
swap(hash1, hash2);
spin_lock(&net->unx.table.locks[hash1]);
- spin_lock_nested(&net->unx.table.locks[hash2], SINGLE_DEPTH_NESTING);
+ spin_lock(&net->unx.table.locks[hash2]);
}
static void unix_table_double_unlock(struct net *net,
@@ -676,14 +751,19 @@ static void unix_release_sock(struct sock *sk, int embrion)
static void init_peercred(struct sock *sk)
{
+ sk->sk_peer_pid = get_pid(task_tgid(current));
+ sk->sk_peer_cred = get_current_cred();
+}
+
+static void update_peercred(struct sock *sk)
+{
const struct cred *old_cred;
struct pid *old_pid;
spin_lock(&sk->sk_peer_lock);
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
- sk->sk_peer_pid = get_pid(task_tgid(current));
- sk->sk_peer_cred = get_current_cred();
+ init_peercred(sk);
spin_unlock(&sk->sk_peer_lock);
put_pid(old_pid);
@@ -692,26 +772,12 @@ static void init_peercred(struct sock *sk)
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- const struct cred *old_cred;
- struct pid *old_pid;
+ lockdep_assert_held(&unix_sk(peersk)->lock);
- if (sk < peersk) {
- spin_lock(&sk->sk_peer_lock);
- spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
- } else {
- spin_lock(&peersk->sk_peer_lock);
- spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
- }
- old_pid = sk->sk_peer_pid;
- old_cred = sk->sk_peer_cred;
- sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
+ spin_lock(&sk->sk_peer_lock);
+ sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
-
spin_unlock(&sk->sk_peer_lock);
- spin_unlock(&peersk->sk_peer_lock);
-
- put_pid(old_pid);
- put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
@@ -735,7 +801,7 @@ static int unix_listen(struct socket *sock, int backlog)
WRITE_ONCE(sk->sk_state, TCP_LISTEN);
/* set credentials so connect can copy them */
- init_peercred(sk);
+ update_peercred(sk);
err = 0;
out_unlock:
@@ -972,12 +1038,15 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
sk->sk_destruct = unix_sock_destructor;
+ lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
+
u = unix_sk(sk);
u->listener = NULL;
u->vertex = NULL;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
+ lock_set_cmp_fn(&u->lock, unix_state_lock_cmp_fn, NULL);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
@@ -1326,11 +1395,12 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
+
if (sk1 > sk2)
swap(sk1, sk2);
unix_state_lock(sk1);
- unix_state_lock_nested(sk2, U_LOCK_SECOND);
+ unix_state_lock(sk2);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1473,6 +1543,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
+ unsigned char state;
long timeo;
int err;
@@ -1523,7 +1594,6 @@ restart:
goto out;
}
- /* Latch state of peer */
unix_state_lock(other);
/* Apparently VFS overslept socket death. Retry. */
@@ -1553,37 +1623,21 @@ restart:
goto restart;
}
- /* Latch our state.
-
- It is tricky place. We need to grab our state lock and cannot
- drop lock on peer. It is dangerous because deadlock is
- possible. Connect to self case and simultaneous
- attempt to connect are eliminated by checking socket
- state. other is TCP_LISTEN, if sk is TCP_LISTEN we
- check this before attempt to grab lock.
-
- Well, and we have to recheck the state after socket locked.
+ /* self connect and simultaneous connect are eliminated
+ * by rejecting TCP_LISTEN socket to avoid deadlock.
*/
- switch (READ_ONCE(sk->sk_state)) {
- case TCP_CLOSE:
- /* This is ok... continue with connect */
- break;
- case TCP_ESTABLISHED:
- /* Socket is already connected */
- err = -EISCONN;
- goto out_unlock;
- default:
- err = -EINVAL;
+ state = READ_ONCE(sk->sk_state);
+ if (unlikely(state != TCP_CLOSE)) {
+ err = state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
goto out_unlock;
}
- unix_state_lock_nested(sk, U_LOCK_SECOND);
+ unix_state_lock(sk);
- if (sk->sk_state != TCP_CLOSE) {
+ if (unlikely(sk->sk_state != TCP_CLOSE)) {
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EINVAL;
unix_state_unlock(sk);
- unix_state_unlock(other);
- sock_put(other);
- goto restart;
+ goto out_unlock;
}
err = security_unix_stream_connect(sk, other, newsk);
@@ -3578,6 +3632,7 @@ static int __net_init unix_net_init(struct net *net)
for (i = 0; i < UNIX_HASH_SIZE; i++) {
spin_lock_init(&net->unx.table.locks[i]);
+ lock_set_cmp_fn(&net->unx.table.locks[i], unix_table_lock_cmp_fn, NULL);
INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
}