aboutsummaryrefslogtreecommitdiff
path: root/net/netlink/af_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--net/netlink/af_netlink.c241
1 files changed, 167 insertions, 74 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9a0ae7172f92..8f060d7f9a0e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -84,6 +84,7 @@ struct listeners {
#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
#define NETLINK_F_RECV_NO_ENOBUFS 0x8
#define NETLINK_F_LISTEN_ALL_NSID 0x10
+#define NETLINK_F_CAP_ACK 0x20
static inline int netlink_is_kernel(struct sock *sk)
{
@@ -124,6 +125,24 @@ static inline u32 netlink_group_mask(u32 group)
return group ? 1 << (group - 1) : 0;
}
+static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
+ gfp_t gfp_mask)
+{
+ unsigned int len = skb_end_offset(skb);
+ struct sk_buff *new;
+
+ new = alloc_skb(len, gfp_mask);
+ if (new == NULL)
+ return NULL;
+
+ NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
+ NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
+ NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
+
+ memcpy(skb_put(new, len), skb->data, len);
+ return new;
+}
+
int netlink_add_tap(struct netlink_tap *nt)
{
if (unlikely(nt->dev->type != ARPHRD_NETLINK))
@@ -205,7 +224,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
int ret = -ENOMEM;
dev_hold(dev);
- nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
+ nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
+ else
+ nskb = skb_clone(skb, GFP_ATOMIC);
if (nskb) {
nskb->dev = dev;
nskb->protocol = htons((u16) sk->sk_protocol);
@@ -278,11 +301,6 @@ static void netlink_rcv_wake(struct sock *sk)
}
#ifdef CONFIG_NETLINK_MMAP
-static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
-{
- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
-}
-
static bool netlink_rx_is_mmaped(struct sock *sk)
{
return nlk_sk(sk)->rx_ring.pg_vec != NULL;
@@ -357,25 +375,52 @@ err1:
return NULL;
}
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+ unsigned int order)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct sk_buff_head *queue;
+ struct netlink_ring *ring;
+
+ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+ spin_lock_bh(&queue->lock);
+
+ ring->frame_max = req->nm_frame_nr - 1;
+ ring->head = 0;
+ ring->frame_size = req->nm_frame_size;
+ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+ swap(ring->pg_vec_len, req->nm_block_nr);
+ swap(ring->pg_vec_order, order);
+ swap(ring->pg_vec, pg_vec);
+
+ __skb_queue_purge(queue);
+ spin_unlock_bh(&queue->lock);
+
+ WARN_ON(atomic_read(&nlk->mapped));
+
+ if (pg_vec)
+ free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool closing, bool tx_ring)
+ bool tx_ring)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ring *ring;
- struct sk_buff_head *queue;
void **pg_vec = NULL;
unsigned int order = 0;
- int err;
ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- if (!closing) {
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
- }
+ if (atomic_read(&nlk->mapped))
+ return -EBUSY;
+ if (atomic_read(&ring->pending))
+ return -EBUSY;
if (req->nm_block_nr) {
if (ring->pg_vec != NULL)
@@ -407,31 +452,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
return -EINVAL;
}
- err = -EBUSY;
mutex_lock(&nlk->pg_vec_lock);
- if (closing || atomic_read(&nlk->mapped) == 0) {
- err = 0;
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
+ if (atomic_read(&nlk->mapped) == 0) {
+ __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+ mutex_unlock(&nlk->pg_vec_lock);
+ return 0;
}
+
mutex_unlock(&nlk->pg_vec_lock);
if (pg_vec)
free_pg_vec(pg_vec, order, req->nm_block_nr);
- return err;
+
+ return -EBUSY;
}
static void netlink_mm_open(struct vm_area_struct *vma)
@@ -578,16 +611,6 @@ netlink_current_frame(const struct netlink_ring *ring,
return netlink_lookup_frame(ring, ring->head, status);
}
-static struct nl_mmap_hdr *
-netlink_previous_frame(const struct netlink_ring *ring,
- enum nl_mmap_status status)
-{
- unsigned int prev;
-
- prev = ring->head ? ring->head - 1 : ring->frame_max;
- return netlink_lookup_frame(ring, prev, status);
-}
-
static void netlink_increment_head(struct netlink_ring *ring)
{
ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
@@ -595,11 +618,11 @@ static void netlink_increment_head(struct netlink_ring *ring)
static void netlink_forward_ring(struct netlink_ring *ring)
{
- unsigned int head = ring->head, pos = head;
+ unsigned int head = ring->head;
const struct nl_mmap_hdr *hdr;
do {
- hdr = __netlink_lookup_frame(ring, pos);
+ hdr = __netlink_lookup_frame(ring, ring->head);
if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
break;
if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
@@ -608,6 +631,21 @@ static void netlink_forward_ring(struct netlink_ring *ring)
} while (ring->head != head);
}
+static bool netlink_has_valid_frame(struct netlink_ring *ring)
+{
+ unsigned int head = ring->head, pos = head;
+ const struct nl_mmap_hdr *hdr;
+
+ do {
+ hdr = __netlink_lookup_frame(ring, pos);
+ if (hdr->nm_status == NL_MMAP_STATUS_VALID)
+ return true;
+ pos = pos != 0 ? pos - 1 : ring->frame_max;
+ } while (pos != head);
+
+ return false;
+}
+
static bool netlink_dump_space(struct netlink_sock *nlk)
{
struct netlink_ring *ring = &nlk->rx_ring;
@@ -653,13 +691,19 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
mask = datagram_poll(file, sock, wait);
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (nlk->rx_ring.pg_vec) {
- netlink_forward_ring(&nlk->rx_ring);
- if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED))
- mask |= POLLIN | POLLRDNORM;
+ /* We could already have received frames in the normal receive
+ * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
+ * so if mask contains pollin/etc already, there's no point
+ * walking the ring.
+ */
+ if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (nlk->rx_ring.pg_vec) {
+ if (netlink_has_valid_frame(&nlk->rx_ring))
+ mask |= POLLIN | POLLRDNORM;
+ }
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
}
- spin_unlock_bh(&sk->sk_receive_queue.lock);
spin_lock_bh(&sk->sk_write_queue.lock);
if (nlk->tx_ring.pg_vec) {
@@ -819,7 +863,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
}
#else /* CONFIG_NETLINK_MMAP */
-#define netlink_skb_is_mmaped(skb) false
#define netlink_rx_is_mmaped(sk) false
#define netlink_tx_is_mmaped(sk) false
#define netlink_mmap sock_no_mmap
@@ -900,10 +943,10 @@ static void netlink_sock_destruct(struct sock *sk)
memset(&req, 0, sizeof(req));
if (nlk->rx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, false);
+ __netlink_set_ring(sk, &req, false, NULL, 0);
memset(&req, 0, sizeof(req));
if (nlk->tx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, true);
+ __netlink_set_ring(sk, &req, true, NULL, 0);
}
#endif /* CONFIG_NETLINK_MMAP */
@@ -1067,8 +1110,8 @@ static int netlink_insert(struct sock *sk, u32 portid)
lock_sock(sk);
- err = -EBUSY;
- if (nlk_sk(sk)->portid)
+ err = nlk_sk(sk)->portid == portid ? 0 : -EBUSY;
+ if (nlk_sk(sk)->bound)
goto err;
err = -ENOMEM;
@@ -1081,12 +1124,21 @@ static int netlink_insert(struct sock *sk, u32 portid)
err = __netlink_insert(table, sk);
if (err) {
+ /* In case the hashtable backend returns with -EBUSY
+ * from here, it must not escape to the caller.
+ */
+ if (unlikely(err == -EBUSY))
+ err = -EOVERFLOW;
if (err == -EEXIST)
err = -EADDRINUSE;
- nlk_sk(sk)->portid = 0;
sock_put(sk);
+ goto err;
}
+ /* We need to ensure that the socket is hashed and visible. */
+ smp_wmb();
+ nlk_sk(sk)->bound = portid;
+
err:
release_sock(sk);
return err;
@@ -1471,6 +1523,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
int err;
long unsigned int groups = nladdr->nl_groups;
+ bool bound;
if (addr_len < sizeof(struct sockaddr_nl))
return -EINVAL;
@@ -1487,9 +1540,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return err;
}
- if (nlk->portid)
+ bound = nlk->bound;
+ if (bound) {
+ /* Ensure nlk->portid is up-to-date. */
+ smp_rmb();
+
if (nladdr->nl_pid != nlk->portid)
return -EINVAL;
+ }
if (nlk->netlink_bind && groups) {
int group;
@@ -1505,7 +1563,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
}
}
- if (!nlk->portid) {
+ /* No need for barriers here as we return to user-space without
+ * using any of the bound attributes.
+ */
+ if (!bound) {
err = nladdr->nl_pid ?
netlink_insert(sk, nladdr->nl_pid) :
netlink_autobind(sock);
@@ -1553,7 +1614,10 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
!netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
- if (!nlk->portid)
+ /* No need for barriers here as we return to user-space without
+ * using any of the bound attributes.
+ */
+ if (!nlk->bound)
err = netlink_autobind(sock);
if (err == 0) {
@@ -1812,15 +1876,16 @@ retry:
}
EXPORT_SYMBOL(netlink_unicast);
-struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
- u32 dst_portid, gfp_t gfp_mask)
+struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
+ unsigned int ldiff, u32 dst_portid,
+ gfp_t gfp_mask)
{
#ifdef CONFIG_NETLINK_MMAP
+ unsigned int maxlen, linear_size;
struct sock *sk = NULL;
struct sk_buff *skb;
struct netlink_ring *ring;
struct nl_mmap_hdr *hdr;
- unsigned int maxlen;
sk = netlink_getsockbyportid(ssk, dst_portid);
if (IS_ERR(sk))
@@ -1831,7 +1896,11 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
if (ring->pg_vec == NULL)
goto out_put;
- if (ring->frame_size - NL_MMAP_HDRLEN < size)
+ /* We need to account the full linear size needed as a ring
+ * slot cannot have non-linear parts.
+ */
+ linear_size = size + ldiff;
+ if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
goto out_put;
skb = alloc_skb_head(gfp_mask);
@@ -1845,13 +1914,14 @@ struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
/* check again under lock */
maxlen = ring->frame_size - NL_MMAP_HDRLEN;
- if (maxlen < size)
+ if (maxlen < linear_size)
goto out_free;
netlink_forward_ring(ring);
hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
if (hdr == NULL)
goto err2;
+
netlink_ring_setup_skb(skb, sk, ring, hdr);
netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
atomic_inc(&ring->pending);
@@ -1877,7 +1947,7 @@ out:
#endif
return alloc_skb(size, gfp_mask);
}
-EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
@@ -2223,7 +2293,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
- err = netlink_set_ring(sk, &req, false,
+ err = netlink_set_ring(sk, &req,
optname == NETLINK_TX_RING);
break;
}
@@ -2238,6 +2308,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
err = 0;
break;
+ case NETLINK_CAP_ACK:
+ if (val)
+ nlk->flags |= NETLINK_F_CAP_ACK;
+ else
+ nlk->flags &= ~NETLINK_F_CAP_ACK;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2312,6 +2389,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
netlink_table_ungrab();
break;
}
+ case NETLINK_CAP_ACK:
+ if (len < sizeof(int))
+ return -EINVAL;
+ len = sizeof(int);
+ val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0;
+ if (put_user(len, optlen) ||
+ put_user(val, optval))
+ return -EFAULT;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2371,17 +2458,20 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dst_group = nlk->dst_group;
}
- if (!nlk->portid) {
+ if (!nlk->bound) {
err = netlink_autobind(sock);
if (err)
goto out;
+ } else {
+ /* Ensure nlk is hashed and visible. */
+ smp_rmb();
}
/* It's a really convoluted way for userland to ask for mmaped
* sendmsg(), but that's what we've got...
*/
if (netlink_tx_is_mmaped(sk) &&
- msg->msg_iter.type == ITER_IOVEC &&
+ iter_is_iovec(&msg->msg_iter) &&
msg->msg_iter.nr_segs == 1 &&
msg->msg_iter.iov->iov_base == NULL) {
err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
@@ -2853,9 +2943,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
struct nlmsghdr *rep;
struct nlmsgerr *errmsg;
size_t payload = sizeof(*errmsg);
+ struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
- /* error messages get the original request appened */
- if (err)
+ /* Error messages get the original request appened, unless the user
+ * requests to cap the error message.
+ */
+ if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
payload += nlmsg_len(nlh);
skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
@@ -2878,7 +2971,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
NLMSG_ERROR, payload, 0);
errmsg = nlmsg_data(rep);
errmsg->error = err;
- memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
+ memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);