Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--   drivers/net/virtio_net.c   410
1 files changed, 388 insertions, 22 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7276d5a95bd0..08327e005ccc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/virtio.h>
 #include <linux/virtio_net.h>
+#include <linux/bpf.h>
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
@@ -81,6 +82,8 @@ struct receive_queue {
 
 	struct napi_struct napi;
 
+	struct bpf_prog __rcu *xdp_prog;
+
 	/* Chain pages by the private ptr. */
 	struct page *pages;
 
@@ -111,6 +114,9 @@ struct virtnet_info {
 	/* # of queue pairs currently used by the driver */
 	u16 curr_queue_pairs;
 
+	/* # of XDP queue pairs currently used by the driver */
+	u16 xdp_queue_pairs;
+
 	/* I like... big packets and I cannot lie! */
 	bool big_packets;
 
@@ -324,6 +330,90 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static void virtnet_xdp_xmit(struct virtnet_info *vi,
+			     struct receive_queue *rq,
+			     struct send_queue *sq,
+			     struct xdp_buff *xdp)
+{
+	struct page *page = virt_to_head_page(xdp->data);
+	struct virtio_net_hdr_mrg_rxbuf *hdr;
+	unsigned int num_sg, len;
+	void *xdp_sent;
+	int err;
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		struct page *sent_page = virt_to_head_page(xdp_sent);
+
+		if (vi->mergeable_rx_bufs)
+			put_page(sent_page);
+		else
+			give_pages(rq, sent_page);
+	}
+
+	/* Zero header and leave csum up to XDP layers */
+	hdr = xdp->data;
+	memset(hdr, 0, vi->hdr_len);
+
+	num_sg = 1;
+	sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);
+	err = virtqueue_add_outbuf(sq->vq, sq->sg, num_sg,
+				   xdp->data, GFP_ATOMIC);
+	if (unlikely(err)) {
+		if (vi->mergeable_rx_bufs)
+			put_page(page);
+		else
+			give_pages(rq, page);
+		return; // On error abort to avoid unnecessary kick
+	} else if (!vi->mergeable_rx_bufs) {
+		/* If not mergeable bufs must be big packets so cleanup pages */
+		give_pages(rq, (struct page *)page->private);
+		page->private = 0;
+	}
+
+	virtqueue_kick(sq->vq);
+}
+
+static u32 do_xdp_prog(struct virtnet_info *vi,
+		       struct receive_queue *rq,
+		       struct bpf_prog *xdp_prog,
+		       struct page *page, int offset, int len)
+{
+	int hdr_padded_len;
+	struct xdp_buff xdp;
+	unsigned int qp;
+	u32 act;
+	u8 *buf;
+
+	buf = page_address(page) + offset;
+
+	if (vi->mergeable_rx_bufs)
+		hdr_padded_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
+	xdp.data = buf + hdr_padded_len;
+	xdp.data_end = xdp.data + (len - vi->hdr_len);
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return XDP_PASS;
+	case XDP_TX:
+		qp = vi->curr_queue_pairs -
+			vi->xdp_queue_pairs +
+			smp_processor_id();
+		xdp.data = buf + (vi->mergeable_rx_bufs ? 0 : 4);
+		virtnet_xdp_xmit(vi, rq, &vi->sq[qp], &xdp);
+		return XDP_TX;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_ABORTED:
+	case XDP_DROP:
+		return XDP_DROP;
+	}
+}
+
 static struct sk_buff *receive_small(struct virtnet_info *vi, void *buf, unsigned int len)
 {
 	struct sk_buff * skb = buf;
@@ -340,17 +430,102 @@ static struct sk_buff *receive_big(struct net_device *dev,
 				   void *buf,
 				   unsigned int len)
 {
+	struct bpf_prog *xdp_prog;
 	struct page *page = buf;
-	struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+	struct sk_buff *skb;
 
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+		u32 act;
+
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+		act = do_xdp_prog(vi, rq, xdp_prog, page, 0, len);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_TX:
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
+			goto err_xdp;
+		}
+	}
+	rcu_read_unlock();
+
+	skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 	if (unlikely(!skb))
 		goto err;
 
 	return skb;
 
+err_xdp:
+	rcu_read_unlock();
 err:
 	dev->stats.rx_dropped++;
 	give_pages(rq, page);
+xdp_xmit:
+	return NULL;
+}
+
+/* The conditions to enable XDP should preclude the underlying device from
+ * sending packets across multiple buffers (num_buf > 1). However per spec
+ * it does not appear to be illegal to do so but rather just against convention.
+ * So in order to avoid making a system unresponsive the packets are pushed
+ * into a page and the XDP program is run. This will be extremely slow and we
+ * push a warning to the user to fix this as soon as possible. Fixing this may
+ * require resolving the underlying hardware to determine why multiple buffers
+ * are being received or simply loading the XDP program in the ingress stack
+ * after the skb is built because there is no advantage to running it here
+ * anymore.
+ */
+static struct page *xdp_linearize_page(struct receive_queue *rq,
+				       u16 num_buf,
+				       struct page *p,
+				       int offset,
+				       unsigned int *len)
+{
+	struct page *page = alloc_page(GFP_ATOMIC);
+	unsigned int page_off = 0;
+
+	if (!page)
+		return NULL;
+
+	memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
+	page_off += *len;
+
+	while (--num_buf) {
+		unsigned int buflen;
+		unsigned long ctx;
+		void *buf;
+		int off;
+
+		ctx = (unsigned long)virtqueue_get_buf(rq->vq, &buflen);
+		if (unlikely(!ctx))
+			goto err_buf;
+
+		/* guard against a misconfigured or uncooperative backend that
+		 * is sending packet larger than the MTU.
+		 */
+		if ((page_off + buflen) > PAGE_SIZE)
+			goto err_buf;
+
+		buf = mergeable_ctx_to_buf_address(ctx);
+		p = virt_to_head_page(buf);
+		off = buf - page_address(p);
+
+		memcpy(page_address(page) + page_off,
+		       page_address(p) + off, buflen);
+		page_off += buflen;
+	}
+
+	*len = page_off;
+	return page;
+err_buf:
+	__free_pages(page, 0);
 	return NULL;
 }
 
@@ -365,11 +540,67 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
 	struct page *page = virt_to_head_page(buf);
 	int offset = buf - page_address(page);
-	unsigned int truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	struct sk_buff *head_skb, *curr_skb;
+	struct bpf_prog *xdp_prog;
+	unsigned int truesize;
+
+	head_skb = NULL;
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (xdp_prog) {
+		struct page *xdp_page;
+		u32 act;
+
+		/* No known backend devices should send packets with
+		 * more than a single buffer when XDP conditions are
+		 * met. However it is not strictly illegal so the case
+		 * is handled as an exception and a warning is thrown.
+		 */
+		if (unlikely(num_buf > 1)) {
+			bpf_warn_invalid_xdp_buffer();
+
+			/* linearize data for XDP */
+			xdp_page = xdp_linearize_page(rq, num_buf,
+						      page, offset, &len);
+			if (!xdp_page)
+				goto err_xdp;
+			offset = 0;
+		} else {
+			xdp_page = page;
+		}
 
-	struct sk_buff *head_skb = page_to_skb(vi, rq, page, offset, len,
-					       truesize);
-	struct sk_buff *curr_skb = head_skb;
+		/* Transient failure which in theory could occur if
+		 * in-flight packets from before XDP was enabled reach
+		 * the receive path after XDP is loaded. In practice I
+		 * was not able to create this condition.
+		 */
+		if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
+			goto err_xdp;
+
+		act = do_xdp_prog(vi, rq, xdp_prog, page, offset, len);
+		switch (act) {
+		case XDP_PASS:
+			if (unlikely(xdp_page != page))
+				__free_pages(xdp_page, 0);
+			break;
+		case XDP_TX:
+			if (unlikely(xdp_page != page))
+				goto err_xdp;
+			rcu_read_unlock();
+			goto xdp_xmit;
+		case XDP_DROP:
+		default:
+			if (unlikely(xdp_page != page))
+				__free_pages(xdp_page, 0);
+			goto err_xdp;
+		}
+	}
+	rcu_read_unlock();
+
+	truesize = max(len, mergeable_ctx_to_buf_truesize(ctx));
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))
 		goto err_skb;
@@ -423,6 +654,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
 	return head_skb;
 
+err_xdp:
+	rcu_read_unlock();
 err_skb:
 	put_page(page);
 	while (--num_buf) {
@@ -439,6 +672,7 @@ err_skb:
 err_buf:
 	dev->stats.rx_dropped++;
 	dev_kfree_skb(head_skb);
+xdp_xmit:
 	return NULL;
 }
 
@@ -969,12 +1203,17 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct virtio_device *vdev = vi->vdev;
 	int ret;
-	struct sockaddr *addr = p;
+	struct sockaddr *addr;
 	struct scatterlist sg;
 
-	ret = eth_prepare_mac_addr_change(dev, p);
+	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
+	if (!addr)
+		return -ENOMEM;
+	memcpy(addr, p, sizeof(*addr));
+
+	ret = eth_prepare_mac_addr_change(dev, addr);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
 		sg_init_one(&sg, addr->sa_data, dev->addr_len);
@@ -982,7 +1221,8 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 					  VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
 			dev_warn(&vdev->dev,
 				 "Failed to set mac address by vq command.\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 	} else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
 		   !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
@@ -996,8 +1236,11 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
 	}
 
 	eth_commit_mac_addr_change(dev, p);
+	ret = 0;
 
-	return 0;
+out:
+	kfree(addr);
+	return ret;
 }
 
 static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev,
@@ -1328,6 +1571,13 @@ static int virtnet_set_channels(struct net_device *dev,
 	if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
 		return -EINVAL;
 
+	/* For now we don't support modifying channels while XDP is loaded
+	 * also when XDP is loaded all RX queues have XDP programs so we only
+	 * need to check a single RX queue.
+	 */
+	if (vi->rq[0].xdp_prog)
+		return -EINVAL;
+
 	get_online_cpus();
 	err = virtnet_set_queues(vi, queue_pairs);
 	if (!err) {
@@ -1419,17 +1669,93 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
 	.set_settings = virtnet_set_settings,
 };
 
-#define MIN_MTU 68
-#define MAX_MTU 65535
-
-static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
+static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
-	if (new_mtu < MIN_MTU || new_mtu > MAX_MTU)
+	unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct bpf_prog *old_prog;
+	u16 xdp_qp = 0, curr_qp;
+	int i, err;
+
+	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
+	    virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6)) {
+		netdev_warn(dev, "can't set XDP while host is implementing LRO, disable LRO first\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
+		netdev_warn(dev, "XDP expects header/data in single page, any_header_sg required\n");
 		return -EINVAL;
-	dev->mtu = new_mtu;
+	}
+
+	if (dev->mtu > max_sz) {
+		netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
+		return -EINVAL;
+	}
+
+	curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
+	if (prog)
+		xdp_qp = nr_cpu_ids;
+
+	/* XDP requires extra queues for XDP_TX */
+	if (curr_qp + xdp_qp > vi->max_queue_pairs) {
+		netdev_warn(dev, "request %i queues but max is %i\n",
+			    curr_qp + xdp_qp, vi->max_queue_pairs);
+		return -ENOMEM;
+	}
+
+	err = virtnet_set_queues(vi, curr_qp + xdp_qp);
+	if (err) {
+		dev_warn(&dev->dev, "XDP Device queue allocation failure.\n");
+		return err;
+	}
+
+	if (prog) {
+		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
+		if (IS_ERR(prog)) {
+			virtnet_set_queues(vi, curr_qp);
+			return PTR_ERR(prog);
+		}
+	}
+
+	vi->xdp_queue_pairs = xdp_qp;
+	netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
 	return 0;
 }
 
+static bool virtnet_xdp_query(struct net_device *dev)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < vi->max_queue_pairs; i++) {
+		if (vi->rq[i].xdp_prog)
+			return true;
+	}
+	return false;
+}
+
+static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return virtnet_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = virtnet_xdp_query(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open = virtnet_open,
 	.ndo_stop = virtnet_close,
@@ -1437,7 +1763,6 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_set_mac_address = virtnet_set_mac_address,
 	.ndo_set_rx_mode = virtnet_set_rx_mode,
-	.ndo_change_mtu = virtnet_change_mtu,
 	.ndo_get_stats64 = virtnet_stats,
 	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
@@ -1447,6 +1772,7 @@ static const struct net_device_ops virtnet_netdev = {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll = virtnet_busy_poll,
 #endif
+	.ndo_xdp = virtnet_xdp,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)
@@ -1508,12 +1834,20 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 
 static void free_receive_bufs(struct virtnet_info *vi)
 {
+	struct bpf_prog *old_prog;
 	int i;
 
+	rtnl_lock();
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		while (vi->rq[i].pages)
 			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
+
+		old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
+		RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
+		if (old_prog)
+			bpf_prog_put(old_prog);
 	}
+	rtnl_unlock();
 }
 
 static void free_receive_page_frags(struct virtnet_info *vi)
@@ -1524,6 +1858,16 @@ static void free_receive_page_frags(struct virtnet_info *vi)
 			put_page(vi->rq[i].alloc_frag.page);
 }
 
+static bool is_xdp_queue(struct virtnet_info *vi, int q)
+{
+	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+		return false;
+	else if (q < vi->curr_queue_pairs)
+		return true;
+	else
+		return false;
+}
+
 static void free_unused_bufs(struct virtnet_info *vi)
 {
 	void *buf;
@@ -1531,8 +1875,12 @@ static void free_unused_bufs(struct virtnet_info *vi)
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
 		struct virtqueue *vq = vi->sq[i].vq;
-		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
-			dev_kfree_skb(buf);
+		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
+			if (!is_xdp_queue(vi, i))
+				dev_kfree_skb(buf);
+			else
+				put_page(virt_to_head_page(buf));
+		}
 	}
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
@@ -1753,6 +2101,9 @@ static bool virtnet_validate_features(struct virtio_device *vdev)
 	return true;
 }
 
+#define MIN_MTU ETH_MIN_MTU
+#define MAX_MTU ETH_MAX_MTU
+
 static int virtnet_probe(struct virtio_device *vdev)
 {
 	int i, err;
@@ -1826,6 +2177,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 
 	dev->vlan_features = dev->features;
 
+	/* MTU range: 68 - 65535 */
+	dev->min_mtu = MIN_MTU;
+	dev->max_mtu = MAX_MTU;
+
 	/* Configuration may specify what MAC to use.  Otherwise random. */
 	if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
 		virtio_cread_bytes(vdev,
@@ -1880,15 +2235,22 @@ static int virtnet_probe(struct virtio_device *vdev)
 		mtu = virtio_cread16(vdev,
 				     offsetof(struct virtio_net_config,
 					      mtu));
-		if (virtnet_change_mtu(dev, mtu))
+		if (mtu < dev->min_mtu) {
 			__virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
+		} else {
+			dev->mtu = mtu;
+			dev->max_mtu = mtu;
+		}
 	}
 
 	if (vi->any_header_sg)
 		dev->needed_headroom = vi->hdr_len;
 
-	/* Use single tx/rx queue pair as default */
-	vi->curr_queue_pairs = 1;
+	/* Enable multiqueue by default */
+	if (num_online_cpus() >= max_queue_pairs)
+		vi->curr_queue_pairs = max_queue_pairs;
+	else
+		vi->curr_queue_pairs = num_online_cpus();
 	vi->max_queue_pairs = max_queue_pairs;
 
 	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
@@ -1919,6 +2281,10 @@ static int virtnet_probe(struct virtio_device *vdev)
 		goto free_unregister_netdev;
 	}
 
+	rtnl_lock();
+	virtnet_set_queues(vi, vi->curr_queue_pairs);
+	rtnl_unlock();
+
 	/* Assume link up if device can't report link status,
 	   otherwise get link status from config. */
 	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
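
For context on how the new .ndo_xdp hook gets exercised, the sketch below is a minimal XDP program of the kind virtnet_xdp_set() would attach; it is not part of this patch. It drops IPv4 UDP frames and passes everything else, so only the XDP_PASS and XDP_DROP actions handled in do_xdp_prog() above are hit (XDP_TX would instead bounce the frame back out via virtnet_xdp_xmit()). The function name, file names, and the "prog" section name are illustrative; struct xdp_md and enum xdp_action come from the <linux/bpf.h> UAPI.

/* Illustration only -- not part of this patch. Bounds checks are required
 * by the BPF verifier before any packet data is dereferenced.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <linux/in.h>

#ifndef SEC
#define SEC(name) __attribute__((section(name), used))
#endif

SEC("prog")
int xdp_drop_ipv4_udp(struct xdp_md *ctx)
{
	void *data     = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;
	struct ethhdr *eth = data;
	struct iphdr *iph;

	if ((void *)(eth + 1) > data_end)
		return XDP_PASS;
	if (eth->h_proto != __constant_htons(ETH_P_IP))
		return XDP_PASS;

	iph = (void *)(eth + 1);
	if ((void *)(iph + 1) > data_end)
		return XDP_PASS;

	/* Drop UDP, let everything else continue to the normal rx path. */
	return iph->protocol == IPPROTO_UDP ? XDP_DROP : XDP_PASS;
}

char _license[] SEC("license") = "GPL";

Built with something like `clang -O2 -target bpf -c xdp_drop_udp.c -o xdp_drop_udp.o`, the object can be attached through the new XDP_SETUP_PROG path (for example with a recent iproute2: `ip link set dev <dev> xdp obj xdp_drop_udp.o`) and detached with `ip link set dev <dev> xdp off`, which reaches virtnet_xdp_set() with a NULL prog and releases the extra XDP_TX queues.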