From 143b86f346c709d6fed661aee36cea70ef874815 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 21 Oct 2022 00:58:45 -0700 Subject: ice: Enable RX queue selection using skbedit action This patch uses TC skbedit queue_mapping action to support forwarding packets to a device queue. Such filters with action forward to queue will be the highest priority switch filter in HW. Example: $ tc filter add dev ens4f0 protocol ip ingress flower\ dst_ip 192.168.1.12 ip_proto tcp dst_port 5001\ action skbedit queue_mapping 5 skip_sw The above command adds an ingress filter, incoming packets qualifying the match will be accepted into queue 5. The queue number is in decimal format. Refactored ice_add_tc_flower_adv_fltr() to consolidate code with action FWD_TO_VSI and FWD_TO QUEUE. Reviewed-by: Sridhar Samudrala Reviewed-by: Vinicius Costa Gomes Signed-off-by: Amritha Nambiar Signed-off-by: Paolo Abeni --- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0f6718719453..df65e829ea33 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8283,7 +8283,7 @@ static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) rule.rid = fltr->rid; rule.rule_id = fltr->rule_id; - rule.vsi_handle = fltr->dest_id; + rule.vsi_handle = fltr->dest_vsi_handle; status = ice_rem_adv_rule_by_id(&pf->hw, &rule); if (status) { if (status == -ENOENT) -- cgit From 068c38ad88ccb09e5e966d4db5cedab0e02b3b95 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 26 Oct 2022 15:22:14 +0200 Subject: net: Remove the obsolete u64_stats_fetch_*_irq() users (drivers). Now that the 32bit UP oddity is gone and 32bit uses always a sequence count, there is no need for the fetch_irq() variants anymore. Convert to the regular interface.
Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Acked-by: Peter Zijlstra (Intel) Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index df65e829ea33..1f27dc20b4f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6370,10 +6370,10 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, unsigned int start; do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); *pkts = stats.pkts; *bytes = stats.bytes; - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); } /** -- cgit From ac73d4bf2cdaf2cb8a43df8ee4a5c066d2c5d7b4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:04 +0100 Subject: net: make drivers to use SET_NETDEV_DEVLINK_PORT to set devlink_port Benefit from the previously implemented tracking of netdev events in devlink code and instead of calling devlink_port_type_eth_set() and devlink_port_type_clear() to set devlink port type and link to related netdev, use SET_NETDEV_DEVLINK_PORT() macro to assign devlink_port pointer to netdevice which is about to be registered. 
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 1f27dc20b4f1..74d25fda11bd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4603,6 +4603,7 @@ static int ice_register_netdev(struct ice_pf *pf) if (err) goto err_devlink_create; + SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); err = register_netdev(vsi->netdev); if (err) goto err_register_netdev; @@ -4611,8 +4612,6 @@ static int ice_register_netdev(struct ice_pf *pf) netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); - devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); - return 0; err_register_netdev: ice_devlink_destroy_pf_port(pf); -- cgit From 77df1db80da384c565106321f5934967690da7dd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Nov 2022 17:02:10 +0100 Subject: net: remove unused ndo_get_devlink_port Remove ndo_get_devlink_port, which is no longer used, along with its implementations in drivers.
Signed-off-by: Jiri Pirko Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 74d25fda11bd..a9fc89aebebe 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -298,20 +298,6 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) return status; } -/** - * ice_get_devlink_port - Get devlink port from netdev - * @netdev: the netdevice structure - */ -static struct devlink_port *ice_get_devlink_port(struct net_device *netdev) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - - if (!ice_is_switchdev_running(pf)) - return NULL; - - return &pf->devlink_port; -} - /** * ice_vsi_sync_fltr - Update the VSI filter list to the HW * @vsi: ptr to the VSI @@ -9107,5 +9093,4 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = ice_xsk_wakeup, - .ndo_get_devlink_port = ice_get_devlink_port, }; -- cgit From 80fe30a8c1f4d2177e1d25922cc6a8c439198103 Mon Sep 17 00:00:00 2001 From: Michal Wilczynski Date: Tue, 15 Nov 2022 11:48:23 +0100 Subject: ice: Prevent ADQ, DCB coexistence with Custom Tx scheduler ADQ and DCB might interfere with Custom Tx Scheduler changes that the user might introduce using the devlink-rate API. Check whether ADQ or DCB is active when the user tries to change any setting in the exported Tx scheduler tree. If either is active, block the user from doing so and log an appropriate message. Remove the exported hierarchy if the user enables ADQ or DCB. Prevent ADQ or DCB from being configured if the user has already made changes using the devlink-rate API.
Signed-off-by: Michal Wilczynski Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a9fc89aebebe..d6f460ff1b72 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -8580,6 +8580,12 @@ static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) switch (mode) { case TC_MQPRIO_MODE_CHANNEL: + if (pf->hw.port_info->is_custom_tx_enabled) { + dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n"); + return -EBUSY; + } + ice_tear_down_devlink_rate_tree(pf); + ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) { netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", -- cgit From 2fd5e433cd2685d4af471209e48be1a951687193 Mon Sep 17 00:00:00 2001 From: Benjamin Mikailenko Date: Fri, 18 Nov 2022 16:20:01 -0500 Subject: ice: Accumulate HW and Netdev statistics over reset Resets happen with or without user interaction. For example, incidents such as TX hang or a reconfiguration of parameters will result in a reset. During reset, hardware and software statistics were set to zero. This created an issue for the user where a reset happens in the background, statistics set to zero, and the user checks statistics expecting them to be populated. To ensure this doesn't happen, keep accumulating stats over reset. 1. Remove function calls which reset hardware and netdev statistics. 2. Do not rollover statistics in ice_stat_update40 during reset. 
Signed-off-by: Benjamin Mikailenko Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d6f460ff1b72..aa80e2b896b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6397,6 +6397,7 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, */ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { + struct rtnl_link_stats64 *net_stats, *stats_prev; struct rtnl_link_stats64 *vsi_stats; u64 pkts, bytes; int i; @@ -6436,10 +6437,28 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_unlock(); - vsi->net_stats.tx_packets = vsi_stats->tx_packets; - vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; - vsi->net_stats.rx_packets = vsi_stats->rx_packets; - vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; + net_stats = &vsi->net_stats; + stats_prev = &vsi->net_stats_prev; + + /* clear prev counters after reset */ + if (vsi_stats->tx_packets < stats_prev->tx_packets || + vsi_stats->rx_packets < stats_prev->rx_packets) { + stats_prev->tx_packets = 0; + stats_prev->tx_bytes = 0; + stats_prev->rx_packets = 0; + stats_prev->rx_bytes = 0; + } + + /* update netdev counters */ + net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; + net_stats->tx_bytes += vsi_stats->tx_bytes - stats_prev->tx_bytes; + net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; + net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; + + stats_prev->tx_packets = vsi_stats->tx_packets; + stats_prev->tx_bytes = vsi_stats->tx_bytes; + stats_prev->rx_packets = vsi_stats->rx_packets; + stats_prev->rx_bytes = vsi_stats->rx_bytes; kfree(vsi_stats); } @@ -6501,6 +6520,9 @@ void 
ice_update_pf_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, &cur_ps->eth.rx_bytes); -- cgit From 288ecf491b1654845ae99c79b7fefad2d3ea47bd Mon Sep 17 00:00:00 2001 From: Benjamin Mikailenko Date: Fri, 18 Nov 2022 16:20:02 -0500 Subject: ice: Accumulate ring statistics over reset Resets may occur with or without user interaction. For example, a TX hang or reconfiguration of parameters will result in a reset. During reset, the VSI is freed, freeing any statistics structures inside as well. This would create an issue for the user where a reset happens in the background, statistics set to zero, and the user checks ring statistics expecting them to be populated. To ensure this doesn't happen, accumulate ring statistics over reset. Define a new ring statistics structure, ice_ring_stats. The new structure lives in the VSI's parent, preserving ring statistics when VSI is freed. 1. Define a new structure vsi_ring_stats in the PF scope 2. Allocate/free stats only during probe, unload, or change in ring size 3. 
Replace previous ring statistics functionality with new structure Signed-off-by: Benjamin Mikailenko Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 66 ++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 15 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index aa80e2b896b1..a21085c0cfee 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -130,12 +130,17 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) ice_for_each_txq(vsi, i) { struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; + struct ice_ring_stats *ring_stats; if (!tx_ring) continue; if (ice_ring_ch_enabled(tx_ring)) continue; + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this @@ -144,8 +149,8 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * prev_pkt would be negative if there was no * pending work. */ - packets = tx_ring->stats.pkts & INT_MAX; - if (tx_ring->tx_stats.prev_pkt == packets) { + packets = ring_stats->stats.pkts & INT_MAX; + if (ring_stats->tx_stats.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; @@ -155,7 +160,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * to ice_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt = + ring_stats->tx_stats.prev_pkt = ice_get_tx_pending(tx_ring) ? 
packets : -1; } } @@ -2546,13 +2551,20 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) ice_for_each_xdp_txq(vsi, i) { u16 xdp_q_idx = vsi->alloc_txq + i; + struct ice_ring_stats *ring_stats; struct ice_tx_ring *xdp_ring; xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); - if (!xdp_ring) goto free_xdp_rings; + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) { + ice_free_tx_ring(xdp_ring); + goto free_xdp_rings; + } + + xdp_ring->ring_stats = ring_stats; xdp_ring->q_index = xdp_q_idx; xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; @@ -2575,9 +2587,13 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) return 0; free_xdp_rings: - for (; i >= 0; i--) - if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + for (; i >= 0; i--) { + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) { + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; ice_free_tx_ring(vsi->xdp_rings[i]); + } + } return -ENOMEM; } @@ -2778,6 +2794,8 @@ free_qmap: synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); } + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } @@ -4756,11 +4774,18 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_init_pf_unroll; } + pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi, + sizeof(*pf->vsi_stats), GFP_KERNEL); + if (!pf->vsi_stats) { + err = -ENOMEM; + goto err_init_vsi_unroll; + } + err = ice_init_interrupt_scheme(pf); if (err) { dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err); err = -EIO; - goto err_init_vsi_unroll; + goto err_init_vsi_stats_unroll; } /* In case of MSIX we are going to setup the misc vector right here @@ -4941,6 +4966,9 @@ err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: ice_clear_interrupt_scheme(pf); +err_init_vsi_stats_unroll: + devm_kfree(dev, pf->vsi_stats); + 
pf->vsi_stats = NULL; err_init_vsi_unroll: devm_kfree(dev, pf->vsi); err_init_pf_unroll: @@ -5063,6 +5091,8 @@ static void ice_remove(struct pci_dev *pdev) continue; ice_vsi_free_q_vectors(pf->vsi[i]); } + devm_kfree(&pdev->dev, pf->vsi_stats); + pf->vsi_stats = NULL; ice_deinit_pf(pf); ice_devlink_destroy_regions(pf); ice_deinit_hw(&pf->hw); @@ -6380,14 +6410,16 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (!ring) + if (!ring || !ring->ring_stats) continue; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp, + ring->ring_stats->stats, &pkts, + &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->tx_stats.restart_q; - vsi->tx_busy += ring->tx_stats.tx_busy; - vsi->tx_linearize += ring->tx_stats.tx_linearize; + vsi->tx_restart += ring->ring_stats->tx_stats.restart_q; + vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy; + vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize; } } @@ -6422,12 +6454,16 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring_stats *ring_stats; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ring_stats = ring->ring_stats; + ice_fetch_u64_stats_per_ring(&ring_stats->syncp, + ring_stats->stats, &pkts, + &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; - vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; - vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; + vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; + vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed; } /* update XDP Tx rings counters */ -- cgit From 6b1ff5d392283b737abc038f7ab1509d9b8311c7 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Mon, 5 Dec 2022 11:52:43 -0800 
Subject: ice: always call ice_ptp_link_change and make it void The ice_ptp_link_change function is currently only called for E822 based hardware. Future changes are going to extend this function to perform additional tasks on link change. Always call this function, moving the E810 check from the callers down to just before we call the E822-specific function required to restart the PHY. This function also returns an error value, but none of the callers actually check it. In general, the errors it produces are more likely systemic problems such as invalid or corrupt port numbers. No caller checks these, and so no warning is logged. Re-order the flag checks so that ICE_FLAG_PTP is checked first. Drop the unnecessary check for ICE_FLAG_PTP_SUPPORTED, as ICE_FLAG_PTP will not be set except when ICE_FLAG_PTP_SUPPORTED is set. Convert the port checks to WARN_ON_ONCE, in order to generate a kernel stack trace when they are hit. Convert the function to void since no caller actually checks these return values. 
Co-developed-by: Dave Ertman Signed-off-by: Dave Ertman Signed-off-by: Jacob Keller Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2b23b4714a26..a9a7f8b52140 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1111,8 +1111,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + ice_ptp_link_change(pf, pf->hw.pf_id, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -6340,8 +6339,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, pf->hw.pf_id, true); } /* Perform an initial read of the statistics registers now to @@ -6773,8 +6771,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev && vsi->type == ICE_VSI_PF) { vlan_err = ice_vsi_del_vlan_zero(vsi); - if (!ice_is_e810(&vsi->back->hw)) - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { -- cgit