From ef3f2cfc6010c13feb40cfb7fd7490832cf86f45 Mon Sep 17 00:00:00 2001 From: Camelia Groza Date: Tue, 8 Jun 2021 13:33:42 +0300 Subject: [PATCH] sdk_dpaa: rework private bpool seeding per CPU per interface The dpa_bp introduction has modified the existing design of buffer pool seeding. The most notable (and unintended) change was that instead of placing in the buffer pool an allotment of buffers for each CPU, for each interface, only an allotment for each CPU was performed. With this came the possibility that, in certain scenarios, each interface could draw from the buffer pool enough buffers to drain it completely, but not enough to trigger a refill. This may result in a complete stall of reception on the interface. The current patch reintroduces the initial design, which makes sure each probed interface contributes to the common buffer pool an allotment of buffers for each CPU, removing the above-mentioned issue. In consequence, the per buffer pool seed_cb() callback is removed. The seeding is now done at net device probe. Reported-by: Radu Bulie Signed-off-by: Madalin Bucur Signed-off-by: Camelia Groza --- .../net/ethernet/freescale/sdk_dpaa/Kconfig | 11 +++--- .../ethernet/freescale/sdk_dpaa/dpaa_eth.c | 38 +++++++++++++++++-- .../ethernet/freescale/sdk_dpaa/dpaa_eth.h | 8 +--- .../freescale/sdk_dpaa/dpaa_eth_common.c | 6 --- .../ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c | 29 +------------- .../freescale/sdk_dpaa/dpaa_ethtool.c | 6 +-- 6 files changed, 46 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig b/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig index d7bbc1dce928..bb6693e2d702 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig +++ b/drivers/net/ethernet/freescale/sdk_dpaa/Kconfig @@ -97,13 +97,14 @@ config FSL_DPAA_1588 Enable IEEE1588 support code. 
config FSL_DPAA_ETH_MAX_BUF_COUNT - int "Maximum nuber of buffers in private bpool" + int "Maximum number of buffers in the private bpool" depends on FSL_SDK_DPAA_ETH range 64 2048 default "128" help - The maximum number of buffers to be by default allocated in the DPAA-Ethernet private port's - buffer pool. One needn't normally modify this, as it has probably been tuned for performance + The maximum number of buffers to be by default allocated in the DPAA-Ethernet + private port's buffer pool per CPU per user (ports that share the same pool). + One needn't normally modify this, as it has probably been tuned for performance already. This cannot be lower than DPAA_ETH_REFILL_THRESHOLD. config FSL_DPAA_ETH_REFILL_THRESHOLD @@ -113,8 +114,8 @@ config FSL_DPAA_ETH_REFILL_THRESHOLD default "80" help The DPAA-Ethernet driver will start replenishing buffer pools whose count - falls below this threshold. This must be related to DPAA_ETH_MAX_BUF_COUNT. One needn't normally - modify this value unless one has very specific performance reasons. + falls below this threshold per CPU. This must be related to DPAA_ETH_MAX_BUF_COUNT. + One needn't normally modify this value unless one has very specific performance reasons. 
config FSL_DPAA_CS_THRESHOLD_1G hex "Egress congestion threshold on 1G ports" diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c index 0764d53868dc..44bae5e54d04 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.c @@ -443,7 +443,7 @@ priv_rx_error_dqrr(struct qman_portal *portal, priv = netdev_priv(net_dev); percpu_priv = raw_cpu_ptr(priv->percpu_priv); - count_ptr = raw_cpu_ptr(priv->dpa_bp->percpu_count); + count_ptr = raw_cpu_ptr(priv->percpu_count); if (dpaa_eth_napi_schedule(percpu_priv, portal)) return qman_cb_dqrr_stop; @@ -481,7 +481,7 @@ priv_rx_default_dqrr(struct qman_portal *portal, /* IRQ handler, non-migratable; safe to use raw_cpu_ptr here */ percpu_priv = raw_cpu_ptr(priv->percpu_priv); - count_ptr = raw_cpu_ptr(dpa_bp->percpu_count); + count_ptr = raw_cpu_ptr(priv->percpu_count); if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal))) return qman_cb_dqrr_stop; @@ -795,10 +795,8 @@ dpa_priv_bp_probe(struct device *dev) return ERR_PTR(-ENOMEM); } - dpa_bp->percpu_count = devm_alloc_percpu(dev, *dpa_bp->percpu_count); dpa_bp->target_count = CONFIG_FSL_DPAA_ETH_MAX_BUF_COUNT; - dpa_bp->seed_cb = dpa_bp_priv_seed; dpa_bp->free_buf_cb = _dpa_bp_free_pf; return dpa_bp; @@ -884,6 +882,23 @@ static int dpa_priv_bp_create(struct net_device *net_dev, struct dpa_bp *dpa_bp, return 0; } +static void dpa_priv_bp_seed(struct net_device *net_dev) +{ + struct dpa_priv_s *priv = netdev_priv(net_dev); + struct dpa_bp *dpa_bp = priv->dpa_bp; + int i; + + /* Give each CPU an allotment of buffers */ + for_each_possible_cpu(i) { + /* Although we access another CPU's counters here + * we do it at boot time so it is safe + */ + int *count_ptr = per_cpu_ptr(priv->percpu_count, i); + + dpaa_eth_refill_bpools(dpa_bp, count_ptr); + } +} + static const struct of_device_id dpa_match[]; #ifdef CONFIG_FSL_DPAA_DBG_LOOP @@ -1062,11 +1077,24 @@ 
dpaa_eth_priv_probe(struct platform_device *_of_dev) err = -ENOMEM; goto alloc_percpu_failed; } + for_each_possible_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); memset(percpu_priv, 0, sizeof(*percpu_priv)); } + priv->percpu_count = devm_alloc_percpu(dev, *priv->percpu_count); + if (!priv->percpu_count) { + dev_err(dev, "devm_alloc_percpu() failed\n"); + err = -ENOMEM; + goto alloc_percpu_failed; + } + + for_each_possible_cpu(i) { + int *percpu_count = per_cpu_ptr(priv->percpu_count, i); + *percpu_count = 0; + } + /* Initialize NAPI */ err = dpa_private_napi_add(net_dev); @@ -1075,6 +1103,8 @@ dpaa_eth_priv_probe(struct platform_device *_of_dev) err = dpa_private_netdev_init(net_dev); + dpa_priv_bp_seed(net_dev); + if (err < 0) goto netdev_init_failed; diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h index e1acce7e928f..f8ab524b368c 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth.h @@ -293,11 +293,7 @@ struct dpa_bp { * the buffers */ void __iomem *vaddr; - /* current number of buffers in the bpool alloted to this CPU */ - int __percpu *percpu_count; atomic_t refs; - /* some bpools need to be seeded before use by this cb */ - int (*seed_cb)(struct dpa_bp *); /* some bpools need to be emptied before freeing; this cb is used * for freeing of individual buffers taken from the pool */ @@ -348,6 +344,8 @@ struct dpa_percpu_priv_s { struct dpa_priv_s { struct dpa_percpu_priv_s __percpu *percpu_priv; struct dpa_bp *dpa_bp; + /* current number of buffers in the bpool allotted to this CPU */ + int __percpu *percpu_count; /* Store here the needed Tx headroom for convenience and speed * (even though it can be computed based on the fields of buf_layout) */ @@ -431,8 +429,6 @@ struct fm_port_fqs { extern struct net_device *dpa_loop_netdevs[20]; #endif -/* functions with different implementation for SG and non-SG: */ -int 
dpa_bp_priv_seed(struct dpa_bp *dpa_bp); int dpaa_eth_refill_bpools(struct dpa_bp *dpa_bp, int *count_ptr); void __hot _dpa_rx(struct net_device *net_dev, struct qman_portal *portal, diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c index 04d5e6a77838..5bb6964bc562 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_common.c @@ -706,12 +706,6 @@ dpa_bp_alloc(struct dpa_bp *dpa_bp, struct device *dev) dpa_bp->dev = dev; - if (dpa_bp->seed_cb) { - err = dpa_bp->seed_cb(dpa_bp); - if (err) - goto bman_free_pool; - } - dpa_bpid2pool_map(dpa_bp->bpid, dpa_bp); return 0; diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c index c05cbe8d0687..cc6e5680667b 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_eth_sg.c @@ -166,7 +166,7 @@ dma_map_failed: build_skb_failed: netdev_alloc_failed: - net_err_ratelimited("dpa_bp_add_8_bufs() failed\n"); + net_err_ratelimited("%s failed\n", __func__); WARN_ONCE(1, "Memory allocation failure on Rx\n"); bm_buffer_set64(&bmb[i], 0); @@ -179,31 +179,6 @@ netdev_alloc_failed: return 0; } -/* Cold path wrapper over _dpa_bp_add_8_bufs(). 
*/ -static void dpa_bp_add_8_bufs(const struct dpa_bp *dpa_bp, int cpu) -{ - int *count_ptr = per_cpu_ptr(dpa_bp->percpu_count, cpu); - *count_ptr += _dpa_bp_add_8_bufs(dpa_bp); -} - -int dpa_bp_priv_seed(struct dpa_bp *dpa_bp) -{ - int i; - - /* Give each CPU an allotment of "config_count" buffers */ - for_each_possible_cpu(i) { - int j; - - /* Although we access another CPU's counters here - * we do it at boot time so it is safe - */ - for (j = 0; j < dpa_bp->config_count; j += 8) - dpa_bp_add_8_bufs(dpa_bp, i); - } - return 0; -} -EXPORT_SYMBOL(dpa_bp_priv_seed); - /* Add buffers/(pages) for Rx processing whenever bpool count falls below * REFILL_THRESHOLD. */ @@ -1117,7 +1092,7 @@ int __hot dpa_tx_extended(struct sk_buff *skb, struct net_device *net_dev, /* Non-migratable context, safe to use raw_cpu_ptr */ percpu_priv = raw_cpu_ptr(priv->percpu_priv); percpu_stats = &percpu_priv->stats; - countptr = raw_cpu_ptr(priv->dpa_bp->percpu_count); + countptr = raw_cpu_ptr(priv->percpu_count); clear_fd(&fd); diff --git a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_ethtool.c index 57553bb672c0..f6632ed74a24 100644 --- a/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/sdk_dpaa/dpaa_ethtool.c @@ -411,12 +411,10 @@ static void dpa_get_ethtool_stats(struct net_device *net_dev, struct dpa_ern_cnt ern_cnt; struct dpa_priv_s *priv; unsigned int num_cpus, offset; - struct dpa_bp *dpa_bp; int total_stats, i; total_stats = dpa_get_sset_count(net_dev, ETH_SS_STATS); priv = netdev_priv(net_dev); - dpa_bp = priv->dpa_bp; num_cpus = num_online_cpus(); bp_count = 0; @@ -427,8 +425,8 @@ static void dpa_get_ethtool_stats(struct net_device *net_dev, for_each_online_cpu(i) { percpu_priv = per_cpu_ptr(priv->percpu_priv, i); - if (dpa_bp->percpu_count) - bp_count = *(per_cpu_ptr(dpa_bp->percpu_count, i)); + if (priv->percpu_count) + bp_count = *(per_cpu_ptr(priv->percpu_count, i)); 
rx_errors.dme += percpu_priv->rx_errors.dme; rx_errors.fpe += percpu_priv->rx_errors.fpe; -- 2.17.1