From 18cf3d31f8292e3994ec972097419f1c23e0d2ff Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Fri, 9 Jun 2023 15:44:18 +0300 Subject: [PATCH 01/15] net/mlx5: Track the current number of completion EQs In preparation to allocate completion EQs, add a counter to track the number of completion EQs currently allocated. Store the maximum number of EQs in max_comp_eqs variable. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 24 ++++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 3db4866d7880..66257f7879b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -58,7 +58,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_eqs; + int curr_comp_eqs; + int max_comp_eqs; struct mlx5_irq_table *irq_table; struct mlx5_irq **comp_irqs; struct mlx5_irq *ctrl_irq; @@ -452,6 +453,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); eq_table->irq_table = mlx5_irq_table_get(dev); + eq_table->curr_comp_eqs = 0; return 0; } @@ -807,7 +809,7 @@ static void comp_irqs_release_pci(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs); + mlx5_irqs_release_vectors(table->comp_irqs, table->max_comp_eqs); } static int comp_irqs_request_pci(struct mlx5_core_dev *dev) @@ -821,7 +823,7 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev) int cpu; int i; - ncomp_eqs = table->num_comp_eqs; + ncomp_eqs = table->max_comp_eqs; cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); if (!cpus) return -ENOMEM; @@ -847,13 +849,13 @@ static void comp_irqs_release_sf(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs); + mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->max_comp_eqs); } static int comp_irqs_request_sf(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int ncomp_eqs = table->num_comp_eqs; + int ncomp_eqs = table->max_comp_eqs; return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); } @@ -874,7 +876,7 @@ static int comp_irqs_request(struct mlx5_core_dev *dev) int ncomp_eqs; int ret; - ncomp_eqs = table->num_comp_eqs; + ncomp_eqs = table->max_comp_eqs; table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); if (!table->comp_irqs) return -ENOMEM; @@ -901,7 +903,7 @@ static int alloc_rmap(struct mlx5_core_dev *mdev) if (mlx5_core_is_sf(mdev)) return 0; - eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs); + eq_table->rmap = alloc_irq_cpu_rmap(eq_table->max_comp_eqs); if (!eq_table->rmap) return -ENOMEM; return 0; @@ -934,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) eq->core.eqn); tasklet_disable(&eq->tasklet_ctx.task); kfree(eq); + table->curr_comp_eqs--; } comp_irqs_release(dev); free_rmap(dev); @@ -973,6 +976,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) goto err_irqs_req; } + table->max_comp_eqs = ncomp_eqs; INIT_LIST_HEAD(&table->comp_eqs_list); nent = comp_eq_depth_devlink_param_get(dev); @@ -1008,9 +1012,9 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) 
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); + table->curr_comp_eqs++; } - table->num_comp_eqs = ncomp_eqs; return 0; clean_eq: @@ -1057,7 +1061,7 @@ int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_eqs; + return dev->priv.eq_table->max_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); @@ -1148,7 +1152,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - eq_table->num_comp_eqs = get_num_eqs(dev); + eq_table->max_comp_eqs = get_num_eqs(dev); err = create_async_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create async EQs\n"); From a1772de78d7303e33517d1741e0fce1c7247bec4 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 11 Jun 2023 14:35:36 +0300 Subject: [PATCH 02/15] net/mlx5: Refactor completion IRQ request/release API Introduce a per-vector completion IRQ request API that requests a single IRQ for a given vector index instead of multiple IRQs request API. On driver load, loop over all completion vectors and request an IRQ for each one via the newly introduced API. Symmetrically, introduce an IRQ release API per vector. On driver unload, loop over all vectors and release each completion IRQ via the new per-vector API. As IRQ vectors will be requested dynamically later in the patchset, add a cpumask of the bounded CPUs to avoid the possible mapping of two IRQs of the same device to the same cpu. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 36 ++++++-- .../mellanox/mlx5/core/irq_affinity.c | 82 +++++++++---------- .../ethernet/mellanox/mlx5/core/mlx5_irq.h | 26 +++--- .../net/ethernet/mellanox/mlx5/core/pci_irq.c | 60 +++++--------- 4 files changed, 101 insertions(+), 103 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 66257f7879b7..268fd61ae8c0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -64,6 +64,7 @@ struct mlx5_eq_table { struct mlx5_irq **comp_irqs; struct mlx5_irq *ctrl_irq; struct cpu_rmap *rmap; + struct cpumask used_cpus; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -453,6 +454,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); eq_table->irq_table = mlx5_irq_table_get(dev); + cpumask_clear(&eq_table->used_cpus); eq_table->curr_comp_eqs = 0; return 0; } @@ -808,8 +810,10 @@ EXPORT_SYMBOL(mlx5_eq_update_ci); static void comp_irqs_release_pci(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; + int i; - mlx5_irqs_release_vectors(table->comp_irqs, table->max_comp_eqs); + for (i = 0; i < table->max_comp_eqs; i++) + mlx5_irq_release_vector(table->comp_irqs[i]); } static int comp_irqs_request_pci(struct mlx5_core_dev *dev) @@ -817,9 +821,9 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; const struct cpumask *prev = cpu_none_mask; const struct cpumask *mask; + struct mlx5_irq *irq; int ncomp_eqs; u16 *cpus; - int ret; int cpu; int i; @@ -840,24 +844,42 @@ static int comp_irqs_request_pci(struct mlx5_core_dev 
*dev) } spread_done: rcu_read_unlock(); - ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap); + for (i = 0; i < ncomp_eqs; i++) { + irq = mlx5_irq_request_vector(dev, cpus[i], i, &table->rmap); + if (IS_ERR(irq)) + break; + + table->comp_irqs[i] = irq; + } + kfree(cpus); - return ret; + return i ? i : PTR_ERR(irq); } static void comp_irqs_release_sf(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; + int i; - mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->max_comp_eqs); + for (i = 0; i < table->max_comp_eqs; i++) + mlx5_irq_affinity_irq_release(dev, table->comp_irqs[i]); } static int comp_irqs_request_sf(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int ncomp_eqs = table->max_comp_eqs; + struct mlx5_irq *irq; + int i; + + for (i = 0; i < table->max_comp_eqs; i++) { + irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, i); + if (IS_ERR(irq)) + break; + + table->comp_irqs[i] = irq; + } - return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs); + return i ? i : PTR_ERR(irq); } static void comp_irqs_release(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index fa467335526e..ed51800f9f67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -156,67 +156,61 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc * return least_loaded_irq; } -void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, - int num_irqs) +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) { struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); - int i; - - for (i = 0; i < num_irqs; i++) { - int cpu = cpumask_first(mlx5_irq_get_affinity_mask(irqs[i])); + int cpu; - synchronize_irq(pci_irq_vector(pool->dev->pdev, - mlx5_irq_get_index(irqs[i]))); - if (mlx5_irq_put(irqs[i])) - if (pool->irqs_per_cpu) - cpu_put(pool, cpu); - } + cpu = cpumask_first(mlx5_irq_get_affinity_mask(irq)); + synchronize_irq(pci_irq_vector(pool->dev->pdev, + mlx5_irq_get_index(irq))); + if (mlx5_irq_put(irq)) + if (pool->irqs_per_cpu) + cpu_put(pool, cpu); } /** - * mlx5_irq_affinity_irqs_request_auto - request one or more IRQs for mlx5 device. - * @dev: mlx5 device that is requesting the IRQs. - * @nirqs: number of IRQs to request. - * @irqs: an output array of IRQs pointers. + * mlx5_irq_affinity_irq_request_auto - request one IRQ for mlx5 device. + * @dev: mlx5 device that is requesting the IRQ. + * @used_cpus: cpumask of bounded cpus by the device + * @vecidx: vector index to request an IRQ for. * * Each IRQ is bounded to at most 1 CPU. - * This function is requesting IRQs according to the default assignment. + * This function is requesting an IRQ according to the default assignment. * The default assignment policy is: - * - in each iteration, request the least loaded IRQ which is not bound to any + * - request the least loaded IRQ which is not bound to any * CPU of the previous IRQs requested. * - * This function returns the number of IRQs requested, (which might be smaller than - * @nirqs), if successful, or a negative error code in case of an error. + * On success, this function updates used_cpus mask and returns an irq pointer. + * In case of an error, an appropriate error pointer is returned. 
*/ -int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, - struct mlx5_irq **irqs) +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx) { struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev); struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; - int i = 0; af_desc.is_managed = 1; cpumask_copy(&af_desc.mask, cpu_online_mask); - for (i = 0; i < nirqs; i++) { - if (mlx5_irq_pool_is_sf_pool(pool)) - irq = mlx5_irq_affinity_request(pool, &af_desc); - else - /* In case SF pool doesn't exists, fallback to the PF IRQs. - * The PF IRQs are already allocated and binded to CPU - * at this point. Hence, only an index is needed. - */ - irq = mlx5_irq_request(dev, i, NULL, NULL); - if (IS_ERR(irq)) - break; - irqs[i] = irq; - cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask); - mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", - pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), - cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), - mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); - } - if (!i) - return PTR_ERR(irq); - return i; + cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus); + if (mlx5_irq_pool_is_sf_pool(pool)) + irq = mlx5_irq_affinity_request(pool, &af_desc); + else + /* In case SF pool doesn't exists, fallback to the PF IRQs. + * The PF IRQs are already allocated and binded to CPU + * at this point. Hence, only an index is needed. + */ + irq = mlx5_irq_request(dev, vecidx, NULL, NULL); + + if (IS_ERR(irq)) + return irq; + + cpumask_or(used_cpus, used_cpus, mlx5_irq_get_affinity_mask(irq)); + mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n", + pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)), + cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)), + mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ); + + return irq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h index aa403a5ea34e..1088114e905d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h @@ -29,9 +29,9 @@ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq); struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, struct irq_affinity_desc *af_desc, struct cpu_rmap **rmap); -int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, - struct mlx5_irq **irqs, struct cpu_rmap **rmap); -void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs); +struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, + u16 vecidx, struct cpu_rmap **rmap); +void mlx5_irq_release_vector(struct mlx5_irq *irq); int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb); int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb); struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq); @@ -39,17 +39,17 @@ int mlx5_irq_get_index(struct mlx5_irq *irq); struct mlx5_irq_pool; #ifdef CONFIG_MLX5_SF -int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, - struct mlx5_irq **irqs); +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx); struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc); -void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs, - int num_irqs); +void 
mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq); #else -static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs, - struct mlx5_irq **irqs) +static inline +struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, + struct cpumask *used_cpus, u16 vecidx) { - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); } static inline struct mlx5_irq * @@ -58,7 +58,9 @@ mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc * return ERR_PTR(-EOPNOTSUPP); } -static inline void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, - struct mlx5_irq **irqs, int num_irqs) {} +static inline +void mlx5_irq_affinity_irq_release(struct mlx5_core_dev *dev, struct mlx5_irq *irq) +{ +} #endif #endif /* __MLX5_IRQ_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index cba2a4afb5fd..33a133c9918c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -432,19 +432,10 @@ static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev) return pool ? pool : irq_table->pcif_pool; } -/** - * mlx5_irqs_release - release one or more IRQs back to the system. - * @irqs: IRQs to be released. - * @nirqs: number of IRQs to be released. - */ -static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) +static void _mlx5_irq_release(struct mlx5_irq *irq) { - int i; - - for (i = 0; i < nirqs; i++) { - synchronize_irq(irqs[i]->map.virq); - mlx5_irq_put(irqs[i]); - } + synchronize_irq(irq->map.virq); + mlx5_irq_put(irq); } /** @@ -453,7 +444,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs) */ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq) { - mlx5_irqs_release(&ctrl_irq, 1); + _mlx5_irq_release(ctrl_irq); } /** @@ -569,53 +560,42 @@ void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map) EXPORT_SYMBOL(mlx5_msix_free); /** - * mlx5_irqs_release_vectors - release one or more IRQs back to the system. - * @irqs: IRQs to be released. - * @nirqs: number of IRQs to be released. + * mlx5_irq_release_vector - release one IRQ back to the system. + * @irq: the irq to release. */ -void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs) +void mlx5_irq_release_vector(struct mlx5_irq *irq) { - mlx5_irqs_release(irqs, nirqs); + _mlx5_irq_release(irq); } /** - * mlx5_irqs_request_vectors - request one or more IRQs for mlx5 device. - * @dev: mlx5 device that is requesting the IRQs. - * @cpus: CPUs array for binding the IRQs - * @nirqs: number of IRQs to request. - * @irqs: an output array of IRQs pointers. + * mlx5_irq_request_vector - request one IRQ for mlx5 device. + * @dev: mlx5 device that is requesting the IRQ. + * @cpu: CPU to bind the IRQ to. + * @vecidx: vector index to request an IRQ for. * @rmap: pointer to reverse map pointer for completion interrupts * * Each IRQ is bound to at most 1 CPU. - * This function is requests nirqs IRQs, starting from @vecidx. + * This function is requests one IRQ, for the given @vecidx. * - * This function returns the number of IRQs requested, (which might be smaller than - * @nirqs), if successful, or a negative error code in case of an error. + * This function returns a pointer to the irq on success, or an error pointer + * in case of an error. 
*/ -int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs, - struct mlx5_irq **irqs, struct cpu_rmap **rmap) +struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu, + u16 vecidx, struct cpu_rmap **rmap) { struct mlx5_irq_table *table = mlx5_irq_table_get(dev); struct mlx5_irq_pool *pool = table->pcif_pool; struct irq_affinity_desc af_desc; - struct mlx5_irq *irq; int offset = 1; - int i; if (!pool->xa_num_irqs.max) offset = 0; af_desc.is_managed = false; - for (i = 0; i < nirqs; i++) { - cpumask_clear(&af_desc.mask); - cpumask_set_cpu(cpus[i], &af_desc.mask); - irq = mlx5_irq_request(dev, i + offset, &af_desc, rmap); - if (IS_ERR(irq)) - break; - irqs[i] = irq; - } - - return i ? i : PTR_ERR(irq); + cpumask_clear(&af_desc.mask); + cpumask_set_cpu(cpu, &af_desc.mask); + return mlx5_irq_request(dev, vecidx + offset, &af_desc, rmap); } static struct mlx5_irq_pool * From c8a0245c3937d302a52c9d6b54de44b302e78265 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 18 Jun 2023 19:23:24 +0300 Subject: [PATCH 03/15] net/mlx5: Use xarray to store and manage completion IRQs Use xarray to store the completion IRQs instead of a fixed-size allocated array as not all completion IRQs will be requested on driver load, but rather on demand when an EQ is created. The xarray offers more scalability, reduced memory overhead, and provides the ability to dynamically resize the array when needed. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 61 ++++++++++---------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 268fd61ae8c0..ac22d4d6b94b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,7 +61,7 @@ struct mlx5_eq_table { int curr_comp_eqs; int max_comp_eqs; struct mlx5_irq_table *irq_table; - struct mlx5_irq **comp_irqs; + struct xarray comp_irqs; struct mlx5_irq *ctrl_irq; struct cpu_rmap *rmap; struct cpumask used_cpus; @@ -455,14 +455,18 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) eq_table->irq_table = mlx5_irq_table_get(dev); cpumask_clear(&eq_table->used_cpus); + xa_init(&eq_table->comp_irqs); eq_table->curr_comp_eqs = 0; return 0; } void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *table = dev->priv.eq_table; + mlx5_eq_debugfs_cleanup(dev); - kvfree(dev->priv.eq_table); + xa_destroy(&table->comp_irqs); + kvfree(table); } /* Async EQs */ @@ -810,10 +814,13 @@ EXPORT_SYMBOL(mlx5_eq_update_ci); static void comp_irqs_release_pci(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i; + struct mlx5_irq *irq; + unsigned long index; - for (i = 0; i < table->max_comp_eqs; i++) - mlx5_irq_release_vector(table->comp_irqs[i]); + xa_for_each(&table->comp_irqs, index, irq) { + xa_erase(&table->comp_irqs, index); + mlx5_irq_release_vector(irq); + } } static int comp_irqs_request_pci(struct mlx5_core_dev *dev) @@ -849,7 +856,8 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev) if (IS_ERR(irq)) break; - table->comp_irqs[i] = irq; + if (xa_err(xa_store(&table->comp_irqs, i, irq, GFP_KERNEL))) + break; } kfree(cpus); @@ -859,10 +867,13 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev) static void comp_irqs_release_sf(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = 
dev->priv.eq_table; - int i; + struct mlx5_irq *irq; + unsigned long index; - for (i = 0; i < table->max_comp_eqs; i++) - mlx5_irq_affinity_irq_release(dev, table->comp_irqs[i]); + xa_for_each(&table->comp_irqs, index, irq) { + xa_erase(&table->comp_irqs, index); + mlx5_irq_affinity_irq_release(dev, irq); + } } static int comp_irqs_request_sf(struct mlx5_core_dev *dev) @@ -876,7 +887,8 @@ static int comp_irqs_request_sf(struct mlx5_core_dev *dev) if (IS_ERR(irq)) break; - table->comp_irqs[i] = irq; + if (xa_err(xa_store(&table->comp_irqs, i, irq, GFP_KERNEL))) + break; } return i ? i : PTR_ERR(irq); @@ -884,31 +896,14 @@ static int comp_irqs_request_sf(struct mlx5_core_dev *dev) static void comp_irqs_release(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; - mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) : comp_irqs_release_pci(dev); - - kfree(table->comp_irqs); } static int comp_irqs_request(struct mlx5_core_dev *dev) { - struct mlx5_eq_table *table = dev->priv.eq_table; - int ncomp_eqs; - int ret; - - ncomp_eqs = table->max_comp_eqs; - table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL); - if (!table->comp_irqs) - return -ENOMEM; - - ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) : - comp_irqs_request_pci(dev); - if (ret < 0) - kfree(table->comp_irqs); - - return ret; + return mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) : + comp_irqs_request_pci(dev); } #ifdef CONFIG_RFS_ACCEL @@ -983,10 +978,11 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; + struct mlx5_irq *irq; + unsigned long index; int ncomp_eqs; int nent; int err; - int i; err = alloc_rmap(dev); if (err) @@ -1002,7 +998,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); nent = comp_eq_depth_devlink_param_get(dev); - for (i = 0; i < ncomp_eqs; i++) { + xa_for_each(&table->comp_irqs, index, irq) + { struct mlx5_eq_param param = {}; eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); @@ -1018,7 +1015,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .irq = table->comp_irqs[i], + .irq = irq, .nent = nent, }; From 54b2cf41b853e04ea09a2a075e4a6dc30638c298 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 12 Jun 2023 15:34:27 +0300 Subject: [PATCH 04/15] net/mlx5: Refactor completion IRQ request/release handlers in EQ layer Break the completion IRQ request/release functions into per-vector handlers for both PCI devices and SFs in the EQ layer. On EQ table creation, loop over all vectors and request an IRQ for each one using the new per-vector functions. Perform the symmetrical change when releasing IRQs on EQ table cleanup. 
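
As a rough sketch, the per-vector handlers reduce the old array-based
loops to a thin dispatch plus a caller-side loop (simplified; see the
diff below for the actual error handling):

	static int comp_irq_request(struct mlx5_core_dev *dev, u16 vecidx)
	{
		/* One vector at a time, PCI or SF flavor. */
		return mlx5_core_is_sf(dev) ? comp_irq_request_sf(dev, vecidx) :
					      comp_irq_request_pci(dev, vecidx);
	}

	/* EQ table creation then simply walks the vectors: */
	for (vecidx = 0; vecidx < table->max_comp_eqs; vecidx++)
		if (comp_irq_request(dev, vecidx) < 0)
			break;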
Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 109 +++++++++---------- 1 file changed, 51 insertions(+), 58 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ac22d4d6b94b..c01a5d8dbe9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -811,99 +811,84 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -static void comp_irqs_release_pci(struct mlx5_core_dev *dev) +static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_irq *irq; - unsigned long index; - xa_for_each(&table->comp_irqs, index, irq) { - xa_erase(&table->comp_irqs, index); - mlx5_irq_release_vector(irq); - } + irq = xa_load(&table->comp_irqs, vecidx); + if (!irq) + return; + + xa_erase(&table->comp_irqs, vecidx); + mlx5_irq_release_vector(irq); } -static int comp_irqs_request_pci(struct mlx5_core_dev *dev) +static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; const struct cpumask *prev = cpu_none_mask; const struct cpumask *mask; struct mlx5_irq *irq; - int ncomp_eqs; - u16 *cpus; + int found_cpu = 0; + int i = 0; int cpu; - int i; - - ncomp_eqs = table->max_comp_eqs; - cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL); - if (!cpus) - return -ENOMEM; - i = 0; rcu_read_lock(); for_each_numa_hop_mask(mask, dev->priv.numa_node) { for_each_cpu_andnot(cpu, mask, prev) { - cpus[i] = cpu; - if (++i == ncomp_eqs) + if (i++ == vecidx) { + found_cpu = cpu; goto spread_done; + } } prev = mask; } + spread_done: rcu_read_unlock(); - for (i = 0; i < ncomp_eqs; i++) { - irq = mlx5_irq_request_vector(dev, cpus[i], i, &table->rmap); - if (IS_ERR(irq)) - break; - - if (xa_err(xa_store(&table->comp_irqs, i, irq, GFP_KERNEL))) - break; - } + irq = mlx5_irq_request_vector(dev, found_cpu, vecidx, &table->rmap); + if (IS_ERR(irq)) + return PTR_ERR(irq); - kfree(cpus); - return i ? i : PTR_ERR(irq); + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } -static void comp_irqs_release_sf(struct mlx5_core_dev *dev) +static void comp_irq_release_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_irq *irq; - unsigned long index; - xa_for_each(&table->comp_irqs, index, irq) { - xa_erase(&table->comp_irqs, index); - mlx5_irq_affinity_irq_release(dev, irq); - } + irq = xa_load(&table->comp_irqs, vecidx); + if (!irq) + return; + + xa_erase(&table->comp_irqs, vecidx); + mlx5_irq_affinity_irq_release(dev, irq); } -static int comp_irqs_request_sf(struct mlx5_core_dev *dev) +static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_irq *irq; - int i; - - for (i = 0; i < table->max_comp_eqs; i++) { - irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, i); - if (IS_ERR(irq)) - break; - if (xa_err(xa_store(&table->comp_irqs, i, irq, GFP_KERNEL))) - break; - } + irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); + if (IS_ERR(irq)) + return PTR_ERR(irq); - return i ? 
i : PTR_ERR(irq); + return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } -static void comp_irqs_release(struct mlx5_core_dev *dev) +static void comp_irq_release(struct mlx5_core_dev *dev, u16 vecidx) { - mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) : - comp_irqs_release_pci(dev); + mlx5_core_is_sf(dev) ? comp_irq_release_sf(dev, vecidx) : + comp_irq_release_pci(dev, vecidx); } -static int comp_irqs_request(struct mlx5_core_dev *dev) +static int comp_irq_request(struct mlx5_core_dev *dev, u16 vecidx) { - return mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) : - comp_irqs_request_pci(dev); + return mlx5_core_is_sf(dev) ? comp_irq_request_sf(dev, vecidx) : + comp_irq_request_pci(dev, vecidx); } #ifdef CONFIG_RFS_ACCEL @@ -944,6 +929,8 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; + struct mlx5_irq *irq; + unsigned long index; list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); @@ -955,7 +942,10 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) kfree(eq); table->curr_comp_eqs--; } - comp_irqs_release(dev); + + xa_for_each(&table->comp_irqs, index, irq) + comp_irq_release(dev, index); + free_rmap(dev); } @@ -980,7 +970,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_comp *eq; struct mlx5_irq *irq; unsigned long index; - int ncomp_eqs; + int vecidx; int nent; int err; @@ -988,13 +978,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) if (err) return err; - ncomp_eqs = comp_irqs_request(dev); - if (ncomp_eqs < 0) { - err = ncomp_eqs; - goto err_irqs_req; + for (vecidx = 0; vecidx < table->max_comp_eqs; vecidx++) { + err = comp_irq_request(dev, vecidx); + if (err < 0) + break; } - table->max_comp_eqs = ncomp_eqs; + if (!vecidx) + goto err_irqs_req; + + table->max_comp_eqs = vecidx; INIT_LIST_HEAD(&table->comp_eqs_list); nent = comp_eq_depth_devlink_param_get(dev); From 273c697fdedc65f8b423a652fb248860a3dbf36d Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 19 Jun 2023 15:01:43 +0300 Subject: [PATCH 05/15] net/mlx5: Use xarray to store and manage completion EQs Use xarray to store the completion EQs instead of a linked list. The xarray offers more scalability, reduced memory overhead, and facilitates the lookup of a certain EQ given a vector index. 
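
The xarray calls used here follow the standard kernel API; a minimal
sketch of the resulting pattern (not the exact driver code):

	xa_init(&table->comp_eqs);
	/* Store an EQ under its vector index; may allocate internally. */
	err = xa_err(xa_store(&table->comp_eqs, vecidx, eq, GFP_KERNEL));
	/* Direct lookup by vector index, replacing the list walk. */
	eq = xa_load(&table->comp_eqs, vecidx);
	/* Drop a single entry on destroy. */
	xa_erase(&table->comp_eqs, vecidx);

With this, mlx5_vector2eqn() becomes a single xa_load() instead of a
list_for_each_entry() scan, as the diff below shows.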
Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 50 ++++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c01a5d8dbe9b..b343c0fd621e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -47,7 +47,7 @@ enum { static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); struct mlx5_eq_table { - struct list_head comp_eqs_list; + struct xarray comp_eqs; struct mlx5_eq_async pages_eq; struct mlx5_eq_async cmd_eq; struct mlx5_eq_async async_eq; @@ -455,6 +455,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) eq_table->irq_table = mlx5_irq_table_get(dev); cpumask_clear(&eq_table->used_cpus); + xa_init(&eq_table->comp_eqs); xa_init(&eq_table->comp_irqs); eq_table->curr_comp_eqs = 0; return 0; @@ -466,6 +467,7 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) mlx5_eq_debugfs_cleanup(dev); xa_destroy(&table->comp_irqs); + xa_destroy(&table->comp_eqs); kvfree(table); } @@ -928,12 +930,12 @@ static void free_rmap(struct mlx5_core_dev *mdev) {} static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq, *n; + struct mlx5_eq_comp *eq; struct mlx5_irq *irq; unsigned long index; - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { - list_del(&eq->list); + xa_for_each(&table->comp_eqs, index, eq) { + xa_erase(&table->comp_eqs, index); mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", @@ -988,7 +990,6 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) goto err_irqs_req; table->max_comp_eqs = vecidx; - INIT_LIST_HEAD(&table->comp_eqs_list); nent = comp_eq_depth_devlink_param_get(dev); xa_for_each(&table->comp_irqs, index, irq) @@ -1022,13 +1023,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) } mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); - /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ - list_add_tail(&eq->list, &table->comp_eqs_list); + err = xa_err(xa_store(&table->comp_eqs, index, eq, GFP_KERNEL)); + if (err) + goto disable_eq; table->curr_comp_eqs++; } return 0; +disable_eq: + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); clean_eq: kfree(eq); clean: @@ -1043,21 +1047,16 @@ static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int err = -ENOENT; - int i = 0; - list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (i++ == vector) { - if (irqn) - *irqn = eq->core.irqn; - if (eqn) - *eqn = eq->core.eqn; - err = 0; - break; - } - } + eq = xa_load(&table->comp_eqs, vector); + if (!eq) + return -ENOENT; - return err; + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; + return 0; } int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) @@ -1082,12 +1081,10 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; - int i = 0; - list_for_each_entry(eq, &table->comp_eqs_list, list) { - if (i++ == vector) - return mlx5_irq_get_affinity_mask(eq->core.irq); - } + eq = xa_load(&table->comp_eqs, vector); + if (eq) + return 
mlx5_irq_get_affinity_mask(eq->core.irq); WARN_ON_ONCE(1); return NULL; @@ -1105,8 +1102,9 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq; + unsigned long index; - list_for_each_entry(eq, &table->comp_eqs_list, list) { + xa_for_each(&table->comp_eqs, index, eq) { if (eq->core.eqn == eqn) return eq; } From e3e56775e91398df95c610bfc9bf4025db7dcf66 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Sun, 11 Jun 2023 19:55:02 +0300 Subject: [PATCH 06/15] net/mlx5: Implement single completion EQ create/destroy methods Currently, create_comp_eqs() function handles the creation of all completion EQs for all the vectors on driver load. While on driver unload, destroy_comp_eqs() performs the equivalent job. In preparation for dynamic EQ creation, replace create_comp_eqs() / destroy_comp_eqs() with create_comp_eq() / destroy_comp_eq() functions which will receive a vector index and allocate/destroy an EQ for that specific vector. Thus, allowing more flexibility in the management of completion EQs. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 137 +++++++++---------- 1 file changed, 66 insertions(+), 71 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index b343c0fd621e..41fa15757101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -927,28 +927,19 @@ static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; } static void free_rmap(struct mlx5_core_dev *mdev) {} #endif -static void destroy_comp_eqs(struct mlx5_core_dev *dev) +static void destroy_comp_eq(struct mlx5_core_dev *dev, struct mlx5_eq_comp *eq, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq; - struct mlx5_irq *irq; - unsigned long index; - - xa_for_each(&table->comp_eqs, index, eq) { - xa_erase(&table->comp_eqs, index); - mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); - if (destroy_unmap_eq(dev, &eq->core)) - mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", - eq->core.eqn); - tasklet_disable(&eq->tasklet_ctx.task); - kfree(eq); - table->curr_comp_eqs--; - } - xa_for_each(&table->comp_irqs, index, irq) - comp_irq_release(dev, index); - - free_rmap(dev); + xa_erase(&table->comp_eqs, vecidx); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); + if (destroy_unmap_eq(dev, &eq->core)) + mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", + eq->core.eqn); + tasklet_disable(&eq->tasklet_ctx.task); + kfree(eq); + comp_irq_release(dev, vecidx); + table->curr_comp_eqs--; } static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) @@ -966,79 +957,62 @@ static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) return MLX5_COMP_EQ_SIZE; } -static int create_comp_eqs(struct mlx5_core_dev *dev) +static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_param param = {}; struct mlx5_eq_comp *eq; struct mlx5_irq *irq; - unsigned long index; - int vecidx; int nent; int err; - err = alloc_rmap(dev); + err = comp_irq_request(dev, vecidx); if (err) return err; - for (vecidx = 0; vecidx < table->max_comp_eqs; vecidx++) { - err = comp_irq_request(dev, vecidx); - if (err < 0) - break; - } - - if (!vecidx) - goto err_irqs_req; - - table->max_comp_eqs = vecidx; nent = 
comp_eq_depth_devlink_param_get(dev); - xa_for_each(&table->comp_irqs, index, irq) - { - struct mlx5_eq_param param = {}; + eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); + if (!eq) { + err = -ENOMEM; + goto clean_irq; + } - eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, dev->priv.numa_node); - if (!eq) { - err = -ENOMEM; - goto clean; - } + INIT_LIST_HEAD(&eq->tasklet_ctx.list); + INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); + spin_lock_init(&eq->tasklet_ctx.lock); + tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); - INIT_LIST_HEAD(&eq->tasklet_ctx.list); - INIT_LIST_HEAD(&eq->tasklet_ctx.process_list); - spin_lock_init(&eq->tasklet_ctx.lock); - tasklet_setup(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb); - - eq->irq_nb.notifier_call = mlx5_eq_comp_int; - param = (struct mlx5_eq_param) { - .irq = irq, - .nent = nent, - }; - - err = create_map_eq(dev, &eq->core, ¶m); - if (err) - goto clean_eq; - err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); - if (err) { - destroy_unmap_eq(dev, &eq->core); - goto clean_eq; - } + irq = xa_load(&table->comp_irqs, vecidx); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; + param = (struct mlx5_eq_param) { + .irq = irq, + .nent = nent, + }; - mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); - err = xa_err(xa_store(&table->comp_eqs, index, eq, GFP_KERNEL)); - if (err) - goto disable_eq; - table->curr_comp_eqs++; + err = create_map_eq(dev, &eq->core, ¶m); + if (err) + goto clean_eq; + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); + if (err) { + destroy_unmap_eq(dev, &eq->core); + goto clean_eq; } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); + err = xa_err(xa_store(&table->comp_eqs, vecidx, eq, GFP_KERNEL)); + if (err) + goto disable_eq; + + table->curr_comp_eqs++; return 0; disable_eq: mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); clean_eq: kfree(eq); -clean: - destroy_comp_eqs(dev); -err_irqs_req: - free_rmap(dev); +clean_irq: + comp_irq_release(dev, vecidx); return err; } @@ -1161,6 +1135,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; + int i; eq_table->max_comp_eqs = get_num_eqs(dev); err = create_async_eqs(dev); @@ -1169,8 +1144,19 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) goto err_async_eqs; } - err = create_comp_eqs(dev); + err = alloc_rmap(dev); if (err) { + mlx5_core_err(dev, "Failed to allocate rmap\n"); + goto err_rmap; + } + + for (i = 0; i < eq_table->max_comp_eqs; i++) { + err = create_comp_eq(dev, i); + if (err < 0) + break; + } + + if (!i) { mlx5_core_err(dev, "Failed to create completion EQs\n"); goto err_comp_eqs; } @@ -1178,6 +1164,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) return 0; err_comp_eqs: + free_rmap(dev); +err_rmap: destroy_async_eqs(dev); err_async_eqs: return err; @@ -1185,7 +1173,14 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { - destroy_comp_eqs(dev); + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq; + unsigned long index; + + xa_for_each(&table->comp_eqs, index, eq) + destroy_comp_eq(dev, eq, index); + + free_rmap(dev); destroy_async_eqs(dev); } From ddd2c79da02021153dc8674e5a7e9748e56c1240 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 12 Jun 2023 11:50:10 +0300 Subject: [PATCH 07/15] net/mlx5: Introduce mlx5_cpumask_default_spread For better code readability in the completion IRQ request code, define the cpu lookup per completion vector logic in a separate 
function. The new method, mlx5_cpumask_default_spread(), returns the
n'th CPU for a given vector index n. It will also be used in the next
patch.

Signed-off-by: Maher Sanalla
Reviewed-by: Shay Drory
Reviewed-by: Moshe Shemesh
Signed-off-by: Saeed Mahameed
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 41fa15757101..ad654d460d0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -826,20 +826,18 @@ static void comp_irq_release_pci(struct mlx5_core_dev *dev, u16 vecidx)
 	mlx5_irq_release_vector(irq);
 }
 
-static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx)
+static int mlx5_cpumask_default_spread(int numa_node, int index)
 {
-	struct mlx5_eq_table *table = dev->priv.eq_table;
 	const struct cpumask *prev = cpu_none_mask;
 	const struct cpumask *mask;
-	struct mlx5_irq *irq;
 	int found_cpu = 0;
 	int i = 0;
 	int cpu;
 
 	rcu_read_lock();
-	for_each_numa_hop_mask(mask, dev->priv.numa_node) {
+	for_each_numa_hop_mask(mask, numa_node) {
 		for_each_cpu_andnot(cpu, mask, prev) {
-			if (i++ == vecidx) {
+			if (i++ == index) {
 				found_cpu = cpu;
 				goto spread_done;
 			}
@@ -849,7 +847,17 @@ static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx)
 
 spread_done:
 	rcu_read_unlock();
-	irq = mlx5_irq_request_vector(dev, found_cpu, vecidx, &table->rmap);
+	return found_cpu;
+}
+
+static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx)
+{
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_irq *irq;
+	int cpu;
+
+	cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx);
+	irq = mlx5_irq_request_vector(dev, cpu, vecidx, &table->rmap);
 	if (IS_ERR(irq))
 		return PTR_ERR(irq);
 
From f3147015fa0769cf1dcbfdb9040ad380cc4daeb5 Mon Sep 17 00:00:00 2001
From: Maher Sanalla
Date: Mon, 12 Jun 2023 11:58:14 +0300
Subject: [PATCH 08/15] net/mlx5: Add IRQ vector to CPU lookup function

Currently, IRQ requests are performed for all vectors once driver load
completes. However, as we move to support dynamic creation of EQs,
this will no longer be the case, as some IRQs will not exist at this
stage. In such a case, use the default CPU-to-IRQ mapping, which is
the serial mapping based on the IRQ vector index: the n'th vector gets
mapped to the n'th CPU.

Introduce an API function mlx5_comp_vector_get_cpu() that takes an IRQ
index and provides the corresponding CPU mapping. It utilizes the
existing IRQ affinity if defined, or resorts to the default serialized
CPU mapping otherwise.
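
A hedged usage sketch (the exact CPU chosen depends on the NUMA
topology, so the mapping below is illustrative only):

	/* Channel setup may ask for a CPU before the IRQ exists. */
	int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix);

	/* With no IRQ allocated yet, vector n falls back to the n'th
	 * CPU spread outward from the device's NUMA node, i.e. the
	 * same CPU mlx5_cpumask_default_spread() would later pick.
	 */
	cpumask_set_cpu(cpu, priv->scratchpad.cpumask);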
Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/trap.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 19 ++++++++++++++++--- include/linux/mlx5/driver.h | 3 +-- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 201ac7dd338f..bcf9807a5556 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -128,7 +128,7 @@ static void mlx5e_build_trap_params(struct mlx5_core_dev *mdev, static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, 0)); + int cpu = mlx5_comp_vector_get_cpu(priv->mdev, 0); struct net_device *netdev = priv->netdev; struct mlx5e_trap *t; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1c820119e438..9ec7b975c549 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2445,7 +2445,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct xsk_buff_pool *xsk_pool, struct mlx5e_channel **cp) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + int cpu = mlx5_comp_vector_get_cpu(priv->mdev, ix); struct net_device *netdev = priv->netdev; struct mlx5e_xsk_param xsk; struct mlx5e_channel *c; @@ -2862,7 +2862,7 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, cpumask_clear(priv->scratchpad.cpumask); for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq)); + int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index ad654d460d0c..2f5c0d00285f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1058,7 +1058,7 @@ unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_comp_vectors_count); -struct cpumask * +static struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { struct mlx5_eq_table *table = dev->priv.eq_table; @@ -1068,10 +1068,23 @@ mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) if (eq) return mlx5_irq_get_affinity_mask(eq->core.irq); - WARN_ON_ONCE(1); return NULL; } -EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); + +int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector) +{ + struct cpumask *mask; + int cpu; + + mask = mlx5_comp_irq_get_affinity_mask(dev, vector); + if (mask) + cpu = cpumask_first(mask); + else + cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vector); + + return cpu; +} +EXPORT_SYMBOL(mlx5_comp_vector_get_cpu); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index fa70c25423b2..e686722fa4ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1109,8 +1109,7 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct 
mlx5_sq_bfreg *bfreg); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev); -struct cpumask * -mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector); +int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, From 674dd4e2e04e7a62bfacf28129e0808f33395bdf Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Thu, 22 Jun 2023 18:52:44 +0300 Subject: [PATCH 09/15] net/mlx5: Rename mlx5_comp_vectors_count() to mlx5_comp_vectors_max() To accurately represent its purpose, rename the function that retrieves the value of maximum vectors from mlx5_comp_vectors_count() to mlx5_comp_vectors_max(). Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 2 +- drivers/vfio/pci/mlx5/cmd.c | 2 +- include/linux/mlx5/driver.h | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f0b394ed7452..3c25b9045f9d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3685,7 +3685,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_max(mdev); mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0f8f70b91485..c1deb04ba7e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -193,7 +193,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? 
MLX5E_MIN_NUM_CHANNELS : - min_t(int, mlx5_comp_vectors_count(mdev), MLX5E_MAX_NUM_CHANNELS); + min_t(int, mlx5_comp_vectors_max(mdev), MLX5E_MAX_NUM_CHANNELS); } /* The maximum WQE size can be retrieved by max_wqe_sz_sq in diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9ec7b975c549..5710d755c2c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2856,7 +2856,7 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5_core_dev *mdev = priv->mdev; int num_comp_vectors, ix, irq; - num_comp_vectors = mlx5_comp_vectors_count(mdev); + num_comp_vectors = mlx5_comp_vectors_max(mdev); for (ix = 0; ix < params->num_channels; ix++) { cpumask_clear(priv->scratchpad.cpumask); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 2f5c0d00285f..6272962ea077 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -1052,11 +1052,11 @@ int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) return vector2eqnirqn(dev, vector, NULL, irqn); } -unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) +unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev) { return dev->priv.eq_table->max_comp_eqs; } -EXPORT_SYMBOL(mlx5_comp_vectors_count); +EXPORT_SYMBOL(mlx5_comp_vectors_max); static struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 4a5ae86e2b62..24bb30b5008c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -1096,7 +1096,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, if (!in) goto err_cqwq; - vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); + vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) { kvfree(in); diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index deed156e6165..eee20f04a469 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -1025,7 +1025,7 @@ static int mlx5vf_create_cq(struct mlx5_core_dev *mdev, goto err_buff; } - vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); + vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev); err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) goto err_vec; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e686722fa4ca..43c4fd26c69a 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1108,7 +1108,7 @@ int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg, bool map_wc, bool fast_path); void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); -unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev); +unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev); int mlx5_comp_vector_get_cpu(struct mlx5_core_dev *dev, int vector); unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, From 54c5297801f3b9140c751c7f5660770c52dea24e Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Thu, 22 Jun 2023 19:05:46 +0300 Subject: [PATCH 
10/15] net/mlx5: Handle SF IRQ request in the absence of SF IRQ pool In case the SF IRQ pool is not available due to setup limitations, SF currently relies on the already allocated PF IRQs to fulfill its IRQ vector requests. However, with the dynamic EQ allocation introduced in the next patch, it is possible that not all IRQs of PF will be allocated after the driver is loaded. In such case, if a SF requests a completion IRQ without having its own independent IRQ pool, SF will lack a PF IRQ to utilize. To address this scenario, allocate an IRQ for the SF from the PF's IRQ pool on demand. The new IRQ will be shared between the SF and it's PF. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 24 +++++++++++++++++-- .../mellanox/mlx5/core/irq_affinity.c | 12 ++++------ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6272962ea077..6e6e0a1c12b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -850,14 +850,29 @@ static int mlx5_cpumask_default_spread(int numa_node, int index) return found_cpu; } +static struct cpu_rmap *mlx5_eq_table_get_pci_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(dev)) + return dev->priv.parent_mdev->priv.eq_table->rmap; +#endif + return dev->priv.eq_table->rmap; +#else + return NULL; +#endif +} + static int comp_irq_request_pci(struct mlx5_core_dev *dev, u16 vecidx) { struct mlx5_eq_table *table = dev->priv.eq_table; + struct cpu_rmap *rmap; struct mlx5_irq *irq; int cpu; + rmap = mlx5_eq_table_get_pci_rmap(dev); cpu = mlx5_cpumask_default_spread(dev->priv.numa_node, vecidx); - irq = mlx5_irq_request_vector(dev, cpu, vecidx, &table->rmap); + irq = mlx5_irq_request_vector(dev, cpu, vecidx, &rmap); if (IS_ERR(irq)) return PTR_ERR(irq); @@ -883,8 +898,13 @@ static int comp_irq_request_sf(struct mlx5_core_dev *dev, u16 vecidx) struct mlx5_irq *irq; irq = mlx5_irq_affinity_irq_request_auto(dev, &table->used_cpus, vecidx); - if (IS_ERR(irq)) + if (IS_ERR(irq)) { + /* In case SF irq pool does not exist, fallback to the PF irqs*/ + if (PTR_ERR(irq) == -ENOENT) + return comp_irq_request_pci(dev, vecidx); + return PTR_ERR(irq); + } return xa_err(xa_store(&table->comp_irqs, vecidx, irq, GFP_KERNEL)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index ed51800f9f67..047d5fed5f89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -191,17 +191,13 @@ struct mlx5_irq *mlx5_irq_affinity_irq_request_auto(struct mlx5_core_dev *dev, struct irq_affinity_desc af_desc = {}; struct mlx5_irq *irq; + if (!mlx5_irq_pool_is_sf_pool(pool)) + return ERR_PTR(-ENOENT); + af_desc.is_managed = 1; cpumask_copy(&af_desc.mask, cpu_online_mask); cpumask_andnot(&af_desc.mask, &af_desc.mask, used_cpus); - if (mlx5_irq_pool_is_sf_pool(pool)) - irq = mlx5_irq_affinity_request(pool, &af_desc); - else - /* In case SF pool doesn't exists, fallback to the PF IRQs. - * The PF IRQs are already allocated and binded to CPU - * at this point. Hence, only an index is needed. 
- */ - irq = mlx5_irq_request(dev, vecidx, NULL, NULL); + irq = mlx5_irq_affinity_request(pool, &af_desc); if (IS_ERR(irq)) return irq; From f14c1a14e63227a65faa68237687784a6dd2e922 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 12 Jun 2023 10:13:50 +0300 Subject: [PATCH 11/15] net/mlx5: Allocate completion EQs dynamically This commit enables the dynamic allocation of EQs at runtime, allowing for more flexibility in managing completion EQs and reducing the memory overhead of driver load. Whenever a CQ is created for a given vector index, the driver will lookup to see if there is an already mapped completion EQ for that vector, if so, utilize it. Otherwise, allocate a new EQ on demand and then utilize it for the CQ completion events. Add a protection lock to the EQ table to protect from concurrent EQ creation attempts. While at it, replace mlx5_vector2irqn()/mlx5_vector2eqn() with mlx5_comp_eqn_get() and mlx5_comp_irqn_get() which will allocate an EQ on demand if no EQ is found for the given vector. Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 2 +- drivers/infiniband/hw/mlx5/devx.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 80 ++++++++++--------- .../ethernet/mellanox/mlx5/core/fpga/conn.c | 2 +- .../net/ethernet/mellanox/mlx5/core/lib/aso.c | 2 +- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 +- .../mellanox/mlx5/core/steering/dr_send.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- drivers/vfio/pci/mlx5/cmd.c | 2 +- include/linux/mlx5/driver.h | 2 +- 11 files changed, 54 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index efc9e4a6df04..9773d2a3d97f 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -993,7 +993,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - err = mlx5_vector2eqn(dev->mdev, vector, &eqn); + err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn); if (err) goto err_cqb; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index db5fb196c728..8ba53edf2311 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1002,7 +1002,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn); + err = mlx5_comp_eqn_get(dev->mdev, user_vector, &dev_eqn); if (err < 0) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5710d755c2c4..5919b889479f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1989,7 +1989,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); + err = mlx5_comp_eqn_get(mdev, param->eq_ix, &eqn); if (err) return err; @@ -2452,7 +2452,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, unsigned int irq; int err; - err = mlx5_vector2irqn(priv->mdev, ix, &irq); + err = mlx5_comp_irqn_get(priv->mdev, ix, &irq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6e6e0a1c12b5..ea0405e0a43f 100644 --- 
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -58,6 +58,7 @@ struct mlx5_eq_table {
 	struct mlx5_nb cq_err_nb;

 	struct mutex lock; /* sync async eqs creations */
+	struct mutex comp_lock; /* sync comp eqs creations */
 	int curr_comp_eqs;
 	int max_comp_eqs;
 	struct mlx5_irq_table *irq_table;
@@ -457,6 +458,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev)
 	cpumask_clear(&eq_table->used_cpus);
 	xa_init(&eq_table->comp_eqs);
 	xa_init(&eq_table->comp_irqs);
+	mutex_init(&eq_table->comp_lock);
 	eq_table->curr_comp_eqs = 0;
 	return 0;
 }
@@ -985,6 +987,7 @@ static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
 	return MLX5_COMP_EQ_SIZE;
 }

+/* Must be called with EQ table comp_lock held */
 static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -994,6 +997,13 @@ static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
 	int nent;
 	int err;

+	lockdep_assert_held(&table->comp_lock);
+	if (table->curr_comp_eqs == table->max_comp_eqs) {
+		mlx5_core_err(dev, "maximum number of vectors is allocated, %d\n",
+			      table->max_comp_eqs);
+		return -ENOMEM;
+	}
+
 	err = comp_irq_request(dev, vecidx);
 	if (err)
 		return err;
@@ -1033,7 +1043,7 @@ static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
 		goto disable_eq;

 	table->curr_comp_eqs++;
-	return 0;
+	return eq->core.eqn;

disable_eq:
 	mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
@@ -1044,32 +1054,47 @@ static int create_comp_eq(struct mlx5_core_dev *dev, u16 vecidx)
 	return err;
 }

-static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn,
-			  unsigned int *irqn)
+int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
 	struct mlx5_eq_comp *eq;
+	int ret = 0;

-	eq = xa_load(&table->comp_eqs, vector);
-	if (!eq)
-		return -ENOENT;
-
-	if (irqn)
-		*irqn = eq->core.irqn;
-	if (eqn)
+	mutex_lock(&table->comp_lock);
+	eq = xa_load(&table->comp_eqs, vecidx);
+	if (eq) {
 		*eqn = eq->core.eqn;
+		goto out;
+	}
+
+	ret = create_comp_eq(dev, vecidx);
+	if (ret < 0) {
+		mutex_unlock(&table->comp_lock);
+		return ret;
+	}
+
+	*eqn = ret;
+out:
+	mutex_unlock(&table->comp_lock);
 	return 0;
 }
+EXPORT_SYMBOL(mlx5_comp_eqn_get);

-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn)
+int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
 {
-	return vector2eqnirqn(dev, vector, eqn, NULL);
-}
-EXPORT_SYMBOL(mlx5_vector2eqn);
+	struct mlx5_eq_table *table = dev->priv.eq_table;
+	struct mlx5_eq_comp *eq;
+	int eqn;
+	int err;

-int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn)
-{
-	return vector2eqnirqn(dev, vector, NULL, irqn);
+	/* Allocate the EQ if not allocated yet */
+	err = mlx5_comp_eqn_get(dev, vector, &eqn);
+	if (err)
+		return err;
+
+	eq = xa_load(&table->comp_eqs, vector);
+	*irqn = eq->core.irqn;
+	return 0;
 }

 unsigned int mlx5_comp_vectors_max(struct mlx5_core_dev *dev)
@@ -1119,10 +1144,9 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
 	struct mlx5_eq_comp *eq;
 	unsigned long index;

-	xa_for_each(&table->comp_eqs, index, eq) {
+	xa_for_each(&table->comp_eqs, index, eq)
 		if (eq->core.eqn == eqn)
 			return eq;
-	}

 	return ERR_PTR(-ENOENT);
 }
@@ -1130,11 +1154,7 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
 /* This function should only be called after mlx5_cmd_force_teardown_hca */
 void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
 {
-	struct mlx5_eq_table *table = dev->priv.eq_table;
-
-	mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
 	mlx5_irq_table_free_irqs(dev);
-	mutex_unlock(&table->lock);
 }

 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -1176,7 +1196,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *eq_table = dev->priv.eq_table;
 	int err;
-	int i;

 	eq_table->max_comp_eqs = get_num_eqs(dev);
 	err = create_async_eqs(dev);
@@ -1191,21 +1210,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
 		goto err_rmap;
 	}

-	for (i = 0; i < eq_table->max_comp_eqs; i++) {
-		err = create_comp_eq(dev, i);
-		if (err < 0)
-			break;
-	}
-
-	if (!i) {
-		mlx5_core_err(dev, "Failed to create completion EQs\n");
-		goto err_comp_eqs;
-	}
-
 	return 0;

-err_comp_eqs:
-	free_rmap(dev);
err_rmap:
 	destroy_async_eqs(dev);
err_async_eqs:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 12abe991583a..c4de6bf8d1b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -445,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 		goto err_cqwq;
 	}

-	err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn);
+	err = mlx5_comp_eqn_get(mdev, smp_processor_id(), &eqn);
 	if (err) {
 		kvfree(in);
 		goto err_cqwq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
index 5a80fb7dbbca..40c7be124041 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c
@@ -81,7 +81,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data)
 	int inlen, eqn;
 	int err;

-	err = mlx5_vector2eqn(mdev, 0, &eqn);
+	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
 	if (err)
 		return err;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index d3d628b862f3..69a75459775d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -104,6 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev);
 struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev);
 #endif

-int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn);
+int mlx5_comp_irqn_get(struct mlx5_core_dev *dev, int vector, unsigned int *irqn);

 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 24bb30b5008c..6fa06ba2d346 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -1097,7 +1097,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 		goto err_cqwq;

 	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
-	err = mlx5_vector2eqn(mdev, vector, &eqn);
+	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
 	if (err) {
 		kvfree(in);
 		goto err_cqwq;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 9138ef2fb2c8..bece4df7b8dd 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -580,7 +580,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
 	/* Use vector 0 by default. Consider adding code to choose least used
 	 * vector.
 	 */
-	err = mlx5_vector2eqn(mdev, 0, &eqn);
+	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
 	if (err)
 		goto err_vec;

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index eee20f04a469..c82c1f4fc588 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -1026,7 +1026,7 @@ static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
 	}

 	vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
-	err = mlx5_vector2eqn(mdev, vector, &eqn);
+	err = mlx5_comp_eqn_get(mdev, vector, &eqn);
 	if (err)
 		goto err_vec;

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 43c4fd26c69a..3e1017d764b7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1058,7 +1058,7 @@ void mlx5_unregister_debugfs(void);
 void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm);
 void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
-int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn);
+int mlx5_comp_eqn_get(struct mlx5_core_dev *dev, u16 vecidx, int *eqn);
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);

From a0ae00e71e3e652c3bde8dcb61b4c1718618192e Mon Sep 17 00:00:00 2001
From: Ruan Jinjie
Date: Fri, 4 Aug 2023 09:43:44 +0800
Subject: [PATCH 12/15] net/mlx5: remove many unnecessary NULL values

Many pointers are assigned before they are first read and therefore
need not be initialized; remove the unnecessary NULL assignments.

Signed-off-by: Ruan Jinjie
Reviewed-by: Jesse Brandeburg
Reviewed-by: Leon Romanovsky
Signed-off-by: Saeed Mahameed
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c   | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
index 39c03dcbd196..e5c1012921d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.c
@@ -57,7 +57,7 @@ static const char * const mlx5_fpga_qp_error_strings[] = {
 };
 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
 {
-	struct mlx5_fpga_device *fdev = NULL;
+	struct mlx5_fpga_device *fdev;

 	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
 	if (!fdev)
@@ -252,7 +252,7 @@ int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)

 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
 {
-	struct mlx5_fpga_device *fdev = NULL;
+	struct mlx5_fpga_device *fdev;

 	if (!MLX5_CAP_GEN(mdev, fpga)) {
 		mlx5_core_dbg(mdev, "FPGA capability not present\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
index 4047629a876b..30564d9b00e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/hv_vhca.c
@@ -40,7 +40,7 @@ struct mlx5_hv_vhca_agent {

 struct mlx5_hv_vhca *mlx5_hv_vhca_create(struct mlx5_core_dev *dev)
 {
-	struct mlx5_hv_vhca *hv_vhca = NULL;
+	struct mlx5_hv_vhca *hv_vhca;

 	hv_vhca = kzalloc(sizeof(*hv_vhca), GFP_KERNEL);
 	if (!hv_vhca)

From 58f6d9d04489788d4115bd4bec4193df9e23f45f Mon Sep 17 00:00:00 2001
From: Gal Pressman
Date: Sun, 6 Aug 2023 13:34:59 +0300
Subject: [PATCH 13/15] net/mlx5: Fix typo reminder -> remainder

Fix a typo in esw_qos_devlink_rate_to_mbps(): reminder -> remainder.
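For context, esw_qos_devlink_rate_to_mbps() converts a devlink rate
given in Bps into 1 Mbps link-speed units. A minimal sketch of that
check, assuming MLX5_LINKSPEED_UNIT is the Bps value of 1 Mbps (the
rate_bps variable name is illustrative only):

	u32 remainder;
	u64 mbps;

	/* Split the requested rate (Bps) into whole Mbps plus a remainder. */
	mbps = div_u64_rem(rate_bps, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder)
		return -EINVAL; /* not a whole number of Mbps */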
Signed-off-by: Gal Pressman
Reviewed-by: Rahul Rameshbabu
Signed-off-by: Saeed Mahameed
---
 drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 7c79476cc5f9..1887a24ee414 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -740,7 +740,7 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32
 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
 					u64 *rate, struct netlink_ext_ack *extack)
 {
-	u32 link_speed_max, reminder;
+	u32 link_speed_max, remainder;
 	u64 value;
 	int err;

@@ -750,8 +750,8 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
 		return err;
 	}

-	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
-	if (reminder) {
+	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
+	if (remainder) {
 		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
 		       name, *rate);
 		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");

From d602be220cf97aa581a9b34a7780a889bc3dd25c Mon Sep 17 00:00:00 2001
From: Roi Dayan
Date: Tue, 11 Jul 2023 09:42:01 +0300
Subject: [PATCH 14/15] net/mlx5: E-Switch, Remove redundant arg ignore_flow_lvl

The argument is always passed as true and is therefore redundant;
remove it.

Signed-off-by: Roi Dayan
Reviewed-by: Shay Drory
Signed-off-by: Saeed Mahameed
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 723dff87e6d5..e391535e1ab1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -375,7 +375,6 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest,
 		      struct mlx5_flow_act *flow_act,
 		      struct mlx5_eswitch *esw,
 		      struct mlx5_flow_attr *attr,
-		      bool ignore_flow_lvl,
 		      int *i)
 {
 	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
@@ -385,8 +384,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest,
 		return -EOPNOTSUPP;

 	for (j = esw_attr->split_count; j < esw_attr->out_count; j++, (*i)++) {
-		if (ignore_flow_lvl)
-			flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+		flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
 		dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

 		dest[*i].ft = mlx5_esw_indir_table_get(esw, attr,
@@ -569,7 +567,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
 		err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i);
 		(*i)++;
 	} else if (esw_is_indir_table(esw, attr)) {
-		err = esw_setup_indir_table(dest, flow_act, esw, attr, true, i);
+		err = esw_setup_indir_table(dest, flow_act, esw, attr, i);
 	} else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) {
 		err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i);
 	} else {

From b56fb19c337951fc4daba646fab2c50bb4c41c58 Mon Sep 17 00:00:00 2001
From: Roi Dayan
Date: Tue, 11 Jul 2023 13:23:27 +0300
Subject: [PATCH 15/15] net/mlx5: Bridge, Only handle registered netdev bridge
 events

Don't handle bridge events for a netdev that doesn't belong to an
eswitch that registered for bridge events.
Signed-off-by: Roi Dayan
Reviewed-by: Vlad Buslov
Signed-off-by: Saeed Mahameed
---
 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
index 560800246573..0fef853eab62 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c
@@ -77,6 +77,10 @@ mlx5_esw_bridge_rep_vport_num_vhca_id_get(struct net_device *dev, struct mlx5_es
 		return NULL;

 	priv = netdev_priv(dev);
+
+	if (!priv->mdev->priv.eswitch->br_offloads)
+		return NULL;
+
 	rpriv = priv->ppriv;
 	*vport_num = rpriv->rep->vport;
 	*esw_owner_vhca_id = MLX5_CAP_GEN(priv->mdev, vhca_id);
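
A minimal usage sketch of the mlx5_comp_eqn_get()/mlx5_comp_irqn_get()
API introduced in [PATCH 11/15] follows. It is illustrative only, not
part of any patch above: example_cq_setup() is a hypothetical helper,
and the CREATE_CQ command that real callers such as mlx5e_create_cq()
go on to issue is elided.

	/* Resolve the EQN and IRQN of a completion vector before creating
	 * a CQ. With the series applied, the first call for a given vector
	 * allocates the completion EQ on demand under the EQ table
	 * comp_lock; subsequent calls reuse the cached EQ.
	 */
	static int example_cq_setup(struct mlx5_core_dev *mdev, u16 vecidx)
	{
		unsigned int irqn;
		int eqn;
		int err;

		/* Allocates the EQ for this vector if it does not exist yet. */
		err = mlx5_comp_eqn_get(mdev, vecidx, &eqn);
		if (err)
			return err;

		/* The EQ now exists; fetch the IRQ number it is bound to. */
		err = mlx5_comp_irqn_get(mdev, vecidx, &irqn);
		if (err)
			return err;

		/* ... MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn) and the
		 * CREATE_CQ command would go here ...
		 */
		return 0;
	}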