diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 8e2a356911a9d42c39768c5ea5348dd02af5402f..45657a7502f6473c4e693ec0bd37258b1b8338c0 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -627,6 +627,19 @@ struct lpfc_ras_fwlog {
 	enum ras_state state;              /* RAS logging running state */
 };
 
+enum lpfc_irq_chann_mode {
+	/* Assign IRQs to all possible cpus that have hardware queues */
+	NORMAL_MODE,
+
+	/* Assign IRQs only to cpus on the same numa node as HBA */
+	NUMA_MODE,
+
+	/* Assign IRQs only on non-hyperthreaded CPUs. This is the
+	 * same as NORMAL_MODE, but assigns IRQs only on physical CPUs.
+	 */
+	NHT_MODE,
+};
+
 struct lpfc_hba {
 	/* SCSI interface function jump table entries */
 	struct lpfc_io_buf * (*lpfc_get_scsi_buf)
@@ -835,7 +848,6 @@ struct lpfc_hba {
 	uint32_t cfg_fcp_mq_threshold;
 	uint32_t cfg_hdw_queue;
 	uint32_t cfg_irq_chann;
-	uint32_t cfg_irq_numa;
 	uint32_t cfg_suppress_rsp;
 	uint32_t cfg_nvme_oas;
 	uint32_t cfg_nvme_embed_cmd;
@@ -1003,6 +1015,7 @@ struct lpfc_hba {
 	mempool_t *active_rrq_pool;
 
 	struct fc_host_statistics link_stats;
+	enum lpfc_irq_chann_mode irq_chann_mode;
 	enum intr_type_t intr_type;
 	uint32_t intr_mode;
 #define LPFC_INTR_ERROR 0xFFFFFFFF
@@ -1314,19 +1327,19 @@ lpfc_phba_elsring(struct lpfc_hba *phba)
 }
 
 /**
- * lpfc_next_online_numa_cpu - Finds next online CPU on NUMA node
- * @numa_mask: Pointer to phba's numa_mask member.
+ * lpfc_next_online_cpu - Finds next online CPU on cpumask
+ * @mask: Pointer to phba's cpumask member.
  * @start: starting cpu index
  *
  * Note: If no valid cpu found, then nr_cpu_ids is returned.
  *
 **/
 static inline unsigned int
-lpfc_next_online_numa_cpu(const struct cpumask *numa_mask, unsigned int start)
+lpfc_next_online_cpu(const struct cpumask *mask, unsigned int start)
 {
	unsigned int cpu_it;
 
-	for_each_cpu_wrap(cpu_it, numa_mask, start) {
+	for_each_cpu_wrap(cpu_it, mask, start) {
 		if (cpu_online(cpu_it))
 			break;
 	}
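The lpfc.h hunks above generalize the NUMA-only helper into lpfc_next_online_cpu(), a wrap-around search over an arbitrary cpumask that returns the nr_cpu_ids sentinel when no mask member is online. A minimal user-space model of just that search logic; NR_CPU_IDS, in_mask, and is_online are invented stand-ins for nr_cpu_ids, the cpumask bits, and cpu_online():

/* User-space model of lpfc_next_online_cpu(); only the wrap-around
 * search mirrors the patch, everything kernel-side is stubbed.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPU_IDS 8

static const bool in_mask[NR_CPU_IDS]   = {1, 1, 0, 0, 1, 1, 0, 0};
static const bool is_online[NR_CPU_IDS] = {0, 1, 0, 0, 1, 0, 0, 0};

/* Like for_each_cpu_wrap() + cpu_online(): scan mask members starting
 * at 'start', wrapping past the end, and stop at the first online one.
 * Return the NR_CPU_IDS sentinel if no mask member is online.
 */
static unsigned int next_online_cpu(unsigned int start)
{
	for (unsigned int i = 0; i < NR_CPU_IDS; i++) {
		unsigned int cpu = (start + i) % NR_CPU_IDS;

		if (in_mask[cpu] && is_online[cpu])
			return cpu;
	}
	return NR_CPU_IDS;
}

int main(void)
{
	printf("from 5: %u\n", next_online_cpu(5));	/* wraps to 1 */
	printf("from 1: %u\n", next_online_cpu(1));	/* finds 1 itself */
	return 0;
}

Starting at CPU 5 the search wraps past the end of the mask and lands on CPU 1, which is why the sentinel only appears when the entire mask is offline.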
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 1354c141d61435544e5757eaf37cdc197b46f371..2791efa770afa509998fb0515077019f709b9b16 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5704,17 +5704,69 @@ LPFC_ATTR_R(hdw_queue,
 	    LPFC_HBA_HDWQ_MIN, LPFC_HBA_HDWQ_MAX,
 	    "Set the number of I/O Hardware Queues");
 
-static inline void
-lpfc_assign_default_irq_numa(struct lpfc_hba *phba)
+#if IS_ENABLED(CONFIG_X86)
+/**
+ * lpfc_cpumask_irq_mode_init - initializes cpumask of phba based on
+ *				irq_chann_mode
+ * @phba: Pointer to HBA context object.
+ **/
+static void
+lpfc_cpumask_irq_mode_init(struct lpfc_hba *phba)
+{
+	unsigned int cpu, first_cpu, numa_node = NUMA_NO_NODE;
+	const struct cpumask *sibling_mask;
+	struct cpumask *aff_mask = &phba->sli4_hba.irq_aff_mask;
+
+	cpumask_clear(aff_mask);
+
+	if (phba->irq_chann_mode == NUMA_MODE) {
+		/* Check if we're a NUMA architecture */
+		numa_node = dev_to_node(&phba->pcidev->dev);
+		if (numa_node == NUMA_NO_NODE) {
+			phba->irq_chann_mode = NORMAL_MODE;
+			return;
+		}
+	}
+
+	for_each_possible_cpu(cpu) {
+		switch (phba->irq_chann_mode) {
+		case NUMA_MODE:
+			if (cpu_to_node(cpu) == numa_node)
+				cpumask_set_cpu(cpu, aff_mask);
+			break;
+		case NHT_MODE:
+			sibling_mask = topology_sibling_cpumask(cpu);
+			first_cpu = cpumask_first(sibling_mask);
+			if (first_cpu < nr_cpu_ids)
+				cpumask_set_cpu(first_cpu, aff_mask);
+			break;
+		default:
+			break;
+		}
+	}
+}
+#endif
+
+static void
+lpfc_assign_default_irq_chann(struct lpfc_hba *phba)
 {
 #if IS_ENABLED(CONFIG_X86)
-	/* If AMD architecture, then default is LPFC_IRQ_CHANN_NUMA */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
-		phba->cfg_irq_numa = 1;
-	else
-		phba->cfg_irq_numa = 0;
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		/* If AMD architecture, then default is NUMA_MODE */
+		phba->irq_chann_mode = NUMA_MODE;
+		break;
+	case X86_VENDOR_INTEL:
+		/* If Intel architecture, then default is no hyperthread mode */
+		phba->irq_chann_mode = NHT_MODE;
+		break;
+	default:
+		phba->irq_chann_mode = NORMAL_MODE;
+		break;
+	}
+	lpfc_cpumask_irq_mode_init(phba);
 #else
-	phba->cfg_irq_numa = 0;
+	phba->irq_chann_mode = NORMAL_MODE;
 #endif
 }
 
@@ -5726,6 +5778,7 @@ lpfc_assign_default_irq_numa(struct lpfc_hba *phba)
 *
 * 	0 = Configure number of IRQ Channels to:
 *	    if AMD architecture, number of CPUs on HBA's NUMA node
+ *	    if Intel architecture, number of physical CPUs.
 *	    otherwise, number of active CPUs.
 * 	[1,256] = Manually specify how many IRQ Channels to use.
 *
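In NHT_MODE, lpfc_cpumask_irq_mode_init() keeps only the first CPU of each hyperthread sibling group, so the affinity mask ends up with one logical CPU per physical core. A sketch of that filtering under an assumed 4-core/8-thread topology, where first_sibling[] is an invented stand-in for cpumask_first(topology_sibling_cpumask(cpu)):

/* Sketch of the NHT_MODE mask build: keep one logical CPU per core.
 * The sibling table is made up; an unsigned long models struct cpumask.
 */
#include <stdio.h>

#define NR_CPUS 8

static const unsigned int first_sibling[NR_CPUS] = {0, 1, 2, 3, 0, 1, 2, 3};

int main(void)
{
	unsigned long aff_mask = 0;

	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		aff_mask |= 1UL << first_sibling[cpu];	/* cpumask_set_cpu() */

	printf("aff_mask = 0x%02lx\n", aff_mask);	/* expect 0x0f */
	return 0;
}

Because every thread of a core reports the same first sibling, setting that bit repeatedly is idempotent, so no deduplication pass is needed.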
@@ -5751,35 +5804,44 @@ MODULE_PARM_DESC(lpfc_irq_chann, "Set number of interrupt vectors to allocate");
 static int
 lpfc_irq_chann_init(struct lpfc_hba *phba, uint32_t val)
 {
-	const struct cpumask *numa_mask;
+	const struct cpumask *aff_mask;
 
 	if (phba->cfg_use_msi != 2) {
 		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 				"8532 use_msi = %u ignoring cfg_irq_numa\n",
 				phba->cfg_use_msi);
-		phba->cfg_irq_numa = 0;
-		phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN;
+		phba->irq_chann_mode = NORMAL_MODE;
+		phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF;
 		return 0;
 	}
 
 	/* Check if default setting was passed */
 	if (val == LPFC_IRQ_CHANN_DEF)
-		lpfc_assign_default_irq_numa(phba);
+		lpfc_assign_default_irq_chann(phba);
 
-	if (phba->cfg_irq_numa) {
-		numa_mask = &phba->sli4_hba.numa_mask;
+	if (phba->irq_chann_mode != NORMAL_MODE) {
+		aff_mask = &phba->sli4_hba.irq_aff_mask;
 
-		if (cpumask_empty(numa_mask)) {
+		if (cpumask_empty(aff_mask)) {
 			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
-					"8533 Could not identify NUMA node, "
-					"ignoring cfg_irq_numa\n");
-			phba->cfg_irq_numa = 0;
-			phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN;
+					"8533 Could not identify CPUs for "
+					"mode %d, ignoring\n",
+					phba->irq_chann_mode);
+			phba->irq_chann_mode = NORMAL_MODE;
+			phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF;
 		} else {
-			phba->cfg_irq_chann = cpumask_weight(numa_mask);
+			phba->cfg_irq_chann = cpumask_weight(aff_mask);
+
+			/* If no hyperthread mode, then set hdwq count to
+			 * aff_mask weight as well
+			 */
+			if (phba->irq_chann_mode == NHT_MODE)
+				phba->cfg_hdw_queue = phba->cfg_irq_chann;
+
 			lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
 					"8543 lpfc_irq_chann set to %u "
-					"(numa)\n", phba->cfg_irq_chann);
+					"(mode: %d)\n", phba->cfg_irq_chann,
+					phba->irq_chann_mode);
 		}
 	} else {
 		if (val > LPFC_IRQ_CHANN_MAX) {
@@ -5790,7 +5852,7 @@ lpfc_irq_chann_init(struct lpfc_hba *phba, uint32_t val)
 					val,
 					LPFC_IRQ_CHANN_MIN,
 					LPFC_IRQ_CHANN_MAX);
-			phba->cfg_irq_chann = LPFC_IRQ_CHANN_MIN;
+			phba->cfg_irq_chann = LPFC_IRQ_CHANN_DEF;
 			return -EINVAL;
 		}
 		phba->cfg_irq_chann = val;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4104bdcdbb6fdf2cd077c203eecda7d5fb7d01a7..8b85303518436f2a3f8c0dbd2836fe4c1d175ab6 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -6022,29 +6022,6 @@ static void lpfc_log_intr_mode(struct lpfc_hba *phba, uint32_t intr_mode)
 	return;
 }
 
-/**
- * lpfc_cpumask_of_node_init - initalizes cpumask of phba's NUMA node
- * @phba: Pointer to HBA context object.
- *
- **/
-static void
-lpfc_cpumask_of_node_init(struct lpfc_hba *phba)
-{
-	unsigned int cpu, numa_node;
-	struct cpumask *numa_mask = &phba->sli4_hba.numa_mask;
-
-	cpumask_clear(numa_mask);
-
-	/* Check if we're a NUMA architecture */
-	numa_node = dev_to_node(&phba->pcidev->dev);
-	if (numa_node == NUMA_NO_NODE)
-		return;
-
-	for_each_possible_cpu(cpu)
-		if (cpu_to_node(cpu) == numa_node)
-			cpumask_set_cpu(cpu, numa_mask);
-}
-
 /**
  * lpfc_enable_pci_dev - Enable a generic PCI device.
  * @phba: pointer to lpfc hba data structure.
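lpfc_irq_chann_init() above turns the populated affinity mask into vector counts: the IRQ channel count becomes the mask weight, and NHT_MODE additionally clamps the hardware-queue count to match. A small model of that arithmetic; the 0x0f mask and the starting cfg_hdw_queue value are illustrative assumptions, not values read from hardware:

/* Sketch of the vector-count arithmetic in lpfc_irq_chann_init().
 * An unsigned long models struct cpumask; popcount models
 * cpumask_weight().
 */
#include <stdio.h>

enum lpfc_irq_chann_mode { NORMAL_MODE, NUMA_MODE, NHT_MODE };

int main(void)
{
	unsigned long aff_mask = 0x0f;		/* e.g. four node-local CPUs */
	enum lpfc_irq_chann_mode mode = NHT_MODE;
	unsigned int cfg_irq_chann;
	unsigned int cfg_hdw_queue = 16;	/* assumed prior setting */

	/* cfg_irq_chann = cpumask_weight(aff_mask) */
	cfg_irq_chann = (unsigned int)__builtin_popcountl(aff_mask);

	/* NHT mode: one hardware queue per physical CPU as well */
	if (mode == NHT_MODE)
		cfg_hdw_queue = cfg_irq_chann;

	printf("irq_chann=%u hdw_queue=%u\n", cfg_irq_chann, cfg_hdw_queue);
	return 0;
}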
@@ -6483,7 +6460,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 	phba->sli4_hba.num_present_cpu = lpfc_present_cpu;
 	phba->sli4_hba.num_possible_cpu = cpumask_last(cpu_possible_mask) + 1;
 	phba->sli4_hba.curr_disp_cpu = 0;
-	lpfc_cpumask_of_node_init(phba);
 
 	/* Get all the module params for configuring this host */
 	lpfc_get_cfgparam(phba);
@@ -6691,6 +6667,13 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
 #endif
 			/* Not supported for NVMET */
 			phba->cfg_xri_rebalancing = 0;
+			if (phba->irq_chann_mode == NHT_MODE) {
+				phba->cfg_irq_chann =
+					phba->sli4_hba.num_present_cpu;
+				phba->cfg_hdw_queue =
+					phba->sli4_hba.num_present_cpu;
+				phba->irq_chann_mode = NORMAL_MODE;
+			}
 			break;
 		}
 	}
@@ -7032,7 +7015,7 @@ lpfc_sli4_driver_resource_unset(struct lpfc_hba *phba)
 	phba->sli4_hba.num_possible_cpu = 0;
 	phba->sli4_hba.num_present_cpu = 0;
 	phba->sli4_hba.curr_disp_cpu = 0;
-	cpumask_clear(&phba->sli4_hba.numa_mask);
+	cpumask_clear(&phba->sli4_hba.irq_aff_mask);
 
 	/* Free memory allocated for fast-path work queue handles */
 	kfree(phba->sli4_hba.hba_eq_hdl);
@@ -11287,11 +11270,12 @@ lpfc_irq_clear_aff(struct lpfc_hba_eq_hdl *eqhdl)
 * @offline: true, cpu is going offline. false, cpu is coming online.
 *
 * If cpu is going offline, we'll try our best effort to find the next
- * online cpu on the phba's NUMA node and migrate all offlining IRQ affinities.
+ * online cpu on the phba's original_mask and migrate all offlining IRQ
+ * affinities.
 *
- * If cpu is coming online, reaffinitize the IRQ back to the onlineng cpu.
+ * If cpu is coming online, reaffinitize the IRQ back to the onlining cpu.
 *
- * Note: Call only if cfg_irq_numa is enabled, otherwise rely on
+ * Note: Call only if NUMA or NHT mode is enabled, otherwise rely on
 * PCI_IRQ_AFFINITY to auto-manage IRQ affinity.
 *
 **/
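The rebalance path documented above only runs when NUMA or NHT mode populated the mask. A hedged user-space walk-through of the offline branch follows: starting just past the departing CPU, wrap-search the affinity mask for another online member and retarget the IRQ there. The mask and online tables are invented, and irq_set_affinity() is reduced to a printf:

/* Model of the offline path in lpfc_irq_rebalance(): pick the next
 * online mask member after the CPU going down, skipping the CPU itself.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPU_IDS 4

static const bool in_mask[NR_CPU_IDS] = {1, 1, 1, 1};
static bool is_online[NR_CPU_IDS]     = {1, 1, 1, 1};

/* wrap-around search, as in lpfc_next_online_cpu() */
static unsigned int next_online(unsigned int start)
{
	for (unsigned int i = 0; i < NR_CPU_IDS; i++) {
		unsigned int cpu = (start + i) % NR_CPU_IDS;

		if (in_mask[cpu] && is_online[cpu])
			return cpu;
	}
	return NR_CPU_IDS;
}

static void cpu_going_offline(unsigned int cpu)
{
	is_online[cpu] = false;

	/* cpumask_next_wrap(cpu, ...) then lpfc_next_online_cpu() */
	unsigned int cpu_select = next_online((cpu + 1) % NR_CPU_IDS);

	if (cpu_select < NR_CPU_IDS && cpu_select != cpu)
		printf("migrate IRQs: cpu%u -> cpu%u\n", cpu, cpu_select);
	else
		printf("cpu%u: no online target, leave affinity alone\n", cpu);
}

int main(void)
{
	cpu_going_offline(2);	/* expect migration to cpu3 */
	cpu_going_offline(3);	/* expect migration to cpu0 (wraps) */
	return 0;
}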
@@ -11301,14 +11285,14 @@ lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline)
 {
 	struct lpfc_vector_map_info *cpup;
 	struct cpumask *aff_mask;
 	unsigned int cpu_select, cpu_next, idx;
-	const struct cpumask *numa_mask;
+	const struct cpumask *orig_mask;
 
-	if (!phba->cfg_irq_numa)
+	if (phba->irq_chann_mode == NORMAL_MODE)
 		return;
 
-	numa_mask = &phba->sli4_hba.numa_mask;
+	orig_mask = &phba->sli4_hba.irq_aff_mask;
 
-	if (!cpumask_test_cpu(cpu, numa_mask))
+	if (!cpumask_test_cpu(cpu, orig_mask))
 		return;
 	cpup = &phba->sli4_hba.cpu_map[cpu];
@@ -11317,9 +11301,9 @@ lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline)
 		return;
 
 	if (offline) {
-		/* Find next online CPU on NUMA node */
-		cpu_next = cpumask_next_wrap(cpu, numa_mask, cpu, true);
-		cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu_next);
+		/* Find next online CPU on original mask */
+		cpu_next = cpumask_next_wrap(cpu, orig_mask, cpu, true);
+		cpu_select = lpfc_next_online_cpu(orig_mask, cpu_next);
 
 		/* Found a valid CPU */
 		if ((cpu_select < nr_cpu_ids) && (cpu_select != cpu)) {
@@ -11434,7 +11418,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 {
 	int vectors, rc, index;
 	char *name;
-	const struct cpumask *numa_mask = NULL;
+	const struct cpumask *aff_mask = NULL;
 	unsigned int cpu = 0, cpu_cnt = 0, cpu_select = nr_cpu_ids;
 	struct lpfc_hba_eq_hdl *eqhdl;
 	const struct cpumask *maskp;
@@ -11444,16 +11428,18 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 	/* Set up MSI-X multi-message vectors */
 	vectors = phba->cfg_irq_chann;
 
-	if (phba->cfg_irq_numa) {
-		numa_mask = &phba->sli4_hba.numa_mask;
-		cpu_cnt = cpumask_weight(numa_mask);
+	if (phba->irq_chann_mode != NORMAL_MODE)
+		aff_mask = &phba->sli4_hba.irq_aff_mask;
+
+	if (aff_mask) {
+		cpu_cnt = cpumask_weight(aff_mask);
 		vectors = min(phba->cfg_irq_chann, cpu_cnt);
 
-		/* cpu: iterates over numa_mask including offline or online
-		 * cpu_select: iterates over online numa_mask to set affinity
+		/* cpu: iterates over aff_mask including offline or online
+		 * cpu_select: iterates over online aff_mask to set affinity
 		 */
-		cpu = cpumask_first(numa_mask);
-		cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu);
+		cpu = cpumask_first(aff_mask);
+		cpu_select = lpfc_next_online_cpu(aff_mask, cpu);
 	} else {
 		flags |= PCI_IRQ_AFFINITY;
 	}
@@ -11487,7 +11473,7 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 
 		eqhdl->irq = pci_irq_vector(phba->pcidev, index);
 
-		if (phba->cfg_irq_numa) {
+		if (aff_mask) {
 			/* If found a neighboring online cpu, set affinity */
 			if (cpu_select < nr_cpu_ids)
 				lpfc_irq_set_aff(eqhdl, cpu_select);
@@ -11497,11 +11483,11 @@ lpfc_sli4_enable_msix(struct lpfc_hba *phba)
 						LPFC_CPU_FIRST_IRQ,
 						cpu);
 
-			/* Iterate to next offline or online cpu in numa_mask */
-			cpu = cpumask_next(cpu, numa_mask);
+			/* Iterate to next offline or online cpu in aff_mask */
+			cpu = cpumask_next(cpu, aff_mask);
 
-			/* Find next online cpu in numa_mask to set affinity */
-			cpu_select = lpfc_next_online_numa_cpu(numa_mask, cpu);
+			/* Find next online cpu in aff_mask to set affinity */
+			cpu_select = lpfc_next_online_cpu(aff_mask, cpu);
 		} else if (vectors == 1) {
 			cpu = cpumask_first(cpu_present_mask);
 			lpfc_assign_eq_map_info(phba, index, LPFC_CPU_FIRST_IRQ,
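lpfc_sli4_enable_msix() above walks two cursors: cpu visits every mask member, online or not, so each EQ gets a stable mapping, while cpu_select tracks the nearest online member for actual IRQ pinning. A self-contained approximation of that loop, with invented mask/online tables and printf standing in for lpfc_irq_set_aff() and lpfc_assign_eq_map_info():

/* Model of the dual-cursor vector assignment in lpfc_sli4_enable_msix(). */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPU_IDS 6

static const bool in_mask[NR_CPU_IDS]   = {1, 1, 1, 1, 0, 0};
static const bool is_online[NR_CPU_IDS] = {1, 0, 1, 1, 1, 1};

/* wrap-around search for an online mask member: lpfc_next_online_cpu() */
static unsigned int next_online(unsigned int start)
{
	for (unsigned int i = 0; i < NR_CPU_IDS; i++) {
		unsigned int cpu = (start + i) % NR_CPU_IDS;

		if (in_mask[cpu] && is_online[cpu])
			return cpu;
	}
	return NR_CPU_IDS;
}

/* next mask member strictly after 'cpu', no wrap: cpumask_next() */
static unsigned int next_in_mask(unsigned int cpu)
{
	for (cpu++; cpu < NR_CPU_IDS; cpu++)
		if (in_mask[cpu])
			return cpu;
	return NR_CPU_IDS;
}

int main(void)
{
	unsigned int cpu_cnt = 4;	/* cpumask_weight(aff_mask) */
	unsigned int vectors = cpu_cnt;	/* min(cfg_irq_chann, cpu_cnt) */
	unsigned int cpu = 0;		/* cpumask_first(aff_mask) */
	unsigned int cpu_select = next_online(cpu);

	for (unsigned int index = 0; index < vectors; index++) {
		if (cpu_select < NR_CPU_IDS)
			printf("vector %u: EQ on cpu%u, IRQ pinned to cpu%u\n",
			       index, cpu, cpu_select);

		cpu = next_in_mask(cpu);
		cpu_select = next_online(cpu);
	}
	return 0;
}

With CPU 1 offline, vector 1 is still mapped to CPU 1's EQ slot but its interrupt lands on CPU 2, matching the patch's intent of keeping the EQ layout stable while affinity follows online CPUs.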
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index 8da7429e385a5904610bfd3f9d648b45235ff6eb..4decb53d81c362de2c2a3e9fde2eea5f45190831 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -920,7 +920,7 @@ struct lpfc_sli4_hba {
 	struct lpfc_vector_map_info *cpu_map;
 	uint16_t num_possible_cpu;
 	uint16_t num_present_cpu;
-	struct cpumask numa_mask;
+	struct cpumask irq_aff_mask;
 	uint16_t curr_disp_cpu;
 	struct lpfc_eq_intr_info __percpu *eq_info;
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
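Finally, lpfc_sli4.h renames the per-HBA mask from numa_mask to irq_aff_mask, since NUMA or NHT policy may now populate it. A minimal standalone sketch of the resulting state; fake_hba and fake_sli4_hba are hypothetical simplifications, and an unsigned long again models struct cpumask:

/* Simplified view of the mode + mask pairing introduced by the patch. */
#include <stdio.h>

enum lpfc_irq_chann_mode { NORMAL_MODE, NUMA_MODE, NHT_MODE };

struct fake_sli4_hba {
	unsigned long irq_aff_mask;	/* was: struct cpumask numa_mask */
};

struct fake_hba {
	enum lpfc_irq_chann_mode irq_chann_mode;
	struct fake_sli4_hba sli4_hba;
};

int main(void)
{
	struct fake_hba phba = {
		.irq_chann_mode = NUMA_MODE,
		.sli4_hba = { .irq_aff_mask = 0x0f },	/* node-local CPUs */
	};

	/* NORMAL_MODE leaves the mask unused and falls back to
	 * PCI_IRQ_AFFINITY; any other mode drives manual pinning.
	 */
	if (phba.irq_chann_mode != NORMAL_MODE)
		printf("manual affinity over mask 0x%lx\n",
		       phba.sli4_hba.irq_aff_mask);
	return 0;
}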