return;
}
+/**
+ * lpfc_idle_stat_delay_work - idle_stat tracking
+ * @work: pointer to the hba's idle_stat_delay_work delayed work
+ *
+ * This routine tracks per-cq idle_stat and determines polling decisions.
+ *
+ * Return codes:
+ * None
+ **/
+static void
+lpfc_idle_stat_delay_work(struct work_struct *work)
+{
+ struct lpfc_hba *phba = container_of(to_delayed_work(work),
+ struct lpfc_hba,
+ idle_stat_delay_work);
+ struct lpfc_queue *cq;
+ struct lpfc_sli4_hdw_queue *hdwq;
+ struct lpfc_idle_stat *idle_stat;
+ u32 i, idle_percent;
+ u64 wall, wall_idle, diff_wall, diff_idle, busy_time;
+
+ if (phba->pport->load_flag & FC_UNLOADING)
+ return;
+
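+ /* Skip this sampling period if the adapter is errored or offline,
+ * but keep the work rescheduled.
+ */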
+ if (phba->link_state == LPFC_HBA_ERROR ||
+ phba->pport->fc_flag & FC_OFFLINE_MODE)
+ goto requeue;
+
+ for_each_present_cpu(i) {
+ hdwq = &phba->sli4_hba.hdwq[phba->sli4_hba.cpu_map[i].hdwq];
+ cq = hdwq->io_cq;
+
+ /* Only handle this cq on its assigned (chann) CPU */
+ if (cq->chann != i)
+ continue;
+
+ idle_stat = &phba->sli4_hba.idle_stat[i];
+
+ /* get_cpu_idle_time returns values as running counters. Thus,
+ * to know the amount for this period, the prior counter values
+ * need to be subtracted from the current counter values.
+ * From there, the busy portion of the period is derived, and the
+ * idle time is expressed as 100 minus the busy percentage of the
+ * total wall time.
+ */
+ wall_idle = get_cpu_idle_time(i, &wall, 1);
+ diff_idle = wall_idle - idle_stat->prev_idle;
+ diff_wall = wall - idle_stat->prev_wall;
+
+ if (diff_wall <= diff_idle)
+ busy_time = 0;
+ else
+ busy_time = diff_wall - diff_idle;
+
+ idle_percent = div64_u64(100 * busy_time, diff_wall);
+ idle_percent = 100 - idle_percent;
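+ /* e.g. diff_wall = 200 and diff_idle = 150 gives busy_time = 50, so
+ * idle_percent = 100 - (100 * 50) / 200 = 75
+ */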
+
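+ /* Little idle headroom (under 15%) on this CPU: defer CQ processing
+ * to the workqueue; otherwise poll from softirq context via irq_poll.
+ */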
+ if (idle_percent < 15)
+ cq->poll_mode = LPFC_QUEUE_WORK;
+ else
+ cq->poll_mode = LPFC_IRQ_POLL;
+
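+ /* Save the running counters for the next sampling period */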
+ idle_stat->prev_idle = wall_idle;
+ idle_stat->prev_wall = wall;
+ }
+
+requeue:
+ schedule_delayed_work(&phba->idle_stat_delay_work,
+ msecs_to_jiffies(LPFC_IDLE_STAT_DELAY));
+}
+
static void
lpfc_hb_eq_delay_work(struct work_struct *work)
{
if (phba->pport)
lpfc_stop_vport_timers(phba->pport);
cancel_delayed_work_sync(&phba->eq_delay_work);
+ cancel_delayed_work_sync(&phba->idle_stat_delay_work);
del_timer_sync(&phba->sli.mbox_tmo);
del_timer_sync(&phba->fabric_block_timer);
del_timer_sync(&phba->eratt_poll);
INIT_DELAYED_WORK(&phba->eq_delay_work, lpfc_hb_eq_delay_work);
+ INIT_DELAYED_WORK(&phba->idle_stat_delay_work,
+ lpfc_idle_stat_delay_work);
+
return 0;
}
goto out_free_hba_cpu_map;
}
+ phba->sli4_hba.idle_stat = kcalloc(phba->sli4_hba.num_possible_cpu,
+ sizeof(*phba->sli4_hba.idle_stat),
+ GFP_KERNEL);
+ if (!phba->sli4_hba.idle_stat) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ "3390 Failed allocation for idle_stat\n");
+ rc = -ENOMEM;
+ goto out_free_hba_eq_info;
+ }
+
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
phba->sli4_hba.c_stat = alloc_percpu(struct lpfc_hdwq_stat);
if (!phba->sli4_hba.c_stat) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
"3332 Failed allocating per cpu hdwq stats\n");
rc = -ENOMEM;
- goto out_free_hba_eq_info;
+ goto out_free_hba_idle_stat;
}
#endif
return 0;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+out_free_hba_idle_stat:
+ kfree(phba->sli4_hba.idle_stat);
+#endif
out_free_hba_eq_info:
free_percpu(phba->sli4_hba.eq_info);
-#endif
out_free_hba_cpu_map:
kfree(phba->sli4_hba.cpu_map);
out_free_hba_eq_hdl:
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
free_percpu(phba->sli4_hba.c_stat);
#endif
+ kfree(phba->sli4_hba.idle_stat);
/* Free memory allocated for msi-x interrupt vector to CPU mapping */
kfree(phba->sli4_hba.cpu_map);
return 1;
}
+/**
+ * lpfc_init_idle_stat_hb - Initialize idle_stat tracking
+ * @phba: pointer to lpfc hba data structure
+ *
+ * This routine initializes the per-cq idle_stat to dynamically dictate
+ * polling decisions.
+ *
+ * Return codes:
+ * None
+ **/
+static void lpfc_init_idle_stat_hb(struct lpfc_hba *phba)
+{
+ int i;
+ struct lpfc_sli4_hdw_queue *hdwq;
+ struct lpfc_queue *cq;
+ struct lpfc_idle_stat *idle_stat;
+ u64 wall;
+
+ for_each_present_cpu(i) {
+ hdwq = &phba->sli4_hba.hdwq[phba->sli4_hba.cpu_map[i].hdwq];
+ cq = hdwq->io_cq;
+
+ /* Only handle this cq on its assigned (chann) CPU */
+ if (cq->chann != i)
+ continue;
+
+ idle_stat = &phba->sli4_hba.idle_stat[i];
+
+ idle_stat->prev_idle = get_cpu_idle_time(i, &wall, 1);
+ idle_stat->prev_wall = wall;
+
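+ /* NVMET always takes the queue_work path */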
+ if (phba->nvmet_support)
+ cq->poll_mode = LPFC_QUEUE_WORK;
+ else
+ cq->poll_mode = LPFC_IRQ_POLL;
+ }
+
+ if (!phba->nvmet_support)
+ schedule_delayed_work(&phba->idle_stat_delay_work,
+ msecs_to_jiffies(LPFC_IDLE_STAT_DELAY));
+}
+
static void lpfc_sli4_dip(struct lpfc_hba *phba)
{
uint32_t if_type;
queue_delayed_work(phba->wq, &phba->eq_delay_work,
msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+ /* start per phba idle_stat_delay heartbeat */
+ lpfc_init_idle_stat_hb(phba);
+
/* Start error attention (ERATT) polling timer */
mod_timer(&phba->eratt_poll,
jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval));
if (!ret)
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0390 Cannot schedule soft IRQ "
+ "0390 Cannot schedule queue work "
"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
cqid, cq->queue_id, raw_smp_processor_id());
}
* @cq: Pointer to CQ to be processed
* @handler: Routine to process each cqe
* @delay: Pointer to usdelay to set in case of rescheduling of the handler
+ * @poll_mode: Polling mode we were called from
*
* This routine processes completion queue entries in a CQ. While a valid
* queue element is found, the handler is called. During processing checks
static bool
__lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq,
bool (*handler)(struct lpfc_hba *, struct lpfc_queue *,
- struct lpfc_cqe *), unsigned long *delay)
+ struct lpfc_cqe *), unsigned long *delay,
+ enum lpfc_poll_mode poll_mode)
{
struct lpfc_cqe *cqe;
bool workposted = false;
arm = false;
}
+ /* Note: complete the irq_poll softirq before rearming CQ */
+ if (poll_mode == LPFC_IRQ_POLL)
+ irq_poll_complete(&cq->iop);
+
/* Track the max number of CQEs processed in 1 EQ */
if (count > cq->CQ_max_cqe)
cq->CQ_max_cqe = count;
case LPFC_MCQ:
workposted |= __lpfc_sli4_process_cq(phba, cq,
lpfc_sli4_sp_handle_mcqe,
- &delay);
+ &delay, LPFC_QUEUE_WORK);
break;
case LPFC_WCQ:
if (cq->subtype == LPFC_IO)
workposted |= __lpfc_sli4_process_cq(phba, cq,
lpfc_sli4_fp_handle_cqe,
- &delay);
+ &delay, LPFC_QUEUE_WORK);
else
workposted |= __lpfc_sli4_process_cq(phba, cq,
lpfc_sli4_sp_handle_cqe,
- &delay);
+ &delay, LPFC_QUEUE_WORK);
break;
default:
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
&cq->sched_spwork, delay);
if (!ret)
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0394 Cannot schedule soft IRQ "
+ "0394 Cannot schedule queue work "
"for cqid=%d on CPU %d\n",
cq->queue_id, cq->chann);
}
return workposted;
}
+/**
+ * lpfc_sli4_sched_cq_work - Schedules cq work
+ * @phba: Pointer to HBA context object.
+ * @cq: Pointer to CQ
+ * @cqid: CQ ID
+ *
+ * This routine checks the poll mode of the CQ corresponding to
+ * cq->chann, then either schedules a softirq or queue_work to complete
+ * cq work.
+ *
+ * The queue_work path is taken when in NVMET mode or when poll_mode is
+ * LPFC_QUEUE_WORK; otherwise the softirq path is taken.
+ *
+ **/
+static void lpfc_sli4_sched_cq_work(struct lpfc_hba *phba,
+ struct lpfc_queue *cq, uint16_t cqid)
+{
+ int ret = 0;
+
+ switch (cq->poll_mode) {
+ case LPFC_IRQ_POLL:
+ irq_poll_sched(&cq->iop);
+ break;
+ case LPFC_QUEUE_WORK:
+ default:
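+ /* kdump kernels: don't pin the work item to a particular CPU */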
+ if (is_kdump_kernel())
+ ret = queue_work(phba->wq, &cq->irqwork);
+ else
+ ret = queue_work_on(cq->chann, phba->wq, &cq->irqwork);
+ if (!ret)
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "0383 Cannot schedule queue work "
+ "for CQ eqcqid=%d, cqid=%d on CPU %d\n",
+ cqid, cq->queue_id,
+ raw_smp_processor_id());
+ }
+}
+
/**
* lpfc_sli4_hba_handle_eqe - Process a fast-path event queue entry
* @phba: Pointer to HBA context object.
struct lpfc_queue *cq = NULL;
uint32_t qidx = eq->hdwq;
uint16_t cqid, id;
- int ret = 0;
if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
else
cq->isr_timestamp = 0;
#endif
- if (is_kdump_kernel())
- ret = queue_work(phba->wq, &cq->irqwork);
- else
- ret = queue_work_on(cq->chann, phba->wq, &cq->irqwork);
- if (!ret)
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0363 Cannot schedule soft IRQ "
- "for CQ eqcqid=%d, cqid=%d on CPU %d\n",
- cqid, cq->queue_id, raw_smp_processor_id());
+ lpfc_sli4_sched_cq_work(phba, cq, cqid);
}
/**
* __lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
* @cq: Pointer to CQ to be processed
+ * @poll_mode: Enum lpfc_poll_mode to determine poll mode
*
* This routine calls the cq processing routine with the handler for
* fast path CQEs.
* the delay indicates when to reschedule it.
**/
static void
-__lpfc_sli4_hba_process_cq(struct lpfc_queue *cq)
+__lpfc_sli4_hba_process_cq(struct lpfc_queue *cq,
+ enum lpfc_poll_mode poll_mode)
{
struct lpfc_hba *phba = cq->phba;
unsigned long delay;
/* process and rearm the CQ */
workposted |= __lpfc_sli4_process_cq(phba, cq, lpfc_sli4_fp_handle_cqe,
- &delay);
+ &delay, poll_mode);
if (delay) {
if (is_kdump_kernel())
&cq->sched_irqwork, delay);
if (!ret)
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0367 Cannot schedule soft IRQ "
- "for cqid=%d on CPU %d\n",
- cq->queue_id, cq->chann);
+ "0367 Cannot schedule queue work "
+ "for cqid=%d on CPU %d\n",
+ cq->queue_id, cq->chann);
}
/* wake up worker thread if there are works to be done */
{
struct lpfc_queue *cq = container_of(work, struct lpfc_queue, irqwork);
- __lpfc_sli4_hba_process_cq(cq);
+ __lpfc_sli4_hba_process_cq(cq, LPFC_QUEUE_WORK);
}
/**
struct lpfc_queue *cq = container_of(to_delayed_work(work),
struct lpfc_queue, sched_irqwork);
- __lpfc_sli4_hba_process_cq(cq);
+ __lpfc_sli4_hba_process_cq(cq, LPFC_QUEUE_WORK);
}
/**
return status;
}
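+/* irq_poll callback: drains the CQ from softirq context. The matching
+ * irq_poll_complete() is issued from __lpfc_sli4_process_cq() before the
+ * CQ is rearmed.
+ */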
+static int lpfc_cq_poll_hdler(struct irq_poll *iop, int budget)
+{
+ struct lpfc_queue *cq = container_of(iop, struct lpfc_queue, iop);
+
+ __lpfc_sli4_hba_process_cq(cq, LPFC_IRQ_POLL);
+
+ return 1;
+}
+
/**
* lpfc_cq_create - Create a Completion Queue on the HBA
* @phba: HBA structure that indicates port to create a queue on.
if (cq->queue_id > phba->sli4_hba.cq_max)
phba->sli4_hba.cq_max = cq->queue_id;
+
+ irq_poll_init(&cq->iop, LPFC_IRQ_POLL_WEIGHT, lpfc_cq_poll_hdler);
out:
mempool_free(mbox, phba->mbox_mem_pool);
return status;