sched/fair: Add sched_group per-CPU max capacity
authorMorten Rasmussen <morten.rasmussen@arm.com>
Wed, 4 Jul 2018 10:17:41 +0000 (11:17 +0100)
committerIngo Molnar <mingo@kernel.org>
Mon, 10 Sep 2018 09:05:49 +0000 (11:05 +0200)
The current sg->min_capacity tracks the lowest per-CPU compute capacity
available in the sched_group when rt/irq pressure is taken into account.
Minimum capacity isn't the ideal metric for tracking if a sched_group
needs offloading to another sched_group for some scenarios, e.g. a
sched_group with multiple CPUs if only one is under heavy pressure.
Tracking maximum capacity isn't perfect either but a better choice for
some situations as it indicates that the sched_group definitely compute
capacity constrained either due to rt/irq pressure on all CPUs or
asymmetric CPU capacities (e.g. big.LITTLE).

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: gaku.inami.xh@renesas.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1530699470-29808-4-git-send-email-morten.rasmussen@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/sched/fair.c
kernel/sched/sched.h
kernel/sched/topology.c

index 6e04bea..fe04315 100644 (file)
@@ -7557,13 +7557,14 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
        cpu_rq(cpu)->cpu_capacity = capacity;
        sdg->sgc->capacity = capacity;
        sdg->sgc->min_capacity = capacity;
+       sdg->sgc->max_capacity = capacity;
 }
 
 void update_group_capacity(struct sched_domain *sd, int cpu)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
-       unsigned long capacity, min_capacity;
+       unsigned long capacity, min_capacity, max_capacity;
        unsigned long interval;
 
        interval = msecs_to_jiffies(sd->balance_interval);
@@ -7577,6 +7578,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 
        capacity = 0;
        min_capacity = ULONG_MAX;
+       max_capacity = 0;
 
        if (child->flags & SD_OVERLAP) {
                /*
@@ -7607,6 +7609,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                        }
 
                        min_capacity = min(capacity, min_capacity);
+                       max_capacity = max(capacity, max_capacity);
                }
        } else  {
                /*
@@ -7620,12 +7623,14 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 
                        capacity += sgc->capacity;
                        min_capacity = min(sgc->min_capacity, min_capacity);
+                       max_capacity = max(sgc->max_capacity, max_capacity);
                        group = group->next;
                } while (group != child->groups);
        }
 
        sdg->sgc->capacity = capacity;
        sdg->sgc->min_capacity = min_capacity;
+       sdg->sgc->max_capacity = max_capacity;
 }
 
 /*
@@ -7721,16 +7726,27 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 }
 
 /*
- * group_smaller_cpu_capacity: Returns true if sched_group sg has smaller
+ * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
  * per-CPU capacity than sched_group ref.
  */
 static inline bool
-group_smaller_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
        return sg->sgc->min_capacity * capacity_margin <
                                                ref->sgc->min_capacity * 1024;
 }
 
+/*
+ * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
+ * per-CPU capacity_orig than sched_group ref.
+ */
+static inline bool
+group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
+{
+       return sg->sgc->max_capacity * capacity_margin <
+                                               ref->sgc->max_capacity * 1024;
+}
+
 static inline enum
 group_type group_classify(struct sched_group *group,
                          struct sg_lb_stats *sgs)
@@ -7876,7 +7892,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
         * power/energy consequences are not considered.
         */
        if (sgs->sum_nr_running <= sgs->group_weight &&
-           group_smaller_cpu_capacity(sds->local, sg))
+           group_smaller_min_cpu_capacity(sds->local, sg))
                return false;
 
 asym_packing:
index 7dbf67d..fe17e0b 100644 (file)
@@ -1197,6 +1197,7 @@ struct sched_group_capacity {
         */
        unsigned long           capacity;
        unsigned long           min_capacity;           /* Min per-CPU capacity in group */
+       unsigned long           max_capacity;           /* Max per-CPU capacity in group */
        unsigned long           next_update;
        int                     imbalance;              /* XXX unrelated to capacity but shared group state */
 
index b0cdf5e..2536e1b 100644 (file)
@@ -693,6 +693,7 @@ static void init_overlap_sched_group(struct sched_domain *sd,
        sg_span = sched_group_span(sg);
        sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
        sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 }
 
 static int
@@ -852,6 +853,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)
 
        sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
        sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
+       sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
 
        return sg;
 }