现在基于EEVDF的社区patch进行逐一解析,尽可能抓住重点来介绍eevdf
添加两个参数avg_vruntime 和 avg_load
@@ -554,6 +554,9 @@ struct cfs_rq { unsigned int idle_nr_running; /* SCHED_IDLE */ unsigned int idle_h_nr_running; /* SCHED_IDLE */ + s64 avg_vruntime; + u64 avg_load; + u64 exec_clock; u64 min_vruntime;
核心说明
/* * Compute virtual time from the per-task service numbers: * * Fair schedulers conserve lag: * + * \Sum lag_i = 0 * * Where lag_i is given by: * * lag_i = S - s_i = w_i * (V - v_i) * * Where S is the ideal service time and V is its virtual time counterpart. */
这个注释很容易理解,上篇论文提过,lag_i = S - s_i;因为引入了虚拟时间 V,所以可以写成 lag_i = w_i * (V - v_i)。又因为公平调度器要求 \Sum lag_i = 0,所以可以转换公式如下
/* Therefore: * * \Sum lag_i = 0 * \Sum w_i * (V - v_i) = 0 * \Sum w_i * V - w_i * v_i = 0 * * From which we can solve an expression for V in v_i (which we have in * se->vruntime): * * \Sum v_i * w_i \Sum v_i * w_i * V = -------------- = -------------- * \Sum w_i W * * Specifically, this is the weighted average of all entity virtual runtimes. */
因为 \Sum lag_i = 0,所以解出的 V(所有实体虚拟运行时间的加权平均)公式是
/* * [[ NOTE: this is only equal to the ideal scheduler under the condition * that join/leave operations happen at lag_i = 0, otherwise the * virtual time has non-contiguous motion equivalent to: * * V +-= lag_i / W * * Also see the comment in place_entity() that deals with this. ]] * * However, since v_i is u64, and the multiplication could easily overflow * transform it into a relative form that uses smaller quantities: * * Substitute: v_i == (v_i - v0) + v0 * * \Sum ((v_i - v0) + v0) * w_i \Sum (v_i - v0) * w_i * V = ---------------------------- = --------------------- + v0 * W W * * Which we track using: * * v0 := cfs_rq->min_vruntime * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime * \Sum w_i := cfs_rq->avg_load * * Since min_vruntime is a monotonic increasing variable that closely tracks * the per-task service, these deltas: (v_i - v), will be in the order of the * maximal (virtual) lag induced in the system due to quantisation. * * Also, we use scale_load_down() to reduce the size. * * As measured, the max (key * weight) value was ~44 bits for a kernel build. */
为了避免u64的overflow,所以引用一个替代v_i == (v_i - v0) + v0,代入原公式如下
这里将公式对接到代码上
v0 = cfs_rq->min_vruntime sum(v_i - v0) * w_i = cfs_rq->avg_vruntime sum(w_i) = cfs_rq->avg_load
对于v_i - v0的函数如下
/*
 * Return this entity's vruntime relative to the runqueue's min_vruntime,
 * i.e. the (v_i - v0) term used by the avg_vruntime bookkeeping.
 * The unsigned subtraction cast to s64 keeps the result correct even if
 * the monotonically increasing vruntime counters wrap around.
 */
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	return (s64)(se->vruntime - cfs_rq->min_vruntime);
}
那么cfs_rq->avg_vruntime和cfs_rq->avg_load的计算函数就是
/*
 * Fold @se into the runqueue's weighted-average bookkeeping:
 *
 *	avg_vruntime += (v_i - v0) * w_i
 *	avg_load     += w_i
 */
static void avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	unsigned long w = scale_load_down(se->load.weight);

	cfs_rq->avg_load += w;
	cfs_rq->avg_vruntime += entity_key(cfs_rq, se) * w;
}

/*
 * Remove @se's contribution from the weighted-average bookkeeping;
 * the exact inverse of avg_vruntime_add().
 */
static void avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	unsigned long w = scale_load_down(se->load.weight);

	cfs_rq->avg_load -= w;
	cfs_rq->avg_vruntime -= entity_key(cfs_rq, se) * w;
}
随着时间的推移,min_vruntime会不断更新,所以函数如下
/*
 * Advance min_vruntime to @vruntime if that moves it forward, rebasing
 * avg_vruntime by the same amount so the relative keys stay consistent.
 * Returns the (possibly updated) min_vruntime.
 */
static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
{
	u64 cur = cfs_rq->min_vruntime;
	/* open coded max_vruntime() so avg_vruntime can be rebased */
	s64 fwd = (s64)(vruntime - cur);

	if (fwd <= 0)
		return cur;

	avg_vruntime_update(cfs_rq, fwd);
	return vruntime;
}
同样的avg_vruntime 也会随着时间更新,所以函数如下
/*
 * Rebase avg_vruntime when the reference point v0 (min_vruntime) moves
 * forward by @delta:
 *
 *	v0' = v0 + d  ==>  avg_vruntime' = avg_vruntime - d * avg_load
 *
 * since every stored (v_i - v0) * w_i term shrinks by d * w_i.
 */
static inline void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
{
	cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
}
还记得计算V的公式吗,如下
其实就是
V = cfs_rq->avg_vruntime / cfs_rq->avg_load + cfs_rq->min_vruntime
所以对于V的函数实现如下
/*
 * Compute V, the load-weighted average vruntime of all runnable entities:
 *
 *	V = avg_vruntime / avg_load + min_vruntime
 *
 * The currently running entity is kept outside the rbtree bookkeeping,
 * so its contribution is folded in on the fly here.
 */
u64 avg_vruntime(struct cfs_rq *cfs_rq)
{
	struct sched_entity *running = cfs_rq->curr;
	s64 sum = cfs_rq->avg_vruntime;
	long total = cfs_rq->avg_load;

	if (running && running->on_rq) {
		unsigned long w = scale_load_down(running->load.weight);

		sum += entity_key(cfs_rq, running) * w;
		total += w;
	}

	if (total)
		sum = div_s64(sum, total);

	return cfs_rq->min_vruntime + sum;
}
和之前 CFS 维护 min_vruntime 的思路一致,这两个统计量必须在实体进出就绪队列时同步维护:__enqueue_entity 中调用 avg_vruntime_add,__dequeue_entity 中调用 avg_vruntime_sub;reweight_entity 改变权重时也要先 sub 再以新权重 add 回去,如下
@@ -642,12 +767,14 @@ static inline bool __entity_less(struct */ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { + avg_vruntime_add(cfs_rq, se); rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less); } static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline); + avg_vruntime_sub(cfs_rq, se); } struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) @@ -3379,6 +3506,8 @@ static void reweight_entity(struct cfs_r /* commit outstanding execution time */ if (cfs_rq->curr == se) update_curr(cfs_rq); + else + avg_vruntime_sub(cfs_rq, se); update_load_sub(&cfs_rq->load, se->load.weight); } dequeue_load_avg(cfs_rq, se); @@ -3394,9 +3523,11 @@ static void reweight_entity(struct cfs_r #endif enqueue_load_avg(cfs_rq, se); - if (se->on_rq) + if (se->on_rq) { update_load_add(&cfs_rq->load, se->load.weight); - + if (cfs_rq->curr != se) + avg_vruntime_add(cfs_rq, se); + } }
这里介绍了 eevdf 的第一个 patch,其作用是添加了 avg_vruntime 和 avg_load 两个字段,并结合论文给出了加权平均虚拟时间 V 的计算函数
https://lore.kernel.org/lkml/20230531115839.089944915@infradead.org/ https://lore.kernel.org/lkml/20230531124603.654144274@infradead.org/