Linux内核的进程负载均衡机制

发布时间：2019-04-11 23:54:40 所属栏目：Windows 来源：金庆辉

导读：概述：在多核系统中，为了更好地利用多CPU并行能力，进程调度器可以将进程负载尽可能地平均到各个CPU上。在具体实现中，如何选择将进程迁移到的目标CPU，除了考虑各个CPU的负载平衡，还需要将Cache利用纳入权衡因素。同时，对于进程A唤醒进程B这个……

以下是rebalance_domains()函数核心流程。值得注意的是，每个层级的调度间隔不是固定的，而是临时计算出来的，它介于一个可通过proc接口配置的最小值和最大值之间。

以下是对CPU的每个层级调度域调用load_balance()函数核心流程，目的是把一些进程迁移到指定的CPU(该场景就是当前CPU)。

以我的服务器为例，观察不同层级调度域的调度间隔范围，时间单位为jiffies。

可见，SMT负载均衡频率最高，越往上层越低。这也符合体系结构特点，在越低层次迁移进程代价越小(Cache利用率高)，所以可以更加频繁一点。

CPU进入idle前负载均衡

当进程调度函数__schedule()即将切换到idle进程之前，会先进行一次负载均衡，以避免当前CPU空闲。

/*
 * Excerpt of the scheduler entry point; each `...` marks code that was
 * elided from this listing. Only the new-idle balancing hook is shown.
 */
static void __sched __schedule(void) 
{ 
        ... 
        /*
         * The runqueue has no runnable tasks left: try to pull work from
         * other CPUs before this CPU ends up switching to the idle task.
         */
        if (unlikely(!rq->nr_running)) 
                idle_balance(cpu, rq); 
 
        ... 
}

核心函数idle_balance()。基本上也是尽可能在低层调度域中负载均衡。

/*
 * idle_balance is called by schedule() if this_cpu is about to become
 * idle. Attempts to pull tasks from other CPUs.
 *
 * this_cpu: CPU that is about to go idle and will receive pulled tasks.
 * this_rq:  runqueue passed in by the caller for this_cpu.
 *
 * Locking: the function drops this_rq->lock around the domain walk and
 * re-takes it afterwards, so the caller is expected to hold that lock on
 * entry - TODO confirm against the caller. The early "goto out" path never
 * drops the lock.
 *
 * Side effects visible here: updates this_rq->idle_stamp,
 * this_rq->next_balance, this_rq->max_idle_balance_cost and each visited
 * domain's max_newidle_lb_cost.
 */
void idle_balance(int this_cpu, struct rq *this_rq) 
{ 
    /* Default upper bound for the next balance: HZ jiffies from now. */
    unsigned long next_balance = jiffies + HZ; 
    struct sched_domain *sd; 
    int pulled_task = 0; 
    /* Time spent so far in load_balance() during this call. */
    u64 curr_cost = 0; 
 
    /* Stamp the moment this runqueue started going idle. */
    this_rq->idle_stamp = rq_clock(this_rq); 
 
    /*
     * Skip balancing when this CPU's average idle time is below the
     * sysctl_sched_migration_cost tunable, or when the root domain's
     * overload flag is clear. Only next_balance is refreshed (from the
     * runqueue's base domain) in that case.
     * NOTE(review): the original annotation describes the second condition
     * as "all CPUs in this CPU's sched domain are idle"; the exact meaning
     * of rd->overload is not visible here - confirm.
     */
    if (this_rq->avg_idle < sysctl_sched_migration_cost || 
        !this_rq->rd->overload) { 
        rcu_read_lock(); 
        sd = rcu_dereference_check_sched_domain(this_rq->sd); 
        if (sd) 
            update_next_balance(sd, 0, &next_balance); 
        rcu_read_unlock(); 
 
        goto out; 
    } 
 
    /*
     * Drop the rq->lock, but keep IRQ/preempt disabled.
     */
    raw_spin_unlock(&this_rq->lock); 
 
    update_blocked_averages(this_cpu); 
    rcu_read_lock(); 
    /*
     * Walk the sched domains from the lowest level upwards and leave the
     * loop as soon as one task has been migrated successfully.
     */
    for_each_domain(this_cpu, sd) { 
        /* Filled in by load_balance() through the pointer; not read here. */
        int should_balance; 
        u64 t0, domain_cost; 
 
        /* Domains without SD_LOAD_BALANCE are skipped entirely. */
        if (!(sd->flags & SD_LOAD_BALANCE)) 
            continue; 
 
        /*
         * If the balancing cost accumulated so far plus this level's
         * historical maximum new-idle balancing cost already exceeds the
         * CPU's average idle time, further balancing is not worthwhile.
         * NOTE(review): the original annotation says
         * sd->max_newidle_lb_cost is slowly decreased inside
         * load_balance(); that is not visible here - confirm.
         */
        if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) { 
            update_next_balance(sd, 0, &next_balance); 
            break; 
        } 
 
        /* On the author's machine SD_BALANCE_NEWIDLE is always set. */
        if (sd->flags & SD_BALANCE_NEWIDLE) { 
            t0 = sched_clock_cpu(this_cpu); 
 
            pulled_task = load_balance(this_cpu, this_rq, 
                           sd, CPU_NEWLY_IDLE, 
                           &should_balance); 
            
            /* Track the most expensive new-idle balance seen at this level. */
            domain_cost = sched_clock_cpu(this_cpu) - t0; 
            if (domain_cost > sd->max_newidle_lb_cost) 
                sd->max_newidle_lb_cost = domain_cost; 
 
            /* Accumulate the total balancing cost of this call. */
            curr_cost += domain_cost; 
        } 
 
        update_next_balance(sd, 0, &next_balance); 
 
        /*
         * Stop searching for tasks to pull if there are
         * now runnable tasks on this rq.
         */
        if (pulled_task || this_rq->nr_running > 0) { 
            /* The CPU is not going idle after all: clear the idle stamp. */
            this_rq->idle_stamp = 0; 
            break; 
        } 
    } 
    rcu_read_unlock(); 
 
    /* Re-take the lock dropped before the domain walk. */
    raw_spin_lock(&this_rq->lock); 
 
out: 
    /* Move the next balance forward */ 
    if (time_after(this_rq->next_balance, next_balance)) 
        this_rq->next_balance = next_balance; 
 
    /* Remember the largest total cost observed for an idle balance. */
    if (curr_cost > this_rq->max_idle_balance_cost) 
        this_rq->max_idle_balance_cost = curr_cost; 
}

其它需要用到SMP负载均衡模型的时机

（编辑：南平站长网）

【声明】本站内容均来自网络，其相关言论仅代表作者个人观点，不代表本站立场。若无意侵犯到您的权利，请及时联系站长删除相关内容！

3/10

首页

尾页

win10家庭版激活密钥及	win10x系统怎么没有本
win10旗舰版64位纯净版	怎么设定win10动态桌面