IPVS收发数据相关的速率计算

本文介绍IPVS中的收发数据相关的速率计算。

使能估算器

ip_vs_start_estimator函数将第二个参数stats包含的estimator挂载到ipvs网络命名空间中的估算器列表中。

/*
 * Link the estimator embedded in @stats into the estimator list kept in
 * the per-netns IPVS state.
 *
 * @ipvs:  IPVS state of the network namespace that owns the list
 * @stats: statistics object whose embedded estimator gets registered
 */
void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
    struct list_head *node = &stats->est.list;

    INIT_LIST_HEAD(node);

    /* the list insertion is done with est_lock held, bottom halves off */
    spin_lock_bh(&ipvs->est_lock);
    list_add(node, &ipvs->est_list);
    spin_unlock_bh(&ipvs->est_lock);
}

以下函数调用ip_vs_start_estimator启动估算器。其中函数__ip_vs_update_dest中的调用用于估算每个真实服务器相关的速率;函数ip_vs_add_service中的调用用来估算虚拟服务中的速率;函数ip_vs_control_net_init_sysctl中的调用用来估算整个ipvs网络命名空间的总速率(tot_stats)。

/*
 * Excerpt (body abridged, the rest of the function is not shown):
 * when @add is non-zero, the statistics of @dest are registered with the
 * estimator list of the service's network namespace.
 */
static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add)
{
    struct netns_ipvs *ipvs = svc->ipvs;

    if (add) {
        /* start rate estimation for this destination's counters */
        ip_vs_start_estimator(svc->ipvs, &dest->stats);
}
/*
 * Excerpt (body abridged): registers the statistics of the service with
 * the estimator list of @ipvs.
 */
static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p)
{
    /* NOTE(review): svc is declared and set up in code omitted from this excerpt */
    ip_vs_start_estimator(ipvs, &svc->stats);
}
/*
 * Excerpt (body abridged): registers the namespace-wide totals
 * (ipvs->tot_stats) with the estimator list of @ipvs.
 */
static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
    /* start rate estimation for the per-netns total counters */
    ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
}

ipvs性能估算器初始化由函数ip_vs_estimator_net_init完成。其中的关键是初始化一个估算定时器,时长为2秒钟,超时处理函数为:estimation_timer。此设置是网络命名空间独立的,每个命名空间都有一个ipvs估算器。

/*
 * Per-netns estimator setup: initialise the estimator list and its lock,
 * bind the timer to estimation_timer() and arm it to fire 2*HZ jiffies
 * (two seconds) from now.
 *
 * @ipvs: IPVS state of the network namespace being initialised
 *
 * Returns 0; none of the steps performed here reports a failure.
 */
int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
{
    INIT_LIST_HEAD(&ipvs->est_list);
    spin_lock_init(&ipvs->est_lock);
    timer_setup(&ipvs->est_timer, estimation_timer, 0);
    mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);

    /* the function is declared int, so it must return a value */
    return 0;
}

在超时函数estimation_timer中,计算5个速率值:连接速率、输入报文速率、输出报文速率、输入数据速率和输出数据速率。以上的5个速率值每2秒钟更新一次。速率估算的方式是:每2秒计算一次这2秒内的速率,此值在最终的平均速率中占比为1/4,相当于对最近约8秒的速率做加权平均,参见以下的公式:

avgrate = avgrate*(1-W) + rate*W

where W = 2^(-2)

以每秒新建连接cps为例,e->cps = e->cps + (rate - e->cps) * 1/4 = e->cps * (1-1/4) + rate * 1/4。为防止右移操作导致的精度丢失,内核在计算时将连接数和报文数的增量左移了9位;对于inbytes和outbytes数据,则左移了4位。因此e->cps中保存的是放大2^9倍后的每2秒新建连接数的加权平均值(等价于放大2^10倍后的每秒新建连接数)。

/*
 * Timer callback of the per-netns estimator.
 *
 * Walks every estimator on ipvs->est_list, refreshes the summed counters
 * of the owning statistics object and folds the delta since the previous
 * run into the five running averages (cps, inpps, outpps, inbps, outbps).
 * Each average is updated as avg += (rate - avg) >> 2, which equals
 * avg * 3/4 + rate * 1/4. The timer is re-armed for 2*HZ jiffies at the end.
 */
static void estimation_timer(struct timer_list *t)
{
    struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
    struct ip_vs_estimator *cur;

    spin_lock(&ipvs->est_lock);
    list_for_each_entry(cur, &ipvs->est_list, list) {
        struct ip_vs_stats *owner = container_of(cur, struct ip_vs_stats, est);
        u64 rate;

        spin_lock(&owner->lock);
        ip_vs_read_cpu_stats(&owner->kstats, owner->cpustats);

        /*
         * Connection and packet rates: a shift by 9 of a two-second
         * delta is the per-second value scaled by 2^10.
         */
        rate = (owner->kstats.conns - cur->last_conns) << 9;
        cur->last_conns = owner->kstats.conns;
        cur->cps += ((s64)rate - (s64)cur->cps) >> 2;

        rate = (owner->kstats.inpkts - cur->last_inpkts) << 9;
        cur->last_inpkts = owner->kstats.inpkts;
        cur->inpps += ((s64)rate - (s64)cur->inpps) >> 2;

        rate = (owner->kstats.outpkts - cur->last_outpkts) << 9;
        cur->last_outpkts = owner->kstats.outpkts;
        cur->outpps += ((s64)rate - (s64)cur->outpps) >> 2;

        /*
         * Byte rates: a shift by 4 of a two-second delta is the
         * per-second value scaled by 2^5.
         */
        rate = (owner->kstats.inbytes - cur->last_inbytes) << 4;
        cur->last_inbytes = owner->kstats.inbytes;
        cur->inbps += ((s64)rate - (s64)cur->inbps) >> 2;

        rate = (owner->kstats.outbytes - cur->last_outbytes) << 4;
        cur->last_outbytes = owner->kstats.outbytes;
        cur->outbps += ((s64)rate - (s64)cur->outbps) >> 2;

        spin_unlock(&owner->lock);
    }
    spin_unlock(&ipvs->est_lock);

    /* fire again in two seconds */
    mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}

估算超时函数estimation_timer中的左移操作,在速率读取函数ip_vs_read_estimator中通过右移操作进行还原。以cps为例,由于e->cps中保存的是按2秒统计并左移9位的连接数量,在进行右移操作时,不是右移9位,而是10位,即得到每秒的连接数量。右移之前加上的0x1FF(字节速率为0xF)用于近似四舍五入。

/*
 * Copy the current rate estimates of @stats into @dst, undoing the
 * fixed-point scaling: connection and packet rates are shifted right by
 * 10 after adding 0x1FF, byte rates are shifted right by 5 after adding
 * 0xF (the added constants round the result instead of truncating it).
 *
 * @dst:   receives cps, inpps, outpps, inbps and outbps
 * @stats: statistics object whose embedded estimator is read
 */
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
{
    struct ip_vs_estimator *est = &stats->est;

    /* connection / packet rates */
    dst->cps    = (est->cps    + 0x1FF) >> 10;
    dst->inpps  = (est->inpps  + 0x1FF) >> 10;
    dst->outpps = (est->outpps + 0x1FF) >> 10;

    /* byte rates */
    dst->inbps  = (est->inbps  + 0xF) >> 5;
    dst->outbps = (est->outbps + 0xF) >> 5;
}

ip_vs_read_cpu_stats读取由顺序锁保护的统计数据。将每个处理器的统计数据进行累加操作。

/*
 * Sum the per-CPU counters in @stats into @sum.
 *
 * The first CPU visited overwrites @sum, every following CPU is added on
 * top, so @sum does not need to be cleared by the caller. Each CPU's
 * counters are read inside a u64_stats fetch/retry loop so that a
 * consistent snapshot of that CPU is used.
 *
 * @sum:   destination for the accumulated counters
 * @stats: per-CPU counter set to read
 */
static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, struct ip_vs_cpu_stats __percpu *stats)
{
    int i;              /* CPU iterator for for_each_possible_cpu() */
    bool add = false;   /* false until the first CPU has seeded @sum */

    for_each_possible_cpu(i) {
        struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
        unsigned int start;
        u64 conns, inpkts, outpkts, inbytes, outbytes;

        if (add) {
            /* snapshot into locals first so a retry never double-adds */
            do {
                start = u64_stats_fetch_begin(&s->syncp);
                conns = s->cnt.conns;
                inpkts = s->cnt.inpkts;
                outpkts = s->cnt.outpkts;
                inbytes = s->cnt.inbytes;
                outbytes = s->cnt.outbytes;
            } while (u64_stats_fetch_retry(&s->syncp, start));
            sum->conns += conns;
            sum->inpkts += inpkts;
            sum->outpkts += outpkts;
            sum->inbytes += inbytes;
            sum->outbytes += outbytes;
        } else {
            /* first CPU: plain assignment, safe to repeat on retry */
            add = true;
            do {
                start = u64_stats_fetch_begin(&s->syncp);
                sum->conns = s->cnt.conns;
                sum->inpkts = s->cnt.inpkts;
                sum->outpkts = s->cnt.outpkts;
                sum->inbytes = s->cnt.inbytes;
                sum->outbytes = s->cnt.outbytes;
            } while (u64_stats_fetch_retry(&s->syncp, start));
        }
    }
}

输入统计

函数ip_vs_in_stats用于输入信息的统计。由代码可见,在接收到一个数据包之后,内核一方面会增加其最终的真实服务器的输入报文和输入数据长度,还会增加此数据包匹配的虚拟服务的输入报文和输入数据长度。最后,还将增加ipvs网络命名空间中的输入统计信息。

统计信息的修改,由顺序锁进行保护。

/*
 * Account one incoming packet of skb->len bytes on the current CPU.
 *
 * The packet is counted three times: on the connection's destination, on
 * the service that destination belongs to, and on the namespace-wide
 * totals. Nothing is counted when the connection has no destination or
 * the destination does not carry IP_VS_DEST_F_AVAILABLE.
 *
 * @cp:  connection the packet belongs to
 * @skb: the packet being accounted
 */
static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
    struct ip_vs_dest *dest = cp->dest;
    struct netns_ipvs *ipvs = cp->ipvs;
    struct ip_vs_cpu_stats *pcpu;
    struct ip_vs_service *svc;

    if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE))
        return;

    /* destination (real server) counters */
    pcpu = this_cpu_ptr(dest->stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.inpkts++;
    pcpu->cnt.inbytes += skb->len;
    u64_stats_update_end(&pcpu->syncp);

    /* virtual service counters */
    svc = rcu_dereference(dest->svc);
    pcpu = this_cpu_ptr(svc->stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.inpkts++;
    pcpu->cnt.inbytes += skb->len;
    u64_stats_update_end(&pcpu->syncp);

    /* namespace-wide totals */
    pcpu = this_cpu_ptr(ipvs->tot_stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.inpkts++;
    pcpu->cnt.inbytes += skb->len;
    u64_stats_update_end(&pcpu->syncp);
}

以上输入统计信息函数在ip_vs_in函数中调用,而ip_vs_in函数在NF_INET_LOCAL_IN和NF_INET_LOCAL_OUT两个hook点上都会被调用。所以此统计函数可统计由系统外部或者应用层进入ipvs系统的数据。另外,对于调度失败的情况,例如tcp_conn_schedule函数中,如果ignored未设置,ip_vs_leave函数中也可能调用ip_vs_in_stats函数增加统计信息。

/*
 * Excerpt (truncated, the rest of the function is not shown):
 * asks ip_vs_schedule() for a connection; when none is returned and
 * @ignored is not positive, a zero @ignored hands the packet to
 * ip_vs_leave(), whose result is stored in *verdict.
 */
static int tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
          int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{
    /* NOTE(review): svc is looked up in code omitted from this excerpt */
    if (svc) {
        int ignored;
        /*
         * Let the virtual server select a real server for the incoming connection, and create a connection entry.
         */
        *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
        if (!*cpp && ignored <= 0) {
            /* ignored == 0: scheduling failed without the packet being ignored */
            if (!ignored)
                *verdict = ip_vs_leave(svc, skb, pd, iph);

输出统计

函数ip_vs_out_stats用于输出信息的统计。与以上的输入统计函数ip_vs_in_stats类似,此函数将增加真实服务器、虚拟服务和ipvs网络命名空间三者中的输出统计信息。统计信息的修改,由顺序锁进行保护。

/*
 * Account one outgoing packet of skb->len bytes on the current CPU.
 *
 * The packet is counted on the connection's destination, on the service
 * that destination belongs to, and on the namespace-wide totals. Nothing
 * is counted when the connection has no destination or the destination
 * does not carry IP_VS_DEST_F_AVAILABLE.
 *
 * @cp:  connection the packet belongs to
 * @skb: the packet being accounted
 */
static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
    struct ip_vs_dest *dest = cp->dest;
    struct netns_ipvs *ipvs = cp->ipvs;
    struct ip_vs_cpu_stats *pcpu;
    struct ip_vs_service *svc;

    if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE))
        return;

    /* destination (real server) counters */
    pcpu = this_cpu_ptr(dest->stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.outpkts++;
    pcpu->cnt.outbytes += skb->len;
    u64_stats_update_end(&pcpu->syncp);

    /* virtual service counters */
    svc = rcu_dereference(dest->svc);
    pcpu = this_cpu_ptr(svc->stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.outpkts++;
    pcpu->cnt.outbytes += skb->len;
    u64_stats_update_end(&pcpu->syncp);

    /* namespace-wide totals */
    pcpu = this_cpu_ptr(ipvs->tot_stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.outpkts++;
    pcpu->cnt.outbytes += skb->len;
    u64_stats_update_end(&pcpu->syncp);
}

以上输出统计信息函数,在NF_INET_LOCAL_IN,NF_INET_FORWARD和NF_INET_LOCAL_OUT三个hook点都有调用流程。对于这三个hook点,在NAT转发模式下,如果找到匹配的连接,表明是回复的报文,增加统计信息。

连接统计

连接信息统计函数ip_vs_conn_stats,用来增加真实服务器、虚拟服务和ipvs网络命名空间三者中的连接统计信息。统计信息的修改,由顺序锁进行保护。

/*
 * Count one new connection on the current CPU in three places: the
 * connection's destination, the service @svc and the namespace-wide
 * totals of svc->ipvs.
 *
 * @cp:  the connection being counted; cp->dest is dereferenced unchecked
 * @svc: the service the connection was created for
 */
static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
    struct netns_ipvs *ipvs = svc->ipvs;
    struct ip_vs_cpu_stats *pcpu;

    /* destination (real server) counter */
    pcpu = this_cpu_ptr(cp->dest->stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.conns++;
    u64_stats_update_end(&pcpu->syncp);

    /* virtual service counter */
    pcpu = this_cpu_ptr(svc->stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.conns++;
    u64_stats_update_end(&pcpu->syncp);

    /* namespace-wide total */
    pcpu = this_cpu_ptr(ipvs->tot_stats.cpustats);
    u64_stats_update_begin(&pcpu->syncp);
    pcpu->cnt.conns++;
    u64_stats_update_end(&pcpu->syncp);
}

以上连接统计函数在ip_vs_sched_persist、ip_vs_new_conn_out和ip_vs_schedule函数中都有调用,要注意的是其在创建新的ipvs连接之后,才调用此函数。如下的函数ip_vs_new_conn_out:

/*
 * Excerpt (body abridged; declarations, setup of param/daddr/flags/ct and
 * the final return are not shown): creates a new connection and, only
 * after creation succeeded, counts it via ip_vs_conn_stats().
 */
struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, struct ip_vs_dest *dest, 
                      struct sk_buff *skb, const struct ip_vs_iphdr *iph, __be16 dport,  __be16 cport)
{
    cp = ip_vs_conn_new(&param, dest->af, daddr, dport, flags, dest, 0);
    if (!cp) {
        /* creation failed: release ct if one is held, then bail out */
        if (ct) ip_vs_conn_put(ct);
        return NULL;
    }
    if (ct) {
        /* attach ct as the controlling connection of cp, then release it */
        ip_vs_control_add(cp, ct);
        ip_vs_conn_put(ct);
    }
    /* the connection exists now, so account it */
    ip_vs_conn_stats(cp, svc);
}

Linux内核版本 4.15

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页