IPVS的PE引擎SIP

如下配置命令指定使用Persistence功能,并且PE(Persistence Engine)指定为sip。

$ sudo ipvsadm -A -t 207.175.44.110:80 -s rr -p --pe sip
$ 

SIP PE初始化

初始化函数ip_vs_sip_init,执行sip PE的注册工作,将ip_vs_sip_pe结构注册到全局的pe链表ip_vs_pe上。

/*
 * Descriptor of the SIP persistence engine (PE).
 *
 * The name "sip" is the key used to look the engine up by name; n_list is
 * the list node that links the descriptor into the global PE list once it
 * has been registered.  The remaining members are the PE callbacks.
 */
static struct ip_vs_pe ip_vs_sip_pe = {
        /* identity and bookkeeping */
        .name         = "sip",
        .module       = THIS_MODULE,
        .refcnt       = ATOMIC_INIT(0),
        .n_list       = LIST_HEAD_INIT(ip_vs_sip_pe.n_list),

        /* callbacks */
        .fill_param   = ip_vs_sip_fill_param,   /* extract the Call-ID into pe_data */
        .ct_match     = ip_vs_sip_ct_match,     /* compare params with a template */
        .hashkey_raw  = ip_vs_sip_hashkey_raw,  /* hash over pe_data */
        .show_pe_data = ip_vs_sip_show_pe_data,
        .conn_out     = ip_vs_sip_conn_out,     /* connections started by a real server */
};

/*
 * Module initialisation: register the SIP persistence engine descriptor.
 *
 * Returns whatever register_ip_vs_pe() returns.
 */
static int __init ip_vs_sip_init(void)
{
        int err = register_ip_vs_pe(&ip_vs_sip_pe);

        return err;
}

SIP PE绑定

如本文开头的ipvsadm配置命令,指定了--pe参数的值为sip。在创建虚拟服务的函数ip_vs_add_service中,以sip作为PE的名称,由ip_vs_pe_getbyname函数遍历全局ip_vs_pe链表,找到对应的PE结构(对于SIP即ip_vs_sip_pe),并将其绑定到虚拟服务的pe成员上。

/*
 * Abridged excerpt of ip_vs_add_service(): only the persistence-engine (PE)
 * lookup and its binding to the service are shown.  The declaration of ret,
 * the out_err label and the function's return are not part of the excerpt.
 * NOTE(review): svc is still NULL at the binding below in this excerpt;
 * presumably it is allocated in the elided code - confirm against the full
 * function.
 */
static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p)
{
    struct ip_vs_pe *pe = NULL;
    struct ip_vs_service *svc = NULL;

    /* A PE name was supplied and is non-empty: resolve it to a PE structure. */
    if (u->pe_name && *u->pe_name) {
        pe = ip_vs_pe_getbyname(u->pe_name);
        if (pe == NULL) {
            /* No PE with that name is available: fail with -ENOENT. */
            pr_info("persistence engine module ip_vs_pe_%s not found\n", u->pe_name);
            ret = -ENOENT;
            goto out_err;
        }
    }

    /* Bind the ct retriever (pe stays NULL when no PE name was given) */
    RCU_INIT_POINTER(svc->pe, pe);
}

参数填充回调fill_param

回调函数ip_vs_sip_fill_param主要用于获取当前SIP会话的呼叫ID。

SIP PE只处理以UDP承载的SIP报文,对于非UDP协议的报文,此函数返回错误码-EINVAL。函数ip_vs_sip_fill_param最重要的工作是通过get_callid函数获取SIP会话的呼叫ID,即Call ID,之后将其复制保存到pe_data中。随后的操作将使用此Call ID值标识同一个SIP会话。

/*
 * Fill the persistence-engine data of @p with the SIP Call-ID carried in @skb.
 *
 * On success returns 0, with p->pe_data pointing to a kmemdup()ed copy of the
 * Call-ID value and p->pe_data_len set to its length.
 *
 * Returns -EINVAL when ip_vs_fill_iph_skb() reports failure (returns 0), when
 * the packet is not UDP, when there is no payload after the UDP header, or
 * when get_callid() does not find a usable Call-ID; returns -ENOMEM when the
 * copy cannot be allocated; returns the negative error of skb_linearize()
 * when linearizing fails.
 */
static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
{
        struct ip_vs_iphdr iph;
        unsigned int dataoff, datalen, matchoff, matchlen;
        const char *dptr;
        int retc;       /* was used below without a declaration */

        retc = ip_vs_fill_iph_skb(p->af, skb, false, &iph);

        /* Only useful with UDP */
        if (!retc || iph.protocol != IPPROTO_UDP)
                return -EINVAL;
        /* todo: IPv6 fragments: I think this only should be done for the first fragment. /HS */
        dataoff = iph.len + sizeof(struct udphdr);

        /* Nothing after the UDP header: there is no SIP message to inspect. */
        if (dataoff >= skb->len)
                return -EINVAL;

        /* The payload is scanned below through a single flat pointer. */
        retc = skb_linearize(skb);
        if (retc < 0)
                return retc;
        dptr = skb->data + dataoff;
        datalen = skb->len - dataoff;

        /* Search from offset 0 of the payload; any non-zero result is a failure. */
        if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
                return -EINVAL;

        /* N.B: pe_data is only set on success,
         * this allows fallback to the default persistence logic on failure
         */
        p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
        if (!p->pe_data)
                return -ENOMEM;

        p->pe_data_len = matchlen;

        return 0;
}

获取call id的函数get_callid,通过conntrack系统中的函数ct_sip_get_header获取报文中的call id字段值。其通过搜索报文中的SIP_HDR_CALL_ID类型头,即字符串:“Call-Id”,进行匹配。

/*
 * Locate the SIP Call-ID header value inside the buffer @dptr.
 *
 * @dptr:     start of the data to search
 * @dataoff:  offset in @dptr at which the search starts
 * @datalen:  length of the data
 * @matchoff: out, offset of the Call-ID value
 * @matchlen: out, length of the Call-ID value
 *
 * Returns 0 when a Call-ID was found that is no longer than
 * IP_VS_PEDATA_MAXLEN and is followed by a CR or LF; -EINVAL otherwise.
 */
static int get_callid(const char *dptr, unsigned int dataoff, unsigned int datalen, unsigned int *matchoff, unsigned int *matchlen)
{
        char terminator;

        /* Keep asking for the Call-ID header until it is found or ruled out */
        for (;;) {
                int found = ct_sip_get_header(NULL, dptr, dataoff, datalen, SIP_HDR_CALL_ID, matchoff, matchlen);

                if (found > 0)
                        break;
                if (found == 0)
                        return -EINVAL;
                /* negative result: advance the start offset and retry */
                dataoff += *matchoff;
        }

        /* An over-long Call-ID is of no use as persistence data */
        if (*matchlen > IP_VS_PEDATA_MAXLEN)
                return -EINVAL;

        /* A header value that runs to the very end of the data has no
         * line terminator after it, which SIP always requires */
        if (*matchoff + *matchlen == datalen)
                return -EINVAL;

        /* Accept CR or LF right after the value: RFC 2543 permits CR, LF
         * or CRLF, RFC 3261 only CRLF, and both are supported here */
        terminator = *(dptr + *matchoff + *matchlen);
        if (terminator != '\r' && terminator != '\n')
                return -EINVAL;

        IP_VS_DBG_BUF(9, "SIP callid %s (%d bytes)\n", IP_VS_DEBUG_CALLID(dptr + *matchoff, *matchlen), *matchlen);
        return 0;
}

连接匹配回调函数ct_match

函数ip_vs_sip_ct_match用于验证当前的连接参数p,是否与已存在的连接ct相匹配。有几个条件:
1)地址族相同;
2)客户端地址必须相同;
3)虚拟服务地址和端口号必须相同;
4)此连接ct要为连接模板,IP_VS_CONN_F_TEMPLATE;
5)两者协议相同;
6)PE引擎的数据相同,即Call ID相同。

/*
 * Decide whether the connection parameters @p match the existing
 * connection @ct.
 *
 * Returns true only when all of the following hold: same address family,
 * same client address, same virtual address and port, @ct is a connection
 * template (IP_VS_CONN_F_TEMPLATE), same protocol, and identical PE data
 * (same length and same bytes).
 */
static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p, struct ip_vs_conn *ct)
{
        if (ct->af != p->af)
                return false;

        if (!ip_vs_addr_equal(p->af, p->caddr, &ct->caddr))
                return false;

        /* protocol should only be IPPROTO_IP if d_addr is a fwmark */
        if (!ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, p->vaddr, &ct->vaddr))
                return false;

        if (ct->vport != p->vport)
                return false;

        /* only connection templates are candidates */
        if (!(ct->flags & IP_VS_CONN_F_TEMPLATE))
                return false;

        if (ct->protocol != p->protocol)
                return false;

        /* the template must carry PE data of the same length ... */
        if (!ct->pe_data || ct->pe_data_len != p->pe_data_len)
                return false;

        /* ... and with the same content */
        return memcmp(ct->pe_data, p->pe_data, p->pe_data_len) == 0;
}

哈希回调hashkey_raw

当内核在全局连接链表ip_vs_conn_tab中查找时,将使用此函数计算hash值。由以下代码可知,hash值的计算主要是基于PE引擎数据。

/*
 * Compute the raw hash key for @p from its PE data only.
 *
 * @initval is passed to jhash() as the initial value; @inverse is not used
 * by this engine.  Returns the jhash() of the p->pe_data_len bytes at
 * p->pe_data.
 */
static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p, u32 initval, bool inverse)
{
        u32 key = jhash(p->pe_data, p->pe_data_len, initval);

        return key;
}

输出回调conn_out

SIP PE引擎的输出函数ip_vs_sip_conn_out,调用了通用的连接输出函数ip_vs_new_conn_out。

/*
 * conn_out callback of the SIP PE.
 *
 * For UDP packets this delegates to the generic ip_vs_new_conn_out() and
 * returns its result; for any other protocol it returns NULL.
 */
static struct ip_vs_conn *ip_vs_sip_conn_out(struct ip_vs_service *svc, struct ip_vs_dest *dest,
           struct sk_buff *skb, const struct ip_vs_iphdr *iph, __be16 dport, __be16 cport)
{
    /* anything other than UDP is the unexpected case */
    if (unlikely(iph->protocol != IPPROTO_UDP))
        return NULL;

    return ip_vs_new_conn_out(svc, dest, skb, iph, dport, cport);
}

输入处理流程

在IPVS子系统入口函数ip_vs_in处,如果未匹配上现存的连接,将使用函数ip_vs_try_to_schedule进行调度。

/*
 * Abridged excerpt of the IPVS input entry point ip_vs_in(): only the
 * fallback to scheduling is shown.  The lookup that assigns cp and the
 * declarations of pd and iph are not part of the excerpt.
 */
static unsigned int ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
    struct ip_vs_conn *cp;

    /* No existing connection: try to schedule one. */
    if (unlikely(!cp)) {
        int v;

        /* A zero result from the scheduler helper ends processing with v as the return value. */
        if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
            return v;
    }
}

ip_vs_try_to_schedule函数将调用特定协议的调度函数conn_schedule,对于SIP来说,即是调用UDP协议的调度函数udp_conn_schedule。正常情况下,只有第一个IP分片会进入此函数调度,后续的分片可找到已创建的连接。

/*
 * Abridged excerpt of ip_vs_try_to_schedule(): dispatches to the
 * protocol-specific conn_schedule callback taken from pd->pp.  The code
 * after the scheduling attempt, including the final return, is not part of
 * the excerpt.
 */
static unsigned int ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
              struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{   
    struct ip_vs_protocol *pp = pd->pp;
    
    /* Only packets with a zero fragment offset are scheduled. */
    if (!iph->fragoffs) {
        /* No (second) fragments need to enter here, as nf_defrag_ipv6 replayed fragment zero will already have created the cp */
        
        /* Schedule and create new connection entry into cpp */ 
        if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
            return 0;
    }
}

udp_conn_schedule函数如下,其通过函数ip_vs_schedule调度来选择真实的服务器,并且创建新的连接条目。

/*
 * Abridged excerpt of udp_conn_schedule(): finds the virtual service for the
 * packet and lets ip_vs_schedule() pick a real server and create the
 * connection.  The declarations of svc and ports, the rest of the if (svc)
 * body and the function's return are not part of the excerpt.
 */
static int udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
          int *verdict, struct ip_vs_conn **cpp, struct ip_vs_iphdr *iph)
{
    /*
     * Normal case: look the service up by destination address and ports[1];
     * inverse case: by source address and ports[0].
     * (presumably ports[0]/ports[1] are the source/destination ports - TODO confirm)
     */
    if (likely(!ip_vs_iph_inverse(iph)))
        svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]);
    else
        svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
                     &iph->saddr, ports[0]);
    if (svc) {
        int ignored;

        /*
         * Let the virtual server select a real server for the incoming connection, and create a connection entry.
         */
        *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
}

对于使能SIP PE引擎的情况,ip_vs_schedule函数将调用ip_vs_sched_persist进行专门的处理。

/*
 * Abridged excerpt of ip_vs_schedule(): only the persistent-service branch
 * is shown.  The declarations of cport and vport and the non-persistent
 * path are not part of the excerpt.
 */
struct ip_vs_conn *ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
           struct ip_vs_proto_data *pd, int *ignored, struct ip_vs_iphdr *iph)
{
    /* Persistent service: hand the packet over to ip_vs_sched_persist()
	 */
    if (svc->flags & IP_VS_SVC_F_PERSISTENT)
        return ip_vs_sched_persist(svc, skb, cport, vport, ignored, iph);
}

如下函数ip_vs_sched_persist,在其中将调用到SIP PE引擎的3个回调函数:其中1个是在ip_vs_conn_fill_param_persist中调用的fill_param,用于填充参数;另2个是在ip_vs_ct_in_get中调用的ct_match(进行连接模板匹配)和hashkey_raw(进行hash计算)。

如果没有匹配到连接模板,此函数将使用通用的调度器进行调度处理,选择真实服务器,之后创建一个新的连接模板。否则,使用已有的连接模板创建新连接。

/*
 * Abridged excerpt of ip_vs_sched_persist(): fills the connection
 * parameters (including the PE data), looks for an existing connection
 * template and, when none is usable, schedules a destination and creates a
 * new template.  The declarations of param, snet, dst_addr, ct, dest and
 * dport, and everything after the template creation, are not part of the
 * excerpt.
 */
static struct ip_vs_conn *ip_vs_sched_persist(struct ip_vs_service *svc,
            struct sk_buff *skb, __be16 src_port, __be16 dst_port, int *ignored, struct ip_vs_iphdr *iph)
{
    {   
        int protocol = iph->protocol;
        const union nf_inet_addr *vaddr = dst_addr;
        __be16 vport = 0;

        /* Fill param; this is where the PE fill_param callback gets invoked.
         * On failure return *ignored = -1 so NF_DROP can be used */
        if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, vaddr, vport, &param) < 0) {
            *ignored = -1;
            return NULL;
        }
    }

    /* Check if a template already exists */
    ct = ip_vs_ct_in_get(&param);
    if (!ct || !ip_vs_check_template(ct, NULL)) {
        struct ip_vs_scheduler *sched;

        /* No template found or the dest of the connection template is not available. return *ignored=0 i.e. ICMP and NF_DROP
         */
        sched = rcu_dereference(svc->scheduler);
        if (sched) {
            /* read svc->sched_data after svc->scheduler */
            smp_rmb();
            dest = sched->schedule(svc, skb, iph);
        } else {
            dest = NULL;
        }

        /* Create a template This adds param.pe_data to the template, and thus param.pe_data will be destroyed
         * when the template expires.
         * NOTE(review): as excerpted, dest can be NULL here (see the else
         * branch above) and is dereferenced below; presumably the full
         * function bails out on a NULL dest before this point - confirm. */
        ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
}

fill_param调用点

函数ip_vs_conn_fill_param_persist将调用SIP PE引擎的fill_param回调函数,填充PE数据。

/*
 * Fill @p for a persistent service and let the service's persistence
 * engine, if any, add its own data.
 *
 * The generic fields are filled by ip_vs_conn_fill_param(); p->pe is then
 * set from svc->pe.  Returns the result of the engine's fill_param
 * callback when the service has an engine that provides one, 0 otherwise.
 */
static inline int ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, struct sk_buff *skb, int protocol,
                  const union nf_inet_addr *caddr, __be16 cport,
                  const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p)
{
    ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr, vport, p);

    p->pe = rcu_dereference(svc->pe);

    /* no engine bound, or the engine has no fill_param: nothing more to do */
    if (!p->pe || !p->pe->fill_param)
        return 0;

    return p->pe->fill_param(p, skb);
}

ct_match调用点

/*
 * Truncated excerpt of ip_vs_ct_in_get(): looks up a connection in the
 * bucket of ip_vs_conn_tab selected by the hash of @p.  Only the branch
 * that uses the persistence-engine ct_match callback is shown; the rest of
 * the loop, the out label and the return are not part of the excerpt.
 */
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
{       
    unsigned int hash;
    struct ip_vs_conn *cp;

    hash = ip_vs_conn_hashkey_param(p, false);
        
    hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
        /* PE data is present and the engine provides ct_match: let the engine decide. */
        if (unlikely(p->pe_data && p->pe->ct_match)) {
            /* entries with a different ipvs pointer never match */
            if (cp->ipvs != p->ipvs)
                continue;
            /* same engine and the engine reports a match */
            if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
                /* presumably takes a reference on cp; it can fail, in which case the scan continues - TODO confirm */
                if (__ip_vs_conn_get(cp))
                    goto out;
            }
            continue;
        }  

hashkey_raw调用点

/*
 * Truncated excerpt of ip_vs_conn_hashkey_param(): only the
 * persistence-engine path is shown; the path that uses addr and port is not
 * part of the excerpt.
 */
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, bool inverse)
{
    const union nf_inet_addr *addr;
    __be16 port;

    /*
     * PE data is present and the engine provides hashkey_raw: the bucket
     * index is the engine's hash (seeded with ip_vs_conn_rnd) masked with
     * ip_vs_conn_tab_mask.
     */
    if (p->pe_data && p->pe->hashkey_raw)
        return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) & ip_vs_conn_tab_mask;

输出处理流程

函数ip_vs_out对于UDP协议报文,使用__ip_vs_rs_conn_out函数进行处理,此函数用于真实服务器发起的连接。

/*
 * Abridged excerpt of ip_vs_out(): only the handling of connections
 * initiated by a real server is shown.  The declarations of pp, pd, cp and
 * iph and the rest of the function are not part of the excerpt.
 */
static unsigned int ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
    /* Only taken when conn_out_counter is non-zero. */
    if (atomic_read(&ipvs->conn_out_counter)) {
        /* Currently only for UDP:
         * connection oriented protocols typically use ephemeral ports for outgoing connections, so
         * related incoming responses would not match any VS
         */
        if (pp->protocol == IPPROTO_UDP) {
            cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
            /* A connection was obtained: process the packet as a response on it. */
            if (likely(cp))
                return handle_response(af, skb, pd, cp, &iph, hooknum);
        }
    }
}

如下函数__ip_vs_rs_conn_out,如果通过函数ip_vs_find_real_service确认此报文是由真实服务器发出,将调用PE引擎的conn_out函数指针进行处理。对于SIP PE,此函数为ip_vs_sip_conn_out。由前文的介绍可知,其封装了函数ip_vs_new_conn_out。

/*
 * Truncated excerpt of __ip_vs_rs_conn_out(): handles a packet sent by a
 * real server by calling the conn_out callback of the persistence engine
 * bound to that server's service.  The closing of the nested blocks and the
 * return of cp are not part of the excerpt.
 */
static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, struct netns_ipvs *ipvs,
                          int af, struct sk_buff *skb, const struct ip_vs_iphdr *iph)
{       
    struct ip_vs_dest *dest;
    struct ip_vs_conn *cp = NULL;
    __be16 _ports[2], *pptr;
            
    /* Nothing is done on the LOCAL_IN hook. */
    if (hooknum == NF_INET_LOCAL_IN)
        return NULL;
            
    /* Fetch the two 16-bit port fields located right after the IP header. */
    pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
    if (!pptr)
        return NULL;

    /* Is the packet's source address / pptr[0] a known real server? */
    dest = ip_vs_find_real_service(ipvs, af, iph->protocol, &iph->saddr, pptr[0]);
    if (dest) {
        struct ip_vs_service *svc;
        struct ip_vs_pe *pe;
    
        svc = rcu_dereference(dest->svc);
        if (svc) {   
            pe = rcu_dereference(svc->pe);
            /* The service has an engine that provides conn_out: let it create the connection. */
            if (pe && pe->conn_out)
                cp = pe->conn_out(svc, dest, skb, iph, pptr[0], pptr[1]);
} 

以下为函数ip_vs_new_conn_out的代码,对于Persistence服务,其与在上节输入流程中介绍的函数ip_vs_sched_persist类似,可参考上节的内容。

/*
 * Truncated excerpt of ip_vs_new_conn_out(): only the persistent-service
 * part up to the creation of the connection template is shown.  The
 * declarations of snet, caddr, vaddr, vport, daddr, param and ct, and the
 * rest of the function, are not part of the excerpt.
 */
struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct sk_buff *skb,
                      const struct ip_vs_iphdr *iph, __be16 dport, __be16 cport)
{
    if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
        /* Reduce the client address to its network part using svc->netmask. */
#ifdef CONFIG_IP_VS_IPV6
        if (svc->af == AF_INET6)
            ipv6_addr_prefix(&snet.in6, &caddr->in6, (__force __u32)svc->netmask);
        else
#endif
            snet.ip = caddr->ip & svc->netmask;

        /* Fill param; this is where the PE fill_param callback gets invoked. */
        if (ip_vs_conn_fill_param_persist(svc, skb, iph->protocol, &snet, 0, vaddr, vport, &param) < 0)
            return NULL;
        /* Reuse an existing template when one passes the check against dest; otherwise create one. */
        ct = ip_vs_ct_in_get(&param);
        if (!ct || !ip_vs_check_template(ct, dest)) {
            ct = ip_vs_conn_new(&param, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0);
}

内核版本 4.15

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页