IPVS中的ICMP报文处理-由内到外

这里主要说明与NAT/Masq转发模式相关的ICMP报文处理,同时也会提及在出错情况下由IPVS系统主动发送的ICMP报文。

ICMP由内到外处理流程入口

入口函数ip_vs_out实质上挂载在netfilter的3个hook点上,分别为:NF_INET_FORWARD、NF_INET_LOCAL_IN和NF_INET_LOCAL_OUT。第一个hook点作用于转发的报文;后两个分别作用于发往本机的报文和由本机发出的报文。此函数用于处理IPVS由内到外发出的报文,包括ICMP报文。如果协议号为IPPROTO_ICMP或IPPROTO_ICMPV6,则分别使用函数ip_vs_out_icmp、ip_vs_out_icmp_v6进行处理;若判定为关联的ICMP报文(related不为0),直接返回其处理结果。

static unsigned int ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
    struct ip_vs_iphdr iph;
    struct ip_vs_protocol *pp;
    struct ip_vs_proto_data *pd;
    struct ip_vs_conn *cp;
    struct sock *sk;

    ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
    if (af == AF_INET6) {
        if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
            int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related, hooknum, &iph);
            if (related)
                return verdict;
        }
    } else
#endif
        if (unlikely(iph.protocol == IPPROTO_ICMP)) {
            int related;
            int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);
            if (related)
                return verdict;
        }

否则(即related为0),由以下代码继续处理非关联的ICMP报文和其它的IP报文。需要注意的是,对于找不到IPVS连接的报文,IPVS系统会先检查其是否为真实服务器主动发起的连接(目前仅针对UDP协议,例如使用SIP协议PE引擎时,真实服务器可主动发起RTP数据报文);如果也不属于这种情况,IPVS系统将根据配置决定是否发送ICMP错误报文。

    pd = ip_vs_proto_data_get(ipvs, iph.protocol);
    pp = pd->pp;
    cp = pp->conn_out_get(ipvs, af, skb, &iph);

    if (likely(cp)) {
        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
            goto ignore_cp;
        return handle_response(af, skb, pd, cp, &iph, hooknum);
    }

    /* Check for real-server-started requests */
    if (atomic_read(&ipvs->conn_out_counter)) {
        /* Currently only for UDP:
         * connection oriented protocols typically use ephemeral ports for outgoing connections, so
         * related incoming responses would not match any VS
         */
        if (pp->protocol == IPPROTO_UDP) {
            cp = __ip_vs_rs_conn_out(hooknum, ipvs, af, skb, &iph);
            if (likely(cp))
                return handle_response(af, skb, pd, cp, &iph, hooknum);
        }
    }

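如下所示,PROC文件/proc/sys/net/ipv4/vs/nat_icmp_send用于控制在此种错误情况下是否发送ICMP报文。IPVS默认的nat_icmp_send值为0,即不发送ICMP。仅当报文的源地址和源端口属于某个真实服务器时才会发送;TCP的RST报文和SCTP的ABORT报文不会触发ICMP的发送。ICMP报文由函数icmp_send(IPv4)或者icmpv6_send(IPv6)发送,类型为ICMP_DEST_UNREACH(IPv6为ICMPV6_DEST_UNREACH),代码为ICMP_PORT_UNREACH(IPv6为ICMPV6_PORT_UNREACH)。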

    if (sysctl_nat_icmp_send(ipvs) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) {
        __be16 _ports[2], *pptr;

        pptr = frag_safe_skb_hp(skb, iph.len, sizeof(_ports), _ports, &iph);
        if (pptr == NULL)
            return NF_ACCEPT;   /* Not for me */
        if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr, pptr[0])) {
            /*
             * Notify the real server: there is no existing entry if it is not RST packet or not TCP packet.
             */
            if ((iph.protocol != IPPROTO_TCP && iph.protocol != IPPROTO_SCTP)
                 || ((iph.protocol == IPPROTO_TCP && !is_tcp_reset(skb, iph.len))
                 || (iph.protocol == IPPROTO_SCTP && !is_sctp_abort(skb, iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6
                if (af == AF_INET6) {
                    if (!skb->dev) skb->dev = ipvs->net->loopback_dev;
                    icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
                } else
#endif
                    icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
                return NF_DROP;

IPVS由内到外的ICMP处理

函数ip_vs_out_icmp目前仅处理三种类型的ICMP报文:ICMP_DEST_UNREACH、ICMP_SOURCE_QUENCH和ICMP_TIME_EXCEEDED。如果不是这三种类型,设置为不相关联的ICMP,结束处理。

static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, unsigned int hooknum)
{
    struct icmphdr  _icmph, *ic;
    struct iphdr    _ciph, *cih;    /* The ip header contained within the ICMP */
    struct ip_vs_iphdr ciph;

    *related = 1;
    iph = ip_hdr(skb);
    offset = ihl = iph->ihl * 4;
    ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
    /*
     * Work through seeing if this is for us.
     * These checks are supposed to be in an order that means easy things are checked first to speed up processing.... however
     * this means that some packets will manage to get a long way down this stack and then be rejected, but that's life.
     */
    if ((ic->type != ICMP_DEST_UNREACH) &&
        (ic->type != ICMP_SOURCE_QUENCH) &&
        (ic->type != ICMP_TIME_EXCEEDED)) {
        *related = 0;
        return NF_ACCEPT;
    }

接下来,找到ICMP报文中内层的IP报文,根据其中的IP头部信息查找IPVS连接。如果找到,表明此ICMP报文是由之前客户端的请求报文所触发、由真实服务器回复的ICMP报文,交由函数handle_response_icmp处理。

    /* Now find the contained IP header */
    offset += sizeof(_icmph);
    cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);

    pp = ip_vs_proto_get(cih->protocol);

    /* Is the embedded protocol header present? */
    if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag))
        return NF_ACCEPT;

    ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);

    /* The embedded headers contain source and dest in reverse order */
    cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
    if (!cp)
        return NF_ACCEPT;

    snet.ip = iph->saddr;
    return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, pp, ciph.len, ihl, hooknum);

NAT的ICMP响应处理

如下处理函数handle_response_icmp,首先需要注意的是,其仅工作于NAT/Masq转发模式。对于四层协议IPPROTO_TCP、IPPROTO_UDP或者IPPROTO_SCTP,由于NAT将修改内层四层头部开头的源端口和目的端口号(共2 * sizeof(__u16)个字节),需要将这部分数据一并设置为可写状态。

static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl, unsigned int hooknum)
{
    unsigned int verdict = NF_DROP;

    if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
        goto ignore_cp;

    if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol || IPPROTO_SCTP == protocol)
        offset += 2 * sizeof(__u16);
    if (!skb_make_writable(skb, offset))
        goto out;

接下来由函数ip_vs_nat_icmp处理报文中NAT相关字段的修改。既然IP报文被修改,随后需要进行重路由,参见函数ip_vs_route_me_harder。

#ifdef CONFIG_IP_VS_IPV6
    if (af == AF_INET6)
        ip_vs_nat_icmp_v6(skb, pp, cp, 1);
    else
#endif
        ip_vs_nat_icmp(skb, pp, cp, 1);

    if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
        goto out;

函数ip_vs_nat_icmp负责对ICMP报文进行NAT处理。由于当前的处理报文是由内部到外部,inout参数为1。修改报文的IP头部的源地址,和ICMP内部IP报文的目的IP地址(因为内部IP表示原方向报文),同时更新IP头部校验和。

void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int inout)
{
    struct iphdr *iph    = ip_hdr(skb);
    unsigned int icmp_offset = iph->ihl*4;
    struct icmphdr *icmph    = (struct icmphdr *)(skb_network_header(skb) + icmp_offset);
    struct iphdr *ciph   = (struct iphdr *)(icmph + 1);

    if (inout) {
        iph->saddr = cp->vaddr.ip;
        ip_send_check(iph);
        ciph->daddr = cp->vaddr.ip;
        ip_send_check(ciph);
    } else {
        iph->daddr = cp->daddr.ip;
        ip_send_check(iph);
        ciph->saddr = cp->daddr.ip;
        ip_send_check(ciph);
    }

随后,对于4层协议IPPROTO_TCP、IPPROTO_UDP和IPPROTO_SCTP,如果报文为由内到外,修改ICMP内部4层头中目的端口号(还原为发送时客户端请求的端口号)。

    /* the TCP/UDP/SCTP port */
    if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol || IPPROTO_SCTP == ciph->protocol) {
        __be16 *ports = (void *)ciph + ciph->ihl*4;

        if (inout)
            ports[1] = cp->vport;
        else
            ports[0] = cp->dport;
    }

    /* And finally the ICMP checksum */
    icmph->checksum = 0;
    icmph->checksum = ip_vs_checksum_complete(skb, icmp_offset);
    skb->ip_summed = CHECKSUM_UNNECESSARY;

内核版本 4.15

已标记关键词 清除标记
©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页