路由FIB通知链

路由系统 专栏收录该内容
31 篇文章 1 订阅

在网络命名空间初始化时,初始化fib通知链操作链表。

static int __net_init fib_notifier_net_init(struct net *net)
{
    struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id);

    INIT_LIST_HEAD(&fn_net->fib_notifier_ops);
    ATOMIC_INIT_NOTIFIER_HEAD(&fn_net->fib_chain);

对于IPv4,在网络命名空间初始化时,注册fib通知链处理结构fib4_notifier_ops_template。

/*
 * Template of the IPv4 FIB notifier operations: address family AF_INET,
 * fib4_seq_read as the sequence-read callback and fib4_dump as the dump
 * callback. fib4_notifier_init() hands this template to
 * fib_notifier_ops_register() together with the network namespace being
 * initialised.
 */
static const struct fib_notifier_ops fib4_notifier_ops_template = {
    .family     = AF_INET,
    .fib_seq_read   = fib4_seq_read,
    .fib_dump   = fib4_dump,
    .owner      = THIS_MODULE,
};

int __net_init fib4_notifier_init(struct net *net)
{
    struct fib_notifier_ops *ops;

    net->ipv4.fib_seq = 0;

    ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net);
    if (IS_ERR(ops))
        return PTR_ERR(ops);
    net->ipv4.notifier_ops = ops;

FIB下一跳通知链调用

当出现网络设备down,设备注销,设备物理链路状态改变或者删除设备IP地址等情况时,需要更新所有以此设备为下一跳的表项,并且调用call_fib_nh_notifiers发送下一跳改变的消息到fib通知链,消息类型为FIB_EVENT_NH_DEL。

int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
    unsigned int hash = fib_devindex_hashfn(dev->ifindex);
    struct hlist_head *head = &fib_info_devhash[hash];
    struct fib_nh *nh;

    hlist_for_each_entry(nh, head, nh_hash) {
        struct fib_info *fi = nh->nh_parent;

        ...
        change_nexthops(fi) {
            if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
                dead++;
            else if (nexthop_nh->fib_nh_dev == dev &&
                 nexthop_nh->fib_nh_scope != scope) {
                switch (event) {
                case NETDEV_DOWN:
                case NETDEV_UNREGISTER:
                    nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
                    fallthrough;
                case NETDEV_CHANGE:
                    nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
                    break;
                }
                call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_DEL);

与以上的事件相反,当出现网络设备up,设备物理链路状态改变或者添加设备IP地址等情况时,也需要更新设备相关的下一跳信息。清除下一跳中的RTNH_F_LINKDOWN和/或RTNH_F_DEAD标志,并且发送消息到fib通知链,类型为FIB_EVENT_NH_ADD。

int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
    struct fib_info *prev_fi;
    struct hlist_head *head;
    struct fib_nh *nh;

    if (!(dev->flags & IFF_UP)) return 0;

    if (nh_flags & RTNH_F_DEAD) {
        unsigned int flags = dev_get_flags(dev);

        if (flags & (IFF_RUNNING | IFF_LOWER_UP))
            nh_flags |= RTNH_F_LINKDOWN;
    }
    hash = fib_devindex_hashfn(dev->ifindex);
    head = &fib_info_devhash[hash];

    hlist_for_each_entry(nh, head, nh_hash) {
        struct fib_info *fi = nh->nh_parent;

        BUG_ON(!fi->fib_nhs);
        if (nh->fib_nh_dev != dev || fi == prev_fi)
            continue;

        prev_fi = fi;
        change_nexthops(fi) {
            if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
                alive++;
                continue;
            }
            if (!nexthop_nh->fib_nh_dev || !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
                continue;
            if (nexthop_nh->fib_nh_dev != dev || !__in_dev_get_rtnl(dev))
                continue;

            nexthop_nh->fib_nh_flags &= ~nh_flags;
            call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);

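如下call_fib_nh_notifiers函数,调用IPv4对应的通知链函数call_fib4_notifiers。对于FIB_EVENT_NH_ADD事件,如果接口设置了忽略链路down的路由,并且下一跳设置了标志RTNH_F_LINKDOWN,则不调用FIB通知链。该接口选项可通过如下PROC文件进行设置:

/proc/sys/net/ipv4/conf/ens34/ignore_routes_with_linkdown

另外,如果下一跳已经设置了RTNH_F_DEAD标志,也没有必要发送FIB_EVENT_NH_ADD通知。相反,对于FIB_EVENT_NH_DEL事件,仅当下一跳设置了RTNH_F_DEAD标志,或者接口设置了忽略链路down并且下一跳设置了RTNH_F_LINKDOWN标志时,才调用通知链。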

static int call_fib_nh_notifiers(struct fib_nh *nh, enum fib_event_type event_type)
{
    bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
    struct fib_nh_notifier_info info = {
        .fib_nh = nh,
    };

    switch (event_type) {
    case FIB_EVENT_NH_ADD:
        if (nh->fib_nh_flags & RTNH_F_DEAD)
            break;
        if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
            break;
        return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info);
    case FIB_EVENT_NH_DEL:
        if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || (nh->fib_nh_flags & RTNH_F_DEAD))
            return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info);

FIB表项通知链调用

当用户层通过ip route命令或者route命令,以及内核通过fib_magic自行添加路由表项时,由函数fib_table_insert进行处理,最后由函数call_fib_entry_notifiers发送类型为FIB_EVENT_ENTRY_REPLACE的通知链事件。

int fib_table_insert(struct net *net, struct fib_table *tb,
             struct fib_config *cfg, struct netlink_ext_ack *extack)
{
    struct fib_alias *fa, *new_fa;

    new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
    if (!new_fa) goto out;

    if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) == new_fa) {
        enum fib_event_type fib_event;

        fib_event = FIB_EVENT_ENTRY_REPLACE;
        err = call_fib_entry_notifiers(net, fib_event, key, plen,
                           new_fa, extack);

同上,当要删除一条FIB表项时,也是由函数call_fib_entry_notifiers发送通知链事件。如果FIB表中有路由可替代被删除的路由,发送FIB_EVENT_ENTRY_REPLACE事件,否则发送FIB_EVENT_ENTRY_DEL事件。

int fib_table_delete(struct net *net, struct fib_table *tb, struct fib_config *cfg, struct netlink_ext_ack *extack)
{
    fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);

static void fib_notify_alias_delete(struct net *net, u32 key, struct hlist_head *fah,
                    struct fib_alias *fa_to_delete, struct netlink_ext_ack *extack)
{
    struct fib_alias *fa_next, *fa_to_notify;
    u32 tb_id = fa_to_delete->tb_id;
    u8 slen = fa_to_delete->fa_slen;
    ...
    /* Determine if the route should be replaced by the next route in the list.
     */
    fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
                   struct fib_alias, fa_list);
    if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
        fib_event = FIB_EVENT_ENTRY_REPLACE;
        fa_to_notify = fa_next;
    } else {
        fib_event = FIB_EVENT_ENTRY_DEL;
        fa_to_notify = fa_to_delete;
    }
    call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen, fa_to_notify, extack);

下一跳和FIB通知链

当用户层修改下一跳结构时,如通过ip nexthop命令,由函数nexthop_replace_notify发送通知链事件。

static void nexthop_replace_notify(struct net *net, struct nexthop *nh, struct nl_info *info)
{
    struct nh_grp_entry *nhge;

    __nexthop_replace_notify(net, nh, info);

    list_for_each_entry(nhge, &nh->grp_list, nh_list)
        __nexthop_replace_notify(net, nhge->nh_parent, info);

如果使用此下一跳结构的FIB链表不为空,将此受影响的fib_info结构的成员nh_updated设置为true(后续将据此进行判断),由函数fib_info_notify_update处理更新。

static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, struct nl_info *info)
{
    if (!list_empty(&nh->fi_list)) {
        struct fib_info *fi;

        /* expectation is a few fib_info per nexthop and then
         * a lot of routes per fib_info. So mark the fib_info
         * and then walk the fib tables once
         */
        list_for_each_entry(fi, &nh->fi_list, nh_list)
            fi->nh_updated = true;

        fib_info_notify_update(net, info);

        list_for_each_entry(fi, &nh->fi_list, nh_list)
            fi->nh_updated = false;

遍历命名空间中的路由表哈希数组(共FIB_TABLE_HASHSZ项,目前为256),对每一项取得哈希链表头,进一步遍历链表中的每个路由表,针对每个路由表调用__fib_info_notify_update处理。即此函数将遍历命名空间中的所有路由表。

void fib_info_notify_update(struct net *net, struct nl_info *info)
{
    unsigned int h;

    for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
        struct hlist_head *head = &net->ipv4.fib_table_hash[h];
        struct fib_table *tb;

        hlist_for_each_entry_rcu(tb, head, tb_hlist,
                     lockdep_rtnl_is_held())
            __fib_info_notify_update(net, tb, info);

接下来,遍历每个路由表trie结构的叶子节点,如果其对应的fib_info设置了下一跳更新标志nh_updated,调用通知链函数call_fib_entry_notifiers发送FIB_EVENT_ENTRY_REPLACE事件。代码注释中提到,待内核为nexthop对象实现并支持通知链之后,此处的call_fib_entry_notifiers调用将被移除。

static void __fib_info_notify_update(struct net *net, struct fib_table *tb, struct nl_info *info)
{
    struct fib_alias *fa;
	 
    for (;;) {

        hlist_for_each_entry(fa, &n->leaf, fa_list) {
            struct fib_info *fi = fa->fa_info;

            if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
                continue;
            ...
            /* call_fib_entry_notifiers will be removed when
             * in-kernel notifier is implemented and supported
             * for nexthop objects
             */
            call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
                         n->key, KEYLENGTH - fa->fa_slen, fa, NULL);

FIB表查询

如下函数fib4_dump,其首先dump路由策略,即遍历命名空间中的路由策略,发送fib通知; 其次,由函数fib_notify完成fib表项的dump处理。

static int fib4_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack)
{
    int err;

    err = fib4_rules_dump(net, nb, extack);
    if (err)
        return err;

    return fib_notify(net, nb, extack);

函数fib_notify遍历命名空间中所有的路由表,由函数fib_table_notify处理每个路由表的通知(dump处理)。

int fib_notify(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack)
{      
    unsigned int h;
    int err;

    for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
        struct hlist_head *head = &net->ipv4.fib_table_hash[h];
        struct fib_table *tb;
   
        hlist_for_each_entry_rcu(tb, head, tb_hlist) {
            err = fib_table_notify(tb, nb, extack);
            if (err)
                return err;

遍历路由表trie结构的所有叶子节点,每个节点由fib_leaf_notify处理。

static int fib_table_notify(struct fib_table *tb, struct notifier_block *nb, struct netlink_ext_ack *extack)
{  
    struct trie *t = (struct trie *)tb->tb_data;
    struct key_vector *l, *tp = t->kv;
    t_key key = 0;

    while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
        err = fib_leaf_notify(l, tb, nb, extack);
        if (err) return err;

        key = l->key + 1;
        /* stop in case of wrap around */
        if (key < l->key)
            break;

如下具体的叶子节点处理函数,由于local和main路由表可以共享同一个trie结构,为避免同一个FIB表项进行重复通知,要求fa的路由表ID和当前的ID相同。最后是由函数call_fib_entry_notifier执行通知链。

static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
               struct notifier_block *nb, struct netlink_ext_ack *extack)
{
    struct fib_alias *fa;
    int last_slen = -1;

    hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
        struct fib_info *fi = fa->fa_info;

        if (!fi) continue;

        /* local and main table can share the same trie,
         * so don't notify twice for the same entry.
         */
        if (tb->tb_id != fa->tb_id) continue;
        if (fa->fa_slen == last_slen) continue;

        last_slen = fa->fa_slen;
        err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
                          l->key, KEYLENGTH - fa->fa_slen, fa, extack);

FIB通知链注册与处理

函数register_fib_notifier负责FIB通知链的注册,以下可见,在注册新的通知链处理函数时,将执行fib_net_dump,其将调用上一节介绍的fib4_dump函数(对于IPv4协议),将所有的FIB通知发送一遍。

int register_fib_notifier(struct net *net, struct notifier_block *nb,
              void (*cb)(struct notifier_block *nb), struct netlink_ext_ack *extack)
{
    int retries = 0;
    int err;

    do {
        unsigned int fib_seq = fib_seq_sum(net);

        err = fib_net_dump(net, nb, extack);
        if (err)
            return err;
        if (fib_dump_is_consistent(net, nb, cb, fib_seq))
            return 0;

目前使用fib通知链的主要是网络驱动程序,如Mellanox的mlxsw驱动,其注册了处理函数mlxsw_sp_router_fib_event。

int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
             struct netlink_ext_ack *extack)
{
    ...
    mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
    err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
                    &mlxsw_sp->router->fib_nb,
                    mlxsw_sp_router_fib_dump_flush, extack);

如下函数mlxsw_sp_router_fib_event,处理FIB路由策略和FIB表项的相关事件。

static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
    struct mlxsw_sp_fib_event_work *fib_work;
    struct fib_notifier_info *info = ptr;
    struct mlxsw_sp_router *router;

    if ((info->family != AF_INET && info->family != AF_INET6 &&
         info->family != RTNL_FAMILY_IPMR &&
         info->family != RTNL_FAMILY_IP6MR))
        return NOTIFY_DONE;

    router = container_of(nb, struct mlxsw_sp_router, fib_nb);

    switch (event) {
    case FIB_EVENT_RULE_ADD:
    case FIB_EVENT_RULE_DEL:
        err = mlxsw_sp_router_fib_rule_event(event, info, router->mlxsw_sp);
        return notifier_from_errno(err);
    case FIB_EVENT_ENTRY_ADD:
    case FIB_EVENT_ENTRY_REPLACE:
    case FIB_EVENT_ENTRY_APPEND:

内核版本 5.10

  • 0
    点赞
  • 0
    评论
  • 0
    收藏
  • 一键三连
    一键三连
  • 扫一扫,分享海报

相关推荐
©️2020 CSDN 皮肤主题: 编程工作室 设计师:CSDN官方博客 返回首页
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、C币套餐、付费专栏及课程。

余额充值