linux协议栈之邻居子系统详细介绍

来源:百度文库 编辑:神马文学网 时间:2024/04/28 08:06:13

 

时间:2007-11-06 17:02:35  来源:Linux联盟收集整理  作者:

一:邻居子系统概述

         邻居子系统中的“邻居”,从物理上来说是指处于同一个局域网内的终端;从网络拓扑的结构来说,是指它们之间相隔的距离仅为一跳,它们属于同一个冲突域。

         邻居子系统的作用:

         它为第三层协议与第二层协议提供地址映射关系。

         提供邻居头部缓存,加速发包的速度

二:邻居子系统在整个协议栈的地位

         发送数据的时候,要在本机进行路由查找。如果有到目的地址的路径,则查看arp缓存中是否存在相应的映射关系;如果没有,则新建邻居项。然后判断邻居项是否为可用状态:如果不可用,则把skb存至邻居的发送队列中,然后发送arp请求。

         如果接收到arp应答,则将对应邻居项置为可用。如果在指定时间内未收到响应包,则将对应邻居项置为无效状态。

         如果邻居更改为可用状态,则把邻居项对应的skb队列中的数据包发送出去。

三:流程概述;

发包流程。

下面以发送udp数据为例,看看是怎么与邻居子系统相关联的

sendmsg() → ip_route_output()(到路由缓存中查找目的出口)→ ip_route_output_slow()(如果缓存中不存在目的项,则到路由表中查找)→ ip_build_xmit() → output_maybe_reroute() → skb->dst->output()

如果此时找到了路由,则根据路由信息分配一个dst_entry,并调用arp_bind_neighbour为之绑定邻居,output指针赋值为ip_output

 转到执行ip_output

ip_output → __ip_finish_output() → ip_finish_output2() → dst->neighbour->output()

现在就转至邻居项的出口函数了。关于上述详细流程,将在以后的学习中继续给出

Neighbour->output怎么处理呢?我们从初始化时看起

四:邻居子系统初始化

跟以前一样,linux源代码版本为2.6.21

/*
 * arp_init - one-time initialisation of the ARP layer (net/ipv4/arp.c).
 *
 * Sets up the ARP neighbour table, registers the ARP packet handler,
 * creates the proc objects, optionally registers the sysctl entries and
 * finally subscribes to network-device notifications.
 */
void __init arp_init(void)
{
	/* Initialise the ARP neighbour table. */
	neigh_table_init(&arp_tbl);

	/* Register the ARP protocol handler (see arp_packet_type). */
	dev_add_pack(&arp_packet_type);

	/* Create the proc objects for ARP. */
	arp_proc_init();

#ifdef CONFIG_SYSCTL
	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
			      NET_IPV4_NEIGH, "ipv4", NULL);
#endif

	/* Hook into the network-device event notifier chain. */
	register_netdevice_notifier(&arp_netdev_notifier);
}

  neigh_table_init(&arp_tbl);中,对邻居表进行了相应的初始化,特别的,初始化了一个垃圾回收定时器。后面再给出讨论

arp_packet_type的内容为:

static struct packet_type arp_packet_type = {

         .type =       __constant_htons(ETH_P_ARP),           (链路层对应的协议号)

         .func =       arp_rcv,                                                    《数据包的处理函数》

}

从上面可以看出,当接收到arp数据包时,将用arp_rcv()处理

五:邻居系统数据结构分析

neigh_table结构:

struct neigh_table

{

         //下一个邻居表

         struct neigh_table         *next;

         //协议簇

         int                        family;

         //入口长度,也就是一个邻居结构的大小,初始化为sizeof(neighbour)+4(4为一个IP地址的长度)

         int                        entry_size;

         //哈希关键值长度 IP地址的长度,为4

         int                        key_len;

         //哈希值的计数函数(哈希值是经对应设备net_device  目的Ip计算出来的)

         __u32                           (*hash)(const void *pkey, const struct net_device *);

         //邻居初始化函数

         int                        (*constructor)(struct neighbour *);

         int                        (*pconstructor)(struct pneigh_entry *);

         void                     (*pdestructor)(struct pneigh_entry *);

         void                     (*proxy_redo)(struct sk_buff *skb);

         //邻居表的名称

         char                     *id;

         struct neigh_parms       parms;

         /* HACK. gc_* shoul follow parms without a gap! */

         //常规垃圾回收的时候

         int                        gc_interval;

         int                        gc_thresh1;

         //第二个阀值,如果邻居超过此值,当创建新的邻居时

         //若超过五秒没有刷新,则必须立即刷新,强制垃圾回收

         int                        gc_thresh2;

         //允许邻居的上限

         int                        gc_thresh3;

         //最近刷新时间

         unsigned long                last_flush;

         //常规的垃圾回收定时器

         struct timer_list           gc_timer;

         struct timer_list           proxy_timer;

         struct sk_buff_head     proxy_queue;

         //整个表中邻居的数量

         int                        entries;

         rwlock_t              lock;

         unsigned long                last_rand;

         struct neigh_parms       *parms_list;

         kmem_cache_t              *kmem_cachep;

         struct neigh_statistics  *stats;

         //哈希数组,存入其中的邻居

         struct neighbour  **hash_buckets;

         //哈希数组大小的掩码

         unsigned int                  hash_mask;

         __u32                           hash_rnd;

         unsigned int                  hash_chain_gc;

         //与代理arp相关

         struct pneigh_entry      **phash_buckets;

#ifdef CONFIG_PROC_FS

         struct proc_dir_entry   *pde;

#endif

}

Neighbour结构:

// One neighbour entry, stored in a hash chain of its neigh_table.

struct neighbour

{

         // Next neighbour in the same hash chain

         struct neighbour  *next;

         // Neighbour table this entry belongs to

         struct neigh_table         *tbl;

         // ARP transmission parameters

         struct neigh_parms       *parms;

         // Network device this neighbour is reached through

         struct net_device          *dev;

         // Time of last use

         unsigned long                used;

         // neigh_create() initialises this to jiffies minus twice base_reachable_time

         unsigned long                confirmed;

         // Time of last update

         unsigned long                updated;

         __u8                    flags;

         // State of this neighbour entry (NUD_* values)

         __u8                    nud_state;

         // Address type; arp_constructor() stores inet_addr_type() of the key here

         __u8                    type;

         // Liveness flag: per the original note, garbage collection removes
         // the entry when dead is 1; neigh_create() clears it on insertion

         __u8                    dead;

         // Number of ARP request retries sent so far

         atomic_t              probes;

         rwlock_t              lock;

         // Link-layer (hardware) address buffer, MAX_ADDR_LEN rounded up to a
         // multiple of sizeof(unsigned long)

         unsigned char                ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)];

         // Header cache for this neighbour

         struct hh_cache            *hh;

         // Reference count

         atomic_t              refcnt;

         // Transmit function used for packets sent to this neighbour

         int                        (*output)(struct sk_buff *skb);

         // Queue of skbs waiting to be sent to this neighbour

         struct sk_buff_head     arp_queue;

         // Per-neighbour timer

         struct timer_list  timer;

         struct neigh_ops *ops;

         // Hash key; neigh_create() copies tbl->key_len bytes of the lookup key here

         u8                        primary_key[0];

};

在前面已经分析过,查找到路由后,会调用arp_bind_neighbour绑定一个邻居项

int arp_bind_neighbour(struct dst_entry *dst)

{

         struct net_device *dev = dst->dev;

         struct neighbour *n = dst->neighbour;

         if (dev == NULL)

                   return -EINVAL;

         //如果邻居项不存在

         if (n == NULL) {

                   u32 nexthop = ((struct rtable*)dst)->rt_gateway;

                   if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT))

                            nexthop = 0;

                   n = __neigh_lookup_errno(

#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)

                       dev->type == ARPHRD_ATM ? clip_tbl_hook :

#endif

                       &arp_tbl, &nexthop, dev);

                   if (IS_ERR(n))

                            return PTR_ERR(n);

                   dst->neighbour = n;

         }

         return 0;

}

如果邻居项不存在,则执行__neigh_lookup_errno()

/*
 * __neigh_lookup_errno - find the neighbour for (@pkey, @dev), creating it
 * when the table has no such entry yet.
 * @tbl:  neighbour table to search
 * @pkey: lookup key (the next-hop address)
 * @dev:  device the neighbour is reached through
 *
 * Returns the entry found by neigh_lookup(), otherwise whatever
 * neigh_create() returns; callers check the result with IS_ERR().
 */
static inline struct neighbour *
__neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
		     struct net_device *dev)
{
	/* Search the neighbour table first. */
	struct neighbour *n = neigh_lookup(tbl, pkey, dev);

	if (n)
		return n;

	/* No entry yet: create a new one. */
	return neigh_create(tbl, pkey, dev);
}

从上面可以看到,它会先到邻居表中寻找对应的邻居项,如果不存在,则新建一项。继续跟进

/*
 * neigh_create - allocate a neighbour for (pkey, dev) and insert it into tbl.
 *
 * The new entry is initialised by the table's constructor and by the
 * optional per-device neigh_setup hook, then linked at the head of its hash
 * bucket under tbl->lock. If an entry with the same device and key is
 * already in the bucket, that entry is returned with an extra reference
 * and the freshly allocated one is released.
 *
 * Returns the neighbour, or ERR_PTR(-ENOBUFS) when allocation fails,
 * ERR_PTR(error) when a setup hook fails, or ERR_PTR(-EINVAL) when the
 * entry's parms are marked dead.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,

                                   struct net_device *dev)

{

         u32 hash_val;

         int key_len = tbl->key_len;

         int error;

         struct neighbour *n1, *rc, *n = neigh_alloc(tbl);

         if (!n) {

                   rc = ERR_PTR(-ENOBUFS);

                   goto out;

         }

         // The hash key is the lookup key itself (the destination IP for ARP)

         memcpy(n->primary_key, pkey, key_len);

         n->dev = dev;

         dev_hold(dev);

         /* Protocol specific setup. */

         // Table constructor (arp_constructor for arp_tbl)

         if (tbl->constructor &&         (error = tbl->constructor(n)) < 0) {

                   rc = ERR_PTR(error);

                   goto out_neigh_release;

         }

         /* Device specific setup. */

         if (n->parms->neigh_setup &&

             (error = n->parms->neigh_setup(n)) < 0) {

                   rc = ERR_PTR(error);

                   goto out_neigh_release;

         }

         // Backdate the confirmation time by twice base_reachable_time

         n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

         write_lock_bh(&tbl->lock);

         // Grow the hash table to twice its size once entries exceed hash_mask + 1

         if (tbl->entries > (tbl->hash_mask + 1))

                   neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);

         hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;

         // The parms of this entry are marked dead: refuse to insert

         if (n->parms->dead) {

                   rc = ERR_PTR(-EINVAL);

                   goto out_tbl_unlock;

         }

         // Walk the hash bucket; if the entry already exists, take a
         // reference on it and return it instead of the new one

                  for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {

                   if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {

                            neigh_hold(n1);

                            rc = n1;

                            goto out_tbl_unlock;

                   }

         }

// Not present: link the new entry at the head of the hash bucket

         n->next = tbl->hash_buckets[hash_val];

         tbl->hash_buckets[hash_val] = n;

         n->dead = 0;

         neigh_hold(n);

         write_unlock_bh(&tbl->lock);

         NEIGH_PRINTK2("neigh %p is created.\n", n);

         rc = n;

out:

         return rc;

// Failure (or duplicate) after the lock was taken: unlock, then fall
// through to release the newly allocated entry

out_tbl_unlock:

         write_unlock_bh(&tbl->lock);

out_neigh_release:

         neigh_release(n);

         goto out;

}

在函数里,会调用tbl->constructor()进行初始化。在arp_tbl结构中,为constructor赋值为arp_constructor

static int arp_constructor(struct neighbour *neigh)

{

         u32 addr = *(u32*)neigh->primary_key;

         struct net_device *dev = neigh->dev;

         struct in_device *in_dev;

         struct neigh_parms *parms;

         neigh->type = inet_addr_type(addr);

         rcu_read_lock();

         in_dev = rcu_dereference(__in_dev_get(dev));

         if (in_dev == NULL) {

                   rcu_read_unlock();

                   return -EINVAL;

         }

         parms = in_dev->arp_parms;

         __neigh_parms_put(neigh->parms);

         neigh->parms = neigh_parms_clone(parms);

         rcu_read_unlock();

         //dev->hard_header.是为被赋值勤

         if (dev->hard_header == NULL) {

                   neigh->nud_state = NUD_NOARP;

                   neigh->ops = &arp_direct_ops;

                   neigh->output = neigh->ops->queue_xmit;

         } else {

                  

#if 1

                                     switch (dev->type) {

                   default:

                            break;

                   case ARPHRD_ROSE:         

                            neigh->ops = &arp_broken_ops;

                            neigh->output = neigh->ops->output;

                            return 0;

#endif

                   ;}

#endif

                   if (neigh->type == RTN_MULTICAST) {

                            neigh->nud_state = NUD_NOARP;

                            arp_mc_map(addr, neigh->ha, dev, 1);

                   } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {

                            neigh->nud_state = NUD_NOARP;

                            memcpy(neigh->ha, dev->dev_addr, dev->addr_len);

                   } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) {

                            neigh->nud_state = NUD_NOARP;

                            memcpy(neigh->ha, dev->broadcast, dev->addr_len);

                   }

                   if (dev->hard_header_cache)

                            neigh->ops = &arp_hh_ops;

                   else

                            neigh->ops = &arp_generic_ops;

                   //如果邻居项是可用状态,则调用connected_output里的函数

                   if (neigh->nud_state&NUD_VALID)

                            neigh->output = neigh->ops->connected_output;

                   else

                   //如果邻居项不可用

                            neigh->output = neigh->ops->output;

         }

         return 0;

}

在网卡驱动那一章,我们是调用alloc_etherdev()来构建网卡的net_device结构的,在alloc_etherdev()中会调用ether_setup()对各函数指针赋值

void ether_setup(struct net_device *dev)                                              drivers/net/net_init.c

{

         /* Fill in the fields of the device structure with ethernet-generic values.

            This should be in a common file instead of per-driver.  */

        

         dev->change_mtu                  = eth_change_mtu;

         dev->hard_header         = eth_header;

         dev->rebuild_header    = eth_rebuild_header;

         dev->set_mac_address         = eth_mac_addr;

         dev->hard_header_cache       = eth_header_cache;

         dev->header_cache_update= eth_header_cache_update;

         dev->hard_header_parse       = eth_header_parse;

         dev->type           = ARPHRD_ETHER;

         dev->hard_header_len          = ETH_HLEN;

         dev->mtu            = 1500; /* eth_mtu */

         dev->addr_len              = ETH_ALEN;

         dev->tx_queue_len       = 1000;      /* Ethernet wants good queues */  

        

         memset(dev->broadcast,0xFF, ETH_ALEN);

         /* New-style flags. */

         dev->flags           = IFF_BROADCAST|IFF_MULTICAST;

}

所以,neigh->output 就指向了arp_hh_ops->output

Arp_hh_ops的结构如下:

// Operations selected by arp_constructor() for devices that provide
// hard_header_cache.

static struct neigh_ops arp_hh_ops = {

         .family =             AF_INET,

         .solicit =              arp_solicit,

         .error_report =             arp_error_report,

         // arp_constructor() installs .output for entries that are not yet
         // valid and .connected_output for valid ones; both point to
         // neigh_resolve_output here

         .output =             neigh_resolve_output,

         .connected_output =    neigh_resolve_output,

         .hh_output =                dev_queue_xmit,

         .queue_xmit =               dev_queue_xmit,

};

由此可以看到,最终的数据都会流到neigh_resolve_output