乡下人产国偷v产偷v自拍,国产午夜片在线观看,婷婷成人亚洲综合国产麻豆,久久综合给合久久狠狠狠9

  • <output id="e9wm2"></output>
    <s id="e9wm2"><nobr id="e9wm2"><ins id="e9wm2"></ins></nobr></s>

    • 分享

      淺析Linux Kernel 哈希路由表實現(二) -- 算法 -- IT技術博客大學習 -- 共學習 共進步!

       mzsm 2015-06-24

      在向外發送數據包的時候,首先需要查詢路由表來確定路由包的路由,主要由ip_route_output_key()函數來完成,該函數又調用了ip_route_output_flow(),而這個函數最終又調用了__ip_route_output_key()這個函數來進行路由的查詢,下面主要來看一下這個函數:

      int __ip_route_output_key(struct net *net, struct rtable **rp,     const struct flowi *flp){ unsigned int hash; int res; struct rtable *rth;  if (!rt_caching(net))  goto slow_output;  hash = rt_hash(flp->fl4_dst, flp->fl4_src, flp->oif, rt_genid(net));  rcu_read_lock_bh(); for (rth = rcu_dereference_bh(rt_hash_table[hash].chain); rth;  rth = rcu_dereference_bh(rth->dst.rt_next)) {  if (rth->fl.fl4_dst == flp->fl4_dst &&      rth->fl.fl4_src == flp->fl4_src &&      rth->fl.iif == 0 &&      rth->fl.oif == flp->oif &&      rth->fl.mark == flp->mark &&      !((rth->fl.fl4_tos ^ flp->fl4_tos) &       (IPTOS_RT_MASK | RTO_ONLINK)) &&      net_eq(dev_net(rth->dst.dev), net) &&      !rt_is_expired(rth)) {   dst_use(&rth->dst, jiffies);   RT_CACHE_STAT_INC(out_hit);   rcu_read_unlock_bh();   *rp = rth;   return 0;  }  RT_CACHE_STAT_INC(out_hlist_search); } rcu_read_unlock_bh(); slow_output: rcu_read_lock(); res = ip_route_output_slow(net, rp, flp); rcu_read_unlock(); return res;}

      Linux的路由表中的常用路由是存儲在路由緩存中的,該路由緩存即是類型為struct rt_hash_bucket的全局列表rt_hash_table,該緩存列表在ip_rt_init()中初始化。

      struct flowi結構中包含了查詢路由表所需要的請求信息,是一個搜索鍵值。由代碼可看出,首先在路由緩存列表rt_hash_table中查詢精確匹配的未過期的路由表項struct rtable(注:因為是出口路由,所以入口接口號是0),若找到則增加路由表項的引用計數後即刻返回。若未找到匹配的路由表項,則繼續在路由表中查找匹配的路由表項,路由表中的查詢速度會比路由緩存中慢,所以ip_route_output_slow()函數的命名就不難理解了,主動的路由解析工作都是在這個函數里面進行的,在看它的定義之前先看下服務類型和路由範圍的相關定義:

      /*
       * IPv4 type-of-service bits.
       *
       * IPTOS_TOS_MASK (0x1E) covers the four service-type flags below, and
       * IPTOS_TOS() extracts them from a TOS value. RTO_ONLINK is the lowest
       * bit, outside the mask; ip_route_output_slow() selects RT_SCOPE_LINK
       * when it is set and RT_SCOPE_UNIVERSE otherwise.
       *
       * Each directive must sit on its own line: when they are fused onto one
       * line, everything after the first becomes the replacement text of
       * IPTOS_TOS_MASK and the remaining macros are never defined.
       */
      #define IPTOS_TOS_MASK      0x1E
      #define IPTOS_TOS(tos)      ((tos) & IPTOS_TOS_MASK)
      #define IPTOS_LOWDELAY      0x10    /* minimize delay */
      #define IPTOS_THROUGHPUT    0x08    /* maximize throughput */
      #define IPTOS_RELIABILITY   0x04    /* maximize reliability */
      #define IPTOS_MINCOST       0x02    /* minimize cost */
      #define RTO_ONLINK          0x01

      由掩碼可知,服務類型實際上用了從第2位到第5位共四位的數據,表示四種服務類型,而最低位的RTO_ONLINK如果置位,則scope為RT_SCOPE_LINK,若沒有置位,則scope為RT_SCOPE_UNIVERSE,接下來看看scope的相關定義:

      /*
       * Route scope values. Larger values denote a narrower scope; the
       * accompanying article describes the scope as a kind of distance to the
       * destination.
       */
      enum rt_scope_t {
          RT_SCOPE_UNIVERSE = 0,      /* anywhere */

          /* User defined values */
          RT_SCOPE_SITE     = 200,
          RT_SCOPE_LINK     = 253,    /* address directly attached to the local link */
          RT_SCOPE_HOST     = 254,    /* local address */
          RT_SCOPE_NOWHERE  = 255     /* unreachable address */
      };

      其中值越大所表示的範圍便越精確,實際上這也不是什麼範圍的意思,只不過是到目的地址的某種距離的表示。OK,接下來看ip_route_output_slow()函數的定義:

      static int ip_route_output_slow(struct net *net, struct rtable **rp,    const struct flowi *oldflp){ u32 tos = RT_FL_TOS(oldflp); struct flowi fl = { .nl_u = { .ip4_u =          { .daddr = oldflp->fl4_dst,     .saddr = oldflp->fl4_src,     .tos = tos & IPTOS_RT_MASK,     .scope = ((tos & RTO_ONLINK) ?        RT_SCOPE_LINK :        RT_SCOPE_UNIVERSE),          } },       .mark = oldflp->mark,       .iif = net->loopback_dev->ifindex,       .oif = oldflp->oif }; struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; int err;   res.fi  = NULL;#ifdef CONFIG_IP_MULTIPLE_TABLES res.r  = NULL;#endif  if (oldflp->fl4_src) {  /* 若源地址為組播地址,受限廣播地址(255.255.255.255)或0地址,  均不合法,即刻返回 */  err = -EINVAL;  if (ipv4_is_multicast(oldflp->fl4_src) ||      ipv4_is_lbcast(oldflp->fl4_src) ||      ipv4_is_zeronet(oldflp->fl4_src))   goto out;   if (oldflp->oif == 0 &&      (ipv4_is_multicast(oldflp->fl4_dst) ||       ipv4_is_lbcast(oldflp->fl4_dst))) {   /* 等價于inet_addr_type(saddr) == RTN_LOCAL,    __ip_dev_find()函數(shù)實際是搜索RT_TABLE_LOCAL   路由表中的路由表項,如果未找到對應設(shè)備則返回,因為   Linux不允許環(huán)回接口發(fā)組播或受限廣播 */   dev_out = __ip_dev_find(net, oldflp->fl4_src, false);   if (dev_out == NULL)    goto out;    /* 給外面接口賦值后轉(zhuǎn)去創(chuàng)建路由緩存 */   fl.oif = dev_out->ifindex;   goto make_route;  }   if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {   /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */   if (!__ip_dev_find(net, oldflp->fl4_src, false))    goto out;  } }   if (oldflp->oif) {  dev_out = dev_get_by_index_rcu(net, oldflp->oif);  err = -ENODEV;  if (dev_out == NULL)   goto out;   /* 如果外出接口示啟用或外出接口對應的IPv4數(shù)據(jù)不存在,則返回網(wǎng)絡不可達 */  if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {   err = -ENETUNREACH;   goto out;  }  /* 若是本地組播地址或受限廣播地址則直接轉(zhuǎn)去創(chuàng)建路由緩存 */  if (ipv4_is_local_multicast(oldflp->fl4_dst) ||      ipv4_is_lbcast(oldflp->fl4_dst)) {   if (!fl.fl4_src)    fl.fl4_src = inet_select_addr(dev_out, 0,             
RT_SCOPE_LINK);   goto make_route;  }   /* 若未指定源地址,則根據(jù)目地地址類型創(chuàng)建選擇一個源地址 */  if (!fl.fl4_src) {   if (ipv4_is_multicast(oldflp->fl4_dst))    fl.fl4_src = inet_select_addr(dev_out, 0,             fl.fl4_scope);   else if (!oldflp->fl4_dst)    fl.fl4_src = inet_select_addr(dev_out, 0,             RT_SCOPE_HOST);  } }  /* 如果目的地址不存在,則令目的地址等于源地址,若都不存在,則使用環(huán)回接口,  路由類型為本地路由,轉(zhuǎn)而創(chuàng)建路由緩存 */ if (!fl.fl4_dst) {  fl.fl4_dst = fl.fl4_src;  if (!fl.fl4_dst)   fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);  dev_out = net->loopback_dev;  fl.oif = net->loopback_dev->ifindex;  res.type = RTN_LOCAL;  flags |= RTCF_LOCAL;  goto make_route; }/*OK, 走到這里先總結(jié)一下不需要查詢路由表即可直接創(chuàng)建路由緩存的情況:1. 指定了源地址,未指定外出接口,目的地址為組播地址或受限廣播地址2. 指定了外出接口,并且目的地址為本地組播地址或受限廣播地址3. 未指定目的地址。 若以上三種情況均未滿足,則需要進行路由表查詢。*/  if (fib_lookup(net, &fl, &res)) {  res.fi = NULL;  if (oldflp->oif) {   /* 程序走到這里說明查詢路由表失敗,未找到對應的路由表項,   但卻指定了外出接口,這時候即便沒有路由也是可以發(fā)送數(shù)據(jù)包的。   當然,如果未指定外出接口,則只能返回網(wǎng)絡不可達了。 */   if (fl.fl4_src == 0)    fl.fl4_src = inet_select_addr(dev_out, 0,             RT_SCOPE_LINK);   res.type = RTN_UNICAST;   goto make_route;  }  err = -ENETUNREACH;  goto out; }  /* 若為本地路由,則使用環(huán)回接口 */ if (res.type == RTN_LOCAL) {  if (!fl.fl4_src) {   if (res.fi->fib_prefsrc)    fl.fl4_src = res.fi->fib_prefsrc;   else    fl.fl4_src = fl.fl4_dst;  }  dev_out = net->loopback_dev;  fl.oif = dev_out->ifindex;  res.fi = NULL;  flags |= RTCF_LOCAL;  goto make_route; } /*  使用默認路由需要三個條件:1. 若前綴為0,也即掩碼長度為0,默認路由匹配所有的目的地址。2. 路由類型為RTN_UNICAST,我們知道本地地址,組播地址和廣播地址3. 未指定出口設(shè)備,上面我們提到即便是沒有路由的情況下提供了出口設(shè)備,數(shù)據(jù)包也是可以發(fā)送的。 這時候路由是默認路由,因此我們需要選擇默認網(wǎng)關(guān) */ if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)  fib_select_default(net, &fl, &res);  if (!fl.fl4_src)  fl.fl4_src = FIB_RES_PREFSRC(res);  dev_out = FIB_RES_DEV(res); fl.oif = dev_out->ifindex; make_route: /* 創(chuàng)建一條路由緩存 */ err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); out: return err;}

      接下來看下創建路由緩存的函數:

      /*
       * Build an output route and insert it into the route cache.
       *
       * __mkroute_output() creates the entry; on failure its error code is
       * returned unchanged. On success the bucket is computed from the
       * caller's original key (@oldflp) and the namespace generation id of
       * @dev_out, and the result of rt_intern_hash() is returned.
       */
      static int ip_mkroute_output(struct rtable **rp,
                                   struct fib_result *res,
                                   const struct flowi *fl,
                                   const struct flowi *oldflp,
                                   struct net_device *dev_out,
                                   unsigned flags)
      {
          struct rtable *new_rt = NULL;
          unsigned bucket;
          int rc;

          rc = __mkroute_output(&new_rt, res, fl, oldflp, dev_out, flags);
          if (rc != 0)
              return rc;

          bucket = rt_hash(oldflp->fl4_dst, oldflp->fl4_src, oldflp->oif,
                           rt_genid(dev_net(dev_out)));

          return rt_intern_hash(bucket, new_rt, rp, NULL, oldflp->oif);
      }

      該函數首先調用__mkroute_output()函數生成一條路由緩存,然後再調用rt_intern_hash()函數寫入到緩存列表中去。

       static int rt_intern_hash(unsigned hash, struct rtable *rt,     struct rtable **rp, struct sk_buff *skb, int ifindex){ struct rtable *rth, *cand; struct rtable __rcu **rthp, **candp; unsigned long now; u32   min_score; int  chain_length; int attempts = !in_softirq(); restart: chain_length = 0; min_score = ~(u32)0; cand = NULL; candp = NULL; now = jiffies;  if (!rt_caching(dev_net(rt->dst.dev))) {/* 如果路由未進行緩存,那么把路由的DST標示設(shè)為DST_NOCACHE,調(diào)用者 便會知道這條路由未進行緩存,使用完成之后可以根據(jù)該標志對路由進 行釋放。如果在這里把路由給丟掉的話,那么當沒有進行路由緩存的情況 下調(diào)用都就沒辦不法解析路由信息了。 */  rt->dst.flags |= DST_NOCACHE;  if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {   int err = arp_bind_neighbour(&rt->dst);   if (err) {    if (net_ratelimit())     printk(KERN_WARNING         \'Neighbour table failure & not caching routes.\\n\');    ip_rt_put(rt);    return err;   }  }   goto skip_hashing; }   rthp = &rt_hash_table[hash].chain;  spin_lock_bh(rt_hash_lock_addr(hash)); /* 開始遍歷哈希鏈表 */ while ((rth = rcu_dereference_protected(*rthp,   lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {  /* 如果路由已過期,則直接從列表中刪除并釋放內(nèi)存空間 */  if (rt_is_expired(rth)) {   *rthp = rth->dst.rt_next;   rt_free(rth);   continue;  }  /* 如果未過期,并在列表中找到了匹配的路由,則將該路由緩存項拿到   鏈表的最新端,并增加引用計數(shù),釋放新建待插入的緩存項內(nèi)存。 */  if (compare_keys(&rth->fl, &rt->fl) && compare_netns(rth, rt)) {   *rthp = rth->dst.rt_next;   rcu_assign_pointer(rth->dst.rt_next,        rt_hash_table[hash].chain);   rcu_assign_pointer(rt_hash_table[hash].chain, rth);    dst_use(&rth->dst, now);   spin_unlock_bh(rt_hash_lock_addr(hash));    rt_drop(rt);   if (rp)    *rp = rth;   else    skb_dst_set(skb, &rth->dst);   return 0;  }   if (!atomic_read(&rth->dst.__refcnt)) {   u32 score = rt_score(rth);    if (score <= min_score) {    cand = rth;    candp = rthp;    min_score = score;   }  }   chain_length++;   rthp = &rth->dst.rt_next; }  if (cand) {  /* ip_rt_gc_elasticity used to be average length of chain   * length, when exceeded gc becomes really aggressive.   
*   * The second limit is less certain. At the moment it allows   * only 2 entries per bucket. We will see.   */  if (chain_length > ip_rt_gc_elasticity) {   *candp = cand->dst.rt_next;   rt_free(cand);  } } else {/* 如果某個哈希槽上的鏈表長度大于所能接受的鏈表的最大長度, 則說明哈希碰撞太嚴重,需要重構(gòu)哈希表,這個長度目前定義為20。 如果需要重構(gòu)則增加重構(gòu)計數(shù)current_rt_cache_rebuild_count的值, rt_caching()函數(shù)就是簡單地判斷該值是否超過最大值來斷定緩存是否 正在進行的,最大值為4。 */  if (chain_length > rt_chain_length_max &&      slow_chain_length(rt_hash_table[hash].chain) > rt_chain_length_max) {   struct net *net = dev_net(rt->dst.dev);   int num = ++net->ipv4.current_rt_cache_rebuild_count;   if (!rt_caching(net)) {    printk(KERN_WARNING \'%s: %d rebuilds is over limit, route caching disabled\\n\',     rt->dst.dev->name, num);   }   /* 重建哈希列表,然后重新開始此函數(shù) */   rt_emergency_hash_rebuild(net);   spin_unlock_bh(rt_hash_lock_addr(hash));    hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,     ifindex, rt_genid(net));   goto restart;  } }  /* 當路由為單播路由或者為外出路由(iif為0的情況即為外出路由) 則需要把路由綁定到arp */ if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {  int err = arp_bind_neighbour(&rt->dst);  if (err) {   spin_unlock_bh(rt_hash_lock_addr(hash));    if (err != -ENOBUFS) {    rt_drop(rt);    return err;   }    /* Neighbour tables are full and nothing      can be released. Try to shrink route cache,      it is most likely it holds some neighbour records.    
*/   if (attempts-- > 0) {    int saved_elasticity = ip_rt_gc_elasticity;    int saved_int = ip_rt_gc_min_interval;    ip_rt_gc_elasticity = 1;    ip_rt_gc_min_interval = 0;    /* 路由表進行垃圾回收,這個以后再寫 */    rt_garbage_collect(&ipv4_dst_ops);    ip_rt_gc_min_interval = saved_int;    ip_rt_gc_elasticity = saved_elasticity;    goto restart;   }    if (net_ratelimit())    printk(KERN_WARNING \'ipv4: Neighbour table overflow.\\n\');   rt_drop(rt);   return -ENOBUFS;  } }  /* 將該表項放至哈希鏈表的頭部 */ rt->dst.rt_next = rt_hash_table[hash].chain;  /*  * Since lookup is lockfree, we must make sure  * previous writes to rt are comitted to memory  * before making rt visible to other CPUS.  */ rcu_assign_pointer(rt_hash_table[hash].chain, rt);  spin_unlock_bh(rt_hash_lock_addr(hash)); skip_hashing: if (rp)  *rp = rt; else  skb_dst_set(skb, &rt->dst); return 0;}

      簡單注釋了一下幾個比較重要的函數,求大牛批評指正。

        本站是提供個人知識管理的網絡存儲空間,所有內容均由用戶發布,不代表本站觀點。請注意甄別內容中的聯系方式、誘導購買等信息,謹防詐騙。如發現有害或侵權內容,請點擊一鍵舉報。
        轉藏 分享 獻花(0)

        0條評論

        發表

        請遵守用戶 評論公約

        類似文章 更多