乡下人产国偷v产偷v自拍,国产午夜片在线观看,婷婷成人亚洲综合国产麻豆,久久综合给合久久狠狠狠9

  • <output id="e9wm2"></output>
    <s id="e9wm2"><nobr id="e9wm2"><ins id="e9wm2"></ins></nobr></s>

    • 分享

      LINUX下PING與TCP/IP協議棧學習筆記(1) - TCP/IP - Linux

       jijo 2009-04-13





       

      框架如下
      PING程序
      A.使用的SOCKET接口
      1. socket
      2. sendto
      3. recvfrom
      B.PING地址:127.0.0.1
      TCP/IP協議棧:
      1. IP層
      2. ICMP層

      很可惜,在這次學習中沒能深入路由表的檢索中~,感覺還需努力哈~ 希望大家推薦一些路由算法的資料或者書籍 T ^T 不要太深~ 夠PING使用就行了~

      選擇PING本機是因為能了解收發的過程,同時也除去了對網卡硬件的了解的限制,在最小程度下了解TCP/IP協議棧的基本工作原理

      在文中有對TCP/IP協議棧理解不足和錯誤的地方,請大家一定要拍磚指正 = 3= 萬分感謝

      好~ = 3=)/ 首先來看看PING程序
      下面這段PING程序來自網上,感謝這位梁生的無償奉獻 = 3=)/
      我稍微做了一下修改,可能不大美觀和嚴謹,C語言編程功夫還需提高啊

       

      /***********************************************************
       * 作者:梁俊輝 *
       * 時間:2001年10月 *
       * 名稱:myping.c *
       * 說明:本程序用于演示ping命令的實現原理 *
       ***********************************************************/

      #include <string.h>
      #include <stdio.h>
      #include <stdlib.h>
      #include <signal.h>
      #include <arpa/inet.h>
      #include <sys/types.h>
      #include <sys/socket.h>
      #include <sys/time.h>
      #include <unistd.h>
      #include <netinet/in.h>
      #include <netinet/ip.h>
      #include <netinet/ip_icmp.h>
      #include <netdb.h>
      #include <setjmp.h>
      #include <errno.h>
      /* Size in bytes of the send and receive buffers declared below. */
      #define PACKET_SIZE 4096
      /* Seconds passed to alarm() by recv_packet() while waiting for a reply. */
      #define MAX_WAIT_TIME 5
      /* Upper bound on the number of echo requests issued by send_packet(). */
      #define MAX_NO_PACKETS 3
      /* Outgoing ICMP message, filled in by pack(). */
      char sendpacket[PACKET_SIZE];
      /* Incoming datagram as delivered by recvfrom(); parsed by unpack(). */
      char recvpacket[PACKET_SIZE];
      /* sockfd: the raw socket opened in main().
       * datalen: payload bytes per echo request; pack() adds 8 for the ICMP header. */
      int sockfd,datalen=56;
      /* Counters reported by statistics(). */
      int nsend=0,nreceived=0;
      /* Destination address, filled in by main() and used by sendto(). */
      struct sockaddr_in dest_addr;
      /* Process id from getpid(); stored in the ICMP id field and checked in unpack(). */
      pid_t pid;
      /* Source address of the most recently received datagram. */
      struct sockaddr_in from;
      void statistics(int signo);
      unsigned short cal_chksum(unsigned short *addr,int len);
      int pack(int pack_no);
      void send_packet(void);
      void recv_packet(void);
      int unpack(char *buf,int len);
      /* NOTE(review): tv_sub is declared here but no definition or call appears in this listing. */
      void tv_sub(struct timeval *out,struct timeval *in);
      /*
       * Print the transmit/receive summary, close the raw socket and terminate
       * the process with exit status 1.
       *
       * Installed as the SIGALRM handler by recv_packet() and also called
       * directly at the end of main(). The signo argument is not used.
       */
      void statistics(int signo)
      {
          int lost_percent = 0;

          (void)signo;
          /* Multiply before dividing so that integer division does not truncate
           * the loss ratio to 0, and skip the division when nothing was sent. */
          if (nsend > 0)
              lost_percent = (nsend - nreceived) * 100 / nsend;
          printf("\n--------------------PING statistics-------------------\n");
          printf("%d packets transmitted, %d received , %d%% lost\n",nsend,nreceived,lost_percent);
          close(sockfd);
          exit(1);
      }
      /*
       * Compute the 16-bit one's-complement checksum of a buffer.
       *
       * addr: start of the data, read as native 16-bit words.
       * len:  number of bytes to cover; may be odd.
       *
       * Returns the bitwise complement of the folded one's-complement sum.
       * Storing the result in the (previously zeroed) checksum field makes a
       * second pass over the same data return 0.
       */
      unsigned short cal_chksum(unsigned short *addr,int len)
      {
          /* A wide unsigned accumulator cannot overflow for any int-sized len,
           * unlike a signed int whose overflow is undefined behaviour. */
          unsigned long long sum = 0;
          const unsigned short *word = addr;
          int remaining = len;
          unsigned short tail = 0;

          /* Add the data up two bytes at a time. */
          while (remaining > 1)
          {
              sum += *word++;
              remaining -= 2;
          }
          /* Odd length: the last byte becomes the first byte in memory of a
           * 16-bit word whose other byte is zero. */
          if (remaining == 1)
          {
              *(unsigned char *)&tail = *(const unsigned char *)word;
              sum += tail;
          }
          /* Fold every carry above bit 15 back into the low 16 bits. */
          while (sum >> 16)
              sum = (sum & 0xffff) + (sum >> 16);
          return (unsigned short)~sum;
      }
      /*
       * Build one ICMP echo request in the global sendpacket buffer.
       *
       * pack_no: sequence number to store in the request (kept to 16 bits).
       *
       * The message is an 8-byte ICMP header followed by datalen payload bytes;
       * the payload starts with the current time from gettimeofday().
       * Returns the total message length in bytes (8 + datalen).
       */
      int pack(int pack_no)
      {
          enum { ICMP_HEADER_LEN = 8 };
          struct icmp *icmp = (struct icmp *)sendpacket;
          struct timeval now;
          int packsize = ICMP_HEADER_LEN + datalen;

          icmp->icmp_type = ICMP_ECHO;
          icmp->icmp_code = 0;
          /* The checksum field must be zero while the checksum is computed. */
          icmp->icmp_cksum = 0;
          /* Both fields are 16 bits wide; make the narrowing explicit. */
          icmp->icmp_seq = (unsigned short)pack_no;
          icmp->icmp_id = (unsigned short)pid;
          /* Copy the timestamp with memcpy: sendpacket is a plain char array, so
           * writing through a struct timeval pointer could be misaligned. */
          gettimeofday(&now,NULL);
          memcpy(sendpacket + ICMP_HEADER_LEN,&now,sizeof(now));
          icmp->icmp_cksum = cal_chksum( (unsigned short *)icmp,packsize);
          return packsize;
      }
      /*發(fā)送三個ICMP報文*/
      void send_packet()
      {
          int packetsize;
          while( nsend < MAX_NO_PACKETS)
          {
              nsend++;
              packetsize = pack(nsend); /*設(shè)置ICMP報頭*/
              //int sendto ( SOCKET s , const char FAR *buf , int len , int flags , const struct sockaddr FAR *to , int token );
              //[參數(shù)]
              //s - 指向用Socket函數(shù)生成的Socket
              //buf - 接受數(shù)據(jù)的緩沖區(qū)(數(shù)組)的指針
              //len - 緩沖區(qū)的大小
              //flag - 調(diào)用方式(MSG_DONTROUTE , MSG_OOB)
              //to - 指向發(fā)送方SOCKET地址的指針
              //token - 發(fā)送方SOCKET地址的大小 
              if( sendto(sockfd,sendpacket,packetsize,0,(struct sockaddr *)&dest_addr,sizeof(dest_addr) )<0 )
              {
                  perror("sendto error");
                  continue;
              }
              sleep(1); /*每隔一秒發(fā)送一個ICMP報文*/
          }
      }
      /*
       * Receive datagrams until one accepted reply has arrived for every
       * request counted in nsend.
       *
       * statistics() is installed as the SIGALRM handler and the alarm is
       * re-armed for MAX_WAIT_TIME seconds before each recvfrom(), so a wait
       * that times out ends the program through statistics(). Datagrams that
       * unpack() rejects are ignored. A recvfrom() failure other than EINTR
       * ends the loop instead of retrying forever.
       */
      void recv_packet()
      {
          signal(SIGALRM,statistics);
          while( nreceived<nsend)
          {
              /* recvfrom() overwrites the length, so reset it for every call. */
              socklen_t fromlen = sizeof(from);
              int n;

              alarm(MAX_WAIT_TIME);
              n = recvfrom(sockfd,recvpacket,sizeof(recvpacket),0,(struct sockaddr *)&from,&fromlen);
              if (n < 0)
              {
                  if (errno == EINTR)
                      continue;
                  perror("recvfrom error");
                  /* Retrying would re-arm the alarm each pass and could spin
                   * indefinitely on a persistent error. */
                  break;
              }
              if (unpack(recvpacket,n) == -1)
                  continue;
              nreceived++;
          }
      }
      /*剝?nèi)CMP報頭*/
      int unpack(char *buf,int len)
      {
          int i,iphdrlen;
          struct ip *ip;
          struct icmp *icmp;
          ip = (struct ip *)buf;
          iphdrlen = ip->ip_hl << 2; /*求ip報頭長度,即ip報頭的長度標志乘4*/
          icmp = (struct icmp *)(buf+iphdrlen); /*越過ip報頭,指向ICMP報頭*/
          len -= iphdrlen; /*ICMP報頭及ICMP數(shù)據(jù)報的總長度*/
          if( len < 8) /*小于ICMP報頭長度則不合理*/
          {
              printf("ICMP packets\'s length is less than 8\n");
              return -1;
          }
          /*確保所接收的是自己發(fā)的ICMP的回應(yīng)*/
          if( (icmp->icmp_type == ICMP_ECHOREPLY) && (icmp->icmp_id == pid) )
          {
              /*顯示相關(guān)信息*/
              printf("%d byte from %s: icmp_seq=%u ttl=%d \n",
                  len,
                  inet_ntoa(from.sin_addr),
                  icmp->icmp_seq,
                  ip->ip_ttl
                  );
          }
          else
              return -1;
      }
      int main(int argc,char *argv[])
      {
          struct hostent *host;
          struct protoent *protocol;
          unsigned long int inaddr = 0;
          int waittime=MAX_WAIT_TIME;
          int size=50*1024;
          //檢測參數(shù)是否過少
          if(argc<2)
          {
              printf("usage:%s hostname/IP address\n",argv[0]);
              exit(1);
          }
          //getprotobyname()返回對應(yīng)于給定協(xié)議名的包含名字和協(xié)議號的protoent結(jié)構(gòu)指針    
          //結(jié)構(gòu)的成員有: 
          //成員 用途 
          //p_name 正規(guī)的協(xié)議名。 
          //p_aliases 一個以空指針結(jié)尾的可選協(xié)議名隊列。 
          //p_proto 以主機字節(jié)順序排列的協(xié)議號 
          if( (protocol=getprotobyname("icmp") )==NULL)
          {
              perror("getprotobyname");
              exit(1);
          }
          /*生成使用ICMP的原始套接字,這種套接字只有root用戶才能生成*/
          if( (sockfd = socket(AF_INET,SOCK_RAW,protocol->p_proto) ) < 0)
          {
              perror("socket error");
              exit(1);
          }
          /* 回收root權(quán)限,設(shè)置當前用戶權(quán)限*/
          setuid(getuid());
          //初始化dest_addr
          bzero(&dest_addr,sizeof(dest_addr));
          //設(shè)置協(xié)議家族類型為    AF_INET    
          dest_addr.sin_family = AF_INET;
          /*判斷是主機名還是ip地址*/
          if( inaddr = inet_addr(argv[1]) == INADDR_NONE)
          {
              //通過dns取得ip地址
              if((host = gethostbyname(argv[1]) )==NULL) /*是主機名*/
              {
                  perror("gethostbyname error");
                  exit(1);
              }
              memcpy( (char *)&dest_addr.sin_addr,host->h_addr,host->h_length);
          }
          else
          { /*是ip地址*/
              inaddr = inet_addr(argv[1]);
              memcpy( (char *)&dest_addr.sin_addr,(char *)&inaddr,sizeof(inaddr));
          }
          /*獲取main的進程id,用于設(shè)置ICMP的標志符*/
          pid=getpid();
          printf("PING %s(%s): %d bytes data in ICMP packets.\n",argv[1],inet_ntoa(dest_addr.sin_addr),datalen);
          send_packet(); /*發(fā)送所有ICMP報文*/
          recv_packet(); /*接收所有ICMP報文*/
          statistics(SIGALRM); /*進行統(tǒng)計*/
          return 0;
      }

       

      PING的流程在上面已經有詳細的注釋了,我就不說了
      PING程序的主要流程分為3個步驟
      1. 建立一個socket結構                 ->socket
      2. 用這個socket發送ICMP包           ->sendto
      3. 用這個socket接收ICMP包           ->recvfrom

      由于是PING本機,所以在TCP/IP協議棧中會有4個部分的內容
      1. 建立socket
      2. 通過socket發送ICMP包
      3. 本機收到ICMP包后發送應答
      4. 通過socket接收ICMP包

      下面我們就來進入TCP/IP協議棧來看看這3個系統調用如何為我們的PING程序服務的

      首先是第1部分,建立一個socket結構

      sockfd = socket(AF_INET,SOCK_RAW,protocol->p_proto)
      這個函數會執行系統調用sys_socketcall
      sys_socketcall在/net/socket.c中

      /*
       * Multiplexed socket system call: copies the caller's argument array from
       * user space and dispatches on `call` to the matching sys_* handler.
       * Only the three cases used by the ping program are listed; the dotted
       * lines stand for cases the article left out.
       * Returns the handler's result, -EINVAL for an unknown call number, or
       * -EFAULT when the arguments cannot be copied.
       */
      asmlinkage long sys_socketcall(int call, unsigned long __user *args)
      {
          unsigned long a[6];
          unsigned long a0, a1;
          int err;

          /* Reject call numbers outside 1..SYS_RECVMSG. */
          if (call < 1 || call > SYS_RECVMSG)
              return -EINVAL;
          /* copy_from_user should be SMP safe. */
          /* Copy the arguments from user space into a[]; nargs[call] gives the size to copy. */
          if (copy_from_user(a, args, nargs[call]))
              return -EFAULT;
          /* Pass the argument count and the copied arguments to audit_socketcall();
           * a non-zero result is returned to the caller as is. */
          err = audit_socketcall(nargs[call] / sizeof(unsigned long), a);
          if (err)
              return err;
          a0 = a[0];
          a1 = a[1];
          switch (call) {
          case SYS_SOCKET:
              err = sys_socket(a0, a1, a[2]);
              break;
          .........................
          case SYS_SENDTO:
              err = sys_sendto(a0, (void __user *)a1, a[2], a[3],
                       (struct sockaddr __user *)a[4], a[5]);
              break;
          ...............................
          case SYS_RECVFROM:
              err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
                       (struct sockaddr __user *)a[4],
                       (int __user *)a[5]);
              break;
          default:
              err = -EINVAL;
              break;
          }
          return err;
      }

      上面只列出了我們所用到的3個case
      現在我們的目標是case SYS_SOCKET,也就是要創建一個socket了

      sys_socket在/net/socket.c中

      /*
       * Back end of socket(): create a socket for the requested
       * family/type/protocol and map it to a file descriptor.
       * Returns the result of sock_map_fd() on success, otherwise the negative
       * value returned by the step that failed.
       */
      asmlinkage long sys_socket(int family, int type, int protocol)
      {
          int retval;
          struct socket *sock;
          /* Create the socket. */
          retval = sock_create(family, type, protocol, &sock);
          if (retval < 0)
              goto out;
          /* Map the socket to a file descriptor. */
          retval = sock_map_fd(sock);
          if (retval < 0)
              goto out_release;
      out:
          /* It may be already another descriptor 8) Not kernel problem. */
          return retval;
      out_release:
          /* Mapping failed: release the socket created above. */
          sock_release(sock);
          return retval;
      }

      很簡單的調用

      sock_create在/net/socket.c中

      /*
       * Wrapper around __sock_create(): supplies current->nsproxy->net_ns as
       * the `net` argument and 0 as the `kern` argument.
       */
      int sock_create(int family, int type, int protocol, struct socket **res)
      {
          return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
      }

      繼續,進入到__sock_create中

       

      /*
       * Core of socket creation: validate family and type, allocate a struct
       * socket, look up the registered protocol family and let its ->create()
       * hook initialise the socket.
       * On success stores the socket in *res and returns 0. On failure returns
       * a negative error code; a socket that was already allocated is released
       * first.
       */
      static int __sock_create(struct net *net, int family, int type, int protocol,
                   struct socket **res, int kern)
      {
          int err;
          struct socket *sock;
          const struct net_proto_family *pf;
          /*
           * Check protocol is in range
           */

          /* Reject protocol families outside 0..NPROTO-1. */
          if (family < 0 || family >= NPROTO)
              return -EAFNOSUPPORT;
          /* Reject socket types outside 0..SOCK_MAX-1. */
          if (type < 0 || type >= SOCK_MAX)
              return -EINVAL;
          /* Compatibility.
           This uglymoron is moved from INET layer to here to avoid
           deadlock in module load.
           */

           /* The obsolete (PF_INET, SOCK_PACKET) combination is remapped to
            * PF_PACKET, with a message logged the first time only. */
          if (family == PF_INET && type == SOCK_PACKET)
          {
              static int warned;
              if (!warned)
              {
                  warned = 1;
                  printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
                   current->comm);
              }
              family = PF_PACKET;
          }
          err = security_socket_create(family, type, protocol, kern);
          if (err)
              return err;
          /*
           *    Allocate the socket and allow the family to set things up. if
           *    the protocol is 0, the family is instructed to select an appropriate
           *    default.
           */

           /* Allocate the socket. */
          sock = sock_alloc();
          /* Allocation failed. */
          if (!sock)
          {
              if (net_ratelimit())
                  printk(KERN_WARNING "socket: no more sockets\n");
              return -ENFILE;    /* Not exactly a match, but its the
                       closest posix thing */

          }
          /* Record the requested socket type. */
          sock->type = type;
      #if defined(CONFIG_KMOD)
          /* Attempt to load a protocol module if the find failed.
           *
           * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
           * requested real, full-featured networking support upon configuration.
           * Otherwise module support will break!
           */

          if (net_families[family] == NULL)
              request_module("net-pf-%d", family);
      #endif
          rcu_read_lock();
          /* Fetch the protocol-family entry registered for this family. */
          pf = rcu_dereference(net_families[family]);
          err = -EAFNOSUPPORT;
          /* No entry registered for this family. */
          if (!pf)
              goto out_release;
          /*
           * We will call the ->create function, that possibly is in a loadable
           * module, so we have to bump that loadable module refcnt first.
           */

           /* Take a reference on the module that owns the family. */
          if (!try_module_get(pf->owner))
              goto out_release;
          /* Now protected by module ref count */
          rcu_read_unlock();
          /* Run the family's socket initialisation hook. */
          err = pf->create(net, sock, protocol);
          /* The hook reported failure. */
          if (err < 0)
              goto out_module_put;
          /*
           * Now to bump the refcnt of the [loadable] module that owns this
           * socket at sock_release time we decrement its refcnt.
           */

           /* Take a reference on the module that owns the socket's ops. */
          if (!try_module_get(sock->ops->owner))
              goto out_module_busy;
          /*
           * Now that we're done with the ->create function, the [loadable]
           * module can have its refcnt decremented
           */

           /* Drop the reference taken on the family's module. */
          module_put(pf->owner);
          err = security_socket_post_create(sock, family, type, protocol, kern);
          if (err)
              goto out_sock_release;
          /* Hand the initialised socket back to the caller. */
          *res = sock;
          return 0;
      out_module_busy:
          err = -EAFNOSUPPORT;
      out_module_put:
          sock->ops = NULL;
          module_put(pf->owner);
      out_sock_release:
          sock_release(sock);
          return err;
      out_release:
          /* Reached with the RCU read lock still held. */
          rcu_read_unlock();
          goto out_sock_release;
      }

      security_socket_create,關于security的內容我們都略過,一來減少框架的復雜度,二來我也不知道security主要做的是啥 哈哈 不過可以肯定的是不會妨礙TCP/IP協議棧的正常運行

      首先是sock_alloc
      sock_alloc在/net/socket.c中

      /*
       * Allocate a new inode on the sock_mnt superblock and return the struct
       * socket obtained from it with SOCKET_I().
       * Returns NULL when new_inode() fails.
       */
      static struct socket *sock_alloc(void)
      {
          struct inode *inode;
          struct socket *sock;
          inode = new_inode(sock_mnt->mnt_sb);
          if (!inode)
              return NULL;
          sock = SOCKET_I(inode);
          /* Mark the inode as a socket with the S_IRWXUGO permission bits. */
          inode->i_mode = S_IFSOCK | S_IRWXUGO;
          /* Owner and group come from the current task's fsuid and fsgid. */
          inode->i_uid = current->fsuid;
          inode->i_gid = current->fsgid;
          /* Increment the per-CPU sockets_in_use counter. */
          get_cpu_var(sockets_in_use)++;
          put_cpu_var(sockets_in_use);
          return sock;
      }

      主要是申請一個新的socket,并對他的文件屬性進行初始化,socket是屬于虛擬文件系統的一部分,我們暫時只要知道這一點就好了

      回到__sock_create中,然后到
      pf = rcu_dereference(net_families[family]);
      net_families的初始化我們也不分析,因為涉及的面太廣,為了緊扣PING,我們只需要知道得到了inet_family_ops這個結構就可以了,詳細的初始化部分在/net/ipv4/af_inet.c中,大家有興趣的可以看看
      inet_family_ops的結構如下

      /* Protocol-family entry for PF_INET; its create hook is inet_create(). */
      static struct net_proto_family inet_family_ops = {
          .family = PF_INET,
          .create = inet_create,
          .owner    = THIS_MODULE,
      };

      緊接著我們就到了
      err = pf->create(net, sock, protocol);
      調用inet_family_ops的create函數

      inet_create在/net/ipv4/af_inet.c中

      /*
       * PF_INET create hook: find the inetsw entry matching the socket type and
       * requested protocol, allocate a struct sock for it and initialise both
       * the socket and the sock.
       * Returns 0 on success or a negative error code.
       */
      static int inet_create(struct net *net, struct socket *sock, int protocol)
      {
          struct sock *sk;
          struct list_head *p;
          struct inet_protosw *answer;
          struct inet_sock *inet;
          struct proto *answer_prot;
          unsigned char answer_flags;
          char answer_no_check;
          int try_loading_module = 0;
          int err;

          /* For socket types other than SOCK_RAW and SOCK_DGRAM, call
           * build_ehash_secret() if inet_ehash_secret is still zero. */
          if (sock->type != SOCK_RAW &&
           sock->type != SOCK_DGRAM &&
           !inet_ehash_secret)
              build_ehash_secret();
          /* Mark the socket as unconnected. */
          sock->state = SS_UNCONNECTED;
          /* Look for the requested type/protocol pair. */
          /* No entry found yet. */
          answer = NULL;
      lookup_protocol:
          err = -ESOCKTNOSUPPORT;
          rcu_read_lock();
          /* Walk the entries registered for this socket type. */
          list_for_each_rcu(p, &inetsw[sock->type])
          {
              /* Get the inet_protosw that contains this list node. */
              answer = list_entry(p, struct inet_protosw, list);
              /* Check the non-wild match. */
              /* The requested protocol equals this entry's protocol. */
              if (protocol == answer->protocol)
              {
                  /* An exact match ends the search unless the request was IPPROTO_IP. */
                  if (protocol != IPPROTO_IP)
                      /* Leave the loop with this entry. */
                      break;
              }
              else
              {
                  /* Check for the two wild cases. */
                  /* The caller asked for IPPROTO_IP. */
                  if (IPPROTO_IP == protocol)
                  {
                      /* Adopt this entry's protocol as the requested one. */
                      protocol = answer->protocol;
                      /* Leave the loop with this entry. */
                      break;
                  }
                  /* This entry is registered with IPPROTO_IP. */
                  if (IPPROTO_IP == answer->protocol)
                      /* Leave the loop with this entry. */
                      break;
              }
              err = -EPROTONOSUPPORT;
              /* This entry did not match; clear it before the next iteration. */
              answer = NULL;
          }
          /* No entry matched. */
          if (unlikely(answer == NULL))
          {
              if (try_loading_module < 2)
              {
                  rcu_read_unlock();
                  /*
                   * Be more specific, e.g. net-pf-2-proto-132-type-1
                   * (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
                   */

                  if (++try_loading_module == 1)
                      request_module("net-pf-%d-proto-%d-type-%d",
                           PF_INET, protocol, sock->type);
                  /*
                   * Fall back to generic, e.g. net-pf-2-proto-132
                   * (net-pf-PF_INET-proto-IPPROTO_SCTP)
                   */

                  else
                      request_module("net-pf-%d-proto-%d",
                           PF_INET, protocol);
                  goto lookup_protocol;
              }
              else
                  goto out_rcu_unlock;
          }
          /* An entry with a positive capability value requires capable() to succeed. */
          err = -EPERM;
          if (answer->capability > 0 && !capable(answer->capability))
              goto out_rcu_unlock;
          err = -EAFNOSUPPORT;
          if (!inet_netns_ok(net, protocol))
              goto out_rcu_unlock;
          /* Use the matched entry's ops as the socket's operation table. */
          sock->ops = answer->ops;
          /* Copy what is needed from the entry before leaving the RCU section. */
          answer_prot = answer->prot;
          answer_no_check = answer->no_check;
          answer_flags = answer->flags;
          rcu_read_unlock();
          BUG_TRAP(answer_prot->slab != NULL);
          err = -ENOBUFS;
          /* Allocate the struct sock. */
          sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
          /* Allocation failed. */
          if (sk == NULL)
              goto out;
          err = 0;
          sk->sk_no_check = answer_no_check;
          if (INET_PROTOSW_REUSE & answer_flags)
              sk->sk_reuse = 1;
          /* View the sock as an inet_sock. */
          inet = inet_sk(sk);
          inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
          /* Raw sockets only. */
          if (SOCK_RAW == sock->type)
          {
              /* Store the protocol number in inet->num. */
              inet->num = protocol;
              /* For IPPROTO_RAW, set hdrincl. */
              if (IPPROTO_RAW == protocol)
                  inet->hdrincl = 1;
          }
          if (ipv4_config.no_pmtu_disc)
              inet->pmtudisc = IP_PMTUDISC_DONT;
          else
              inet->pmtudisc = IP_PMTUDISC_WANT;

          inet->id = 0;
          /* Initialise the sock and link it with the socket. */
          sock_init_data(sock, sk);
          /* Install the destructor for this sock. */
          sk->sk_destruct     = inet_sock_destruct;
          /* Record the protocol family. */
          sk->sk_family     = PF_INET;
          /* Record the protocol. */
          sk->sk_protocol     = protocol;
          sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
          inet->uc_ttl    = -1;
          inet->mc_loop    = 1;
          inet->mc_ttl    = 1;
          inet->mc_index    = 0;
          inet->mc_list    = NULL;
          sk_refcnt_debug_inc(sk);
          /* inet->num is non-zero (set above for raw sockets). */
          if (inet->num)
          {
              /* It assumes that any protocol which allows
               * the user to assign a number at socket
               * creation time automatically
               * shares.
               */

               /* Store num, converted with htons(), as the source port. */
              inet->sport = htons(inet->num);
              /* Add to protocol hash chains. */
              sk->sk_prot->hash(sk);
          }
          /* The protocol provides an init hook. */
          if (sk->sk_prot->init)
          {
              /* Run it; on failure release the sock. */
              err = sk->sk_prot->init(sk);
              if (err)
                  sk_common_release(sk);
          }
      out:
          return err;
      out_rcu_unlock:
          rcu_read_unlock();
          goto out;
      }

      inetsw結構的注冊不關心,我們看結果

       

      answer就是其中的第二項

      這里我們的protocol為IPPROTO_ICMP
      answer->protocol為IPPROTO_IP

      所以是進入了if (IPPROTO_IP == answer->protocol)后break跳出了循環

      之后到inet_netns_ok
      inet_netns_ok在/net/ipv4/af_inet.c中

       

      /*
       * Decide whether `protocol` may be used in the namespace `net`.
       * Returns 1 for init_net and for protocols with no inet_protos entry;
       * otherwise returns the entry's netns_ok field.
       */
      static inline int inet_netns_ok(struct net *net, int protocol)
      {
          int hash;
          struct net_protocol *ipprot;
          if (net == &init_net)
              return 1;
          /* Index into inet_protos derived from the protocol number. */
          hash = protocol & (MAX_INET_PROTOS - 1);
          /* Fetch the entry stored at that index. */
          ipprot = rcu_dereference(inet_protos[hash]);
          /* No entry stored there. */
          if (ipprot == NULL)
              /* raw IP is OK */
              return 1;
          return ipprot->netns_ok;
      }

      由于在__sock_create中我們傳入的net類型為init_net,所以這里是返回1,不會goto out_rcu_unlock結(jié)束的

      繼續在inet_create中向下走,來到了sk_alloc
      sk_alloc在/net/core/sock.c中

      /*
       * Allocate a struct sock for protocol `prot` and fill in its basic
       * fields. __GFP_ZERO is added to the allocation flags.
       * Returns the new sock, or NULL when sk_prot_alloc() fails.
       */
      struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
               struct proto *prot)
      {
          struct sock *sk;
          /* Allocate the sock. */
          sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
          /* Allocation succeeded. */
          if (sk)
          {
              /* Record the protocol family. */
              sk->sk_family = family;
              /*
               * See comment in struct sock definition to understand
               * why we need sk_prot_creator -acme
               */

               /* Both sk_prot and sk_prot_creator point at prot. */
              sk->sk_prot = sk->sk_prot_creator = prot;
              sock_lock_init(sk);
              sock_net_set(sk, get_net(net));
          }
          return sk;
      }

      sk_prot_alloc在協議結構的高速緩存中分配一個sock結構,分配成功后進行一些簡單的初始化操作便退出了

      繼續向下走,到sock_init_data
      sock_init_data在/net/core/sock.c中

      /*
       * Fill in the generic fields of a freshly allocated sock and, when `sock`
       * is not NULL, link the sock and the socket to each other.
       */
      void sock_init_data(struct socket *sock, struct sock *sk)
      {
          /* Initialise the receive queue. */
          skb_queue_head_init(&sk->sk_receive_queue);
          /* Initialise the write queue. */
          skb_queue_head_init(&sk->sk_write_queue);
          /* Initialise the error queue. */
          skb_queue_head_init(&sk->sk_error_queue);
      #ifdef CONFIG_NET_DMA
          skb_queue_head_init(&sk->sk_async_wait_queue);
      #endif
          sk->sk_send_head    =    NULL;
          init_timer(&sk->sk_timer);
          sk->sk_allocation    =    GFP_KERNEL;
          sk->sk_rcvbuf        =    sysctl_rmem_default;
          sk->sk_sndbuf        =    sysctl_wmem_default;
          sk->sk_state        =    TCP_CLOSE;
          /* Point the sock at its socket. */
          sk->sk_socket        =    sock;
          sock_set_flag(sk, SOCK_ZAPPED);
          /* A socket was supplied. */
          if (sock)
          {
              /* Copy the socket type. */
              sk->sk_type    =    sock->type;
              /* Use the socket's wait queue. */
              sk->sk_sleep    =    &sock->wait;
              /* Point the socket back at the sock. */
              sock->sk    =    sk;
          }
          else
              /* No socket: no wait queue. */
              sk->sk_sleep    =    NULL;
          rwlock_init(&sk->sk_dst_lock);
          rwlock_init(&sk->sk_callback_lock);
          lockdep_set_class_and_name(&sk->sk_callback_lock,
                  af_callback_keys + sk->sk_family,
                  af_family_clock_key_strings[sk->sk_family]);
          /* Default state-change callback. */
          sk->sk_state_change    =    sock_def_wakeup;
          /* Default data-ready callback. */
          sk->sk_data_ready    =    sock_def_readable;
          sk->sk_write_space    =    sock_def_write_space;
          /* Default error-report callback. */
          sk->sk_error_report    =    sock_def_error_report;
          /* Default destructor. */
          sk->sk_destruct        =    sock_def_destruct;
          /* No sndmsg page yet. */
          sk->sk_sndmsg_page    =    NULL;
          /* Offset within the sndmsg page. */
          sk->sk_sndmsg_off    =    0;
          sk->sk_peercred.pid     =    0;
          sk->sk_peercred.uid    =    -1;
          sk->sk_peercred.gid    =    -1;
          sk->sk_write_pending    =    0;
          sk->sk_rcvlowat        =    1;
          sk->sk_rcvtimeo        =    MAX_SCHEDULE_TIMEOUT;
          sk->sk_sndtimeo        =    MAX_SCHEDULE_TIMEOUT;
          sk->sk_stamp = ktime_set(-1L, 0);
          atomic_set(&sk->sk_refcnt, 1);
          atomic_set(&sk->sk_drops, 0);
      }

      這是個大家伙,負責sock結構的詳細初始化
      初始化完成后繼續inet_create的執行
      由于之前設置了inet->num為協議號,這里會執行sk->sk_prot->hash
      在進入這個函數之前讓我們先來看一下目前sock的結構
       
      sk_prot為一個宏  #define sk_prot __sk_common.skc_prot
      指向了raw_prot,所以sk->sk_prot->hash就是執行了raw_hash_sk
      raw_hash_sk在/net/ipv4/raw.c中

      /*
       * Insert `sk` into the raw hash table referenced by its protocol
       * (sk->sk_prot->h.raw_hash), in the chain selected by the low bits of
       * inet_sk(sk)->num. The insertion runs under the table's write lock.
       */
      void raw_hash_sk(struct sock *sk)
      {
          struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
          struct hlist_head *head;
          /* Select the chain for this socket's num. */
          head = &h->ht[inet_sk(sk)->num & (RAW_HTABLE_SIZE - 1)];
          write_lock_bh(&h->lock);
          sk_add_node(sk, head);
          sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
          write_unlock_bh(&h->lock);
      }

      主要是將這個sock掛到raw_prot的哈希表(h.raw_hash)中對應的鏈表上,如下圖

       
      因為raw_prot是有raw_init這個函數的,所以我們進入到sk->sk_prot->init
      raw_init在/net/ipv4/raw.c中

      /*
       * Init hook for raw sockets: when the socket's num is IPPROTO_ICMP, zero
       * the filter member of its raw_sock. Always returns 0.
       */
      static int raw_init(struct sock *sk)
      {
          /* View the sock as a raw_sock. */
          struct raw_sock *rp = raw_sk(sk);
          /* Only sockets whose num is IPPROTO_ICMP have the filter cleared. */
          if (inet_sk(sk)->num == IPPROTO_ICMP)
              /* Zero the filter. */
              memset(&rp->filter, 0, sizeof(rp->filter));
          return 0;
      }

      結構圖如下

       
      為什么能一直這樣強制轉換下去,就不怕結構超界么?
      其實這是一早有預謀的,在raw_prot中有一個成員為
      .obj_size    = sizeof(struct raw_sock)
      而在協議中分配空間的時候就已經分配了raw_sock所需要的空間,我們一直在用他的一部分而已
      好, 到這里inet_create就完成了,一路返回到sys_socket中
      執行最后一步,把初始化好的socket結構映射到一個文件描述符中,并返回這個文件描述符
      這樣,我們的ping程序的sockfd就拿到了一個按要求初始化好的socket結構索引號了
      在之后的sendto和recvfrom操作中就能夠使用這個索引號進行發送和接收了
      然后到第2部分,發送初始化好的icmp結構
      sendto(sockfd,sendpacket,packetsize,0,(struct sockaddr *)&dest_addr,sizeof(dest_addr))
      繼續來到系統調用sys_socketcall中
      這次我們的目標是case SYS_SENDTO
      sys_sendto在/net/socket.c中

      /*
       * Back end of sendto(): wrap the user buffer and the optional destination
       * address in a msghdr and pass it to sock_sendmsg().
       * Returns the result of sock_sendmsg(), or the error from the socket
       * lookup or from copying the address.
       */
      asmlinkage long sys_sendto(int fd, void __user *buff, size_t len,
                   unsigned flags, struct sockaddr __user *addr,
                   int addr_len)
      {
          struct socket *sock;
          char address[MAX_SOCK_ADDR];
          int err;
          struct msghdr msg;
          struct iovec iov;
          int fput_needed;
          /* Look up the socket behind the file descriptor. */
          sock = sockfd_lookup_light(fd, &err, &fput_needed);
          if (!sock)
              goto out;
          /* Start address of the data to send. */
          iov.iov_base = buff;
          /* Length of the data to send. */
          iov.iov_len = len;
          msg.msg_name = NULL;
          /* The message consists of this single iovec. */
          msg.msg_iov = &iov;
          msg.msg_iovlen = 1;
          msg.msg_control = NULL;
          msg.msg_controllen = 0;
          msg.msg_namelen = 0;
          /* A destination address was supplied. */
          if (addr)
          {
              /* Copy the address from user space into the local buffer. */
              err = move_addr_to_kernel(addr, addr_len, address);
              if (err < 0)
                  goto out_put;
              /* Attach the copied address to the message. */
              msg.msg_name = address;
              /* Record its length. */
              msg.msg_namelen = addr_len;
          }
          /* A file opened with O_NONBLOCK sends with MSG_DONTWAIT. */
          if (sock->file->f_flags & O_NONBLOCK)
              flags |= MSG_DONTWAIT;
          msg.msg_flags = flags;
          err = sock_sendmsg(sock, &msg, len);
      out_put:
          fput_light(sock->file, fput_needed);
      out:
          return err;
      }

      初始化好的msg結構如下

       

       

      在iovec結構中保存了我們要發送數據的首地址和大小

      然后進入到sock_sendmsg
      sock_sendmsg在/net/socket.c中

      /*
       * Send `msg` on `sock` using an on-stack kiocb, and wait for completion
       * when __sock_sendmsg() reports -EIOCBQUEUED.
       * Returns the result of __sock_sendmsg(), or of wait_on_sync_kiocb() in
       * the queued case.
       */
      int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
      {
          struct kiocb iocb;
          struct sock_iocb siocb;
          int ret;

          init_sync_kiocb(&iocb, NULL);
          /* Attach the socket-specific control block to the kiocb. */
          iocb.private = &siocb;
          ret = __sock_sendmsg(&iocb, sock, msg, size);
          if (-EIOCBQUEUED == ret)
              ret = wait_on_sync_kiocb(&iocb);
          return ret;
      }

      我不大明白kiocb的用處,google也不是說得很清楚,大概就是說關于文件同步操作方面上的,請明白的同學們指教一下 = 3=)/ 感謝  這里就不把kiocb的結構畫進來了

      然后進入到__sock_sendmsg
      __sock_sendmsg在/net/socket.c中

      /*
       * Record the socket, message and size in the sock_iocb attached to
       * `iocb`, run the security hook, then call the socket's sendmsg
       * operation.
       * Returns the security hook's non-zero result, or otherwise the result of
       * sock->ops->sendmsg().
       */
      static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
                       struct msghdr *msg, size_t size)
      {
          struct sock_iocb *si = kiocb_to_siocb(iocb);
          int err;

          /* Record the socket. */
          si->sock = sock;
          si->scm = NULL;
          /* Record the message. */
          si->msg = msg;
          /* Record the size passed in. */
          si->size = size;
          err = security_socket_sendmsg(sock, msg, size);
          if (err)
              return err;
          return sock->ops->sendmsg(iocb, sock, msg, size);
      }

      連接完成后的結構圖如下

       
      sock->ops->sendmsg調用的為inet_sockraw_ops中的sendmsg操作,也就是inet_sendmsg函數
      inet_sendmsg在/net/ipv4/af_inet.c中

      /*
       * AF_INET sendmsg entry point: forward the request to the protocol's
       * sendmsg handler (sk->sk_prot->sendmsg).
       *
       * When inet_sk(sk)->num is zero, inet_autobind() is attempted first; if it
       * returns non-zero the call fails with -EAGAIN.
       */
      int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
               size_t size)
      {
          struct sock *sk = sock->sk;

          /* We may need to bind the socket. */
          if (inet_sk(sk)->num == 0) {
              if (inet_autobind(sk) != 0)
                  return -EAGAIN;
          }

          return sk->sk_prot->sendmsg(iocb, sk, msg, size);
      }

      我們在之前已經設置了端口號,所以這裡直接來到了sk->sk_prot->sendmsg
      sk->sk_prot->sendmsg調用的是raw_prot中的sendmsg操作,也就是raw_sendmsg函數

      raw_sendmsg在/net/ipv4/raw.c中

      /*
       * sendmsg handler for raw IPv4 sockets.
       *
       * Validates the request, determines the destination address (from
       * msg->msg_name or, if none was given, from the socket itself), processes
       * any control messages and IP options, performs a route lookup and then
       * either sends a caller-built packet (inet->hdrincl set) via
       * raw_send_hdrinc() or queues and pushes the payload via ip_append_data()
       * and ip_push_pending_frames().
       *
       * Returns @len on success, or a negative error code:
       *   -EMSGSIZE     len is larger than 0xFFFF
       *   -EOPNOTSUPP   MSG_OOB was requested
       *   -EINVAL       address too short, or invalid option combination
       *   -EAFNOSUPPORT address family is set and is not AF_INET
       *   -EDESTADDRREQ no address given and the socket is not TCP_ESTABLISHED
       *   -EACCES       broadcast route without SOCK_BROADCAST on the socket
       * plus any error returned by the helpers called below.
       *
       * Cleanup labels: "out" only returns; "done" additionally frees the
       * options obtained from control messages (when free is set) and drops the
       * route reference, so it is used once those resources may exist.
       */
      static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
               size_t len)
      {
          struct inet_sock *inet = inet_sk(sk);
          struct ipcm_cookie ipc;
          struct rtable *rt = NULL;
          int free = 0;
          __be32 daddr;
          __be32 saddr;
          u8 tos;
          int err;

          err = -EMSGSIZE;
          // Reject payloads longer than 0xFFFF bytes
          if (len > 0xFFFF)
              goto out;
          /*
           *    Check the flags.
           */

          err = -EOPNOTSUPP;
          if (msg->msg_flags & MSG_OOB)    /* Mirror BSD error message */
              goto out; /* compatibility */
          /*
           *    Get and verify the address.
           */

          // Was a destination address supplied with the message?
          if (msg->msg_namelen)
          {
              // Interpret the address data as a struct sockaddr_in
              struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;    
              err = -EINVAL;
              // The supplied address must be at least sizeof(struct sockaddr_in)
              if (msg->msg_namelen < sizeof(*usin))
                  goto out;
              // Check that the address family is AF_INET
              if (usin->sin_family != AF_INET)
              {
                  // Function-static counter: the warning is printed only the first time
                  static int complained;      
                  if (!complained++)
                      printk(KERN_INFO "%s forgot to set AF_INET in "
                               "raw sendmsg. Fix it!\n",
                               current->comm);      
                  err = -EAFNOSUPPORT;
                  // A zero (unset) family is tolerated; any other non-AF_INET family fails
                  if (usin->sin_family)
                      goto out;
              }
              // Take the destination IP address from the supplied sockaddr
              daddr = usin->sin_addr.s_addr;
              /* ANK: I did not forget to get protocol from port field.
               * I just do not know, who uses this weirdness.
               * IP_HDRINCL is much more convenient.
               */

          }
          else
          {
              // No address given: only allowed when sk_state is TCP_ESTABLISHED,
              // in which case the destination stored in the socket is used
              err = -EDESTADDRREQ;
              if (sk->sk_state != TCP_ESTABLISHED)
                  goto out;
              daddr = inet->daddr;
          }
          // Seed the ipcm_cookie from the socket: source address, no options,
          // and the bound output interface
          ipc.addr = inet->saddr;
          ipc.opt = NULL;
          ipc.oif = sk->sk_bound_dev_if;
          // Is there ancillary (control) data attached to the message?
          if (msg->msg_controllen)
          {
              err = ip_cmsg_send(sock_net(sk), msg, &ipc);
              if (err)
                  goto out;
              // Options set by ip_cmsg_send() are kfree()d at "done"
              if (ipc.opt)
                  free = 1;
          }
          // ipc.addr held the source address up to here; from now on it holds
          // the destination address
          saddr = ipc.addr;
          ipc.addr = daddr;
          // Were IP options supplied through control data?
          if (!ipc.opt)
              // If not, fall back to the ip_options stored in the inet_sock
              ipc.opt = inet->opt;
          // Are there any IP options to apply?
          if (ipc.opt)
          {
              err = -EINVAL;
              /* Linux does not mangle headers on raw sockets,
               * so that IP options + IP_HDRINCL is non-sense.
               */

              if (inet->hdrincl)
                  goto done;
              if (ipc.opt->srr)
              {
                  // With the srr option set, a zero destination is rejected and
                  // the route lookup targets the option's faddr instead
                  if (!daddr)
                      goto done;
                  daddr = ipc.opt->faddr;
              }
          }
          // Get the type of service for the route lookup
          tos = RT_CONN_FLAGS(sk);
          if (msg->msg_flags & MSG_DONTROUTE)
              tos |= RTO_ONLINK;
          // Is the destination a multicast address?
          if (ipv4_is_multicast(daddr))
          {
              // Default the output interface and source address from the
              // socket's multicast settings when they are not already set
              if (!ipc.oif)
                  ipc.oif = inet->mc_index;
              if (!saddr)
                  saddr = inet->mc_addr;
          }
          // Perform the routing table lookup
          {
              struct flowi fl = { .oif = ipc.oif,
                       .mark = sk->sk_mark,
                       .nl_u = { .ip4_u =
                           { .daddr = daddr,
                              .saddr = saddr,
                              .tos = tos } },
                       .proto = inet->hdrincl ? IPPROTO_RAW :
                                   sk->sk_protocol,
                       };
              if (!inet->hdrincl)
              {
                  err = raw_probe_proto_opt(&fl, msg);
                  if (err)
                      goto done;
              }
              security_sk_classify_flow(sk, &fl);
              // On success rt is filled in with the resulting route
              err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
          }
          if (err)
              goto done;
          err = -EACCES;
          // A broadcast route requires SOCK_BROADCAST to be set on the socket
          if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
              goto done;
          if (msg->msg_flags & MSG_CONFIRM)
              goto do_confirm;
      back_from_confirm:
          if (inet->hdrincl)
          {
              // hdrincl is set: hand the iovec to raw_send_hdrinc()
              err = raw_send_hdrinc(sk, msg->msg_iov, len,rt, msg->msg_flags);
          }
          else
          {
              if (!ipc.addr)
                  ipc.addr = rt->rt_dst;
              lock_sock(sk);
              // Copy the data to be sent into skb(s)
              err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
                          &ipc, rt, msg->msg_flags);
              // Did the copy succeed?
              if (err)
                  // On failure, release all skbs in the socket's send queue
                  ip_flush_pending_frames(sk);
              else if (!(msg->msg_flags & MSG_MORE))
                  // Send the skbs queued on the socket
                  err = ip_push_pending_frames(sk);
              release_sock(sk);
          }
      done:
          // Free options obtained from control data and drop the route reference
          if (free)
              kfree(ipc.opt);
          ip_rt_put(rt);
      out:
          // Negative err is returned as-is; otherwise report len bytes sent
          if (err < 0)
              return err;
          return len;
      do_confirm:
          dst_confirm(&rt->u.dst);
          // Continue with the send unless this is a zero-length MSG_PROBE request
          if (!(msg->msg_flags & MSG_PROBE) || len)
              goto back_from_confirm;
          err = 0;
          goto done;
      }

      這裡最關鍵的就是
      err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1);
      這是一個路由表查詢函數

      無能為力........

      不過我根據DEBUG的信息把查詢結果畫了出來,分別為ipcm_cookie和rtable兩個結構,其中最關鍵的為rtable中的dst_entry

       
      rtable中的idev連接lo這個環回虛擬網卡設備
      lo網卡的註冊在/drivers/net/loopback.c中
      由於牽涉到路由表的添加問題,我這裡就不介紹它的註冊了
      現在回到raw_sendmsg,進入ip_append_data, ip_append_data負責將要發送的數據組裝到sk_buff結構中










        本站是提供個人知識管理的網(wǎng)絡(luò)存儲空間,所有內(nèi)容均由用戶發(fā)布,不代表本站觀點。請注意甄別內(nèi)容中的聯(lián)系方式、誘導(dǎo)購買等信息,謹防詐騙。如發(fā)現(xiàn)有害或侵權(quán)內(nèi)容,請點擊一鍵舉報。
        轉(zhuǎn)藏 分享 獻花(0

        0條評論

        發(fā)表

        請遵守用戶 評論公約

        類似文章 更多