本文共 5076 字,大约阅读时间需要 16 分钟。
网络子系统在内核中可划分为两部分,分别是网络协议栈和网络设备。将从网络编程的接口入手,了解网络子系统,主要以TCP socket编程为例来进行分析。socket系统调用原型:int socket(int family, int type, int protocol);SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol){ return __sys_socket(family, type, protocol);}int __sys_socket(int family, int type, int protocol){ int retval; struct socket *sock; int flags; BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); flags = type & ~SOCK_TYPE_MASK; if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return -EINVAL; type &= SOCK_TYPE_MASK; //type表示数据的传输形式,比如:流,包等 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCKS; retval = sock_create(family, type, protocol, &sock); //创建socket结构体对象 if (retval < 0) return retval; return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); //为socket映射对应的fd文件描述符 //在发送/接收操作中,都会利用fd文件描述符}int sock_create(int family, int type, int protocol, struct socket **res){ return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);}int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern){ int err; struct socket *sock; //套接字结构体 const struct net_proto_family *pf; //网络协议族结构体,主要用来描述OSI七层协议中的网络层,比如:IP协议(AF_INET) if (family < 0 || family >= NPROTO) return -EAFNOSUPPORT; if (type < 0 || type >= SOCK_MAX) return -EINVAL; if (family == PF_INET && type == SOCK_PACKET) { pr_info_once(); family = PF_PACKET; } err = security_socket_create(family, type, protocol, kern); if (err) return err; sock = sock_alloc(); //创建套接字结构体对象 if (!sock) { net_warn_ratelimited("socket: no more sockets\n"); return -ENFILE; } sock->type = type; //套接字数据传输形式#ifdef CONFIG_MODULES if (rcu_access_pointer(net_families[family]) == NULL) request_module("net-pf-%d", family);#endif rcu_read_lock(); pf = rcu_dereference(net_families[family]); //net_families为只读型全局变量,该语句返回net_families数组中与family所对应的net_proto_family结构体对象 //net_families数组在内核初始化阶段,由各个协议族通过sock_register函数来填充 //比如:IPV4会调用sock_register(&inet_family_ops)来填充 //static const struct net_proto_family inet_family_ops = { // .family = PF_INET, // .create = inet_create, // .owner = THIS_MODULE, //}; err = -EAFNOSUPPORT; if (!pf) goto out_release; if (!try_module_get(pf->owner)) goto out_release; rcu_read_unlock(); err = pf->create(net, sock, protocol, kern); //调用族协议中指向的create函数 if (err < 0) goto out_module_put; if (!try_module_get(sock->ops->owner)) goto out_module_busy; module_put(pf->owner); err = security_socket_post_create(sock, family, type, protocol, kern); if (err) goto out_sock_release; *res = sock; //返回创建的套接字结构体 return 0;out_module_busy: err = -EAFNOSUPPORT;out_modue_put: sock->ops = NULL; module_put(pf->owner);out_sock_release: sock_release(sock); return err;out release: rcu_read_unlock(); goto out_sock_release;}struct socket *sock_alloc(void){ struct inode *inode; struct socket *socket; inode = new_inode_pseudo(sock_mnt->mnt_sb); //创建inode节点,该节点通过sock_mnt挂载点的超级块获得 if (inode) return NULL; sock = SOCKET_I(inode); //socket结构体通过inode结构体获得,两者被统一包含在结构体struct socket_alloc中,因此可通过container_of接口来获取对方的首地址。 //struct socket_alloc ---> |-----------------| // | struct socket * | // |-----------------| // | struct inode * | // |-----------------| inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); inode->i_op = &sockfs_inode_ops; //i_op为socket文件系统的操作集合 return sock;}struct inode *new_inode_pseudo(struct super_block *sb){ //super_block实际为为sock_mnt->mnt_sb struct inode *inode = alloc_inode(sb); if (inode) { spin_lock(); inode->i_state = 0; spin_unlock(&inode->i_lock); INIT_LIST_HEAD(&inode->i_sb_list); } return inode;}static struct inode *alloc_inode(struct super_block *sb){ const struct super_operations *ops = sb->s_op; struct inode *inode; if (ops->alloc_inode) iode = ops->alloc_inode(sb); //该函数的执行体为sock_alloc_inode()接口 else inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); //从inode_cachep缓存区中分配inode结构体对象 if (!inode) return NULL; if (unlikely(inode_init_always(sb, inode))) { if (ops->destory_inode) { ops->destory_inode(inode); if (!ops->free_inode) return NULL; } inode->free_inode = ops->free_inode; i_callback(&inode->i_rcu); return NULL; } return inode; //返回inode结构体对象}//通过代码分析,可知inode结构体通过super_block来获取。而super_block实际为为sock_mnt->mnt_sb,sock_mnt的原型为:static struct vfsmount *sock_mnt __read_mostly;//在socket初始化的过程中完成对sock_mnt的初始化。//关于代码中的pf->create()接口,主要创建与协议族相关的结构体信息。以AF_INET家族协议为例,pf->create()实际为:static int inet_create(struct net *net, struct socket *sock, int protocol, int kern)//inet_create()接口主要为套接字结构体所使用的协议族,以及子协议进行初始化。//当发送数据时,会根据socket所对应的协议族以及子协议,执行真正的操作接口。//以sendmsg()接口为例,sendmsg()会根据协议封装含有数据信息的套接字缓冲区sk_buffer,同时将该套接字缓冲区存入到队列中等待发送。另外,与套接字缓冲区相关联的结构体信息包括:struct net, struct net_device等,内核通过结构体struct nfqnl_instance来作为这些结构体的集合。
转载地址:http://ipxii.baihongyu.com/