读书人

Linux内核中PF_KEY协议族的实现(1)

发布时间: 2012-08-14 10:39:58 作者: rapoo

Linux内核中PF_KEY协议族的实现(1)
本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源:http://yfydz.cublog.cn

1. 前言在Linux2.6内核中自带了PF_KEY协议族的实现,这样就不用象2.4那样打补丁来实现了。内核中PF_KEY实现要完成的功能是实现维护内核的安全联盟(SA)和安全策略(SP)数据库, 以及和用户空间的接口。以下内核代码版本为2.6.19.2, PF_KEY相关代码在net/key/目录下,定义了内核中PF_KEY与用户空间的接口,这个接口是RFC定义的,因此各种实现都基本类似;但具体关于SA和SP的内部的实现和管理则是与实现相关的,各种实现各自不同,在linux内核是使用xfrm库来实现的,代码在net/xfrm/目录下定义。 2. 数据结构关于SA和SP的数据结构已经在RFC2367中定义, 头文件为include/linux/pfkeyv2.h, 这些是用户空间和内核空间共享的,只是作为接口的数据结构;而内核中具体使用的数据结构为xfrm定义的结构,在include/net/xfrm.h中定义。2.1 PF_KEY类型的sockstruct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk;// 比普通sock添加两个参数// 是否进行登记 int  registered;// 是否是混杂模式 int  promisc;};2.2 状态(SA)xfrm状态用来描述SA在内核中的具体实现:struct xfrm_state{ /* Note: bydst is re-used during gc */// 每个状态结构挂接到三个HASH链表中 struct hlist_node bydst; // 按目的地址HASH struct hlist_node bysrc; // 按源地址HASH struct hlist_node byspi; // 按SPI值HASH atomic_t  refcnt; // 所有使用计数 spinlock_t  lock;   // 状态锁 struct xfrm_id  id; // ID struct xfrm_selector sel; // 状态选择子 u32   genid; /* Key manger bits */ struct {  u8  state;  u8  dying;  u32  seq; } km; /* Parameters of this state. 
*/ struct {  u32  reqid;  u8  mode;  u8  replay_window;  u8  aalgo, ealgo, calgo;  u8  flags;  u16  family;  xfrm_address_t saddr;  int  header_len;  int  trailer_len; } props; struct xfrm_lifetime_cfg lft; // 生存时间 /* Data for transformer */ struct xfrm_algo *aalg; // hash算法 struct xfrm_algo *ealg; // 加密算法 struct xfrm_algo *calg; // 压缩算法 /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; // NAT-T封装信息 /* Data for care-of address */ xfrm_address_t *coaddr; /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; /* If a tunnel, number of users + 1 */ atomic_t  tunnel_users; /* State for replay detection */ struct xfrm_replay_state replay; /* Replay detection state at the time we sent the last notification */ struct xfrm_replay_state preplay; /* internal flag that only holds state for delayed aevent at the  * moment */ u32   xflags; /* Replay detection notification settings */ u32   replay_maxage; u32   replay_maxdiff; /* Replay detection notification timer */ struct timer_list rtimer; /* Statistics */ struct xfrm_stats stats; struct xfrm_lifetime_cur curlft; struct timer_list timer; /* Last used time */ u64   lastused; /* Reference to data common to all the instances of this  * transformer. */ struct xfrm_type *type; struct xfrm_mode *mode; /* Security context */ struct xfrm_sec_ctx *security; /* Private data of this transformer, format is opaque,  * interpreted by xfrm_type methods. */ void   *data;}; 2.3 策略(SP)struct xfrm_policy{ struct xfrm_policy *next; // 下一个策略 struct hlist_node bydst; // 按目的地址HASH的链表 struct hlist_node byidx; // 按索引号HASH的链表 /* This lock only affects elements except for entry. 
*/ rwlock_t  lock; atomic_t  refcnt; struct timer_list timer; u8   type; u32   priority; u32   index; struct xfrm_selector selector; struct xfrm_lifetime_cfg lft; struct xfrm_lifetime_cur curlft; struct dst_entry       *bundles; __u16   family; __u8   action; __u8   flags; __u8   dead; __u8   xfrm_nr; struct xfrm_sec_ctx *security; struct xfrm_tmpl        xfrm_vec[XFRM_MAX_DEPTH];};2.4 事件struct km_event{ union {  u32 hard;  u32 proto;  u32 byid;  u32 aevent;  u32 type; } data; u32 seq; u32 pid; u32 event;};3. 初始化/* net/key/af_key.c */static int __init ipsec_pfkey_init(void){// 登记key_proto结构, 该结构定义如下:// static struct proto key_proto = {// .name   = "KEY",// .owner   = THIS_MODULE,// .obj_size = sizeof(struct pfkey_sock),//};// 最后一个参数为0, 表示不进行slab的分配, 只是简单的将key_proto结构// 挂接到系统的网络协议链表中,这个结构最主要是告知了pfkey sock结构的大小 int err = proto_register(&key_proto, 0); if (err != 0)  goto out;// 登记pfkey协议族的的操作结构 err = sock_register(&pfkey_family_ops); if (err != 0)  goto out_unregister_key_proto;#ifdef CONFIG_PROC_FS err = -ENOMEM;// 建立只读的pfkey的PROC文件: /proc/net/pfkey if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL)  goto out_sock_unregister;#endif// 登记通知(notify)处理pfkeyv2_mgr err = xfrm_register_km(&pfkeyv2_mgr); if (err != 0)  goto out_remove_proc_entry;out: return err;out_remove_proc_entry:#ifdef CONFIG_PROC_FS remove_proc_entry("net/pfkey", NULL);out_sock_unregister:#endif sock_unregister(PF_KEY);out_unregister_key_proto: proto_unregister(&key_proto); goto out;}4. 
pfkey套接口操作4.1  建立套接口/* net/key/af_key.c */// pfkey协议族操作, 在用户程序使用socket打开pfkey类型的socket时调用,// 相应的create函数在__sock_create(net/socket.c)函数中调用:static struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE,};// 在用户空间每次打开pfkey socket时都会调用此函数:static int pfkey_create(struct socket *sock, int protocol){ struct sock *sk; int err;// 建立PFKEY的socket必须有ROOT权限 if (!capable(CAP_NET_ADMIN))  return -EPERM;// socket类型必须是RAW, 协议为PF_KEY_V2 if (sock->type != SOCK_RAW)  return -ESOCKTNOSUPPORT; if (protocol != PF_KEY_V2)  return -EPROTONOSUPPORT; err = -ENOMEM;// 分配sock结构, 并清零 sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1); if (sk == NULL)  goto out;// PFKEY类型socket的操作 sock->ops = &pfkey_ops;// 初始化socket参数 sock_init_data(sock, sk);// 初始化sock的族类型和释放函数 sk->sk_family = PF_KEY; sk->sk_destruct = pfkey_sock_destruct;// 增加使用数 atomic_inc(&pfkey_socks_nr);// 将sock挂接到系统的sock链表 pfkey_insert(sk); return 0;out: return err;} 4.2 PF_KEY套接口操作static const struct proto_ops pfkey_ops = { .family  = PF_KEY, .owner  = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ .bind  = sock_no_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept  = sock_no_accept, .getname = sock_no_getname, .ioctl  = sock_no_ioctl, .listen  = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, .mmap  = sock_no_mmap, .sendpage = sock_no_sendpage, /* Now the operations that really occur. 
*/ .release = pfkey_release, .poll  = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg,};PF_KEY类型的sock中大多数操作都没有定义, 这是因为PF_KEY的数据都是本机内的内核空间与用户空间的交换, 因此实际和网络相关的操作都不用定义, 所谓发送和接收数据也只是内核与用户空间之间的通信。4.2.1 释放套接口static int pfkey_release(struct socket *sock){// 从socket到sock结构转换 struct sock *sk = sock->sk; if (!sk)  return 0;// 将sock从系统的sock链表断开 pfkey_remove(sk);// 设置sock状态为DEAD, 清空sock中的socket和sleep指针 sock_orphan(sk); sock->sk = NULL;// 清除当前数据队列 skb_queue_purge(&sk->sk_write_queue);// 释放sock sock_put(sk); return 0;}4.2.2 描述符选择使用的是标准的数据报选择函数: datagram_poll4.2.3 发送数据实际是将数据从用户空间发送给内核空间(对应用户程序对socket的写操作):static int pfkey_sendmsg(struct kiocb *kiocb,    struct socket *sock, struct msghdr *msg, size_t len){ struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; err = -EOPNOTSUPP;// PF_KEY不支持MSG_OOB标志 if (msg->msg_flags & MSG_OOB)  goto out; err = -EMSGSIZE;// 一次发送的数据长度不能太大 if ((unsigned)len > sk->sk_sndbuf - 32)  goto out; err = -ENOBUFS;// 获取一个空闲的skbuff skb = alloc_skb(len, GFP_KERNEL); if (skb == NULL)  goto out; err = -EFAULT;// 从缓冲区中拷贝数据到skbuff中 if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))  goto out;// 获取SADB数据头的指针 hdr = pfkey_get_base_msg(skb, &err); if (!hdr)  goto out; mutex_lock(&xfrm_cfg_mutex);// 处理PFKEY数据的发送 err = pfkey_process(sk, skb, hdr); mutex_unlock(&xfrm_cfg_mutex);out: if (err && hdr && pfkey_error(hdr, err, sk) == 0)  err = 0; if (skb)  kfree_skb(skb); return err ? 
: len;}static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr){ void *ext_hdrs[SADB_EXT_MAX]; int err;// 向混杂模式的sock发送SA消息 pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,   BROADCAST_PROMISC_ONLY, NULL); memset(ext_hdrs, 0, sizeof(ext_hdrs));// 解析SADB数据头中的消息类型 err = parse_exthdrs(skb, hdr, ext_hdrs); if (!err) {  err = -EOPNOTSUPP;// 根据消息类型调用相关的处理函数进行处理  if (pfkey_funcs[hdr->sadb_msg_type])   err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs); } return err;}4.2.4 接收数据实际是将数据从内核空间发送给用户空间的程序(对应用户程序对socket的读操作):static int pfkey_recvmsg(struct kiocb *kiocb,    struct socket *sock, struct msghdr *msg, size_t len,    int flags){ struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; err = -EINVAL;// 只支持4类标志 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))  goto out; msg->msg_namelen = 0;// 接收数据包 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL)  goto out; copied = skb->len;// 接收到的数据超过了接收缓冲区长度, 设置截断标志 if (copied > len) {  msg->msg_flags |= MSG_TRUNC;  copied = len; } skb->h.raw = skb->data;// 将数据包中信息拷贝到接收缓冲区 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); if (err)  goto out_free;// 设置时间戳 sock_recv_timestamp(msg, sk, skb); err = (flags & MSG_TRUNC) ? skb->len : copied;out_free: skb_free_datagram(sk, skb);out: return err;}4.2.5 pfkey广播pfkey广播是将内核的回应信息发送到用户空间, 所有打开了PF_KEY类型socket的用户空间程序都可以收到, 所以用户空间程序在收到消息的时候要判断是否该消息是给自己的, 不是就忽略掉,这和netlink的广播比较类似。/* Send SKB to all pfkey sockets matching selected criteria.  */#define BROADCAST_ALL  0#define BROADCAST_ONE  1#define BROADCAST_REGISTERED 2#define BROADCAST_PROMISC_ONLY 4static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,      int broadcast_flags, struct sock *one_sk){ struct sock *sk; struct hlist_node *node; struct sk_buff *skb2 = NULL; int err = -ESRCH; /* XXX Do we need something like netlink_overrun?  I think  * XXX PF_KEY socket apps will not mind current behavior.  
*/ if (!skb)  return -ENOMEM; pfkey_lock_table();// 遍历所有的pfkey sock表, sk_for_each(sk, node, &pfkey_table) {// 获取pfkey sock用于发送消息  struct pfkey_sock *pfk = pfkey_sk(sk);  int err2;  /* Yes, it means that if you are meant to receive this   * pfkey message you receive it twice as promiscuous   * socket.   */// 该pfkey sock是混杂模式, 先发送一次, 由于后面还会广播发送, 所以设置了混杂模式的pfkey// sock一般情况下会收到两次  if (pfk->promisc)   pfkey_broadcast_one(skb, &skb2, allocation, sk);  /* the exact target will be processed later */// 指定了one_sk的话这个one_sk对应的用户程序将最后才收到包, 现在在循环中不发// 以后才发  if (sk == one_sk)   continue;// 如果不是广播给所有的进程, #define BROADCAST_ALL  0  if (broadcast_flags != BROADCAST_ALL) {// 如果只广播给pfkey混杂模式的进程, 跳过, 继续循环   if (broadcast_flags & BROADCAST_PROMISC_ONLY)    continue;// 如果只广播给登记的进程而该sock没登记, 跳过, 继续循环   if ((broadcast_flags & BROADCAST_REGISTERED) &&       !pfk->registered)    continue;// 只广播给一个, 和one_sk配合使用, 这样消息就只会发送给one_sk和所有混杂模式的pfkey sock   if (broadcast_flags & BROADCAST_ONE)    continue;  }// 发送给该pfkey sock  err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);  /* Error is cleare after succecful sending to at least one   * registered KM */  if ((broadcast_flags & BROADCAST_REGISTERED) && err)   err = err2; } pfkey_unlock_table();// 如果指定one_sk, 再向该pfkey sock发送, 该sock是最后一个收到消息的 if (one_sk != NULL)  err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);// 释放skb if (skb2)  kfree_skb(skb2); kfree_skb(skb); return err;}// 发送一个包static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,          gfp_t allocation, struct sock *sk){ int err = -ENOBUFS; sock_hold(sk); if (*skb2 == NULL) {// skb2是skb的一个克隆包  if (atomic_read(&skb->users) != 1) {   *skb2 = skb_clone(skb, allocation);  } else {   *skb2 = skb;// 因为发送会减少skb的使用计数   atomic_inc(&skb->users);  } } if (*skb2 != NULL) {// 实际发送的时skb2  if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {   skb_orphan(*skb2);   skb_set_owner_r(*skb2, sk);   skb_queue_tail(&sk->sk_receive_queue, *skb2);   sk->sk_data_ready(sk, 
(*skb2)->len);   *skb2 = NULL;   err = 0;  } } sock_put(sk); return err;}...... 待续 ......发表于: 2007-05-07,修改于: 2007-05-07 20:10,已浏览3512次,有评论2条 推荐 投诉网友: 本站网友 时间:2007-05-30 10:54:31 IP地址:58.211.149.★好象pfkey_sendmsg和pfkey_recvmsg方向搞反了吧? pfkey_sendmsg对应socket write操作,是从用户空间往内核空间发数据(写); pfkey_recvmsg对应socket read操作,是从内核往用户发消息,用户来读.BTW: 哥们,代码只有联系起来看,才能看出它究竟是干什么的.否则只不过是一句句的C语言,有什么用呢?网友: yfydz 时间:2007-05-30 13:43:09 IP地址:218.247.216.★恩,是写反了代码再怎么联系起来看,不也得一句句看?我已经划开成块了,不想看太细节可以只看大概流程就可以

读书人网 >UNIXLINUX

热点推荐