读书人

netfilter的规则处理

发布时间: 2012-08-03 00:12:14 作者: rapoo

netfilter的规则处理
本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源:http://yfydz.cublog.cn

1. 前言 netfilter中的防火墙规则是通过用户层的iptables命令来进行编辑的。而规则都是从属于某个表的(见我以前关于 netfilter新表的文章)。一般在mangle表对数据进行修改,在nat表对数据进行NAT,在filter表进行过滤。所不同的是NAT表中的规则只对新包(NEW/RELATED)进行处理,而MANGLE和FILTER表中的规则对所有数据包都处理。 以下Linux内核代码版本为2.4.26。2. 数据结构每条规则是用结构struct ipt_entry来定义的:/* include/linux/netfilter_ipv4/ip_tables.h */struct ipt_entry{ struct ipt_ip ip; /* Mark with fields that we care about. */ unsigned int nfcache; /* Size of ipt_entry + matches */ u_int16_t target_offset; /* Size of ipt_entry + matches + target */ u_int16_t next_offset; /* Back pointer */ unsigned int comefrom; /* Packet and byte counters. */ struct ipt_counters counters; /* The matches (if any), then the target. */ unsigned char elems[0];};参数说明:struct ipt_ip ip:基本匹配项,包括协议、源地址/掩码、目的地址/掩码、进入网卡、出网卡等unsigned int nfcache:标志项u_int16_t target_offset:规则动作的偏移位置u_int16_t next_offset:下一个规则的偏移位置unsigned int comefrom:规则返回点struct ipt_counters counters:计数器unsigned char elems[0]:规则匹配项表,最后是动作项ipt_ip结构:struct ipt_ip { /* Source and destination IP addr */ struct in_addr src, dst; /* Mask for src and dest IP addr */ struct in_addr smsk, dmsk; char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; /* Protocol, 0 = ANY */ u_int16_t proto; /* Flags word */ u_int8_t flags; /* Inverse flags */ u_int8_t invflags;};规则中的匹配项结构,注意这不是描述匹配的结构struct ipt_matchstruct ipt_entry_match{ union {// 这是用户空间(iptables)用到的部分,只提供名称即可  struct {   u_int16_t match_size;   /* Used by userspace */   char name[IPT_FUNCTION_MAXNAMELEN];  } user;// 这是内核空间用到的部分,指向具体的匹配模块  struct {   u_int16_t match_size;   /* Used inside the kernel */   struct ipt_match *match;  } kernel;  /* Total length */  u_int16_t match_size; } u; unsigned char data[0];};规则中的目标(规则动作)项结构,注意这不是描述目标的结构struct ipt_targetstruct ipt_entry_target{ union {// 这是用户空间(iptables)用到的部分,只提供名称即可  struct {   u_int16_t target_size;   /* Used by userspace */   char name[IPT_FUNCTION_MAXNAMELEN];  } user;// 这是内核空间用到的部分,指向具体的目标模块  struct {   u_int16_t target_size;   /* 
Used inside the kernel */   struct ipt_target *target;  } kernel;  /* Total length */  u_int16_t target_size; } u; unsigned char data[0];};3. 规则集操作函数netfilter处理规则处理基本函数为ipt_do_table(),在filter/mangle表最终都要进入该函数,而nat表只对 NEW/RELATED的包进入该函数。该函数遍历所定义的规则集,顺次进行匹配,一旦和规则的条件匹配成功,则按规则指定的动作返回,返回值可能为 NF_ACCEPT/NF_DROP/NF_QUEUE/NF_STOLEN等。/* net/ipv4/netfilter/ip_tables.c */unsigned intipt_do_table(struct sk_buff **pskb,      unsigned int hook,      const struct net_device *in,      const struct net_device *out,      struct ipt_table *table,      void *userdata){ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))) = { 0 }; u_int16_t offset; struct iphdr *ip; void *protohdr; u_int16_t datalen; int hotdrop = 0; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; void *table_base; struct ipt_entry *e, *back; /* Initialization */ ip = (*pskb)->nh.iph; protohdr = (u_int32_t *)ip + ip->ihl; datalen = (*pskb)->len - ip->ihl * 4;// 如果数据包的进入网卡或出网卡为NULL,则在规则匹配时用nulldevname代替 indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; /* We handle fragments by dealing with the first fragment as  * if it was a normal packet.  All other fragments are treated  * normally, except that they will NEVER match rules that ask  * things we don't know, ie. tcp syn flag or ports).  If the  * rule is also a fragment-specific rule, non-fragments won't  * match it. 
*/ offset = ntohs(ip->frag_off) & IP_OFFSET; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook));// 找到规则集起点,每个表可在不同的挂接点定义规则集,但所有规则集都是统一// 在一个数组里的 table_base = (void *)table->private->entries  + TABLE_OFFSET(table->private,          cpu_number_map(smp_processor_id()));// 第一个规则 e = get_entry(table_base, table->private->hook_entry[hook]);#ifdef CONFIG_NETFILTER_DEBUG /* Check noone else using our table */ if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac     && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {  printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",         smp_processor_id(),         table->name,         &((struct ipt_entry *)table_base)->comefrom,         ((struct ipt_entry *)table_base)->comefrom); } ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;#endif// 规则集的最后一条规则,最后一条规则是链的缺省动作,不是全接收就是全部拒绝 /* For return from builtin chain */ back = get_entry(table_base, table->private->underflow[hook]);// 这是个死循环,因为最后一条规则是链的缺省动作,不是全接收就是全部拒绝// 是能够跳出的,除非发生意外 do {  IP_NF_ASSERT(e);  IP_NF_ASSERT(back);  (*pskb)->nfcache |= e->nfcache;// 进行基本元素(struct ipt_ip中定义的元素)的匹配,符合再进行后续匹配  if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {   struct ipt_entry_target *t;// 循环匹配规则中独立的匹配条件   if (IPT_MATCH_ITERATE(e, do_match,           *pskb, in, out,           offset, protohdr,           datalen, &hotdrop) != 0)    goto no_match;// 全部条件匹配,计数器增加   ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);// 获取规则目标   t = ipt_get_target(e);   IP_NF_ASSERT(t->u.kernel.target);   /* Standard target? */   if (!t->u.kernel.target->target) {    int v;// 标准目标,正常情况v值是小于0的,如ACCEPT实际对应-NF_ACCEPT-1,// DROP对应-NF_DROP-1,都是小于0的数    v = ((struct ipt_standard_target *)t)->verdict;    if (v < 0) {     /* Pop from stack? 
*/     if (v != IPT_RETURN) {// verdict重新计算回正常值      verdict = (unsigned)(-v) - 1;      break;     }// 对于IPT_RETURN,返回原来的链重新继续循环     e = back;     back = get_entry(table_base,        back->comefrom);     continue;    }    if (table_base + v        != (void *)e + e->next_offset) {     /* Save old back ptr in next entry */     struct ipt_entry *next      = (void *)e + e->next_offset;     next->comefrom      = (void *)back - table_base;     /* set back pointer to next entry */     back = next;    }    e = get_entry(table_base, v);   } else {// 规则目标非标准目标,而是单独定义的目标模块    /* Targets which reenter must return                                   abs. verdicts */#ifdef CONFIG_NETFILTER_DEBUG    ((struct ipt_entry *)table_base)->comefrom     = 0xeeeeeeec;#endif// 调用目标模块的target()函数    verdict = t->u.kernel.target->target(pskb,             hook,             in, out,             t->data,             userdata);#ifdef CONFIG_NETFILTER_DEBUG    if (((struct ipt_entry *)table_base)->comefrom        != 0xeeeeeeec        && verdict == IPT_CONTINUE) {     printk("Target %s reentered!\n",            t->u.kernel.target->name);     verdict = NF_DROP;    }    ((struct ipt_entry *)table_base)->comefrom     = 0x57acc001;#endif// 目标有可能修改数据包的各种信息,数据包本身也可能不再是原来的包而是拷贝// 后的包,因此关于包的网络参数需要重新识别    /* Target might have changed stuff. */    ip = (*pskb)->nh.iph;    protohdr = (u_int32_t *)ip + ip->ihl;    datalen = (*pskb)->len - ip->ihl * 4;    if (verdict == IPT_CONTINUE)// 返回IPT_CONTINUE时继续下一条规则的检查// 注意不支持IPT_RETURN     e = (void *)e + e->next_offset;    else     /* Verdict */     break;   }  } else {// 规则不匹配,找下一条规则继续  no_match:   e = (void *)e + e->next_offset;  }// 匹配模块中有hotdrop参数,允许匹配模块丢包,而通常匹配模块是不丢包的 } while (!hotdrop);#ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;#endif read_unlock_bh(&table->lock);#ifdef DEBUG_ALLOW_ALL return NF_ACCEPT;#else if (hotdrop)  return NF_DROP; else return verdict;#endif}4. 
规则的修改netfilter本质上是以数组方法保存规则集的,虽然每条规则的大小可能是不同的,因此在编辑规则时实际上操作比较麻烦的,对于 iptables的各种编辑规则的命令,实际上都是替换操作:IPT_SO_SET_REPLACE,对应的处理函数为do_replace()。/* net/ipv4/netfilter/ip_tables.c */static intdo_replace(void *user, unsigned int len){ int ret; struct ipt_replace tmp; struct ipt_table *t; struct ipt_table_info *newinfo, *oldinfo; struct ipt_counters *counters;// 先从用户空间拷贝规则集的描述信息,由结构struct ipt_replace描述 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)  return -EFAULT;// 长度检查 /* Hack: Causes ipchains to give correct error msg --RR */ if (len != sizeof(tmp) + tmp.size)  return -ENOPROTOOPT; /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)  return -ENOMEM;// 分配实际的规则集内存空间,每个CPU一个 newinfo = vmalloc(sizeof(struct ipt_table_info)     + SMP_ALIGN(tmp.size) * smp_num_cpus); if (!newinfo)  return -ENOMEM; if (copy_from_user(newinfo->entries, user + sizeof(tmp),      tmp.size) != 0) {  ret = -EFAULT;  goto free_newinfo; }// 分配老规则集的计数器空间准备返回给用户空间 counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters)); if (!counters) {  ret = -ENOMEM;  goto free_newinfo; } memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));// 转换规则,检查规则的合法性等操作 ret = translate_table(tmp.name, tmp.valid_hooks,         newinfo, tmp.size, tmp.num_entries,         tmp.hook_entry, tmp.underflow); if (ret != 0)  goto free_newinfo_counters; duprintf("ip_tables: Translated table\n");// 找到相应的ipt_table表 t = find_table_lock(tmp.name, &ret, &ipt_mutex); if (!t)  goto free_newinfo_counters_untrans; /* You lied! 
*/ if (tmp.valid_hooks != t->valid_hooks) {  duprintf("Valid hook crap: %08X vs %08X\n",    tmp.valid_hooks, t->valid_hooks);  ret = -EINVAL;  goto free_newinfo_counters_untrans_unlock; }// 将新的规则集替换原来的规则集 oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret); if (!oldinfo)  goto free_newinfo_counters_untrans_unlock; /* Update module usage count based on number of rules */ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",  oldinfo->number, oldinfo->initial_entries, newinfo->number); if (t->me && (oldinfo->number <= oldinfo->initial_entries) &&      (newinfo->number > oldinfo->initial_entries))  __MOD_INC_USE_COUNT(t->me); else if (t->me && (oldinfo->number > oldinfo->initial_entries) &&    (newinfo->number <= oldinfo->initial_entries))  __MOD_DEC_USE_COUNT(t->me); /* Get the old counters. */// 读取老规则集的计数器 get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */// 遍历清除老规则集,调用规则中匹配和目标模块的destroy()函数 IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);// 释放老规则集 vfree(oldinfo); /* Silent error: too late now. */// 将计数器拷贝回用户空间 copy_to_user(tmp.counters, counters,       sizeof(struct ipt_counters) * tmp.num_counters);// 将老计数器释放 vfree(counters); up(&ipt_mutex); return 0; free_newinfo_counters_untrans_unlock: up(&ipt_mutex); free_newinfo_counters_untrans: IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL); free_newinfo_counters: vfree(counters); free_newinfo: vfree(newinfo); return ret;}处理过程中比较重要的连接函数为translate_table()和replace_table(),也都在ip_tables.c中定义:static inttranslate_table(const char *name,  unsigned int valid_hooks,  struct ipt_table_info *newinfo,  unsigned int size,  unsigned int number,  const unsigned int *hook_entries,  const unsigned int *underflows){ unsigned int i; int ret; newinfo->size = size; newinfo->number = number; /* Init all hooks to impossible value. 
*/ for (i = 0; i < NF_IP_NUMHOOKS; i++) {  newinfo->hook_entry[i] = 0xFFFFFFFF;  newinfo->underflow[i] = 0xFFFFFFFF; } duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */// 检查规则集中规则的合法性,检查偏移是否正确 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,    check_entry_size_and_hooks,    newinfo,    newinfo->entries,    newinfo->entries + size,    hook_entries, underflows, &i); if (ret != 0)  return ret; if (i != number) {  duprintf("translate_table: %u not %u entries\n",    i, number);  return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_IP_NUMHOOKS; i++) {  /* Only hooks which are valid */  if (!(valid_hooks & (1 << i)))   continue;// 检查是否在合法hook点没有设置规则,在每个合法hook点是必须有规则的  if (newinfo->hook_entry[i] == 0xFFFFFFFF) {   duprintf("Invalid hook entry %u %u\n",     i, hook_entries[i]);   return -EINVAL;  }  if (newinfo->underflow[i] == 0xFFFFFFFF) {   duprintf("Invalid underflow %u %u\n",     i, underflows[i]);   return -EINVAL;  } }// 检查自定义的链是否形成环 if (!mark_source_chains(newinfo, valid_hooks))  return -ELOOP; /* Finally, each sanity check must pass */ i = 0;// 遍历规则,通过调用匹配和目标的checkentry()函数检查其合法性 ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,    check_entry, name, size, &i); if (ret != 0) {  IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,      cleanup_entry, &i);  return ret; }// 规则集是每个CPU都有一个 /* And one copy for every other CPU */ for (i = 1; i < smp_num_cpus; i++) {  memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,         newinfo->entries,         SMP_ALIGN(newinfo->size)); } return ret;} static struct ipt_table_info *replace_table(struct ipt_table *table,       unsigned int num_counters,       struct ipt_table_info *newinfo,       int *error){ struct ipt_table_info *oldinfo;#ifdef CONFIG_NETFILTER_DEBUG {  struct ipt_entry *table_base;  unsigned int i;  for (i = 0; i < smp_num_cpus; i++) {   table_base =    (void *)newinfo->entries    + TABLE_OFFSET(newinfo, i);   
table_base->comefrom = 0xdead57ac;  } }#endif /* Do the substitution. */ write_lock_bh(&table->lock); /* Check inside lock: is the old number correct? */ if (num_counters != table->private->number) {  duprintf("num_counters != table->private->number (%u/%u)\n",    num_counters, table->private->number);  write_unlock_bh(&table->lock);  *error = -EAGAIN;  return NULL; }// struct ipt_table结构中的private指向规则集// 获取老规则集地址指针 oldinfo = table->private;// 指向新规则集 table->private = newinfo; newinfo->initial_entries = oldinfo->initial_entries; write_unlock_bh(&table->lock); return oldinfo;} 5. 结论netfilter的规则是数组方式顺序保存,但每个元素(规则)的大小是不同的,每条规则除了基本部分相同外,还包括不同数量的匹配和目标项。规则匹配是顺序匹配,而编辑时实际上是将整个规则集全部替换。

读书人网 >互联网

热点推荐