diff --git a/libip6t_FULLCONENAT.c b/libip6t_FULLCONENAT.c
new file mode 100644
index 0000000..0cdc1a5
--- /dev/null
+++ b/libip6t_FULLCONENAT.c
@@ -0,0 +1,220 @@
+#include <stdio.h>
+#include <netdb.h>
+#include <string.h>
+#include <stdlib.h>
+#include <xtables.h>
+#include <ip6tables.h>
+#include <limits.h> /* INT_MAX in ip_tables.h */
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter/nf_nat.h>
+
+#ifndef NF_NAT_RANGE_PROTO_RANDOM_FULLY
+#define NF_NAT_RANGE_PROTO_RANDOM_FULLY (1 << 4)
+#endif
+
+enum {
+  O_TO_PORTS = 0,
+  O_RANDOM,
+  O_RANDOM_FULLY,
+  O_TO_SRC,
+  O_PERSISTENT,
+};
+
+static void FULLCONENAT_help(void)
+{
+  printf(
+"FULLCONENAT target options:\n"
+" --to-source [<ipaddr>[-<ipaddr>]] [--persistent]\n"
+"        Address to map source to.\n"
+" --to-ports <port>[-<port>]\n"
+"        Port (range) to map to.\n"
+" --random\n"
+"        Randomize source port.\n"
+" --random-fully\n"
+"        Fully randomize source port.\n");
+}
+
+static const struct xt_option_entry FULLCONENAT_opts[] = {
+  {.name = "to-ports", .id = O_TO_PORTS, .type = XTTYPE_STRING},
+  {.name = "random", .id = O_RANDOM, .type = XTTYPE_NONE},
+  {.name = "random-fully", .id = O_RANDOM_FULLY, .type = XTTYPE_NONE},
+  {.name = "to-source", .id = O_TO_SRC, .type = XTTYPE_STRING},
+  {.name = "persistent", .id = O_PERSISTENT, .type = XTTYPE_NONE},
+  XTOPT_TABLEEND,
+};
+
+static void parse_to(const char *orig_arg, struct nf_nat_range *r)
+{
+  char *arg, *dash;
+  const struct in6_addr *ip;
+
+  arg = strdup(orig_arg);
+  if (arg == NULL)
+    xtables_error(RESOURCE_PROBLEM, "strdup");
+
+  r->flags |= NF_NAT_RANGE_MAP_IPS;
+  dash = strchr(arg, '-');
+
+  if (dash)
+    *dash = '\0';
+
+  ip = xtables_numeric_to_ip6addr(arg);
+  if (!ip)
+    xtables_error(PARAMETER_PROBLEM, "Bad IP address \"%s\"\n",
+            arg);
+  r->min_addr.in6 = *ip;
+  if (dash) {
+    ip = xtables_numeric_to_ip6addr(dash+1);
+    if (!ip)
+      xtables_error(PARAMETER_PROBLEM, "Bad IP address \"%s\"\n",
+              dash+1);
+    r->max_addr.in6 = *ip;
+  } else
+    r->max_addr = r->min_addr;
+
+  free(arg);
+}
+
+/* Parses ports */
+static void
+parse_ports(const char *arg, struct nf_nat_range *r)
+{
+  char *end;
+  unsigned int port, maxport;
+
+  r->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+
+  if (!xtables_strtoui(arg, &end, &port, 0, UINT16_MAX))
+    xtables_param_act(XTF_BAD_VALUE, "FULLCONENAT", "--to-ports", arg);
+
+  switch (*end) {
+  case '\0':
+    r->min_proto.tcp.port
+      = r->max_proto.tcp.port
+      = htons(port);
+    return;
+  case '-':
+    if (!xtables_strtoui(end + 1, NULL, &maxport, 0, UINT16_MAX))
+      break;
+
+    if (maxport < port)
+      break;
+
+    r->min_proto.tcp.port = htons(port);
+    r->max_proto.tcp.port = htons(maxport);
+    return;
+  default:
+    break;
+  }
+  xtables_param_act(XTF_BAD_VALUE, "FULLCONENAT", "--to-ports", arg);
+}
+
+static void FULLCONENAT_parse(struct xt_option_call *cb)
+{
+  const struct ip6t_entry *entry = cb->xt_entry;
+  struct nf_nat_range *r = cb->data;
+  int portok;
+
+  if (entry->ipv6.proto == IPPROTO_TCP
+      || entry->ipv6.proto == IPPROTO_UDP
+      || entry->ipv6.proto == IPPROTO_SCTP
+      || entry->ipv6.proto == IPPROTO_DCCP
+      || entry->ipv6.proto == IPPROTO_ICMP)
+    portok = 1;
+  else
+    portok = 0;
+
+  xtables_option_parse(cb);
+  switch (cb->entry->id) {
+  case O_TO_PORTS:
+    if (!portok)
+      xtables_error(PARAMETER_PROBLEM,
+            "Need TCP, UDP, SCTP or DCCP with port specification");
+    parse_ports(cb->arg, r);
+    break;
+  case O_TO_SRC:
+    parse_to(cb->arg, r);
+    break;
+  case O_RANDOM_FULLY:
+    r->flags |= NF_NAT_RANGE_PROTO_RANDOM_FULLY;
+    break;
+  case O_RANDOM:
+    r->flags |= NF_NAT_RANGE_PROTO_RANDOM;
+    break;
+  case O_PERSISTENT:
+    r->flags |= NF_NAT_RANGE_PERSISTENT;
+    break;
+  }
+}
+
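+/* print target parameters for `ip6tables -L`; FULLCONENAT_save below
+ * emits the same information in option form for ip6tables-save */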
+static void
+FULLCONENAT_print(const void *ip, const struct xt_entry_target *target,
+                  int numeric)
+{
+  const struct nf_nat_range *r = (const void *)target->data;
+
+  if (r->flags & NF_NAT_RANGE_MAP_IPS) {
+    printf(" to:%s", xtables_ip6addr_to_numeric(&r->min_addr.in6));
+    if (memcmp(&r->min_addr, &r->max_addr, sizeof(r->min_addr)))
+      printf("-%s", xtables_ip6addr_to_numeric(&r->max_addr.in6));
+  }
+
+  if (r->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+    printf(" masq ports: ");
+    printf("%hu", ntohs(r->min_proto.tcp.port));
+    if (r->max_proto.tcp.port != r->min_proto.tcp.port)
+      printf("-%hu", ntohs(r->max_proto.tcp.port));
+    if (r->flags & NF_NAT_RANGE_PERSISTENT)
+      printf(" persistent");
+  }
+
+  if (r->flags & NF_NAT_RANGE_PROTO_RANDOM)
+    printf(" random");
+
+  if (r->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+    printf(" random-fully");
+}
+
+static void
+FULLCONENAT_save(const void *ip, const struct xt_entry_target *target)
+{
+  const struct nf_nat_range *r = (const void *)target->data;
+
+  if (r->flags & NF_NAT_RANGE_MAP_IPS) {
+    printf(" --to-source %s", xtables_ip6addr_to_numeric(&r->min_addr.in6));
+    if (memcmp(&r->min_addr, &r->max_addr, sizeof(r->min_addr)))
+      printf("-%s", xtables_ip6addr_to_numeric(&r->max_addr.in6));
+    if (r->flags & NF_NAT_RANGE_PERSISTENT)
+      printf(" --persistent");
+  }
+
+  if (r->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+    printf(" --to-ports %hu", ntohs(r->min_proto.tcp.port));
+    if (r->max_proto.tcp.port != r->min_proto.tcp.port)
+      printf("-%hu", ntohs(r->max_proto.tcp.port));
+  }
+
+  if (r->flags & NF_NAT_RANGE_PROTO_RANDOM)
+    printf(" --random");
+
+  if (r->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY)
+    printf(" --random-fully");
+}
+
+static struct xtables_target fullconenat_tg_reg = {
+  .name = "FULLCONENAT",
+  .version = XTABLES_VERSION,
+  .family = NFPROTO_IPV6,
+  .size = XT_ALIGN(sizeof(struct nf_nat_range)),
+  .userspacesize = XT_ALIGN(sizeof(struct nf_nat_range)),
+  .help = FULLCONENAT_help,
+  .x6_parse = FULLCONENAT_parse,
+  .print = FULLCONENAT_print,
+  .save = FULLCONENAT_save,
+  .x6_options = FULLCONENAT_opts,
+};
+
+void _init(void)
+{
+  xtables_register_target(&fullconenat_tg_reg);
+}
diff --git a/xt_FULLCONENAT.c b/xt_FULLCONENAT.c
index 8555b54..6f68aa9 100644
--- a/xt_FULLCONENAT.c
+++ b/xt_FULLCONENAT.c
@@ -20,6 +20,11 @@
 #endif
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack.h>
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+#include <linux/netfilter_ipv6.h>
+#include <net/addrconf.h>
+#include <net/ipv6.h>
+#endif
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_helper.h>
@@ -43,11 +48,11 @@ static inline int nf_ct_netns_get(struct net *net, u8 nfproto) { return 0; }
 static inline void nf_ct_netns_put(struct net *net, u8 nfproto) {}
 
 static inline struct net_device *xt_in(const struct xt_action_param *par) {
-  return par->in;
+  return (struct net_device *)par->in;
 }
 
 static inline struct net_device *xt_out(const struct xt_action_param *par) {
-  return par->out;
+  return (struct net_device *)par->out;
 }
 
 static inline unsigned int xt_hooknum(const struct xt_action_param *par) {
@@ -79,6 +84,25 @@ struct nat_mapping {
 
 };
 
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
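+/* one IPv6 mapping ties an external (addr, port) pair to an internal
+ * (int_addr, int_port) pair and keeps a list of every original
+ * conntrack tuple that still refers to it */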
+struct nat_mapping6 {
+  uint16_t port; /* external source port */
+  union nf_inet_addr addr; /* external source ip address */
+
+  union nf_inet_addr int_addr; /* internal source ip address */
+  uint16_t int_port; /* internal source port */
+
+  int refer_count; /* how many references linked to this mapping,
+                    * i.e. the length of original_tuple_list */
+
+  struct list_head original_tuple_list;
+
+  struct hlist_node node_by_ext_port;
+  struct hlist_node node_by_int_src;
+
+};
+#endif
+
 struct tuple_list {
   struct nf_conntrack_tuple tuple_original;
   struct nf_conntrack_tuple tuple_reply;
@@ -100,6 +124,13 @@ static DEFINE_HASHTABLE(mapping_table_by_int_src, HASHTABLE_BUCKET_BITS);
 
 static DEFINE_SPINLOCK(fullconenat_lock);
 
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+static DEFINE_HASHTABLE(mapping6_table_by_ext_port, HASHTABLE_BUCKET_BITS);
+static DEFINE_HASHTABLE(mapping6_table_by_int_src, HASHTABLE_BUCKET_BITS);
+
+static DEFINE_SPINLOCK(fullconenat6_lock);
+#endif
+
 static LIST_HEAD(dying_tuple_list);
 static DEFINE_SPINLOCK(dying_tuple_list_lock);
 static void gc_worker(struct work_struct *work);
@@ -107,6 +138,435 @@ static struct workqueue_struct *wq __read_mostly = NULL;
 static DECLARE_DELAYED_WORK(gc_worker_wk, gc_worker);
 
 static char tuple_tmp_string[512];
+
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+/* non-atomic: can only be called serially within lock zones. */
+static char* nf_ct_stringify_tuple6(const struct nf_conntrack_tuple *t) {
+  snprintf(tuple_tmp_string, sizeof(tuple_tmp_string), "[%pI6c]:%hu -> [%pI6c]:%hu",
+    &t->src.u3.ip6, be16_to_cpu(t->src.u.all),
+    &t->dst.u3.ip6, be16_to_cpu(t->dst.u.all));
+  return tuple_tmp_string;
+}
+
+static struct nat_mapping6* allocate_mapping6(const union nf_inet_addr *int_addr, const uint16_t int_port, const uint16_t port, const union nf_inet_addr *addr) {
+  struct nat_mapping6 *p_new;
+  u32 hash_src;
+
+  p_new = kmalloc(sizeof(struct nat_mapping6), GFP_ATOMIC);
+  if (p_new == NULL) {
+    pr_debug("xt_FULLCONENAT: ERROR: kmalloc() for new nat_mapping6 failed.\n");
+    return NULL;
+  }
+  p_new->addr = *addr;
+  p_new->port = port;
+  p_new->int_addr = *int_addr;
+  p_new->int_port = int_port;
+  p_new->refer_count = 0;
+  (p_new->original_tuple_list).next = &(p_new->original_tuple_list);
+  (p_new->original_tuple_list).prev = &(p_new->original_tuple_list);
+
+  hash_src = jhash2((u32 *)int_addr->all, 4, (u32)int_port);
+
+  hash_add(mapping6_table_by_ext_port, &p_new->node_by_ext_port, port);
+  hash_add(mapping6_table_by_int_src, &p_new->node_by_int_src, hash_src);
+
+  pr_debug("xt_FULLCONENAT: new mapping allocated for [%pI6c]:%d ==> [%pI6c]:%d\n",
+    &p_new->int_addr, p_new->int_port, &p_new->addr, p_new->port);
+
+  return p_new;
+}
+
+static void add_original_tuple_to_mapping6(struct nat_mapping6 *mapping, const struct nf_conntrack_tuple* original_tuple) {
+  struct nat_mapping_original_tuple *item = kmalloc(sizeof(struct nat_mapping_original_tuple), GFP_ATOMIC);
+  if (item == NULL) {
+    pr_debug("xt_FULLCONENAT: ERROR: kmalloc() for nat_mapping_original_tuple failed.\n");
+    return;
+  }
+  memcpy(&item->tuple, original_tuple, sizeof(struct nf_conntrack_tuple));
+  list_add(&item->node, &mapping->original_tuple_list);
+  (mapping->refer_count)++;
+}
+
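+/* look up a mapping by its internal source address/port; the external
+ * address must match as well, since several external addresses may be
+ * in use at the same time */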
+static struct nat_mapping6* get_mapping6_by_int_src(const union nf_inet_addr *src_ip, const uint16_t src_port, const union nf_inet_addr *ext_ip) {
+  struct nat_mapping6 *p_current;
+  u32 hash_src = jhash2((u32 *)src_ip->all, 4, (u32)src_port);
+
+  hash_for_each_possible(mapping6_table_by_int_src, p_current, node_by_int_src, hash_src) {
+    if (nf_inet_addr_cmp(&p_current->int_addr, src_ip) && p_current->int_port == src_port && nf_inet_addr_cmp(&p_current->addr, ext_ip)) {
+      return p_current;
+    }
+  }
+
+  return NULL;
+}
+
+static struct nat_mapping6* get_mapping6_by_int_src_inrange(const union nf_inet_addr *src_ip, const uint16_t src_port, const union nf_inet_addr *min_ip, const union nf_inet_addr *max_ip) {
+  struct nat_mapping6 *p_current;
+  u32 hash_src = jhash2((u32 *)src_ip->all, 4, (u32)src_port);
+
+  hash_for_each_possible(mapping6_table_by_int_src, p_current, node_by_int_src, hash_src) {
+    if (nf_inet_addr_cmp(&p_current->int_addr, src_ip) && p_current->int_port == src_port && memcmp(&p_current->addr, min_ip, sizeof(union nf_inet_addr)) >= 0 && memcmp(&p_current->addr, max_ip, sizeof(union nf_inet_addr)) <= 0) {
+      return p_current;
+    }
+  }
+
+  return NULL;
+}
+
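+/* unlink a mapping from both hash tables and free it together with all
+ * of its recorded original tuples */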
+static void kill_mapping6(struct nat_mapping6 *mapping) {
+  struct list_head *iter, *tmp;
+  struct nat_mapping_original_tuple *original_tuple_item;
+
+  if (mapping == NULL) {
+    return;
+  }
+
+  list_for_each_safe(iter, tmp, &mapping->original_tuple_list) {
+    original_tuple_item = list_entry(iter, struct nat_mapping_original_tuple, node);
+    list_del(&original_tuple_item->node);
+    kfree(original_tuple_item);
+  }
+
+  hash_del(&mapping->node_by_ext_port);
+  hash_del(&mapping->node_by_int_src);
+  kfree(mapping);
+}
+
+/* check if a mapping is valid.
+ * possibly delete and free an invalid mapping.
+ * the mapping should not be used anymore after check_mapping6() returns 0. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static int check_mapping6(struct nat_mapping6* mapping, struct net *net, const struct nf_conntrack_zone *zone) {
+#else
+static int check_mapping6(struct nat_mapping6* mapping, struct net *net, const u16 zone) {
+#endif
+  struct list_head *iter, *tmp;
+  struct nat_mapping_original_tuple *original_tuple_item;
+  struct nf_conntrack_tuple_hash *tuple_hash;
+  struct nf_conn *ct;
+
+  /* for dying/unconfirmed conntrack tuples, an IPCT_DESTROY event may NOT be fired.
+   * so we manually kill one of those tuples once we acquire one. */
+  list_for_each_safe(iter, tmp, &mapping->original_tuple_list) {
+    original_tuple_item = list_entry(iter, struct nat_mapping_original_tuple, node);
+
+    tuple_hash = nf_conntrack_find_get(net, zone, &original_tuple_item->tuple);
+
+    if (tuple_hash == NULL) {
+      pr_debug("xt_FULLCONENAT: check_mapping6(): tuple %s dying/unconfirmed. free this tuple.\n",
+        nf_ct_stringify_tuple6(&original_tuple_item->tuple));
+
+      list_del(&original_tuple_item->node);
+      kfree(original_tuple_item);
+      (mapping->refer_count)--;
+    } else {
+      ct = nf_ct_tuplehash_to_ctrack(tuple_hash);
+      if (likely(ct != NULL))
+        nf_ct_put(ct);
+    }
+
+  }
+
+  /* kill the mapping if needed */
+  pr_debug("xt_FULLCONENAT: check_mapping6(): refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
+  if (mapping->refer_count <= 0) {
+    pr_debug("xt_FULLCONENAT: check_mapping6(): kill dying/unconfirmed mapping at ext port %d\n", mapping->port);
+    kill_mapping6(mapping);
+    return 0;
+  } else {
+    return 1;
+  }
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static struct nat_mapping6* get_mapping6_by_ext_port(const uint16_t port, const union nf_inet_addr *ext_ip, struct net *net, const struct nf_conntrack_zone *zone) {
+#else
+static struct nat_mapping6* get_mapping6_by_ext_port(const uint16_t port, const union nf_inet_addr *ext_ip, struct net *net, const u16 zone) {
+#endif
+  struct nat_mapping6 *p_current;
+  struct hlist_node *tmp;
+
+  hash_for_each_possible_safe(mapping6_table_by_ext_port, p_current, tmp, node_by_ext_port, port) {
+    if (p_current->port == port && check_mapping6(p_current, net, zone) && nf_inet_addr_cmp(&p_current->addr, ext_ip)) {
+      return p_current;
+    }
+  }
+
+  return NULL;
+}
+
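+/* pick an external source port for a new mapping: keep the original
+ * port when possible, otherwise probe the configured (or default
+ * 1024-65535) range, evicting an old mapping as a last resort */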
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static uint16_t find_appropriate_port6(struct net *net, const struct nf_conntrack_zone *zone, const uint16_t original_port, const union nf_inet_addr *ext_ip, const struct nf_nat_range *range) {
+#else
+static uint16_t find_appropriate_port6(struct net *net, const u16 zone, const uint16_t original_port, const union nf_inet_addr *ext_ip, const struct nf_nat_range *range) {
+#endif
+  uint16_t min, start, selected, range_size, i;
+  struct nat_mapping6* mapping = NULL;
+
+  if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
+    min = be16_to_cpu((range->min_proto).udp.port);
+    range_size = be16_to_cpu((range->max_proto).udp.port) - min + 1;
+  } else {
+    /* minimum port is 1024. same behavior as default linux NAT. */
+    min = 1024;
+    range_size = 65535 - min + 1;
+  }
+
+  if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM)
+    || (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY)) {
+    /* for now we do the same thing for both --random and --random-fully */
+
+    /* select a random starting point */
+    start = (uint16_t)(prandom_u32() % (u32)range_size);
+  } else {
+
+    if ((original_port >= min && original_port <= min + range_size - 1)
+      || !(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+      /* 1. try to preserve the port if it's available */
+      mapping = get_mapping6_by_ext_port(original_port, ext_ip, net, zone);
+      if (mapping == NULL) {
+        return original_port;
+      }
+    }
+
+    /* otherwise, we start from zero */
+    start = 0;
+  }
+
+  for (i = 0; i < range_size; i++) {
+    /* 2. try to find an available port */
+    selected = min + ((start + i) % range_size);
+    mapping = get_mapping6_by_ext_port(selected, ext_ip, net, zone);
+    if (mapping == NULL) {
+      return selected;
+    }
+  }
+
+  /* 3. at least we tried. override a previous mapping. */
+  selected = min + start;
+  mapping = get_mapping6_by_ext_port(selected, ext_ip, net, zone);
+  kill_mapping6(mapping);
+
+  return selected;
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+static void find_leastused_ip6(const struct nf_conntrack_zone *zone, const struct nf_nat_range *range, const union nf_inet_addr *src, const union nf_inet_addr *dst, union nf_inet_addr *var_ipp)
+#else
+static void find_leastused_ip6(const u16 zone, const struct nf_nat_range *range, const union nf_inet_addr *src, const union nf_inet_addr *dst, union nf_inet_addr *var_ipp)
+#endif
+{
+  unsigned int i;
+  /* Host order */
+  u32 minip, maxip, j, dist;
+  bool full_range;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+  j = jhash2((u32 *)src, 4, range->flags & NF_NAT_RANGE_PERSISTENT ? 0 : dst->all[3] ^ zone->id);
+#else
+  j = jhash2((u32 *)src, 4, range->flags & NF_NAT_RANGE_PERSISTENT ? 0 : dst->all[3] ^ zone);
+#endif
+
+  full_range = false;
+  for (i = 0; i <= 3; i++) {
+    /* If first bytes of the address are at the maximum, use the
+     * distance. Otherwise use the full range. */
+    if (!full_range) {
+      minip = ntohl(range->min_addr.all[i]);
+      maxip = ntohl(range->max_addr.all[i]);
+      dist = maxip - minip + 1;
+    } else {
+      minip = 0;
+      dist = ~0;
+    }
+
+    var_ipp->all[i] = (__force __be32) htonl(minip + reciprocal_scale(j, dist));
+    if (var_ipp->all[i] != range->max_addr.all[i])
+      full_range = true;
+
+    if (!(range->flags & NF_NAT_RANGE_PERSISTENT))
+      j ^= (__force u32)dst->all[i];
+  }
+}
+
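+/* the IPv6 target entry point: on PREROUTING, inbound packets that hit
+ * an active mapping are NATed back to the internal host; on POSTROUTING,
+ * outbound UDP packets are SNATed and the resulting mapping is recorded */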
+static unsigned int fullconenat_tg6(struct sk_buff *skb, const struct xt_action_param *par)
+{
+  const struct nf_nat_range *range;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
+  const struct nf_conntrack_zone *zone;
+#else
+  u16 zone;
+#endif
+  struct net *net;
+  struct nf_conn *ct;
+  enum ip_conntrack_info ctinfo;
+  struct nf_conn_nat *nat;
+  struct nf_conntrack_tuple *ct_tuple, *ct_tuple_origin;
+
+  struct nat_mapping6 *mapping, *src_mapping;
+  unsigned int ret;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
+  struct nf_nat_range2 newrange;
+#else
+  struct nf_nat_range newrange;
+#endif
+
+  union nf_inet_addr *ip;
+  uint16_t port, original_port, want_port;
+  uint8_t protonum;
+
+  ip = NULL;
+  original_port = 0;
+  src_mapping = NULL;
+
+  range = par->targinfo;
+
+  mapping = NULL;
+  ret = XT_CONTINUE;
+
+  ct = nf_ct_get(skb, &ctinfo);
+  net = nf_ct_net(ct);
+  zone = nf_ct_zone(ct);
+
+  newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+  newrange.min_proto = range->min_proto;
+  newrange.max_proto = range->max_proto;
+
+  if (xt_hooknum(par) == NF_INET_PRE_ROUTING) {
+    /* inbound packets */
+    ct_tuple_origin = &(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+
+    protonum = (ct_tuple_origin->dst).protonum;
+    if (protonum != IPPROTO_UDP) {
+      return ret;
+    }
+    ip = &(ct_tuple_origin->dst).u3;
+    port = be16_to_cpu((ct_tuple_origin->dst).u.udp.port);
+
+    spin_lock_bh(&fullconenat6_lock);
+
+    /* find an active mapping based on the inbound port */
+    mapping = get_mapping6_by_ext_port(port, ip, net, zone);
+    if (mapping != NULL) {
+      newrange.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
+      newrange.min_addr = mapping->int_addr;
+      newrange.max_addr = mapping->int_addr;
+      newrange.min_proto.udp.port = cpu_to_be16(mapping->int_port);
+      newrange.max_proto = newrange.min_proto;
+
+      pr_debug("xt_FULLCONENAT: %s ==> [%pI6c]:%d\n", nf_ct_stringify_tuple6(ct_tuple_origin), &mapping->int_addr, mapping->int_port);
+
+      ret = nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
+
+      if (ret == NF_ACCEPT) {
+        add_original_tuple_to_mapping6(mapping, ct_tuple_origin);
+        pr_debug("xt_FULLCONENAT: fullconenat_tg6(): INBOUND: refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
+      }
+    }
+    spin_unlock_bh(&fullconenat6_lock);
+    return ret;
+
+  } else if (xt_hooknum(par) == NF_INET_POST_ROUTING) {
+    /* outbound packets */
+    ct_tuple_origin = &(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+    protonum = (ct_tuple_origin->dst).protonum;
+
+    if (range->flags & NF_NAT_RANGE_MAP_IPS) {
+      newrange.min_addr = range->min_addr;
+      newrange.max_addr = range->max_addr;
+    } else {
+      if (unlikely(ipv6_dev_get_saddr(nf_ct_net(ct), xt_out(par), &ipv6_hdr(skb)->daddr, 0, (struct in6_addr*)&newrange.min_addr) < 0))
+        return NF_DROP;
+      newrange.max_addr = newrange.min_addr;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0)
+      nat = nf_ct_nat_ext_add(ct);
+#else
+      nat = nfct_nat(ct);
+#endif
+      if (likely(nat))
+        nat->masq_index = xt_out(par)->ifindex;
+
+    }
+
+    spin_lock_bh(&fullconenat6_lock);
+
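+    /* only UDP flows get full-cone mappings; other protocols fall
+     * through to the plain SNAT below */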
+    if (protonum == IPPROTO_UDP) {
+      ip = &(ct_tuple_origin->src).u3;
+      original_port = be16_to_cpu((ct_tuple_origin->src).u.udp.port);
+
+      if (!nf_inet_addr_cmp(&newrange.min_addr, &newrange.max_addr))
+        src_mapping = get_mapping6_by_int_src_inrange(ip, original_port, &newrange.min_addr, &newrange.max_addr);
+      else
+        src_mapping = get_mapping6_by_int_src(ip, original_port, &newrange.min_addr);
+
+      if (src_mapping != NULL && check_mapping6(src_mapping, net, zone)) {
+
+        /* outbound nat: if a previously established mapping is active,
+         * we will reuse that mapping. */
+
+        newrange.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
+        newrange.min_proto.udp.port = cpu_to_be16(src_mapping->port);
+        newrange.max_proto = newrange.min_proto;
+        if (!nf_inet_addr_cmp(&newrange.min_addr, &newrange.max_addr)) {
+          newrange.min_addr = src_mapping->addr;
+          newrange.max_addr = newrange.min_addr;
+        }
+
+      } else {
+        /* if not, we find a new external IP:port to map to.
+         * the SNAT may fail so we should re-check the mapped port later. */
+        if (!nf_inet_addr_cmp(&newrange.min_addr, &newrange.max_addr)) {
+          find_leastused_ip6(zone, range, ip, &(ct_tuple_origin->dst).u3, &newrange.min_addr);
+          newrange.max_addr = newrange.min_addr;
+        }
+
+        want_port = find_appropriate_port6(net, zone, original_port, &newrange.min_addr, range);
+
+        newrange.flags = NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED;
+        newrange.min_proto.udp.port = cpu_to_be16(want_port);
+        newrange.max_proto = newrange.min_proto;
+
+        src_mapping = NULL;
+
+      }
+    }
+
+    /* do SNAT now */
+    ret = nf_nat_setup_info(ct, &newrange, HOOK2MANIP(xt_hooknum(par)));
+
+    if (protonum != IPPROTO_UDP || ret != NF_ACCEPT) {
+      /* for non-UDP packets and failed SNAT, bailout */
+      spin_unlock_bh(&fullconenat6_lock);
+      return ret;
+    }
+
+    /* the reply tuple contains the mapped port. */
+    ct_tuple = &(ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+    /* this is the resulting mapped port. */
+    port = be16_to_cpu((ct_tuple->dst).u.udp.port);
+
+    pr_debug("xt_FULLCONENAT: %s ==> %d\n", nf_ct_stringify_tuple6(ct_tuple_origin), port);
+
+    /* save the mapping information into our mapping table */
+    mapping = src_mapping;
+    if (mapping == NULL) {
+      mapping = allocate_mapping6(ip, original_port, port, &(ct_tuple->dst).u3);
+    }
+    if (likely(mapping != NULL)) {
+      add_original_tuple_to_mapping6(mapping, ct_tuple_origin);
+      pr_debug("xt_FULLCONENAT: fullconenat_tg6(): OUTBOUND: refer_count for mapping at ext_port %d is now %d\n", mapping->port, mapping->refer_count);
+    }
+
+    spin_unlock_bh(&fullconenat6_lock);
+    return ret;
+  }
+
+  return ret;
+}
+#endif
+
 /* non-atomic: can only be called serially within lock zones. */
 static char* nf_ct_stringify_tuple(const struct nf_conntrack_tuple *t) {
   snprintf(tuple_tmp_string, sizeof(tuple_tmp_string), "%pI4:%hu -> %pI4:%hu",
@@ -200,6 +660,9 @@ static void kill_mapping(struct nat_mapping *mapping) {
 
 static void destroy_mappings(void) {
   struct nat_mapping *p_current;
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+  struct nat_mapping6 *p6_current;
+#endif
   struct hlist_node *tmp;
   int i;
@@ -210,12 +673,26 @@ static void destroy_mappings(void) {
   }
 
   spin_unlock_bh(&fullconenat_lock);
+
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+  spin_lock_bh(&fullconenat6_lock);
+
+  hash_for_each_safe(mapping6_table_by_ext_port, i, tmp, p6_current, node_by_ext_port) {
+    kill_mapping6(p6_current);
+  }
+
+  spin_unlock_bh(&fullconenat6_lock);
+#endif
 }
 
 /* check if a mapping is valid.
  * possibly delete and free an invalid mapping.
  * the mapping should not be used anymore after check_mapping() returns 0. */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
 static int check_mapping(struct nat_mapping* mapping, struct net *net, const struct nf_conntrack_zone *zone) {
+#else
+static int check_mapping(struct nat_mapping* mapping, struct net *net, const u16 zone) {
+#endif
   struct list_head *iter, *tmp;
   struct nat_mapping_original_tuple *original_tuple_item;
   struct nf_conntrack_tuple_hash *tuple_hash;
@@ -270,6 +747,11 @@ static void handle_dying_tuples(void) {
   __be32 ip;
   uint16_t port;
   struct nat_mapping_original_tuple *original_tuple_item;
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+  struct nat_mapping6 *mapping6;
+  union nf_inet_addr *ip6, *ext_ip6;
+  spin_lock_bh(&fullconenat6_lock);
+#endif
 
   spin_lock_bh(&fullconenat_lock);
   spin_lock_bh(&dying_tuple_list_lock);
@@ -278,7 +760,57 @@ static void handle_dying_tuples(void) {
     item = list_entry(iter, struct tuple_list, list);
 
     /* we dont know the conntrack direction for now so we try in both ways. */
-    ct_tuple = &(item->tuple_original);
+    ct_tuple = &(item->tuple_original);
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
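+    /* IPv6 tuples are matched against the v6 mapping tables here;
+     * IPv4 tuples fall through to the original handling below */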
+    if (ct_tuple->src.l3num == PF_INET6) {
+      ip6 = &(ct_tuple->src).u3;
+      port = be16_to_cpu((ct_tuple->src).u.udp.port);
+      ext_ip6 = &item->tuple_reply.dst.u3;
+      mapping6 = get_mapping6_by_int_src(ip6, port, ext_ip6);
+      if (mapping6 == NULL) {
+        ext_ip6 = &(ct_tuple->dst).u3;
+        ct_tuple = &(item->tuple_reply);
+        ip6 = &(ct_tuple->src).u3;
+        port = be16_to_cpu((ct_tuple->src).u.udp.port);
+        mapping6 = get_mapping6_by_int_src(ip6, port, ext_ip6);
+        if (mapping6 != NULL) {
+          pr_debug("xt_FULLCONENAT: handle_dying_tuples(): INBOUND dying conntrack at ext port %d\n", mapping6->port);
+        }
+      } else {
+        pr_debug("xt_FULLCONENAT: handle_dying_tuples(): OUTBOUND dying conntrack at ext port %d\n", mapping6->port);
+      }
+
+      if (mapping6 == NULL) {
+        goto next;
+      }
+
+      /* look for the corresponding out-dated tuple and free it */
+      list_for_each_safe(iter_2, tmp_2, &mapping6->original_tuple_list) {
+        original_tuple_item = list_entry(iter_2, struct nat_mapping_original_tuple, node);
+
+        if (nf_ct_tuple_equal(&original_tuple_item->tuple, &(item->tuple_original))) {
+          pr_debug("xt_FULLCONENAT: handle_dying_tuples(): tuple %s expired. free this tuple.\n",
+            nf_ct_stringify_tuple6(&original_tuple_item->tuple));
+          list_del(&original_tuple_item->node);
+          kfree(original_tuple_item);
+          (mapping6->refer_count)--;
+        }
+      }
+
+      /* then kill the mapping if needed */
+      pr_debug("xt_FULLCONENAT: handle_dying_tuples(): refer_count for mapping at ext_port %d is now %d\n", mapping6->port, mapping6->refer_count);
+      if (mapping6->refer_count <= 0) {
+        pr_debug("xt_FULLCONENAT: handle_dying_tuples(): kill expired mapping at ext port %d\n", mapping6->port);
+        kill_mapping6(mapping6);
+      }
+      goto next;
+    }
+    if (unlikely(ct_tuple->src.l3num != PF_INET))
+#else
+    if (ct_tuple->src.l3num != PF_INET)
+#endif
+      goto next;
+
     ip = (ct_tuple->src).u3.ip;
     port = be16_to_cpu((ct_tuple->src).u.udp.port);
     mapping = get_mapping_by_int_src(ip, port);
@@ -325,6 +857,9 @@ static void handle_dying_tuples(void) {
 
   spin_unlock_bh(&dying_tuple_list_lock);
   spin_unlock_bh(&fullconenat_lock);
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+  spin_unlock_bh(&fullconenat6_lock);
+#endif
 }
 
 static void gc_worker(struct work_struct *work) {
@@ -406,7 +941,11 @@ static __be32 get_device_ip(const struct net_device* dev) {
   }
 }
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
 static uint16_t find_appropriate_port(struct net *net, const struct nf_conntrack_zone *zone, const uint16_t original_port, const int ifindex, const struct nf_nat_ipv4_range *range) {
+#else
+static uint16_t find_appropriate_port(struct net *net, const u16 zone, const uint16_t original_port, const int ifindex, const struct nf_nat_ipv4_range *range) {
+#endif
   uint16_t min, start, selected, range_size, i;
   struct nat_mapping* mapping = NULL;
 
@@ -462,7 +1001,11 @@ static unsigned int fullconenat_tg(struct sk_buff *skb, const struct xt_action_param *par) {
   const struct nf_nat_ipv4_multi_range_compat *mr;
   const struct nf_nat_ipv4_range *range;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
   const struct nf_conntrack_zone *zone;
+#else
+  u16 zone;
+#endif
   struct net *net;
   struct nf_conn *ct;
   enum ip_conntrack_info ctinfo;
@@ -634,6 +1177,8 @@ static unsigned int fullconenat_tg(struct sk_buff *skb, const struct xt_action_param *par) {
 
 static int fullconenat_tg_check(const struct xt_tgchk_param *par)
 {
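+  /* take the per-family conntrack netns reference for every rule; it is
+   * released unconditionally in fullconenat_tg_destroy() */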
+  nf_ct_netns_get(par->net, par->family);
+
   mutex_lock(&nf_ct_net_event_lock);
 
   tg_refer_count++;
@@ -641,7 +1186,6 @@ static int fullconenat_tg_check(const struct xt_tgchk_param *par)
   pr_debug("xt_FULLCONENAT: fullconenat_tg_check(): tg_refer_count is now %d\n", tg_refer_count);
 
   if (tg_refer_count == 1) {
-    nf_ct_netns_get(par->net, par->family);
 #ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS
     ct_event_notifier.notifier_call = ct_event_cb;
 #else
@@ -678,13 +1222,29 @@ static void fullconenat_tg_destroy(const struct xt_tgdtor_param *par)
     pr_debug("xt_FULLCONENAT: fullconenat_tg_destroy(): ct_event_notifier unregistered\n");
 
   }
-
-  nf_ct_netns_put(par->net, par->family);
 
   mutex_unlock(&nf_ct_net_event_lock);
+
+  nf_ct_netns_put(par->net, par->family);
 }
 
 static struct xt_target tg_reg[] __read_mostly = {
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+  {
+    .name = "FULLCONENAT",
+    .family = NFPROTO_IPV6,
+    .revision = 0,
+    .target = fullconenat_tg6,
+    .targetsize = sizeof(struct nf_nat_range),
+    .table = "nat",
+    .hooks = (1 << NF_INET_PRE_ROUTING) |
+             (1 << NF_INET_POST_ROUTING),
+    .checkentry = fullconenat_tg_check,
+    .destroy = fullconenat_tg_destroy,
+    .me = THIS_MODULE,
+  },
+#endif
   {
     .name = "FULLCONENAT",
     .family = NFPROTO_IPV4,
@@ -730,4 +1290,7 @@ module_exit(fullconenat_tg_exit);
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Xtables: implementation of RFC3489 full cone NAT");
 MODULE_AUTHOR("Chion Tang <tech@chionlab.moe>");
+#if IS_ENABLED(CONFIG_NF_NAT_IPV6) || (IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(5, 1, 0))
+MODULE_ALIAS("ip6t_FULLCONENAT");
+#endif
 MODULE_ALIAS("ipt_FULLCONENAT");