diff -Naur old/include/linux/jhash.h new/include/linux/jhash.h --- old/include/linux/jhash.h 1970-01-01 01:00:00.000000000 +0100 +++ new/include/linux/jhash.h 2003-05-17 17:57:59.000000000 +0200 @@ -0,0 +1,143 @@ +#ifndef _LINUX_JHASH_H +#define _LINUX_JHASH_H + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup2.c, by Bob Jenkins, December 1996, Public Domain. + * hash(), hash2(), hash3, and mix() are externally useful functions. + * Routines to test the hash are included if SELF_TEST is defined. + * You can use this free for any purpose. It has no warranty. + * + * Copyright (C) 2003 David S. Miller (davem@redhat.com) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are surely my fault. -DaveM + */ + +/* Mixes a, b and c into one another in place. NOTE: Arguments are modified and each is expanded several times, so pass plain variables. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* The golden ratio: an arbitrary value */ +#define JHASH_GOLDEN_RATIO 0x9e3779b9 + +/* The most generic version, hashes an arbitrary sequence + * of bytes. No alignment or length assumptions are made about + * the input key. 
+ */ +static inline u32 jhash(void *key, u32 length, u32 initval) +{ + u32 a, b, c, len; + u8 *k = key; + + len = length; + a = b = JHASH_GOLDEN_RATIO; + c = initval; + + while (len >= 12) { /* full 12-byte groups: each word is assembled byte by byte, lowest byte first */ + a += (k[0] +((u32)k[1]<<8) +((u32)k[2]<<16) +((u32)k[3]<<24)); + b += (k[4] +((u32)k[5]<<8) +((u32)k[6]<<16) +((u32)k[7]<<24)); + c += (k[8] +((u32)k[9]<<8) +((u32)k[10]<<16)+((u32)k[11]<<24)); + + __jhash_mix(a,b,c); + + k += 12; + len -= 12; + } + + c += length; /* total length goes into c; the tail bytes added to c below start at bit 8 */ + switch (len) { /* 0 to 11 bytes remain; every case falls through to the ones below it */ + case 11: c += ((u32)k[10]<<24); + case 10: c += ((u32)k[9]<<16); + case 9 : c += ((u32)k[8]<<8); + case 8 : b += ((u32)k[7]<<24); + case 7 : b += ((u32)k[6]<<16); + case 6 : b += ((u32)k[5]<<8); + case 5 : b += k[4]; + case 4 : a += ((u32)k[3]<<24); + case 3 : a += ((u32)k[2]<<16); + case 2 : a += ((u32)k[1]<<8); + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + +/* A special optimized version that handles 1 or more u32s. + * The length parameter here is the number of u32s in the key. + */ +static inline u32 jhash2(u32 *k, u32 length, u32 initval) +{ + u32 a, b, c, len; + + a = b = JHASH_GOLDEN_RATIO; + c = initval; + len = length; + + while (len >= 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + k += 3; len -= 3; + } + + c += length * 4; /* key length in bytes */ + + switch (len) { /* 0 to 2 words remain; cases fall through */ + case 2 : b += k[1]; + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + + +/* Special ultra-optimized versions that know they are hashing exactly + * 3, 2 or 1 word(s). + * + * NOTE: In particular the "c += length; __jhash_mix(a,b,c);" normally + * done at the end is not done here. 
+ */ +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += initval; + + __jhash_mix(a, b, c); /* the only mixing pass; no length is added */ + + return c; +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return jhash_3words(a, b, 0, initval); /* third word is hashed as 0 */ +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return jhash_3words(a, 0, 0, initval); /* second and third words are hashed as 0 */ +} + +#endif /* _LINUX_JHASH_H */ diff -Naur old/include/linux/sysctl.h new/include/linux/sysctl.h --- old/include/linux/sysctl.h 2002-11-29 00:53:15.000000000 +0100 +++ new/include/linux/sysctl.h 2003-05-17 17:57:59.000000000 +0200 @@ -312,7 +312,8 @@ NET_IPV4_ROUTE_GC_ELASTICITY=14, NET_IPV4_ROUTE_MTU_EXPIRES=15, NET_IPV4_ROUTE_MIN_PMTU=16, - NET_IPV4_ROUTE_MIN_ADVMSS=17 + NET_IPV4_ROUTE_MIN_ADVMSS=17, + NET_IPV4_ROUTE_SECRET_INTERVAL=18, }; enum diff -Naur old/include/net/tcp.h new/include/net/tcp.h --- old/include/net/tcp.h 2002-11-29 00:53:15.000000000 +0100 +++ new/include/net/tcp.h 2003-05-17 17:57:59.000000000 +0200 @@ -1595,6 +1595,7 @@ int qlen; int qlen_young; int clock_hand; + u32 hash_rnd; /* random value fed to the syn_table hash functions */ struct open_request *syn_table[TCP_SYNQ_HSIZE]; }; diff -Naur old/net/ipv4/netfilter/ip_conntrack_core.c new/net/ipv4/netfilter/ip_conntrack_core.c --- old/net/ipv4/netfilter/ip_conntrack_core.c 2002-11-29 00:53:15.000000000 +0100 +++ new/net/ipv4/netfilter/ip_conntrack_core.c 2003-05-17 17:57:59.000000000 +0200 @@ -31,6 +31,8 @@ #include #include #include +#include +#include /* For ERR_PTR(). Yeah, I know... --RR */ #include @@ -107,20 +109,19 @@ nf_conntrack_put(&ct->infos[0]); } -static inline u_int32_t +static int ip_conntrack_hash_rnd_initted; /* set to 1 once ip_conntrack_hash_rnd has been seeded */ +static unsigned int ip_conntrack_hash_rnd; /* initval passed to jhash_3words() by hash_conntrack() */ + +static u_int32_t hash_conntrack(const struct ip_conntrack_tuple *tuple) { #if 0 dump_tuple(tuple); #endif - /* ntohl because more differences in low bits. */ - /* To ensure that halves of the same connection don't hash - clash, we add the source per-proto again. 
*/ - return (ntohl(tuple->src.ip + tuple->dst.ip - + tuple->src.u.all + tuple->dst.u.all - + tuple->dst.protonum) - + ntohs(tuple->src.u.all)) - % ip_conntrack_htable_size; + return (jhash_3words(tuple->src.ip, /* hash keyed by ip_conntrack_hash_rnd, then reduced modulo the table size */ + (tuple->dst.ip ^ tuple->dst.protonum), + (tuple->src.u.all | (tuple->dst.u.all << 16)), + ip_conntrack_hash_rnd) % ip_conntrack_htable_size); } inline int @@ -633,11 +634,16 @@ { struct ip_conntrack *conntrack; struct ip_conntrack_tuple repl_tuple; - size_t hash, repl_hash; + size_t hash; struct ip_conntrack_expect *expected; int i; static unsigned int drop_next = 0; + if (!ip_conntrack_hash_rnd_initted) { /* seed the hash key the first time this path runs */ + get_random_bytes(&ip_conntrack_hash_rnd, 4); + ip_conntrack_hash_rnd_initted = 1; + } + hash = hash_conntrack(tuple); if (ip_conntrack_max && @@ -661,7 +667,6 @@ DEBUGP("Can't invert tuple.\n"); return NULL; } - repl_hash = hash_conntrack(&repl_tuple); conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { @@ -1423,7 +1428,7 @@ ip_conntrack_max = 8 * ip_conntrack_htable_size; printk("ip_conntrack version %s (%u buckets, %d max)" - " - %d bytes per conntrack\n", IP_CONNTRACK_VERSION, + " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, ip_conntrack_htable_size, ip_conntrack_max, sizeof(struct ip_conntrack)); diff -Naur old/net/ipv4/route.c new/net/ipv4/route.c --- old/net/ipv4/route.c 2002-11-29 00:53:15.000000000 +0100 +++ new/net/ipv4/route.c 2003-05-17 17:57:59.000000000 +0200 @@ -85,6 +85,7 @@ #include #include #include +#include #include #include #include @@ -117,13 +118,14 @@ int ip_rt_mtu_expires = 10 * 60 * HZ; int ip_rt_min_pmtu = 512 + 20 + 20; int ip_rt_min_advmss = 256; - +int ip_rt_secret_interval = 10 * 60 * HZ; /* delay rt_secret_rebuild() uses when re-arming rt_secret_timer */ static unsigned long rt_deadline; #define RTprint(a...) printk(KERN_DEBUG a) static struct timer_list rt_flush_timer; static struct timer_list rt_periodic_timer; +static struct timer_list rt_secret_timer; /* its handler is rt_secret_rebuild() */ /* * Interface to generic destination cache. 
@@ -194,19 +196,17 @@ static struct rt_hash_bucket *rt_hash_table; static unsigned rt_hash_mask; static int rt_hash_log; +static unsigned int rt_hash_rnd; /* initval mixed into every rt_hash_code() result */ struct rt_cache_stat rt_cache_stat[NR_CPUS]; static int rt_intern_hash(unsigned hash, struct rtable *rth, struct rtable **res); -static __inline__ unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos) +static unsigned int rt_hash_code(u32 daddr, u32 saddr, u8 tos) { - unsigned hash = ((daddr & 0xF0F0F0F0) >> 4) | - ((daddr & 0x0F0F0F0F) << 4); - hash ^= saddr ^ tos; - hash ^= (hash >> 16); - return (hash ^ (hash >> 8)) & rt_hash_mask; + return (jhash_3words(daddr, saddr, (u32) tos, rt_hash_rnd) + & rt_hash_mask); } static int rt_cache_get_info(char *buffer, char **start, off_t offset, @@ -421,6 +421,8 @@ rt_deadline = 0; + get_random_bytes(&rt_hash_rnd, 4); /* new random key for rt_hash_code() */ + for (i = rt_hash_mask; i >= 0; i--) { write_lock_bh(&rt_hash_table[i].lock); rth = rt_hash_table[i].chain; @@ -479,6 +481,14 @@ spin_unlock_bh(&rt_flush_lock); } +static void rt_secret_rebuild(unsigned long dummy) +{ + unsigned long now = jiffies; + + rt_cache_flush(0); + mod_timer(&rt_secret_timer, now + ip_rt_secret_interval); /* re-arm one interval after the time sampled on entry */ +} + /* Short description of GC goals. @@ -2412,6 +2422,15 @@ mode: 0644, proc_handler: &proc_dointvec, }, + { + ctl_name: NET_IPV4_ROUTE_SECRET_INTERVAL, + procname: "secret_interval", + data: &ip_rt_secret_interval, + maxlen: sizeof(int), + mode: 0644, + proc_handler: &proc_dointvec_jiffies, + strategy: &sysctl_jiffies, + }, { 0 } }; #endif @@ -2442,15 +2461,25 @@ *eof = 1; } - /* Copy first cpu. */ - *start = buffer; - memcpy(buffer, IP_RT_ACCT_CPU(0), length); - - /* Add the other cpus in, one int at a time */ - for (i = 1; i < smp_num_cpus; i++) { - unsigned int j; - for (j = 0; j < length/4; j++) - ((u32*)buffer)[j] += ((u32*)IP_RT_ACCT_CPU(i))[j]; + offset /= sizeof(u32); /* now in u32 units for the pointer arithmetic below (presumably it was bytes -- confirm) */ + + if (length > 0) { + u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset; + u32 *dst = (u32 *) buffer; + + /* Copy first cpu. 
*/ + *start = buffer; + memcpy(dst, src, length); /* length counts bytes, hence length/4 words in the loop below */ + + /* Add the other cpus in, one int at a time */ + for (i = 1; i < smp_num_cpus; i++) { + unsigned int j; + + src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset; + + for (j = 0; j < length/4; j++) + dst[j] += src[j]; + } } return length; } @@ -2460,6 +2489,9 @@ { int i, order, goal; + rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ + (jiffies ^ (jiffies >> 7))); /* starting key built from num_physpages and jiffies */ + #ifdef CONFIG_NET_CLS_ROUTE for (order = 0; (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++) @@ -2516,6 +2548,7 @@ rt_flush_timer.function = rt_run_flush; rt_periodic_timer.function = rt_check_expire; + rt_secret_timer.function = rt_secret_rebuild; /* All the timers, started at system startup tend to synchronize. Perturb it a bit. @@ -2524,6 +2557,10 @@ ip_rt_gc_interval; add_timer(&rt_periodic_timer); + rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval + + ip_rt_secret_interval; + add_timer(&rt_secret_timer); + proc_net_create ("rt_cache", 0, rt_cache_get_info); proc_net_create ("rt_cache_stat", 0, rt_cache_stat_get_info); #ifdef CONFIG_NET_CLS_ROUTE diff -Naur old/net/ipv4/tcp.c new/net/ipv4/tcp.c --- old/net/ipv4/tcp.c 2002-11-29 00:53:15.000000000 +0100 +++ new/net/ipv4/tcp.c 2003-05-17 17:57:59.000000000 +0200 @@ -252,6 +252,7 @@ #include #include #include +#include #include #include @@ -542,6 +543,7 @@ for (lopt->max_qlen_log = 6; ; lopt->max_qlen_log++) if ((1<max_qlen_log) >= sysctl_max_syn_backlog) break; + get_random_bytes(&lopt->hash_rnd, 4); write_lock_bh(&tp->syn_wait_lock); tp->listen_opt = lopt; diff -Naur old/net/ipv4/tcp_ipv4.c new/net/ipv4/tcp_ipv4.c --- old/net/ipv4/tcp_ipv4.c 2002-11-29 00:53:15.000000000 +0100 +++ new/net/ipv4/tcp_ipv4.c 2003-05-17 17:57:59.000000000 +0200 @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -857,12 +858,9 @@ return ((struct rtable*)skb->dst)->rt_iif; } -static __inline__ unsigned tcp_v4_synq_hash(u32 raddr, u16 rport) 
+static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, u32 rnd) { - unsigned h = raddr ^ rport; - h ^= h>>16; - h ^= h>>8; - return h&(TCP_SYNQ_HSIZE-1); + return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); /* rnd is the jhash initval; the mask bounds the result for use as a syn_table index */ } static struct open_request *tcp_v4_search_req(struct tcp_opt *tp, @@ -873,7 +871,7 @@ struct tcp_listen_opt *lopt = tp->listen_opt; struct open_request *req, **prev; - for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport)]; + for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; (req = *prev) != NULL; prev = &req->dl_next) { if (req->rmt_port == rport && @@ -893,7 +891,7 @@ { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct tcp_listen_opt *lopt = tp->listen_opt; - unsigned h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port); + u32 h = tcp_v4_synq_hash(req->af.v4_req.rmt_addr, req->rmt_port, lopt->hash_rnd); req->expires = jiffies + TCP_TIMEOUT_INIT; req->retrans = 0; diff -Naur old/net/ipv4/tcp_minisocks.c new/net/ipv4/tcp_minisocks.c --- old/net/ipv4/tcp_minisocks.c 2002-11-29 00:53:15.000000000 +0100 +++ new/net/ipv4/tcp_minisocks.c 2003-05-17 17:57:59.000000000 +0200 @@ -447,6 +447,8 @@ while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) { tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death; + if (tw->next_death) /* the successor now heads the slot, so it inherits tw's back-pointer */ + tw->next_death->pprev_death = tw->pprev_death; tw->pprev_death = NULL; spin_unlock(&tw_death_lock); diff -Naur old/net/ipv6/tcp_ipv6.c new/net/ipv6/tcp_ipv6.c --- old/net/ipv6/tcp_ipv6.c 2002-11-29 00:53:15.000000000 +0100 +++ new/net/ipv6/tcp_ipv6.c 2003-05-17 17:57:59.000000000 +0200 @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -357,12 +358,11 @@ * Open request hash tables. 
*/ -static __inline__ unsigned tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport) +static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) { - unsigned h = raddr->s6_addr32[3] ^ rport; - h ^= h>>16; - h ^= h>>8; - return h&(TCP_SYNQ_HSIZE-1); + return (jhash_3words(raddr->s6_addr32[0] ^ raddr->s6_addr32[1], + raddr->s6_addr32[2] ^ raddr->s6_addr32[3], + (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); /* all four address words feed the hash, XOR-folded pairwise; rnd is the jhash initval */ } static struct open_request *tcp_v6_search_req(struct tcp_opt *tp, @@ -375,7 +375,7 @@ struct tcp_listen_opt *lopt = tp->listen_opt; struct open_request *req, **prev; - for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport)]; + for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; (req = *prev) != NULL; prev = &req->dl_next) { if (req->rmt_port == rport && @@ -1121,7 +1121,7 @@ { struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct tcp_listen_opt *lopt = tp->listen_opt; - unsigned h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port); + u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd); req->sk = NULL; req->expires = jiffies + TCP_TIMEOUT_INIT;