diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/Makefile | 2 | ||||
-rw-r--r-- | net/core/dev.c | 12 | ||||
-rw-r--r-- | net/core/fib_rules.c | 4 | ||||
-rw-r--r-- | net/core/flow.c | 38 | ||||
-rw-r--r-- | net/core/link_watch.c | 2 | ||||
-rw-r--r-- | net/core/neighbour.c | 8 | ||||
-rw-r--r-- | net/core/netpoll.c | 4 | ||||
-rw-r--r-- | net/core/pktgen.c | 8 | ||||
-rw-r--r-- | net/core/scm.c | 2 | ||||
-rw-r--r-- | net/core/secure_seq.c | 184 | ||||
-rw-r--r-- | net/core/skbuff.c | 39 |
11 files changed, 267 insertions, 36 deletions
diff --git a/net/core/Makefile b/net/core/Makefile index 8a04dd22cf77..0d357b1c4e57 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -3,7 +3,7 @@ # obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/dev.c b/net/core/dev.c index 9444c5cb4137..b10ff0a71855 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1515,6 +1515,14 @@ static inline bool is_skb_forwardable(struct net_device *dev, */ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) { + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + if (skb_copy_ubufs(skb, GFP_ATOMIC)) { + atomic_long_inc(&dev->rx_dropped); + kfree_skb(skb); + return NET_RX_DROP; + } + } + skb_orphan(skb); nf_reset(skb); @@ -4497,10 +4505,10 @@ void __dev_set_rx_mode(struct net_device *dev) */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); - dev->uc_promisc = 1; + dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); - dev->uc_promisc = 0; + dev->uc_promisc = false; } if (ops->ndo_set_multicast_list) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0285b5..3231b468bb72 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -384,8 +384,8 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) */ list_for_each_entry(r, &ops->rules_list, list) { if (r->action == FR_ACT_GOTO && - r->target == rule->pref) { - BUG_ON(rtnl_dereference(r->ctarget) != NULL); + r->target == rule->pref && + rtnl_dereference(r->ctarget) == NULL) { rcu_assign_pointer(r->ctarget, rule); if (--ops->unresolved_rules == 0) break; diff --git a/net/core/flow.c b/net/core/flow.c index 990703b8863b..555a456efb07 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -22,7 +22,7 @@ #include <linux/cpumask.h> #include <linux/mutex.h> #include <net/flow.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/security.h> struct flow_cache_entry { @@ -30,6 +30,7 @@ struct flow_cache_entry { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -172,29 +173,26 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - const struct flowi *key) + const struct flowi *key, + size_t keysize) { const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { const flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); - - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); k1 = (const flow_compare_t *) key1; - k1_lim = k1 + n_elem; + k1_lim = k1 + keysize; k2 = (const flow_compare_t *) key2; @@ -215,6 +213,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, struct flow_cache_entry *fle, *tfle; struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -222,6 +221,11 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -230,11 +234,12 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); + hash = flow_hash_code(fc, fcp, key, keysize); hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -246,9 +251,10 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index a7b342131869..357bd4ee4baa 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -126,7 +126,7 @@ static void linkwatch_schedule_work(int urgent) return; /* It's already running which is good enough. */ - if (!cancel_delayed_work(&linkwatch_work)) + if (!__cancel_delayed_work(&linkwatch_work)) return; /* Otherwise we reschedule it again for immediate execution. */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0bb203..1334d7e56f02 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1319,11 +1319,15 @@ static void neigh_proxy_process(unsigned long arg) if (tdif <= 0) { struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); - if (tbl->proxy_redo && netif_running(dev)) + if (tbl->proxy_redo && netif_running(dev)) { + rcu_read_lock(); tbl->proxy_redo(skb); - else + rcu_read_unlock(); + } else { kfree_skb(skb); + } dev_put(dev); } else if (!sched_next || tdif < sched_next) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index adf84dd8c7b5..52622517e0d8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -558,13 +558,14 @@ int __netpoll_rx(struct sk_buff *skb) if (skb_shared(skb)) goto out; - iph = (struct iphdr *)skb->data; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; + iph = (struct iphdr *)skb->data; if (iph->ihl < 5 || iph->version != 4) goto out; if (!pskb_may_pull(skb, iph->ihl*4)) goto out; + iph = (struct iphdr *)skb->data; if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) goto out; @@ -579,6 +580,7 @@ int __netpoll_rx(struct sk_buff *skb) if (pskb_trim_rcsum(skb, len)) goto out; + iph = (struct iphdr *)skb->data; if (iph->protocol != IPPROTO_UDP) goto out; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f76079cd750c..e35a6fbb8110 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1070,7 +1070,9 @@ static ssize_t pktgen_if_write(struct file *file, len = num_arg(&user_buffer[i], 10, &value); if (len < 0) return len; - + if ((value > 0) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; i += len; pkt_dev->clone_skb = value; @@ -3555,7 +3557,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->min_pkt_size = ETH_ZLEN; pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->delay = pg_delay_d; pkt_dev->count = pg_count_d; pkt_dev->sofar = 0; @@ -3563,7 +3564,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->udp_src_max = 9; pkt_dev->udp_dst_min = 9; pkt_dev->udp_dst_max = 9; - pkt_dev->vlan_p = 0; pkt_dev->vlan_cfi = 0; pkt_dev->vlan_id = 0xffff; @@ -3575,6 +3575,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) err = pktgen_setup_dev(pkt_dev, ifname); if (err) goto out1; + if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) + pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->entry = proc_create_data(ifname, 0600, pg_proc_dir, &pktgen_if_fops, pkt_dev); diff --git a/net/core/scm.c b/net/core/scm.c index 4c1ef026d695..811b53fb330e 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -192,7 +192,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) goto error; cred->uid = cred->euid = p->creds.uid; - cred->gid = cred->egid = p->creds.uid; + cred->gid = cred->egid = p->creds.gid; put_cred(p->cred); p->cred = cred; } diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c new file mode 100644 index 000000000000..45329d7c9dd9 --- /dev/null +++ b/net/core/secure_seq.c @@ -0,0 +1,184 @@ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/cryptohash.h> +#include <linux/module.h> +#include <linux/cache.h> +#include <linux/random.h> +#include <linux/hrtimer.h> +#include <linux/ktime.h> +#include <linux/string.h> + +#include <net/secure_seq.h> + +static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; + +static int __init net_secret_init(void) +{ + get_random_bytes(net_secret, sizeof(net_secret)); + return 0; +} +late_initcall(net_secret_init); + +static u32 seq_scale(u32 seq) +{ + /* + * As close as possible to RFC 793, which + * suggests using a 250 kHz clock. + * Further reading shows this assumes 2 Mb/s networks. + * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. + * For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but + * we also need to limit the resolution so that the u32 seq + * overlaps less than one time per MSL (2 minutes). + * Choosing a clock of 64 ns period is OK. (period of 274 s) + */ + return seq + (ktime_to_ns(ktime_get_real()) >> 6); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return seq_scale(hash[0]); +} +EXPORT_SYMBOL(secure_tcpv6_sequence_number); + +u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, + __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + (__force u32) daddr[i]; + secret[4] = net_secret[4] + (__force u32)dport; + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + return hash[0]; +} +#endif + +#ifdef CONFIG_INET +__u32 secure_ip_id(__be32 daddr) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force __u32) daddr; + hash[1] = net_secret[13]; + hash[2] = net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_ipv6_id(const __be32 daddr[4]) +{ + __u32 hash[4]; + + memcpy(hash, daddr, 16); + md5_transform(hash, net_secret); + + return hash[0]; +} + +__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return seq_scale(hash[0]); +} + +u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = (__force u32)dport ^ net_secret[14]; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + return hash[0]; +} +EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); +#endif + +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + + hash[0] = (__force u32)saddr; + hash[1] = (__force u32)daddr; + hash[2] = ((__force u16)sport << 16) + (__force u16)dport; + hash[3] = net_secret[15]; + + md5_transform(hash, net_secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccp_sequence_number); + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, + __be16 sport, __be16 dport) +{ + u32 secret[MD5_MESSAGE_BYTES / 4]; + u32 hash[MD5_DIGEST_WORDS]; + u64 seq; + u32 i; + + memcpy(hash, saddr, 16); + for (i = 0; i < 4; i++) + secret[i] = net_secret[i] + daddr[i]; + secret[4] = net_secret[4] + + (((__force u16)sport << 16) + (__force u16)dport); + for (i = 5; i < MD5_MESSAGE_BYTES / 4; i++) + secret[i] = net_secret[i]; + + md5_transform(hash, secret); + + seq = hash[0] | (((u64)hash[1]) << 32); + seq += ktime_to_ns(ktime_get_real()); + seq &= (1ull << 48) - 1; + + return seq; +} +EXPORT_SYMBOL(secure_dccpv6_sequence_number); +#endif +#endif diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2beda824636e..387703f56fce 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -611,8 +611,21 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); -/* skb frags copy userspace buffers to kernel */ -static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) +/* skb_copy_ubufs - copy userspace skb frags buffers to kernel + * @skb: the skb to modify + * @gfp_mask: allocation priority + * + * This must be called on SKBTX_DEV_ZEROCOPY skb. + * It will copy all frags into kernel and drop the reference + * to userspace pages. + * + * If this function is called from an interrupt gfp_mask() must be + * %GFP_ATOMIC. + * + * Returns 0 on success or a negative error code on failure + * to allocate kernel memory to copy to. + */ +int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { int i; int num_frags = skb_shinfo(skb)->nr_frags; @@ -652,6 +665,8 @@ static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) skb_shinfo(skb)->frags[i - 1].page = head; head = (struct page *)head->private; } + + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; return 0; } @@ -677,7 +692,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) return NULL; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } n = skb + 1; @@ -803,7 +817,6 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) n = NULL; goto out; } - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; @@ -896,7 +909,6 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { if (skb_copy_ubufs(skb, gfp_mask)) goto nofrags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) get_page(skb_shinfo(skb)->frags[i].page); @@ -1369,8 +1381,21 @@ pull_pages: } EXPORT_SYMBOL(__pskb_pull_tail); -/* Copy some data bits from skb to kernel buffer. */ - +/** + * skb_copy_bits - copy bits from skb to kernel buffer + * @skb: source skb + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy the specified number of bytes from the source skb to the + * destination buffer. + * + * CAUTION ! : + * If its prototype is ever changed, + * check arch/{*}/net/{*}.S files, + * since it is called from BPF assembly code. + */ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) { int start = skb_headlen(skb); |