aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/route.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/route.c')
-rw-r--r--net/ipv4/route.c256
1 files changed, 152 insertions, 104 deletions
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 723ac9181558..e5603e84b20d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1027,6 +1027,19 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
struct fib_nh_common *nhc;
fib_select_path(net, &res, fl4, NULL);
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (fib_info_num_path(res.fi) > 1) {
+ int nhsel;
+
+ for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) {
+ nhc = fib_info_nhc(res.fi, nhsel);
+ update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
+ jiffies + net->ipv4.ip_rt_mtu_expires);
+ }
+ rcu_read_unlock();
+ return;
+ }
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
nhc = FIB_RES_NHC(res);
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
jiffies + net->ipv4.ip_rt_mtu_expires);
@@ -1263,7 +1276,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
struct flowi4 fl4 = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowi4_tos = iph->tos & INET_DSCP_MASK,
+ .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)),
.flowi4_oif = rt->dst.dev->ifindex,
.flowi4_iif = skb->dev->ifindex,
.flowi4_mark = skb->mark,
@@ -1665,49 +1678,54 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
EXPORT_SYMBOL(rt_dst_clone);
/* called in rcu_read_lock() section */
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct in_device *in_dev, u32 *itag)
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
- int err;
+ enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
- return -EINVAL;
+ return SKB_DROP_REASON_NOT_SPECIFIED;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- skb->protocol != htons(ETH_P_IP))
- return -EINVAL;
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return SKB_DROP_REASON_INVALID_PROTO;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- return -EINVAL;
+ return SKB_DROP_REASON_IP_LOCALNET;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr) &&
ip_hdr(skb)->protocol != IPPROTO_IGMP)
- return -EINVAL;
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
} else {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, itag);
- if (err < 0)
- return err;
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, itag);
+ if (reason)
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, int our)
+static enum skb_drop_reason
+ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev, int our)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
+ enum skb_drop_reason reason;
struct rtable *rth;
u32 itag = 0;
- int err;
- err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
- if (err)
- return err;
+ reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
+ &itag);
+ if (reason)
+ return reason;
if (our)
flags |= RTCF_LOCAL;
@@ -1718,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
false);
if (!rth)
- return -ENOBUFS;
+ return SKB_DROP_REASON_NOMEM;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1734,7 +1752,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
@@ -1764,11 +1782,12 @@ static void ip_handle_martian_source(struct net_device *dev,
}
/* called in rcu_read_lock() section */
-static int __mkroute_input(struct sk_buff *skb,
- const struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos)
+static enum skb_drop_reason
+__mkroute_input(struct sk_buff *skb, const struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
@@ -1782,12 +1801,13 @@ static int __mkroute_input(struct sk_buff *skb,
out_dev = __in_dev_get_rcu(dev);
if (!out_dev) {
net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
- return -EINVAL;
+ return reason;
}
- err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
+ err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
+ reason = -err;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -1815,7 +1835,7 @@ static int __mkroute_input(struct sk_buff *skb,
*/
if (out_dev == in_dev &&
IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
- err = -EINVAL;
+ reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE;
goto cleanup;
}
}
@@ -1838,7 +1858,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth = rt_dst_alloc(out_dev->dev, 0, res->type,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto cleanup;
}
@@ -1852,9 +1872,9 @@ static int __mkroute_input(struct sk_buff *skb,
lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
- err = 0;
- cleanup:
- return err;
+ reason = SKB_NOT_DROPPED_YET;
+cleanup:
+ return reason;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -2112,11 +2132,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
-static int ip_mkroute_input(struct sk_buff *skb,
- struct fib_result *res,
- struct in_device *in_dev,
- __be32 daddr, __be32 saddr, u32 tos,
- struct flow_keys *hkeys)
+static enum skb_drop_reason
+ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp, struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && fib_info_num_path(res->fi) > 1) {
@@ -2128,50 +2147,57 @@ static int ip_mkroute_input(struct sk_buff *skb,
#endif
/* create a routing cache entry */
- return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
+ return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp);
}
/* Implements all the saddr-related checks as ip_route_input_slow(),
* assuming daddr is valid and the destination is not a local broadcast one.
* Uses the provided hint instead of performing a route lookup.
*/
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- const struct sk_buff *hint)
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ const struct sk_buff *hint)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
- int err = -EINVAL;
u32 tag = 0;
if (!in_dev)
- return -EINVAL;
+ return reason;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- tos &= INET_DSCP_MASK;
- err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev,
+ in_dev, &tag);
+ if (reason)
goto martian_source;
skip_validate_source:
skb_dst_copy(skb, hint);
- return 0;
+ return SKB_NOT_DROPPED_YET;
martian_source:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
- return err;
+ return reason;
}
/* get device for dst_alloc with local routes */
@@ -2200,10 +2226,12 @@ static struct net_device *ip_rt_get_dev(struct net *net,
* called with rcu_read_lock()
*/
-static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev,
- struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
struct net *net = dev_net(dev);
@@ -2231,8 +2259,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_tun_key.tun_id = 0;
skb_dst_drop(skb);
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
res->fi = NULL;
res->table = NULL;
@@ -2242,21 +2272,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
/* Accept zero addresses only to limited broadcast;
* I even do not know to fix it or not. Waiting for complains :-)
*/
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(daddr))
+ if (ipv4_is_zeronet(daddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
* and call it once if daddr or/and saddr are loopback addresses
*/
if (ipv4_is_loopback(daddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_destination;
+ }
} else if (ipv4_is_loopback(saddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
}
/*
@@ -2266,7 +2304,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_oif = 0;
fl4.flowi4_iif = dev->ifindex;
fl4.flowi4_mark = skb->mark;
- fl4.flowi4_tos = tos;
+ fl4.flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
fl4.flowi4_flags = 0;
fl4.daddr = daddr;
@@ -2298,10 +2336,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
}
+ err = -EINVAL;
if (res->type == RTN_LOCAL) {
- err = fib_validate_source(skb, saddr, daddr, tos,
- 0, dev, in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, daddr, dscp,
+ 0, dev, in_dev, &itag);
+ if (reason)
goto martian_source;
goto local_input;
}
@@ -2310,21 +2349,28 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res->type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
make_route:
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys);
-out: return err;
+ reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp,
+ flkeys);
+
+out:
+ return reason;
brd_input:
- if (skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ if (skb->protocol != htons(ETH_P_IP)) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
+ goto out;
+ }
if (!ipv4_is_zeronet(saddr)) {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
- in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0,
+ dev, in_dev, &itag);
+ if (reason)
goto martian_source;
}
flags |= RTCF_BROADCAST;
@@ -2342,7 +2388,7 @@ local_input:
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
}
}
@@ -2379,7 +2425,7 @@ local_input:
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
no_route:
@@ -2400,12 +2446,8 @@ martian_destination:
&daddr, &saddr, dev->name);
#endif
-e_inval:
- err = -EINVAL;
- goto out;
-
e_nobufs:
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto out;
martian_source:
@@ -2414,8 +2456,10 @@ martian_source:
}
/* called with rcu_read_lock held */
-static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev, struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
* The problem was that too many Ethernet cards have broken/missing
@@ -2429,12 +2473,12 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* route cache entry is created eventually.
*/
if (ipv4_is_multicast(daddr)) {
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
- int err = -EINVAL;
if (!in_dev)
- return err;
+ return -EINVAL;
our = ip_check_mc_rcu(in_dev, daddr, saddr,
ip_hdr(skb)->protocol);
@@ -2455,27 +2499,27 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- err = ip_route_input_mc(skb, daddr, saddr,
- tos, dev, our);
+ reason = ip_route_input_mc(skb, daddr, saddr, dscp,
+ dev, our);
}
- return err;
+ return reason;
}
- return ip_route_input_slow(skb, daddr, saddr, tos, dev, res);
+ return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- u8 tos, struct net_device *dev)
+enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr,
+ __be32 saddr, dscp_t dscp,
+ struct net_device *dev)
{
+ enum skb_drop_reason reason;
struct fib_result res;
- int err;
- tos &= INET_DSCP_MASK;
rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res);
+ reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return err;
+ return reason;
}
EXPORT_SYMBOL(ip_route_input_noref);
@@ -3233,10 +3277,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
rtm = nlmsg_data(nlh);
- src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
- dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
- iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
- mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
+ src = nla_get_in_addr_default(tb[RTA_SRC], 0);
+ dst = nla_get_in_addr_default(tb[RTA_DST], 0);
+ iif = nla_get_u32_default(tb[RTA_IIF], 0);
+ mark = nla_get_u32_default(tb[RTA_MARK], 0);
if (tb[RTA_UID])
uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
else
@@ -3262,7 +3306,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.daddr = dst;
fl4.saddr = src;
fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK;
- fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
+ fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0);
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
if (sport)
@@ -3286,8 +3330,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
skb->dev = dev;
skb->mark = mark;
err = ip_route_input_rcu(skb, dst, src,
- rtm->rtm_tos & INET_DSCP_MASK, dev,
- &res);
+ inet_dsfield_to_dscp(rtm->rtm_tos),
+ dev, &res) ? -EINVAL : 0;
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
@@ -3634,6 +3678,11 @@ static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
+static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = {
+ {.protocol = PF_INET, .msgtype = RTM_GETROUTE,
+ .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED},
+};
+
int __init ip_rt_init(void)
{
void *idents_hash;
@@ -3691,8 +3740,7 @@ int __init ip_rt_init(void)
xfrm_init();
xfrm4_init();
#endif
- rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register_many(ip_rt_rtnl_msg_handlers);
#ifdef CONFIG_SYSCTL
register_pernet_subsys(&sysctl_route_ops);