Merge branch 'icmp_frag'

Andy Zhou says:

====================
fragmentation ICMP

Currently, we send ICMP packets when errors occur during fragmentation or
de-fragmentation.  However, it is a bug when sending those ICMP packets
in the context of using netfilter for bridging.

Those ICMP packets are only expected in the context of routing, not in
bridging mode.

The local stack is not involved in bridging forward decisions, thus
should be not used for deciding the reverse path for those ICMP messages.

This bug only affects IPV4, not in IPv6.

v1->v2:  restructure the patches into two patches that fix defragmentation and
         fragmentation respectively.

	 A bit is add in IPCB to control whether ICMP packet should be
	 generated for defragmentation.

	 Fragmentation ICMP is now removed by restructuring the
	 ip_fragment() API.

v2->v3:  Add droping icmp for bridging contrack users
         drop exporting ip_fragment() API.

v3->v4:  Remove unnecessary parentheses in 'return' statements

v4->v5:  Drop the patch that sets and checks a bit in IPCB
         that prevents ip_defrag to send ICMP.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-05-19 00:15:50 -04:00
commit 76d7c45765
4 changed files with 71 additions and 19 deletions

View File

@ -108,8 +108,8 @@ int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *));
int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *));
int ip_do_nat(struct sk_buff *skb);
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb);
@ -478,6 +478,16 @@ enum ip_defrag_users {
IP_DEFRAG_MACVLAN,
};
/* Return true if the value of 'user' is between 'lower_bond'
* and 'upper_bond' inclusively.
*/
static inline bool ip_defrag_user_in_between(u32 user,
enum ip_defrag_users lower_bond,
enum ip_defrag_users upper_bond)
{
return user >= lower_bond && user <= upper_bond;
}
int ip_defrag(struct sk_buff *skb, u32 user);
#ifdef CONFIG_INET
struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user);

View File

@ -853,6 +853,25 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
return br_dev_queue_push_xmit(sk, skb);
}
static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
{
unsigned int mtu = ip_skb_dst_mtu(skb);
struct iphdr *iph = ip_hdr(skb);
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
return ip_do_fragment(sk, skb, output);
}
static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
{
int ret;
@ -886,7 +905,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
ret = br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
} else {
nf_bridge_info_free(skb);
ret = br_dev_queue_push_xmit(sk, skb);

View File

@ -173,6 +173,15 @@ static void ipq_kill(struct ipq *ipq)
inet_frag_kill(&ipq->q, &ip4_frags);
}
static bool frag_expire_skip_icmp(u32 user)
{
return user == IP_DEFRAG_AF_PACKET ||
ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_IN,
__IP_DEFRAG_CONNTRACK_IN_END) ||
ip_defrag_user_in_between(user, IP_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP_DEFRAG_CONNTRACK_BRIDGE_IN);
}
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
@ -217,10 +226,8 @@ static void ip_expire(unsigned long arg)
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
*/
if (qp->user == IP_DEFRAG_AF_PACKET ||
((qp->user >= IP_DEFRAG_CONNTRACK_IN) &&
(qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) &&
(skb_rtable(head)->rt_type != RTN_LOCAL)))
if (frag_expire_skip_icmp(qp->user) &&
(skb_rtable(head)->rt_type != RTN_LOCAL))
goto out_rcu_unlock;
/* Send an ICMP "Fragment Reassembly Timeout" message. */

View File

@ -83,6 +83,9 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
static int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *));
/* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph)
{
@ -478,6 +481,28 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
static int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph = ip_hdr(skb);
unsigned int mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
return ip_do_fragment(sk, skb, output);
}
/*
* This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus
@ -485,8 +510,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
* single device frame, and queue such a frame for sending.
*/
int ip_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
int (*output)(struct sock *, struct sk_buff *))
{
struct iphdr *iph;
int ptr;
@ -507,15 +532,6 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
iph = ip_hdr(skb);
mtu = ip_skb_dst_mtu(skb);
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
/*
* Setup starting values.
@ -751,7 +767,7 @@ int ip_fragment(struct sock *sk, struct sk_buff *skb,
IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
return err;
}
EXPORT_SYMBOL(ip_fragment);
EXPORT_SYMBOL(ip_do_fragment);
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)