2008-01-11 21:57:09 +07:00
|
|
|
/* SCTP kernel implementation
|
2005-04-17 05:20:36 +07:00
|
|
|
* (C) Copyright IBM Corp. 2001, 2004
|
|
|
|
* Copyright (c) 1999-2000 Cisco, Inc.
|
|
|
|
* Copyright (c) 1999-2001 Motorola, Inc.
|
|
|
|
* Copyright (c) 2001 Intel Corp.
|
|
|
|
*
|
2008-01-11 21:57:09 +07:00
|
|
|
* This file is part of the SCTP kernel implementation
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
2008-01-11 21:57:09 +07:00
|
|
|
* This SCTP implementation is free software;
|
2005-04-17 05:20:36 +07:00
|
|
|
* you can redistribute it and/or modify it under the terms of
|
|
|
|
* the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
|
|
* any later version.
|
|
|
|
*
|
2008-01-11 21:57:09 +07:00
|
|
|
* This SCTP implementation is distributed in the hope that it
|
2005-04-17 05:20:36 +07:00
|
|
|
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
|
|
* ************************
|
|
|
|
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
* See the GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
2013-12-06 21:28:48 +07:00
|
|
|
* along with GNU CC; see the file COPYING. If not, see
|
|
|
|
* <http://www.gnu.org/licenses/>.
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
|
|
|
* Please send any bug reports or fixes you make to the
|
|
|
|
* email addresses:
|
2013-07-23 19:51:47 +07:00
|
|
|
* lksctp developers <linux-sctp@vger.kernel.org>
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
|
|
|
* Written or modified by:
|
|
|
|
* Randall Stewart <randall@sctp.chicago.il.us>
|
|
|
|
* Ken Morneau <kmorneau@cisco.com>
|
|
|
|
* Qiaobing Xie <qxie1@email.mot.com>
|
|
|
|
* La Monte H.P. Yarroll <piggy@acm.org>
|
|
|
|
* Karl Knutson <karl@athena.chicago.il.us>
|
|
|
|
* Jon Grimm <jgrimm@us.ibm.com>
|
|
|
|
* Xingang Guo <xingang.guo@intel.com>
|
|
|
|
* Hui Huang <hui.huang@nokia.com>
|
|
|
|
* Sridhar Samudrala <sri@us.ibm.com>
|
|
|
|
* Daisy Chang <daisyc@us.ibm.com>
|
|
|
|
* Dajiang Zhang <dajiang.zhang@nokia.com>
|
|
|
|
* Ardelle Fan <ardelle.fan@intel.com>
|
|
|
|
* Ryan Layer <rmlayer@us.ibm.com>
|
|
|
|
* Anup Pemmaiah <pemmaiah@cc.usu.edu>
|
|
|
|
* Kevin Gao <kevin.gao@intel.com>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __sctp_structs_h__
|
|
|
|
#define __sctp_structs_h__
|
|
|
|
|
2013-06-25 23:17:27 +07:00
|
|
|
#include <linux/ktime.h>
|
2018-06-18 09:52:50 +07:00
|
|
|
#include <linux/rhashtable-types.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/socket.h> /* linux/in.h needs this!! */
|
|
|
|
#include <linux/in.h> /* We get struct sockaddr_in. */
|
|
|
|
#include <linux/in6.h> /* We get struct in6_addr */
|
|
|
|
#include <linux/ipv6.h>
|
|
|
|
#include <asm/param.h> /* We get MAXHOSTNAMELEN. */
|
2011-07-27 06:09:06 +07:00
|
|
|
#include <linux/atomic.h> /* This gets us atomic counters. */
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/skbuff.h> /* We need sk_buff_head. */
|
|
|
|
#include <linux/workqueue.h> /* We need tq_struct. */
|
2018-08-11 00:11:43 +07:00
|
|
|
#include <linux/flex_array.h> /* We need flex_array. */
|
2005-04-17 05:20:36 +07:00
|
|
|
#include <linux/sctp.h> /* We need sctp* header structs. */
|
2007-10-09 15:15:59 +07:00
|
|
|
#include <net/sctp/auth.h> /* We need auth specific structs */
|
2016-07-14 01:08:55 +07:00
|
|
|
#include <net/ip.h> /* For inet_skb_parm */
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* A convenience union for handling sockaddr structures: one object
 * that can be viewed as an IPv4, an IPv6 or a generic socket address.
 * We should wean ourselves off this.
 */
union sctp_addr {
	struct sockaddr_in v4;		/* IPv4 view */
	struct sockaddr_in6 v6;		/* IPv6 view */
	struct sockaddr sa;		/* family-agnostic view */
};
|
|
|
|
|
|
|
|
/* Forward declarations for data structures, so the headers included
 * below and the definitions that follow can refer to them by pointer.
 */
struct sctp_globals;
struct sctp_endpoint;
struct sctp_association;
struct sctp_transport;
struct sctp_packet;
struct sctp_chunk;
struct sctp_inq;
struct sctp_outq;
struct sctp_bind_addr;
struct sctp_ulpq;
struct sctp_ep_common;
struct crypto_shash;
struct sctp_stream;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
|
|
|
|
#include <net/sctp/tsnmap.h>
|
|
|
|
#include <net/sctp/ulpevent.h>
|
|
|
|
#include <net/sctp/ulpqueue.h>
|
2017-12-08 20:04:01 +07:00
|
|
|
#include <net/sctp/stream_interleave.h>
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Structures useful for managing bind/connect. */
|
|
|
|
|
|
|
|
struct sctp_bind_bucket {
|
|
|
|
unsigned short port;
|
|
|
|
unsigned short fastreuse;
|
2007-11-09 23:43:40 +07:00
|
|
|
struct hlist_node node;
|
2005-04-17 05:20:36 +07:00
|
|
|
struct hlist_head owner;
|
2012-08-06 15:39:38 +07:00
|
|
|
struct net *net;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct sctp_bind_hashbucket {
|
|
|
|
spinlock_t lock;
|
2007-11-09 23:43:40 +07:00
|
|
|
struct hlist_head chain;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Used for hashing all associations. */
|
|
|
|
struct sctp_hashbucket {
|
|
|
|
rwlock_t lock;
|
2007-11-09 23:43:40 +07:00
|
|
|
struct hlist_head chain;
|
2005-04-17 05:20:36 +07:00
|
|
|
} __attribute__((__aligned__(8)));
|
|
|
|
|
|
|
|
|
|
|
|
/* The SCTP globals structure. */
|
|
|
|
extern struct sctp_globals {
|
|
|
|
/* This is a list of groups of functions for each address
|
|
|
|
* family that we support.
|
|
|
|
*/
|
|
|
|
struct list_head address_families;
|
|
|
|
|
|
|
|
/* This is the hash of all endpoints. */
|
|
|
|
struct sctp_hashbucket *ep_hashtable;
|
|
|
|
/* This is the sctp port control hash. */
|
|
|
|
struct sctp_bind_hashbucket *port_hashtable;
|
2015-12-30 22:50:46 +07:00
|
|
|
/* This is the hash of all transports. */
|
2016-11-15 22:23:11 +07:00
|
|
|
struct rhltable transport_hashtable;
|
2013-08-10 03:09:41 +07:00
|
|
|
|
2013-08-26 21:34:00 +07:00
|
|
|
/* Sizes of above hashtables. */
|
|
|
|
int ep_hashsize;
|
|
|
|
int port_hashsize;
|
|
|
|
|
|
|
|
/* Default initialization values to be applied to new associations. */
|
|
|
|
__u16 max_instreams;
|
|
|
|
__u16 max_outstreams;
|
|
|
|
|
2013-08-10 03:09:41 +07:00
|
|
|
/* Flag to indicate whether computing and verifying checksum
|
|
|
|
* is disabled. */
|
|
|
|
bool checksum_disable;
|
2005-04-17 05:20:36 +07:00
|
|
|
} sctp_globals;
|
|
|
|
|
|
|
|
/* Shorthand accessors for the members of the sctp_globals instance.
 * Each expands to a parenthesized member access, so it can be used
 * both as a value and as an assignment target.
 */
#define sctp_max_instreams		(sctp_globals.max_instreams)
#define sctp_max_outstreams		(sctp_globals.max_outstreams)
#define sctp_address_families		(sctp_globals.address_families)
#define sctp_ep_hashsize		(sctp_globals.ep_hashsize)
#define sctp_ep_hashtable		(sctp_globals.ep_hashtable)
#define sctp_port_hashsize		(sctp_globals.port_hashsize)
#define sctp_port_hashtable		(sctp_globals.port_hashtable)
#define sctp_transport_hashtable	(sctp_globals.transport_hashtable)
#define sctp_checksum_disable		(sctp_globals.checksum_disable)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* SCTP Socket type: UDP or TCP style.  Values are assigned
 * consecutively starting from 0.
 */
enum sctp_socket_type {
	SCTP_SOCKET_UDP = 0,
	SCTP_SOCKET_UDP_HIGH_BANDWIDTH,
	SCTP_SOCKET_TCP
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Per socket SCTP information. */
|
|
|
|
struct sctp_sock {
|
|
|
|
/* inet_sock has to be the first member of sctp_sock */
|
|
|
|
struct inet_sock inet;
|
|
|
|
/* What kind of a socket is this? */
|
2017-08-11 09:23:50 +07:00
|
|
|
enum sctp_socket_type type;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* PF_ family specific functions. */
|
|
|
|
struct sctp_pf *pf;
|
|
|
|
|
|
|
|
/* Access to HMAC transform. */
|
2016-01-24 20:20:12 +07:00
|
|
|
struct crypto_shash *hmac;
|
2012-10-24 16:20:03 +07:00
|
|
|
char *sctp_hmac_alg;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* What is our base endpointer? */
|
|
|
|
struct sctp_endpoint *ep;
|
|
|
|
|
|
|
|
struct sctp_bind_bucket *bind_hash;
|
|
|
|
/* Various Socket Options. */
|
|
|
|
__u16 default_stream;
|
|
|
|
__u32 default_ppid;
|
|
|
|
__u16 default_flags;
|
|
|
|
__u32 default_context;
|
|
|
|
__u32 default_timetolive;
|
2006-12-14 07:34:22 +07:00
|
|
|
__u32 default_rcv_context;
|
2007-03-24 01:34:36 +07:00
|
|
|
int max_burst;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-12-23 02:36:46 +07:00
|
|
|
/* Heartbeat interval: The endpoint sends out a Heartbeat chunk to
|
|
|
|
* the destination address every heartbeat interval. This value
|
|
|
|
* will be inherited by all new associations.
|
|
|
|
*/
|
|
|
|
__u32 hbinterval;
|
|
|
|
|
|
|
|
/* This is the max_retrans value for new associations. */
|
|
|
|
__u16 pathmaxrxt;
|
|
|
|
|
2018-07-02 17:21:12 +07:00
|
|
|
__u32 flowlabel;
|
|
|
|
__u8 dscp;
|
|
|
|
|
2005-12-23 02:36:46 +07:00
|
|
|
/* The initial Path MTU to use for new associations. */
|
|
|
|
__u32 pathmtu;
|
|
|
|
|
|
|
|
/* The default SACK delay timeout for new associations. */
|
|
|
|
__u32 sackdelay;
|
2008-05-10 05:13:26 +07:00
|
|
|
__u32 sackfreq;
|
2005-12-23 02:36:46 +07:00
|
|
|
|
2007-12-21 04:56:32 +07:00
|
|
|
/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
|
2005-12-23 02:36:46 +07:00
|
|
|
__u32 param_flags;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sctp_rtoinfo rtoinfo;
|
|
|
|
struct sctp_paddrparams paddrparam;
|
|
|
|
struct sctp_assocparams assocparams;
|
2014-07-13 01:30:37 +07:00
|
|
|
|
2017-08-25 06:57:57 +07:00
|
|
|
/*
|
|
|
|
* These two structures must be grouped together for the usercopy
|
|
|
|
* whitelist region.
|
|
|
|
*/
|
|
|
|
struct sctp_event_subscribe subscribe;
|
|
|
|
struct sctp_initmsg initmsg;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
int user_frag;
|
2014-07-13 01:30:37 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
__u32 autoclose;
|
2006-12-21 07:07:04 +07:00
|
|
|
__u32 adaptation_ind;
|
2007-03-24 01:32:00 +07:00
|
|
|
__u32 pd_point;
|
2016-04-09 02:41:27 +07:00
|
|
|
__u16 nodelay:1,
|
2018-06-28 14:31:00 +07:00
|
|
|
reuse:1,
|
2016-04-09 02:41:27 +07:00
|
|
|
disable_fragments:1,
|
|
|
|
v4mapped:1,
|
|
|
|
frag_interleave:1,
|
2017-12-08 20:03:58 +07:00
|
|
|
strm_interleave:1,
|
2016-04-09 02:41:27 +07:00
|
|
|
recvrcvinfo:1,
|
2016-04-09 02:41:28 +07:00
|
|
|
recvnxtinfo:1,
|
2016-04-30 00:17:08 +07:00
|
|
|
data_ready_signalled:1;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-04-21 02:23:15 +07:00
|
|
|
atomic_t pd_mode;
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Receive to here while partial delivery is in effect. */
|
|
|
|
struct sk_buff_head pd_lobby;
|
2015-06-12 20:16:41 +07:00
|
|
|
|
|
|
|
/* These must be the last fields, as they will skipped on copies,
|
|
|
|
* like on accept and peeloff operations
|
|
|
|
*/
|
2011-04-26 17:32:51 +07:00
|
|
|
struct list_head auto_asconf_list;
|
|
|
|
int do_auto_asconf;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Reinterpret a generic socket pointer as the SCTP socket that starts
 * at the same address.  This is a plain pointer cast (it also drops
 * the const qualifier); no checking is performed.
 */
static inline struct sctp_sock *sctp_sk(const struct sock *sk)
{
	return (struct sctp_sock *)sk;
}
|
|
|
|
|
|
|
|
/* Inverse of sctp_sk(): reinterpret an SCTP socket pointer as the
 * generic socket at the same address.  This is a plain pointer cast
 * (it also drops the const qualifier); no checking is performed.
 */
static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
{
	return (struct sock *)sp;
}
|
|
|
|
|
2011-12-10 16:48:31 +07:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
/* SCTP socket followed by the IPv6 per-socket information; only
 * defined when IPv6 support is enabled.  The sctp_sock member comes
 * first.
 */
struct sctp6_sock {
	struct sctp_sock sctp;
	struct ipv6_pinfo inet6;
};
#endif /* CONFIG_IPV6 */
|
|
|
|
|
|
|
|
|
|
|
|
/* This is our APPLICATION-SPECIFIC state cookie.
|
|
|
|
* THIS IS NOT DICTATED BY THE SPECIFICATION.
|
|
|
|
*/
|
|
|
|
/* These are the parts of an association which we send in the cookie.
|
|
|
|
* Most of these are straight out of:
|
|
|
|
* RFC2960 12.2 Parameters necessary per association (i.e. the TCB)
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct sctp_cookie {
|
|
|
|
|
|
|
|
/* My : Tag expected in every inbound packet and sent
|
|
|
|
* Verification: in the INIT or INIT ACK chunk.
|
|
|
|
* Tag :
|
|
|
|
*/
|
|
|
|
__u32 my_vtag;
|
|
|
|
|
|
|
|
/* Peer's : Tag expected in every outbound packet except
|
|
|
|
* Verification: in the INIT chunk.
|
|
|
|
* Tag :
|
|
|
|
*/
|
|
|
|
__u32 peer_vtag;
|
|
|
|
|
|
|
|
/* The rest of these are not from the spec, but really need to
|
|
|
|
* be in the cookie.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* My Tie Tag : Assist in discovering a restarting association. */
|
|
|
|
__u32 my_ttag;
|
|
|
|
|
|
|
|
/* Peer's Tie Tag: Assist in discovering a restarting association. */
|
|
|
|
__u32 peer_ttag;
|
|
|
|
|
|
|
|
/* When does this cookie expire? */
|
2013-06-25 23:17:27 +07:00
|
|
|
ktime_t expiration;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Number of inbound/outbound streams which are set
|
|
|
|
* and negotiated during the INIT process.
|
|
|
|
*/
|
|
|
|
__u16 sinit_num_ostreams;
|
|
|
|
__u16 sinit_max_instreams;
|
|
|
|
|
|
|
|
/* This is the first sequence number I used. */
|
|
|
|
__u32 initial_tsn;
|
|
|
|
|
|
|
|
/* This holds the originating address of the INIT packet. */
|
|
|
|
union sctp_addr peer_addr;
|
|
|
|
|
|
|
|
/* IG Section 2.35.3
|
|
|
|
* Include the source port of the INIT-ACK
|
|
|
|
*/
|
|
|
|
__u16 my_port;
|
|
|
|
|
|
|
|
__u8 prsctp_capable;
|
|
|
|
|
|
|
|
/* Padding for future use */
|
|
|
|
__u8 padding;
|
|
|
|
|
2006-12-21 07:07:04 +07:00
|
|
|
__u32 adaptation_ind;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2017-06-30 10:52:16 +07:00
|
|
|
__u8 auth_random[sizeof(struct sctp_paramhdr) +
|
|
|
|
SCTP_AUTH_RANDOM_LENGTH];
|
2011-04-01 06:38:54 +07:00
|
|
|
__u8 auth_hmacs[SCTP_AUTH_NUM_HMACS * sizeof(__u16) + 2];
|
2017-06-30 10:52:16 +07:00
|
|
|
__u8 auth_chunks[sizeof(struct sctp_paramhdr) + SCTP_AUTH_MAX_CHUNKS];
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This is a shim for my peer's INIT packet, followed by
|
|
|
|
* a copy of the raw address list of the association.
|
|
|
|
* The length of the raw address list is saved in the
|
|
|
|
* raw_addr_list_len field, which will be used at the time when
|
|
|
|
* the association TCB is re-constructed from the cookie.
|
|
|
|
*/
|
|
|
|
__u32 raw_addr_list_len;
|
|
|
|
struct sctp_init_chunk peer_init[0];
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* The format of our cookie that we send to our peer. */
|
|
|
|
struct sctp_signed_cookie {
|
|
|
|
__u8 signature[SCTP_SECRET_SIZE];
|
2006-01-18 02:52:12 +07:00
|
|
|
__u32 __pad; /* force sctp_cookie alignment to 64 bits */
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sctp_cookie c;
|
2010-06-03 17:21:52 +07:00
|
|
|
} __packed;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This is another convenience type to allocate memory for address
|
|
|
|
* params for the maximum size and pass such structures around
|
|
|
|
* internally.
|
|
|
|
*/
|
|
|
|
union sctp_addr_param {
|
2007-09-19 16:19:52 +07:00
|
|
|
struct sctp_paramhdr p;
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sctp_ipv4addr_param v4;
|
|
|
|
struct sctp_ipv6addr_param v6;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* A convenience type to allow walking through the various
 * parameters and avoid casting all over the place.  Every member is
 * a pointer, so all of them alias the same address.
 */
union sctp_params {
	void *v;
	struct sctp_paramhdr *p;
	struct sctp_cookie_preserve_param *life;
	struct sctp_hostname_param *dns;
	struct sctp_cookie_param *cookie;
	struct sctp_supported_addrs_param *sat;
	struct sctp_ipv4addr_param *v4;
	struct sctp_ipv6addr_param *v6;
	union sctp_addr_param *addr;
	struct sctp_adaptation_ind_param *aind;
	struct sctp_supported_ext_param *ext;
	struct sctp_random_param *random;
	struct sctp_chunks_param *chunks;
	struct sctp_hmac_algo_param *hmac_algo;
	struct sctp_addip_param *addip;
};
|
|
|
|
|
|
|
|
/* RFC 2960. Section 3.3.5 Heartbeat.
|
|
|
|
* Heartbeat Information: variable length
|
|
|
|
* The Sender-specific Heartbeat Info field should normally include
|
|
|
|
* information about the sender's current time when this HEARTBEAT
|
|
|
|
* chunk is sent and the destination transport address to which this
|
|
|
|
* HEARTBEAT is sent (see Section 8.3).
|
|
|
|
*/
|
2017-08-11 09:23:46 +07:00
|
|
|
struct sctp_sender_hb_info {
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sctp_paramhdr param_hdr;
|
|
|
|
union sctp_addr daddr;
|
|
|
|
unsigned long sent_at;
|
2006-07-22 04:48:50 +07:00
|
|
|
__u64 hb_nonce;
|
2017-08-11 09:23:46 +07:00
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2017-05-31 15:36:32 +07:00
|
|
|
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
|
|
|
|
gfp_t gfp);
|
2017-10-04 05:20:11 +07:00
|
|
|
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid);
|
2017-01-06 21:18:33 +07:00
|
|
|
void sctp_stream_free(struct sctp_stream *stream);
|
|
|
|
void sctp_stream_clear(struct sctp_stream *stream);
|
2017-05-31 15:36:31 +07:00
|
|
|
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* What is the current SSN number for this stream?
 * 'type' is pasted onto sctp_stream_ to select the accessor that
 * yields the per-stream entry for 'sid'.
 */
#define sctp_ssn_peek(stream, type, sid) \
	(sctp_stream_##type((stream), (sid))->ssn)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Return the next SSN number for this stream.
 * Post-increment: evaluates to the stored SSN, then advances it.
 */
#define sctp_ssn_next(stream, type, sid) \
	(sctp_stream_##type((stream), (sid))->ssn++)
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Skip over this ssn and all below: the stream's SSN becomes
 * last_ssn + 1.
 *
 * The value parameter is deliberately NOT named 'ssn': a macro
 * parameter with that name would also replace the '->ssn' member
 * token, so the macro would only work for callers whose argument
 * happens to be spelled 'ssn'.  The argument is parenthesized so that
 * any expression can be passed.
 */
#define sctp_ssn_skip(stream, type, sid, last_ssn) \
	(sctp_stream_##type((stream), (sid))->ssn = (last_ssn) + 1)
|
2017-01-06 21:18:33 +07:00
|
|
|
|
2017-12-08 20:04:02 +07:00
|
|
|
/* What is the current MID number for this stream?
 * 'type' is pasted onto sctp_stream_ to select the accessor that
 * yields the per-stream entry for 'sid'.
 */
#define sctp_mid_peek(stream, type, sid) \
	(sctp_stream_##type((stream), (sid))->mid)
|
2017-12-08 20:04:02 +07:00
|
|
|
|
|
|
|
/* Return the next MID number for this stream.
 * Post-increment: evaluates to the stored MID, then advances it.
 */
#define sctp_mid_next(stream, type, sid) \
	(sctp_stream_##type((stream), (sid))->mid++)
|
2017-12-08 20:04:02 +07:00
|
|
|
|
|
|
|
/* Skip over this mid and all below: the stream's MID becomes
 * last_mid + 1.
 *
 * The value parameter is deliberately NOT named 'mid': a macro
 * parameter with that name would also replace the '->mid' member
 * token, so the macro would only work for callers whose argument
 * happens to be spelled 'mid'.  The argument is parenthesized so that
 * any expression can be passed.
 */
#define sctp_mid_skip(stream, type, sid, last_mid) \
	(sctp_stream_##type((stream), (sid))->mid = (last_mid) + 1)
|
2017-12-08 20:04:04 +07:00
|
|
|
|
2017-12-08 20:04:09 +07:00
|
|
|
/* What is the current MID_uo number for this stream?
 * 'type' is pasted onto sctp_stream_ to select the accessor that
 * yields the per-stream entry for 'sid'.
 */
#define sctp_mid_uo_peek(stream, type, sid) \
	(sctp_stream_##type((stream), (sid))->mid_uo)
|
2017-12-08 20:04:09 +07:00
|
|
|
|
|
|
|
/* Return the next MID_uo number for this stream.
 * Post-increment: evaluates to the stored MID_uo, then advances it.
 */
#define sctp_mid_uo_next(stream, type, sid) \
	(sctp_stream_##type((stream), (sid))->mid_uo++)
|
2017-12-08 20:04:09 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/*
|
|
|
|
* Pointers to address related SCTP functions.
|
|
|
|
* (i.e. things that depend on the address family.)
|
|
|
|
*/
|
|
|
|
struct sctp_af {
|
|
|
|
int (*sctp_xmit) (struct sk_buff *skb,
|
2008-08-04 11:15:08 +07:00
|
|
|
struct sctp_transport *);
|
2005-04-17 05:20:36 +07:00
|
|
|
int (*setsockopt) (struct sock *sk,
|
|
|
|
int level,
|
|
|
|
int optname,
|
|
|
|
char __user *optval,
|
2009-10-01 06:12:20 +07:00
|
|
|
unsigned int optlen);
|
2005-04-17 05:20:36 +07:00
|
|
|
int (*getsockopt) (struct sock *sk,
|
|
|
|
int level,
|
|
|
|
int optname,
|
|
|
|
char __user *optval,
|
|
|
|
int __user *optlen);
|
2006-03-21 13:45:21 +07:00
|
|
|
int (*compat_setsockopt) (struct sock *sk,
|
|
|
|
int level,
|
|
|
|
int optname,
|
|
|
|
char __user *optval,
|
2009-10-01 06:12:20 +07:00
|
|
|
unsigned int optlen);
|
2006-03-21 13:45:21 +07:00
|
|
|
int (*compat_getsockopt) (struct sock *sk,
|
|
|
|
int level,
|
|
|
|
int optname,
|
|
|
|
char __user *optval,
|
|
|
|
int __user *optlen);
|
2011-04-27 04:54:17 +07:00
|
|
|
void (*get_dst) (struct sctp_transport *t,
|
2011-04-27 04:51:31 +07:00
|
|
|
union sctp_addr *saddr,
|
|
|
|
struct flowi *fl,
|
|
|
|
struct sock *sk);
|
2008-05-29 17:55:05 +07:00
|
|
|
void (*get_saddr) (struct sctp_sock *sk,
|
2011-04-27 04:51:31 +07:00
|
|
|
struct sctp_transport *t,
|
|
|
|
struct flowi *fl);
|
2005-04-17 05:20:36 +07:00
|
|
|
void (*copy_addrlist) (struct list_head *,
|
|
|
|
struct net_device *);
|
|
|
|
int (*cmp_addr) (const union sctp_addr *addr1,
|
|
|
|
const union sctp_addr *addr2);
|
|
|
|
void (*addr_copy) (union sctp_addr *dst,
|
|
|
|
union sctp_addr *src);
|
|
|
|
void (*from_skb) (union sctp_addr *,
|
|
|
|
struct sk_buff *skb,
|
|
|
|
int saddr);
|
|
|
|
void (*from_sk) (union sctp_addr *,
|
|
|
|
struct sock *sk);
|
|
|
|
void (*from_addr_param) (union sctp_addr *,
|
|
|
|
union sctp_addr_param *,
|
2006-11-21 08:11:13 +07:00
|
|
|
__be16 port, int iif);
|
2005-04-17 05:20:36 +07:00
|
|
|
int (*to_addr_param) (const union sctp_addr *,
|
|
|
|
union sctp_addr_param *);
|
|
|
|
int (*addr_valid) (union sctp_addr *,
|
2006-06-18 12:55:35 +07:00
|
|
|
struct sctp_sock *,
|
|
|
|
const struct sk_buff *);
|
2017-08-05 18:59:54 +07:00
|
|
|
enum sctp_scope (*scope)(union sctp_addr *);
|
2006-11-21 08:24:53 +07:00
|
|
|
void (*inaddr_any) (union sctp_addr *, __be16);
|
2005-04-17 05:20:36 +07:00
|
|
|
int (*is_any) (const union sctp_addr *);
|
|
|
|
int (*available) (union sctp_addr *,
|
|
|
|
struct sctp_sock *);
|
|
|
|
int (*skb_iif) (const struct sk_buff *sk);
|
|
|
|
int (*is_ce) (const struct sk_buff *sk);
|
|
|
|
void (*seq_dump_addr)(struct seq_file *seq,
|
|
|
|
union sctp_addr *addr);
|
2008-06-05 02:40:15 +07:00
|
|
|
void (*ecn_capable)(struct sock *sk);
|
2005-04-17 05:20:36 +07:00
|
|
|
__u16 net_header_len;
|
|
|
|
int sockaddr_len;
|
2018-02-24 23:18:51 +07:00
|
|
|
int (*ip_options_len)(struct sock *sk);
|
2005-04-17 05:20:36 +07:00
|
|
|
sa_family_t sa_family;
|
|
|
|
struct list_head list;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Lookup by address family and registration of struct sctp_af
 * instances; the definitions live outside this header.
 */
struct sctp_af *sctp_get_af_specific(sa_family_t);
int sctp_register_af(struct sctp_af *);
|
|
|
|
|
|
|
|
/* Protocol family functions. */
|
|
|
|
struct sctp_pf {
|
|
|
|
void (*event_msgname)(struct sctp_ulpevent *, char *, int *);
|
|
|
|
void (*skb_msgname) (struct sk_buff *, char *, int *);
|
|
|
|
int (*af_supported) (sa_family_t, struct sctp_sock *);
|
|
|
|
int (*cmp_addr) (const union sctp_addr *,
|
|
|
|
const union sctp_addr *,
|
|
|
|
struct sctp_sock *);
|
|
|
|
int (*bind_verify) (struct sctp_sock *, union sctp_addr *);
|
|
|
|
int (*send_verify) (struct sctp_sock *, union sctp_addr *);
|
2006-11-21 08:25:49 +07:00
|
|
|
int (*supported_addrs)(const struct sctp_sock *, __be16 *);
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sock *(*create_accept_sk) (struct sock *sk,
|
2017-03-09 15:09:05 +07:00
|
|
|
struct sctp_association *asoc,
|
|
|
|
bool kern);
|
2014-07-31 01:40:53 +07:00
|
|
|
int (*addr_to_user)(struct sctp_sock *sk, union sctp_addr *addr);
|
|
|
|
void (*to_sk_saddr)(union sctp_addr *, struct sock *sk);
|
|
|
|
void (*to_sk_daddr)(union sctp_addr *, struct sock *sk);
|
2018-02-24 23:18:51 +07:00
|
|
|
void (*copy_ip_options)(struct sock *sk, struct sock *newsk);
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sctp_af *af;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Structure to track chunk fragments that have been acked, but peer
|
|
|
|
* fragments of the same message have not.
|
|
|
|
*/
|
|
|
|
struct sctp_datamsg {
|
|
|
|
/* Chunks waiting to be submitted to lower layer. */
|
|
|
|
struct list_head chunks;
|
|
|
|
/* Reference counting. */
|
2017-07-04 19:53:25 +07:00
|
|
|
refcount_t refcnt;
|
2005-04-17 05:20:36 +07:00
|
|
|
/* When is this message no longer interesting to the peer? */
|
|
|
|
unsigned long expires_at;
|
|
|
|
/* Did the messenge fail to send? */
|
|
|
|
int send_error;
|
2010-05-01 09:41:10 +07:00
|
|
|
u8 send_failed:1,
|
2017-11-25 20:18:35 +07:00
|
|
|
can_delay:1, /* should this message be Nagle delayed */
|
|
|
|
abandoned:1; /* should this message be abandoned */
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Data message and chunk helpers; the definitions live outside this
 * header.
 */
struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
					    struct sctp_sndrcvinfo *,
					    struct iov_iter *);
void sctp_datamsg_free(struct sctp_datamsg *);
void sctp_datamsg_put(struct sctp_datamsg *);
void sctp_chunk_fail(struct sctp_chunk *, int error);
int sctp_chunk_abandoned(struct sctp_chunk *);
|
|
|
|
|
|
|
|
/* RFC2960 1.4 Key Terms
|
|
|
|
*
|
|
|
|
* o Chunk: A unit of information within an SCTP packet, consisting of
|
|
|
|
* a chunk header and chunk-specific content.
|
|
|
|
*
|
|
|
|
* As a matter of convenience, we remember the SCTP common header for
|
|
|
|
* each chunk as well as a few other header pointers...
|
|
|
|
*/
|
|
|
|
struct sctp_chunk {
|
2005-07-09 11:47:49 +07:00
|
|
|
struct list_head list;
|
|
|
|
|
2017-07-04 19:53:26 +07:00
|
|
|
refcount_t refcnt;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2016-09-29 01:37:26 +07:00
|
|
|
/* How many times this chunk have been sent, for prsctp RTX policy */
|
|
|
|
int sent_count;
|
|
|
|
|
sctp: introduce stream scheduler foundations
This patch introduces the hooks necessary to do stream scheduling, as
per RFC Draft ndata. It also introduces the first scheduler, which is
what we do today but now factored out: first come first served (FCFS).
With stream scheduling now we have to track which chunk was enqueued on
which stream and be able to select another other than the in front of
the main outqueue. So we introduce a list on sctp_stream_out_ext
structure for this purpose.
We reuse sctp_chunk->transmitted_list space for the list above, as the
chunk cannot belong to the two lists at the same time. By using the
union in there, we can have distinct names for these moments.
sctp_sched_ops are the operations expected to be implemented by each
scheduler. The dequeueing is a bit particular to this implementation but
it is to match how we dequeue packets today. We first dequeue and then
check if it fits the packet and if not, we requeue it at head. Thus why
we don't have a peek operation but have dequeue_done instead, which is
called once the chunk can be safely considered as transmitted.
The check removed from sctp_outq_flush is now performed by
sctp_stream_outq_migrate, which is only called during assoc setup.
(sctp_sendmsg() also checks for it)
The only operation that is foreseen but not yet added here is a way to
signalize that a new packet is starting or that the packet is done, for
round robin scheduler per packet, but is intentionally left to the
patch that actually implements it.
Support for I-DATA chunks, also described in this RFC, with user message
interleaving is straightforward as it just requires the schedulers to
probe for the feature and ignore datamsg boundaries when dequeueing.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:13 +07:00
|
|
|
union {
|
|
|
|
/* This is our link to the per-transport transmitted list. */
|
|
|
|
struct list_head transmitted_list;
|
|
|
|
/* List in specific stream outq */
|
|
|
|
struct list_head stream_list;
|
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This field is used by chunks that hold fragmented data.
|
|
|
|
* For the first fragment this is the list that holds the rest of
|
|
|
|
* fragments. For the remaining fragments, this is the link to the
|
|
|
|
* frag_list maintained in the first fragment.
|
|
|
|
*/
|
|
|
|
struct list_head frag_list;
|
|
|
|
|
|
|
|
/* This points to the sk_buff containing the actual data. */
|
|
|
|
struct sk_buff *skb;
|
|
|
|
|
2018-03-14 18:05:30 +07:00
|
|
|
union {
|
|
|
|
/* In case of GSO packets, this will store the head one */
|
|
|
|
struct sk_buff *head_skb;
|
|
|
|
/* In case of auth enabled, this will point to the shkey */
|
|
|
|
struct sctp_shared_key *shkey;
|
|
|
|
};
|
2016-06-03 01:05:43 +07:00
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* These are the SCTP headers by reverse order in a packet.
|
|
|
|
* Note that some of these may happen more than once. In that
|
|
|
|
* case, we point at the "current" one, whatever that means
|
|
|
|
* for that level of header.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* We point this at the FIRST TLV parameter to chunk_hdr. */
|
|
|
|
union sctp_params param_hdr;
|
|
|
|
union {
|
|
|
|
__u8 *v;
|
|
|
|
struct sctp_datahdr *data_hdr;
|
|
|
|
struct sctp_inithdr *init_hdr;
|
|
|
|
struct sctp_sackhdr *sack_hdr;
|
|
|
|
struct sctp_heartbeathdr *hb_hdr;
|
|
|
|
struct sctp_sender_hb_info *hbs_hdr;
|
|
|
|
struct sctp_shutdownhdr *shutdown_hdr;
|
|
|
|
struct sctp_signed_cookie *cookie_hdr;
|
|
|
|
struct sctp_ecnehdr *ecne_hdr;
|
|
|
|
struct sctp_cwrhdr *ecn_cwr_hdr;
|
|
|
|
struct sctp_errhdr *err_hdr;
|
|
|
|
struct sctp_addiphdr *addip_hdr;
|
|
|
|
struct sctp_fwdtsn_hdr *fwdtsn_hdr;
|
2007-10-09 15:15:59 +07:00
|
|
|
struct sctp_authhdr *auth_hdr;
|
2017-12-08 20:04:00 +07:00
|
|
|
struct sctp_idatahdr *idata_hdr;
|
2017-12-14 23:41:25 +07:00
|
|
|
struct sctp_ifwdtsn_hdr *ifwdtsn_hdr;
|
2005-04-17 05:20:36 +07:00
|
|
|
} subh;
|
|
|
|
|
|
|
|
__u8 *chunk_end;
|
|
|
|
|
|
|
|
struct sctp_chunkhdr *chunk_hdr;
|
|
|
|
struct sctphdr *sctp_hdr;
|
|
|
|
|
|
|
|
/* This needs to be recoverable for SCTP_SEND_FAILED events. */
|
|
|
|
struct sctp_sndrcvinfo sinfo;
|
|
|
|
|
|
|
|
/* Which association does this belong to? */
|
|
|
|
struct sctp_association *asoc;
|
|
|
|
|
|
|
|
/* What endpoint received this chunk? */
|
|
|
|
struct sctp_ep_common *rcvr;
|
|
|
|
|
|
|
|
/* We fill this in if we are calculating RTT. */
|
|
|
|
unsigned long sent_at;
|
|
|
|
|
|
|
|
/* What is the origin IP address for this chunk? */
|
|
|
|
union sctp_addr source;
|
|
|
|
/* Destination address for this chunk. */
|
|
|
|
union sctp_addr dest;
|
|
|
|
|
|
|
|
/* For outbound message, track all fragments for SEND_FAILED. */
|
|
|
|
struct sctp_datamsg *msg;
|
|
|
|
|
|
|
|
/* For an inbound chunk, this tells us where it came from.
|
|
|
|
* For an outbound chunk, it tells us where we'd like it to
|
|
|
|
* go. It is NULL if we have no preference.
|
|
|
|
*/
|
|
|
|
struct sctp_transport *transport;
|
|
|
|
|
2007-10-04 07:51:34 +07:00
|
|
|
/* SCTP-AUTH: For the special case inbound processing of COOKIE-ECHO
|
|
|
|
* we need save a pointer to the AUTH chunk, since the SCTP-AUTH
|
|
|
|
* spec violates the principle premis that all chunks are processed
|
|
|
|
* in order.
|
|
|
|
*/
|
|
|
|
struct sk_buff *auth_chunk;
|
|
|
|
|
2008-07-25 23:44:09 +07:00
|
|
|
#define SCTP_CAN_FRTX 0x0
|
|
|
|
#define SCTP_NEED_FRTX 0x1
|
|
|
|
#define SCTP_DONT_FRTX 0x2
|
|
|
|
__u16 rtt_in_progress:1, /* This chunk used for RTT calc? */
|
|
|
|
has_tsn:1, /* Does this chunk have a TSN yet? */
|
|
|
|
has_ssn:1, /* Does this chunk have a SSN yet? */
|
2017-12-08 20:04:02 +07:00
|
|
|
#define has_mid has_ssn
|
2008-07-25 23:44:09 +07:00
|
|
|
singleton:1, /* Only chunk in the packet? */
|
|
|
|
end_of_packet:1, /* Last chunk in the packet? */
|
|
|
|
ecn_ce_done:1, /* Have we processed the ECN CE bit? */
|
|
|
|
pdiscard:1, /* Discard the whole packet now? */
|
|
|
|
tsn_gap_acked:1, /* Is this chunk acked by a GAP ACK? */
|
|
|
|
data_accepted:1, /* At least 1 chunk accepted */
|
|
|
|
auth:1, /* IN: was auth'ed | OUT: needs auth */
|
|
|
|
has_asconf:1, /* IN: have seen an asconf before */
|
|
|
|
tsn_missing_report:2, /* Data chunk missing counter. */
|
|
|
|
fast_retransmit:2; /* Is this chunk fast retransmitted? */
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2016-10-08 10:36:05 +07:00
|
|
|
/* Evaluates to true once the chunk's sent_count exceeds 1.
 * The argument is parenthesized so that any pointer expression
 * (e.g. "base + i") can be passed without mis-binding against "->".
 */
#define sctp_chunk_retransmitted(chunk)	((chunk)->sent_count > 1)
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_chunk_hold(struct sctp_chunk *);
|
|
|
|
void sctp_chunk_put(struct sctp_chunk *);
|
2014-11-15 13:11:23 +07:00
|
|
|
int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len,
|
|
|
|
struct iov_iter *from);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_chunk_free(struct sctp_chunk *);
|
|
|
|
void *sctp_addto_chunk(struct sctp_chunk *, int len, const void *data);
|
|
|
|
struct sctp_chunk *sctp_chunkify(struct sk_buff *,
|
|
|
|
const struct sctp_association *,
|
2016-03-11 04:33:07 +07:00
|
|
|
struct sock *, gfp_t gfp);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *,
|
|
|
|
union sctp_addr *);
|
|
|
|
const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
|
|
|
|
|
2017-10-04 05:20:12 +07:00
|
|
|
static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch)
|
|
|
|
{
|
|
|
|
return ntohs(ch->subh.data_hdr->stream);
|
|
|
|
}
|
|
|
|
|
2007-12-21 05:12:24 +07:00
|
|
|
enum {
|
|
|
|
SCTP_ADDR_NEW, /* new address added to assoc/ep */
|
|
|
|
SCTP_ADDR_SRC, /* address can be used as source */
|
|
|
|
SCTP_ADDR_DEL, /* address about to be deleted */
|
|
|
|
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* This is a structure for holding either an IPv6 or an IPv4 address. */
|
|
|
|
struct sctp_sockaddr_entry {
|
|
|
|
/* List linkage; presumably chains entries on an address list such as
 * sctp_bind_addr.address_list -- TODO confirm against users.
 */
struct list_head list;
|
2007-09-17 06:02:12 +07:00
|
|
|
/* RCU callback head; presumably used to defer freeing -- TODO confirm. */
struct rcu_head rcu;
|
2005-04-17 05:20:36 +07:00
|
|
|
/* The stored address (union sctp_addr). */
union sctp_addr a;
|
2007-12-21 05:12:24 +07:00
|
|
|
/* Presumably one of the SCTP_ADDR_* enum values -- TODO confirm. */
__u8 state;
|
2007-09-17 06:02:12 +07:00
|
|
|
/* NOTE(review): exact meaning of this flag is not visible here;
 * confirm against the code that sets and tests it.
 */
__u8 valid;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2011-04-26 17:32:51 +07:00
|
|
|
#define SCTP_ADDRESS_TICK_DELAY 500
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* This structure holds lists of chunks as we are assembling for
|
|
|
|
* transmission.
|
|
|
|
*/
|
|
|
|
struct sctp_packet {
|
|
|
|
/* These are the SCTP header values (host order) for the packet. */
|
|
|
|
__u16 source_port;
|
|
|
|
__u16 destination_port;
|
|
|
|
__u32 vtag;
|
|
|
|
|
|
|
|
/* This contains the payload chunks. */
|
2005-07-09 11:47:49 +07:00
|
|
|
struct list_head chunk_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This is the overhead of the sctp and ip headers. */
|
|
|
|
size_t overhead;
|
|
|
|
/* This is the total size of all chunks INCLUDING padding. */
|
|
|
|
size_t size;
|
2016-06-03 01:05:43 +07:00
|
|
|
/* This is the maximum size this packet may have */
|
|
|
|
size_t max_size;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* The packet is destined for this transport address.
|
|
|
|
* The function we finally use to pass down to the next lower
|
|
|
|
* layer lives in the transport structure.
|
|
|
|
*/
|
|
|
|
struct sctp_transport *transport;
|
|
|
|
|
2007-10-09 15:15:59 +07:00
|
|
|
/* pointer to the auth chunk for this packet */
|
|
|
|
struct sctp_chunk *auth;
|
|
|
|
|
2009-09-05 05:21:01 +07:00
|
|
|
u8 has_cookie_echo:1, /* This packet contains a COOKIE-ECHO chunk. */
|
|
|
|
has_sack:1, /* This packet contains a SACK chunk. */
|
|
|
|
has_auth:1, /* This packet contains an AUTH chunk */
|
|
|
|
has_data:1, /* This packet contains at least 1 DATA chunk */
|
2013-04-19 04:59:37 +07:00
|
|
|
ipfragok:1; /* So let ip fragment this packet */
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2016-12-28 18:26:34 +07:00
|
|
|
void sctp_packet_init(struct sctp_packet *, struct sctp_transport *,
|
|
|
|
__u16 sport, __u16 dport);
|
|
|
|
void sctp_packet_config(struct sctp_packet *, __u32 vtag, int);
|
2017-08-05 18:59:57 +07:00
|
|
|
enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet,
|
|
|
|
struct sctp_chunk *chunk,
|
|
|
|
int one_packet, gfp_t gfp);
|
|
|
|
enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet,
|
|
|
|
struct sctp_chunk *chunk);
|
2016-03-11 04:33:07 +07:00
|
|
|
int sctp_packet_transmit(struct sctp_packet *, gfp_t);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_packet_free(struct sctp_packet *);
|
|
|
|
|
|
|
|
static inline int sctp_packet_empty(struct sctp_packet *packet)
|
|
|
|
{
|
2010-09-23 03:43:57 +07:00
|
|
|
return packet->size == packet->overhead;
|
2005-04-17 05:20:36 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
/* This represents a remote transport address.
|
|
|
|
* For local transport addresses, we just use union sctp_addr.
|
|
|
|
*
|
|
|
|
* RFC2960 Section 1.4 Key Terms
|
|
|
|
*
|
|
|
|
* o Transport address: A Transport Address is traditionally defined
|
|
|
|
* by Network Layer address, Transport Layer protocol and Transport
|
|
|
|
* Layer port number. In the case of SCTP running over IP, a
|
|
|
|
* transport address is defined by the combination of an IP address
|
|
|
|
* and an SCTP port number (where SCTP is the Transport protocol).
|
|
|
|
*
|
|
|
|
* RFC2960 Section 7.1 SCTP Differences from TCP Congestion control
|
|
|
|
*
|
|
|
|
* o The sender keeps a separate congestion control parameter set for
|
|
|
|
* each of the destination addresses it can send to (not each
|
|
|
|
* source-destination pair but for each destination). The parameters
|
|
|
|
* should decay if the address is not used for a long enough time
|
|
|
|
* period.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
struct sctp_transport {
|
|
|
|
/* A list of transports. */
|
|
|
|
struct list_head transports;
|
2016-11-15 22:23:11 +07:00
|
|
|
struct rhlist_head node;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Reference counting. */
|
2017-07-04 19:53:27 +07:00
|
|
|
refcount_t refcnt;
|
2010-05-01 09:41:09 +07:00
|
|
|
/* RTO-Pending : A flag used to track if one of the DATA
|
|
|
|
* chunks sent to this address is currently being
|
|
|
|
* used to compute a RTT. If this flag is 0,
|
|
|
|
* the next DATA chunk sent to this destination
|
|
|
|
* should be used to compute a RTT and this flag
|
|
|
|
* should be set. Every time the RTT
|
|
|
|
* calculation completes (i.e. the DATA chunk
|
|
|
|
* is SACK'd) clear this flag.
|
|
|
|
*/
|
2016-01-22 00:49:09 +07:00
|
|
|
__u32 rto_pending:1,
|
2010-05-01 09:41:09 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* hb_sent : a flag that signals that we have a pending
|
|
|
|
* heartbeat.
|
|
|
|
*/
|
|
|
|
hb_sent:1,
|
|
|
|
|
|
|
|
/* Is the Path MTU update pending on this transport */
|
2015-11-30 21:17:06 +07:00
|
|
|
pmtu_pending:1,
|
2010-05-01 09:41:09 +07:00
|
|
|
|
2017-03-18 18:27:23 +07:00
|
|
|
dst_pending_confirm:1, /* need to confirm neighbour */
|
|
|
|
|
2015-11-30 21:17:06 +07:00
|
|
|
/* Has this transport moved the ctsn since we last sacked */
|
|
|
|
sack_generation:1;
|
2013-08-05 16:13:03 +07:00
|
|
|
u32 dst_cookie;
|
2012-06-30 10:04:26 +07:00
|
|
|
|
2011-05-07 06:32:47 +07:00
|
|
|
struct flowi fl;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This is the peer's IP address and port. */
|
|
|
|
union sctp_addr ipaddr;
|
|
|
|
|
|
|
|
/* These are the functions we call to handle LLP stuff. */
|
|
|
|
struct sctp_af *af_specific;
|
|
|
|
|
|
|
|
/* Which association do we belong to? */
|
|
|
|
struct sctp_association *asoc;
|
|
|
|
|
|
|
|
/* RFC2960
|
|
|
|
*
|
|
|
|
* 12.3 Per Transport Address Data
|
|
|
|
*
|
|
|
|
* For each destination transport address in the peer's
|
|
|
|
* address list derived from the INIT or INIT ACK chunk, a
|
|
|
|
* number of data elements needs to be maintained including:
|
|
|
|
*/
|
|
|
|
/* RTO : The current retransmission timeout value. */
|
2006-01-18 02:55:17 +07:00
|
|
|
unsigned long rto;
|
2007-10-25 02:59:16 +07:00
|
|
|
|
|
|
|
__u32 rtt; /* This is the most recent RTT. */
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* RTTVAR : The current RTT variation. */
|
|
|
|
__u32 rttvar;
|
|
|
|
|
|
|
|
/* SRTT : The current smoothed round trip time. */
|
|
|
|
__u32 srtt;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* These are the congestion stats.
|
|
|
|
*/
|
|
|
|
/* cwnd : The current congestion window. */
|
|
|
|
__u32 cwnd; /* This is the actual cwnd. */
|
|
|
|
|
|
|
|
/* ssthresh : The current slow start threshold value. */
|
|
|
|
__u32 ssthresh;
|
|
|
|
|
|
|
|
/* partial : The tracking method for increase of cwnd when in
|
|
|
|
* bytes acked : congestion avoidance mode (see Section 6.2.2)
|
|
|
|
*/
|
|
|
|
__u32 partial_bytes_acked;
|
|
|
|
|
|
|
|
/* Data that has been sent, but not acknowledged. */
|
|
|
|
__u32 flight_size;
|
|
|
|
|
2009-11-24 03:54:00 +07:00
|
|
|
__u32 burst_limited; /* Holds old cwnd when max.burst is applied */
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Destination */
|
|
|
|
struct dst_entry *dst;
|
|
|
|
/* Source address. */
|
|
|
|
union sctp_addr saddr;
|
|
|
|
|
|
|
|
/* Heartbeat interval: The endpoint sends out a Heartbeat chunk to
|
|
|
|
* the destination address every heartbeat interval.
|
|
|
|
*/
|
2006-01-18 02:55:17 +07:00
|
|
|
unsigned long hbinterval;
|
2005-12-23 02:36:46 +07:00
|
|
|
|
|
|
|
/* SACK delay timeout */
|
2006-01-18 02:55:17 +07:00
|
|
|
unsigned long sackdelay;
|
2008-05-10 05:13:26 +07:00
|
|
|
__u32 sackfreq;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2018-10-15 18:58:29 +07:00
|
|
|
atomic_t mtu_info;
|
|
|
|
|
2014-06-11 23:19:30 +07:00
|
|
|
/* When was the last time that we heard from this transport? We use
|
|
|
|
* this to pick new active and retran paths.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2014-06-11 23:19:30 +07:00
|
|
|
ktime_t last_time_heard;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
sctp: avoid refreshing heartbeat timer too often
Currently on high rate SCTP streams the heartbeat timer refresh can
consume quite a lot of resources as timer updates are costly and it
contains a random factor, which a) is also costly and b) invalidates
mod_timer() optimization for not editing a timer to the same value.
It may even cause the timer to be slightly advanced, for no good reason.
As suggested by David Laight this patch now removes this timer update
from hot path by leaving the timer on and re-evaluating upon its
expiration if the heartbeat is still needed or not, similarly to what is
done for TCP. If it's not needed anymore the timer is re-scheduled to
the new timeout, considering the time already elapsed.
For this, we now record the last tx timestamp per transport, updated in
the same spots as hb timer was restarted on tx. Also split up
sctp_transport_reset_timers into sctp_transport_reset_t3_rtx and
sctp_transport_reset_hb_timer, so we can re-arm T3 without re-arming the
heartbeat one.
On loopback with MTU of 65535 and data chunks with 1636, so that we
have a considerable amount of chunks without stressing system calls,
netperf -t SCTP_STREAM -l 30, perf looked like this before:
Samples: 103K of event 'cpu-clock', Event count (approx.): 25833000000
Overhead Command Shared Object Symbol
+ 6,15% netperf [kernel.vmlinux] [k] copy_user_enhanced_fast_string
- 5,43% netperf [kernel.vmlinux] [k] _raw_write_unlock_irqrestore
- _raw_write_unlock_irqrestore
- 96,54% _raw_spin_unlock_irqrestore
- 36,14% mod_timer
+ 97,24% sctp_transport_reset_timers
+ 2,76% sctp_do_sm
+ 33,65% __wake_up_sync_key
+ 28,77% sctp_ulpq_tail_event
+ 1,40% del_timer
- 1,84% mod_timer
+ 99,03% sctp_transport_reset_timers
+ 0,97% sctp_do_sm
+ 1,50% sctp_ulpq_tail_event
And after this patch, now with netperf -l 60:
Samples: 230K of event 'cpu-clock', Event count (approx.): 57707250000
Overhead Command Shared Object Symbol
+ 5,65% netperf [kernel.vmlinux] [k] memcpy_erms
+ 5,59% netperf [kernel.vmlinux] [k] copy_user_enhanced_fast_string
- 5,05% netperf [kernel.vmlinux] [k] _raw_spin_unlock_irqrestore
- _raw_spin_unlock_irqrestore
+ 49,89% __wake_up_sync_key
+ 45,68% sctp_ulpq_tail_event
- 2,85% mod_timer
+ 76,51% sctp_transport_reset_t3_rtx
+ 23,49% sctp_do_sm
+ 1,55% del_timer
+ 2,50% netperf [sctp] [k] sctp_datamsg_from_user
+ 2,26% netperf [sctp] [k] sctp_sendmsg
Throughput-wise, from 6800mbps without the patch to 7050mbps with it,
~3.7%.
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-07 01:15:19 +07:00
|
|
|
/* When was the last time that we sent a chunk using this
|
|
|
|
* transport? We use this to check for idle transports
|
|
|
|
*/
|
|
|
|
unsigned long last_time_sent;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Last time(in jiffies) when cwnd is reduced due to the congestion
|
|
|
|
* indication based on ECNE chunk.
|
|
|
|
*/
|
|
|
|
unsigned long last_time_ecne_reduced;
|
|
|
|
|
2006-01-18 02:55:17 +07:00
|
|
|
/* This is the max_retrans value for the transport and will
|
|
|
|
* be initialized from the assocs value. This can be changed
|
2012-07-21 14:56:07 +07:00
|
|
|
* using the SCTP_SET_PEER_ADDR_PARAMS socket option.
|
2006-01-18 02:55:17 +07:00
|
|
|
*/
|
|
|
|
__u16 pathmaxrxt;
|
|
|
|
|
2018-07-02 17:21:12 +07:00
|
|
|
__u32 flowlabel;
|
|
|
|
__u8 dscp;
|
|
|
|
|
2012-07-21 14:56:07 +07:00
|
|
|
/* This is the partially failed retrans value for the transport
|
|
|
|
* and will be initialized from the assocs value. This can be changed
|
|
|
|
* using the SCTP_PEER_ADDR_THLDS socket option
|
|
|
|
*/
|
|
|
|
int pf_retrans;
|
2006-01-18 02:55:17 +07:00
|
|
|
/* PMTU : The current known path MTU. */
|
|
|
|
__u32 pathmtu;
|
|
|
|
|
2007-12-21 04:56:32 +07:00
|
|
|
/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
|
2006-01-18 02:55:17 +07:00
|
|
|
__u32 param_flags;
|
|
|
|
|
2005-06-21 03:14:57 +07:00
|
|
|
/* The number of times INIT has been sent on this transport. */
|
|
|
|
int init_sent_count;
|
|
|
|
|
|
|
|
/* state : The current state of this destination,
|
tree-wide: fix assorted typos all over the place
That is "success", "unknown", "through", "performance", "[re|un]mapping"
, "access", "default", "reasonable", "[con]currently", "temperature"
, "channel", "[un]used", "application", "example","hierarchy", "therefore"
, "[over|under]flow", "contiguous", "threshold", "enough" and others.
Signed-off-by: André Goddard Rosa <andre.goddard@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
2009-11-14 22:09:05 +07:00
|
|
|
* : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKNOWN.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2005-06-21 03:14:57 +07:00
|
|
|
int state;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* These are the error stats for this destination. */
|
|
|
|
|
|
|
|
/* Error count : The current error count for this destination. */
|
|
|
|
unsigned short error_count;
|
|
|
|
|
|
|
|
/* Per : A timer used by each destination.
|
|
|
|
* Destination :
|
|
|
|
* Timer :
|
|
|
|
*
|
|
|
|
* [Everywhere else in the text this is called T3-rtx. -ed]
|
|
|
|
*/
|
|
|
|
struct timer_list T3_rtx_timer;
|
|
|
|
|
|
|
|
/* Heartbeat timer is per destination. */
|
|
|
|
struct timer_list hb_timer;
|
|
|
|
|
2010-05-06 14:56:07 +07:00
|
|
|
/* Timer to handle ICMP proto unreachable events */
|
|
|
|
struct timer_list proto_unreach_timer;
|
|
|
|
|
2017-01-17 23:44:43 +07:00
|
|
|
/* Timer to handle reconf chunk rtx */
|
|
|
|
struct timer_list reconf_timer;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Since we're using per-destination retransmission timers
|
|
|
|
* (see above), we're also using per-destination "transmitted"
|
|
|
|
* queues. This probably ought to be a private struct
|
|
|
|
* accessible only within the outqueue, but it's not, yet.
|
|
|
|
*/
|
|
|
|
struct list_head transmitted;
|
|
|
|
|
|
|
|
/* We build bundle-able packets for this transport here. */
|
|
|
|
struct sctp_packet packet;
|
|
|
|
|
|
|
|
/* This is the list of transports that have chunks to send. */
|
|
|
|
struct list_head send_ready;
|
|
|
|
|
|
|
|
/* State information saved for SFR_CACC algorithm. The key
|
|
|
|
* idea in SFR_CACC is to maintain state at the sender on a
|
|
|
|
* per-destination basis when a changeover happens.
|
|
|
|
* char changeover_active;
|
|
|
|
* char cycling_changeover;
|
|
|
|
* __u32 next_tsn_at_change;
|
|
|
|
* char cacc_saw_newack;
|
|
|
|
*/
|
|
|
|
struct {
|
|
|
|
/* An unsigned integer, which stores the next TSN to be
|
|
|
|
* used by the sender, at the moment of changeover.
|
|
|
|
*/
|
|
|
|
__u32 next_tsn_at_change;
|
|
|
|
|
|
|
|
/* A flag which indicates the occurrence of a changeover */
|
|
|
|
char changeover_active;
|
|
|
|
|
|
|
|
/* A flag which indicates whether the change of primary is
|
|
|
|
* the first switch to this destination address during an
|
|
|
|
* active switch.
|
|
|
|
*/
|
|
|
|
char cycling_changeover;
|
|
|
|
|
|
|
|
/* A temporary flag, which is used during the processing of
|
|
|
|
* a SACK to estimate the causative TSN(s)'s group.
|
|
|
|
*/
|
|
|
|
char cacc_saw_newack;
|
|
|
|
} cacc;
|
2006-07-22 04:48:50 +07:00
|
|
|
|
|
|
|
/* 64-bit random number sent with heartbeat. */
|
|
|
|
__u64 hb_nonce;
|
2012-12-06 16:25:05 +07:00
|
|
|
|
|
|
|
struct rcu_head rcu;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
2012-08-07 14:26:14 +07:00
|
|
|
struct sctp_transport *sctp_transport_new(struct net *, const union sctp_addr *,
|
2005-10-07 13:46:04 +07:00
|
|
|
gfp_t);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_transport_set_owner(struct sctp_transport *,
|
|
|
|
struct sctp_association *);
|
|
|
|
void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
|
|
|
|
struct sctp_sock *);
|
2011-04-27 04:51:31 +07:00
|
|
|
void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_transport_free(struct sctp_transport *);
|
sctp: avoid refreshing heartbeat timer too often
Currently on high rate SCTP streams the heartbeat timer refresh can
consume quite a lot of resources as timer updates are costly and it
contains a random factor, which a) is also costly and b) invalidates
mod_timer() optimization for not editing a timer to the same value.
It may even cause the timer to be slightly advanced, for no good reason.
As suggested by David Laight this patch now removes this timer update
from hot path by leaving the timer on and re-evaluating upon its
expiration if the heartbeat is still needed or not, similarly to what is
done for TCP. If it's not needed anymore the timer is re-scheduled to
the new timeout, considering the time already elapsed.
For this, we now record the last tx timestamp per transport, updated in
the same spots as hb timer was restarted on tx. Also split up
sctp_transport_reset_timers into sctp_transport_reset_t3_rtx and
sctp_transport_reset_hb_timer, so we can re-arm T3 without re-arming the
heartbeat one.
On loopback with MTU of 65535 and data chunks with 1636, so that we
have a considerable amount of chunks without stressing system calls,
netperf -t SCTP_STREAM -l 30, perf looked like this before:
Samples: 103K of event 'cpu-clock', Event count (approx.): 25833000000
Overhead Command Shared Object Symbol
+ 6,15% netperf [kernel.vmlinux] [k] copy_user_enhanced_fast_string
- 5,43% netperf [kernel.vmlinux] [k] _raw_write_unlock_irqrestore
- _raw_write_unlock_irqrestore
- 96,54% _raw_spin_unlock_irqrestore
- 36,14% mod_timer
+ 97,24% sctp_transport_reset_timers
+ 2,76% sctp_do_sm
+ 33,65% __wake_up_sync_key
+ 28,77% sctp_ulpq_tail_event
+ 1,40% del_timer
- 1,84% mod_timer
+ 99,03% sctp_transport_reset_timers
+ 0,97% sctp_do_sm
+ 1,50% sctp_ulpq_tail_event
And after this patch, now with netperf -l 60:
Samples: 230K of event 'cpu-clock', Event count (approx.): 57707250000
Overhead Command Shared Object Symbol
+ 5,65% netperf [kernel.vmlinux] [k] memcpy_erms
+ 5,59% netperf [kernel.vmlinux] [k] copy_user_enhanced_fast_string
- 5,05% netperf [kernel.vmlinux] [k] _raw_spin_unlock_irqrestore
- _raw_spin_unlock_irqrestore
+ 49,89% __wake_up_sync_key
+ 45,68% sctp_ulpq_tail_event
- 2,85% mod_timer
+ 76,51% sctp_transport_reset_t3_rtx
+ 23,49% sctp_do_sm
+ 1,55% del_timer
+ 2,50% netperf [sctp] [k] sctp_datamsg_from_user
+ 2,26% netperf [sctp] [k] sctp_sendmsg
Throughput-wise, from 6800mbps without the patch to 7050mbps with it,
~3.7%.
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-04-07 01:15:19 +07:00
|
|
|
void sctp_transport_reset_t3_rtx(struct sctp_transport *);
|
|
|
|
void sctp_transport_reset_hb_timer(struct sctp_transport *);
|
2017-01-17 23:44:43 +07:00
|
|
|
void sctp_transport_reset_reconf_timer(struct sctp_transport *transport);
|
2016-01-22 00:49:07 +07:00
|
|
|
int sctp_transport_hold(struct sctp_transport *);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_transport_put(struct sctp_transport *);
|
|
|
|
void sctp_transport_update_rto(struct sctp_transport *, __u32);
|
|
|
|
void sctp_transport_raise_cwnd(struct sctp_transport *, __u32, __u32);
|
2017-08-05 18:59:51 +07:00
|
|
|
void sctp_transport_lower_cwnd(struct sctp_transport *t,
|
|
|
|
enum sctp_lower_cwnd reason);
|
2009-11-24 03:54:00 +07:00
|
|
|
void sctp_transport_burst_limited(struct sctp_transport *);
|
|
|
|
void sctp_transport_burst_reset(struct sctp_transport *);
|
2005-04-17 05:20:36 +07:00
|
|
|
unsigned long sctp_transport_timeout(struct sctp_transport *);
|
2017-04-04 12:39:55 +07:00
|
|
|
void sctp_transport_reset(struct sctp_transport *t);
|
sctp: fix the handling of ICMP Frag Needed for too small MTUs
syzbot reported a hang involving SCTP, on which it kept flooding dmesg
with the message:
[ 246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512
That happened because whenever SCTP hits an ICMP Frag Needed, it tries
to adjust to the new MTU and triggers an immediate retransmission. But
it didn't consider the fact that MTUs smaller than the SCTP minimum MTU
allowed (512) would not cause the PMTU to change, and issued the
retransmission anyway (thus leading to another ICMP Frag Needed, and so
on).
As IPv4 (ip_rt_min_pmtu=556) and IPv6 (IPV6_MIN_MTU=1280) minimum MTU
are higher than that, sctp_transport_update_pmtu() is changed to
re-fetch the PMTU that got set after our request, and with that, detect
if there was an actual change or not.
The fix, thus, skips the immediate retransmission if the received ICMP
resulted in no change, in the hope that SCTP will select another path.
Note: The value being used for the minimum MTU (512,
SCTP_DEFAULT_MINSEGMENT) is not right and instead it should be (576,
SCTP_MIN_PMTU), but such change belongs to another patch.
Changes from v1:
- do not disable PMTU discovery, in the light of commit
06ad391919b2 ("[SCTP] Don't disable PMTU discovery when mtu is small")
and as suggested by Xin Long.
- changed the way to break the rtx loop by detecting if the icmp
resulted in a change or not
Changes from v2:
none
See-also: https://lkml.org/lkml/2017/12/22/811
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-01-05 20:17:18 +07:00
|
|
|
bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
|
2011-06-17 09:03:23 +07:00
|
|
|
void sctp_transport_immediate_rtx(struct sctp_transport *);
|
2017-02-07 04:14:13 +07:00
|
|
|
void sctp_transport_dst_release(struct sctp_transport *t);
|
|
|
|
void sctp_transport_dst_confirm(struct sctp_transport *t);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
|
|
|
|
/* This is the structure we use to queue packets as they come into
|
|
|
|
* SCTP. We write packets to it and read chunks from it.
|
|
|
|
*/
|
|
|
|
struct sctp_inq {
|
|
|
|
/* This is actually a queue of sctp_chunk each
|
|
|
|
* containing a partially decoded packet.
|
|
|
|
*/
|
2005-07-09 11:47:49 +07:00
|
|
|
struct list_head in_chunk_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
/* This is the packet which is currently off the in queue and is
|
|
|
|
* being worked on through the inbound chunk processing.
|
|
|
|
*/
|
|
|
|
struct sctp_chunk *in_progress;
|
|
|
|
|
|
|
|
/* This is the delayed task to finish delivering inbound
|
|
|
|
* messages.
|
|
|
|
*/
|
|
|
|
struct work_struct immediate;
|
|
|
|
};
|
|
|
|
|
|
|
|
void sctp_inq_init(struct sctp_inq *);
|
|
|
|
void sctp_inq_free(struct sctp_inq *);
|
|
|
|
void sctp_inq_push(struct sctp_inq *, struct sctp_chunk *packet);
|
|
|
|
struct sctp_chunk *sctp_inq_pop(struct sctp_inq *);
|
2007-10-04 07:51:34 +07:00
|
|
|
struct sctp_chunkhdr *sctp_inq_peek(struct sctp_inq *);
|
2006-11-22 21:57:56 +07:00
|
|
|
void sctp_inq_set_th_handler(struct sctp_inq *, work_func_t);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* This is the structure we use to hold outbound chunks. You push
|
|
|
|
* chunks in and they automatically pop out the other end as bundled
|
|
|
|
* packets (it calls (*output_handler)()).
|
|
|
|
*
|
|
|
|
* This structure covers sections 6.3, 6.4, 6.7, 6.8, 6.10, 7., 8.1,
|
|
|
|
* and 8.2 of the v13 draft.
|
|
|
|
*
|
|
|
|
* It handles retransmissions. The connection to the timeout portion
|
|
|
|
* of the state machine is through sctp_..._timeout() and timeout_handler.
|
|
|
|
*
|
|
|
|
* If you feed it SACKs, it will eat them.
|
|
|
|
*
|
|
|
|
* If you give it big chunks, it will fragment them.
|
|
|
|
*
|
|
|
|
* It assigns TSN's to data chunks. This happens at the last possible
|
|
|
|
* instant before transmission.
|
|
|
|
*
|
|
|
|
* When free()'d, it empties itself out via output_handler().
|
|
|
|
*/
|
|
|
|
struct sctp_outq {
|
|
|
|
struct sctp_association *asoc;
|
|
|
|
|
|
|
|
/* Data pending that has never been transmitted. */
|
2005-07-09 11:47:49 +07:00
|
|
|
struct list_head out_chunk_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
sctp: introduce stream scheduler foundations
This patch introduces the hooks necessary to do stream scheduling, as
per RFC Draft ndata. It also introduces the first scheduler, which is
what we do today but now factored out: first come first served (FCFS).
With stream scheduling now we have to track which chunk was enqueued on
which stream and be able to select another other than the in front of
the main outqueue. So we introduce a list on sctp_stream_out_ext
structure for this purpose.
We reuse sctp_chunk->transmitted_list space for the list above, as the
chunk cannot belong to the two lists at the same time. By using the
union in there, we can have distinct names for these moments.
sctp_sched_ops are the operations expected to be implemented by each
scheduler. The dequeueing is a bit particular to this implementation but
it is to match how we dequeue packets today. We first dequeue and then
check if it fits the packet and if not, we requeue it at head. Thus why
we don't have a peek operation but have dequeue_done instead, which is
called once the chunk can be safely considered as transmitted.
The check removed from sctp_outq_flush is now performed by
sctp_stream_outq_migrate, which is only called during assoc setup.
(sctp_sendmsg() also checks for it)
The only operation that is foreseen but not yet added here is a way to
signalize that a new packet is starting or that the packet is done, for
round robin scheduler per packet, but is intentionally left to the
patch that actually implements it.
Support for I-DATA chunks, also described in this RFC, with user message
interleaving is straightforward as it just requires the schedulers to
probe for the feature and ignore datamsg boundaries when dequeueing.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:13 +07:00
|
|
|
/* Stream scheduler being used */
|
|
|
|
struct sctp_sched_ops *sched;
|
|
|
|
|
2012-04-15 12:58:06 +07:00
|
|
|
unsigned int out_qlen; /* Total length of queued data chunks. */
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Error of send failed, may be used in SCTP_SEND_FAILED event. */
|
2012-04-15 12:58:06 +07:00
|
|
|
unsigned int error;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* These are control chunks we want to send. */
|
2005-07-09 11:47:49 +07:00
|
|
|
struct list_head control_chunk_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* These are chunks that have been sacked but are above the
|
|
|
|
* CTSN, or cumulative tsn ack point.
|
|
|
|
*/
|
|
|
|
struct list_head sacked;
|
|
|
|
|
|
|
|
/* Put chunks on this list to schedule them for
|
|
|
|
* retransmission.
|
|
|
|
*/
|
|
|
|
struct list_head retransmit;
|
|
|
|
|
|
|
|
/* Put chunks on this list to save them for FWD TSN processing as
|
|
|
|
* they were abandoned.
|
|
|
|
*/
|
|
|
|
struct list_head abandoned;
|
|
|
|
|
|
|
|
/* How many unackd bytes do we have in-flight? */
|
|
|
|
__u32 outstanding_bytes;
|
|
|
|
|
2008-06-05 02:39:11 +07:00
|
|
|
/* Are we doing fast-rtx on this queue */
|
|
|
|
char fast_rtx;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Corked? */
|
|
|
|
char cork;
|
|
|
|
};
|
|
|
|
|
|
|
|
void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
|
|
|
|
void sctp_outq_teardown(struct sctp_outq *);
|
|
|
|
void sctp_outq_free(struct sctp_outq*);
|
2016-09-14 01:04:22 +07:00
|
|
|
void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
|
2012-10-03 12:43:22 +07:00
|
|
|
int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
|
2005-04-17 05:20:36 +07:00
|
|
|
int sctp_outq_is_empty(const struct sctp_outq *);
|
|
|
|
void sctp_outq_restart(struct sctp_outq *);
|
|
|
|
|
2017-08-05 18:59:52 +07:00
|
|
|
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
|
|
|
|
enum sctp_retransmit_reason reason);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
|
2016-09-14 01:04:22 +07:00
|
|
|
void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
|
sctp: implement prsctp PRIO policy
prsctp PRIO policy is a policy to abandon lower priority chunks when
asoc doesn't have enough snd buffer, so that the current chunk with
higher priority can be queued successfully.
Similar to TTL/RTX policy, we will set the priority of the chunk to
prsctp_param with sinfo->sinfo_timetolive in sctp_set_prsctp_policy().
So if PRIO policy is enabled, msg->expire_at won't work.
asoc->sent_cnt_removable will record how many chunks can be checked to
remove. If priority policy is enabled, when the chunk is queued into
the out_queue, we will increase sent_cnt_removable. When the chunk is
moved to abandon_queue or dequeue and free, we will decrease
sent_cnt_removable.
In sctp_sendmsg, we will check if there is enough snd buffer for current
msg and if sent_cnt_removable is not 0. Then try to abandon chunks in
sctp_prune_prsctp when sendmsg from the retransmit/transmited queue, and
free chunks from out_queue in right order until the abandon+free size >
msg_len - sctp_wfree. For the abandon size, we have to wait until it
sends FORWARD TSN, receives the sack and the chunks are really freed.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-07-09 18:47:45 +07:00
|
|
|
void sctp_prsctp_prune(struct sctp_association *asoc,
|
|
|
|
struct sctp_sndrcvinfo *sinfo, int msg_len);
|
2017-12-14 23:41:26 +07:00
|
|
|
void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Uncork and flush an outqueue. */
|
|
|
|
static inline void sctp_outq_cork(struct sctp_outq *q)
|
|
|
|
{
|
|
|
|
q->cork = 1;
|
|
|
|
}
|
|
|
|
|
2016-07-14 01:08:55 +07:00
|
|
|
/* SCTP skb control block.
|
|
|
|
* sctp_input_cb is currently used on rx and sock rx queue
|
|
|
|
*/
|
|
|
|
struct sctp_input_cb {
|
|
|
|
union {
|
|
|
|
struct inet_skb_parm h4;
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
struct inet6_skb_parm h6;
|
|
|
|
#endif
|
|
|
|
} header;
|
|
|
|
struct sctp_chunk *chunk;
|
2016-07-14 01:08:58 +07:00
|
|
|
struct sctp_af *af;
|
2016-07-14 01:08:55 +07:00
|
|
|
};
|
|
|
|
#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0]))
|
|
|
|
|
2018-06-14 06:37:02 +07:00
|
|
|
/* Output-side skb control block; it is overlaid on skb->cb by the
 * SCTP_OUTPUT_CB() accessor macro.
 */
struct sctp_output_cb {
|
|
|
|
/* NOTE(review): presumably the last skb of a chain being built --
 * confirm against the code that uses SCTP_OUTPUT_CB().
 */
struct sk_buff *last;
|
|
|
|
};
|
|
|
|
#define SCTP_OUTPUT_CB(__skb) ((struct sctp_output_cb *)&((__skb)->cb[0]))
|
|
|
|
|
2016-07-14 01:08:57 +07:00
|
|
|
static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb)
|
|
|
|
{
|
|
|
|
const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
|
|
|
|
|
|
|
|
return chunk->head_skb ? : skb;
|
|
|
|
}
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* These bind address data fields common between endpoints and associations */
|
|
|
|
struct sctp_bind_addr {
|
|
|
|
|
|
|
|
/* RFC 2960 12.1 Parameters necessary for the SCTP instance
|
|
|
|
*
|
|
|
|
* SCTP Port: The local SCTP port number the endpoint is
|
|
|
|
* bound to.
|
|
|
|
*/
|
|
|
|
__u16 port;
|
|
|
|
|
|
|
|
/* RFC 2960 12.1 Parameters necessary for the SCTP instance
|
|
|
|
*
|
|
|
|
* Address List: The list of IP addresses that this instance
|
|
|
|
* has bound. This information is passed to one's
|
|
|
|
* peer(s) in INIT and INIT ACK chunks.
|
|
|
|
*/
|
|
|
|
struct list_head address_list;
|
|
|
|
};
|
|
|
|
|
|
|
|
void sctp_bind_addr_init(struct sctp_bind_addr *, __u16 port);
|
|
|
|
void sctp_bind_addr_free(struct sctp_bind_addr *);
|
2012-08-06 15:42:04 +07:00
|
|
|
int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
|
2005-04-17 05:20:36 +07:00
|
|
|
const struct sctp_bind_addr *src,
|
2017-08-05 18:59:54 +07:00
|
|
|
enum sctp_scope scope, gfp_t gfp,
|
2005-07-12 10:57:47 +07:00
|
|
|
int flags);
|
2007-12-07 13:50:54 +07:00
|
|
|
int sctp_bind_addr_dup(struct sctp_bind_addr *dest,
|
|
|
|
const struct sctp_bind_addr *src,
|
|
|
|
gfp_t gfp);
|
2005-04-17 05:20:36 +07:00
|
|
|
int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
|
2016-03-08 20:34:28 +07:00
|
|
|
int new_size, __u8 addr_state, gfp_t gfp);
|
2007-10-25 03:10:00 +07:00
|
|
|
int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
|
2005-04-17 05:20:36 +07:00
|
|
|
int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
|
|
|
|
struct sctp_sock *);
|
2008-07-19 13:05:40 +07:00
|
|
|
int sctp_bind_addr_conflict(struct sctp_bind_addr *, const union sctp_addr *,
|
|
|
|
struct sctp_sock *, struct sctp_sock *);
|
2007-12-21 05:12:59 +07:00
|
|
|
int sctp_bind_addr_state(const struct sctp_bind_addr *bp,
|
|
|
|
const union sctp_addr *addr);
|
2005-04-17 05:20:36 +07:00
|
|
|
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
|
|
|
|
const union sctp_addr *addrs,
|
|
|
|
int addrcnt,
|
|
|
|
struct sctp_sock *opt);
|
|
|
|
union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
|
2005-07-12 10:57:47 +07:00
|
|
|
int *addrs_len,
|
2005-10-07 13:46:04 +07:00
|
|
|
gfp_t gfp);
|
2005-04-17 05:20:36 +07:00
|
|
|
int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
|
2005-10-07 13:46:04 +07:00
|
|
|
__u16 port, gfp_t gfp);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2017-08-05 18:59:54 +07:00
|
|
|
enum sctp_scope sctp_scope(const union sctp_addr *addr);
|
|
|
|
int sctp_in_scope(struct net *net, const union sctp_addr *addr,
|
|
|
|
const enum sctp_scope scope);
|
2008-08-18 21:34:34 +07:00
|
|
|
int sctp_is_any(struct sock *sk, const union sctp_addr *addr);
|
2011-04-26 17:32:51 +07:00
|
|
|
int sctp_is_ep_boundall(struct sock *sk);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
|
|
|
|
/* What type of endpoint? */
|
2017-08-11 09:23:47 +07:00
|
|
|
enum sctp_endpoint_type {
|
2005-04-17 05:20:36 +07:00
|
|
|
SCTP_EP_TYPE_SOCKET,
|
|
|
|
SCTP_EP_TYPE_ASSOCIATION,
|
2017-08-11 09:23:47 +07:00
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/*
|
|
|
|
* A common base class to bridge the implementation view of a
|
|
|
|
* socket (usually listening) endpoint versus an association's
|
|
|
|
* local endpoint.
|
|
|
|
* This common structure is useful for several purposes:
|
|
|
|
* 1) Common interface for lookup routines.
|
|
|
|
* a) Subfunctions work for either endpoint or association
|
|
|
|
* b) Single interface to lookup allows hiding the lookup lock rather
|
|
|
|
* than acquiring it externally.
|
|
|
|
* 2) Common interface for the inbound chunk handling/state machine.
|
|
|
|
* 3) Common object handling routines for reference counting, etc.
|
|
|
|
* 4) Disentangle association lookup from endpoint lookup, where we
|
|
|
|
* do not have to find our endpoint to find our association.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct sctp_ep_common {
|
|
|
|
/* Fields to help us manage our entries in the hash tables. */
|
2007-11-09 23:43:40 +07:00
|
|
|
struct hlist_node node;
|
2005-04-17 05:20:36 +07:00
|
|
|
int hashent;
|
|
|
|
|
|
|
|
/* Runtime type information. What kind of endpoint is this? */
|
2017-08-11 09:23:47 +07:00
|
|
|
enum sctp_endpoint_type type;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Some fields to help us manage this object.
|
|
|
|
* refcnt - Reference count access to this object.
|
|
|
|
* dead - Do not attempt to use this object.
|
|
|
|
*/
|
2017-07-04 19:53:28 +07:00
|
|
|
refcount_t refcnt;
|
2013-04-15 10:27:18 +07:00
|
|
|
bool dead;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* What socket does this endpoint belong to? */
|
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
/* This is where we receive inbound chunks. */
|
|
|
|
struct sctp_inq inqueue;
|
|
|
|
|
|
|
|
/* This substructure includes the defining parameters of the
|
|
|
|
* endpoint:
|
|
|
|
* bind_addr.port is our shared port number.
|
|
|
|
* bind_addr.address_list is our set of local IP addresses.
|
|
|
|
*/
|
|
|
|
struct sctp_bind_addr bind_addr;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* RFC 2960 Section 1.4 Key Terms
|
|
|
|
*
|
|
|
|
* o SCTP endpoint: The logical sender/receiver of SCTP packets. On a
|
|
|
|
* multi-homed host, an SCTP endpoint is represented to its peers as a
|
|
|
|
* combination of a set of eligible destination transport addresses to
|
|
|
|
* which SCTP packets can be sent and a set of eligible source
|
|
|
|
* transport addresses from which SCTP packets can be received.
|
|
|
|
* All transport addresses used by an SCTP endpoint must use the
|
|
|
|
* same port number, but can use multiple IP addresses. A transport
|
|
|
|
* address used by an SCTP endpoint must not be used by another
|
|
|
|
* SCTP endpoint. In other words, a transport address is unique
|
|
|
|
* to an SCTP endpoint.
|
|
|
|
*
|
|
|
|
* From an implementation perspective, each socket has one of these.
|
|
|
|
* A TCP-style socket will have exactly one association on one of
|
|
|
|
* these. A UDP-style socket will have multiple associations hanging
|
|
|
|
* off one of these.
|
|
|
|
*/
|
|
|
|
|
|
|
|
struct sctp_endpoint {
|
|
|
|
/* Common substructure for endpoint and association. */
|
|
|
|
struct sctp_ep_common base;
|
|
|
|
|
|
|
|
/* Associations: A list of current associations and mappings
|
|
|
|
* to the data consumers for each association. This
|
|
|
|
* may be in the form of a hash table or other
|
|
|
|
* implementation dependent structure. The data
|
|
|
|
* consumers may be process identification
|
|
|
|
* information such as file descriptors, named pipe
|
|
|
|
* pointer, or table pointers dependent on how SCTP
|
|
|
|
* is implemented.
|
|
|
|
*/
|
|
|
|
/* This is really a list of struct sctp_association entries. */
|
|
|
|
struct list_head asocs;
|
|
|
|
|
|
|
|
/* Secret Key: A secret key used by this endpoint to compute
|
|
|
|
* the MAC. This SHOULD be a cryptographic quality
|
|
|
|
* random number with a sufficient length.
|
|
|
|
* Discussion in [RFC1750] can be helpful in
|
|
|
|
* selection of the key.
|
|
|
|
*/
|
2013-02-12 12:15:33 +07:00
|
|
|
__u8 secret_key[SCTP_SECRET_SIZE];
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-01-18 02:55:57 +07:00
|
|
|
/* digest: This is a digest of the sctp cookie. This field is
|
|
|
|
* only used on the receive path when we try to validate
|
|
|
|
* that the cookie has not been tampered with. We put
|
|
|
|
* this here so we pre-allocate this once and can re-use
|
|
|
|
* on every receive.
|
|
|
|
*/
|
2006-11-10 07:29:57 +07:00
|
|
|
__u8 *digest;
|
2006-01-18 02:55:57 +07:00
|
|
|
|
2005-04-29 02:02:04 +07:00
|
|
|
/* sendbuf acct. policy. */
|
|
|
|
__u32 sndbuf_policy;
|
2005-11-12 07:08:24 +07:00
|
|
|
|
|
|
|
/* rcvbuf acct. policy. */
|
|
|
|
__u32 rcvbuf_policy;
|
2007-10-09 15:15:59 +07:00
|
|
|
|
|
|
|
/* SCTP AUTH: array of the HMACs that will be allocated
|
|
|
|
* we need this per association so that we don't serialize
|
|
|
|
*/
|
2016-01-24 20:20:12 +07:00
|
|
|
struct crypto_shash **auth_hmacs;
|
2007-10-09 15:15:59 +07:00
|
|
|
|
|
|
|
/* SCTP-AUTH: hmacs for the endpoint encoded into parameter */
|
|
|
|
struct sctp_hmac_algo_param *auth_hmacs_list;
|
|
|
|
|
|
|
|
/* SCTP-AUTH: chunks to authenticate encoded into parameter */
|
|
|
|
struct sctp_chunks_param *auth_chunk_list;
|
|
|
|
|
|
|
|
/* SCTP-AUTH: endpoint shared keys */
|
|
|
|
struct list_head endpoint_shared_keys;
|
|
|
|
__u16 active_key_id;
|
2016-07-09 18:47:40 +07:00
|
|
|
__u8 auth_enable:1,
|
2017-01-17 23:44:45 +07:00
|
|
|
prsctp_enable:1,
|
|
|
|
reconf_enable:1;
|
2017-01-17 23:44:46 +07:00
|
|
|
|
|
|
|
__u8 strreset_enable;
|
2018-02-14 03:56:24 +07:00
|
|
|
|
|
|
|
/* Security identifiers from incoming (INIT). These are set by
|
|
|
|
* security_sctp_assoc_request(). These will only be used by
|
|
|
|
* SCTP TCP type sockets and peeled off connections as they
|
|
|
|
* cause a new socket to be generated. security_sctp_sk_clone()
|
|
|
|
* will then plug these into the new socket.
|
|
|
|
*/
|
|
|
|
|
|
|
|
u32 secid;
|
|
|
|
u32 peer_secid;
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Recover the outter endpoint structure. */
|
|
|
|
static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base)
|
|
|
|
{
|
|
|
|
struct sctp_endpoint *ep;
|
|
|
|
|
|
|
|
ep = container_of(base, struct sctp_endpoint, base);
|
|
|
|
return ep;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* These are function signatures for manipulating endpoints. */
|
2005-10-07 13:46:04 +07:00
|
|
|
struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_endpoint_free(struct sctp_endpoint *);
|
|
|
|
void sctp_endpoint_put(struct sctp_endpoint *);
|
|
|
|
void sctp_endpoint_hold(struct sctp_endpoint *);
|
|
|
|
void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *);
|
|
|
|
struct sctp_association *sctp_endpoint_lookup_assoc(
|
|
|
|
const struct sctp_endpoint *ep,
|
|
|
|
const union sctp_addr *paddr,
|
|
|
|
struct sctp_transport **);
|
2018-03-26 15:55:00 +07:00
|
|
|
bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
|
|
|
|
const union sctp_addr *paddr);
|
2005-04-17 05:20:36 +07:00
|
|
|
struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
|
2012-08-06 15:40:21 +07:00
|
|
|
struct net *, const union sctp_addr *);
|
2018-03-26 15:55:00 +07:00
|
|
|
bool sctp_has_association(struct net *net, const union sctp_addr *laddr,
|
|
|
|
const union sctp_addr *paddr);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
net: sctp: cache auth_enable per endpoint
Currently, it is possible to create an SCTP socket, then switch
auth_enable via sysctl setting to 1 and crash the system on connect:
Oops[#1]:
CPU: 0 PID: 0 Comm: swapper Not tainted 3.14.1-mipsgit-20140415 #1
task: ffffffff8056ce80 ti: ffffffff8055c000 task.ti: ffffffff8055c000
[...]
Call Trace:
[<ffffffff8043c4e8>] sctp_auth_asoc_set_default_hmac+0x68/0x80
[<ffffffff8042b300>] sctp_process_init+0x5e0/0x8a4
[<ffffffff8042188c>] sctp_sf_do_5_1B_init+0x234/0x34c
[<ffffffff804228c8>] sctp_do_sm+0xb4/0x1e8
[<ffffffff80425a08>] sctp_endpoint_bh_rcv+0x1c4/0x214
[<ffffffff8043af68>] sctp_rcv+0x588/0x630
[<ffffffff8043e8e8>] sctp6_rcv+0x10/0x24
[<ffffffff803acb50>] ip6_input+0x2c0/0x440
[<ffffffff8030fc00>] __netif_receive_skb_core+0x4a8/0x564
[<ffffffff80310650>] process_backlog+0xb4/0x18c
[<ffffffff80313cbc>] net_rx_action+0x12c/0x210
[<ffffffff80034254>] __do_softirq+0x17c/0x2ac
[<ffffffff800345e0>] irq_exit+0x54/0xb0
[<ffffffff800075a4>] ret_from_irq+0x0/0x4
[<ffffffff800090ec>] rm7k_wait_irqoff+0x24/0x48
[<ffffffff8005e388>] cpu_startup_entry+0xc0/0x148
[<ffffffff805a88b0>] start_kernel+0x37c/0x398
Code: dd0900b8 000330f8 0126302d <dcc60000> 50c0fff1 0047182a a48306a0
03e00008 00000000
---[ end trace b530b0551467f2fd ]---
Kernel panic - not syncing: Fatal exception in interrupt
What happens while auth_enable=0 in that case is, that
ep->auth_hmacs is initialized to NULL in sctp_auth_init_hmacs()
when endpoint is being created.
After that point, if an admin switches over to auth_enable=1,
the machine can crash due to NULL pointer dereference during
reception of an INIT chunk. When we enter sctp_process_init()
via sctp_sf_do_5_1B_init() in order to respond to an INIT chunk,
the INIT verification succeeds and while we walk and process
all INIT params via sctp_process_param() we find that
net->sctp.auth_enable is set, therefore do not fall through,
but invoke sctp_auth_asoc_set_default_hmac() instead, and thus,
dereference what we have set to NULL during endpoint
initialization phase.
The fix is to make auth_enable immutable by caching its value
during endpoint initialization, so that its original value is
being carried along until destruction. The bug seems to originate
from the very first days.
Fix in joint work with Daniel Borkmann.
Reported-by: Joshua Kinard <kumba@gentoo.org>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Tested-by: Joshua Kinard <kumba@gentoo.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-17 22:26:50 +07:00
|
|
|
int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
|
|
|
|
const struct sctp_association *asoc,
|
2017-06-30 10:52:22 +07:00
|
|
|
enum sctp_cid cid, struct sctp_init_chunk *peer_init,
|
2012-08-07 14:29:08 +07:00
|
|
|
struct sctp_chunk *chunk, struct sctp_chunk **err_chunk);
|
2011-04-20 04:30:51 +07:00
|
|
|
int sctp_process_init(struct sctp_association *, struct sctp_chunk *chunk,
|
2005-04-17 05:20:36 +07:00
|
|
|
const union sctp_addr *peer,
|
2017-06-30 10:52:22 +07:00
|
|
|
struct sctp_init_chunk *init, gfp_t gfp);
|
2005-04-17 05:20:36 +07:00
|
|
|
__u32 sctp_generate_tag(const struct sctp_endpoint *);
|
|
|
|
__u32 sctp_generate_tsn(const struct sctp_endpoint *);
|
|
|
|
|
2006-11-21 08:25:32 +07:00
|
|
|
struct sctp_inithdr_host {
|
|
|
|
__u32 init_tag;
|
|
|
|
__u32 a_rwnd;
|
|
|
|
__u16 num_outbound_streams;
|
|
|
|
__u16 num_inbound_streams;
|
|
|
|
__u32 initial_tsn;
|
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
sctp: introduce priority based stream scheduler
This patch introduces RFC Draft ndata section 3.4 Priority Based
Scheduler (SCTP_SS_PRIO).
It works by having a struct sctp_stream_priority for each priority
configured. This struct is then enlisted on a queue ordered per priority
if, and only if, there is a stream with data queued, so that dequeueing
is very straightforward: either finish current datamsg or simply dequeue
from the highest priority queued, which is the next stream pointed, and
that's it.
If there are multiple streams assigned with the same priority and with
data queued, it will do round robin amongst them while respecting
datamsgs boundaries (when not using idata chunks), to be reasonably
fair.
We intentionally don't maintain a list of priorities nor a list of all
streams with the same priority to save memory. The first would mean at
least 2 other pointers per priority (which, for 1000 priorities, that
can mean 16kB) and the second would also mean 2 other pointers but per
stream. As SCTP supports up to 65535 streams on a given asoc, that's
1MB. This impacts when giving a priority to some stream, as we have to
find out if the new priority is already being used and if we can free
the old one, and also when tearing down.
The new fields in struct sctp_stream_out_ext and sctp_stream are added
under a union because that memory is to be shared with other schedulers.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:16 +07:00
|
|
|
/* Bookkeeping for the priority based stream scheduler (SCTP_SS_PRIO).
 * Per the introducing commit, one such struct exists for each priority
 * configured and it is queued only while a stream of that priority has
 * data queued.
 */
struct sctp_stream_priorities {
|
|
|
|
/* List of priorities scheduled */
|
|
|
|
struct list_head prio_sched;
|
|
|
|
/* List of streams scheduled */
|
|
|
|
struct list_head active;
|
|
|
|
/* The next stream in line */
|
|
|
|
struct sctp_stream_out_ext *next;
|
|
|
|
/* NOTE(review): presumably the priority value this entry stands for;
 * confirm against the scheduler implementation.
 */
__u16 prio;
|
|
|
|
};
|
|
|
|
|
2017-10-04 05:20:11 +07:00
|
|
|
struct sctp_stream_out_ext {
|
|
|
|
__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
|
|
|
|
__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
|
sctp: introduce stream scheduler foundations
This patch introduces the hooks necessary to do stream scheduling, as
per RFC Draft ndata. It also introduces the first scheduler, which is
what we do today but now factored out: first come first served (FCFS).
With stream scheduling now we have to track which chunk was enqueued on
which stream and be able to select another other than the in front of
the main outqueue. So we introduce a list on sctp_stream_out_ext
structure for this purpose.
We reuse sctp_chunk->transmitted_list space for the list above, as the
chunk cannot belong to the two lists at the same time. By using the
union in there, we can have distinct names for these moments.
sctp_sched_ops are the operations expected to be implemented by each
scheduler. The dequeueing is a bit particular to this implementation but
it is to match how we dequeue packets today. We first dequeue and then
check if it fits the packet and if not, we requeue it at head. Thus why
we don't have a peek operation but have dequeue_done instead, which is
called once the chunk can be safely considered as transmitted.
The check removed from sctp_outq_flush is now performed by
sctp_stream_outq_migrate, which is only called during assoc setup.
(sctp_sendmsg() also checks for it)
The only operation that is foreseen but not yet added here is a way to
signalize that a new packet is starting or that the packet is done, for
round robin scheduler per packet, but is intentionally left to the
patch that actually implements it.
Support for I-DATA chunks, also described in this RFC, with user message
interleaving is straightforward as it just requires the schedulers to
probe for the feature and ignore datamsg boundaries when dequeueing.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:13 +07:00
|
|
|
struct list_head outq; /* chunks enqueued by this stream */
|
sctp: introduce priority based stream scheduler
This patch introduces RFC Draft ndata section 3.4 Priority Based
Scheduler (SCTP_SS_PRIO).
It works by having a struct sctp_stream_priority for each priority
configured. This struct is then enlisted on a queue ordered per priority
if, and only if, there is a stream with data queued, so that dequeueing
is very straightforward: either finish current datamsg or simply dequeue
from the highest priority queued, which is the next stream pointed, and
that's it.
If there are multiple streams assigned with the same priority and with
data queued, it will do round robin amongst them while respecting
datamsgs boundaries (when not using idata chunks), to be reasonably
fair.
We intentionally don't maintain a list of priorities nor a list of all
streams with the same priority to save memory. The first would mean at
least 2 other pointers per priority (which, for 1000 priorities, that
can mean 16kB) and the second would also mean 2 other pointers but per
stream. As SCTP supports up to 65535 streams on a given asoc, that's
1MB. This impacts when giving a priority to some stream, as we have to
find out if the new priority is already being used and if we can free
the old one, and also when tearing down.
The new fields in struct sctp_stream_out_ext and sctp_stream are added
under a union because that memory is to be shared with other schedulers.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:16 +07:00
|
|
|
union {
|
|
|
|
struct {
|
|
|
|
/* Scheduled streams list */
|
|
|
|
struct list_head prio_list;
|
|
|
|
struct sctp_stream_priorities *prio_head;
|
|
|
|
};
|
2017-10-04 05:20:17 +07:00
|
|
|
/* Fields used by RR scheduler */
|
|
|
|
struct {
|
|
|
|
struct list_head rr_list;
|
|
|
|
};
|
sctp: introduce priority based stream scheduler
This patch introduces RFC Draft ndata section 3.4 Priority Based
Scheduler (SCTP_SS_PRIO).
It works by having a struct sctp_stream_priority for each priority
configured. This struct is then enlisted on a queue ordered per priority
if, and only if, there is a stream with data queued, so that dequeueing
is very straightforward: either finish current datamsg or simply dequeue
from the highest priority queued, which is the next stream pointed, and
that's it.
If there are multiple streams assigned with the same priority and with
data queued, it will do round robin amongst them while respecting
datamsgs boundaries (when not using idata chunks), to be reasonably
fair.
We intentionally don't maintain a list of priorities nor a list of all
streams with the same priority to save memory. The first would mean at
least 2 other pointers per priority (which, for 1000 priorities, that
can mean 16kB) and the second would also mean 2 other pointers but per
stream. As SCTP supports up to 65535 streams on a given asoc, that's
1MB. This impacts when giving a priority to some stream, as we have to
find out if the new priority is already being used and if we can free
the old one, and also when tearing down.
The new fields in struct sctp_stream_out_ext and sctp_stream are added
under a union because that memory is to be shared with other schedulers.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:16 +07:00
|
|
|
};
|
2017-10-04 05:20:11 +07:00
|
|
|
};
|
|
|
|
|
2017-01-06 21:18:33 +07:00
|
|
|
struct sctp_stream_out {
|
2017-12-08 20:04:02 +07:00
|
|
|
union {
|
|
|
|
__u32 mid;
|
|
|
|
__u16 ssn;
|
|
|
|
};
|
2017-12-08 20:04:09 +07:00
|
|
|
__u32 mid_uo;
|
2017-10-04 05:20:11 +07:00
|
|
|
struct sctp_stream_out_ext *ext;
|
2017-12-08 20:04:09 +07:00
|
|
|
__u8 state;
|
2017-01-06 21:18:33 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct sctp_stream_in {
|
2017-12-08 20:04:03 +07:00
|
|
|
union {
|
|
|
|
__u32 mid;
|
|
|
|
__u16 ssn;
|
|
|
|
};
|
2017-12-08 20:04:09 +07:00
|
|
|
__u32 mid_uo;
|
2017-12-08 20:04:03 +07:00
|
|
|
__u32 fsn;
|
2017-12-08 20:04:09 +07:00
|
|
|
__u32 fsn_uo;
|
2017-12-08 20:04:04 +07:00
|
|
|
char pd_mode;
|
2017-12-08 20:04:09 +07:00
|
|
|
char pd_mode_uo;
|
2017-01-06 21:18:33 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
struct sctp_stream {
|
2018-08-11 00:11:43 +07:00
|
|
|
struct flex_array *out;
|
|
|
|
struct flex_array *in;
|
2017-01-06 21:18:33 +07:00
|
|
|
__u16 outcnt;
|
|
|
|
__u16 incnt;
|
sctp: introduce stream scheduler foundations
This patch introduces the hooks necessary to do stream scheduling, as
per RFC Draft ndata. It also introduces the first scheduler, which is
what we do today but now factored out: first come first served (FCFS).
With stream scheduling now we have to track which chunk was enqueued on
which stream and be able to select another other than the in front of
the main outqueue. So we introduce a list on sctp_stream_out_ext
structure for this purpose.
We reuse sctp_chunk->transmitted_list space for the list above, as the
chunk cannot belong to the two lists at the same time. By using the
union in there, we can have distinct names for these moments.
sctp_sched_ops are the operations expected to be implemented by each
scheduler. The dequeueing is a bit particular to this implementation but
it is to match how we dequeue packets today. We first dequeue and then
check if it fits the packet and if not, we requeue it at head. Thus why
we don't have a peek operation but have dequeue_done instead, which is
called once the chunk can be safely considered as transmitted.
The check removed from sctp_outq_flush is now performed by
sctp_stream_outq_migrate, which is only called during assoc setup.
(sctp_sendmsg() also checks for it)
The only operation that is foreseen but not yet added here is a way to
signalize that a new packet is starting or that the packet is done, for
round robin scheduler per packet, but is intentionally left to the
patch that actually implements it.
Support for I-DATA chunks, also described in this RFC, with user message
interleaving is straightforward as it just requires the schedulers to
probe for the feature and ignore datamsg boundaries when dequeueing.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:13 +07:00
|
|
|
/* Current stream being sent, if any */
|
|
|
|
struct sctp_stream_out *out_curr;
|
sctp: introduce priority based stream scheduler
This patch introduces RFC Draft ndata section 3.4 Priority Based
Scheduler (SCTP_SS_PRIO).
It works by having a struct sctp_stream_priority for each priority
configured. This struct is then enlisted on a queue ordered per priority
if, and only if, there is a stream with data queued, so that dequeueing
is very straightforward: either finish current datamsg or simply dequeue
from the highest priority queued, which is the next stream pointed, and
that's it.
If there are multiple streams assigned with the same priority and with
data queued, it will do round robin amongst them while respecting
datamsgs boundaries (when not using idata chunks), to be reasonably
fair.
We intentionally don't maintain a list of priorities nor a list of all
streams with the same priority to save memory. The first would mean at
least 2 other pointers per priority (which, for 1000 priorities, that
can mean 16kB) and the second would also mean 2 other pointers but per
stream. As SCTP supports up to 65535 streams on a given asoc, that's
1MB. This impacts when giving a priority to some stream, as we have to
find out if the new priority is already being used and if we can free
the old one, and also when tearing down.
The new fields in struct sctp_stream_out_ext and sctp_stream are added
under a union because that memory is to be shared with other schedulers.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:16 +07:00
|
|
|
union {
|
|
|
|
/* Fields used by priority scheduler */
|
|
|
|
struct {
|
|
|
|
/* List of priorities scheduled */
|
|
|
|
struct list_head prio_list;
|
|
|
|
};
|
2017-10-04 05:20:17 +07:00
|
|
|
/* Fields used by RR scheduler */
|
|
|
|
struct {
|
|
|
|
/* List of streams scheduled */
|
|
|
|
struct list_head rr_list;
|
|
|
|
/* The next stream in line */
|
|
|
|
struct sctp_stream_out_ext *rr_next;
|
|
|
|
};
|
sctp: introduce priority based stream scheduler
This patch introduces RFC Draft ndata section 3.4 Priority Based
Scheduler (SCTP_SS_PRIO).
It works by having a struct sctp_stream_priority for each priority
configured. This struct is then enlisted on a queue ordered per priority
if, and only if, there is a stream with data queued, so that dequeueing
is very straightforward: either finish current datamsg or simply dequeue
from the highest priority queued, which is the next stream pointed, and
that's it.
If there are multiple streams assigned with the same priority and with
data queued, it will do round robin amongst them while respecting
datamsgs boundaries (when not using idata chunks), to be reasonably
fair.
We intentionally don't maintain a list of priorities nor a list of all
streams with the same priority to save memory. The first would mean at
least 2 other pointers per priority (which, for 1000 priorities, that
can mean 16kB) and the second would also mean 2 other pointers but per
stream. As SCTP supports up to 65535 streams on a given asoc, that's
1MB. This impacts when giving a priority to some stream, as we have to
find out if the new priority is already being used and if we can free
the old one, and also when tearing down.
The new fields in struct sctp_stream_out_ext and sctp_stream are added
under a union because that memory is to be shared with other schedulers.
See-also: https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-13
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-04 05:20:16 +07:00
|
|
|
};
|
2017-12-08 20:04:01 +07:00
|
|
|
struct sctp_stream_interleave *si;
|
2017-01-06 21:18:33 +07:00
|
|
|
};
|
|
|
|
|
2018-08-11 00:11:42 +07:00
|
|
|
static inline struct sctp_stream_out *sctp_stream_out(
|
|
|
|
const struct sctp_stream *stream,
|
|
|
|
__u16 sid)
|
|
|
|
{
|
2018-08-11 00:11:43 +07:00
|
|
|
return flex_array_get(stream->out, sid);
|
2018-08-11 00:11:42 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct sctp_stream_in *sctp_stream_in(
|
|
|
|
const struct sctp_stream *stream,
|
|
|
|
__u16 sid)
|
|
|
|
{
|
2018-08-11 00:11:43 +07:00
|
|
|
return flex_array_get(stream->in, sid);
|
2018-08-11 00:11:42 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
#define SCTP_SO(s, i) sctp_stream_out((s), (i))
|
|
|
|
#define SCTP_SI(s, i) sctp_stream_in((s), (i))
|
|
|
|
|
2017-01-06 21:18:33 +07:00
|
|
|
#define SCTP_STREAM_CLOSED 0x00
|
|
|
|
#define SCTP_STREAM_OPEN 0x01
|
|
|
|
|
2017-12-08 20:04:01 +07:00
|
|
|
static inline __u16 sctp_datachk_len(const struct sctp_stream *stream)
|
|
|
|
{
|
|
|
|
return stream->si->data_chunk_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __u16 sctp_datahdr_len(const struct sctp_stream *stream)
|
|
|
|
{
|
|
|
|
return stream->si->data_chunk_len - sizeof(struct sctp_chunkhdr);
|
|
|
|
}
|
|
|
|
|
sctp: implement validate_ftsn for sctp_stream_interleave
validate_ftsn is added as a member of sctp_stream_interleave, used to
validate ssn/chunk type for fwdtsn or mid (message id)/chunk type for
ifwdtsn, called in sctp_sf_eat_fwd_tsn, just as validate_data.
If this check fails, an abort packet will be sent, as said in section
2.3.1 of RFC8260.
As ifwdtsn and fwdtsn chunks have different length, it also defines
ftsn_chunk_len for sctp_stream_interleave to describe the chunk size.
Then it replaces all sizeof(struct sctp_fwdtsn_chunk) with
sctp_ftsnchk_len.
It also adds the process for ifwdtsn in rx path. As Marcelo pointed
out, there's no need to add event table for ifwdtsn, but just share
prsctp_chunk_event_table with fwdtsn's. It would drop fwdtsn chunk
for ifwdtsn and drop ifwdtsn chunk for fwdtsn by calling validate_ftsn
in sctp_sf_eat_fwd_tsn.
After this patch, the ifwdtsn can be accepted.
Note that this patch also removes the sctp.intl_enable check for
idata chunks in sctp_chunk_event_lookup, as it will do this check
in validate_data later.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo R. Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-12-14 23:41:27 +07:00
|
|
|
static inline __u16 sctp_ftsnchk_len(const struct sctp_stream *stream)
|
|
|
|
{
|
|
|
|
return stream->si->ftsn_chunk_len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __u16 sctp_ftsnhdr_len(const struct sctp_stream *stream)
|
|
|
|
{
|
|
|
|
return stream->si->ftsn_chunk_len - sizeof(struct sctp_chunkhdr);
|
|
|
|
}
|
|
|
|
|
2012-12-01 11:49:42 +07:00
|
|
|
/* SCTP_GET_ASSOC_STATS counters.
 *
 * In the field names below an 'i' prefix marks the inbound (received)
 * counter and an 'o' prefix the outbound (sent) counter of each pair.
 */
|
|
|
|
struct sctp_priv_assoc_stats {
|
|
|
|
/* Maximum observed RTO in the association during subsequent
 * observations. Value is set to 0 if no RTO measurement took place.
 * The transport where the max RTO was observed is returned in
 * obs_rto_ipaddr.
 */
|
|
|
|
struct sockaddr_storage obs_rto_ipaddr;
|
|
|
|
__u64 max_obs_rto;
|
|
|
|
/* Total SACKs received (isacks) and sent (osacks) */
|
|
|
|
__u64 isacks;
|
|
|
|
__u64 osacks;
|
|
|
|
/* Total packets sent (opackets) and received (ipackets) */
|
|
|
|
__u64 opackets;
|
|
|
|
__u64 ipackets;
|
|
|
|
/* Total retransmitted chunks */
|
|
|
|
__u64 rtxchunks;
|
|
|
|
/* Count of TSNs received that were greater than the next expected one */
|
|
|
|
__u64 outofseqtsns;
|
|
|
|
/* Duplicate chunks received */
|
|
|
|
__u64 idupchunks;
|
|
|
|
/* Gap Ack Blocks received */
|
|
|
|
__u64 gapcnt;
|
|
|
|
/* Unordered data chunks sent (ouodchunks) and received (iuodchunks) */
|
|
|
|
__u64 ouodchunks;
|
|
|
|
__u64 iuodchunks;
|
|
|
|
/* Ordered data chunks sent (oodchunks) and received (iodchunks) */
|
|
|
|
__u64 oodchunks;
|
|
|
|
__u64 iodchunks;
|
|
|
|
/* Control chunks sent (octrlchunks) and received (ictrlchunks) */
|
|
|
|
__u64 octrlchunks;
|
|
|
|
__u64 ictrlchunks;
|
|
|
|
};
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* RFC2960
|
|
|
|
*
|
|
|
|
* 12. Recommended Transmission Control Block (TCB) Parameters
|
|
|
|
*
|
|
|
|
* This section details a recommended set of parameters that should
|
|
|
|
* be contained within the TCB for an implementation. This section is
|
|
|
|
* for illustrative purposes and should not be deemed as requirements
|
|
|
|
* on an implementation or as an exhaustive list of all parameters
|
|
|
|
* inside an SCTP TCB. Each implementation may need its own additional
|
|
|
|
* parameters for optimization.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* Here we have information about each individual association. */
|
|
|
|
struct sctp_association {
|
|
|
|
|
|
|
|
/* A base structure common to endpoint and association.
|
|
|
|
* In this context, it represents the association's view
|
|
|
|
* of the local endpoint of the association.
|
|
|
|
*/
|
|
|
|
struct sctp_ep_common base;
|
|
|
|
|
|
|
|
/* Associations on the same socket. */
|
|
|
|
struct list_head asocs;
|
|
|
|
|
|
|
|
/* association id. */
|
|
|
|
sctp_assoc_t assoc_id;
|
|
|
|
|
|
|
|
/* This is our parent endpoint. */
|
|
|
|
struct sctp_endpoint *ep;
|
|
|
|
|
|
|
|
/* These are those association elements needed in the cookie. */
|
|
|
|
struct sctp_cookie c;
|
|
|
|
|
|
|
|
/* This is all information about our peer. */
|
|
|
|
struct {
|
|
|
|
/* transport_addr_list
|
|
|
|
*
|
|
|
|
* Peer : A list of SCTP transport addresses that the
|
|
|
|
* Transport : peer is bound to. This information is derived
|
|
|
|
* Address : from the INIT or INIT ACK and is used to
|
|
|
|
* List : associate an inbound packet with a given
|
|
|
|
* : association. Normally this information is
|
|
|
|
* : hashed or keyed for quick lookup and access
|
|
|
|
* : of the TCB.
|
2005-06-21 03:14:57 +07:00
|
|
|
* : The list is also initialized with the list
|
|
|
|
* : of addresses passed with the sctp_connectx()
|
|
|
|
* : call.
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
|
|
|
* It is a list of SCTP_transport's.
|
|
|
|
*/
|
|
|
|
struct list_head transport_addr_list;
|
|
|
|
|
2013-12-13 12:51:04 +07:00
|
|
|
/* rwnd
|
|
|
|
*
|
|
|
|
* Peer Rwnd : Current calculated value of the peer's rwnd.
|
|
|
|
*/
|
|
|
|
__u32 rwnd;
|
|
|
|
|
2005-06-21 03:14:57 +07:00
|
|
|
/* transport_count
|
|
|
|
*
|
|
|
|
* Peer : A count of the number of peer addresses
|
|
|
|
* Transport : in the Peer Transport Address List.
|
|
|
|
* Address :
|
|
|
|
* Count :
|
|
|
|
*/
|
|
|
|
__u16 transport_count;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* port
|
|
|
|
* The transport layer port number.
|
|
|
|
*/
|
|
|
|
__u16 port;
|
|
|
|
|
|
|
|
/* primary_path
|
|
|
|
*
|
|
|
|
* Primary : This is the current primary destination
|
|
|
|
* Path : transport address of the peer endpoint. It
|
|
|
|
* : may also specify a source transport address
|
|
|
|
* : on this endpoint.
|
|
|
|
*
|
|
|
|
* All of these paths live on transport_addr_list.
|
|
|
|
*
|
|
|
|
* At the bakeoffs, we discovered that the intent of
|
|
|
|
* primaryPath is that it only changes when the ULP
|
|
|
|
* asks to have it changed. We add the activePath to
|
|
|
|
* designate the connection we are currently using to
|
|
|
|
* transmit new data and most control chunks.
|
|
|
|
*/
|
|
|
|
struct sctp_transport *primary_path;
|
|
|
|
|
|
|
|
/* Cache the primary path address here, when we
|
|
|
|
* need an address for msg_name.
|
|
|
|
*/
|
|
|
|
union sctp_addr primary_addr;
|
|
|
|
|
|
|
|
/* active_path
|
|
|
|
* The path that we are currently using to
|
|
|
|
* transmit new data and most control chunks.
|
|
|
|
*/
|
|
|
|
struct sctp_transport *active_path;
|
|
|
|
|
|
|
|
/* retran_path
|
|
|
|
*
|
|
|
|
* RFC2960 6.4 Multi-homed SCTP Endpoints
|
|
|
|
* ...
|
|
|
|
* Furthermore, when its peer is multi-homed, an
|
|
|
|
* endpoint SHOULD try to retransmit a chunk to an
|
|
|
|
* active destination transport address that is
|
|
|
|
* different from the last destination address to
|
|
|
|
* which the DATA chunk was sent.
|
|
|
|
*/
|
|
|
|
struct sctp_transport *retran_path;
|
|
|
|
|
|
|
|
/* Pointer to last transport I have sent on. */
|
|
|
|
struct sctp_transport *last_sent_to;
|
|
|
|
|
|
|
|
/* This is the last transport I have received DATA on. */
|
|
|
|
struct sctp_transport *last_data_from;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Mapping An array of bits or bytes indicating which out of
|
|
|
|
* Array order TSN's have been received (relative to the
|
|
|
|
* Last Rcvd TSN). If no gaps exist, i.e. no out of
|
|
|
|
* order packets have been received, this array
|
|
|
|
* will be set to all zero. This structure may be
|
|
|
|
* in the form of a circular buffer or bit array.
|
|
|
|
*
|
|
|
|
* Last Rcvd : This is the last TSN received in
|
|
|
|
* TSN : sequence. This value is set initially by
|
|
|
|
* : taking the peer's Initial TSN, received in
|
|
|
|
* : the INIT or INIT ACK chunk, and subtracting
|
|
|
|
* : one from it.
|
|
|
|
*
|
|
|
|
* Throughout most of the specification this is called the
|
|
|
|
* "Cumulative TSN ACK Point". In this case, we
|
|
|
|
* ignore the advice in 12.2 in favour of the term
|
|
|
|
* used in the bulk of the text. This value is hidden
|
|
|
|
* in tsn_map--we get it by calling sctp_tsnmap_get_ctsn().
|
|
|
|
*/
|
|
|
|
struct sctp_tsnmap tsn_map;
|
|
|
|
|
2013-12-13 12:51:04 +07:00
|
|
|
/* This mask is used to disable sending the ASCONF chunk
|
|
|
|
* with specified parameter to peer.
|
|
|
|
*/
|
|
|
|
__be16 addip_disabled_mask;
|
|
|
|
|
|
|
|
/* These are capabilities which our peer advertised. */
|
|
|
|
__u8 ecn_capable:1, /* Can peer do ECN? */
|
|
|
|
ipv4_address:1, /* Peer understands IPv4 addresses? */
|
|
|
|
ipv6_address:1, /* Peer understands IPv6 addresses? */
|
|
|
|
hostname_address:1, /* Peer understands DNS addresses? */
|
|
|
|
asconf_capable:1, /* Does peer support ADDIP? */
|
|
|
|
prsctp_capable:1, /* Can peer do PR-SCTP? */
|
2017-01-17 23:44:45 +07:00
|
|
|
reconf_capable:1, /* Can peer do RE-CONFIG? */
|
2013-12-13 12:51:04 +07:00
|
|
|
auth_capable:1; /* Is peer doing SCTP-AUTH? */
|
|
|
|
|
2015-11-30 21:17:06 +07:00
|
|
|
/* sack_needed : This flag indicates if the next received
|
2005-04-17 05:20:36 +07:00
|
|
|
* : packet is to be responded to with a
|
2015-11-30 21:17:06 +07:00
|
|
|
* : SACK. This is initialized to 0. When a packet
|
|
|
|
* : is received sack_cnt is incremented. If this value
|
2005-04-17 05:20:36 +07:00
|
|
|
* : reaches 2 or more, a SACK is sent and the
|
|
|
|
* : value is reset to 0. Note: This is used only
|
|
|
|
* : when no DATA chunks are received out of
|
|
|
|
* : order. When DATA chunks are out of order,
|
|
|
|
* : SACK's are not delayed (see Section 6).
|
|
|
|
*/
|
2015-11-30 21:17:06 +07:00
|
|
|
__u8 sack_needed:1, /* Do we need to sack the peer? */
|
sctp: start t5 timer only when peer rwnd is 0 and local state is SHUTDOWN_PENDING
when A sends a data to B, then A close() and enter into SHUTDOWN_PENDING
state, if B neither claim his rwnd is 0 nor send SACK for this data, A
will keep retransmitting this data until t5 timeout, Max.Retrans times
can't work anymore, which is bad.
if B's rwnd is not 0, it should send abort after Max.Retrans times, only
when B's rwnd == 0 and A's retransmitting beyonds Max.Retrans times, A
will start t5 timer, which is also commit f8d960524328 ("sctp: Enforce
retransmission limit during shutdown") means, but it lacks the condition
peer rwnd == 0.
so fix it by adding a bit (zero_window_announced) in peer to record if
the last rwnd is 0. If it was, zero_window_announced will be set. and use
this bit to decide if start t5 timer when local.state is SHUTDOWN_PENDING.
Fixes: commit f8d960524328 ("sctp: Enforce retransmission limit during shutdown")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-12-05 14:35:36 +07:00
|
|
|
sack_generation:1,
|
|
|
|
zero_window_announced:1;
|
2008-05-10 05:13:26 +07:00
|
|
|
__u32 sack_cnt;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-12-21 07:07:04 +07:00
|
|
|
__u32 adaptation_ind; /* Adaptation Code point. */
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2006-11-21 08:25:32 +07:00
|
|
|
struct sctp_inithdr_host i;
|
2005-04-17 05:20:36 +07:00
|
|
|
void *cookie;
|
2013-12-13 12:51:04 +07:00
|
|
|
int cookie_len;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* ADDIP Section 4.2 Upon reception of an ASCONF Chunk.
|
|
|
|
* C1) ... "Peer-Serial-Number'. This value MUST be initialized to the
|
|
|
|
* Initial TSN Value minus 1
|
|
|
|
*/
|
|
|
|
__u32 addip_serial;
|
2007-10-09 15:15:59 +07:00
|
|
|
|
|
|
|
/* SCTP-AUTH: We need to know the peer's random number, hmac list
|
|
|
|
* and authenticated chunk list. All that is part of the
|
|
|
|
* cookie and these are just pointers to those locations
|
|
|
|
*/
|
2017-07-17 10:29:57 +07:00
|
|
|
struct sctp_random_param *peer_random;
|
2017-07-17 10:29:58 +07:00
|
|
|
struct sctp_chunks_param *peer_chunks;
|
2017-07-17 10:29:59 +07:00
|
|
|
struct sctp_hmac_algo_param *peer_hmacs;
|
2005-04-17 05:20:36 +07:00
|
|
|
} peer;
|
|
|
|
|
|
|
|
/* State : A state variable indicating what state the
|
|
|
|
* : association is in, i.e. COOKIE-WAIT,
|
|
|
|
* : COOKIE-ECHOED, ESTABLISHED, SHUTDOWN-PENDING,
|
|
|
|
* : SHUTDOWN-SENT, SHUTDOWN-RECEIVED, SHUTDOWN-ACK-SENT.
|
|
|
|
*
|
|
|
|
* Note: No "CLOSED" state is illustrated since if a
|
|
|
|
* association is "CLOSED" its TCB SHOULD be removed.
|
|
|
|
*
|
|
|
|
* In this implementation we DO have a CLOSED
|
|
|
|
* state which is used during initiation and shutdown.
|
|
|
|
*
|
|
|
|
* State takes values from SCTP_STATE_*.
|
|
|
|
*/
|
2017-08-05 18:59:59 +07:00
|
|
|
enum sctp_state state;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Overall : The overall association error count.
|
|
|
|
* Error Count : [Clear this any time I get something.]
|
|
|
|
*/
|
|
|
|
int overall_error_count;
|
|
|
|
|
2013-12-13 12:51:04 +07:00
|
|
|
/* The cookie life I award for any cookie. */
|
|
|
|
ktime_t cookie_life;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* These are the association's initial, max, and min RTO values.
|
|
|
|
* These values will be initialized by system defaults, but can
|
|
|
|
* be modified via the SCTP_RTOINFO socket option.
|
|
|
|
*/
|
2006-01-18 02:55:17 +07:00
|
|
|
unsigned long rto_initial;
|
|
|
|
unsigned long rto_max;
|
|
|
|
unsigned long rto_min;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Maximum number of new data packets that can be sent in a burst. */
|
|
|
|
int max_burst;
|
|
|
|
|
|
|
|
/* This is the max_retrans value for the association. This value will
|
|
|
|
* be initialized from system defaults, but can be
|
|
|
|
* modified by the SCTP_ASSOCINFO socket option.
|
|
|
|
*/
|
|
|
|
int max_retrans;
|
|
|
|
|
2012-07-21 14:56:07 +07:00
|
|
|
/* This is the partially failed retrans value for the transport
|
|
|
|
* and will be initialized from the assocs value. This can be
|
|
|
|
* changed using the SCTP_PEER_ADDR_THLDS socket option
|
|
|
|
*/
|
|
|
|
int pf_retrans;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Maximum number of times the endpoint will retransmit INIT */
|
|
|
|
__u16 max_init_attempts;
|
|
|
|
|
|
|
|
/* How many times have we resent an INIT? */
|
|
|
|
__u16 init_retries;
|
|
|
|
|
|
|
|
/* The largest timeout or RTO value to use in attempting an INIT */
|
2006-01-18 02:55:17 +07:00
|
|
|
unsigned long max_init_timeo;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-12-23 02:36:46 +07:00
|
|
|
/* Heartbeat interval: The endpoint sends out a Heartbeat chunk to
|
|
|
|
* the destination address every heartbeat interval. This value
|
|
|
|
* will be inherited by all new transports.
|
|
|
|
*/
|
2006-01-18 02:55:17 +07:00
|
|
|
unsigned long hbinterval;
|
2005-12-23 02:36:46 +07:00
|
|
|
|
|
|
|
/* This is the max_retrans value for new transports in the
|
|
|
|
* association.
|
|
|
|
*/
|
|
|
|
__u16 pathmaxrxt;
|
|
|
|
|
2018-07-02 17:21:12 +07:00
|
|
|
__u32 flowlabel;
|
|
|
|
__u8 dscp;
|
|
|
|
|
2007-06-08 01:21:05 +07:00
|
|
|
/* Flag that path mtu update is pending */
|
|
|
|
__u8 pmtu_pending;
|
|
|
|
|
2005-12-23 02:36:46 +07:00
|
|
|
/* Association : The smallest PMTU discovered for all of the
|
|
|
|
* PMTU : peer's transport addresses.
|
|
|
|
*/
|
|
|
|
__u32 pathmtu;
|
|
|
|
|
2007-12-21 04:56:32 +07:00
|
|
|
/* Flags controlling Heartbeat, SACK delay, and Path MTU Discovery. */
|
2005-12-23 02:36:46 +07:00
|
|
|
__u32 param_flags;
|
|
|
|
|
2013-12-13 12:51:04 +07:00
|
|
|
__u32 sackfreq;
|
2006-01-18 02:55:17 +07:00
|
|
|
/* SACK delay timeout */
|
|
|
|
unsigned long sackdelay;
|
|
|
|
|
|
|
|
unsigned long timeouts[SCTP_NUM_TIMEOUT_TYPES];
|
2005-04-17 05:20:36 +07:00
|
|
|
struct timer_list timers[SCTP_NUM_TIMEOUT_TYPES];
|
|
|
|
|
|
|
|
/* Transport to which SHUTDOWN chunk was last sent. */
|
|
|
|
struct sctp_transport *shutdown_last_sent_to;
|
|
|
|
|
2005-06-21 03:14:57 +07:00
|
|
|
/* Transport to which INIT chunk was last sent. */
|
|
|
|
struct sctp_transport *init_last_sent_to;
|
|
|
|
|
2013-12-13 12:51:04 +07:00
|
|
|
/* How many times have we resent a SHUTDOWN */
|
|
|
|
int shutdown_retries;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Next TSN : The next TSN number to be assigned to a new
|
|
|
|
* : DATA chunk. This is sent in the INIT or INIT
|
|
|
|
* : ACK chunk to the peer and incremented each
|
|
|
|
* : time a DATA chunk is assigned a TSN
|
|
|
|
* : (normally just prior to transmit or during
|
|
|
|
* : fragmentation).
|
|
|
|
*/
|
|
|
|
__u32 next_tsn;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Last Rcvd : This is the last TSN received in sequence. This value
|
|
|
|
* TSN : is set initially by taking the peer's Initial TSN,
|
|
|
|
* : received in the INIT or INIT ACK chunk, and
|
|
|
|
* : subtracting one from it.
|
|
|
|
*
|
|
|
|
* Most of RFC 2960 refers to this as the Cumulative TSN Ack Point.
|
|
|
|
*/
|
|
|
|
|
|
|
|
__u32 ctsn_ack_point;
|
|
|
|
|
|
|
|
/* PR-SCTP Advanced.Peer.Ack.Point */
|
|
|
|
__u32 adv_peer_ack_point;
|
|
|
|
|
|
|
|
/* Highest TSN that is acknowledged by incoming SACKs. */
|
|
|
|
__u32 highest_sacked;
|
|
|
|
|
2010-05-01 09:41:10 +07:00
|
|
|
/* TSN marking the fast recovery exit point */
|
|
|
|
__u32 fast_recovery_exit;
|
|
|
|
|
|
|
|
/* Flag to track the current fast recovery state */
|
|
|
|
__u8 fast_recovery;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* The number of unacknowledged data chunks. Reported through
|
|
|
|
* the SCTP_STATUS sockopt.
|
|
|
|
*/
|
|
|
|
__u16 unack_data;
|
|
|
|
|
2008-03-01 02:40:56 +07:00
|
|
|
/* The total number of data chunks that we've had to retransmit
|
|
|
|
* as the result of a T3 timer expiration
|
|
|
|
*/
|
|
|
|
__u32 rtx_data_chunks;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* This is the association's receive buffer space. This value is used
|
|
|
|
* to set a_rwnd field in an INIT or a SACK chunk.
|
|
|
|
*/
|
|
|
|
__u32 rwnd;
|
|
|
|
|
|
|
|
/* This is the last advertised value of rwnd over a SACK chunk. */
|
|
|
|
__u32 a_rwnd;
|
|
|
|
|
Revert "net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer"
This reverts commit ef2820a735f7 ("net: sctp: Fix a_rwnd/rwnd management
to reflect real state of the receiver's buffer") as it introduced a
serious performance regression on SCTP over IPv4 and IPv6, though a not
as dramatic on the latter. Measurements are on 10Gbit/s with ixgbe NICs.
Current state:
[root@Lab200slot2 ~]# iperf3 --sctp -4 -c 192.168.241.3 -V -l 1452 -t 60
iperf version 3.0.1 (10 January 2014)
Linux Lab200slot2 3.14.0 #1 SMP Thu Apr 3 23:18:29 EDT 2014 x86_64
Time: Fri, 11 Apr 2014 17:56:21 GMT
Connecting to host 192.168.241.3, port 5201
Cookie: Lab200slot2.1397238981.812898.548918
[ 4] local 192.168.241.2 port 38616 connected to 192.168.241.3 port 5201
Starting Test: protocol: SCTP, 1 streams, 1452 byte blocks, omitting 0 seconds, 60 second test
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.09 sec 20.8 MBytes 161 Mbits/sec
[ 4] 1.09-2.13 sec 10.8 MBytes 86.8 Mbits/sec
[ 4] 2.13-3.15 sec 3.57 MBytes 29.5 Mbits/sec
[ 4] 3.15-4.16 sec 4.33 MBytes 35.7 Mbits/sec
[ 4] 4.16-6.21 sec 10.4 MBytes 42.7 Mbits/sec
[ 4] 6.21-6.21 sec 0.00 Bytes 0.00 bits/sec
[ 4] 6.21-7.35 sec 34.6 MBytes 253 Mbits/sec
[ 4] 7.35-11.45 sec 22.0 MBytes 45.0 Mbits/sec
[ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec
[ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec
[ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec
[ 4] 11.45-12.51 sec 16.0 MBytes 126 Mbits/sec
[ 4] 12.51-13.59 sec 20.3 MBytes 158 Mbits/sec
[ 4] 13.59-14.65 sec 13.4 MBytes 107 Mbits/sec
[ 4] 14.65-16.79 sec 33.3 MBytes 130 Mbits/sec
[ 4] 16.79-16.79 sec 0.00 Bytes 0.00 bits/sec
[ 4] 16.79-17.82 sec 5.94 MBytes 48.7 Mbits/sec
(etc)
[root@Lab200slot2 ~]# iperf3 --sctp -6 -c 2001:db8:0:f101::1 -V -l 1400 -t 60
iperf version 3.0.1 (10 January 2014)
Linux Lab200slot2 3.14.0 #1 SMP Thu Apr 3 23:18:29 EDT 2014 x86_64
Time: Fri, 11 Apr 2014 19:08:41 GMT
Connecting to host 2001:db8:0:f101::1, port 5201
Cookie: Lab200slot2.1397243321.714295.2b3f7c
[ 4] local 2001:db8:0:f101::2 port 55804 connected to 2001:db8:0:f101::1 port 5201
Starting Test: protocol: SCTP, 1 streams, 1400 byte blocks, omitting 0 seconds, 60 second test
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.00 sec 169 MBytes 1.42 Gbits/sec
[ 4] 1.00-2.00 sec 201 MBytes 1.69 Gbits/sec
[ 4] 2.00-3.00 sec 188 MBytes 1.58 Gbits/sec
[ 4] 3.00-4.00 sec 174 MBytes 1.46 Gbits/sec
[ 4] 4.00-5.00 sec 165 MBytes 1.39 Gbits/sec
[ 4] 5.00-6.00 sec 199 MBytes 1.67 Gbits/sec
[ 4] 6.00-7.00 sec 163 MBytes 1.36 Gbits/sec
[ 4] 7.00-8.00 sec 174 MBytes 1.46 Gbits/sec
[ 4] 8.00-9.00 sec 193 MBytes 1.62 Gbits/sec
[ 4] 9.00-10.00 sec 196 MBytes 1.65 Gbits/sec
[ 4] 10.00-11.00 sec 157 MBytes 1.31 Gbits/sec
[ 4] 11.00-12.00 sec 175 MBytes 1.47 Gbits/sec
[ 4] 12.00-13.00 sec 192 MBytes 1.61 Gbits/sec
[ 4] 13.00-14.00 sec 199 MBytes 1.67 Gbits/sec
(etc)
After patch:
[root@Lab200slot2 ~]# iperf3 --sctp -4 -c 192.168.240.3 -V -l 1452 -t 60
iperf version 3.0.1 (10 January 2014)
Linux Lab200slot2 3.14.0+ #1 SMP Mon Apr 14 12:06:40 EDT 2014 x86_64
Time: Mon, 14 Apr 2014 16:40:48 GMT
Connecting to host 192.168.240.3, port 5201
Cookie: Lab200slot2.1397493648.413274.65e131
[ 4] local 192.168.240.2 port 50548 connected to 192.168.240.3 port 5201
Starting Test: protocol: SCTP, 1 streams, 1452 byte blocks, omitting 0 seconds, 60 second test
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.00 sec 240 MBytes 2.02 Gbits/sec
[ 4] 1.00-2.00 sec 239 MBytes 2.01 Gbits/sec
[ 4] 2.00-3.00 sec 240 MBytes 2.01 Gbits/sec
[ 4] 3.00-4.00 sec 239 MBytes 2.00 Gbits/sec
[ 4] 4.00-5.00 sec 245 MBytes 2.05 Gbits/sec
[ 4] 5.00-6.00 sec 240 MBytes 2.01 Gbits/sec
[ 4] 6.00-7.00 sec 240 MBytes 2.02 Gbits/sec
[ 4] 7.00-8.00 sec 239 MBytes 2.01 Gbits/sec
With the reverted patch applied, the SCTP/IPv4 performance is back
to normal on latest upstream for IPv4 and IPv6 and has same throughput
as 3.4.2 test kernel, steady and interval reports are smooth again.
Fixes: ef2820a735f7 ("net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer")
Reported-by: Peter Butler <pbutler@sonusnet.com>
Reported-by: Dongsheng Song <dongsheng.song@gmail.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Peter Butler <pbutler@sonusnet.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nsn.com>
Cc: Alexander Sverdlin <alexander.sverdlin@nsn.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-15 02:45:17 +07:00
|
|
|
/* Number of bytes by which the rwnd has slopped. The rwnd is allowed
|
|
|
|
* to slop over a maximum of the association's frag_point.
|
|
|
|
*/
|
|
|
|
__u32 rwnd_over;
|
|
|
|
|
|
|
|
/* Keeps track of rwnd pressure. This happens when we have
|
|
|
|
* a window, but no receive buffer (i.e. small packets). This one
|
|
|
|
* is released slowly (1 PMTU at a time).
|
|
|
|
*/
|
|
|
|
__u32 rwnd_press;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* This is the sndbuf size in use for the association.
|
|
|
|
* This corresponds to the sndbuf size for the association,
|
|
|
|
* as specified in the sk->sndbuf.
|
|
|
|
*/
|
|
|
|
int sndbuf_used;
|
|
|
|
|
2005-11-12 07:08:24 +07:00
|
|
|
/* This is the amount of memory that this association has allocated
|
|
|
|
* in the receive path at any given time.
|
|
|
|
*/
|
|
|
|
atomic_t rmem_alloc;
|
|
|
|
|
2005-04-17 05:20:36 +07:00
|
|
|
/* This is the wait queue head for send requests waiting on
|
|
|
|
* the association sndbuf space.
|
|
|
|
*/
|
|
|
|
wait_queue_head_t wait;
|
|
|
|
|
|
|
|
/* The message size at which SCTP fragmentation will occur. */
|
|
|
|
__u32 frag_point;
|
2009-09-05 05:21:00 +07:00
|
|
|
__u32 user_frag;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2005-06-21 03:14:57 +07:00
|
|
|
/* Counter used to count INIT errors. */
|
|
|
|
int init_err_counter;
|
|
|
|
|
|
|
|
/* Count the number of INIT cycles (for doubling timeout). */
|
|
|
|
int init_cycle;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Default send parameters. */
|
|
|
|
__u16 default_stream;
|
|
|
|
__u16 default_flags;
|
|
|
|
__u32 default_ppid;
|
|
|
|
__u32 default_context;
|
|
|
|
__u32 default_timetolive;
|
|
|
|
|
2006-12-14 07:34:22 +07:00
|
|
|
/* Default receive parameters */
|
|
|
|
__u32 default_rcv_context;
|
|
|
|
|
2017-01-06 21:18:33 +07:00
|
|
|
/* Stream arrays */
|
2017-05-31 15:36:31 +07:00
|
|
|
struct sctp_stream stream;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* All outbound chunks go through this structure. */
|
|
|
|
struct sctp_outq outqueue;
|
|
|
|
|
|
|
|
/* A smart pipe that will handle reordering and fragmentation,
|
|
|
|
* as well as handle passing events up to the ULP.
|
|
|
|
*/
|
|
|
|
struct sctp_ulpq ulpq;
|
|
|
|
|
|
|
|
/* Last TSN that caused an ECNE Chunk to be sent. */
|
|
|
|
__u32 last_ecne_tsn;
|
|
|
|
|
|
|
|
/* Last TSN that caused a CWR Chunk to be sent. */
|
|
|
|
__u32 last_cwr_tsn;
|
|
|
|
|
|
|
|
/* How many duplicated TSNs have we seen? */
|
|
|
|
int numduptsns;
|
|
|
|
|
|
|
|
/* These are to support
|
|
|
|
* "SCTP Extensions for Dynamic Reconfiguration of IP Addresses
|
|
|
|
* and Enforcement of Flow and Message Limits"
|
|
|
|
* <draft-ietf-tsvwg-addip-sctp-02.txt>
|
|
|
|
* or "ADDIP" for short.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* ADDIP Section 4.1.1 Congestion Control of ASCONF Chunks
|
|
|
|
*
|
|
|
|
* R1) One and only one ASCONF Chunk MAY be in transit and
|
|
|
|
* unacknowledged at any one time. If a sender, after sending
|
|
|
|
* an ASCONF chunk, decides it needs to transfer another
|
|
|
|
* ASCONF Chunk, it MUST wait until the ASCONF-ACK Chunk
|
|
|
|
* returns from the previous ASCONF Chunk before sending a
|
|
|
|
* subsequent ASCONF. Note this restriction binds each side,
|
|
|
|
* so at any time two ASCONF may be in-transit on any given
|
|
|
|
* association (one sent from each endpoint).
|
|
|
|
*
|
|
|
|
* [This is our one-and-only-one ASCONF in flight. If we do
|
|
|
|
* not have an ASCONF in flight, this is NULL.]
|
|
|
|
*/
|
|
|
|
struct sctp_chunk *addip_last_asconf;
|
|
|
|
|
2007-12-21 05:11:47 +07:00
|
|
|
/* ADDIP Section 5.2 Upon reception of an ASCONF Chunk.
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
2007-12-21 05:11:47 +07:00
|
|
|
* This is needed to implement items E1 - E4 of the updated
|
|
|
|
* spec. Here is the justification:
|
2005-04-17 05:20:36 +07:00
|
|
|
*
|
2007-12-21 05:11:47 +07:00
|
|
|
* Since the peer may bundle multiple ASCONF chunks toward us,
|
|
|
|
* we now need the ability to cache multiple ACKs. The section
|
|
|
|
* describes in detail how they are cached and cleaned up.
|
2005-04-17 05:20:36 +07:00
|
|
|
*/
|
2007-12-21 05:11:47 +07:00
|
|
|
struct list_head asconf_ack_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* These ASCONF chunks are waiting to be sent.
|
|
|
|
*
|
|
|
|
* These chunks can't be pushed to outqueue until receiving
|
|
|
|
* ASCONF_ACK for the previous ASCONF indicated by
|
|
|
|
* addip_last_asconf, so as to guarantee that only one ASCONF
|
|
|
|
* is in flight at any time.
|
|
|
|
*
|
|
|
|
* ADDIP Section 4.1.1 Congestion Control of ASCONF Chunks
|
|
|
|
*
|
|
|
|
* In defining the ASCONF Chunk transfer procedures, it is
|
|
|
|
* essential that these transfers MUST NOT cause congestion
|
|
|
|
* within the network. To achieve this, we place these
|
|
|
|
* restrictions on the transfer of ASCONF Chunks:
|
|
|
|
*
|
|
|
|
* R1) One and only one ASCONF Chunk MAY be in transit and
|
|
|
|
* unacknowledged at any one time. If a sender, after sending
|
|
|
|
* an ASCONF chunk, decides it needs to transfer another
|
|
|
|
* ASCONF Chunk, it MUST wait until the ASCONF-ACK Chunk
|
|
|
|
* returns from the previous ASCONF Chunk before sending a
|
|
|
|
* subsequent ASCONF. Note this restriction binds each side,
|
|
|
|
* so at any time two ASCONF may be in-transit on any given
|
|
|
|
* association (one sent from each endpoint).
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* [I really think this is EXACTLY the sort of intelligence
|
|
|
|
* which already resides in sctp_outq. Please move this
|
|
|
|
* queue and its supporting logic down there. --piggy]
|
|
|
|
*/
|
2005-07-09 11:47:49 +07:00
|
|
|
struct list_head addip_chunk_list;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* ADDIP Section 4.1 ASCONF Chunk Procedures
|
|
|
|
*
|
|
|
|
* A2) A serial number should be assigned to the Chunk. The
|
|
|
|
* serial number SHOULD be a monotonically increasing
|
|
|
|
* number. The serial number SHOULD be initialized at
|
|
|
|
* the start of the association to the same value as the
|
|
|
|
* Initial TSN and every time a new ASCONF chunk is created
|
|
|
|
* it is incremented by one after assigning the serial number
|
|
|
|
* to the newly created chunk.
|
|
|
|
*
|
|
|
|
* ADDIP
|
|
|
|
* 3.1.1 Address/Stream Configuration Change Chunk (ASCONF)
|
|
|
|
*
|
|
|
|
* Serial Number : 32 bits (unsigned integer)
|
|
|
|
*
|
|
|
|
* This value represents a Serial Number for the ASCONF
|
|
|
|
* Chunk. The valid range of Serial Number is from 0 to
|
|
|
|
* 4294967295 (2^32 - 1). Serial Numbers wrap back to 0
|
|
|
|
* after reaching 4294967295.
|
|
|
|
*/
|
|
|
|
__u32 addip_serial;
|
2011-04-26 18:19:36 +07:00
|
|
|
int src_out_of_asoc_ok;
|
2013-12-13 12:51:04 +07:00
|
|
|
union sctp_addr *asconf_addr_del_pending;
|
2011-06-16 15:14:34 +07:00
|
|
|
struct sctp_transport *new_transport;
|
2005-04-17 05:20:36 +07:00
|
|
|
|
2007-10-09 15:15:59 +07:00
|
|
|
/* SCTP AUTH: list of the endpoint shared keys. These
|
|
|
|
* keys are provided out of band by the user application
|
|
|
|
* and can't change during the lifetime of the association
|
|
|
|
*/
|
|
|
|
struct list_head endpoint_shared_keys;
|
|
|
|
|
|
|
|
/* SCTP AUTH:
|
|
|
|
* The current generated association shared key (secret)
|
|
|
|
*/
|
|
|
|
struct sctp_auth_bytes *asoc_shared_key;
|
2018-03-14 18:05:30 +07:00
|
|
|
struct sctp_shared_key *shkey;
|
2007-10-09 15:15:59 +07:00
|
|
|
|
|
|
|
/* SCTP AUTH: hmac id of the first peer requested algorithm
|
|
|
|
* that we support.
|
|
|
|
*/
|
|
|
|
__u16 default_hmac_id;
|
|
|
|
|
|
|
|
__u16 active_key_id;
|
|
|
|
|
2009-09-04 13:33:19 +07:00
|
|
|
__u8 need_ecne:1, /* Need to send an ECNE Chunk? */
|
2016-07-09 18:47:40 +07:00
|
|
|
temp:1, /* Is it a temporary association? */
|
2017-03-26 23:21:15 +07:00
|
|
|
force_delay:1,
|
2017-12-08 20:03:58 +07:00
|
|
|
intl_enable:1,
|
2017-01-17 23:44:45 +07:00
|
|
|
prsctp_enable:1,
|
|
|
|
reconf_enable:1;
|
2012-12-01 11:49:42 +07:00
|
|
|
|
2017-01-17 23:44:46 +07:00
|
|
|
__u8 strreset_enable;
|
2017-01-17 23:44:47 +07:00
|
|
|
__u8 strreset_outstanding; /* request param count on the fly */
|
2017-01-17 23:44:46 +07:00
|
|
|
|
2017-01-17 23:44:42 +07:00
|
|
|
__u32 strreset_outseq; /* Update after receiving response */
|
|
|
|
__u32 strreset_inseq; /* Update after receiving request */
|
2017-04-15 21:00:27 +07:00
|
|
|
__u32 strreset_result[2]; /* save the results of last 2 responses */
|
2017-01-17 23:44:42 +07:00
|
|
|
|
2017-01-17 23:44:43 +07:00
|
|
|
struct sctp_chunk *strreset_chunk; /* save request chunk */
|
|
|
|
|
2012-12-01 11:49:42 +07:00
|
|
|
struct sctp_priv_assoc_stats stats;
|
2016-07-09 18:47:42 +07:00
|
|
|
|
sctp: implement prsctp PRIO policy
prsctp PRIO policy is a policy to abandon lower priority chunks when
asoc doesn't have enough snd buffer, so that the current chunk with
higher priority can be queued successfully.
Similar to TTL/RTX policy, we will set the priority of the chunk to
prsctp_param with sinfo->sinfo_timetolive in sctp_set_prsctp_policy().
So if PRIO policy is enabled, msg->expire_at won't work.
asoc->sent_cnt_removable will record how many chunks can be checked to
remove. If priority policy is enabled, when the chunk is queued into
the out_queue, we will increase sent_cnt_removable. When the chunk is
moved to abandon_queue or dequeue and free, we will decrease
sent_cnt_removable.
In sctp_sendmsg, we will check if there is enough snd buffer for current
msg and if sent_cnt_removable is not 0. Then try to abandon chunks in
sctp_prune_prsctp when sendmsg from the retransmit/transmited queue, and
free chunks from out_queue in right order until the abandon+free size >
msg_len - sctp_wfree. For the abandon size, we have to wait until it
sends FORWARD TSN, receives the sack and the chunks are really freed.
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-07-09 18:47:45 +07:00
|
|
|
int sent_cnt_removable;
|
|
|
|
|
2016-07-09 18:47:42 +07:00
|
|
|
__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
|
|
|
|
__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
|
2005-04-17 05:20:36 +07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/* Eyecatcher constant: a recognizable magic value for determining whether
 * we are really looking at an association data structure.
 */
enum { SCTP_ASSOC_EYECATCHER = 0xa550c123 };
|
|
|
|
|
|
|
|
/* Recover the outter association structure. */
|
|
|
|
static inline struct sctp_association *sctp_assoc(struct sctp_ep_common *base)
|
|
|
|
{
|
|
|
|
struct sctp_association *asoc;
|
|
|
|
|
|
|
|
asoc = container_of(base, struct sctp_association, base);
|
|
|
|
return asoc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* These are function signatures for manipulating associations. */
|
|
|
|
|
|
|
|
|
|
|
|
struct sctp_association *
|
2017-08-05 18:59:54 +07:00
|
|
|
sctp_association_new(const struct sctp_endpoint *ep, const struct sock *sk,
|
|
|
|
enum sctp_scope scope, gfp_t gfp);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_association_free(struct sctp_association *);
|
|
|
|
void sctp_association_put(struct sctp_association *);
|
|
|
|
void sctp_association_hold(struct sctp_association *);
|
|
|
|
|
2009-05-12 20:52:51 +07:00
|
|
|
/* Declarations only: transport selection and address lookups. */
struct sctp_transport *sctp_assoc_choose_alter_transport(
		struct sctp_association *asoc,
		struct sctp_transport *transport);
void sctp_assoc_update_retran_path(struct sctp_association *asoc);
struct sctp_transport *sctp_assoc_lookup_paddr(
		const struct sctp_association *asoc,
		const union sctp_addr *address);
int sctp_assoc_lookup_laddr(struct sctp_association *asoc,
			    const union sctp_addr *laddr);
|
|
|
|
struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *,
|
|
|
|
const union sctp_addr *address,
|
2005-10-07 13:46:04 +07:00
|
|
|
const gfp_t gfp,
|
2005-06-21 03:14:57 +07:00
|
|
|
const int peer_state);
|
2005-04-17 05:20:36 +07:00
|
|
|
void sctp_assoc_del_peer(struct sctp_association *asoc,
|
|
|
|
const union sctp_addr *addr);
|
2005-06-21 03:14:57 +07:00
|
|
|
void sctp_assoc_rm_peer(struct sctp_association *asoc,
|
|
|
|
struct sctp_transport *peer);
|
2017-08-05 18:59:55 +07:00
|
|
|
void sctp_assoc_control_transport(struct sctp_association *asoc,
|
|
|
|
struct sctp_transport *transport,
|
|
|
|
enum sctp_transport_cmd command,
|
|
|
|
sctp_sn_error_t error);
|
2005-04-17 05:20:36 +07:00
|
|
|
/* Declarations only: TSN lookup, migration, update and PMTU helpers. */
struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *asoc,
					     __u32 tsn);
void sctp_assoc_migrate(struct sctp_association *asoc, struct sock *sk);
int sctp_assoc_update(struct sctp_association *old,
		      struct sctp_association *new);

__u32 sctp_association_get_next_tsn(struct sctp_association *asoc);

void sctp_assoc_update_frag_point(struct sctp_association *asoc);
void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu);
void sctp_assoc_sync_pmtu(struct sctp_association *asoc);
|
Revert "net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer"
This reverts commit ef2820a735f7 ("net: sctp: Fix a_rwnd/rwnd management
to reflect real state of the receiver's buffer") as it introduced a
serious performance regression on SCTP over IPv4 and IPv6, though not
as dramatic on the latter. Measurements are on 10Gbit/s with ixgbe NICs.
Current state:
[root@Lab200slot2 ~]# iperf3 --sctp -4 -c 192.168.241.3 -V -l 1452 -t 60
iperf version 3.0.1 (10 January 2014)
Linux Lab200slot2 3.14.0 #1 SMP Thu Apr 3 23:18:29 EDT 2014 x86_64
Time: Fri, 11 Apr 2014 17:56:21 GMT
Connecting to host 192.168.241.3, port 5201
Cookie: Lab200slot2.1397238981.812898.548918
[ 4] local 192.168.241.2 port 38616 connected to 192.168.241.3 port 5201
Starting Test: protocol: SCTP, 1 streams, 1452 byte blocks, omitting 0 seconds, 60 second test
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.09 sec 20.8 MBytes 161 Mbits/sec
[ 4] 1.09-2.13 sec 10.8 MBytes 86.8 Mbits/sec
[ 4] 2.13-3.15 sec 3.57 MBytes 29.5 Mbits/sec
[ 4] 3.15-4.16 sec 4.33 MBytes 35.7 Mbits/sec
[ 4] 4.16-6.21 sec 10.4 MBytes 42.7 Mbits/sec
[ 4] 6.21-6.21 sec 0.00 Bytes 0.00 bits/sec
[ 4] 6.21-7.35 sec 34.6 MBytes 253 Mbits/sec
[ 4] 7.35-11.45 sec 22.0 MBytes 45.0 Mbits/sec
[ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec
[ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec
[ 4] 11.45-11.45 sec 0.00 Bytes 0.00 bits/sec
[ 4] 11.45-12.51 sec 16.0 MBytes 126 Mbits/sec
[ 4] 12.51-13.59 sec 20.3 MBytes 158 Mbits/sec
[ 4] 13.59-14.65 sec 13.4 MBytes 107 Mbits/sec
[ 4] 14.65-16.79 sec 33.3 MBytes 130 Mbits/sec
[ 4] 16.79-16.79 sec 0.00 Bytes 0.00 bits/sec
[ 4] 16.79-17.82 sec 5.94 MBytes 48.7 Mbits/sec
(etc)
[root@Lab200slot2 ~]# iperf3 --sctp -6 -c 2001:db8:0:f101::1 -V -l 1400 -t 60
iperf version 3.0.1 (10 January 2014)
Linux Lab200slot2 3.14.0 #1 SMP Thu Apr 3 23:18:29 EDT 2014 x86_64
Time: Fri, 11 Apr 2014 19:08:41 GMT
Connecting to host 2001:db8:0:f101::1, port 5201
Cookie: Lab200slot2.1397243321.714295.2b3f7c
[ 4] local 2001:db8:0:f101::2 port 55804 connected to 2001:db8:0:f101::1 port 5201
Starting Test: protocol: SCTP, 1 streams, 1400 byte blocks, omitting 0 seconds, 60 second test
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.00 sec 169 MBytes 1.42 Gbits/sec
[ 4] 1.00-2.00 sec 201 MBytes 1.69 Gbits/sec
[ 4] 2.00-3.00 sec 188 MBytes 1.58 Gbits/sec
[ 4] 3.00-4.00 sec 174 MBytes 1.46 Gbits/sec
[ 4] 4.00-5.00 sec 165 MBytes 1.39 Gbits/sec
[ 4] 5.00-6.00 sec 199 MBytes 1.67 Gbits/sec
[ 4] 6.00-7.00 sec 163 MBytes 1.36 Gbits/sec
[ 4] 7.00-8.00 sec 174 MBytes 1.46 Gbits/sec
[ 4] 8.00-9.00 sec 193 MBytes 1.62 Gbits/sec
[ 4] 9.00-10.00 sec 196 MBytes 1.65 Gbits/sec
[ 4] 10.00-11.00 sec 157 MBytes 1.31 Gbits/sec
[ 4] 11.00-12.00 sec 175 MBytes 1.47 Gbits/sec
[ 4] 12.00-13.00 sec 192 MBytes 1.61 Gbits/sec
[ 4] 13.00-14.00 sec 199 MBytes 1.67 Gbits/sec
(etc)
After patch:
[root@Lab200slot2 ~]# iperf3 --sctp -4 -c 192.168.240.3 -V -l 1452 -t 60
iperf version 3.0.1 (10 January 2014)
Linux Lab200slot2 3.14.0+ #1 SMP Mon Apr 14 12:06:40 EDT 2014 x86_64
Time: Mon, 14 Apr 2014 16:40:48 GMT
Connecting to host 192.168.240.3, port 5201
Cookie: Lab200slot2.1397493648.413274.65e131
[ 4] local 192.168.240.2 port 50548 connected to 192.168.240.3 port 5201
Starting Test: protocol: SCTP, 1 streams, 1452 byte blocks, omitting 0 seconds, 60 second test
[ ID] Interval Transfer Bandwidth
[ 4] 0.00-1.00 sec 240 MBytes 2.02 Gbits/sec
[ 4] 1.00-2.00 sec 239 MBytes 2.01 Gbits/sec
[ 4] 2.00-3.00 sec 240 MBytes 2.01 Gbits/sec
[ 4] 3.00-4.00 sec 239 MBytes 2.00 Gbits/sec
[ 4] 4.00-5.00 sec 245 MBytes 2.05 Gbits/sec
[ 4] 5.00-6.00 sec 240 MBytes 2.01 Gbits/sec
[ 4] 6.00-7.00 sec 240 MBytes 2.02 Gbits/sec
[ 4] 7.00-8.00 sec 239 MBytes 2.01 Gbits/sec
With the reverted patch applied, the SCTP performance is back to normal
on latest upstream for both IPv4 and IPv6 and has the same throughput as
the 3.4.2 test kernel; it is steady and the interval reports are smooth again.
Fixes: ef2820a735f7 ("net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver's buffer")
Reported-by: Peter Butler <pbutler@sonusnet.com>
Reported-by: Dongsheng Song <dongsheng.song@gmail.com>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Tested-by: Peter Butler <pbutler@sonusnet.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Cc: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nsn.com>
Cc: Alexander Sverdlin <alexander.sverdlin@nsn.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-04-15 02:45:17 +07:00
|
|
|
/* Declarations only: rwnd adjustment and primary-transport handling. */
void sctp_assoc_rwnd_increase(struct sctp_association *asoc,
			      unsigned int len);
void sctp_assoc_rwnd_decrease(struct sctp_association *asoc,
			      unsigned int len);
void sctp_assoc_set_primary(struct sctp_association *asoc,
			    struct sctp_transport *transport);
void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
				     struct sctp_transport *transport);
|
2017-08-05 18:59:54 +07:00
|
|
|
/* Declarations only: bind-address setup and association id assignment. */
int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
				     enum sctp_scope scope,
				     gfp_t gfp);
int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
					 struct sctp_cookie *cookie,
					 gfp_t gfp);
int sctp_assoc_set_id(struct sctp_association *asoc, gfp_t gfp);
|
2007-12-21 05:11:47 +07:00
|
|
|
void sctp_assoc_clean_asconf_ack_cache(const struct sctp_association *asoc);
|
|
|
|
struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
|
|
|
|
const struct sctp_association *asoc,
|
|
|
|
__be32 serial);
|
2011-05-30 06:23:36 +07:00
|
|
|
void sctp_asconf_queue_teardown(struct sctp_association *asoc);
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Declarations only: address comparison and ECNE chunk helper. */
int sctp_cmp_addr_exact(const union sctp_addr *ss1,
			const union sctp_addr *ss2);
struct sctp_chunk *sctp_get_ecne_prepend(struct sctp_association *asoc);
|
|
|
|
|
|
|
|
/* A convenience structure to parse out SCTP specific CMSGs. */
|
2017-08-11 09:23:48 +07:00
|
|
|
struct sctp_cmsgs {
	/* One pointer per kind of SCTP-specific CMSG data; each member
	 * points at the structure type named in its declaration.
	 */
	struct sctp_initmsg	*init;
	struct sctp_sndrcvinfo	*srinfo;
	struct sctp_sndinfo	*sinfo;
	struct sctp_prinfo	*prinfo;
	struct sctp_authinfo	*authinfo;
	struct msghdr		*addrs_msg;
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
/* Structure for tracking memory objects */
|
2017-08-11 09:23:49 +07:00
|
|
|
struct sctp_dbg_objcnt_entry {
|
2005-04-17 05:20:36 +07:00
|
|
|
char *label;
|
|
|
|
atomic_t *counter;
|
2017-08-11 09:23:49 +07:00
|
|
|
};
|
2005-04-17 05:20:36 +07:00
|
|
|
|
|
|
|
#endif /* __sctp_structs_h__ */
|