mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-21 09:08:45 +07:00
ffd81558d5
Update hbm_out_kern.c to support returning cn notifications. Also updates relevant files to allow disabling cn notifications. Signed-off-by: Lawrence Brakmo <brakmo@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
441 lines
12 KiB
Bash
Executable File
441 lines
12 KiB
Bash
Executable File
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

#######################################
# Print the help text for this script and terminate the script.
# Globals:   name (read) - script name shown on the USAGE line
# Arguments: none
# Outputs:   help text on stdout
# Returns:   does not return; exits the shell with status 0
#######################################
Usage() {
  # Unquoted here-document so that $name is expanded; nothing else in the
  # text is subject to expansion.
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and loads a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]
             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress.
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    --no_cn           Do not return CN notifications
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     whether to update hbm stats (default is yes).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use

EOF
  exit 0
}

#set -x

# Default values for every option; processArgs overrides them from the
# command line.
debug_flag=0      # set to 1 by --debug
# Command line flattened into one string; processArgs word-splits it again.
# "$*" is used because assigning "$@" to a scalar is ambiguous (SC2124);
# with the default IFS the resulting string is the same.
args="$*"
name="$0"         # script name shown by Usage
netem=0           # netem delay in ms; 0 means no netem qdisc (-d)
cc=x              # TCP congestion control; "x" leaves the system setting alone
dir="-o"          # direction flag handed to ./hbm
dir_name="out"    # direction suffix of the hbm.<id>.<dir_name> stats file
dur=5             # test duration in seconds (-t)
flows=1           # number of concurrent flows (-f)
id=1              # cgroup id (-i)
prog=""           # BPF program filename handed to ./hbm; empty passes none (-b)
port=5201         # first iperf3 port (-p)
rate=1000         # rate limit in Mbps handed to ./hbm (-r)
multi_iperf=0     # 1: one iperf3 instance per flow (-P)
flow_cnt=1        # loop counter over flows
use_netperf=0     # 1: generate traffic with netperf instead of iperf3 (-N)
rr=0              # 1: netperf TCP_RR instead of TCP_STREAM (-R)
ecn=0             # 1: enable ECN for the duration of the test (-E)
details=0         # 1: print detailed output (-D)
server=""         # remote netperf server; empty keeps traffic on this host (-s)
qdisc=""          # qdisc to install on lo (-q)
flags=""          # extra flags accumulated for ./hbm
do_stats=0        # 1: -S/--stats given; the hbm stats file is shown with -D

#######################################
# Launch ./hbm in the background and report its PID.
# Globals:   dir, id, rate, dur, flags, dbg, prog (all read)
# Arguments: none
# Outputs:   PID of the background ./hbm process on stdout; the command
#            line followed by hbm's stdout and stderr go to ./hbm.out
#######################################
function start_hbm () {
  # Record the command line first; ">" truncates any hbm.out left over from
  # a previous run.
  {
    echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"
    echo " "
  } > hbm.out
  # $flags, $dbg and $prog are deliberately unquoted: $flags may hold several
  # options and an empty value must expand to no argument at all.
  # NOTE(review): $dbg is not assigned anywhere in this script, so it
  # expands to nothing unless it is inherited from the environment.
  # shellcheck disable=SC2086
  ./hbm "$dir" -n "$id" -r "$rate" -t "$dur" $flags $dbg $prog >> hbm.out 2>&1 &
  echo $!
}

#######################################
# Parse the command line held in the global $args and set the option
# globals accordingly.
# Globals:   args (read); dir, dir_name, prog, cc, flags, debug_flag, netem,
#            details, ecn, flows, id, use_netperf, port, multi_iperf, qdisc,
#            rate, rr, server, do_stats, dur (written)
# Arguments: none
# Outputs:   for an unknown argument, a diagnostic on stderr followed by the
#            help text on stdout
# Returns:   exits 0 via Usage for -h/--help; exits 1 on an unknown argument
#######################################
processArgs () {
  local arg

  # $args is a flat string, so it is split on whitespace on purpose.
  # shellcheck disable=SC2086
  for arg in $args ; do
    case "$arg" in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${arg#*=}"
      ;;
    -c=*|--cc=*)
      cc="${arg#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${arg#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    # Support for upcoming fq Early Departure Time egress rate limiting
    #--edt)
    #  prog="hbm_out_edt_kern.o"
    #  qdisc="fq"
    #  ;;
    -f=*|--flows=*)
      flows="${arg#*=}"
      ;;
    -h|--help)
      # Advertised by Usage; without this arm -h was reported as unknown.
      Usage
      ;;
    -i=*|--id=*)
      id="${arg#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${arg#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${arg#*=}"
      ;;
    -r=*|--rate=*)
      rate="${arg#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${arg#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${arg#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$arg" >&2
      # Usage ends in "exit". Running it in a subshell still prints the help
      # text but lets the script terminate with a failure status.
      ( Usage )
      exit 1
      ;;
    esac
  done
}

# ---- Test setup ----------------------------------------------------------
# Parse the options, start hbm, move this shell into the test cgroup and
# prepare sysctls, the loopback qdisc and the background ping.
processArgs

if [ "$debug_flag" -eq 1 ] ; then
  rm -f hbm_out.log
fi

hbm_pid=$(start_hbm)
# Give hbm a moment to start. "sleep 0.1" replaces "usleep 100000", which is
# not part of coreutils and is missing on many systems.
sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell into the test cgroup.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"
if [ "$ecn" -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ "$use_netperf" -eq 0 ] ; then
  # Remember the current congestion control so it can be restored after the
  # test.
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
  # $qdisc is left unquoted so that it may carry qdisc parameters.
  # shellcheck disable=SC2086
  tc qdisc add dev lo root $qdisc > /dev/null 2>&1
fi

n=0
# One ping every 0.2 s for the duration of the test.
m=$((dur * 5))
hn="::1"
if [ "$use_netperf" -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

( ping6 -i 0.2 -c "$m" "$hn" > ping.out 2>&1 ) &

# ---- Traffic generation and reporting --------------------------------------

# Print the header of the detailed report and, when stats were requested
# and the file exists, the hbm stats file. Does nothing unless -D was given.
# Globals: details, id, do_stats, dir_name (read)
hbm_report_details() {
  if [ "$details" -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ] ; then
      cat "hbm.$id.$dir_name"
    fi
  fi
}

# Print the final result: with -D the average ping delay taken from ping.out
# and the aggregate goodput, otherwise only the aggregate goodput.
# Globals: details, rate (read)
hbm_report_goodput() {
  local delay
  if [ "$details" -ne 0 ] ; then
    delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o '[0-9.]*$')
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo "$rate"
  fi
}

if [ "$use_netperf" -ne 0 ] ; then
  # Remember whether a netserver was already running, so that only one
  # started here gets stopped during cleanup. pgrep -x matches the process
  # name exactly instead of any command line that mentions "netserver".
  begNetserverPid=$(pgrep -x netserver)
  if [ "$begNetserverPid" == "" ] && [ "$server" == "" ] ; then
    ( ./netserver > /dev/null 2>&1 ) &
    sleep 0.1
  fi

  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi

  # $np_cc is left unquoted below: it is either empty or two words.
  replySize=1
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ] ; do
    if [ "$rr" -ne 0 ] ; then
      # First flow issues 10K requests, all other flows 1M requests.
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      # shellcheck disable=SC2086
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r "$reqSize,$replySize" $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    elif [ "$dir" == "-i" ] ; then
      # shellcheck disable=SC2086
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r 1,10M $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    else
      # shellcheck disable=SC2086
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_STREAM -- \
          $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  # sleep for duration of test (plus some buffer)
  n=$((dur + 2))
  sleep "$n"

  # force graceful termination of netperf
  pids=$(pgrep netperf)
  for p in $pids ; do
    kill -SIGALRM "$p"
  done

  flow_cnt=1
  rate=0
  hbm_report_details
  while [ "$flow_cnt" -le "$flows" ] ; do
    if [ "$dir" == "-i" ] ; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    # A flow that produced no result counts as 0 instead of breaking the sum.
    rate=$((rate + ${r:-0}))
    if [ "$details" -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat "netperf.$id.$flow_cnt"
    fi
    flow_cnt=$((flow_cnt + 1))
  done
  if [ "$details" -ne 0 ] ; then
    echo ""
  fi
  hbm_report_goodput
elif [ "$multi_iperf" -eq 0 ] ; then
  # One iperf3 client running all flows in parallel against a one-shot server.
  ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
  sleep 0.1
  iperf3 -c "$host" -p "$port" -i 0 -P "$flows" -f m -t "$dur" > "iperf.$id"
  rates=$(grep receiver "iperf.$id" | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  # Unquoted on purpose: joins the per-line values so that the last one can
  # be picked.
  # shellcheck disable=SC2086
  rate=$(echo $rates | grep -o '[0-9]*$')

  hbm_report_details
  hbm_report_goodput
else
  # One iperf3 server/client pair per flow, each on its own port.
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ] ; do
    ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
    ( iperf3 -c "$host" -p "$port" -i 0 -P 1 -f m -t "$dur" \
        | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" \
        | grep -o '[0-9]*$' > "iperf3.$id.$flow_cnt" ) &
    port=$((port + 1))
    flow_cnt=$((flow_cnt + 1))
  done
  n=$((dur + 1))
  sleep "$n"
  flow_cnt=1
  rate=0
  hbm_report_details

  while [ "$flow_cnt" -le "$flows" ] ; do
    r=$(cat "iperf3.$id.$flow_cnt")
#    echo "rate for flow $flow_cnt: $r"
    if [ "$details" -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    # A flow that produced no result counts as 0 instead of breaking the sum.
    rate=$((rate + ${r:-0}))
    flow_cnt=$((flow_cnt + 1))
  done
  hbm_report_goodput
fi

# ---- Cleanup -------------------------------------------------------------
# Restore system settings, stop hbm, detach leftover BPF programs and stop
# a netserver that was started for this test.

if [ "$use_netperf" -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi
if [ "$ecn" -ne 0 ] ; then
  # NOTE(review): this writes 0 rather than the value tcp_ecn had before the
  # test; restoring it would need the old value to be saved during setup.
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
# Remove the root qdisc installed on lo, whether it came from -d (netem) or
# from -q.
if [ "$netem" -ne "0" ] || [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi

sleep 2

# Stop our hbm instance if it is still running. The PID recorded at start-up
# is checked directly, so other hbm instances running at the same time do
# not prevent the kill.
if [ -n "$hbm_pid" ] && \
   [ "$(ps -o comm= -p "$hbm_pid" 2>/dev/null)" == "hbm" ] ; then
  kill "$hbm_pid"
fi

sleep 1

# Detach any BPF programs that may have lingered
ttx=$(bpftool cgroup tree | grep hbm)
# Walk the words of the filtered output: a cgroup path starts a record
# (v=0), the next word is taken as the program id (v=1) and the one after it
# as the attach type, at which point the program is detached.
v=2
for x in $ttx ; do
  if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
    cg=$x
    v=0
  elif [ "$v" -eq 0 ] ; then
    prog_id=$x
    v=1
  elif [ "$v" -eq 1 ] ; then
    attach_type=$x
    bpftool cgroup detach "$cg" "$attach_type" id "$prog_id"
    v=0
  fi
done

# Stop netserver only when the test is local and none was running before
# this script started one.
if [ "$use_netperf" -ne 0 ] && [ "$server" == "" ] && \
   [ "$begNetserverPid" == "" ] ; then
  netserverPid=$(pgrep -x netserver)
  if [ "$netserverPid" != "" ] ; then
    # Unquoted on purpose: there may be more than one PID.
    # shellcheck disable=SC2086
    kill $netserverPid
  fi
fi
exit