[libvirt] [PATCH v3 3/8] bandwidth: Create hierarchical shaping classes
Laine Stump
laine at laine.org
Tue Dec 11 16:30:18 UTC 2012
On 12/11/2012 11:09 AM, Michal Privoznik wrote:
> These classes can borrow unused bandwidth. Basically,
> only egress qdsics can have classes, therefore we can
s/qdsic/qdisc/
> do this kind of traffic shaping only on host's outgoing,
> that is domain's incoming traffic.
> ---
> src/lxc/lxc_process.c | 3 +-
> src/network/bridge_driver.c | 3 +-
> src/qemu/qemu_command.c | 3 +-
> src/qemu/qemu_driver.c | 2 +-
> src/util/virnetdevbandwidth.c | 93 +++++++++++++++++++++++++++++++++++++---
> src/util/virnetdevbandwidth.h | 4 +-
> src/util/virnetdevmacvlan.c | 2 +-
> 7 files changed, 97 insertions(+), 13 deletions(-)
>
> diff --git a/src/lxc/lxc_process.c b/src/lxc/lxc_process.c
> index 50c61c5..3e7fcb8 100644
> --- a/src/lxc/lxc_process.c
> +++ b/src/lxc/lxc_process.c
> @@ -341,7 +341,8 @@ static int virLXCProcessSetupInterfaceBridged(virConnectPtr conn,
> goto cleanup;
>
> if (virNetDevBandwidthSet(net->ifname,
> - virDomainNetGetActualBandwidth(net)) < 0) {
> + virDomainNetGetActualBandwidth(net),
> + false) < 0) {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("cannot set bandwidth limits on %s"),
> net->ifname);
> diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
> index 00cffee..58f1d2e 100644
> --- a/src/network/bridge_driver.c
> +++ b/src/network/bridge_driver.c
> @@ -2284,7 +2284,8 @@ networkStartNetworkVirtual(struct network_driver *driver,
> VIR_FORCE_CLOSE(tapfd);
> }
>
> - if (virNetDevBandwidthSet(network->def->bridge, network->def->bandwidth) < 0) {
> + if (virNetDevBandwidthSet(network->def->bridge,
> + network->def->bandwidth, true) < 0) {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("cannot set bandwidth limits on %s"),
> network->def->bridge);
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index 9009bd2..e10eb09 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -292,7 +292,8 @@ qemuNetworkIfaceConnect(virDomainDefPtr def,
>
> if (tapfd >= 0 &&
> virNetDevBandwidthSet(net->ifname,
> - virDomainNetGetActualBandwidth(net)) < 0) {
> + virDomainNetGetActualBandwidth(net),
> + false) < 0) {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("cannot set bandwidth limits on %s"),
> net->ifname);
> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
> index d449579..e6ae3fd 100644
> --- a/src/qemu/qemu_driver.c
> +++ b/src/qemu/qemu_driver.c
> @@ -9034,7 +9034,7 @@ qemuDomainSetInterfaceParameters(virDomainPtr dom,
> sizeof(*newBandwidth->out));
> }
>
> - if (virNetDevBandwidthSet(net->ifname, newBandwidth) < 0) {
> + if (virNetDevBandwidthSet(net->ifname, newBandwidth, false) < 0) {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("cannot set bandwidth limits on %s"),
> device);
> diff --git a/src/util/virnetdevbandwidth.c b/src/util/virnetdevbandwidth.c
> index 49fc425..71c272e 100644
> --- a/src/util/virnetdevbandwidth.c
> +++ b/src/util/virnetdevbandwidth.c
> @@ -45,17 +45,21 @@ virNetDevBandwidthFree(virNetDevBandwidthPtr def)
> * virNetDevBandwidthSet:
> * @ifname: on which interface
> * @bandwidth: rates to set (may be NULL)
> + * @hierarchical_class: whether to create hierarchical class
> *
> * This function enables QoS on specified interface
> * and set given traffic limits for both, incoming
> * and outgoing traffic. Any previous setting get
> - * overwritten.
> + * overwritten. If @hierarchical_class is TRUE, create
> + * hierarchical class. It is used to guarantee minimal
> + * throughput ('floor' attribute in NIC).
> *
> * Return 0 on success, -1 otherwise.
> */
> int
> virNetDevBandwidthSet(const char *ifname,
> - virNetDevBandwidthPtr bandwidth)
> + virNetDevBandwidthPtr bandwidth,
> + bool hierarchical_class)
> {
> int ret = -1;
> virCommandPtr cmd = NULL;
> @@ -71,7 +75,7 @@ virNetDevBandwidthSet(const char *ifname,
>
> virNetDevBandwidthClear(ifname);
>
> - if (bandwidth->in) {
> + if (bandwidth->in && bandwidth->in->average) {
> if (virAsprintf(&average, "%llukbps", bandwidth->in->average) < 0)
> goto cleanup;
> if (bandwidth->in->peak &&
> @@ -83,15 +87,89 @@ virNetDevBandwidthSet(const char *ifname,
>
> cmd = virCommandNew(TC);
> virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "root",
> - "handle", "1:", "htb", "default", "1", NULL);
> + "handle", "1:", "htb", "default",
> + hierarchical_class ? "2" : "1", NULL);
> if (virCommandRun(cmd, NULL) < 0)
> goto cleanup;
>
> + /* If we are creating a hierarchical class, all non guaranteed traffic
> + * goes to the 1:2 class which will adjust 'rate' dynamically as NICs
> + * with guaranteed throughput are plugged and unplugged. Class 1:1
> + * exists so we don't exceed the maximum limit for the network. For each
> + * NIC with guaranteed throughput a separate classid will be created.
> + * NB '1:' is just a shorter notation of '1:0'.
> + *
> + * To get a picture how this works:
> + *
> + * +-----+ +---------+ +-----------+ +-----------+ +-----+
> + * | | | qdisc | | class 1:1 | | class 1:2 | | |
> + * | NIC | | def 1:2 | | rate | | rate | | sfq |
> + * | | --> | | --> | peak | -+-> | peak | --> | |
> + * +-----+ +---------+ +-----------+ | +-----------+ +-----+
> + * |
> + * | +-----------+ +-----+
> + * | | class 1:3 | | |
> + * | | rate | | sfq |
> + * +-> | peak | --> | |
> + * | +-----------+ +-----+
> + * ...
> + * | +-----------+ +-----+
> + * | | class 1:n | | |
> + * | | rate | | sfq |
> + * +-> | peak | --> | |
> + * +-----------+ +-----+
> + *
> + * After the routing decision, when it is clear a packet is to be sent
> + * via a particular NIC, it is sent to the root qdisc (queueing
> + * discipline). In this case HTB (Hierarchical Token Bucket). It has
> + * only one direct child class (with id 1:1) which shapes the overall
> + * rate that is sent through the NIC. This class has at least one child
> + * (1:2) which is meant for all non-privileged (non guaranteed) traffic
> + * from all domains. Then, for each interface with guaranteed
> + * throughput, a separate class (1:n) is created. Imagine a class is a
> + * box. Whenever a packet ends up in a class it is stored in this box
> + * until the kernel sends it, then it is removed from the box. Packets are
> + * placed into boxes based on rules (filters) - e.g. depending on
> + * destination IP/MAC address. If there is no rule to be applied, the
> + * root qdisc has a default where such packets go (1:2 in this case).
> + * Packets come in over and over again and boxes get filled more and
> + * more. Imagine that the kernel sends packets just once a second. So it
> + * starts to traverse through this tree. It starts with the root qdisc
> + * and through 1:1 it gets to 1:2. It sends packets up to 1:2's 'rate'.
> + * Then it moves to 1:3 and again sends packets up to 1:3's 'rate'. The
> + * whole process is repeated until 1:n is processed. So now we have
> + * ensured each class its guaranteed bandwidth. If the sum of sent data
> + * doesn't exceed the 'rate' in 1:1 class, we can go further and send
> + * more packets. The rest of available bandwidth is distributed to the
> + * 1:2,1:3...1:n classes by ratio of their 'rate'. As soon as the root
> + * 'rate' limit is reached or there are no more packets to send, we stop
> + * sending and wait another second. Each class has an SFQ qdisc which
> + * shuffles packets in boxes stochastically, so one sender cannot
> + * starve others.
> + *
> + * Therefore, whenever we want to plug in a new guaranteed interface, we
> + * need to create a new class and adjust the 'rate' of the 1:2 class.
> + * When unplugging we do the exact opposite - remove the associated
> + * class, and adjust the 'rate'.
> + *
> + * This description is rather long, but it is still a good idea to read
> + * it before you dig into the code.
> + */
> + if (hierarchical_class) {
> + virCommandFree(cmd);
> + cmd = virCommandNew(TC);
> + virCommandAddArgList(cmd, "class", "add", "dev", ifname, "parent",
> + "1:", "classid", "1:1", "htb", "rate", average,
> + "ceil", peak ? peak : average, NULL);
> + if (virCommandRun(cmd, NULL) < 0)
> + goto cleanup;
> + }
> virCommandFree(cmd);
> cmd = virCommandNew(TC);
> virCommandAddArgList(cmd,"class", "add", "dev", ifname, "parent",
> - "1:", "classid", "1:1", "htb", NULL);
> - virCommandAddArgList(cmd, "rate", average, NULL);
> + hierarchical_class ? "1:1" : "1:", "classid",
> + hierarchical_class ? "1:2" : "1:1", "htb",
> + "rate", average, NULL);
>
> if (peak)
> virCommandAddArgList(cmd, "ceil", peak, NULL);
> @@ -104,7 +182,8 @@ virNetDevBandwidthSet(const char *ifname,
> virCommandFree(cmd);
> cmd = virCommandNew(TC);
> virCommandAddArgList(cmd, "qdisc", "add", "dev", ifname, "parent",
> - "1:1", "handle", "2:", "sfq", "perturb",
> + hierarchical_class ? "1:2" : "1:1",
> + "handle", "2:", "sfq", "perturb",
> "10", NULL);
>
> if (virCommandRun(cmd, NULL) < 0)
> diff --git a/src/util/virnetdevbandwidth.h b/src/util/virnetdevbandwidth.h
> index 35f8b89..d308ab2 100644
> --- a/src/util/virnetdevbandwidth.h
> +++ b/src/util/virnetdevbandwidth.h
> @@ -42,7 +42,9 @@ struct _virNetDevBandwidth {
>
> void virNetDevBandwidthFree(virNetDevBandwidthPtr def);
>
> -int virNetDevBandwidthSet(const char *ifname, virNetDevBandwidthPtr bandwidth)
> +int virNetDevBandwidthSet(const char *ifname,
> + virNetDevBandwidthPtr bandwidth,
> + bool hierarchical_class)
> ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURN_CHECK;
> int virNetDevBandwidthClear(const char *ifname)
> ATTRIBUTE_NONNULL(1);
> diff --git a/src/util/virnetdevmacvlan.c b/src/util/virnetdevmacvlan.c
> index d8e646a..657c484 100644
> --- a/src/util/virnetdevmacvlan.c
> +++ b/src/util/virnetdevmacvlan.c
> @@ -925,7 +925,7 @@ create_name:
> rc = 0;
> }
>
> - if (virNetDevBandwidthSet(cr_ifname, bandwidth) < 0) {
> + if (virNetDevBandwidthSet(cr_ifname, bandwidth, false) < 0) {
> virReportError(VIR_ERR_INTERNAL_ERROR,
> _("cannot set bandwidth limits on %s"),
> cr_ifname);
ACK.
More information about the libvir-list
mailing list