[libvirt] [PATCH] Add virNetlinkNewLink for simplifying virNetDev*Create

Shi Lei shi_lei at massclouds.com
Sun Sep 2 04:37:57 UTC 2018


On 2018-08-31 at 20:02, Erik Skultety wrote:
>On Thu, Aug 30, 2018 at 02:27:05PM +0800, Shi Lei wrote:
>> Thanks for your comments.
>> But I have several questions, please see below...
>>
>> On 2018-08-29 at 19:43, Erik Skultety wrote:
>> >On Thu, Aug 23, 2018 at 12:15:08PM +0800, Shi Lei wrote:
>> >> This patch adds virNetlinkNewLink for simplifying virNetDevMacVLanCreate
>> >> and virNetDevBridgeCreate.
>> >>
>> >> Signed-off-by: Shi Lei <shi_lei at massclouds.com>
>> >> ---
>> >
>> >There are multiple changes happening in this patch so it will need to be split
>> >into several patches, see below...
>>
>> Okay. Then the v2 series should be like:
>> [patch v2 0/4]:  ... cover ...
>> [patch v2 1/4]:  Add wrapper macros to make code more readable
>> [patch v2 2/4]:  Introduce virNetlinkNewLink
>
>2/4 would come before 1/4 

Okay.

>
>> [patch v2 3/4]:  Replace the implementation of virNetDevBridgeCreate
>> [patch v2 4/4]:  Replace the implementation of virNetDevMacVLanCreate
>
>3/4 + 4/4 would be a single patch 

Okay.

>
>...
>
>> >> +static int
>> >> +virNetDevMacVLanCreateCallback(struct nl_msg *nl_msg, const void *opaque)
>> >> +{
>> >> +    const uint32_t *mode = (const uint32_t *) opaque;
>> >> +    if (!nl_msg || !opaque) {
>> >> +        virReportError(VIR_ERR_INTERNAL_ERROR,
>> >> +                       _("nl_msg %p or opaque %p is NULL"),
>> >> +                       nl_msg, opaque);
>> >> +        return -1;
>> >> +    }
>> >> +
>> >> +    if (*mode > 0) {
>> >> +        struct nlattr *info_data;
>> >> +        if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
>> >> +            goto buffer_too_small;
>> >> +
>> >> +        if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(*mode), mode) < 0)
>> >> +            goto buffer_too_small;
>> >> +
>> >> +        nla_nest_end(nl_msg, info_data);
>> >> +    }
>> >> +
>> >> +    return 0;
>> >> +
>> >> + buffer_too_small:
>> >> +    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> >> +                   _("allocated netlink buffer is too small"));
>> >> +    return -1;
>> >> +}
>> >
>> >Having a callback just to add a nested data into nl_msg doesn't feel like the
>> >right approach, I'd expect a callback to do more specific stuff, this should be
>> >special-cased in virNetlinkNewLink.
>>
>> I looked through the code for ip-link in iproute2 which could be a good example
>> for netlink. And I found that the differences of attributes of various net-devices are
>> confined to IFLA_INFO_DATA or IFLA_INFO_SLAVE_DATA(this is not used in libvirt).
>> The code fragment in iplink.c:
>>
>>         if (ulinep && !strcmp(ulinep, "_slave")) {
>>             ... ...
>>             lu = get_link_slave_kind(slavebuf);
>>             iflatype = IFLA_INFO_SLAVE_DATA;
>>         } else {
>> =>       lu = get_link_kind(type);             /* Get pointer to the driver for this type */
>> =>       iflatype = IFLA_INFO_DATA;       /* NEST will be for IFLA_INFO_DATA */
>>         }
>>         if (lu && argc) {
>> =>       struct rtattr *data = addattr_nest(&req.n, sizeof(req), iflatype);    /* NEST BEGIN */
>>
>>             ****************************
>>             if (lu->parse_opt &&
>>                 lu->parse_opt(lu, argc, argv, &req.n))
>>                 return -1;
>>             ****************************
>>             ^The lu points to various types of net-drivers(e.g. macvlan/bridge/vxlan/vlan ...)
>>                which pack all special self-attributes in their parse_opt callback
>>
>> =>        addattr_nest_end(&req.n, data);    /* NEST END */
>>
>> So I think that it's reasonable for us to have a callback to handle it just as iproute2
>> since iproute2 could almost create all the types of net-devices.
>
>I went through the iproute2 code and it completely makes sense for them to
>utilize callbacks here, given the amount of driver-specific options each of the
>device type has. While I agree that we could make use of this in libvirt one
>day, I don't feel like we need to do it now, since I don't think it brings
>any noticeable benefit in its currently proposed form + as I already mentioned,
>we can switch to this scheme once there are more than 1 netlink types which
>could make use of it. 

Okay.

>
>>
>> >
>> >> +
>> >> +
>> >>  /**
>> >>   * virNetDevMacVLanCreate:
>> >>   *
>> >> @@ -307,113 +338,29 @@ virNetDevMacVLanCreate(const char *ifname,
>> >>                         uint32_t macvlan_mode,
>> >>                         int *retry)
>> >>  {
>> >
>> >^This replacement would be the last patch in the series.
>>
>> Okay.
>>
>> >
>> >> -    int rc = -1;
>> >> -    struct nlmsgerr *err;
>> >> -    struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
>> >>      int ifindex;
>> >> -    unsigned int recvbuflen;
>> >> -    struct nl_msg *nl_msg;
>> >> -    struct nlattr *linkinfo, *info_data;
>> >> -    char macstr[VIR_MAC_STRING_BUFLEN];
>> >> -    VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
>> >> -
>> >> -    if (virNetDevGetIndex(srcdev, &ifindex) < 0)
>> >> -        return -1;
>> >> -
>> >> +    int error = 0;
>> >>      *retry = 0;
>> >>
>> >> -    nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
>> >> -                                NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
>> >> -    if (!nl_msg) {
>> >> -        virReportOOMError();
>> >> +    if (virNetDevGetIndex(srcdev, &ifindex) < 0)
>> >>          return -1;
>> >> -    }
>> >> -
>> >> -    if (nlmsg_append(nl_msg,  &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
>> >> -        goto buffer_too_small;
>> >> -
>> >> -    if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
>> >> -        goto buffer_too_small;
>> >> -
>> >> -    if (nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, macaddress) < 0)
>> >> -        goto buffer_too_small;
>> >> -
>> >> -    if (ifname &&
>> >> -        nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
>> >> -        goto buffer_too_small;
>> >>
>> >> -    if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
>> >> -        goto buffer_too_small;
>> >> -
>> >> -    if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
>> >> -        goto buffer_too_small;
>> >> -
>> >> -    if (macvlan_mode > 0) {
>> >> -        if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
>> >> -            goto buffer_too_small;
>> >> -
>> >> -        if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
>> >> -                    &macvlan_mode) < 0)
>> >> -            goto buffer_too_small;
>> >> -
>> >> -        nla_nest_end(nl_msg, info_data);
>> >> -    }
>> >> -
>> >> -    nla_nest_end(nl_msg, linkinfo);
>> >> -
>> >> -    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
>> >> -                          NETLINK_ROUTE, 0) < 0) {
>> >> -        goto cleanup;
>> >> -    }
>> >> -
>> >> -    if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL)
>> >> -        goto malformed_resp;
>> >> -
>> >> -    switch (resp->nlmsg_type) {
>> >> -    case NLMSG_ERROR:
>> >> -        err = (struct nlmsgerr *)NLMSG_DATA(resp);
>> >> -        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
>> >> -            goto malformed_resp;
>> >> -
>> >> -        switch (err->error) {
>> >> -
>> >> -        case 0:
>> >> -            break;
>> >> -
>> >> -        case -EEXIST:
>> >> +    if (virNetlinkNewLink(&ifindex, ifname, macaddress, type,
>> >> +                          virNetDevMacVLanCreateCallback, &macvlan_mode,
>> >> +                          &error) < 0) {
>> >> +        char macstr[VIR_MAC_STRING_BUFLEN];
>> >> +        if (error == -EEXIST)
>> >>              *retry = 1;
>> >> -            goto cleanup;
>> >> -
>> >> -        default:
>> >> -            virReportSystemError(-err->error,
>> >> +        else
>> >> +            virReportSystemError(-error,
>> >>                                   _("error creating %s interface %s@%s (%s)"),
>> >>                                   type, ifname, srcdev,
>> >>                                   virMacAddrFormat(macaddress, macstr));
>> >> -            goto cleanup;
>> >> -        }
>> >> -        break;
>> >> -
>> >> -    case NLMSG_DONE:
>> >> -        break;
>> >>
>> >> -    default:
>> >> -        goto malformed_resp;
>> >> +        return -1;
>> >>      }
>> >>
>> >> -    rc = 0;
>> >> - cleanup:
>> >> -    nlmsg_free(nl_msg);
>> >> -    return rc;
>> >> -
>> >> - malformed_resp:
>> >> -    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> >> -                   _("malformed netlink response message"));
>> >> -    goto cleanup;
>> >> -
>> >> - buffer_too_small:
>> >> -    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>> >> -                   _("allocated netlink buffer is too small"));
>> >> -    goto cleanup;
>> >> +    return 0;
>> >>  }
>> >>
>> >>  /**
>> >> diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
>> >> index 8f06446..817e347 100644
>> >> --- a/src/util/virnetlink.c
>> >> +++ b/src/util/virnetlink.c
>> >> @@ -489,6 +489,110 @@ virNetlinkDumpLink(const char *ifname, int ifindex,
>> >>
>> >>
>> >>  /**
>> >> + * virNetlinkNewLink:
>> >
>> >Introduction of this function should come in a separate patch before you start
>> >replacing the existing ones.
>>
>> Okay.
>>
>> >
>> >> + *
>> >> + * @ifindex: The index for the 'link' device
>> >> + * @ifname: The name of the link
>> >> + * @mac: The MAC address of the device
>> >
>> >Most of ^these attributes could be packed into a virNetlinkNewLinkData
>> >structure (or something similarly named). You'll have to test check for
>> >VIR_DOMAIN_NET_TYPE_DIRECT and VIR_NETDEV_MACVLAN_MODE_VEPA according to my
>> >comments below inside <comment_block>...
>>
>> I have studied your comments inside <comment_block>, but I still don't know  how to
>> test check for VIR_DOMAIN_NET_TYPE_DIRECT and VIR_NETDEV_MACVLAN_MODE_VEPA.
>> Could you explain it for me?  :-)
>
>Actually, I retract on checking the modes ^this way, I was hasty with my
>response and then went through the code more thoroughly and realized that mode
>corresponds to netlink modes rather than to libvirt MACVLAN_MODE_X and we have
>a corresponding modemap for this. Not that it could not be achieved with our
>enums, but it's just not worth passing it around, so we'll need to imo rely on
>the @type string, which in this case would either be "macvtap" or "macvlan". 

Okay.

>
>>
>> >
>> >> + * @type: The type of device, i.e., "bridge", "macvtap", "macvlan"
>> >> + * @cb: The callback for filling in IFLA_INFO_DATA for this type
>> >
>> >I don't think callback will be necessary
>>
>> I think that it's reasonable to use callback here.
>> Another mode might be:
>> 1) pre-alloc buffer and fill in IFLA_INFO_DATA
>> 2) pass the buffer pointer as a param into virNetlinkNewLink which will
>>     embed it into nlmsg
>> 3) free the buffer
>> I think that this mode might be a bit more complicated and it's hard to find good references.
>>
>> >
>> >> + * @opaque: opaque for the callback
>> >> + * @error: for retrieving error code
>> >
>> >^see below for my comment on return value...
>>
>> Okay.
>>
>> >
>> >> + *
>> >> + * Create a network "link" (aka interface aka device) with the given
>> >> + * args. This works for many different types of network devices,
>> >> + * including macvtap and bridges.
>> >> + *
>> >> + * Returns 0 on success, -1 on fatal error.
>> >
>
><brain fart>
>
>> >Since this is a nice libvirt wrapper around the netlink communication
>> >machinery, I think that returning -<netlink_error> in case of an error is
>> >reasonable. Then, each of the callers of this API can handle the return values
>> >as they please.
>
><brain fart/>
>
>>
>> Okay. I will use netlink error as return-value.
>> But how to handle -1?
>> 1) always return -1 for *other* failures
>> 2) return -ENOMEM for OutofMemory; return -ENOBUFS for buffer_too_small; ...
>
>Good point, we couldn't differentiate between -1 from libvirt and -ENOPERM or
>-NLE_FAILURE (from libnl), so the @error variable should stay. 

Okay.

>
>> I should take which one?
>>
>> >
>> >> + */
>> >> +int
>> >> +virNetlinkNewLink(const int *ifindex,
>> >> +                  const char *ifname,
>> >> +                  const virMacAddr *mac,
>> >> +                  const char *type,
>> >> +                  virNetlinkNewLinkCallback cb,
>> >> +                  const void *opaque,
>> >> +                  int *error)
>> >> +{
>> >> +    struct nlmsgerr *err;
>> >> +    struct nlattr *linkinfo;
>> >> +    unsigned int buflen;
>> >> +    struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
>> >> +    VIR_AUTOPTR(virNetlinkMsg) nl_msg = NULL;
>> >> +    VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
>> >> +
>> >> +    *error = 0;
>> >> +
>> >> +    nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
>> >> +                                NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
>> >> +    if (!nl_msg) {
>> >> +        virReportOOMError();
>> >> +        return -1;
>> >> +    }
>> >> +
>> >> +    if (nlmsg_append(nl_msg,  &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
>> >> +        goto buffer_too_small;
>> >> +
>> >
>>
>> No offence. Let me try talking about nla_put* in comment_block.
>>
>> ><comment_block>
>> >
>> >> +    if (ifindex && nla_put_u32(nl_msg, IFLA_LINK, *ifindex) < 0)
>> >
>> >pre-existing, but is there a particular reason why ^this has to be u32??
>>
>> prototype of nla_put_u32:
>> int nla_put_u32(struct nl_msg *msg, int attrtype, uint32_t value);
>> It is used for adding 32 bit integer into msg.
>> We got ifindex by calling ioctl with SIOCGIFINDEX and this ifindex >= 0.
>> So I think that it's right to use nla_put_u32.
>>
>> >
>> >> +        goto buffer_too_small;
>> >> +
>> >> +    if (mac && nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, mac) < 0)
>> >> +        goto buffer_too_small;
>> >
>> >Similarly, is there a reason why ^this is not necessary to be u32?
>
>This was a brain fart too, of course it wouldn't work because we're not
>adding an int here, so you're right. 

Okay.

>
>>
>> prototype of nla_put:
>> int nla_put(struct nl_msg *msg, int attrtype, int datalen, const void *data);
>> It means put a sequence of octets into msg. It could be used for string and
>> other packed struct.
>> The type of mac is struct virMacAddr which size is VIR_MAC_BUFLEN.
>> So I think it's right.
>>
>> >
>> >> +
>> >> +    if (ifname && nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
>> >> +        goto buffer_too_small;
>> >> +
>> >> +    if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
>> >
>> >Since we're already touching the code, I would find it more readable if ^this
>> >was written as linkinfo = nla_nest_start. The reason for that is that it's not
>> >immediately visible where the matching counterpart to "nla_nest_end" is.
>>
>> Okay.
>>
>> >
>> >> +        goto buffer_too_small;
>> >> +
>> >> +    if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
>> >
>> >I'm puzzled why ^this doesn't need strlen(foo)+1 instead of plain strlen(foo)
>>
>> This code is same as iproute2. There's no problem here.
>> In net/core/rtnetlink.c of kernel (source v4.4.0):
>>     nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
>> and the implementation of nla_strlcpy:
>>     size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
>>     {
>>         size_t srclen = nla_len(nla);   /* whatever it is strlen(foo)+1 or strlen(foo) */
>>         char *src = nla_data(nla);
>> =>   if (srclen > 0 && src[srclen - 1] == '\0')
>> =>       srclen--;
>>
>> But I think it should always be strlen(foo)+1 for consistency of code.
>
>It's also safer, because that way you're always sure that you're getting a
>nul-terminated string, especially since you'll be doing a plain memcpy in
>nla_put + you'll avoid questions like mine out of sheer confusion. 

Okay.

>>
>> >
>> >> +        goto buffer_too_small;
>> >
>> ></comment_block>
>> >
>> >We could actually create a wrapper macro around nla_put which would make the
>> >block (begin-end) tiny more readable. See my notes above, as I have further
>> >questions...Now, the macros would again be a separate patch to make the change
>> >more gradual. Depending on the comments to my questions above, the macro would
>> >either take the size of the type and always call nla_put or a specialized
>> >nla_put_<string|u32|whatever> function. I'm CC'ing Laine to help answering my
>> >questions inside <comment_block>.
>> >
>> >Erik
>>
>> I think that the usage of nla_put* is clear now.
>> So I don't know how to create wrapper macros.
>> Could you give me some instructions for it?
>
>Currently what I have on my branch is the following (more changes needed to
>actually compile it):
>
>diff --git a/src/util/virnetlink.c b/src/util/virnetlink.c
>index 817e347cec..e626d53a31 100644
>--- a/src/util/virnetlink.c
>+++ b/src/util/virnetlink.c
>@@ -515,12 +515,25 @@ virNetlinkNewLink(const int *ifindex,
>                   int *error)
> {
>     struct nlmsgerr *err;
>-    struct nlattr *linkinfo;
>     unsigned int buflen;
>     struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
>+    struct nlattr *linkinfo = NULL;
>+    struct nlattr *info_data = NULL;
>     VIR_AUTOPTR(virNetlinkMsg) nl_msg = NULL;
>     VIR_AUTOFREE(struct nlmsghdr *) resp = NULL;
>
>+# define NETLINK_MSG_NEST_START(container, attrtype) \
>+    container = nla_nest_start(nl_msg, attrtype); \
>+    if (!container) \
>+        goto buffer_too_small;
>+
>+# define NETLINK_MSG_NEST_END(container) \
>+    nla_nest_end(nl_msg, container);
>+
>+# define NETLINK_MSG_PUT(attrtype, datalen, data) \
>+    if (data && nla_put(nl_msg, attrtype, datalen, data) < 0) \
>+        goto buffer_too_small;
>+
>     *error = 0;
>
>     nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
>@@ -533,25 +546,24 @@ virNetlinkNewLink(const int *ifindex,
>     if (nlmsg_append(nl_msg,  &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
>         goto buffer_too_small;
>
>-    if (ifindex && nla_put_u32(nl_msg, IFLA_LINK, *ifindex) < 0)
>-        goto buffer_too_small;
>+    NETLINK_MSG_PUT(IFLA_LINK, sizeof(uint32_t), ifindex)
>+    NETLINK_MSG_PUT(IFLA_ADDRESS, VIR_MAC_BUFLEN, mac)
>+    NETLINK_MSG_PUT(IFLA_IFNAME, (strlen(ifname)+1), ifname)
>
>-    if (mac && nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, mac) < 0)
>-        goto buffer_too_small;
>+    NETLINK_MSG_NEST_START(linkinfo, IFLA_LINKINFO)
>
>-    if (ifname && nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
>-        goto buffer_too_small;
>+    NETLINK_MSG_PUT(IFLA_INFO_KIND, (strlen(type)+1), type)
>
>-    if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
>-        goto buffer_too_small;
>+    if ((STREQ(type, "macvtap") || STREQ(type, "macvlan")) &&
>+        macvlan_mode && *macvlan_mode > 0) {
>+        NETLINK_MSG_NEST_START(info_data, IFLA_INFO_DATA)
>
>-    if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
>-        goto buffer_too_small;
>+        NETLINK_MSG_PUT(IFLA_MACVLAN_MODE, sizeof(uint32_t), macvlan_mode)
>
>-    if (cb && cb(nl_msg, opaque) < 0)
>-        return -1;
>+        NETLINK_MSG_NEST_END(info_data)
>+    }
>
>-    nla_nest_end(nl_msg, linkinfo);
>+    NETLINK_MSG_NEST_END(linkinfo)
>
>     if (virNetlinkCommand(nl_msg, &resp, &buflen, 0, 0, NETLINK_ROUTE, 0) < 0)
>         return -1;
>@@ -578,6 +590,10 @@ virNetlinkNewLink(const int *ifindex,
>         goto malformed_resp;
>     }
>
>+# undef NETLINK_MSG_NEST_START
>+# undef NETLINK_MSG_NEST_END
>+# undef NETLINK_MSG_PUT
>+
>     return 0;
>
>  malformed_resp:
>
>
>Regards,
>Erik 

I got it! Thanks.

Shi Lei




More information about the libvir-list mailing list