diff mbox series

[bug#66573,v2,06/11] gnu: ucx: Update to 1.15.0.

Message ID 2b52dbcebc89d066b7c7919c24d16457d5e5c5b8.1697744099.git.ludo@gnu.org
State New
Headers show
Series None | expand

Commit Message

Ludovic Courtès Oct. 19, 2023, 7:36 p.m. UTC
* gnu/packages/fabric-management.scm (ucx): Update to 1.15.0.
* gnu/packages/patches/ucx-tcp-iface-ioctl.patch: Update for 1.15.0.
---
 gnu/packages/fabric-management.scm            |   4 +-
 .../patches/ucx-tcp-iface-ioctl.patch         | 105 +++++++++++-------
 2 files changed, 64 insertions(+), 45 deletions(-)

Comments

Ludovic Courtès Oct. 19, 2023, 7:41 p.m. UTC | #1
Ludovic Courtès <ludo@gnu.org> skribis:

> * gnu/packages/fabric-management.scm (ucx): Update to 1.15.0.
> * gnu/packages/patches/ucx-tcp-iface-ioctl.patch: Update for 1.15.0.

The change compared to v1 is the updated patch here (somehow I was so
excited by my new tool that I failed to test the actual patch
adequately).

Ludo’.
diff mbox series

Patch

diff --git a/gnu/packages/fabric-management.scm b/gnu/packages/fabric-management.scm
index ccdaa0ee0a..f41b4e99ed 100644
--- a/gnu/packages/fabric-management.scm
+++ b/gnu/packages/fabric-management.scm
@@ -185,7 +185,7 @@  (define-public ibutils
 (define-public ucx
   (package
     (name "ucx")
-    (version "1.14.0")
+    (version "1.15.0")
     (source (origin
               (method git-fetch)
               (uri (git-reference
@@ -195,7 +195,7 @@  (define-public ucx
               (patches (search-patches "ucx-tcp-iface-ioctl.patch"))
               (sha256
                (base32
-                "0ki2r768wqm92qv06wxrh3kv2nl2yj4ds9fz0s0b5rr2ycjiw9ir"))))
+                "1mk46vyfp8hsivk88s8gv0nf458jfs59fczpf66wwa3a9yp324jp"))))
     (build-system gnu-build-system)
     (arguments
      (list
diff --git a/gnu/packages/patches/ucx-tcp-iface-ioctl.patch b/gnu/packages/patches/ucx-tcp-iface-ioctl.patch
index c441a0861a..2a0e4ce138 100644
--- a/gnu/packages/patches/ucx-tcp-iface-ioctl.patch
+++ b/gnu/packages/patches/ucx-tcp-iface-ioctl.patch
@@ -3,102 +3,121 @@  TCP network interfaces cannot be obtained via /sys/class/net.  This patch
 provides alternative code that uses the SIOCGIFCONF ioctl to get the
 names of the available TCP network interfaces.
 
+Initially submitted at <https://github.com/openucx/ucx/pull/4462>.
+
 diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c
-index cad4a2709..7c1d2c9de 100644
+index 6a6cd34fa..af32bb2e9 100644
 --- a/src/uct/tcp/tcp_iface.c
 +++ b/src/uct/tcp/tcp_iface.c
-@@ -17,6 +17,8 @@
- #include <sys/poll.h>
+@@ -18,6 +18,8 @@
  #include <netinet/tcp.h>
  #include <dirent.h>
+ #include <float.h>
 +#include <net/if.h>
 +#include <sys/ioctl.h>
  
+ #define UCT_TCP_IFACE_NETDEV_DIR "/sys/class/net"
  
- extern ucs_class_t UCS_CLASS_DECL_NAME(uct_tcp_iface_t);
-@@ -586,6 +588,68 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h,
+@@ -875,6 +877,85 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h,
                                   uct_worker_h, const uct_iface_params_t*,
                                   const uct_iface_config_t*);
  
 +/* Fetch information about available network devices through an ioctl.  */
-+static ucs_status_t query_devices_ioctl(uct_md_h md,
-+					uct_tl_device_resource_t **tl_devices_p,
-+					unsigned *num_tl_devices_p)
++static ucs_status_t uct_tcp_query_devices_ioctl(uct_md_h md,
++                                                uct_tl_device_resource_t **devices_p,
++                                                unsigned *num_devices_p)
 +{
 +    int sock, err, i;
-+    uct_tl_device_resource_t *resources, *tmp;
-+    unsigned num_resources;
++    uct_tl_device_resource_t *devices, *tmp;
++    unsigned num_devices;
 +    ucs_status_t status;
 +    struct ifconf conf;
-+    struct ifreq reqs[10];
 +
-+    conf.ifc_len = sizeof reqs;
-+    conf.ifc_req = reqs;
++    conf.ifc_len = 0;
++    conf.ifc_req = NULL;
 +
-+    sock = socket(SOCK_STREAM, AF_INET, 0);
-+    if (sock < 0) {
-+	ucs_error("socket(2) failed: %m");
-+	status = UCS_ERR_IO_ERROR;
-+	goto out;
++    status = ucs_socket_create(AF_INET, SOCK_STREAM, &sock);
++    if (status != UCS_OK) {
++        goto out;
 +    }
 +
 +    err = ioctl(sock, SIOCGIFCONF, &conf);
 +    if (err < 0) {
-+	ucs_error("SIOCGIFCONF ioctl failed: %m");
-+	status = UCS_ERR_IO_ERROR;
-+	goto out;
++        ucs_error("ioctl(SIOCGIFCONF) failed: %m");
++        status = UCS_ERR_IO_ERROR;
++        goto out;
 +    }
 +
-+    resources     = NULL;
-+    num_resources = 0;
-+    for (i = 0; i < conf.ifc_len / sizeof(struct ifreq); i++) {
-+	const char *name = reqs[i].ifr_name;
++    conf.ifc_req = ucs_calloc(1, conf.ifc_len, "ifreq");
++    if (conf.ifc_req == NULL) {
++        ucs_error("memory alocation failed");
++        status = UCS_ERR_NO_MEMORY;
++        goto out;
++    }
++
++    err = ioctl(sock, SIOCGIFCONF, &conf);
++    if (err < 0) {
++        ucs_error("ioctl(SIOCGIFCONF) failed: %m");
++        status = UCS_ERR_IO_ERROR;
++        goto out_free;
++    }
++
++    devices     = NULL;
++    num_devices = 0;
++    for (i = 0; i < (conf.ifc_len / sizeof(struct ifreq)); i++) {
++        const char *name = conf.ifc_req[i].ifr_name;
++	sa_family_t family = conf.ifc_req[i].ifr_addr.sa_family;
 +
-+        if (!ucs_netif_is_active(name, AF_INET)) {
++        if (!ucs_netif_is_active(name, family)) {
 +            continue;
 +        }
 +
-+        tmp = ucs_realloc(resources, sizeof(*resources) * (num_resources + 1),
-+                          "tcp resources");
++        tmp = ucs_realloc(devices, sizeof(*devices) * (num_devices + 1),
++                          "tcp devices");
 +        if (tmp == NULL) {
-+            ucs_free(resources);
++            ucs_free(devices);
 +            status = UCS_ERR_NO_MEMORY;
-+            goto out;
++            goto out_free;
 +        }
-+        resources = tmp;
++        devices = tmp;
 +
-+        ucs_snprintf_zero(resources[i].name, sizeof(resources[i].name),
++        ucs_snprintf_zero(devices[num_devices].name,
++                          sizeof(devices[num_devices].name),
 +                          "%s", name);
-+        resources[i].type = UCT_DEVICE_TYPE_NET;
-+        ++num_resources;
++        devices[num_devices].type = UCT_DEVICE_TYPE_NET;
++        ++num_devices;
 +    }
 +
-+    *num_tl_devices_p = num_resources;
-+    *tl_devices_p     = resources;
-+    status            = UCS_OK;
++    *num_devices_p = num_devices;
++    *devices_p     = devices;
++    status         = UCS_OK;
 +
++out_free:
++    ucs_free(conf.ifc_req);
 +out:
-+    if (sock >= 0) close(sock);
++    if (sock >= 0) {
++        close(sock);
++    }
 +    return status;
 +}
 +
  ucs_status_t uct_tcp_query_devices(uct_md_h md,
                                     uct_tl_device_resource_t **devices_p,
                                     unsigned *num_devices_p)
-@@ -599,9 +663,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
+@@ -893,9 +974,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
  
      dir = opendir(UCT_TCP_IFACE_NETDEV_DIR);
      if (dir == NULL) {
 -        ucs_error("opendir(%s) failed: %m", UCT_TCP_IFACE_NETDEV_DIR);
 -        status = UCS_ERR_IO_ERROR;
 -        goto out;
-+	/* When /sys is unavailable, as can be the case in a container,
-+	 * resort to a good old 'ioctl'.  */
-+	return query_devices_ioctl(md, devices_p, num_devices_p);
++        /* When /sys is unavailable, as can be the case in a container,
++         * resort to a good old 'ioctl'.  */
++        return uct_tcp_query_devices_ioctl(md, devices_p, num_devices_p);
      }
  
      devices     = NULL;
-@@ -655,7 +719,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
+@@ -963,7 +1044,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md,
  
  out_closedir:
      closedir(dir);