Skip to content

Commit a0f60ae

Browse files
committed
btl/uct: move tl attributes off of tl context structure
In theory the tl attributes do not differ between contexts, so query them once when the tl is created rather than once per context. This removes the need to allocate the first context, so that code has also been removed.
1 parent 0586a28 commit a0f60ae

File tree

8 files changed

+97
-66
lines changed

8 files changed

+97
-66
lines changed

opal/mca/btl/uct/btl_uct.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ int mca_btl_uct_process_connection_request(mca_btl_uct_module_t *module,
319319
*/
320320
static inline bool mca_btl_uct_tl_supports_rdma(mca_btl_uct_tl_t *tl)
321321
{
322-
return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags
322+
return (tl->uct_iface_attr.cap.flags
323323
& (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY))
324324
== (UCT_IFACE_FLAG_PUT_ZCOPY | UCT_IFACE_FLAG_GET_ZCOPY);
325325
}
@@ -329,7 +329,7 @@ static inline bool mca_btl_uct_tl_supports_rdma(mca_btl_uct_tl_t *tl)
329329
*/
330330
static inline bool mca_btl_uct_tl_support_am(mca_btl_uct_tl_t *tl)
331331
{
332-
return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags
332+
return (tl->uct_iface_attr.cap.flags
333333
& (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_AM_BCOPY | UCT_IFACE_FLAG_AM_ZCOPY));
334334
}
335335

@@ -340,7 +340,7 @@ static inline bool mca_btl_uct_tl_support_am(mca_btl_uct_tl_t *tl)
340340
*/
341341
static inline bool mca_btl_uct_tl_supports_conn(mca_btl_uct_tl_t *tl)
342342
{
343-
return (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags
343+
return (tl->uct_iface_attr.cap.flags
344344
& (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE))
345345
== (UCT_IFACE_FLAG_AM_SHORT | UCT_IFACE_FLAG_CONNECT_TO_IFACE);
346346
}
@@ -352,7 +352,7 @@ static inline bool mca_btl_uct_tl_supports_conn(mca_btl_uct_tl_t *tl)
352352
*/
353353
static inline bool mca_btl_uct_tl_requires_connection_tl(mca_btl_uct_tl_t *tl)
354354
{
355-
return !(MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE);
355+
return !(tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE);
356356
}
357357

358358
/**

opal/mca/btl/uct/btl_uct_am.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/*
33
* Copyright (c) 2018 Los Alamos National Security, LLC. All rights
44
* reserved.
5+
* Copyright (c) 2025 Google, LLC. All rights reserved.
56
* $COPYRIGHT$
67
*
78
* Additional copyrights may follow
@@ -26,7 +27,7 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc(mca_btl_base_module_t *btl,
2627
mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
2728
mca_btl_uct_base_frag_t *frag = NULL;
2829

29-
if (size <= (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
30+
if (size <= (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) {
3031
frag = mca_btl_uct_frag_alloc_short(uct_btl, endpoint);
3132
} else if (size <= uct_btl->super.btl_eager_limit) {
3233
frag = mca_btl_uct_frag_alloc_eager(uct_btl, endpoint);
@@ -105,7 +106,7 @@ struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t
105106
frag->uct_iov.length = total_size;
106107
frag->base.order = order;
107108
frag->base.des_flags = flags;
108-
if (total_size > (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
109+
if (total_size > (size_t) uct_btl->am_tl->uct_iface_attr.cap.am.max_short) {
109110
frag->segments[0].seg_len = reserve;
110111
frag->segments[1].seg_len = *size;
111112
frag->segments[1].seg_addr.pval = data_ptr;
@@ -181,7 +182,7 @@ int mca_btl_uct_send_frag(mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t
181182
mca_btl_uct_context_lock(context);
182183
/* attempt to post the fragment */
183184
if (NULL != frag->base.super.registration
184-
&& (context->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY)) {
185+
&& (uct_btl->am_tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_AM_ZCOPY)) {
185186
frag->comp.dev_context = context;
186187
ucs_status = uct_ep_am_zcopy(ep_handle, MCA_BTL_UCT_FRAG, &frag->header,
187188
sizeof(frag->header), &frag->uct_iov, 1, 0,
@@ -196,7 +197,7 @@ int mca_btl_uct_send_frag(mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t
196197
/* short message */
197198
if (1 == frag->base.des_segment_count
198199
&& (frag->uct_iov.length + 8)
199-
< MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
200+
< uct_btl->am_tl->uct_iface_attr.cap.am.max_short) {
200201
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, frag->header.value,
201202
frag->uct_iov.buffer, frag->uct_iov.length);
202203

@@ -290,9 +291,9 @@ static size_t mca_btl_uct_sendi_pack(void *data, void *arg)
290291
return args->header_size + args->payload_size + 8;
291292
}
292293

293-
static inline size_t mca_btl_uct_max_sendi(mca_btl_uct_module_t *uct_btl, int context_id)
294+
static inline size_t mca_btl_uct_max_sendi(mca_btl_uct_module_t *uct_btl)
294295
{
295-
return MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context_id).cap.am.max_bcopy;
296+
return uct_btl->am_tl->uct_iface_attr.cap.am.max_bcopy;
296297
}
297298

298299
int mca_btl_uct_sendi(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
@@ -312,7 +313,7 @@ int mca_btl_uct_sendi(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo
312313

313314
rc = mca_btl_uct_endpoint_check_am(uct_btl, endpoint, context, &ep_handle);
314315
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc
315-
|| msg_size > mca_btl_uct_max_sendi(uct_btl, context->context_id))) {
316+
|| msg_size > mca_btl_uct_max_sendi(uct_btl))) {
316317
if (descriptor) {
317318
*descriptor = mca_btl_uct_alloc(btl, endpoint, order, total_size, flags);
318319
}
@@ -326,7 +327,7 @@ int mca_btl_uct_sendi(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo
326327
if (0 == payload_size) {
327328
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, header,
328329
header_size);
329-
} else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id)
330+
} else if (msg_size < (size_t) uct_btl->am_tl->uct_iface_attr
330331
.cap.am.max_short) {
331332
int8_t *data = alloca(total_size);
332333
size_t packed_payload_size = payload_size;

opal/mca/btl/uct/btl_uct_component.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -246,14 +246,14 @@ static size_t mca_btl_uct_tl_modex_size(mca_btl_uct_tl_t *tl)
246246
{
247247
const size_t size = strlen(tl->uct_tl_name) + 1;
248248

249-
if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
249+
if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
250250
/* pad out to a multiple of 4 bytes */
251-
return (4 + 3 + size + MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len
252-
+ MCA_BTL_UCT_TL_ATTR(tl, 0).iface_addr_len)
251+
return (4 + 3 + size + tl->uct_iface_attr.device_addr_len
252+
+ tl->uct_iface_attr.iface_addr_len)
253253
& ~3;
254254
}
255255

256-
return (4 + 3 + size + MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len) & ~3;
256+
return (4 + 3 + size + tl->uct_iface_attr.device_addr_len) & ~3;
257257
}
258258

259259
static size_t mca_btl_uct_module_modex_size(mca_btl_uct_module_t *module)
@@ -292,13 +292,13 @@ static size_t mca_btl_uct_tl_modex_pack(mca_btl_uct_tl_t *tl, uint8_t *modex_dat
292292
* the same endpoint since we are only doing RDMA. if any of these assumptions are
293293
* wrong then we can't delay creating the other contexts and must include their
294294
* information in the modex. */
295-
if (MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
295+
if (tl->uct_iface_attr.cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE) {
296296
uct_iface_get_address(dev_context->uct_iface, (uct_iface_addr_t *) modex_data);
297-
modex_data += MCA_BTL_UCT_TL_ATTR(tl, 0).iface_addr_len;
297+
modex_data += tl->uct_iface_attr.iface_addr_len;
298298
}
299299

300300
uct_iface_get_device_address(dev_context->uct_iface, (uct_device_addr_t *) modex_data);
301-
modex_data += MCA_BTL_UCT_TL_ATTR(tl, 0).device_addr_len;
301+
modex_data += tl->uct_iface_attr.device_addr_len;
302302

303303
return modex_size;
304304
}
@@ -406,10 +406,6 @@ ucs_status_t mca_btl_uct_am_handler(void *arg, void *data, size_t length, unsign
406406
mca_btl_uct_device_context_t *tl_context = (mca_btl_uct_device_context_t *) arg;
407407
mca_btl_uct_module_t *uct_btl = tl_context->uct_btl;
408408
mca_btl_uct_am_header_t *header = (mca_btl_uct_am_header_t *) data;
409-
if (header->data.tag == 0xff) {
410-
fprintf (stderr, "%d: got an invalid tag\n", getpid());
411-
while (true) {}
412-
}
413409
mca_btl_active_message_callback_t *reg = mca_btl_base_active_message_trigger + header->data.tag;
414410
mca_btl_base_segment_t seg = {.seg_addr = {.pval = (void *) ((intptr_t) data
415411
+ sizeof(*header))},
@@ -424,7 +420,6 @@ ucs_status_t mca_btl_uct_am_handler(void *arg, void *data, size_t length, unsign
424420
tl_context->in_am_callback = true;
425421
reg->cbfunc(&uct_btl->super, &desc);
426422
tl_context->in_am_callback = false;
427-
header->data.tag = 0xff;
428423

429424
return UCS_OK;
430425
}

opal/mca/btl/uct/btl_uct_endpoint.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ static int mca_btl_uct_endpoint_connect_iface(mca_btl_uct_module_t *uct_btl, mca
150150
/* easy case. just connect to the interface */
151151
iface_addr = (uct_iface_addr_t *) tl_data;
152152
device_addr = (uct_device_addr_t *) ((uintptr_t) iface_addr
153-
+ MCA_BTL_UCT_TL_ATTR(tl, tl_context->context_id)
153+
+ tl->uct_iface_attr
154154
.iface_addr_len);
155155

156156
BTL_VERBOSE(("connecting endpoint to interface"));
@@ -270,7 +270,7 @@ static int mca_btl_uct_endpoint_get_helper_endpoint(mca_btl_uct_module_t *uct_bt
270270

271271
uct_iface_addr_t *iface_addr = (uct_iface_addr_t *) conn_tl_data;
272272
uct_device_addr_t *device_addr = (uct_device_addr_t *) ((uintptr_t) conn_tl_data
273-
+ MCA_BTL_UCT_TL_ATTR(conn_tl, 0).iface_addr_len);
273+
+ conn_tl->uct_iface_attr.iface_addr_len);
274274

275275
endpoint->conn_ep = OBJ_NEW(mca_btl_uct_connection_ep_t);
276276
if (OPAL_UNLIKELY(NULL == endpoint->conn_ep)) {
@@ -308,7 +308,7 @@ static int mca_btl_uct_endpoint_send_connection_data(
308308
BTL_VERBOSE(("connecting endpoint to remote endpoint"));
309309

310310
size_t request_length = sizeof(mca_btl_uct_conn_req_t)
311-
+ MCA_BTL_UCT_TL_ATTR(tl, tl_context->context_id).ep_addr_len;
311+
+ tl->uct_iface_attr.ep_addr_len;
312312
mca_btl_uct_conn_req_t *request = alloca(request_length);
313313

314314
/* fill in common request parameters */

opal/mca/btl/uct/btl_uct_module.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ static int mca_btl_uct_add_procs(mca_btl_base_module_t *btl, size_t nprocs,
9090
if (am_tl) {
9191
rc = opal_free_list_init(&uct_module->short_frags, sizeof(mca_btl_uct_base_frag_t),
9292
opal_cache_line_size, OBJ_CLASS(mca_btl_uct_base_frag_t),
93-
MCA_BTL_UCT_TL_ATTR(am_tl, 0).cap.am.max_short,
93+
am_tl->uct_iface_attr.cap.am.max_short,
9494
opal_cache_line_size, 0, 1024, 64, NULL, 0, NULL, NULL, NULL);
9595

9696
rc = opal_free_list_init(&uct_module->eager_frags, sizeof(mca_btl_uct_base_frag_t),

opal/mca/btl/uct/btl_uct_rdma.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ int mca_btl_uct_get(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoin
126126

127127
mca_btl_uct_context_lock(context);
128128

129-
if (size <= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.get.max_bcopy) {
129+
if (size <= uct_btl->rdma_tl->uct_iface_attr.cap.get.max_bcopy) {
130130
ucs_status = uct_ep_get_bcopy(ep_handle, mca_btl_uct_get_unpack, local_address, size,
131131
remote_address, rkey.rkey, &comp->uct_comp);
132132
} else {
@@ -223,7 +223,7 @@ int mca_btl_uct_put(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoin
223223
/* determine what UCT protocol should be used */
224224
if (size <= uct_btl->super.btl_put_local_registration_threshold) {
225225
use_short = size
226-
<= MCA_BTL_UCT_TL_ATTR(uct_btl->rdma_tl, context->context_id).cap.put.max_short;
226+
<= uct_btl->rdma_tl->uct_iface_attr.cap.put.max_short;
227227
use_bcopy = !use_short;
228228
}
229229

0 commit comments

Comments
 (0)