Skip to content

Commit 0586a28

Browse files
committed
btl/uct: allow connections to be formed using a separate memory domain
It is possible that the current memory domain does not have an adequate transport for forming endpoint-to-endpoint connections. When this is the case, the btl will fail to function. To support these situations, this CL adds support for using an alternate transport (usually tcp) which can be used to make the endpoint connections. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent 01da1c4 commit 0586a28

File tree

6 files changed

+467
-217
lines changed

6 files changed

+467
-217
lines changed

opal/mca/btl/uct/btl_uct.h

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ struct mca_btl_uct_module_t {
6464
/** base BTL interface */
6565
mca_btl_base_module_t super;
6666

67+
/** module index in the component module array */
68+
int module_index;
69+
6770
/** whether the module has been fully initialized or not */
6871
bool initialized;
6972

@@ -141,9 +144,15 @@ struct mca_btl_uct_component_t {
141144

142145
/** allowed UCT memory domains */
143146
char *memory_domains;
147+
mca_btl_uct_include_list_t memory_domain_list;
144148

145149
/** allowed transports */
146150
char *allowed_transports;
151+
mca_btl_uct_include_list_t allowed_transport_list;
152+
153+
/** transports to consider for forming connections */
154+
char *connection_domains;
155+
mca_btl_uct_include_list_t connection_domain_list;
147156

148157
/** number of worker contexts to create */
149158
int num_contexts_per_module;
@@ -158,6 +167,10 @@ struct mca_btl_uct_component_t {
158167

159168
/** connection retry timeout */
160169
unsigned int connection_retry_timeout;
170+
171+
/** alternate connection-only module that can be used if no suitable
172+
* connection tl is found. this is usually a tcp tl. */
173+
mca_btl_uct_module_t *conn_module;
161174
};
162175
typedef struct mca_btl_uct_component_t mca_btl_uct_component_t;
163176

@@ -294,7 +307,8 @@ struct mca_btl_base_endpoint_t *mca_btl_uct_get_ep(struct mca_btl_base_module_t
294307
opal_proc_t *proc);
295308

296309
int mca_btl_uct_query_tls(mca_btl_uct_module_t *module, mca_btl_uct_md_t *md,
297-
uct_tl_resource_desc_t *tl_descs, unsigned tl_count);
310+
uct_tl_resource_desc_t *tl_descs, unsigned tl_count,
311+
bool evaluate_for_conn_only);
298312
int mca_btl_uct_process_connection_request(mca_btl_uct_module_t *module,
299313
mca_btl_uct_conn_req_t *req);
300314

@@ -341,5 +355,15 @@ static inline bool mca_btl_uct_tl_requires_connection_tl(mca_btl_uct_tl_t *tl)
341355
return !(MCA_BTL_UCT_TL_ATTR(tl, 0).cap.flags & UCT_IFACE_FLAG_CONNECT_TO_IFACE);
342356
}
343357

358+
/**
359+
* @brief Find the rank of `name` in the include list `list`.
360+
*
361+
* @param[in] name name to find
362+
* @param[in] list list to search
363+
*
364+
* A negative result means the name is not present or the list is negated.
365+
*/
366+
int mca_btl_uct_include_list_rank (const char *name, const mca_btl_uct_include_list_t *list);
367+
344368
END_C_DECLS
345369
#endif

opal/mca/btl/uct/btl_uct_am.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t heade
5555
{
5656
uint32_t iov_count = 1;
5757
struct iovec iov;
58-
size_t length;
5958

6059
if (header_size > 0) {
6160
assert(NULL != header);

0 commit comments

Comments
 (0)