Skip to content

Commit 45138d3

Browse files
authored
gh-131876: extract _hashlib helpers into a separate directory (#136995)
The `Modules/hashlib.h` helper file has been removed and its contents split into multiple files:

* `Modules/_hashlib/hashlib_buffer.[ch]` -- Utilities for getting a buffer view and handling buffer inputs.
* `Modules/_hashlib/hashlib_fetch.h` -- Utilities used when fetching a message digest from a digest-like identifier. Currently, this file only contains common error messages as the fetching API is not yet implemented.
* `Modules/_hashlib/hashlib_mutex.h` -- Utilities for managing the lock on cryptographic hash objects.
1 parent eefd70f commit 45138d3

20 files changed

+346
-221
lines changed

Makefile.pre.in

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ ENSUREPIP= @ENSUREPIP@
227227
# Internal static libraries
228228
LIBMPDEC_A= Modules/_decimal/libmpdec/libmpdec.a
229229
LIBEXPAT_A= Modules/expat/libexpat.a
230+
LIBHASHLIB_INTERNAL_A=Modules/_hashlib/libhashlib.a
230231

231232
# HACL* build configuration
232233
LIBHACL_CFLAGS=@LIBHACL_CFLAGS@
@@ -761,6 +762,17 @@ LIBHACL_HMAC_HEADERS= \
761762
$(LIBHACL_BLAKE2_HEADERS) \
762763
$(LIBHACL_HEADERS)
763764

765+
##########################################################################
766+
# Internal library for cryptographic primitives
767+
768+
LIBHASHLIB_INTERNAL_OBJS= \
769+
Modules/_hashlib/hashlib_buffer.o
770+
771+
LIBHASHLIB_INTERNAL_HEADERS= \
772+
Modules/_hashlib/hashlib_buffer.h \
773+
Modules/_hashlib/hashlib_fetch.h \
774+
Modules/_hashlib/hashlib_mutex.h
775+
764776
#########################################################################
765777
# Rules
766778

@@ -1511,6 +1523,17 @@ $(LIBEXPAT_A): $(LIBEXPAT_OBJS)
15111523
-rm -f $@
15121524
$(AR) $(ARFLAGS) $@ $(LIBEXPAT_OBJS)
15131525

1526+
##########################################################################
1527+
# '_hashlib', '_hmac' and HACL*-based modules helpers
1528+
LIBHASHLIB_INTERNAL_CFLAGS=@LIBHASHLIB_INTERNAL_CFLAGS@ $(PY_STDMODULE_CFLAGS) $(CCSHARED)
1529+
1530+
Modules/_hashlib/hashlib_buffer.o: Modules/_hashlib/hashlib_buffer.c $(LIBHASHLIB_INTERNAL_HEADERS) $(PYTHON_HEADERS)
1531+
$(CC) -I$(srcdir)/Modules/_hashlib -c $(LIBHASHLIB_INTERNAL_CFLAGS) -o $@ $(srcdir)/Modules/_hashlib/hashlib_buffer.c
1532+
1533+
$(LIBHASHLIB_INTERNAL_A): $(LIBHASHLIB_INTERNAL_OBJS)
1534+
-rm -f $@
1535+
$(AR) $(ARFLAGS) $@ $(LIBHASHLIB_INTERNAL_OBJS)
1536+
15141537
##########################################################################
15151538
# HACL* library build
15161539
#
@@ -3353,21 +3376,21 @@ MODULE__CTYPES_TEST_DEPS=$(srcdir)/Modules/_ctypes/_ctypes_test_generated.c.h
33533376
MODULE__CTYPES_MALLOC_CLOSURE=@MODULE__CTYPES_MALLOC_CLOSURE@
33543377
MODULE__DECIMAL_DEPS=$(srcdir)/Modules/_decimal/docstrings.h @LIBMPDEC_INTERNAL@
33553378
MODULE__ELEMENTTREE_DEPS=$(srcdir)/Modules/pyexpat.c @LIBEXPAT_INTERNAL@
3356-
MODULE__HASHLIB_DEPS=$(srcdir)/Modules/hashlib.h
3379+
MODULE__HASHLIB_DEPS=@LIBHASHLIB_INTERNAL@
33573380
MODULE__IO_DEPS=$(srcdir)/Modules/_io/_iomodule.h
33583381

33593382
# HACL*-based cryptographic primitives
3360-
MODULE__MD5_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_MD5_HEADERS) $(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3383+
MODULE__MD5_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_MD5_HEADERS) $(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33613384
MODULE__MD5_LDEPS=$(LIBHACL_MD5_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3362-
MODULE__SHA1_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA1_HEADERS) $(LIBHACL_SHA1_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3385+
MODULE__SHA1_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_SHA1_HEADERS) $(LIBHACL_SHA1_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33633386
MODULE__SHA1_LDEPS=$(LIBHACL_SHA1_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3364-
MODULE__SHA2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA2_HEADERS) $(LIBHACL_SHA2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3387+
MODULE__SHA2_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_SHA2_HEADERS) $(LIBHACL_SHA2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33653388
MODULE__SHA2_LDEPS=$(LIBHACL_SHA2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3366-
MODULE__SHA3_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_SHA3_HEADERS) $(LIBHACL_SHA3_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3389+
MODULE__SHA3_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_SHA3_HEADERS) $(LIBHACL_SHA3_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33673390
MODULE__SHA3_LDEPS=$(LIBHACL_SHA3_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3368-
MODULE__BLAKE2_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_BLAKE2_HEADERS) $(LIBHACL_BLAKE2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3391+
MODULE__BLAKE2_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_BLAKE2_HEADERS) $(LIBHACL_BLAKE2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33693392
MODULE__BLAKE2_LDEPS=$(LIBHACL_BLAKE2_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3370-
MODULE__HMAC_DEPS=$(srcdir)/Modules/hashlib.h $(LIBHACL_HMAC_HEADERS) $(LIBHACL_HMAC_LIB_@LIBHACL_LDEPS_LIBTYPE@)
3393+
MODULE__HMAC_DEPS=$(MODULE__HASHLIB_DEPS) $(LIBHACL_HMAC_HEADERS) $(LIBHACL_HMAC_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33713394
MODULE__HMAC_LDEPS=$(LIBHACL_HMAC_LIB_@LIBHACL_LDEPS_LIBTYPE@)
33723395

33733396
MODULE__SOCKET_DEPS=$(srcdir)/Modules/socketmodule.h $(srcdir)/Modules/addrinfo.h $(srcdir)/Modules/getaddrinfo.c $(srcdir)/Modules/getnameinfo.c
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Remove :file:`!Modules/hashlib.h` and move its content into dedicated files
2+
now located in ``Modules/_hashlib``. Patch by Bénédikt Tran.

Modules/_hashlib/hashlib_buffer.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#include "hashlib_buffer.h"
2+
3+
/*
 * Select the object to hash from the 'data' and 'string' arguments.
 *
 * The selected object (or NULL) is stored in '*res'. The pointer is copied
 * as is: this function does not modify any reference count.
 *
 * Return 1 if an object was selected, 0 if neither argument was given,
 * and -1 with an exception set on error (in which case '*res' is NULL).
 */
int
4+
_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string)
5+
{
6+
if (data != NULL && string == NULL) {
7+
// called as H(data) or H(data=...)
8+
*res = data;
9+
return 1;
10+
}
11+
else if (data == NULL && string != NULL) {
12+
// called as H(string=...): still accepted, but deprecated
13+
if (PyErr_WarnEx(PyExc_DeprecationWarning,
14+
"the 'string' keyword parameter is deprecated since "
15+
"Python 3.15 and slated for removal in Python 3.19; "
16+
"use the 'data' keyword parameter or pass the data "
17+
"to hash as a positional argument instead", 1) < 0)
18+
{
// PyErr_WarnEx() reported an error (e.g., the warning was
// turned into an exception): propagate it to the caller
19+
*res = NULL;
20+
return -1;
21+
}
22+
*res = string;
23+
return 1;
24+
}
25+
else if (data == NULL && string == NULL) {
26+
// fast path when no data is given
27+
assert(!PyErr_Occurred());
28+
*res = NULL;
29+
return 0;
30+
}
31+
else {
32+
// called as H(data=..., string=...): both arguments were given
33+
*res = NULL;
34+
PyErr_SetString(PyExc_TypeError,
35+
"'data' and 'string' are mutually exclusive "
36+
"and support for 'string' keyword parameter "
37+
"is slated for removal in a future version.");
38+
return -1;
39+
}
40+
}
41+
42+
/*
 * Fill 'view' with a PyBUF_SIMPLE buffer view of 'obj'.
 *
 * Return 0 on success. Return -1 with an exception set if 'obj' is a str,
 * does not support the buffer protocol, cannot export a buffer, or if the
 * exported buffer has more than one dimension.
 *
 * NOTE(review): on success, the view is not released here, so the caller
 * is presumably responsible for calling PyBuffer_Release() on it.
 */
int
43+
_Py_hashlib_get_buffer_view(PyObject *obj, Py_buffer *view)
44+
{
45+
// str objects are rejected up front with a dedicated error message
if (PyUnicode_Check(obj)) {
46+
PyErr_SetString(PyExc_TypeError,
47+
"Strings must be encoded before hashing");
48+
return -1;
49+
}
50+
if (!PyObject_CheckBuffer(obj)) {
51+
PyErr_SetString(PyExc_TypeError,
52+
"object supporting the buffer API required");
53+
return -1;
54+
}
55+
// no exception is set here: the failing call is relied upon to set it
if (PyObject_GetBuffer(obj, view, PyBUF_SIMPLE) == -1) {
56+
return -1;
57+
}
58+
if (view->ndim > 1) {
59+
PyErr_SetString(PyExc_BufferError,
60+
"Buffer must be single dimension");
61+
// the view was successfully acquired above: release it before failing
PyBuffer_Release(view);
62+
return -1;
63+
}
64+
return 0;
65+
}

Modules/_hashlib/hashlib_buffer.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#ifndef _HASHLIB_HASHLIB_BUFFER_H
2+
#define _HASHLIB_HASHLIB_BUFFER_H
3+
4+
#include "Python.h"
5+
6+
/*
7+
* Allow the 'data' or 'string' keyword to be used in hashlib.new()
8+
* and in the named constructors of other hash functions.
9+
*
10+
* - If 'data' and 'string' are both non-NULL, set an exception and return -1.
11+
* - If 'data' and 'string' are both NULL, set '*res' to NULL and return 0.
12+
* - Otherwise, set '*res' to 'data' or 'string' and return 1. If 'string' is
13+
* given, emit a DeprecationWarning (return -1 if it is raised as an error).
14+
*
15+
* The symbol is exported for '_hashlib' and HACL*-based extension modules.
16+
*/
17+
PyAPI_FUNC(int)
18+
_Py_hashlib_data_argument(PyObject **res, PyObject *data, PyObject *string);
19+
20+
/*
21+
* Obtain a buffer view from a buffer-like object 'obj'.
22+
*
23+
* On success, store the result in 'view' and return 0.
24+
* On error, set an exception and return -1.
25+
*
26+
* The symbol is exported for '_hashlib' and HACL*-based extension modules.
27+
*/
28+
PyAPI_FUNC(int)
29+
_Py_hashlib_get_buffer_view(PyObject *obj, Py_buffer *view);
30+
31+
/*
32+
* Call _Py_hashlib_get_buffer_view() and check if it succeeded.
33+
*
34+
* On error, set an exception and execute the ERRACTION statements.
35+
*/
36+
#define GET_BUFFER_VIEW_OR_ERROR(OBJ, VIEW, ERRACTION) \
37+
do { \
38+
if (_Py_hashlib_get_buffer_view(OBJ, VIEW) < 0) { \
39+
assert(PyErr_Occurred()); \
40+
ERRACTION; \
41+
} \
42+
} while (0)
43+
44+
/* Specialization of GET_BUFFER_VIEW_OR_ERROR() returning NULL on error. */
45+
#define GET_BUFFER_VIEW_OR_ERROUT(OBJ, VIEW) \
46+
GET_BUFFER_VIEW_OR_ERROR(OBJ, VIEW, return NULL)
47+
48+
#endif // !_HASHLIB_HASHLIB_BUFFER_H

Modules/_hashlib/hashlib_fetch.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Utilities used when fetching a message digest from a digest-like identifier.
3+
*/
4+
5+
#ifndef _HASHLIB_HASHLIB_FETCH_H
6+
#define _HASHLIB_HASHLIB_FETCH_H
7+
8+
#include "Python.h"
9+
10+
/*
11+
* Internal error messages used for reporting an unsupported hash algorithm.
12+
* The algorithm can be given by its name, a callable or a PEP-247 module.
13+
* The same message is raised by Lib/hashlib.py::__get_builtin_constructor()
14+
* and _hmacmodule.c::find_hash_info().
15+
*/
16+
#define _Py_HASHLIB_UNSUPPORTED_ALGORITHM "unsupported hash algorithm %S"
17+
#define _Py_HASHLIB_UNSUPPORTED_STR_ALGORITHM "unsupported hash algorithm %s"
18+
19+
#endif // !_HASHLIB_HASHLIB_FETCH_H

Modules/_hashlib/hashlib_mutex.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#ifndef _HASHLIB_HASHLIB_MUTEX_H
2+
#define _HASHLIB_HASHLIB_MUTEX_H
3+
4+
#include "Python.h"
5+
#include "pycore_lock.h" // PyMutex
6+
7+
/*
8+
* Message length above which the GIL is to be released
9+
* when performing hashing operations.
10+
*/
11+
#define HASHLIB_GIL_MINSIZE 2048
12+
13+
/*
14+
* Helper code to synchronize access to the hash object when the GIL is
15+
* released around a CPU consuming hashlib operation.
16+
*
17+
* Code accessing a mutable part of the hash object must be enclosed in
18+
* an HASHLIB_{ACQUIRE,RELEASE}_LOCK block or explicitly acquire and release
19+
* the mutex inside a Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block if
20+
* they wish to release the GIL for an operation.
21+
*/
22+
23+
#define HASHLIB_OBJECT_HEAD \
24+
PyObject_HEAD \
25+
/* Guard against race conditions during incremental update(). */ \
26+
PyMutex mutex;
27+
28+
#define HASHLIB_INIT_MUTEX(OBJ) \
29+
do { \
30+
(OBJ)->mutex = (PyMutex){0}; \
31+
} while (0)
32+
33+
#define HASHLIB_ACQUIRE_LOCK(OBJ) PyMutex_Lock(&(OBJ)->mutex)
34+
#define HASHLIB_RELEASE_LOCK(OBJ) PyMutex_Unlock(&(OBJ)->mutex)
35+
36+
// Macros for executing code while conditionally holding the GIL.
37+
//
38+
// These only drop the GIL if 'SIZE' exceeds HASHLIB_GIL_MINSIZE, that
39+
// is, if the amount of data to process is above the threshold. In all
40+
// other cases, the statements are executed with the GIL held: those
41+
// are the assumed always "fast" operations that you aren't releasing
42+
// the GIL around.
43+
44+
/*
45+
* Execute a suite of C statements 'STATEMENTS'.
46+
*
47+
* The GIL is held unless 'SIZE' exceeds the HASHLIB_GIL_MINSIZE threshold.
* 'STATEMENTS' is expanded in both branches and may thus run without the GIL.
48+
*/
49+
#define HASHLIB_EXTERNAL_INSTRUCTIONS_UNLOCKED(SIZE, STATEMENTS) \
50+
do { \
51+
if ((SIZE) > HASHLIB_GIL_MINSIZE) { \
52+
Py_BEGIN_ALLOW_THREADS \
53+
STATEMENTS; \
54+
Py_END_ALLOW_THREADS \
55+
} \
56+
else { \
57+
STATEMENTS; \
58+
} \
59+
} while (0)
60+
61+
/*
62+
* Lock 'OBJ' and execute a suite of C statements 'STATEMENTS'.
63+
*
64+
* The GIL is held unless 'SIZE' exceeds the HASHLIB_GIL_MINSIZE threshold.
* When the GIL is released, the lock on 'OBJ' is acquired and released
* inside the Py_BEGIN_ALLOW_THREADS -- Py_END_ALLOW_THREADS block.
* 'STATEMENTS' is expanded in both branches and may thus run without the GIL.
65+
*/
66+
#define HASHLIB_EXTERNAL_INSTRUCTIONS_LOCKED(OBJ, SIZE, STATEMENTS) \
67+
do { \
68+
if ((SIZE) > HASHLIB_GIL_MINSIZE) { \
69+
Py_BEGIN_ALLOW_THREADS \
70+
HASHLIB_ACQUIRE_LOCK(OBJ); \
71+
STATEMENTS; \
72+
HASHLIB_RELEASE_LOCK(OBJ); \
73+
Py_END_ALLOW_THREADS \
74+
} \
75+
else { \
76+
HASHLIB_ACQUIRE_LOCK(OBJ); \
77+
STATEMENTS; \
78+
HASHLIB_RELEASE_LOCK(OBJ); \
79+
} \
80+
} while (0)
81+
82+
#endif // !_HASHLIB_HASHLIB_MUTEX_H

Modules/_hashopenssl.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,17 @@
2424

2525
#include "Python.h"
2626
#include "pycore_hashtable.h"
27-
#include "pycore_strhex.h" // _Py_strhex()
28-
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_RELAXED
29-
#include "hashlib.h"
27+
#include "pycore_strhex.h" // _Py_strhex()
28+
#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_PTR_RELAXED
29+
30+
#include "_hashlib/hashlib_buffer.h"
31+
#include "_hashlib/hashlib_fetch.h"
32+
#include "_hashlib/hashlib_mutex.h"
3033

3134
/* EVP is the preferred interface to hashing in OpenSSL */
3235
#include <openssl/evp.h>
3336
#include <openssl/hmac.h>
34-
#include <openssl/crypto.h> // FIPS_mode()
37+
#include <openssl/crypto.h> // FIPS_mode()
3538
/* We use the object interface to discover what hashes OpenSSL supports. */
3639
#include <openssl/objects.h>
3740
#include <openssl/err.h>
@@ -532,7 +535,7 @@ raise_unsupported_algorithm_error(_hashlibstate *state, PyObject *digestmod)
532535
{
533536
raise_unsupported_algorithm_impl(
534537
state->unsupported_digestmod_error,
535-
HASHLIB_UNSUPPORTED_ALGORITHM,
538+
_Py_HASHLIB_UNSUPPORTED_ALGORITHM,
536539
digestmod
537540
);
538541
}
@@ -542,7 +545,7 @@ raise_unsupported_str_algorithm_error(_hashlibstate *state, const char *name)
542545
{
543546
raise_unsupported_algorithm_impl(
544547
state->unsupported_digestmod_error,
545-
HASHLIB_UNSUPPORTED_STR_ALGORITHM,
548+
_Py_HASHLIB_UNSUPPORTED_STR_ALGORITHM,
546549
name
547550
);
548551
}

Modules/blake2module.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
#endif
1616

1717
#include "Python.h"
18-
#include "hashlib.h"
19-
#include "pycore_strhex.h" // _Py_strhex()
20-
#include "pycore_typeobject.h"
2118
#include "pycore_moduleobject.h"
19+
#include "pycore_strhex.h" // _Py_strhex()
20+
#include "pycore_typeobject.h"
21+
22+
#include "_hashlib/hashlib_buffer.h"
23+
#include "_hashlib/hashlib_mutex.h"
2224

2325
// QUICK CPU AUTODETECTION
2426
//

0 commit comments

Comments
 (0)