From c83c7fe650b69b1947fd6761adaef1bfb518d9e9 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 14 Oct 2020 22:37:56 +0500 Subject: [PATCH 01/76] Basic structure laid out for reading parameters from shared memory before passing to the function --- azure_functions_worker/bindings/datumdef.py | 4 ++++ .../bindings/shared_memory_manager.py | 14 ++++++++++++++ azure_functions_worker/protos/__init__.py | 3 ++- 3 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 azure_functions_worker/bindings/shared_memory_manager.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 4c511a9eb..38a182662 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -4,6 +4,7 @@ from typing import Any import json from .. import protos +from .shared_memory_manager import SharedMemoryManager class Datum: @@ -83,6 +84,9 @@ def from_typed_data(cls, td: protos.TypedData): val = td.collection_string elif tt == 'collection_sint64': val = td.collection_sint64 + elif tt == 'shared_memory_data': + shared_memory_manager = SharedMemoryManager() + val, tt = shared_memory_manager.get(td.shared_memory_data) elif tt is None: return None else: diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py new file mode 100644 index 000000000..e83fc4514 --- /dev/null +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -0,0 +1,14 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from .. 
import protos +from ..logging import logger + +class SharedMemoryManager: + def __init__(self): + pass + + def get(self, td: protos.SharedMemoryData): + logger.info('Reading from Shared Memory: %s', td) + print('Reading from Shared Memory: %s' % td) + return 'foo'.encode('utf-8'), td.type diff --git a/azure_functions_worker/protos/__init__.py b/azure_functions_worker/protos/__init__.py index 82b35ecaa..d22debe8c 100644 --- a/azure_functions_worker/protos/__init__.py +++ b/azure_functions_worker/protos/__init__.py @@ -22,4 +22,5 @@ ParameterBinding, TypedData, RpcHttp, - RpcLog) + RpcLog, + SharedMemoryData) From c96eba99ffdf84a8d234fdb07e8a56501d89b5ec Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 15 Oct 2020 13:22:06 +0500 Subject: [PATCH 02/76] Writing output from worker to Shared Memory --- azure_functions_worker/bindings/datumdef.py | 31 ++++++++++++++-- .../bindings/shared_memory_manager.py | 36 ++++++++++++++++--- .../protos/_src/src/proto/FunctionRpc.proto | 15 ++++++++ 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 38a182662..455b44e15 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -85,8 +85,17 @@ def from_typed_data(cls, td: protos.TypedData): elif tt == 'collection_sint64': val = td.collection_sint64 elif tt == 'shared_memory_data': - shared_memory_manager = SharedMemoryManager() - val, tt = shared_memory_manager.get(td.shared_memory_data) + shmem_mgr = SharedMemoryManager() + shmem_data = td.shared_memory_data + mmap_name = shmem_data.memory_mapped_file_name + offset = shmem_data.offset + count = shmem_data.count + ret = shmem_mgr.get(mmap_name, offset, count) + if ret is None: + return None + else: + val = ret + tt = shmem_data.type elif tt is None: return None else: @@ -101,7 +110,23 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 
'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': - return protos.TypedData(bytes=datum.value) + if SharedMemoryManager.is_enabled(): + shmem_mgr = SharedMemoryManager() + value = datum.value + mmap_name = shmem_mgr.put(value) + if mmap_name is not None: + shmem_data = protos.SharedMemoryData( + memory_mapped_file_name=mmap_name, + offset=0, + count=len(value), + type='bytes') + return protos.TypedData(shared_memory_data=shmem_data) + else: + raise Exception( + 'cannot write datum value into Shared Memory' + ) + else: + return protos.TypedData(bytes=datum.value) elif datum.type == 'json': return protos.TypedData(json=datum.value) elif datum.type == 'http': diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index e83fc4514..3131c4d86 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -1,14 +1,40 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from .. import protos +import uuid from ..logging import logger class SharedMemoryManager: + """ + Performs all operations related to reading/writing data from/to Shared + Memory. + """ def __init__(self): pass - def get(self, td: protos.SharedMemoryData): - logger.info('Reading from Shared Memory: %s', td) - print('Reading from Shared Memory: %s' % td) - return 'foo'.encode('utf-8'), td.type + @staticmethod + def is_enabled(): + """ + Whether supported types should be transferred between Functions host + and the worker using Shared Memory. + """ + return True + + def get(self, mmap_name: str, offset: int, count: int) -> (bytes): + """ + Reads data from the given Memory Mapped File with the provided name, + starting at the provided offset and reading a total of count bytes. + Returns a tuple containing the binary data read from Shared Memory + if successful, None otherwise. 
+ """ + logger.info('Reading from Shared Memory: %s', mmap_name) + return 'foo'.encode('utf-8') + + def put(self, data: bytes) -> (str): + """ + Writes the given data into Shared Memory. + Returns the name of the Memory Mapped File into which the data was + written if succesful, None otherwise. + """ + mmap_name = str(uuid.uuid4()) + return mmap_name \ No newline at end of file diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index 3ed1f0586..ec392c39e 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -96,6 +96,9 @@ message WorkerInitRequest { // inform worker of supported categories and their levels // i.e. Worker = Verbose, Function.MyFunc = None map log_categories = 3; + + // Full path of worker.config.json location + string worker_directory = 4; } // Worker responds with the result of initializing itself @@ -312,9 +315,18 @@ message TypedData { CollectionString collection_string = 9; CollectionDouble collection_double = 10; CollectionSInt64 collection_sint64 = 11; + SharedMemoryData shared_memory_data = 12; } } +// Used to provide metadata about shared memory region to read/write data +message SharedMemoryData { + string memory_mapped_file_name = 1; + int64 offset = 2; + int64 count = 3; + string type = 4; +} + // Used to encapsulate collection string message CollectionString { repeated string string = 1; @@ -481,4 +493,7 @@ message RpcHttp { TypedData rawBody = 17; repeated RpcClaimsIdentity identities = 18; repeated RpcHttpCookie cookies = 19; + map nullable_headers = 20; + map nullable_params = 21; + map nullable_query = 22; } From 65107aae438ca8325c509a497e08705d06c50587 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 15 Oct 2020 16:43:59 +0500 Subject: [PATCH 03/76] Put output from worker into Shared Memory --- 
azure_functions_worker/bindings/shared_memory_manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 3131c4d86..6d4806ffc 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -4,6 +4,7 @@ import uuid from ..logging import logger + class SharedMemoryManager: """ Performs all operations related to reading/writing data from/to Shared @@ -37,4 +38,4 @@ def put(self, data: bytes) -> (str): written if succesful, None otherwise. """ mmap_name = str(uuid.uuid4()) - return mmap_name \ No newline at end of file + return mmap_name From d071bd8a3298b9b0dedc3d0b8a710802c9bb3744 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 16 Oct 2020 15:14:56 +0500 Subject: [PATCH 04/76] Free shared memory resources after use --- azure_functions_worker/bindings/datumdef.py | 14 +- azure_functions_worker/bindings/meta.py | 15 +- .../bindings/shared_memory_manager.py | 29 ++- azure_functions_worker/dispatcher.py | 42 +++- .../mmap_handler/__init__.py | 0 .../mmap_handler/file_accessor.py | 196 ++++++++++++++++++ .../mmap_handler/file_reader.py | 82 ++++++++ .../mmap_handler/file_writer.py | 36 ++++ .../memorymappedfile_constants.py | 20 ++ .../memorymappedfile_controlflags.py | 60 ++++++ azure_functions_worker/protos/__init__.py | 4 +- .../protos/_src/src/proto/FunctionRpc.proto | 12 ++ 12 files changed, 490 insertions(+), 20 deletions(-) create mode 100644 azure_functions_worker/mmap_handler/__init__.py create mode 100644 azure_functions_worker/mmap_handler/file_accessor.py create mode 100644 azure_functions_worker/mmap_handler/file_reader.py create mode 100644 azure_functions_worker/mmap_handler/file_writer.py create mode 100644 azure_functions_worker/mmap_handler/memorymappedfile_constants.py create mode 100644 
azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 455b44e15..fdf2df774 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -51,7 +51,7 @@ def __repr__(self): return ''.format(self.type, val_repr) @classmethod - def from_typed_data(cls, td: protos.TypedData): + def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): tt = td.WhichOneof('data') if tt == 'http': http = td.http @@ -62,7 +62,7 @@ def from_typed_data(cls, td: protos.TypedData): k: Datum(v, 'string') for k, v in http.headers.items() }, body=( - Datum.from_typed_data(http.body) + Datum.from_typed_data(http.body, shmem_mgr) or Datum(type='bytes', value=b'') ), params={ @@ -85,7 +85,6 @@ def from_typed_data(cls, td: protos.TypedData): elif tt == 'collection_sint64': val = td.collection_sint64 elif tt == 'shared_memory_data': - shmem_mgr = SharedMemoryManager() shmem_data = td.shared_memory_data mmap_name = shmem_data.memory_mapped_file_name offset = shmem_data.offset @@ -106,14 +105,14 @@ def from_typed_data(cls, td: protos.TypedData): return cls(val, tt) -def datum_as_proto(datum: Datum) -> protos.TypedData: +def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': if SharedMemoryManager.is_enabled(): - shmem_mgr = SharedMemoryManager() value = datum.value - mmap_name = shmem_mgr.put(value) + mmap_name = shmem_mgr.put(value, invocation_id) if mmap_name is not None: shmem_data = protos.SharedMemoryData( memory_mapped_file_name=mmap_name, @@ -137,7 +136,8 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: for k, v in datum.value['headers'].items() }, enable_content_negotiation=False, - body=datum_as_proto(datum.value['body']), + 
body=datum_as_proto(datum.value['body'], shmem_mgr, + invocation_id), )) else: raise NotImplementedError( diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 2827c9a21..ba0f0587c 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -7,6 +7,7 @@ from . import datumdef from . import generic +from .shared_memory_manager import SharedMemoryManager def get_binding_registry(): @@ -57,14 +58,14 @@ def from_incoming_proto( binding: str, val: protos.TypedData, *, pytype: typing.Optional[type], - trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]])\ - -> typing.Any: + trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]], + shmem_mgr: SharedMemoryManager) -> typing.Any: binding = get_binding(binding) - datum = datumdef.Datum.from_typed_data(val) + datum = datumdef.Datum.from_typed_data(val, shmem_mgr) if trigger_metadata: metadata = { - k: datumdef.Datum.from_typed_data(v) + k: datumdef.Datum.from_typed_data(v, shmem_mgr) for k, v in trigger_metadata.items() } else: @@ -83,7 +84,9 @@ def from_incoming_proto( def to_outgoing_proto(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type]) -> protos.TypedData: + pytype: typing.Optional[type], + shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.TypedData: binding = get_binding(binding) try: @@ -95,4 +98,4 @@ def to_outgoing_proto(binding: str, obj: typing.Any, *, f'unsupported type "{binding}" for ' f'Python type "{type(obj).__name__}"') - return datumdef.datum_as_proto(datum) + return datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 6d4806ffc..34b03c528 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -3,6 +3,9 @@ import uuid from ..logging import 
logger +from ..mmap_handler.file_writer import FileWriter +from ..mmap_handler.file_reader import FileReader +from ..mmap_handler.file_accessor import FileAccessor class SharedMemoryManager: @@ -11,7 +14,7 @@ class SharedMemoryManager: Memory. """ def __init__(self): - pass + self.allocated_mmaps = {} # type dict[string, [(mmap_name, mmap)] @staticmethod def is_enabled(): @@ -29,13 +32,33 @@ def get(self, mmap_name: str, offset: int, count: int) -> (bytes): if successful, None otherwise. """ logger.info('Reading from Shared Memory: %s', mmap_name) - return 'foo'.encode('utf-8') + data = FileReader.read_content_as_bytes(mmap_name, offset) + return data - def put(self, data: bytes) -> (str): + def put(self, data: bytes, invocation_id: str) -> (str): """ Writes the given data into Shared Memory. Returns the name of the Memory Mapped File into which the data was written if succesful, None otherwise. """ mmap_name = str(uuid.uuid4()) + logger.info('Writing to Shared Memory: %s', mmap_name) + mmap = FileWriter.create_with_content_bytes(mmap_name, data) + + if invocation_id not in self.allocated_mmaps: + self.allocated_mmaps[invocation_id] = [] + self.allocated_mmaps[invocation_id].append((mmap_name, mmap)) + return mmap_name + + def free(self, invocation_id: str): + """ + Free up the resources allocated for the given invocation_id. + This includes closing and deleting mmaps that were produced as outputs + during the given invocation_id. 
+ """ + if invocation_id in self.allocated_mmaps: + for mmap_name, mmap in self.allocated_mmaps[invocation_id]: + FileAccessor.delete_mmap(mmap_name) + mmap.close() + del self.allocated_mmaps[invocation_id] diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 8837ed77b..56c688aa5 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -33,6 +33,7 @@ from .utils.tracing import marshall_exception_trace from .utils.dependency import DependencyManager from .utils.wrappers import disable_feature_by +from .bindings.shared_memory_manager import SharedMemoryManager _TRUE = "true" @@ -71,6 +72,7 @@ def __init__(self, loop: BaseEventLoop, host: str, port: int, self._request_id = request_id self._worker_id = worker_id self._functions = functions.Registry() + self._shmem_mgr = SharedMemoryManager() self._old_task_factory = None @@ -357,7 +359,8 @@ async def _handle__invocation_request(self, req): args[pb.name] = bindings.from_incoming_proto( pb_type_info.binding_name, pb.data, trigger_metadata=trigger_metadata, - pytype=pb_type_info.pytype) + pytype=pb_type_info.pytype, + shmem_mgr=self._shmem_mgr) if fi.requires_context: args['context'] = bindings.Context( @@ -388,7 +391,8 @@ async def _handle__invocation_request(self, req): rpc_val = bindings.to_outgoing_proto( out_type_info.binding_name, val, - pytype=out_type_info.pytype) + pytype=out_type_info.pytype, + shmem_mgr=self._shmem_mgr, invocation_id=invocation_id) assert rpc_val is not None output_data.append( @@ -400,7 +404,8 @@ async def _handle__invocation_request(self, req): if fi.return_type is not None: return_value = bindings.to_outgoing_proto( fi.return_type.binding_name, call_result, - pytype=fi.return_type.pytype) + pytype=fi.return_type.pytype, + shmem_mgr=self._shmem_mgr, invocation_id=invocation_id) # Actively flush customer print() function to console sys.stdout.flush() @@ -485,6 +490,37 @@ async def 
_handle__function_environment_reload_request(self, req): request_id=self.request_id, function_environment_reload_response=failure_response) + async def _handle__close_shared_memory_resources_request(self, req): + """ + Frees any mmaps that were produced as output for a given invocation. + This is called after the Functions Host is done reading the output from the worker and + wants the worker to free up those resources. + TODO gochaudh: Rename CloseSharedMemory* to FreeSharedMemory* and also this method name. + """ + try: + close_request = req.close_shared_memory_resources_request + + invocation_id_to_free = close_request.invocation_id + self._shmem_mgr.free(invocation_id_to_free) + + success_response = protos.CloseSharedMemoryResourcesResponse( + result=protos.StatusResult( + status=protos.StatusResult.Success)) + + return protos.StreamingMessage( + request_id=self.request_id, + close_shared_memory_resources_response=success_response) + + except Exception as ex: + failure_response = protos.CloseSharedMemoryResourcesResponse( + result=protos.StatusResult( + status=protos.StatusResult.Failure, + exception=self._serialize_exception(ex))) + + return protos.StreamingMessage( + request_id=self.request_id, + close_shared_memory_resources_response=failure_response) + @disable_feature_by(constants.PYTHON_ROLLBACK_CWD_PATH) def _change_cwd(self, new_cwd: str): if os.path.exists(new_cwd): diff --git a/azure_functions_worker/mmap_handler/__init__.py b/azure_functions_worker/mmap_handler/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py new file mode 100644 index 000000000..52b5aa8bc --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -0,0 +1,196 @@ +# -*- coding: utf-8 -*- + +import os +import sys +import mmap +import time +import struct +import hashlib +import urllib.parse +from .memorymappedfile_constants import 
MemoryMappedFileConstants as consts +from .memorymappedfile_controlflags import MemoryMappedFileControlFlags as flags +from .memorymappedfile_controlflags import MemoryMappedFileControlFlagsUtils as flags_utils + +""" +TODO +Clean up this class and use logger instead of prints +""" +class FileAccessor: + @staticmethod + def _open_mmap_file_linux(map_name): + """Get the file descriptor of an existing memory map. + """ + escaped_map_name = urllib.parse.quote_plus(map_name) + for mmap_temp_dir in consts.TEMP_DIRS: + filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) + try: + file = open(filename, "r+b") + return file + except FileNotFoundError: + pass + raise FileNotFoundError("File for '%s' does not exist" % (map_name)) + + @staticmethod + def open_mmap(map_name, map_size, access=mmap.ACCESS_READ): + """Open an existing memory map. + """ + try: + if os.name == "posix": + file = FileAccessor._open_mmap_file_linux(map_name) + mmap_ret = mmap.mmap(file.fileno(), map_size, access=access) + else: + mmap_ret = mmap.mmap(-1, map_size, map_name, access=access) + mmap_ret.seek(0) + return mmap_ret + except ValueError: + # mmap length is greater than file size + #print("Cannot open memory map '%s': %s" % (map_name, value_error)) + return None + except FileNotFoundError: + # TODO Log Error + return None + + @staticmethod + def _create_mmap_dir_linux(): + """Create a directory to create memory maps. + """ + for mmap_temp_dir in consts.TEMP_DIRS: + dirname = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) + if os.path.isdir(dirname): + # One of the directories already exists, no need + return + try: + os.makedirs(dirname) + return + except Exception as ex: + print.error("Cannot create dir '%s': %s" % (dirname, str(ex))) + + @staticmethod + def _create_mmap_file_linux(map_name, map_size): + """Get the file descriptor for a new memory map. 
+ """ + escaped_map_name = urllib.parse.quote_plus(map_name) + dir_exists = False + for mmap_temp_dir in consts.TEMP_DIRS: + # Check if the file already exists + filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) + if os.path.exists(filename): + raise Exception("File '%s' for memory map '%s' already exists" % + (filename, map_name)) + # Check if the parent directory exists + dir_name = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) + if os.path.isdir(dir_name): + dir_exists = True + # Check if any of the parent directories exists + if not dir_exists: + FileAccessor._create_mmap_dir_linux() + # Create the file + for mmap_temp_dir in consts.TEMP_DIRS: + filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) + try: + file = os.open(filename, os.O_CREAT | os.O_TRUNC | os.O_RDWR) + # Write 0s to allocate + bytes_written = os.write(file, b'\x00' * map_size) + if bytes_written != map_size: + print("Cannot write 0s into new memory map file '%s': %d != %d" % + (filename, bytes_written, map_size)) + return file + except Exception as ex: + print("Cannot create memory map file '%s': %s" % (filename, ex)) + raise Exception("Cannot create memory map file for '%s'" % (map_name)) + + @staticmethod + def create_mmap(map_name, map_size): + """Create a new memory map. + """ + if os.name == 'posix': + file = FileAccessor._create_mmap_file_linux(map_name, map_size) + mem_map = mmap.mmap(file, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) + else: + # Windows creates it when trying to open it + mem_map = FileAccessor.open_mmap(map_name, map_size, mmap.ACCESS_WRITE) + # Verify that the file is actually created and not existing before + mem_map.seek(0) + byte_read = mem_map.read(1) + if byte_read != b'\x00': + raise Exception("Memory map '%s' already exists" % (map_name)) + mem_map.seek(0) + return mem_map + + @staticmethod + def release_mmap(map_name): + """Release the memory map. 
+ We should not change the data as it is actually cached in C#. + """ + try: + map_content = FileAccessor.open_mmap(map_name, consts.CONTENT_HEADER_TOTAL_BYTES, + mmap.ACCESS_WRITE) + except FileNotFoundError: + # TODO Log Debug + return + if map_content is None: + return + try: + # Only change the control flag + release_bytes = bytes([flags.READY_TO_DISPOSE.value]) + map_content.write(release_bytes) + except ValueError as value_error: + print("Cannot release memory map '%s': %s" % (map_name, value_error)) + finally: + map_content.close() + + @staticmethod + def delete_mmap(map_name): + """Delete a memory map. + """ + FileAccessor.release_mmap(map_name) + if os.name == 'posix': + try: + file = FileAccessor._open_mmap_file_linux(map_name) + os.remove(file.name) + except FileNotFoundError: + pass # Nothing to do if the file is not there anyway + + @staticmethod + def _get_control_flag(map_name): + """Check if the control flag is readable. + """ + try: + mem_map = FileAccessor.open_mmap( + map_name, consts.CONTROL_FLAG_NUM_BYTES, mmap.ACCESS_READ) + except FileNotFoundError: + # TODO Log Error + return None + if mem_map is None: + # TODO Log Error + return None + try: + header_bytes = mem_map.read(consts.CONTENT_HEADER_TOTAL_BYTES) + control_flag = header_bytes[0] + return control_flag + except ValueError as value_error: + print("Cannot get control flag for memory map '%s': %s" % (map_name, value_error)) + return 0 + finally: + mem_map.close() + + @staticmethod + def is_mmap_available(map_name): + control_flag = FileAccessor._get_control_flag(map_name) + if control_flag is None: + return False + return flags_utils.is_available(control_flag) + + @staticmethod + def is_mmap_readable(map_name): + control_flag = FileAccessor._get_control_flag(map_name) + if control_flag is None: + return False + return flags_utils.is_readable(control_flag) + + @staticmethod + def is_mmap_disposable(map_name): + control_flag = FileAccessor._get_control_flag(map_name) + if control_flag is 
None: + return False + return flags_utils.is_disposable(control_flag) \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py new file mode 100644 index 000000000..2444ce016 --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +import mmap +import os +import struct +from .memorymappedfile_constants import MemoryMappedFileConstants as consts +from .memorymappedfile_controlflags import MemoryMappedFileControlFlagsUtils as flags_utils +from .file_accessor import FileAccessor + + +class FileReader: + @staticmethod + def _bytes_to_long(input_bytes): + """Decode a set of bytes representing a long. + This uses the format that C# uses. + """ + return struct.unpack(" 0: + map_content.seek(content_offset, os.SEEK_CUR) + content = map_content.read() + return content + except ValueError as value_error: + print("Cannot get content for memory map '%s': %s" % (map_name, value_error)) + finally: + map_content.close() + except FileNotFoundError: + #print("Cannot get content for '%s'" % (map_name)) + pass + # If we cannot get the content return None + return None + + @staticmethod + def read_content_as_string(map_name, content_offset: int = 0): + """Read content from a memory mapped file as a string. 
+ """ + content_bytes = FileReader.read_content_as_bytes(map_name, content_offset) + if content_bytes is None: + return None + content_str = content_bytes.decode('utf-8') + return content_str diff --git a/azure_functions_worker/mmap_handler/file_writer.py b/azure_functions_worker/mmap_handler/file_writer.py new file mode 100644 index 000000000..8d6b8eac2 --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_writer.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- + +import sys +import mmap +from typing import Optional +from typing import Union +from .file_accessor import FileAccessor +from .memorymappedfile_constants import MemoryMappedFileConstants as consts +from .memorymappedfile_controlflags import MemoryMappedFileControlFlags as flags + + +class FileWriter: + @staticmethod + def create_with_content_bytes(map_name: str, content: bytes) -> Optional[mmap.mmap]: + if content is None: + return None + content_size = len(content) + map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size + mem_map = FileAccessor.create_mmap(map_name, map_size) + # Skip the first byte as it will be written at the end, when the rest of the content is ready + mem_map.seek(consts.CONTROL_FLAG_NUM_BYTES) + content_size_bytes = content_size.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) + mem_map.write(content_size_bytes) + mem_map.write(content) + mem_map.seek(0) + flag_bytes = bytes([flags.READY_TO_READ.value]) + mem_map.write(flag_bytes) + mem_map.flush() + return mem_map + + @staticmethod + def create_with_content_string(map_name: str, content: str) -> Optional[mmap.mmap]: + if content is None: + return None + content_bytes = content.encode('utf-8') + return FileWriter.create_with_content_bytes(map_name, content_bytes) diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py new file mode 100644 index 000000000..920ad07e8 --- /dev/null +++ 
b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +# TODO use protobuf to define these constants between C# and Python +class MemoryMappedFileConstants: + # Directories in Linux where the memory maps can be found + TEMP_DIRS = ["/dev/shm", "/tmp"] + # Suffix for the temp directories containing memory maps + TEMP_DIR_SUFFIX = "AzureFunctions" + + # The length of the MD5 at the beginning of the content shared memory + REQUESTS_MD5_MARK_LENGTH = 16 + # The MD5 that marks that we can read + REQUESTS_MD5_MARK_RESET = b'\x00' * REQUESTS_MD5_MARK_LENGTH + + # The length of a long which is the length of the header in the content mmap + CONTENT_LENGTH_NUM_BYTES = 8 + # The length of control flag + CONTROL_FLAG_NUM_BYTES = 1 + # The length of the header: control flag + content length + CONTENT_HEADER_TOTAL_BYTES = CONTROL_FLAG_NUM_BYTES + CONTENT_LENGTH_NUM_BYTES \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py b/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py new file mode 100644 index 000000000..fbbfbe721 --- /dev/null +++ b/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +import enum + + +class MemoryMappedFileControlFlags(enum.Enum): + """Flag to indicate state of memory mapped file. 
+ Note: Must be kept in sync with the DotNet runtime version of this: + TODO path to MemStore constants + """ + UNKNOWN = 0 + READY_TO_READ = 1 + READY_TO_DISPOSE = 2 + WRITE_IN_PROGRESS = 3 + PENDING_READ = 4 + + +class MemoryMappedFileControlFlagsUtils: + @staticmethod + def is_available(control_flag): + if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: + return False + elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: + return True + elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: + return True + elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: + return True + else: + raise Exception("Unknown control flag: '%s'" % (control_flag)) + + @staticmethod + def is_readable(control_flag): + if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: + return False + elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: + return False + elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: + return True + elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: + return False + elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: + return False + else: + raise Exception("Unknown control flag: '%s'" % (control_flag)) + + @staticmethod + def is_disposable(control_flag): + if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: + return False + elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: + return False + elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: + return False + elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: + return True + elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: + return False + else: + raise Exception("Unknown control flag: '%s'" % (control_flag)) diff --git a/azure_functions_worker/protos/__init__.py b/azure_functions_worker/protos/__init__.py index d22debe8c..536d1ca40 100644 --- 
a/azure_functions_worker/protos/__init__.py +++ b/azure_functions_worker/protos/__init__.py @@ -23,4 +23,6 @@ TypedData, RpcHttp, RpcLog, - SharedMemoryData) + SharedMemoryData, + CloseSharedMemoryResourcesRequest, + CloseSharedMemoryResourcesResponse) diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index ec392c39e..b0aa795ad 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -71,6 +71,10 @@ message StreamingMessage { FunctionEnvironmentReloadRequest function_environment_reload_request = 25; FunctionEnvironmentReloadResponse function_environment_reload_response = 26; + + // Ask the worker to close any open shared memory resources for a given invocation + CloseSharedMemoryResourcesRequest close_shared_memory_resources_request = 27; + CloseSharedMemoryResourcesResponse close_shared_memory_resources_response = 28; } } @@ -201,6 +205,14 @@ message FunctionEnvironmentReloadResponse { StatusResult result = 3; } +message CloseSharedMemoryResourcesRequest { + string invocation_id = 1; +} + +message CloseSharedMemoryResourcesResponse { + StatusResult result = 1; +} + // Host tells the worker to load a Function message FunctionLoadRequest { // unique function identifier (avoid name collisions, facilitate reload case) From 81c565caa759066a6cd39950cba7c49fa71e0390 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 19 Oct 2020 12:56:15 +0500 Subject: [PATCH 05/76] Removed control flag from mmap header --- .../bindings/shared_memory_manager.py | 3 +- .../mmap_handler/file_accessor.py | 72 +------------------ .../mmap_handler/file_reader.py | 7 +- .../mmap_handler/file_writer.py | 6 -- .../memorymappedfile_constants.py | 13 +--- .../memorymappedfile_controlflags.py | 60 ---------------- 6 files changed, 7 insertions(+), 154 deletions(-) delete mode 100644 
azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 34b03c528..51fe85fb4 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -59,6 +59,5 @@ def free(self, invocation_id: str): """ if invocation_id in self.allocated_mmaps: for mmap_name, mmap in self.allocated_mmaps[invocation_id]: - FileAccessor.delete_mmap(mmap_name) - mmap.close() + FileAccessor.delete_mmap(mmap_name, mmap) del self.allocated_mmaps[invocation_id] diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index 52b5aa8bc..8f597a038 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -8,8 +8,6 @@ import hashlib import urllib.parse from .memorymappedfile_constants import MemoryMappedFileConstants as consts -from .memorymappedfile_controlflags import MemoryMappedFileControlFlags as flags -from .memorymappedfile_controlflags import MemoryMappedFileControlFlagsUtils as flags_utils """ TODO @@ -118,79 +116,13 @@ def create_mmap(map_name, map_size): return mem_map @staticmethod - def release_mmap(map_name): - """Release the memory map. - We should not change the data as it is actually cached in C#. 
- """ - try: - map_content = FileAccessor.open_mmap(map_name, consts.CONTENT_HEADER_TOTAL_BYTES, - mmap.ACCESS_WRITE) - except FileNotFoundError: - # TODO Log Debug - return - if map_content is None: - return - try: - # Only change the control flag - release_bytes = bytes([flags.READY_TO_DISPOSE.value]) - map_content.write(release_bytes) - except ValueError as value_error: - print("Cannot release memory map '%s': %s" % (map_name, value_error)) - finally: - map_content.close() - - @staticmethod - def delete_mmap(map_name): + def delete_mmap(map_name, mmap): """Delete a memory map. """ - FileAccessor.release_mmap(map_name) if os.name == 'posix': try: file = FileAccessor._open_mmap_file_linux(map_name) os.remove(file.name) except FileNotFoundError: pass # Nothing to do if the file is not there anyway - - @staticmethod - def _get_control_flag(map_name): - """Check if the control flag is readable. - """ - try: - mem_map = FileAccessor.open_mmap( - map_name, consts.CONTROL_FLAG_NUM_BYTES, mmap.ACCESS_READ) - except FileNotFoundError: - # TODO Log Error - return None - if mem_map is None: - # TODO Log Error - return None - try: - header_bytes = mem_map.read(consts.CONTENT_HEADER_TOTAL_BYTES) - control_flag = header_bytes[0] - return control_flag - except ValueError as value_error: - print("Cannot get control flag for memory map '%s': %s" % (map_name, value_error)) - return 0 - finally: - mem_map.close() - - @staticmethod - def is_mmap_available(map_name): - control_flag = FileAccessor._get_control_flag(map_name) - if control_flag is None: - return False - return flags_utils.is_available(control_flag) - - @staticmethod - def is_mmap_readable(map_name): - control_flag = FileAccessor._get_control_flag(map_name) - if control_flag is None: - return False - return flags_utils.is_readable(control_flag) - - @staticmethod - def is_mmap_disposable(map_name): - control_flag = FileAccessor._get_control_flag(map_name) - if control_flag is None: - return False - return 
flags_utils.is_disposable(control_flag) \ No newline at end of file + mmap.close() \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 2444ce016..68ba05cc3 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -4,7 +4,6 @@ import os import struct from .memorymappedfile_constants import MemoryMappedFileConstants as consts -from .memorymappedfile_controlflags import MemoryMappedFileControlFlagsUtils as flags_utils from .file_accessor import FileAccessor @@ -31,11 +30,7 @@ def get_content_length(map_name): return -1 try: header_bytes = map_content_length.read(consts.CONTENT_HEADER_TOTAL_BYTES) - control_flag = header_bytes[0] - if not flags_utils.is_readable(control_flag): - # TODO test for these cases - return -1 - content_length = FileReader._bytes_to_long(header_bytes[1:]) + content_length = FileReader._bytes_to_long(header_bytes) return content_length except ValueError as value_error: print("Cannot get content length for memory map '%s': %s" % (map_name, value_error)) diff --git a/azure_functions_worker/mmap_handler/file_writer.py b/azure_functions_worker/mmap_handler/file_writer.py index 8d6b8eac2..cf8a44ce6 100644 --- a/azure_functions_worker/mmap_handler/file_writer.py +++ b/azure_functions_worker/mmap_handler/file_writer.py @@ -6,7 +6,6 @@ from typing import Union from .file_accessor import FileAccessor from .memorymappedfile_constants import MemoryMappedFileConstants as consts -from .memorymappedfile_controlflags import MemoryMappedFileControlFlags as flags class FileWriter: @@ -17,14 +16,9 @@ def create_with_content_bytes(map_name: str, content: bytes) -> Optional[mmap.mm content_size = len(content) map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size mem_map = FileAccessor.create_mmap(map_name, map_size) - # Skip the first byte as it will be written at the end, when the rest of the 
content is ready - mem_map.seek(consts.CONTROL_FLAG_NUM_BYTES) content_size_bytes = content_size.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) mem_map.write(content_size_bytes) mem_map.write(content) - mem_map.seek(0) - flag_bytes = bytes([flags.READY_TO_READ.value]) - mem_map.write(flag_bytes) mem_map.flush() return mem_map diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py index 920ad07e8..386186e86 100644 --- a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py +++ b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py @@ -3,18 +3,11 @@ # TODO use protobuf to define these constants between C# and Python class MemoryMappedFileConstants: # Directories in Linux where the memory maps can be found - TEMP_DIRS = ["/dev/shm", "/tmp"] + TEMP_DIRS = ["/dev/shm"] # Suffix for the temp directories containing memory maps TEMP_DIR_SUFFIX = "AzureFunctions" - # The length of the MD5 at the beginning of the content shared memory - REQUESTS_MD5_MARK_LENGTH = 16 - # The MD5 that marks that we can read - REQUESTS_MD5_MARK_RESET = b'\x00' * REQUESTS_MD5_MARK_LENGTH - # The length of a long which is the length of the header in the content mmap CONTENT_LENGTH_NUM_BYTES = 8 - # The length of control flag - CONTROL_FLAG_NUM_BYTES = 1 - # The length of the header: control flag + content length - CONTENT_HEADER_TOTAL_BYTES = CONTROL_FLAG_NUM_BYTES + CONTENT_LENGTH_NUM_BYTES \ No newline at end of file + # The length of the header: content length + CONTENT_HEADER_TOTAL_BYTES = CONTENT_LENGTH_NUM_BYTES \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py b/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py deleted file mode 100644 index fbbfbe721..000000000 --- a/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py +++ /dev/null @@ -1,60 +0,0 @@ -# -*- 
coding: utf-8 -*- - -import enum - - -class MemoryMappedFileControlFlags(enum.Enum): - """Flag to indicate state of memory mapped file. - Note: Must be kept in sync with the DotNet runtime version of this: - TODO path to MemStore constants - """ - UNKNOWN = 0 - READY_TO_READ = 1 - READY_TO_DISPOSE = 2 - WRITE_IN_PROGRESS = 3 - PENDING_READ = 4 - - -class MemoryMappedFileControlFlagsUtils: - @staticmethod - def is_available(control_flag): - if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: - return False - elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: - return True - elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: - return True - elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: - return True - else: - raise Exception("Unknown control flag: '%s'" % (control_flag)) - - @staticmethod - def is_readable(control_flag): - if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: - return False - elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: - return False - elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: - return True - elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: - return False - elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: - return False - else: - raise Exception("Unknown control flag: '%s'" % (control_flag)) - - @staticmethod - def is_disposable(control_flag): - if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: - return False - elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: - return False - elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: - return False - elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: - return True - elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: - return False - else: - raise Exception("Unknown control flag: '%s'" % (control_flag)) From 
d4ab3b062aa8ff82a1d0ddf86d26aedf6f9c2fe7 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 20 Oct 2020 11:59:13 +0500 Subject: [PATCH 06/76] Proto change --- azure_functions_worker/bindings/datumdef.py | 18 +++++++++--------- azure_functions_worker/protos/__init__.py | 2 +- .../protos/_src/src/proto/FunctionRpc.proto | 14 ++++++++++---- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index fdf2df774..4ad1e57e6 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -84,17 +84,17 @@ def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): val = td.collection_string elif tt == 'collection_sint64': val = td.collection_sint64 - elif tt == 'shared_memory_data': - shmem_data = td.shared_memory_data - mmap_name = shmem_data.memory_mapped_file_name - offset = shmem_data.offset - count = shmem_data.count + elif tt == 'rpc_shared_memory_info': + shmem_info = td.rpc_shared_memory_info + mmap_name = shmem_info.name + offset = shmem_info.offset + count = shmem_info.count ret = shmem_mgr.get(mmap_name, offset, count) if ret is None: return None else: val = ret - tt = shmem_data.type + tt = shmem_info.type elif tt is None: return None else: @@ -114,12 +114,12 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, value = datum.value mmap_name = shmem_mgr.put(value, invocation_id) if mmap_name is not None: - shmem_data = protos.SharedMemoryData( - memory_mapped_file_name=mmap_name, + shmem_info = protos.RpcSharedMemoryInfo( + name=mmap_name, offset=0, count=len(value), type='bytes') - return protos.TypedData(shared_memory_data=shmem_data) + return protos.TypedData(rpc_shared_memory_info=shmem_info) else: raise Exception( 'cannot write datum value into Shared Memory' diff --git a/azure_functions_worker/protos/__init__.py b/azure_functions_worker/protos/__init__.py index 
536d1ca40..930c011d0 100644 --- a/azure_functions_worker/protos/__init__.py +++ b/azure_functions_worker/protos/__init__.py @@ -23,6 +23,6 @@ TypedData, RpcHttp, RpcLog, - SharedMemoryData, + RpcSharedMemoryInfo, CloseSharedMemoryResourcesRequest, CloseSharedMemoryResourcesResponse) diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index b0aa795ad..5ce3cb211 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -205,10 +205,12 @@ message FunctionEnvironmentReloadResponse { StatusResult result = 3; } +// Tell the out-of-proc worker to close any shared memory maps it allocated for given invocation message CloseSharedMemoryResourcesRequest { string invocation_id = 1; } +// Response from the worker indicating whether the shared memory maps it had allocated have been successfully closed message CloseSharedMemoryResourcesResponse { StatusResult result = 1; } @@ -327,15 +329,19 @@ message TypedData { CollectionString collection_string = 9; CollectionDouble collection_double = 10; CollectionSInt64 collection_sint64 = 11; - SharedMemoryData shared_memory_data = 12; + RpcSharedMemoryInfo rpc_shared_memory_info = 12; } } -// Used to provide metadata about shared memory region to read/write data -message SharedMemoryData { - string memory_mapped_file_name = 1; +// Used to provide metadata about shared memory region to read data from +message RpcSharedMemoryInfo { + // Name of the shared memory map containing data + string name = 1; + // Offset in the shared memory map to start reading data from int64 offset = 2; + // Number of bytes to read (starting from the offset) int64 count = 3; + // Final type to which the read data (in bytes) is to be interpreted as string type = 4; } From 7de45f3a3537e8e93ec1c0abad86a7c64fea8fb4 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 22 Oct 2020 
15:45:05 +0500 Subject: [PATCH 07/76] Working for blob shared memory data transfer; needs clean up, comments and tests --- azure_functions_worker/bindings/__init__.py | 2 +- azure_functions_worker/bindings/datumdef.py | 43 ++++------- azure_functions_worker/bindings/meta.py | 73 +++++++++++++++++-- .../bindings/shared_memory_manager.py | 29 +++++--- azure_functions_worker/constants.py | 1 + azure_functions_worker/dispatcher.py | 16 ++-- azure_functions_worker/protos/__init__.py | 3 +- .../protos/_src/src/proto/FunctionRpc.proto | 26 ++++++- 8 files changed, 134 insertions(+), 59 deletions(-) diff --git a/azure_functions_worker/bindings/__init__.py b/azure_functions_worker/bindings/__init__.py index 934e3d2d7..d0a268031 100644 --- a/azure_functions_worker/bindings/__init__.py +++ b/azure_functions_worker/bindings/__init__.py @@ -6,7 +6,7 @@ from .meta import check_output_type_annotation from .meta import has_implicit_output from .meta import is_trigger_binding -from .meta import from_incoming_proto, to_outgoing_proto +from .meta import from_incoming_proto, to_outgoing_proto, to_outgoing_param_binding from .out import Out diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 4ad1e57e6..47be45500 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -51,7 +51,7 @@ def __repr__(self): return ''.format(self.type, val_repr) @classmethod - def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): + def from_typed_data(cls, td: protos.TypedData): tt = td.WhichOneof('data') if tt == 'http': http = td.http @@ -62,7 +62,7 @@ def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): k: Datum(v, 'string') for k, v in http.headers.items() }, body=( - Datum.from_typed_data(http.body, shmem_mgr) + Datum.from_typed_data(http.body) or Datum(type='bytes', value=b'') ), params={ @@ -84,17 +84,6 @@ def from_typed_data(cls, td: 
protos.TypedData, shmem_mgr: SharedMemoryManager): val = td.collection_string elif tt == 'collection_sint64': val = td.collection_sint64 - elif tt == 'rpc_shared_memory_info': - shmem_info = td.rpc_shared_memory_info - mmap_name = shmem_info.name - offset = shmem_info.offset - count = shmem_info.count - ret = shmem_mgr.get(mmap_name, offset, count) - if ret is None: - return None - else: - val = ret - tt = shmem_info.type elif tt is None: return None else: @@ -104,28 +93,24 @@ def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): return cls(val, tt) + @classmethod + def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: SharedMemoryManager): + mmap_name = shmem.name + offset = shmem.offset + count = shmem.count + data_type = shmem.type + val = shmem_mgr.get_bytes(mmap_name, offset, count) + if val is not None: + if data_type == protos.RpcSharedMemoryDataType.bytes: + return cls(val, 'bytes') + return None def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, invocation_id: str) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': - if SharedMemoryManager.is_enabled(): - value = datum.value - mmap_name = shmem_mgr.put(value, invocation_id) - if mmap_name is not None: - shmem_info = protos.RpcSharedMemoryInfo( - name=mmap_name, - offset=0, - count=len(value), - type='bytes') - return protos.TypedData(rpc_shared_memory_info=shmem_info) - else: - raise Exception( - 'cannot write datum value into Shared Memory' - ) - else: - return protos.TypedData(bytes=datum.value) + return protos.TypedData(bytes=datum.value) elif datum.type == 'json': return protos.TypedData(json=datum.value) elif datum.type == 'http': diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index ba0f0587c..4e90ff4f8 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -56,21 +56,33 @@ def 
has_implicit_output(bind_name: str) -> bool: def from_incoming_proto( binding: str, - val: protos.TypedData, *, + pb: protos.ParameterBinding, *, pytype: typing.Optional[type], trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]], shmem_mgr: SharedMemoryManager) -> typing.Any: + # TODO gochaudh: + # Ideally, we should use WhichOneOf (if back compat issue is not there) + # Otherwise, a None check is not applicable as even if rpc_shared_memory is + # not set, its not None + datum = None + if pb.rpc_shared_memory.name is not '': + # Data was sent over shared memory, attempt to read + datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) + # TODO gochaudh: check trigger_metadata (try with blob triggered func) binding = get_binding(binding) - datum = datumdef.Datum.from_typed_data(val, shmem_mgr) if trigger_metadata: metadata = { - k: datumdef.Datum.from_typed_data(v, shmem_mgr) + k: datumdef.Datum.from_typed_data(v) for k, v in trigger_metadata.items() } else: metadata = {} + if datum is None: + val = pb.data + datum = datumdef.Datum.from_typed_data(val) + try: return binding.decode(datum, trigger_metadata=metadata) except NotImplementedError: @@ -83,10 +95,8 @@ def from_incoming_proto( f'and expected binding type {binding}') -def to_outgoing_proto(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type], - shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.TypedData: +def get_datum(binding: str, obj: typing.Any, + pytype: typing.Optional[type]): binding = get_binding(binding) try: @@ -97,5 +107,54 @@ def to_outgoing_proto(binding: str, obj: typing.Any, *, f'unable to encode outgoing TypedData: ' f'unsupported type "{binding}" for ' f'Python type "{type(obj).__name__}"') + return datum + +def to_outgoing_proto(binding: str, obj: typing.Any, *, + pytype: typing.Optional[type], + shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.TypedData: + datum = get_datum(binding, obj, pytype) return 
datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + + +def to_outgoing_param_binding(binding: str, obj: typing.Any, *, + pytype: typing.Optional[type], + out_name: str, + shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.ParameterBinding: + datum = get_datum(binding, obj, pytype) + # TODO gochaudh: IMPORTANT: Right now we set the AppSetting to disable this + # However that takes impact only for data coming from host -> worker + # Is there a way to check the AppSetting here so that this does not respond back + # with shared memory? + param_binding = None + if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): + if datum.type == 'bytes': + value = datum.value + map_name = shmem_mgr.put_bytes(value, invocation_id) + if map_name is not None: + shmem = protos.RpcSharedMemory( + name=map_name, + offset=0, + count=len(value), + type=protos.RpcSharedMemoryDataType.bytes) + param_binding = protos.ParameterBinding( + name=out_name, + rpc_shared_memory=shmem) + else: + raise Exception( + 'cannot write datum value into shared memory' + ) + else: + raise Exception( + 'unsupported datum type for shared memory' + ) + + if param_binding is None: + rpc_val = datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + param_binding = protos.ParameterBinding( + name=out_name, + data=rpc_val) + + return param_binding \ No newline at end of file diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 51fe85fb4..d252f01d8 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -16,33 +16,44 @@ class SharedMemoryManager: def __init__(self): self.allocated_mmaps = {} # type dict[string, [(mmap_name, mmap)] - @staticmethod - def is_enabled(): + def is_enabled(self) -> bool: """ Whether supported types should be transferred between Functions host - and the worker using Shared Memory. 
+ and the worker using shared memory. """ return True - def get(self, mmap_name: str, offset: int, count: int) -> (bytes): + def is_supported(self, datum) -> bool: + """ + Whether the given Datum object can be transferred to the Functions host + using shared memory. + """ + if datum.type == 'bytes': + # TODO gochaudh: Check for min size config + # Is there a common place to put configs shared b/w host and worker? + return True + else: + return False + + def get_bytes(self, mmap_name: str, offset: int, count: int) -> bytes: """ Reads data from the given Memory Mapped File with the provided name, starting at the provided offset and reading a total of count bytes. - Returns a tuple containing the binary data read from Shared Memory + Returns a tuple containing the binary data read from shared memory if successful, None otherwise. """ - logger.info('Reading from Shared Memory: %s', mmap_name) + logger.info('Reading from shared memory: %s', mmap_name) data = FileReader.read_content_as_bytes(mmap_name, offset) return data - def put(self, data: bytes, invocation_id: str) -> (str): + def put_bytes(self, data: bytes, invocation_id: str) -> str: """ - Writes the given data into Shared Memory. + Writes the given data into shared memory. Returns the name of the Memory Mapped File into which the data was written if succesful, None otherwise. 
""" mmap_name = str(uuid.uuid4()) - logger.info('Writing to Shared Memory: %s', mmap_name) + logger.info('Writing to shared memory: %s', mmap_name) mmap = FileWriter.create_with_content_bytes(mmap_name, data) if invocation_id not in self.allocated_mmaps: diff --git a/azure_functions_worker/constants.py b/azure_functions_worker/constants.py index f0678a845..177827aed 100644 --- a/azure_functions_worker/constants.py +++ b/azure_functions_worker/constants.py @@ -9,6 +9,7 @@ TYPED_DATA_COLLECTION = "TypedDataCollection" RPC_HTTP_BODY_ONLY = "RpcHttpBodyOnly" RPC_HTTP_TRIGGER_METADATA_REMOVED = "RpcHttpTriggerMetadataRemoved" +SHARED_MEMORY_DATA_TRANSFER = "SharedMemoryDataTransfer" # Debug Flags PYAZURE_WEBHOST_DEBUG = "PYAZURE_WEBHOST_DEBUG" diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 56c688aa5..2f0d622ce 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -263,6 +263,7 @@ async def _handle__worker_init_request(self, req): constants.TYPED_DATA_COLLECTION: _TRUE, constants.RPC_HTTP_BODY_ONLY: _TRUE, constants.RPC_HTTP_TRIGGER_METADATA_REMOVED: _TRUE, + constants.SHARED_MEMORY_DATA_TRANSFER: _TRUE, } # Can detech worker packages @@ -356,8 +357,9 @@ async def _handle__invocation_request(self, req): trigger_metadata = invoc_request.trigger_metadata else: trigger_metadata = None + args[pb.name] = bindings.from_incoming_proto( - pb_type_info.binding_name, pb.data, + pb_type_info.binding_name, pb, trigger_metadata=trigger_metadata, pytype=pb_type_info.pytype, shmem_mgr=self._shmem_mgr) @@ -389,16 +391,12 @@ async def _handle__invocation_request(self, req): # Can "None" be marshaled into protos.TypedData? 
continue - rpc_val = bindings.to_outgoing_proto( + param_binding = bindings.to_outgoing_param_binding( out_type_info.binding_name, val, pytype=out_type_info.pytype, - shmem_mgr=self._shmem_mgr, invocation_id=invocation_id) - assert rpc_val is not None - - output_data.append( - protos.ParameterBinding( - name=out_name, - data=rpc_val)) + out_name=out_name, shmem_mgr=self._shmem_mgr, + invocation_id=invocation_id) + output_data.append(param_binding) return_value = None if fi.return_type is not None: diff --git a/azure_functions_worker/protos/__init__.py b/azure_functions_worker/protos/__init__.py index 930c011d0..6ec9a6c74 100644 --- a/azure_functions_worker/protos/__init__.py +++ b/azure_functions_worker/protos/__init__.py @@ -23,6 +23,7 @@ TypedData, RpcHttp, RpcLog, - RpcSharedMemoryInfo, + RpcSharedMemory, + RpcSharedMemoryDataType, CloseSharedMemoryResourcesRequest, CloseSharedMemoryResourcesResponse) diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index 5ce3cb211..20db572e9 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -329,12 +329,27 @@ message TypedData { CollectionString collection_string = 9; CollectionDouble collection_double = 10; CollectionSInt64 collection_sint64 = 11; - RpcSharedMemoryInfo rpc_shared_memory_info = 12; } } +// Specify which type of data is contained in the shared memory region being read +enum RpcSharedMemoryDataType { + unknown = 0; + string = 1; + json = 2; + bytes = 3; + stream = 4; + http = 5; + int = 6; + double = 7; + collection_bytes = 8; + collection_string = 9; + collection_double = 10; + collection_sint64 = 11; +} + // Used to provide metadata about shared memory region to read data from -message RpcSharedMemoryInfo { +message RpcSharedMemory { // Name of the shared memory map containing data string name = 1; // Offset in the shared 
memory map to start reading data from @@ -342,7 +357,7 @@ message RpcSharedMemoryInfo { // Number of bytes to read (starting from the offset) int64 count = 3; // Final type to which the read data (in bytes) is to be interpreted as - string type = 4; + RpcSharedMemoryDataType type = 4; } // Used to encapsulate collection string @@ -370,8 +385,13 @@ message ParameterBinding { // Name for the binding string name = 1; + // Note: Either *data* or *rpc_shared_memory* field will be populated + // Data for the binding TypedData data = 2; + + // Metadata about the shared memory region to read data from + RpcSharedMemory rpc_shared_memory = 3; } // Used to describe a given binding on load From 0f6ccde0bbb86fd721548ff65987ce9a6e7850ae Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 23 Oct 2020 03:12:38 +0500 Subject: [PATCH 08/76] Changing message for closing mmaps --- azure_functions_worker/bindings/datumdef.py | 6 +-- azure_functions_worker/bindings/meta.py | 12 +++--- .../bindings/shared_memory_manager.py | 38 +++++++++---------- azure_functions_worker/dispatcher.py | 10 ++--- .../protos/_src/src/proto/FunctionRpc.proto | 26 ++++++------- 5 files changed, 43 insertions(+), 49 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 47be45500..d03770b57 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -105,8 +105,7 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared return cls(val, 'bytes') return None -def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.TypedData: +def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': @@ -121,8 +120,7 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, for k, v in 
datum.value['headers'].items() }, enable_content_negotiation=False, - body=datum_as_proto(datum.value['body'], shmem_mgr, - invocation_id), + body=datum_as_proto(datum.value['body'], shmem_mgr), )) else: raise NotImplementedError( diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 4e90ff4f8..580d02c30 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -112,17 +112,15 @@ def get_datum(binding: str, obj: typing.Any, def to_outgoing_proto(binding: str, obj: typing.Any, *, pytype: typing.Optional[type], - shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.TypedData: + shmem_mgr: SharedMemoryManager) -> protos.TypedData: datum = get_datum(binding, obj, pytype) - return datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + return datumdef.datum_as_proto(datum, shmem_mgr) def to_outgoing_param_binding(binding: str, obj: typing.Any, *, pytype: typing.Optional[type], out_name: str, - shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.ParameterBinding: + shmem_mgr: SharedMemoryManager) -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) # TODO gochaudh: IMPORTANT: Right now we set the AppSetting to disable this # However that takes impact only for data coming from host -> worker @@ -132,7 +130,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): if datum.type == 'bytes': value = datum.value - map_name = shmem_mgr.put_bytes(value, invocation_id) + map_name = shmem_mgr.put_bytes(value) if map_name is not None: shmem = protos.RpcSharedMemory( name=map_name, @@ -152,7 +150,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, ) if param_binding is None: - rpc_val = datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + rpc_val = datumdef.datum_as_proto(datum, shmem_mgr) param_binding = protos.ParameterBinding( name=out_name, data=rpc_val) 
diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index d252f01d8..eb78fe524 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -14,7 +14,7 @@ class SharedMemoryManager: Memory. """ def __init__(self): - self.allocated_mmaps = {} # type dict[string, [(mmap_name, mmap)] + self.allocated_mmaps = {} # type dict[map_name, mmap] def is_enabled(self) -> bool: """ @@ -35,40 +35,38 @@ def is_supported(self, datum) -> bool: else: return False - def get_bytes(self, mmap_name: str, offset: int, count: int) -> bytes: + def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: """ Reads data from the given Memory Mapped File with the provided name, starting at the provided offset and reading a total of count bytes. Returns a tuple containing the binary data read from shared memory if successful, None otherwise. """ - logger.info('Reading from shared memory: %s', mmap_name) - data = FileReader.read_content_as_bytes(mmap_name, offset) + logger.info('Reading from shared memory: %s', map_name) + data = FileReader.read_content_as_bytes(map_name, offset) return data - def put_bytes(self, data: bytes, invocation_id: str) -> str: + def put_bytes(self, data: bytes) -> str: """ Writes the given data into shared memory. Returns the name of the Memory Mapped File into which the data was written if succesful, None otherwise. 
""" - mmap_name = str(uuid.uuid4()) - logger.info('Writing to shared memory: %s', mmap_name) - mmap = FileWriter.create_with_content_bytes(mmap_name, data) + map_name = str(uuid.uuid4()) + logger.info('Writing to shared memory: %s', map_name) + mmap = FileWriter.create_with_content_bytes(map_name, data) - if invocation_id not in self.allocated_mmaps: - self.allocated_mmaps[invocation_id] = [] - self.allocated_mmaps[invocation_id].append((mmap_name, mmap)) + # Hold a reference to the mmap to prevent it from closing before the + # host has read it. + self.allocated_mmaps[map_name] = mmap - return mmap_name + return map_name - def free(self, invocation_id: str): + def free_map(self, map_name: str): """ - Free up the resources allocated for the given invocation_id. - This includes closing and deleting mmaps that were produced as outputs - during the given invocation_id. """ - if invocation_id in self.allocated_mmaps: - for mmap_name, mmap in self.allocated_mmaps[invocation_id]: - FileAccessor.delete_mmap(mmap_name, mmap) - del self.allocated_mmaps[invocation_id] + if map_name in self.allocated_mmaps: + mmap = self.allocated_mmaps[map_name] + FileAccessor.delete_mmap(map_name, mmap) + del self.allocated_mmaps[map_name] + diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 2f0d622ce..52fab5630 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -394,8 +394,7 @@ async def _handle__invocation_request(self, req): param_binding = bindings.to_outgoing_param_binding( out_type_info.binding_name, val, pytype=out_type_info.pytype, - out_name=out_name, shmem_mgr=self._shmem_mgr, - invocation_id=invocation_id) + out_name=out_name, shmem_mgr=self._shmem_mgr) output_data.append(param_binding) return_value = None @@ -403,7 +402,7 @@ async def _handle__invocation_request(self, req): return_value = bindings.to_outgoing_proto( fi.return_type.binding_name, call_result, pytype=fi.return_type.pytype, - 
shmem_mgr=self._shmem_mgr, invocation_id=invocation_id) + shmem_mgr=self._shmem_mgr) # Actively flush customer print() function to console sys.stdout.flush() @@ -498,8 +497,9 @@ async def _handle__close_shared_memory_resources_request(self, req): try: close_request = req.close_shared_memory_resources_request - invocation_id_to_free = close_request.invocation_id - self._shmem_mgr.free(invocation_id_to_free) + map_names = close_request.map_names + for map_name in map_names: + self._shmem_mgr.free_map(map_name) success_response = protos.CloseSharedMemoryResourcesResponse( result=protos.StatusResult( diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index 20db572e9..0f64d095f 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -207,7 +207,7 @@ message FunctionEnvironmentReloadResponse { // Tell the out-of-proc worker to close any shared memory maps it allocated for given invocation message CloseSharedMemoryResourcesRequest { - string invocation_id = 1; + repeated string map_names = 1; } // Response from the worker indicating whether the shared memory maps it had allocated have been successfully closed @@ -334,18 +334,18 @@ message TypedData { // Specify which type of data is contained in the shared memory region being read enum RpcSharedMemoryDataType { - unknown = 0; - string = 1; - json = 2; - bytes = 3; - stream = 4; - http = 5; - int = 6; - double = 7; - collection_bytes = 8; - collection_string = 9; - collection_double = 10; - collection_sint64 = 11; + unknown = 0; + string = 1; + json = 2; + bytes = 3; + stream = 4; + http = 5; + int = 6; + double = 7; + collection_bytes = 8; + collection_string = 9; + collection_double = 10; + collection_sint64 = 11; } // Used to provide metadata about shared memory region to read data from From c736890f8bd7006627942d43f4e6cb88847d494b Mon Sep 
17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 26 Oct 2020 12:30:47 +0500 Subject: [PATCH 09/76] Support for string datatype for shared memory data transfer --- azure_functions_worker/bindings/datumdef.py | 14 +++++---- azure_functions_worker/bindings/meta.py | 21 ++++++++++++-- .../bindings/shared_memory_manager.py | 29 +++++++++++++++++-- azure_functions_worker/dispatcher.py | 3 +- 4 files changed, 54 insertions(+), 13 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index d03770b57..0abd1cf0e 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -99,13 +99,17 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared offset = shmem.offset count = shmem.count data_type = shmem.type - val = shmem_mgr.get_bytes(mmap_name, offset, count) - if val is not None: - if data_type == protos.RpcSharedMemoryDataType.bytes: + if data_type == protos.RpcSharedMemoryDataType.bytes: + val = shmem_mgr.get_bytes(mmap_name, offset, count) + if val is not None: return cls(val, 'bytes') + elif data_type == protos.RpcSharedMemoryDataType.string: + val = shmem_mgr.get_string(mmap_name, offset, count) + if val is not None: + return cls(val, 'string') return None -def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager) -> protos.TypedData: +def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': @@ -120,7 +124,7 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager) -> protos.Typed for k, v in datum.value['headers'].items() }, enable_content_negotiation=False, - body=datum_as_proto(datum.value['body'], shmem_mgr), + body=datum_as_proto(datum.value['body']), )) else: raise NotImplementedError( diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 580d02c30..c49c2584f 100644 
--- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -111,10 +111,9 @@ def get_datum(binding: str, obj: typing.Any, def to_outgoing_proto(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type], - shmem_mgr: SharedMemoryManager) -> protos.TypedData: + pytype: typing.Optional[type]) -> protos.TypedData: datum = get_datum(binding, obj, pytype) - return datumdef.datum_as_proto(datum, shmem_mgr) + return datumdef.datum_as_proto(datum) def to_outgoing_param_binding(binding: str, obj: typing.Any, *, @@ -144,6 +143,22 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, raise Exception( 'cannot write datum value into shared memory' ) + elif datum.type == 'string': + value = datum.value + map_name = shmem_mgr.put_string(value) + if map_name is not None: + shmem = protos.RpcSharedMemory( + name=map_name, + offset=0, + count=len(value), + type=protos.RpcSharedMemoryDataType.string) + param_binding = protos.ParameterBinding( + name=out_name, + rpc_shared_memory=shmem) + else: + raise Exception( + 'cannot write datum value into shared memory' + ) else: raise Exception( 'unsupported datum type for shared memory' diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index eb78fe524..c2545a78f 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -32,6 +32,8 @@ def is_supported(self, datum) -> bool: # TODO gochaudh: Check for min size config # Is there a common place to put configs shared b/w host and worker? return True + elif datum.type == 'string': + return True else: return False @@ -42,18 +44,23 @@ def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: Returns a tuple containing the binary data read from shared memory if successful, None otherwise. 
""" - logger.info('Reading from shared memory: %s', map_name) + logger.info('Reading bytes from shared memory: %s', map_name) data = FileReader.read_content_as_bytes(map_name, offset) return data + def get_string(self, map_name: str, offset: int, count: int) -> str: + logger.info('Reading string from shared memory: %s', map_name) + data = FileReader.read_content_as_string(map_name, offset) + return data + def put_bytes(self, data: bytes) -> str: """ - Writes the given data into shared memory. + Writes the given bytes into shared memory. Returns the name of the Memory Mapped File into which the data was written if succesful, None otherwise. """ map_name = str(uuid.uuid4()) - logger.info('Writing to shared memory: %s', map_name) + logger.info('Writing bytes to shared memory: %s', map_name) mmap = FileWriter.create_with_content_bytes(map_name, data) # Hold a reference to the mmap to prevent it from closing before the @@ -62,6 +69,22 @@ def put_bytes(self, data: bytes) -> str: return map_name + def put_string(self, data: str) -> str: + """ + Writes the given string into shared memory. + Returns the name of the Memory Mapped File into which the data was + written if succesful, None otherwise. + """ + map_name = str(uuid.uuid4()) + logger.info('Writing string to shared memory: %s', map_name) + mmap = FileWriter.create_with_content_string(map_name, data) + + # Hold a reference to the mmap to prevent it from closing before the + # host has read it. 
+ self.allocated_mmaps[map_name] = mmap + + return map_name + def free_map(self, map_name: str): """ """ diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 52fab5630..c89657bfc 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -401,8 +401,7 @@ async def _handle__invocation_request(self, req): if fi.return_type is not None: return_value = bindings.to_outgoing_proto( fi.return_type.binding_name, call_result, - pytype=fi.return_type.pytype, - shmem_mgr=self._shmem_mgr) + pytype=fi.return_type.pytype) # Actively flush customer print() function to console sys.stdout.flush() From 2dba4e712ffe50c9ed15f5fe135a44f67499d1d1 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 12 Nov 2020 01:20:34 +0500 Subject: [PATCH 10/76] Change to oneof --- azure_functions_worker/bindings/meta.py | 19 ++++++++----------- .../protos/_src/src/proto/FunctionRpc.proto | 2 ++ 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c49c2584f..474a45e6c 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -60,16 +60,6 @@ def from_incoming_proto( pytype: typing.Optional[type], trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]], shmem_mgr: SharedMemoryManager) -> typing.Any: - # TODO gochaudh: - # Ideally, we should use WhichOneOf (if back compat issue is not there) - # Otherwise, a None check is not applicable as even if rpc_shared_memory is - # not set, its not None - datum = None - if pb.rpc_shared_memory.name is not '': - # Data was sent over shared memory, attempt to read - datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) - # TODO gochaudh: check trigger_metadata (try with blob triggered func) - binding = get_binding(binding) if trigger_metadata: metadata = { @@ -79,9 +69,16 @@ def from_incoming_proto( 
else: metadata = {} - if datum is None: + pb_type = pb.WhichOneof('binding') + if pb_type == 'rpc_shared_memory': + # Data was sent over shared memory, attempt to read + datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) + # TODO gochaudh: check trigger_metadata (try with blob triggered func) + elif pb_type == 'data': val = pb.data datum = datumdef.Datum.from_typed_data(val) + else: + raise TypeError(f'Unknown ParameterBindingType: {pb_type}') try: return binding.decode(datum, trigger_metadata=metadata) diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index 0f64d095f..f5d10753f 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -387,11 +387,13 @@ message ParameterBinding { // Note: Either *data* or *rpc_shared_memory* field will be populated + oneof binding { // Data for the binding TypedData data = 2; // Metadata about the shared memory region to read data from RpcSharedMemory rpc_shared_memory = 3; + } } // Used to describe a given binding on load From 925cf55663c55a699c315514bf0755ea514b2d9a Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 12 Nov 2020 01:23:11 +0500 Subject: [PATCH 11/76] Use oneof in .proto --- .../protos/_src/src/proto/FunctionRpc.proto | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index f5d10753f..b4db8b20a 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -385,14 +385,12 @@ message ParameterBinding { // Name for the binding string name = 1; - // Note: Either *data* or *rpc_shared_memory* field will be populated - oneof binding { - // Data for the binding - 
TypedData data = 2; + // Data for the binding + TypedData data = 2; - // Metadata about the shared memory region to read data from - RpcSharedMemory rpc_shared_memory = 3; + // Metadata about the shared memory region to read data from + RpcSharedMemory rpc_shared_memory = 3; } } From 997a1af132d990aecaac12cc74a9fbe94b8ce253 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 15 Dec 2020 13:04:37 -1000 Subject: [PATCH 12/76] Refactoring mmap_handler --- azure_functions_worker/bindings/datumdef.py | 4 +- azure_functions_worker/bindings/meta.py | 6 +- .../bindings/shared_memory_manager.py | 20 +-- .../mmap_handler/file_accessor.py | 148 +++++------------- .../mmap_handler/file_accessor_factory.py | 15 ++ .../mmap_handler/file_accessor_linux.py | 100 ++++++++++++ .../mmap_handler/file_accessor_windows.py | 27 ++++ .../mmap_handler/file_reader.py | 33 ++-- .../mmap_handler/file_writer.py | 20 ++- azure_functions_worker/protos/__init__.py | 2 +- .../protos/_src/src/proto/FunctionRpc.proto | 11 +- 11 files changed, 229 insertions(+), 157 deletions(-) create mode 100644 azure_functions_worker/mmap_handler/file_accessor_factory.py create mode 100644 azure_functions_worker/mmap_handler/file_accessor_linux.py create mode 100644 azure_functions_worker/mmap_handler/file_accessor_windows.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 0abd1cf0e..242dec985 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -99,11 +99,11 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared offset = shmem.offset count = shmem.count data_type = shmem.type - if data_type == protos.RpcSharedMemoryDataType.bytes: + if data_type == protos.RpcDataType.bytes: val = shmem_mgr.get_bytes(mmap_name, offset, count) if val is not None: return cls(val, 'bytes') - elif data_type == protos.RpcSharedMemoryDataType.string: + elif data_type == 
protos.RpcDataType.string: val = shmem_mgr.get_string(mmap_name, offset, count) if val is not None: return cls(val, 'string') diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 474a45e6c..c8bf8fa15 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -69,7 +69,7 @@ def from_incoming_proto( else: metadata = {} - pb_type = pb.WhichOneof('binding') + pb_type = pb.WhichOneof('rpc_data') if pb_type == 'rpc_shared_memory': # Data was sent over shared memory, attempt to read datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) @@ -132,7 +132,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, name=map_name, offset=0, count=len(value), - type=protos.RpcSharedMemoryDataType.bytes) + type=protos.RpcDataType.bytes) param_binding = protos.ParameterBinding( name=out_name, rpc_shared_memory=shmem) @@ -148,7 +148,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, name=map_name, offset=0, count=len(value), - type=protos.RpcSharedMemoryDataType.string) + type=protos.RpcDataType.string) param_binding = protos.ParameterBinding( name=out_name, rpc_shared_memory=shmem) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index c2545a78f..8f9c35ece 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -5,7 +5,7 @@ from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader -from ..mmap_handler.file_accessor import FileAccessor +from ..mmap_handler.file_accessor_factory import FileAccessorFactory class SharedMemoryManager: @@ -15,6 +15,9 @@ class SharedMemoryManager: """ def __init__(self): self.allocated_mmaps = {} # type dict[map_name, mmap] + self.file_accessor = FileAccessorFactory.create_file_accessor() 
+ self.file_reader = FileReader() + self.file_writer = FileWriter() def is_enabled(self) -> bool: """ @@ -31,6 +34,7 @@ def is_supported(self, datum) -> bool: if datum.type == 'bytes': # TODO gochaudh: Check for min size config # Is there a common place to put configs shared b/w host and worker? + # Env variable? App Setting? return True elif datum.type == 'string': return True @@ -45,12 +49,12 @@ def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: if successful, None otherwise. """ logger.info('Reading bytes from shared memory: %s', map_name) - data = FileReader.read_content_as_bytes(map_name, offset) + data = self.file_reader.read_content_as_bytes(map_name, offset) return data def get_string(self, map_name: str, offset: int, count: int) -> str: logger.info('Reading string from shared memory: %s', map_name) - data = FileReader.read_content_as_string(map_name, offset) + data = self.file_reader.read_content_as_string(map_name, offset) return data def put_bytes(self, data: bytes) -> str: @@ -61,12 +65,10 @@ def put_bytes(self, data: bytes) -> str: """ map_name = str(uuid.uuid4()) logger.info('Writing bytes to shared memory: %s', map_name) - mmap = FileWriter.create_with_content_bytes(map_name, data) - + mmap = self.file_writer.create_with_content_bytes(map_name, data) # Hold a reference to the mmap to prevent it from closing before the # host has read it. self.allocated_mmaps[map_name] = mmap - return map_name def put_string(self, data: str) -> str: @@ -77,12 +79,10 @@ def put_string(self, data: str) -> str: """ map_name = str(uuid.uuid4()) logger.info('Writing string to shared memory: %s', map_name) - mmap = FileWriter.create_with_content_string(map_name, data) - + mmap = self.file_writer.create_with_content_string(map_name, data) # Hold a reference to the mmap to prevent it from closing before the # host has read it. 
self.allocated_mmaps[map_name] = mmap - return map_name def free_map(self, map_name: str): @@ -90,6 +90,6 @@ def free_map(self, map_name: str): """ if map_name in self.allocated_mmaps: mmap = self.allocated_mmaps[map_name] - FileAccessor.delete_mmap(map_name, mmap) + self.file_accessor.delete_mmap(map_name, mmap) del self.allocated_mmaps[map_name] diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index 8f597a038..a937db55f 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -1,128 +1,52 @@ -# -*- coding: utf-8 -*- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. -import os -import sys +import abc import mmap -import time -import struct -import hashlib -import urllib.parse -from .memorymappedfile_constants import MemoryMappedFileConstants as consts -""" -TODO -Clean up this class and use logger instead of prints -""" -class FileAccessor: - @staticmethod - def _open_mmap_file_linux(map_name): - """Get the file descriptor of an existing memory map. - """ - escaped_map_name = urllib.parse.quote_plus(map_name) - for mmap_temp_dir in consts.TEMP_DIRS: - filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) - try: - file = open(filename, "r+b") - return file - except FileNotFoundError: - pass - raise FileNotFoundError("File for '%s' does not exist" % (map_name)) - @staticmethod - def open_mmap(map_name, map_size, access=mmap.ACCESS_READ): +class FileAccessor(metaclass=abc.ABCMeta): + """ + TODO write docstring. 
+ """ + @classmethod + def __subclasshook__(cls, subclass): + return (hasattr(subclass, 'open_mmap') and + callable(subclass.load_data_source) and + hasattr(subclass, 'create_mmap') and + callable(subclass.extract_text) or + hasattr(subclass, 'delete_mmap') and + callable(subclass.extract_text) or + NotImplemented) + + @abc.abstractmethod + def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_READ): """Open an existing memory map. """ - try: - if os.name == "posix": - file = FileAccessor._open_mmap_file_linux(map_name) - mmap_ret = mmap.mmap(file.fileno(), map_size, access=access) - else: - mmap_ret = mmap.mmap(-1, map_size, map_name, access=access) - mmap_ret.seek(0) - return mmap_ret - except ValueError: - # mmap length is greater than file size - #print("Cannot open memory map '%s': %s" % (map_name, value_error)) - return None - except FileNotFoundError: - # TODO Log Error - return None + raise NotImplementedError - @staticmethod - def _create_mmap_dir_linux(): - """Create a directory to create memory maps. + @abc.abstractmethod + def create_mmap(self, map_name: str, map_size: int): + """Create a new memory map. """ - for mmap_temp_dir in consts.TEMP_DIRS: - dirname = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) - if os.path.isdir(dirname): - # One of the directories already exists, no need - return - try: - os.makedirs(dirname) - return - except Exception as ex: - print.error("Cannot create dir '%s': %s" % (dirname, str(ex))) + raise NotImplementedError - @staticmethod - def _create_mmap_file_linux(map_name, map_size): - """Get the file descriptor for a new memory map. + @abc.abstractmethod + def delete_mmap(self, map_name: str, mmap): + """Delete a memory map. 
""" - escaped_map_name = urllib.parse.quote_plus(map_name) - dir_exists = False - for mmap_temp_dir in consts.TEMP_DIRS: - # Check if the file already exists - filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) - if os.path.exists(filename): - raise Exception("File '%s' for memory map '%s' already exists" % - (filename, map_name)) - # Check if the parent directory exists - dir_name = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) - if os.path.isdir(dir_name): - dir_exists = True - # Check if any of the parent directories exists - if not dir_exists: - FileAccessor._create_mmap_dir_linux() - # Create the file - for mmap_temp_dir in consts.TEMP_DIRS: - filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) - try: - file = os.open(filename, os.O_CREAT | os.O_TRUNC | os.O_RDWR) - # Write 0s to allocate - bytes_written = os.write(file, b'\x00' * map_size) - if bytes_written != map_size: - print("Cannot write 0s into new memory map file '%s': %d != %d" % - (filename, bytes_written, map_size)) - return file - except Exception as ex: - print("Cannot create memory map file '%s': %s" % (filename, ex)) - raise Exception("Cannot create memory map file for '%s'" % (map_name)) + raise NotImplementedError - @staticmethod - def create_mmap(map_name, map_size): - """Create a new memory map. + def _verify_new_map_created(self, map_name: str, mem_map) -> bool: + """Checks if the first byte of the memory map is zero. + If it is not, this memory map already existed. 
""" - if os.name == 'posix': - file = FileAccessor._create_mmap_file_linux(map_name, map_size) - mem_map = mmap.mmap(file, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) - else: - # Windows creates it when trying to open it - mem_map = FileAccessor.open_mmap(map_name, map_size, mmap.ACCESS_WRITE) - # Verify that the file is actually created and not existing before mem_map.seek(0) byte_read = mem_map.read(1) + is_new_mmap = False if byte_read != b'\x00': - raise Exception("Memory map '%s' already exists" % (map_name)) + is_new_mmap = False + else: + is_new_mmap = True mem_map.seek(0) - return mem_map - - @staticmethod - def delete_mmap(map_name, mmap): - """Delete a memory map. - """ - if os.name == 'posix': - try: - file = FileAccessor._open_mmap_file_linux(map_name) - os.remove(file.name) - except FileNotFoundError: - pass # Nothing to do if the file is not there anyway - mmap.close() \ No newline at end of file + return is_new_mmap \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_factory.py b/azure_functions_worker/mmap_handler/file_accessor_factory.py new file mode 100644 index 000000000..ce94c8def --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_accessor_factory.py @@ -0,0 +1,15 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import os +from .file_accessor_linux import FileAccessorLinux +from .file_accessor_windows import FileAccessorWindows + + +class FileAccessorFactory: + @staticmethod + def create_file_accessor(): + if os.name == 'posix': + return FileAccessorLinux() + else: + return FileAccessorWindows() \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_linux.py b/azure_functions_worker/mmap_handler/file_accessor_linux.py new file mode 100644 index 000000000..34895c9d0 --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_accessor_linux.py @@ -0,0 +1,100 @@ +# Copyright (c) Microsoft Corporation. 
All rights reserved. +# Licensed under the MIT License. + +import os +import mmap +import urllib.parse +from .memorymappedfile_constants import MemoryMappedFileConstants as consts +from .file_accessor import FileAccessor + +class FileAccessorLinux(FileAccessor): + """ + TODO + """ + def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_READ): + try: + file = self._open_mmap_file(map_name) + mmap_ret = mmap.mmap(file.fileno(), map_size, access=access) + mmap_ret.seek(0) + return mmap_ret + except Exception as e: + # TODO Log Error + print(e) + return None + + def create_mmap(self, map_name: str, map_size: int): + file = self._create_mmap_file(map_name, map_size) + mem_map = mmap.mmap(file, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) + if not self._verify_new_map_created(map_name, mem_map): + raise Exception("Memory map '%s' already exists" % (map_name)) + return mem_map + + def delete_mmap(self, map_name: str, mmap): + try: + file = self._open_mmap_file(map_name) + os.remove(file.name) + except FileNotFoundError: + # TODO log debug + pass # Nothing to do if the file is not there anyway + mmap.close() + + def _open_mmap_file(self, map_name: str): + """Get the file descriptor of an existing memory map. + """ + escaped_map_name = urllib.parse.quote_plus(map_name) + for mmap_temp_dir in consts.TEMP_DIRS: + filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) + try: + file = open(filename, "r+b") + return file + except FileNotFoundError: + # TODO log debug + pass + raise FileNotFoundError("File for '%s' does not exist" % (map_name)) + + def _create_mmap_dir(self): + """Create a directory to create memory maps. 
+ """ + for mmap_temp_dir in consts.TEMP_DIRS: + dirname = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) + if os.path.isdir(dirname): + # One of the directories already exists, no need + return + try: + os.makedirs(dirname) + return + except Exception as ex: + print("Cannot create dir '%s': %s" % (dirname, str(ex))) + + def _create_mmap_file(self, map_name: str, map_size: int): + """Get the file descriptor for a new memory map. + """ + escaped_map_name = urllib.parse.quote_plus(map_name) + dir_exists = False + for mmap_temp_dir in consts.TEMP_DIRS: + # Check if the file already exists + filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) + if os.path.exists(filename): + raise Exception("File '%s' for memory map '%s' already exists" % + (filename, map_name)) + # Check if the parent directory exists + dir_name = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) + if os.path.isdir(dir_name): + dir_exists = True + # Check if any of the parent directories exists + if not dir_exists: + self._create_mmap_dir() + # Create the file + for mmap_temp_dir in consts.TEMP_DIRS: + filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) + try: + file = os.open(filename, os.O_CREAT | os.O_TRUNC | os.O_RDWR) + # Write 0s to allocate + bytes_written = os.write(file, b'\x00' * map_size) + if bytes_written != map_size: + print("Cannot write 0s into new memory map file '%s': %d != %d" % + (filename, bytes_written, map_size)) + return file + except Exception as ex: + print("Cannot create memory map file '%s': %s" % (filename, ex)) + raise Exception("Cannot create memory map file for '%s'" % (map_name)) \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/mmap_handler/file_accessor_windows.py new file mode 100644 index 000000000..18ac88b44 --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_accessor_windows.py @@ -0,0 +1,27 @@ +# Copyright (c) 
Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import mmap +from .file_accessor import FileAccessor + + +class FileAccessorWindows(FileAccessor): + def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_READ): + try: + mmap_ret = mmap.mmap(-1, map_size, map_name, access=access) + mmap_ret.seek(0) + return mmap_ret + except Exception as e: + # TODO Log Error + print(e) + return None + + def create_mmap(self, map_name: str, map_size: int): + # Windows creates the mmap when trying to open it + mem_map = self.open_mmap(map_name, map_size, mmap.ACCESS_WRITE) + if not self._verify_new_map_created(map_name, mem_map): + raise Exception("Memory map '%s' already exists" % (map_name)) + return mem_map + + def delete_mmap(self, map_name: str, mmap): + mmap.close() \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 68ba05cc3..9977cdae4 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -1,28 +1,32 @@ -# -*- coding: utf-8 -*- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. import mmap import os import struct from .memorymappedfile_constants import MemoryMappedFileConstants as consts -from .file_accessor import FileAccessor +from .file_accessor_factory import FileAccessorFactory class FileReader: - @staticmethod - def _bytes_to_long(input_bytes): + """ + """ + def __init__(self): + self.file_accessor = FileAccessorFactory.create_file_accessor() + + def _bytes_to_long(self, input_bytes): """Decode a set of bytes representing a long. This uses the format that C# uses. 
""" return struct.unpack(" Optional[mmap.mmap]: + """ + """ + def __init__(self): + self.file_accessor = FileAccessorFactory.create_file_accessor() + + def create_with_content_bytes(self, map_name: str, content: bytes) -> Optional[mmap.mmap]: if content is None: return None content_size = len(content) map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size - mem_map = FileAccessor.create_mmap(map_name, map_size) + mem_map = self.file_accessor.create_mmap(map_name, map_size) content_size_bytes = content_size.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) mem_map.write(content_size_bytes) mem_map.write(content) mem_map.flush() return mem_map - @staticmethod - def create_with_content_string(map_name: str, content: str) -> Optional[mmap.mmap]: + def create_with_content_string(self, map_name: str, content: str) -> Optional[mmap.mmap]: if content is None: return None content_bytes = content.encode('utf-8') - return FileWriter.create_with_content_bytes(map_name, content_bytes) + return self.create_with_content_bytes(map_name, content_bytes) diff --git a/azure_functions_worker/protos/__init__.py b/azure_functions_worker/protos/__init__.py index 6ec9a6c74..d7a535826 100644 --- a/azure_functions_worker/protos/__init__.py +++ b/azure_functions_worker/protos/__init__.py @@ -24,6 +24,6 @@ RpcHttp, RpcLog, RpcSharedMemory, - RpcSharedMemoryDataType, + RpcDataType, CloseSharedMemoryResourcesRequest, CloseSharedMemoryResourcesResponse) diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index b4db8b20a..403156e24 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -210,9 +210,10 @@ message CloseSharedMemoryResourcesRequest { repeated string map_names = 1; } -// Response from the worker indicating whether the shared memory maps it had allocated have been successfully closed 
+// Response from the worker indicating which of the shared memory maps have been successfully closed and which have not been closed +// The key (string) is the map name and the value (bool) is true if it was closed, false if not message CloseSharedMemoryResourcesResponse { - StatusResult result = 1; + map close_map_results = 1; } // Host tells the worker to load a Function @@ -333,7 +334,7 @@ message TypedData { } // Specify which type of data is contained in the shared memory region being read -enum RpcSharedMemoryDataType { +enum RpcDataType { unknown = 0; string = 1; json = 2; @@ -357,7 +358,7 @@ message RpcSharedMemory { // Number of bytes to read (starting from the offset) int64 count = 3; // Final type to which the read data (in bytes) is to be interpreted as - RpcSharedMemoryDataType type = 4; + RpcDataType type = 4; } // Used to encapsulate collection string @@ -385,7 +386,7 @@ message ParameterBinding { // Name for the binding string name = 1; - oneof binding { + oneof rpc_data { // Data for the binding TypedData data = 2; From 10648c0a294c9bae37e97965b97fb605faffd3ea Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 10:01:08 -1000 Subject: [PATCH 13/76] Refactoring, cleaning up and adding docstrings --- azure_functions_worker/bindings/datumdef.py | 49 ++++++++++++- azure_functions_worker/bindings/meta.py | 60 +++++----------- .../bindings/shared_memory_manager.py | 68 +++++++++++-------- azure_functions_worker/dispatcher.py | 5 +- .../mmap_handler/file_accessor.py | 34 +++++----- .../mmap_handler/file_accessor_linux.py | 30 ++++---- .../mmap_handler/file_accessor_windows.py | 11 ++- .../mmap_handler/file_reader.py | 40 +++++++---- .../mmap_handler/file_writer.py | 12 ++++ 9 files changed, 187 insertions(+), 122 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 242dec985..61f67939f 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ 
b/azure_functions_worker/bindings/datumdef.py @@ -2,6 +2,7 @@ # Licensed under the MIT License. from typing import Any +from typing import Optional import json from .. import protos from .shared_memory_manager import SharedMemoryManager @@ -94,7 +95,11 @@ def from_typed_data(cls, td: protos.TypedData): return cls(val, tt) @classmethod - def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: SharedMemoryManager): + def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: SharedMemoryManager) -> Optional[Datum]: + """ + Reads the specified shared memory region and converts the read data into a datum object of + the corresponding type. + """ mmap_name = shmem.name offset = shmem.offset count = shmem.count @@ -109,6 +114,46 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared return cls(val, 'string') return None + def to_rpc_shared_memory(self, shmem_mgr: SharedMemoryManager) -> protos.RpcSharedMemory: + """ + Writes the given value to shared memory and returns the corresponding RpcSharedMemory + object which can be sent back to the functions host over RPC. 
+ """ + if self.type == 'bytes': + value = self.value + map_name = shmem_mgr.put_bytes(value) + if map_name is not None: + shmem = protos.RpcSharedMemory( + name=map_name, + offset=0, + count=len(value), + type=protos.RpcDataType.bytes) + return shmem + else: + raise Exception( + f'cannot write datum value (type: {self.type}) into ' + f'shared memory (name: {map_name})' + ) + elif self.type == 'string': + value = self.value + map_name = shmem_mgr.put_string(value) + if map_name is not None: + shmem = protos.RpcSharedMemory( + name=map_name, + offset=0, + count=len(value), + type=protos.RpcDataType.string) + return shmem + else: + raise Exception( + f'cannot write datum value (type: {self.type}) into ' + f'shared memory (name: {map_name})' + ) + else: + raise NotImplementedError( + f'unsupported datum type ({self.type}) for shared memory' + ) + def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) @@ -129,4 +174,4 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: else: raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) - ) + ) \ No newline at end of file diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c8bf8fa15..4be97c19f 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -93,7 +93,7 @@ def from_incoming_proto( def get_datum(binding: str, obj: typing.Any, - pytype: typing.Optional[type]): + pytype: typing.Optional[type]) -> datumdef.Datum: binding = get_binding(binding) try: @@ -122,49 +122,21 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, # However that takes impact only for data coming from host -> worker # Is there a way to check the AppSetting here so that this does not respond back # with shared memory? 
- param_binding = None + shared_mem_value = None + parameter_binding = None + # If shared memory is enabled, try to transfer to host over shared memory if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): - if datum.type == 'bytes': - value = datum.value - map_name = shmem_mgr.put_bytes(value) - if map_name is not None: - shmem = protos.RpcSharedMemory( - name=map_name, - offset=0, - count=len(value), - type=protos.RpcDataType.bytes) - param_binding = protos.ParameterBinding( - name=out_name, - rpc_shared_memory=shmem) - else: - raise Exception( - 'cannot write datum value into shared memory' - ) - elif datum.type == 'string': - value = datum.value - map_name = shmem_mgr.put_string(value) - if map_name is not None: - shmem = protos.RpcSharedMemory( - name=map_name, - offset=0, - count=len(value), - type=protos.RpcDataType.string) - param_binding = protos.ParameterBinding( - name=out_name, - rpc_shared_memory=shmem) - else: - raise Exception( - 'cannot write datum value into shared memory' - ) - else: - raise Exception( - 'unsupported datum type for shared memory' - ) - - if param_binding is None: - rpc_val = datumdef.datum_as_proto(datum, shmem_mgr) - param_binding = protos.ParameterBinding( + shared_mem_value = datum.to_rpc_shared_memory(shmem_mgr) + if shared_mem_value is not None: + # Check if data was transferred over shared memory. + # If it was, then use the rpc_shared_memory field in the response message. 
+ parameter_binding = protos.ParameterBinding( + name=out_name, + rpc_shared_memory=shared_mem_value) + else: + # If data was not trasnferred over shared memory, send it as part of the response message + rpc_val = datumdef.datum_as_proto(datum) + parameter_binding = protos.ParameterBinding( name=out_name, data=rpc_val) - - return param_binding \ No newline at end of file + return parameter_binding diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 8f9c35ece..cb01848b7 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -2,6 +2,8 @@ # Licensed under the MIT License. import uuid +from typing import Optional +from . import datumdef from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader @@ -10,26 +12,34 @@ class SharedMemoryManager: """ - Performs all operations related to reading/writing data from/to Shared - Memory. + Performs all operations related to reading/writing data from/to shared memory. + This is used for transferring input/output data of the function from/to the functions host over + shared memory as opposed to RPC to improve the rate of data transfer and the function's + end-to-end latency. """ def __init__(self): - self.allocated_mmaps = {} # type dict[map_name, mmap] + # The allocated memory maps are tracked here so that a reference to them is kept open until + # they have been used (e.g. if they contain a function's output, it is read by the + # functions host). + # Having a mapping of the name and the memory map is then later used to close a given + # memory map by its name, after it has been used. 
+ # Type: dict[map_name, mmap] + self.allocated_mmaps = {} self.file_accessor = FileAccessorFactory.create_file_accessor() self.file_reader = FileReader() self.file_writer = FileWriter() def is_enabled(self) -> bool: """ - Whether supported types should be transferred between Functions host - and the worker using shared memory. + Whether supported types should be transferred between functions host and the worker using + shared memory. """ return True - def is_supported(self, datum) -> bool: + def is_supported(self, datum: datumdef.Datum) -> bool: """ - Whether the given Datum object can be transferred to the Functions host - using shared memory. + Whether the given Datum object can be transferred to the functions host using shared + memory. """ if datum.type == 'bytes': # TODO gochaudh: Check for min size config @@ -41,55 +51,59 @@ def is_supported(self, datum) -> bool: else: return False - def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: + def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: """ - Reads data from the given Memory Mapped File with the provided name, - starting at the provided offset and reading a total of count bytes. - Returns a tuple containing the binary data read from shared memory - if successful, None otherwise. + Reads data from the given memory map with the provided name, starting at the provided + offset and reading a total of count bytes. + Returns the data read from shared memory as bytes if successful, None otherwise. """ logger.info('Reading bytes from shared memory: %s', map_name) data = self.file_reader.read_content_as_bytes(map_name, offset) return data - def get_string(self, map_name: str, offset: int, count: int) -> str: + def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: + """ + Reads data from the given memory map with the provided name, starting at the provided + offset and reading a total of count bytes. 
+ Returns the data read from shared memory as a string if successful, None otherwise. + """ logger.info('Reading string from shared memory: %s', map_name) data = self.file_reader.read_content_as_string(map_name, offset) return data - def put_bytes(self, data: bytes) -> str: + def put_bytes(self, data: bytes) -> Optional[str]: """ Writes the given bytes into shared memory. - Returns the name of the Memory Mapped File into which the data was - written if succesful, None otherwise. + Returns the name of the memory map into which the data was written if successful, None + otherwise. """ map_name = str(uuid.uuid4()) logger.info('Writing bytes to shared memory: %s', map_name) mmap = self.file_writer.create_with_content_bytes(map_name, data) - # Hold a reference to the mmap to prevent it from closing before the - # host has read it. - self.allocated_mmaps[map_name] = mmap + if mmap is not None: + self.allocated_mmaps[map_name] = mmap return map_name - def put_string(self, data: str) -> str: + def put_string(self, data: str) -> Optional[str]: """ Writes the given string into shared memory. - Returns the name of the Memory Mapped File into which the data was - written if succesful, None otherwise. + Returns the name of the memory map into which the data was written if succesful, None + otherwise. """ map_name = str(uuid.uuid4()) logger.info('Writing string to shared memory: %s', map_name) mmap = self.file_writer.create_with_content_string(map_name, data) - # Hold a reference to the mmap to prevent it from closing before the - # host has read it. - self.allocated_mmaps[map_name] = mmap + if mmap is not None: + self.allocated_mmaps[map_name] = mmap return map_name def free_map(self, map_name: str): """ + Frees the memory map and any backing resources (e.g. file in the case of Linux) associated + with it. + If there is no memory map with the given name being tracked, then no action is performed. 
""" if map_name in self.allocated_mmaps: mmap = self.allocated_mmaps[map_name] self.file_accessor.delete_mmap(map_name, mmap) del self.allocated_mmaps[map_name] - diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index c89657bfc..614447201 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -488,10 +488,9 @@ async def _handle__function_environment_reload_request(self, req): async def _handle__close_shared_memory_resources_request(self, req): """ - Frees any mmaps that were produced as output for a given invocation. - This is called after the Functions Host is done reading the output from the worker and + Frees any memory maps that were produced as output for a given invocation. + This is called after the functions host is done reading the output from the worker and wants the worker to free up those resources. - TODO gochaudh: Rename CloseSharedMemory* to FreeSharedMemory* and also this method name. """ try: close_request = req.close_shared_memory_resources_request diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index a937db55f..8af18ed0b 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -3,37 +3,39 @@ import abc import mmap +from typing import Optional class FileAccessor(metaclass=abc.ABCMeta): """ - TODO write docstring. + For accessing memory maps. + This is an interface that must be implemented by sub-classes to provide platform-specific + support for accessing memory maps. 
+ Currently the following two sub-classes are implemented: + 1) FileAccessorWindows + 2) FileAccessorLinux """ - @classmethod - def __subclasshook__(cls, subclass): - return (hasattr(subclass, 'open_mmap') and - callable(subclass.load_data_source) and - hasattr(subclass, 'create_mmap') and - callable(subclass.extract_text) or - hasattr(subclass, 'delete_mmap') and - callable(subclass.extract_text) or - NotImplemented) - @abc.abstractmethod - def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_READ): - """Open an existing memory map. + def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: + """ + Opens an existing memory map. + Returns the mmap if successful, None otherwise. """ raise NotImplementedError @abc.abstractmethod def create_mmap(self, map_name: str, map_size: int): - """Create a new memory map. + """ + Creates a new memory map. + Returns the mmap if successful, None otherwise. """ raise NotImplementedError @abc.abstractmethod - def delete_mmap(self, map_name: str, mmap): - """Delete a memory map. + def delete_mmap(self, map_name: str, mem_map: mmap.mmap): + """ + Deletes the memory map and any backing resources associated with it. + If there is no memory map with the given name, then no action is performed. """ raise NotImplementedError diff --git a/azure_functions_worker/mmap_handler/file_accessor_linux.py b/azure_functions_worker/mmap_handler/file_accessor_linux.py index 34895c9d0..c8003a809 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_linux.py +++ b/azure_functions_worker/mmap_handler/file_accessor_linux.py @@ -4,42 +4,46 @@ import os import mmap import urllib.parse +from typing import Optional from .memorymappedfile_constants import MemoryMappedFileConstants as consts from .file_accessor import FileAccessor + class FileAccessorLinux(FileAccessor): """ - TODO + For accessing memory maps. + This implements the FileAccessor interface for Linux. 
""" - def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_READ): + def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: try: - file = self._open_mmap_file(map_name) - mmap_ret = mmap.mmap(file.fileno(), map_size, access=access) - mmap_ret.seek(0) - return mmap_ret + file = self._open_mmap_file(map_name, access) + mem_map = mmap.mmap(file.fileno(), map_size, access=access) + mem_map.seek(0) + return mem_map except Exception as e: # TODO Log Error print(e) return None - def create_mmap(self, map_name: str, map_size: int): + def create_mmap(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: file = self._create_mmap_file(map_name, map_size) mem_map = mmap.mmap(file, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) if not self._verify_new_map_created(map_name, mem_map): raise Exception("Memory map '%s' already exists" % (map_name)) return mem_map - def delete_mmap(self, map_name: str, mmap): + def delete_mmap(self, map_name: str, mem_map: mmap.mmap) -> Optional[mmap.mmap]: try: file = self._open_mmap_file(map_name) os.remove(file.name) except FileNotFoundError: # TODO log debug pass # Nothing to do if the file is not there anyway - mmap.close() + mem_map.close() def _open_mmap_file(self, map_name: str): - """Get the file descriptor of an existing memory map. + """ + Get the file descriptor of an existing memory map. """ escaped_map_name = urllib.parse.quote_plus(map_name) for mmap_temp_dir in consts.TEMP_DIRS: @@ -53,7 +57,8 @@ def _open_mmap_file(self, map_name: str): raise FileNotFoundError("File for '%s' does not exist" % (map_name)) def _create_mmap_dir(self): - """Create a directory to create memory maps. + """ + Create a directory to create memory maps. 
""" for mmap_temp_dir in consts.TEMP_DIRS: dirname = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) @@ -67,7 +72,8 @@ def _create_mmap_dir(self): print("Cannot create dir '%s': %s" % (dirname, str(ex))) def _create_mmap_file(self, map_name: str, map_size: int): - """Get the file descriptor for a new memory map. + """ + Get the file descriptor for a new memory map. """ escaped_map_name = urllib.parse.quote_plus(map_name) dir_exists = False diff --git a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/mmap_handler/file_accessor_windows.py index 18ac88b44..4362fd41d 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_windows.py +++ b/azure_functions_worker/mmap_handler/file_accessor_windows.py @@ -2,11 +2,16 @@ # Licensed under the MIT License. import mmap +from typing import Optional from .file_accessor import FileAccessor class FileAccessorWindows(FileAccessor): - def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_READ): + """ + For accessing memory maps. + This implements the FileAccessor interface for Windows. + """ + def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: try: mmap_ret = mmap.mmap(-1, map_size, map_name, access=access) mmap_ret.seek(0) @@ -16,8 +21,8 @@ def open_mmap(self, map_name: str, map_size: int , access: int = mmap.ACCESS_REA print(e) return None - def create_mmap(self, map_name: str, map_size: int): - # Windows creates the mmap when trying to open it + def create_mmap(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: + # Windows also creates the mmap when trying to open it, if it does not already exist. 
mem_map = self.open_mmap(map_name, map_size, mmap.ACCESS_WRITE) if not self._verify_new_map_created(map_name, mem_map): raise Exception("Memory map '%s' already exists" % (map_name)) diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 9977cdae4..2bccea2fa 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -4,49 +4,57 @@ import mmap import os import struct +from typing import Optional from .memorymappedfile_constants import MemoryMappedFileConstants as consts from .file_accessor_factory import FileAccessorFactory class FileReader: """ + For reading data from memory maps in shared memory. + Assumes a particular format when reading data (i.e. particular header before the content). + For writing data that could be read by the FileReader, use FileWriter. """ def __init__(self): self.file_accessor = FileAccessorFactory.create_file_accessor() - def _bytes_to_long(self, input_bytes): - """Decode a set of bytes representing a long. - This uses the format that C# uses. + def _bytes_to_long(self, input_bytes) -> int: + """ + Decode a set of bytes representing a long. + This uses the format that the functions host (i.e. C#) uses. """ return struct.unpack(" Optional[int]: + """ + Read the header of the memory map to determine the length of content contained in that + memory map. + Returns the content length as a non-negative integer if successful, None otherwise. 
""" try: map_content_length = self.file_accessor.open_mmap( map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) except FileNotFoundError: - return -1 + return None if map_content_length is None: - return -1 + return None try: header_bytes = map_content_length.read(consts.CONTENT_HEADER_TOTAL_BYTES) content_length = self._bytes_to_long(header_bytes) return content_length except ValueError as value_error: print("Cannot get content length for memory map '%s': %s" % (map_name, value_error)) - return 0 + return None finally: map_content_length.close() - def read_content_as_bytes(self, map_name: str, content_offset: int = 0): - """Read content from a memory mapped file as bytes. + def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optional[bytes]: + """ + Read content from the memory map with the given name and starting at the given offset. + Returns the content as bytes if successful, None otherwise. """ content_length = self._get_content_length(map_name) - if content_length < 0: + if content_length is None: return None map_length = content_length + consts.CONTENT_HEADER_TOTAL_BYTES try: @@ -68,8 +76,10 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0): # If we cannot get the content return None return None - def read_content_as_string(self, map_name: str, content_offset: int = 0): - """Read content from a memory mapped file as a string. + def read_content_as_string(self, map_name: str, content_offset: int = 0) -> Optional[str]: + """ + Read content from the memory map with the given name and starting at the given offset. + Returns the content as a string if successful, None otherwise. 
""" content_bytes = self.read_content_as_bytes(map_name, content_offset) if content_bytes is None: diff --git a/azure_functions_worker/mmap_handler/file_writer.py b/azure_functions_worker/mmap_handler/file_writer.py index d4aa0a78f..71d98d0f6 100644 --- a/azure_functions_worker/mmap_handler/file_writer.py +++ b/azure_functions_worker/mmap_handler/file_writer.py @@ -11,11 +11,19 @@ class FileWriter: """ + For writing data into memory maps in shared memory. + Follows a particular format for writing data (i.e. particular header before appending the + content). + For reading data as written by the FileWriter, use the FileReader class. """ def __init__(self): self.file_accessor = FileAccessorFactory.create_file_accessor() def create_with_content_bytes(self, map_name: str, content: bytes) -> Optional[mmap.mmap]: + """ + Create a new memory map with the given name and content (as bytes). + Returns the newly created memory map if successful, None otherwise. + """ if content is None: return None content_size = len(content) @@ -28,6 +36,10 @@ def create_with_content_bytes(self, map_name: str, content: bytes) -> Optional[m return mem_map def create_with_content_string(self, map_name: str, content: str) -> Optional[mmap.mmap]: + """ + Create a new memory map with the given name and content (as a string). + Returns the newly created memory map if successful, None otherwise. 
+ """ if content is None: return None content_bytes = content.encode('utf-8') From 3ac5ef227f02067e48f59934b109aa4352e16c2f Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 13:33:24 -1000 Subject: [PATCH 14/76] Updating CloseSharedMemoryResourcesResponse usage --- azure_functions_worker/bindings/datumdef.py | 19 +++++---- azure_functions_worker/bindings/meta.py | 2 +- .../bindings/shared_memory_manager.py | 42 +++++++++++-------- azure_functions_worker/dispatcher.py | 32 ++++++-------- .../mmap_handler/file_accessor.py | 7 ++-- .../mmap_handler/file_accessor_linux.py | 39 ++++++++--------- .../mmap_handler/file_accessor_windows.py | 11 ++--- .../mmap_handler/file_reader.py | 19 ++++++--- .../mmap_handler/file_writer.py | 2 +- tests/endtoend/blob_functions/ping/main.py | 7 ++++ .../endtoend/cosmosdb_functions/ping/main.py | 7 ++++ .../endtoend/eventgrid_functions/ping/main.py | 7 ++++ .../eventhub_batch_functions/ping/main.py | 7 ++++ .../endtoend/eventhub_functions/ping/main.py | 7 ++++ tests/endtoend/queue_functions/ping/main.py | 7 ++++ .../servicebus_functions/ping/main.py | 7 ++++ tests/endtoend/table_functions/ping/main.py | 7 ++++ tests/unittests/http_functions/ping/main.py | 7 ++++ tests/unittests/load_functions/ping/main.py | 7 ++++ .../log_filtering_functions/ping/main.py | 7 ++++ 20 files changed, 170 insertions(+), 80 deletions(-) create mode 100644 tests/endtoend/blob_functions/ping/main.py create mode 100644 tests/endtoend/cosmosdb_functions/ping/main.py create mode 100644 tests/endtoend/eventgrid_functions/ping/main.py create mode 100644 tests/endtoend/eventhub_batch_functions/ping/main.py create mode 100644 tests/endtoend/eventhub_functions/ping/main.py create mode 100644 tests/endtoend/queue_functions/ping/main.py create mode 100644 tests/endtoend/servicebus_functions/ping/main.py create mode 100644 tests/endtoend/table_functions/ping/main.py create mode 100644 tests/unittests/http_functions/ping/main.py create mode 100644 
tests/unittests/load_functions/ping/main.py create mode 100644 tests/unittests/log_filtering_functions/ping/main.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 61f67939f..df390bef5 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -1,11 +1,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations from typing import Any from typing import Optional import json from .. import protos -from .shared_memory_manager import SharedMemoryManager class Datum: @@ -114,13 +114,14 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared return cls(val, 'string') return None - def to_rpc_shared_memory(self, shmem_mgr: SharedMemoryManager) -> protos.RpcSharedMemory: + @classmethod + def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> protos.RpcSharedMemory: """ Writes the given value to shared memory and returns the corresponding RpcSharedMemory object which can be sent back to the functions host over RPC. 
""" - if self.type == 'bytes': - value = self.value + if datum.type == 'bytes': + value = datum.value map_name = shmem_mgr.put_bytes(value) if map_name is not None: shmem = protos.RpcSharedMemory( @@ -131,11 +132,11 @@ def to_rpc_shared_memory(self, shmem_mgr: SharedMemoryManager) -> protos.RpcShar return shmem else: raise Exception( - f'cannot write datum value (type: {self.type}) into ' + f'cannot write datum value (type: {datum.type}) into ' f'shared memory (name: {map_name})' ) - elif self.type == 'string': - value = self.value + elif datum.type == 'string': + value = datum.value map_name = shmem_mgr.put_string(value) if map_name is not None: shmem = protos.RpcSharedMemory( @@ -146,12 +147,12 @@ def to_rpc_shared_memory(self, shmem_mgr: SharedMemoryManager) -> protos.RpcShar return shmem else: raise Exception( - f'cannot write datum value (type: {self.type}) into ' + f'cannot write datum value (type: {datum.type}) into ' f'shared memory (name: {map_name})' ) else: raise NotImplementedError( - f'unsupported datum type ({self.type}) for shared memory' + f'unsupported datum type ({datum.type}) for shared memory' ) def datum_as_proto(datum: Datum) -> protos.TypedData: diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 4be97c19f..c27fb9ea4 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -126,7 +126,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, parameter_binding = None # If shared memory is enabled, try to transfer to host over shared memory if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): - shared_mem_value = datum.to_rpc_shared_memory(shmem_mgr) + shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) if shared_mem_value is not None: # Check if data was transferred over shared memory. # If it was, then use the rpc_shared_memory field in the response message. 
diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index cb01848b7..5ac62a0ba 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -1,9 +1,10 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import uuid +from typing import Dict from typing import Optional -from . import datumdef from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader @@ -23,8 +24,8 @@ def __init__(self): # functions host). # Having a mapping of the name and the memory map is then later used to close a given # memory map by its name, after it has been used. - # Type: dict[map_name, mmap] - self.allocated_mmaps = {} + # key: map_name, val: mmap.mmap + self.allocated_mem_maps: Dict[str, mmap.mmap] = {} self.file_accessor = FileAccessorFactory.create_file_accessor() self.file_reader = FileReader() self.file_writer = FileWriter() @@ -36,7 +37,7 @@ def is_enabled(self) -> bool: """ return True - def is_supported(self, datum: datumdef.Datum) -> bool: + def is_supported(self, datum: Datum) -> bool: """ Whether the given Datum object can be transferred to the functions host using shared memory. @@ -58,7 +59,7 @@ def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: Returns the data read from shared memory as bytes if successful, None otherwise. 
""" logger.info('Reading bytes from shared memory: %s', map_name) - data = self.file_reader.read_content_as_bytes(map_name, offset) + data = self.file_reader.read_content_as_bytes(map_name, offset, count) return data def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: @@ -68,7 +69,7 @@ def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: Returns the data read from shared memory as a string if successful, None otherwise. """ logger.info('Reading string from shared memory: %s', map_name) - data = self.file_reader.read_content_as_string(map_name, offset) + data = self.file_reader.read_content_as_string(map_name, offset, count) return data def put_bytes(self, data: bytes) -> Optional[str]: @@ -79,9 +80,9 @@ def put_bytes(self, data: bytes) -> Optional[str]: """ map_name = str(uuid.uuid4()) logger.info('Writing bytes to shared memory: %s', map_name) - mmap = self.file_writer.create_with_content_bytes(map_name, data) - if mmap is not None: - self.allocated_mmaps[map_name] = mmap + mem_map = self.file_writer.create_with_content_bytes(map_name, data) + if mem_map is not None: + self.allocated_mem_maps[map_name] = mem_map return map_name def put_string(self, data: str) -> Optional[str]: @@ -92,18 +93,25 @@ def put_string(self, data: str) -> Optional[str]: """ map_name = str(uuid.uuid4()) logger.info('Writing string to shared memory: %s', map_name) - mmap = self.file_writer.create_with_content_string(map_name, data) - if mmap is not None: - self.allocated_mmaps[map_name] = mmap + mem_map = self.file_writer.create_with_content_string(map_name, data) + if mem_map is not None: + self.allocated_mem_maps[map_name] = mem_map return map_name - def free_map(self, map_name: str): + def free_mem_map(self, map_name: str): """ Frees the memory map and any backing resources (e.g. file in the case of Linux) associated with it. If there is no memory map with the given name being tracked, then no action is performed. 
+ Returns True if the memory map was freed successfully, False otherwise. """ - if map_name in self.allocated_mmaps: - mmap = self.allocated_mmaps[map_name] - self.file_accessor.delete_mmap(map_name, mmap) - del self.allocated_mmaps[map_name] + if map_name not in self.allocated_mem_maps: + # TODO Log Error + return False + mem_map = self.allocated_mem_maps[map_name] + success = self.file_accessor.delete_mem_map(map_name, mem_map) + del self.allocated_mem_maps[map_name] + if not success: + # TODO Log Error + return False + return True diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 614447201..03f2f3da1 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -492,30 +492,24 @@ async def _handle__close_shared_memory_resources_request(self, req): This is called after the functions host is done reading the output from the worker and wants the worker to free up those resources. """ - try: - close_request = req.close_shared_memory_resources_request + close_request = req.close_shared_memory_resources_request + map_names = close_request.map_names + # Assign default value of False to all result values. + # If we are successfully able to close a memory map, its result will be set to True. 
+ results = {map_name: False for map_name in map_names} - map_names = close_request.map_names + try: for map_name in map_names: - self._shmem_mgr.free_map(map_name) - - success_response = protos.CloseSharedMemoryResourcesResponse( - result=protos.StatusResult( - status=protos.StatusResult.Success)) - - return protos.StreamingMessage( - request_id=self.request_id, - close_shared_memory_resources_response=success_response) - + success = self._shmem_mgr.free_mem_map(map_name) + results[map_name] = success except Exception as ex: - failure_response = protos.CloseSharedMemoryResourcesResponse( - result=protos.StatusResult( - status=protos.StatusResult.Failure, - exception=self._serialize_exception(ex))) - + # TODO log exception + print(str(ex)) + finally: + response = protos.CloseSharedMemoryResourcesResponse(close_map_results=results) return protos.StreamingMessage( request_id=self.request_id, - close_shared_memory_resources_response=failure_response) + close_shared_memory_resources_response=response) @disable_feature_by(constants.PYTHON_ROLLBACK_CWD_PATH) def _change_cwd(self, new_cwd: str): diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index 8af18ed0b..7a06c9876 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -16,7 +16,7 @@ class FileAccessor(metaclass=abc.ABCMeta): 2) FileAccessorLinux """ @abc.abstractmethod - def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: + def open_mem_map(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: """ Opens an existing memory map. Returns the mmap if successful, None otherwise. 
@@ -24,7 +24,7 @@ def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap raise NotImplementedError @abc.abstractmethod - def create_mmap(self, map_name: str, map_size: int): + def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: """ Creates a new memory map. Returns the mmap if successful, None otherwise. @@ -32,10 +32,11 @@ def create_mmap(self, map_name: str, map_size: int): raise NotImplementedError @abc.abstractmethod - def delete_mmap(self, map_name: str, mem_map: mmap.mmap): + def delete_mem_map(self, map_name: str, mem_map: mmap.mmap) -> bool: """ Deletes the memory map and any backing resources associated with it. If there is no memory map with the given name, then no action is performed. + Returns True if the memory map was successfully deleted, False otherwise. """ raise NotImplementedError diff --git a/azure_functions_worker/mmap_handler/file_accessor_linux.py b/azure_functions_worker/mmap_handler/file_accessor_linux.py index c8003a809..0f578739f 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_linux.py +++ b/azure_functions_worker/mmap_handler/file_accessor_linux.py @@ -14,10 +14,10 @@ class FileAccessorLinux(FileAccessor): For accessing memory maps. This implements the FileAccessor interface for Linux. 
""" - def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: + def open_mem_map(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: try: - file = self._open_mmap_file(map_name, access) - mem_map = mmap.mmap(file.fileno(), map_size, access=access) + fd = self._open_mem_map_file(map_name) + mem_map = mmap.mmap(fd.fileno(), map_size, access=access) mem_map.seek(0) return mem_map except Exception as e: @@ -25,23 +25,24 @@ def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap print(e) return None - def create_mmap(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: - file = self._create_mmap_file(map_name, map_size) - mem_map = mmap.mmap(file, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) + def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: + fd = self._create_mem_map_file(map_name, map_size) + mem_map = mmap.mmap(fd, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) if not self._verify_new_map_created(map_name, mem_map): raise Exception("Memory map '%s' already exists" % (map_name)) return mem_map - def delete_mmap(self, map_name: str, mem_map: mmap.mmap) -> Optional[mmap.mmap]: + def delete_mem_map(self, map_name: str, mem_map: mmap.mmap) -> bool: try: - file = self._open_mmap_file(map_name) - os.remove(file.name) + fd = self._open_mem_map_file(map_name) + os.remove(fd.name) except FileNotFoundError: # TODO log debug - pass # Nothing to do if the file is not there anyway + return False mem_map.close() + return True - def _open_mmap_file(self, map_name: str): + def _open_mem_map_file(self, map_name: str): """ Get the file descriptor of an existing memory map. 
""" @@ -49,14 +50,14 @@ def _open_mmap_file(self, map_name: str): for mmap_temp_dir in consts.TEMP_DIRS: filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) try: - file = open(filename, "r+b") - return file + fd = open(filename, "r+b") + return fd except FileNotFoundError: # TODO log debug pass raise FileNotFoundError("File for '%s' does not exist" % (map_name)) - def _create_mmap_dir(self): + def _create_mem_map_dir(self): """ Create a directory to create memory maps. """ @@ -71,7 +72,7 @@ def _create_mmap_dir(self): except Exception as ex: print("Cannot create dir '%s': %s" % (dirname, str(ex))) - def _create_mmap_file(self, map_name: str, map_size: int): + def _create_mem_map_file(self, map_name: str, map_size: int): """ Get the file descriptor for a new memory map. """ @@ -89,18 +90,18 @@ def _create_mmap_file(self, map_name: str, map_size: int): dir_exists = True # Check if any of the parent directories exists if not dir_exists: - self._create_mmap_dir() + self._create_mem_map_dir() # Create the file for mmap_temp_dir in consts.TEMP_DIRS: filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) try: - file = os.open(filename, os.O_CREAT | os.O_TRUNC | os.O_RDWR) + fd = os.open(filename, os.O_CREAT | os.O_TRUNC | os.O_RDWR) # Write 0s to allocate - bytes_written = os.write(file, b'\x00' * map_size) + bytes_written = os.write(fd, b'\x00' * map_size) if bytes_written != map_size: print("Cannot write 0s into new memory map file '%s': %d != %d" % (filename, bytes_written, map_size)) - return file + return fd except Exception as ex: print("Cannot create memory map file '%s': %s" % (filename, ex)) raise Exception("Cannot create memory map file for '%s'" % (map_name)) \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/mmap_handler/file_accessor_windows.py index 4362fd41d..c8c3909d1 100644 --- 
a/azure_functions_worker/mmap_handler/file_accessor_windows.py +++ b/azure_functions_worker/mmap_handler/file_accessor_windows.py @@ -11,7 +11,7 @@ class FileAccessorWindows(FileAccessor): For accessing memory maps. This implements the FileAccessor interface for Windows. """ - def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: + def open_mem_map(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: try: mmap_ret = mmap.mmap(-1, map_size, map_name, access=access) mmap_ret.seek(0) @@ -21,12 +21,13 @@ def open_mmap(self, map_name: str, map_size: int , access: int) -> Optional[mmap print(e) return None - def create_mmap(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: + def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: # Windows also creates the mmap when trying to open it, if it does not already exist. - mem_map = self.open_mmap(map_name, map_size, mmap.ACCESS_WRITE) + mem_map = self.open_mem_map(map_name, map_size, mmap.ACCESS_WRITE) if not self._verify_new_map_created(map_name, mem_map): raise Exception("Memory map '%s' already exists" % (map_name)) return mem_map - def delete_mmap(self, map_name: str, mmap): - mmap.close() \ No newline at end of file + def delete_mem_map(self, map_name: str, mmap) -> bool: + mmap.close() + return True \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 2bccea2fa..65e022180 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -32,7 +32,7 @@ def _get_content_length(self, map_name) -> Optional[int]: Returns the content length as a non-negative integer if successful, None otherwise. 
""" try: - map_content_length = self.file_accessor.open_mmap( + map_content_length = self.file_accessor.open_mem_map( map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) except FileNotFoundError: return None @@ -48,9 +48,11 @@ def _get_content_length(self, map_name) -> Optional[int]: finally: map_content_length.close() - def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optional[bytes]: + def read_content_as_bytes(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[bytes]: """ Read content from the memory map with the given name and starting at the given offset. + content_offset = 0 means read from the beginning of the content. + bytes_to_read = 0 means read the entire content. Returns the content as bytes if successful, None otherwise. """ content_length = self._get_content_length(map_name) @@ -58,13 +60,18 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optio return None map_length = content_length + consts.CONTENT_HEADER_TOTAL_BYTES try: - map_content = self.file_accessor.open_mmap(map_name, map_length, mmap.ACCESS_READ) + map_content = self.file_accessor.open_mem_map(map_name, map_length, mmap.ACCESS_READ) if map_content is not None: try: map_content.seek(consts.CONTENT_HEADER_TOTAL_BYTES) if content_offset > 0: map_content.seek(content_offset, os.SEEK_CUR) - content = map_content.read() + if bytes_to_read > 0: + # Read up to the specified number of bytes to read + content = map_content.read(bytes_to_read) + else: + # Read the entire content + content = map_content.read() return content except ValueError as value_error: print("Cannot get content for memory map '%s': %s" % (map_name, value_error)) @@ -76,12 +83,12 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optio # If we cannot get the content return None return None - def read_content_as_string(self, map_name: str, content_offset: int = 0) -> Optional[str]: + def 
read_content_as_string(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[str]: """ Read content from the memory map with the given name and starting at the given offset. Returns the content as a string if successful, None otherwise. """ - content_bytes = self.read_content_as_bytes(map_name, content_offset) + content_bytes = self.read_content_as_bytes(map_name, content_offset, bytes_to_read) if content_bytes is None: return None content_str = content_bytes.decode('utf-8') diff --git a/azure_functions_worker/mmap_handler/file_writer.py b/azure_functions_worker/mmap_handler/file_writer.py index 71d98d0f6..5048279e6 100644 --- a/azure_functions_worker/mmap_handler/file_writer.py +++ b/azure_functions_worker/mmap_handler/file_writer.py @@ -28,7 +28,7 @@ def create_with_content_bytes(self, map_name: str, content: bytes) -> Optional[m return None content_size = len(content) map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size - mem_map = self.file_accessor.create_mmap(map_name, map_size) + mem_map = self.file_accessor.create_mem_map(map_name, map_size) content_size_bytes = content_size.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) mem_map.write(content_size_bytes) mem_map.write(content) diff --git a/tests/endtoend/blob_functions/ping/main.py b/tests/endtoend/blob_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/blob_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/cosmosdb_functions/ping/main.py b/tests/endtoend/cosmosdb_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/cosmosdb_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/eventgrid_functions/ping/main.py b/tests/endtoend/eventgrid_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/eventgrid_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/eventhub_batch_functions/ping/main.py b/tests/endtoend/eventhub_batch_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/eventhub_batch_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/eventhub_functions/ping/main.py b/tests/endtoend/eventhub_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/eventhub_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/queue_functions/ping/main.py b/tests/endtoend/queue_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/queue_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/servicebus_functions/ping/main.py b/tests/endtoend/servicebus_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/servicebus_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/table_functions/ping/main.py b/tests/endtoend/table_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/table_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/unittests/http_functions/ping/main.py b/tests/unittests/http_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/unittests/http_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/unittests/load_functions/ping/main.py b/tests/unittests/load_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/unittests/load_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/unittests/log_filtering_functions/ping/main.py b/tests/unittests/log_filtering_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/unittests/log_filtering_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return From 6f2160fb9280bb72df0d4fee099f74044cdc7f90 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 13:36:43 -1000 Subject: [PATCH 15/76] Fixing accidental changes to tests/* --- tests/endtoend/blob_functions/ping/main.py | 7 ------- tests/endtoend/cosmosdb_functions/ping/main.py | 7 ------- tests/endtoend/eventgrid_functions/ping/main.py | 7 ------- tests/endtoend/eventhub_batch_functions/ping/main.py | 7 ------- tests/endtoend/eventhub_functions/ping/main.py | 7 ------- tests/endtoend/queue_functions/ping/main.py | 7 ------- tests/endtoend/servicebus_functions/ping/main.py | 7 ------- tests/endtoend/table_functions/ping/main.py | 7 ------- tests/unittests/http_functions/ping/main.py | 7 ------- tests/unittests/load_functions/ping/main.py | 7 ------- tests/unittests/log_filtering_functions/ping/main.py | 7 ------- 11 files changed, 77 deletions(-) delete mode 100644 tests/endtoend/blob_functions/ping/main.py delete mode 100644 tests/endtoend/cosmosdb_functions/ping/main.py delete mode 100644 tests/endtoend/eventgrid_functions/ping/main.py delete mode 100644 tests/endtoend/eventhub_batch_functions/ping/main.py delete mode 100644 tests/endtoend/eventhub_functions/ping/main.py delete mode 100644 tests/endtoend/queue_functions/ping/main.py delete mode 100644 tests/endtoend/servicebus_functions/ping/main.py delete mode 100644 tests/endtoend/table_functions/ping/main.py delete mode 100644 tests/unittests/http_functions/ping/main.py delete mode 
100644 tests/unittests/load_functions/ping/main.py delete mode 100644 tests/unittests/log_filtering_functions/ping/main.py diff --git a/tests/endtoend/blob_functions/ping/main.py b/tests/endtoend/blob_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/blob_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/cosmosdb_functions/ping/main.py b/tests/endtoend/cosmosdb_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/cosmosdb_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/eventgrid_functions/ping/main.py b/tests/endtoend/eventgrid_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/eventgrid_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/eventhub_batch_functions/ping/main.py b/tests/endtoend/eventhub_batch_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/eventhub_batch_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/eventhub_functions/ping/main.py b/tests/endtoend/eventhub_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/eventhub_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/queue_functions/ping/main.py b/tests/endtoend/queue_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/queue_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/servicebus_functions/ping/main.py b/tests/endtoend/servicebus_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/servicebus_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/table_functions/ping/main.py b/tests/endtoend/table_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/table_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/unittests/http_functions/ping/main.py b/tests/unittests/http_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/unittests/http_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/unittests/load_functions/ping/main.py b/tests/unittests/load_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/unittests/load_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/unittests/log_filtering_functions/ping/main.py b/tests/unittests/log_filtering_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/unittests/log_filtering_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-import azure.functions as func - - -def main(req: func.HttpRequest): - return From 93981659c8b88e555deeea5a1929ee2d73268855 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 15:44:00 -1000 Subject: [PATCH 16/76] Addressing comments --- azure_functions_worker/bindings/datumdef.py | 24 +++++-------------- azure_functions_worker/bindings/meta.py | 4 +++- .../bindings/shared_memory_manager.py | 3 +-- .../mmap_handler/__init__.py | 7 ++++++ .../mmap_handler/file_accessor.py | 3 ++- .../memorymappedfile_constants.py | 14 +++++++---- 6 files changed, 29 insertions(+), 26 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index df390bef5..abfa373f2 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -130,11 +130,6 @@ def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> p count=len(value), type=protos.RpcDataType.bytes) return shmem - else: - raise Exception( - f'cannot write datum value (type: {datum.type}) into ' - f'shared memory (name: {map_name})' - ) elif datum.type == 'string': value = datum.value map_name = shmem_mgr.put_string(value) @@ -145,15 +140,9 @@ def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> p count=len(value), type=protos.RpcDataType.string) return shmem - else: - raise Exception( - f'cannot write datum value (type: {datum.type}) into ' - f'shared memory (name: {map_name})' - ) - else: - raise NotImplementedError( - f'unsupported datum type ({datum.type}) for shared memory' - ) + raise NotImplementedError( + f'unsupported datum type ({datum.type}) for shared memory' + ) def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 'string': @@ -172,7 +161,6 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: enable_content_negotiation=False, body=datum_as_proto(datum.value['body']), )) - else: - raise NotImplementedError( - 'unexpected 
Datum type: {!r}'.format(datum.type) - ) \ No newline at end of file + raise NotImplementedError( + 'unexpected Datum type: {!r}'.format(datum.type) + ) \ No newline at end of file diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c27fb9ea4..0430efa20 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -94,8 +94,10 @@ def from_incoming_proto( def get_datum(binding: str, obj: typing.Any, pytype: typing.Optional[type]) -> datumdef.Datum: + """ + Convert an object to a datum with the specified type. + """ binding = get_binding(binding) - try: datum = binding.encode(obj, expected_type=pytype) except NotImplementedError: diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 5ac62a0ba..3ca69d553 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -49,8 +49,7 @@ def is_supported(self, datum: Datum) -> bool: return True elif datum.type == 'string': return True - else: - return False + return False def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: """ diff --git a/azure_functions_worker/mmap_handler/__init__.py b/azure_functions_worker/mmap_handler/__init__.py index e69de29bb..c5b67f340 100644 --- a/azure_functions_worker/mmap_handler/__init__.py +++ b/azure_functions_worker/mmap_handler/__init__.py @@ -0,0 +1,7 @@ +""" +This module provides functionality for accessing shared memory maps. +These are used for transferring data between functions host and the worker proces. 
+The initial set of corresponding changes to enable shared memory maps in the functions host can be +found in the following Pull Request: +https://github.com/Azure/azure-functions-host/pull/6836 +""" \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index 7a06c9876..2a6283aa3 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -4,6 +4,7 @@ import abc import mmap from typing import Optional +from .memorymappedfile_constants import MemoryMappedFileConstants as consts class FileAccessor(metaclass=abc.ABCMeta): @@ -47,7 +48,7 @@ def _verify_new_map_created(self, map_name: str, mem_map) -> bool: mem_map.seek(0) byte_read = mem_map.read(1) is_new_mmap = False - if byte_read != b'\x00': + if byte_read != consts.ZERO_BYTE: is_new_mmap = False else: is_new_mmap = True diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py index 386186e86..c79db1e95 100644 --- a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py +++ b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py @@ -1,13 +1,19 @@ -# -*- coding: utf-8 -*- +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. -# TODO use protobuf to define these constants between C# and Python +# TODO use protobuf to define these constants between C# and Python? 
class MemoryMappedFileConstants: # Directories in Linux where the memory maps can be found TEMP_DIRS = ["/dev/shm"] # Suffix for the temp directories containing memory maps TEMP_DIR_SUFFIX = "AzureFunctions" - # The length of a long which is the length of the header in the content mmap + # The length of a long which is the length of the header in the content memory map CONTENT_LENGTH_NUM_BYTES = 8 # The length of the header: content length - CONTENT_HEADER_TOTAL_BYTES = CONTENT_LENGTH_NUM_BYTES \ No newline at end of file + CONTENT_HEADER_TOTAL_BYTES = CONTENT_LENGTH_NUM_BYTES + + # Zero byte. + # E.g. Used to compare the first byte of a newly created memory map against this; if it is a + # non-zero byte then the memory map was already created. + ZERO_BYTE = b'\x00' \ No newline at end of file From ded499866269254825dbf66de335a779c286121d Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 14 Oct 2020 22:37:56 +0500 Subject: [PATCH 17/76] Basic structure laid out for reading parameters from shared memory before passing to the function --- azure_functions_worker/bindings/datumdef.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index abfa373f2..0be3f5fbd 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -6,6 +6,7 @@ from typing import Optional import json from .. 
import protos +from .shared_memory_manager import SharedMemoryManager class Datum: @@ -85,6 +86,9 @@ def from_typed_data(cls, td: protos.TypedData): val = td.collection_string elif tt == 'collection_sint64': val = td.collection_sint64 + elif tt == 'shared_memory_data': + shared_memory_manager = SharedMemoryManager() + val, tt = shared_memory_manager.get(td.shared_memory_data) elif tt is None: return None else: From 38c5502247c809d7b3c079ffe09dab08f932e1f3 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 15 Oct 2020 13:22:06 +0500 Subject: [PATCH 18/76] Writing output from worker to Shared Memory --- azure_functions_worker/bindings/datumdef.py | 31 +++++++++++++++++-- .../bindings/shared_memory_manager.py | 29 +++++++++++++++++ .../protos/_src/src/proto/FunctionRpc.proto | 1 + 3 files changed, 58 insertions(+), 3 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 0be3f5fbd..5507362ef 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -87,8 +87,17 @@ def from_typed_data(cls, td: protos.TypedData): elif tt == 'collection_sint64': val = td.collection_sint64 elif tt == 'shared_memory_data': - shared_memory_manager = SharedMemoryManager() - val, tt = shared_memory_manager.get(td.shared_memory_data) + shmem_mgr = SharedMemoryManager() + shmem_data = td.shared_memory_data + mmap_name = shmem_data.memory_mapped_file_name + offset = shmem_data.offset + count = shmem_data.count + ret = shmem_mgr.get(mmap_name, offset, count) + if ret is None: + return None + else: + val = ret + tt = shmem_data.type elif tt is None: return None else: @@ -152,7 +161,23 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': - return protos.TypedData(bytes=datum.value) + if SharedMemoryManager.is_enabled(): + shmem_mgr = SharedMemoryManager() + value = 
datum.value + mmap_name = shmem_mgr.put(value) + if mmap_name is not None: + shmem_data = protos.SharedMemoryData( + memory_mapped_file_name=mmap_name, + offset=0, + count=len(value), + type='bytes') + return protos.TypedData(shared_memory_data=shmem_data) + else: + raise Exception( + 'cannot write datum value into Shared Memory' + ) + else: + return protos.TypedData(bytes=datum.value) elif datum.type == 'json': return protos.TypedData(json=datum.value) elif datum.type == 'http': diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 3ca69d553..0e36c46ec 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -97,6 +97,7 @@ def put_string(self, data: str) -> Optional[str]: self.allocated_mem_maps[map_name] = mem_map return map_name +<<<<<<< HEAD def free_mem_map(self, map_name: str): """ Frees the memory map and any backing resources (e.g. file in the case of Linux) associated @@ -114,3 +115,31 @@ def free_mem_map(self, map_name: str): # TODO Log Error return False return True +======= + @staticmethod + def is_enabled(): + """ + Whether supported types should be transferred between Functions host + and the worker using Shared Memory. + """ + return True + + def get(self, mmap_name: str, offset: int, count: int) -> (bytes): + """ + Reads data from the given Memory Mapped File with the provided name, + starting at the provided offset and reading a total of count bytes. + Returns a tuple containing the binary data read from Shared Memory + if successful, None otherwise. + """ + logger.info('Reading from Shared Memory: %s', mmap_name) + return 'foo'.encode('utf-8') + + def put(self, data: bytes) -> (str): + """ + Writes the given data into Shared Memory. + Returns the name of the Memory Mapped File into which the data was + written if succesful, None otherwise. 
+ """ + mmap_name = str(uuid.uuid4()) + return mmap_name +>>>>>>> Writing output from worker to Shared Memory diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index 403156e24..f88bb1209 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -330,6 +330,7 @@ message TypedData { CollectionString collection_string = 9; CollectionDouble collection_double = 10; CollectionSInt64 collection_sint64 = 11; + SharedMemoryData shared_memory_data = 12; } } From 9a15243cd3f86bf4e57f4bd235797b035be297cb Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 15 Oct 2020 16:43:59 +0500 Subject: [PATCH 19/76] Put output from worker into Shared Memory --- .../bindings/shared_memory_manager.py | 32 ++----------------- 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 0e36c46ec..9bb70c2af 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -11,6 +11,7 @@ from ..mmap_handler.file_accessor_factory import FileAccessorFactory + class SharedMemoryManager: """ Performs all operations related to reading/writing data from/to shared memory. @@ -97,7 +98,6 @@ def put_string(self, data: str) -> Optional[str]: self.allocated_mem_maps[map_name] = mem_map return map_name -<<<<<<< HEAD def free_mem_map(self, map_name: str): """ Frees the memory map and any backing resources (e.g. file in the case of Linux) associated @@ -114,32 +114,4 @@ def free_mem_map(self, map_name: str): if not success: # TODO Log Error return False - return True -======= - @staticmethod - def is_enabled(): - """ - Whether supported types should be transferred between Functions host - and the worker using Shared Memory. 
- """ - return True - - def get(self, mmap_name: str, offset: int, count: int) -> (bytes): - """ - Reads data from the given Memory Mapped File with the provided name, - starting at the provided offset and reading a total of count bytes. - Returns a tuple containing the binary data read from Shared Memory - if successful, None otherwise. - """ - logger.info('Reading from Shared Memory: %s', mmap_name) - return 'foo'.encode('utf-8') - - def put(self, data: bytes) -> (str): - """ - Writes the given data into Shared Memory. - Returns the name of the Memory Mapped File into which the data was - written if succesful, None otherwise. - """ - mmap_name = str(uuid.uuid4()) - return mmap_name ->>>>>>> Writing output from worker to Shared Memory + return True \ No newline at end of file From fa0931d566002e5732267e6e1efc30b8af3e3264 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 16 Oct 2020 15:14:56 +0500 Subject: [PATCH 20/76] Free shared memory resources after use --- azure_functions_worker/bindings/datumdef.py | 14 ++--- azure_functions_worker/bindings/meta.py | 13 +++- .../bindings/shared_memory_manager.py | 2 +- azure_functions_worker/dispatcher.py | 3 +- .../mmap_handler/file_accessor.py | 2 +- .../memorymappedfile_constants.py | 2 +- .../memorymappedfile_controlflags.py | 60 +++++++++++++++++++ 7 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 5507362ef..2c1a41640 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -53,7 +53,7 @@ def __repr__(self): return ''.format(self.type, val_repr) @classmethod - def from_typed_data(cls, td: protos.TypedData): + def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): tt = td.WhichOneof('data') if tt == 'http': http = td.http @@ -64,7 +64,7 
@@ def from_typed_data(cls, td: protos.TypedData): k: Datum(v, 'string') for k, v in http.headers.items() }, body=( - Datum.from_typed_data(http.body) + Datum.from_typed_data(http.body, shmem_mgr) or Datum(type='bytes', value=b'') ), params={ @@ -87,7 +87,6 @@ def from_typed_data(cls, td: protos.TypedData): elif tt == 'collection_sint64': val = td.collection_sint64 elif tt == 'shared_memory_data': - shmem_mgr = SharedMemoryManager() shmem_data = td.shared_memory_data mmap_name = shmem_data.memory_mapped_file_name offset = shmem_data.offset @@ -157,14 +156,14 @@ def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> p f'unsupported datum type ({datum.type}) for shared memory' ) -def datum_as_proto(datum: Datum) -> protos.TypedData: +def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': if SharedMemoryManager.is_enabled(): - shmem_mgr = SharedMemoryManager() value = datum.value - mmap_name = shmem_mgr.put(value) + mmap_name = shmem_mgr.put(value, invocation_id) if mmap_name is not None: shmem_data = protos.SharedMemoryData( memory_mapped_file_name=mmap_name, @@ -188,7 +187,8 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: for k, v in datum.value['headers'].items() }, enable_content_negotiation=False, - body=datum_as_proto(datum.value['body']), + body=datum_as_proto(datum.value['body'], shmem_mgr, + invocation_id), )) raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 0430efa20..69eaa7025 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -63,7 +63,7 @@ def from_incoming_proto( binding = get_binding(binding) if trigger_metadata: metadata = { - k: datumdef.Datum.from_typed_data(v) + k: 
datumdef.Datum.from_typed_data(v, shmem_mgr) for k, v in trigger_metadata.items() } else: @@ -92,11 +92,18 @@ def from_incoming_proto( f'and expected binding type {binding}') +<<<<<<< HEAD def get_datum(binding: str, obj: typing.Any, pytype: typing.Optional[type]) -> datumdef.Datum: """ Convert an object to a datum with the specified type. """ +======= +def to_outgoing_proto(binding: str, obj: typing.Any, *, + pytype: typing.Optional[type], + shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.TypedData: +>>>>>>> Free shared memory resources after use binding = get_binding(binding) try: datum = binding.encode(obj, expected_type=pytype) @@ -108,6 +115,7 @@ def get_datum(binding: str, obj: typing.Any, f'Python type "{type(obj).__name__}"') return datum +<<<<<<< HEAD def to_outgoing_proto(binding: str, obj: typing.Any, *, pytype: typing.Optional[type]) -> protos.TypedData: @@ -142,3 +150,6 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, name=out_name, data=rpc_val) return parameter_binding +======= + return datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) +>>>>>>> Free shared memory resources after use diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 9bb70c2af..91a5a2f29 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -114,4 +114,4 @@ def free_mem_map(self, map_name: str): if not success: # TODO Log Error return False - return True \ No newline at end of file + return True diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 03f2f3da1..d1b97d972 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -401,7 +401,8 @@ async def _handle__invocation_request(self, req): if fi.return_type is not None: return_value = bindings.to_outgoing_proto( fi.return_type.binding_name, 
call_result, - pytype=fi.return_type.pytype) + pytype=fi.return_type.pytype, + shmem_mgr=self._shmem_mgr, invocation_id=invocation_id) # Actively flush customer print() function to console sys.stdout.flush() diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index 2a6283aa3..df87e559d 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -53,4 +53,4 @@ def _verify_new_map_created(self, map_name: str, mem_map) -> bool: else: is_new_mmap = True mem_map.seek(0) - return is_new_mmap \ No newline at end of file + return is_new_mmap diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py index c79db1e95..8e3c6919a 100644 --- a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py +++ b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py @@ -16,4 +16,4 @@ class MemoryMappedFileConstants: # Zero byte. # E.g. Used to compare the first byte of a newly created memory map against this; if it is a # non-zero byte then the memory map was already created. - ZERO_BYTE = b'\x00' \ No newline at end of file + ZERO_BYTE = b'\x00' diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py b/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py new file mode 100644 index 000000000..fbbfbe721 --- /dev/null +++ b/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +import enum + + +class MemoryMappedFileControlFlags(enum.Enum): + """Flag to indicate state of memory mapped file. 
+ Note: Must be kept in sync with the DotNet runtime version of this: + TODO path to MemStore constants + """ + UNKNOWN = 0 + READY_TO_READ = 1 + READY_TO_DISPOSE = 2 + WRITE_IN_PROGRESS = 3 + PENDING_READ = 4 + + +class MemoryMappedFileControlFlagsUtils: + @staticmethod + def is_available(control_flag): + if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: + return False + elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: + return True + elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: + return True + elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: + return True + else: + raise Exception("Unknown control flag: '%s'" % (control_flag)) + + @staticmethod + def is_readable(control_flag): + if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: + return False + elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: + return False + elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: + return True + elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: + return False + elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: + return False + else: + raise Exception("Unknown control flag: '%s'" % (control_flag)) + + @staticmethod + def is_disposable(control_flag): + if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: + return False + elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: + return False + elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: + return False + elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: + return True + elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: + return False + else: + raise Exception("Unknown control flag: '%s'" % (control_flag)) From b469f2a3e0839ae48d4c93233b9d6b585750e5ca Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 19 Oct 2020 12:56:15 +0500 Subject: 
[PATCH 21/76] Removed control flag from mmap header --- .../mmap_handler/file_reader.py | 4 ++ .../memorymappedfile_controlflags.py | 60 ------------------- 2 files changed, 4 insertions(+), 60 deletions(-) delete mode 100644 azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 65e022180..138504db2 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -6,7 +6,11 @@ import struct from typing import Optional from .memorymappedfile_constants import MemoryMappedFileConstants as consts +<<<<<<< HEAD from .file_accessor_factory import FileAccessorFactory +======= +from .file_accessor import FileAccessor +>>>>>>> Removed control flag from mmap header class FileReader: diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py b/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py deleted file mode 100644 index fbbfbe721..000000000 --- a/azure_functions_worker/mmap_handler/memorymappedfile_controlflags.py +++ /dev/null @@ -1,60 +0,0 @@ -# -*- coding: utf-8 -*- - -import enum - - -class MemoryMappedFileControlFlags(enum.Enum): - """Flag to indicate state of memory mapped file. 
- Note: Must be kept in sync with the DotNet runtime version of this: - TODO path to MemStore constants - """ - UNKNOWN = 0 - READY_TO_READ = 1 - READY_TO_DISPOSE = 2 - WRITE_IN_PROGRESS = 3 - PENDING_READ = 4 - - -class MemoryMappedFileControlFlagsUtils: - @staticmethod - def is_available(control_flag): - if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: - return False - elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: - return True - elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: - return True - elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: - return True - else: - raise Exception("Unknown control flag: '%s'" % (control_flag)) - - @staticmethod - def is_readable(control_flag): - if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: - return False - elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: - return False - elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: - return True - elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: - return False - elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: - return False - else: - raise Exception("Unknown control flag: '%s'" % (control_flag)) - - @staticmethod - def is_disposable(control_flag): - if control_flag == MemoryMappedFileControlFlags.UNKNOWN.value: - return False - elif control_flag == MemoryMappedFileControlFlags.WRITE_IN_PROGRESS.value: - return False - elif control_flag == MemoryMappedFileControlFlags.READY_TO_READ.value: - return False - elif control_flag == MemoryMappedFileControlFlags.READY_TO_DISPOSE.value: - return True - elif control_flag == MemoryMappedFileControlFlags.PENDING_READ.value: - return False - else: - raise Exception("Unknown control flag: '%s'" % (control_flag)) From be814e25b91248603bd0fb87748f677e522c6cba Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 20 Oct 2020 11:59:13 +0500 Subject: 
[PATCH 22/76] Proto change --- azure_functions_worker/bindings/datumdef.py | 18 +++++++++--------- .../protos/_src/src/proto/FunctionRpc.proto | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 2c1a41640..793d75649 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -86,17 +86,17 @@ def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): val = td.collection_string elif tt == 'collection_sint64': val = td.collection_sint64 - elif tt == 'shared_memory_data': - shmem_data = td.shared_memory_data - mmap_name = shmem_data.memory_mapped_file_name - offset = shmem_data.offset - count = shmem_data.count + elif tt == 'rpc_shared_memory_info': + shmem_info = td.rpc_shared_memory_info + mmap_name = shmem_info.name + offset = shmem_info.offset + count = shmem_info.count ret = shmem_mgr.get(mmap_name, offset, count) if ret is None: return None else: val = ret - tt = shmem_data.type + tt = shmem_info.type elif tt is None: return None else: @@ -165,12 +165,12 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, value = datum.value mmap_name = shmem_mgr.put(value, invocation_id) if mmap_name is not None: - shmem_data = protos.SharedMemoryData( - memory_mapped_file_name=mmap_name, + shmem_info = protos.RpcSharedMemoryInfo( + name=mmap_name, offset=0, count=len(value), type='bytes') - return protos.TypedData(shared_memory_data=shmem_data) + return protos.TypedData(rpc_shared_memory_info=shmem_info) else: raise Exception( 'cannot write datum value into Shared Memory' diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index f88bb1209..825c937d7 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto 
@@ -330,7 +330,7 @@ message TypedData { CollectionString collection_string = 9; CollectionDouble collection_double = 10; CollectionSInt64 collection_sint64 = 11; - SharedMemoryData shared_memory_data = 12; + RpcSharedMemoryInfo rpc_shared_memory_info = 12; } } From a1d5d988494a546398ae38e3f89af15e6ea9cf3d Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 22 Oct 2020 15:45:05 +0500 Subject: [PATCH 23/76] Working for blob shared memory data transfer; needs clean up, comments and tests --- azure_functions_worker/bindings/datumdef.py | 32 +------ azure_functions_worker/bindings/meta.py | 89 ++++++++++--------- .../bindings/shared_memory_manager.py | 40 +++++++++ .../protos/_src/src/proto/FunctionRpc.proto | 1 - 4 files changed, 90 insertions(+), 72 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 793d75649..45da37bfd 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -53,7 +53,7 @@ def __repr__(self): return ''.format(self.type, val_repr) @classmethod - def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): + def from_typed_data(cls, td: protos.TypedData): tt = td.WhichOneof('data') if tt == 'http': http = td.http @@ -64,7 +64,7 @@ def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): k: Datum(v, 'string') for k, v in http.headers.items() }, body=( - Datum.from_typed_data(http.body, shmem_mgr) + Datum.from_typed_data(http.body) or Datum(type='bytes', value=b'') ), params={ @@ -86,17 +86,6 @@ def from_typed_data(cls, td: protos.TypedData, shmem_mgr: SharedMemoryManager): val = td.collection_string elif tt == 'collection_sint64': val = td.collection_sint64 - elif tt == 'rpc_shared_memory_info': - shmem_info = td.rpc_shared_memory_info - mmap_name = shmem_info.name - offset = shmem_info.offset - count = shmem_info.count - ret = shmem_mgr.get(mmap_name, offset, count) - if ret is 
None: - return None - else: - val = ret - tt = shmem_info.type elif tt is None: return None else: @@ -161,22 +150,7 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': - if SharedMemoryManager.is_enabled(): - value = datum.value - mmap_name = shmem_mgr.put(value, invocation_id) - if mmap_name is not None: - shmem_info = protos.RpcSharedMemoryInfo( - name=mmap_name, - offset=0, - count=len(value), - type='bytes') - return protos.TypedData(rpc_shared_memory_info=shmem_info) - else: - raise Exception( - 'cannot write datum value into Shared Memory' - ) - else: - return protos.TypedData(bytes=datum.value) + return protos.TypedData(bytes=datum.value) elif datum.type == 'json': return protos.TypedData(json=datum.value) elif datum.type == 'http': diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 69eaa7025..4e90ff4f8 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -60,25 +60,28 @@ def from_incoming_proto( pytype: typing.Optional[type], trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]], shmem_mgr: SharedMemoryManager) -> typing.Any: + # TODO gochaudh: + # Ideally, we should use WhichOneOf (if back compat issue is not there) + # Otherwise, a None check is not applicable as even if rpc_shared_memory is + # not set, its not None + datum = None + if pb.rpc_shared_memory.name is not '': + # Data was sent over shared memory, attempt to read + datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) + # TODO gochaudh: check trigger_metadata (try with blob triggered func) + binding = get_binding(binding) if trigger_metadata: metadata = { - k: datumdef.Datum.from_typed_data(v, shmem_mgr) + k: datumdef.Datum.from_typed_data(v) for k, v in trigger_metadata.items() } else: metadata = {} - pb_type = pb.WhichOneof('rpc_data') - if 
pb_type == 'rpc_shared_memory': - # Data was sent over shared memory, attempt to read - datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) - # TODO gochaudh: check trigger_metadata (try with blob triggered func) - elif pb_type == 'data': + if datum is None: val = pb.data datum = datumdef.Datum.from_typed_data(val) - else: - raise TypeError(f'Unknown ParameterBindingType: {pb_type}') try: return binding.decode(datum, trigger_metadata=metadata) @@ -92,19 +95,10 @@ def from_incoming_proto( f'and expected binding type {binding}') -<<<<<<< HEAD def get_datum(binding: str, obj: typing.Any, - pytype: typing.Optional[type]) -> datumdef.Datum: - """ - Convert an object to a datum with the specified type. - """ -======= -def to_outgoing_proto(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type], - shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.TypedData: ->>>>>>> Free shared memory resources after use + pytype: typing.Optional[type]): binding = get_binding(binding) + try: datum = binding.encode(obj, expected_type=pytype) except NotImplementedError: @@ -115,41 +109,52 @@ def to_outgoing_proto(binding: str, obj: typing.Any, *, f'Python type "{type(obj).__name__}"') return datum -<<<<<<< HEAD def to_outgoing_proto(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type]) -> protos.TypedData: + pytype: typing.Optional[type], + shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.TypedData: datum = get_datum(binding, obj, pytype) - return datumdef.datum_as_proto(datum) + return datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) def to_outgoing_param_binding(binding: str, obj: typing.Any, *, pytype: typing.Optional[type], out_name: str, - shmem_mgr: SharedMemoryManager) -> protos.ParameterBinding: + shmem_mgr: SharedMemoryManager, + invocation_id: str) -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) # TODO gochaudh: IMPORTANT: Right now we set the AppSetting to disable this # 
However that takes impact only for data coming from host -> worker # Is there a way to check the AppSetting here so that this does not respond back # with shared memory? - shared_mem_value = None - parameter_binding = None - # If shared memory is enabled, try to transfer to host over shared memory + param_binding = None if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): - shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) - if shared_mem_value is not None: - # Check if data was transferred over shared memory. - # If it was, then use the rpc_shared_memory field in the response message. - parameter_binding = protos.ParameterBinding( - name=out_name, - rpc_shared_memory=shared_mem_value) - else: - # If data was not trasnferred over shared memory, send it as part of the response message - rpc_val = datumdef.datum_as_proto(datum) - parameter_binding = protos.ParameterBinding( + if datum.type == 'bytes': + value = datum.value + map_name = shmem_mgr.put_bytes(value, invocation_id) + if map_name is not None: + shmem = protos.RpcSharedMemory( + name=map_name, + offset=0, + count=len(value), + type=protos.RpcSharedMemoryDataType.bytes) + param_binding = protos.ParameterBinding( + name=out_name, + rpc_shared_memory=shmem) + else: + raise Exception( + 'cannot write datum value into shared memory' + ) + else: + raise Exception( + 'unsupported datum type for shared memory' + ) + + if param_binding is None: + rpc_val = datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + param_binding = protos.ParameterBinding( name=out_name, data=rpc_val) - return parameter_binding -======= - return datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) ->>>>>>> Free shared memory resources after use + + return param_binding \ No newline at end of file diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 91a5a2f29..e7faaf288 100644 --- 
a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -33,6 +33,7 @@ def __init__(self): def is_enabled(self) -> bool: """ +<<<<<<< HEAD Whether supported types should be transferred between functions host and the worker using shared memory. """ @@ -71,6 +72,45 @@ def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: logger.info('Reading string from shared memory: %s', map_name) data = self.file_reader.read_content_as_string(map_name, offset, count) return data +======= + Whether supported types should be transferred between Functions host + and the worker using shared memory. + """ + return True + + def is_supported(self, datum) -> bool: + """ + Whether the given Datum object can be transferred to the Functions host + using shared memory. + """ + if datum.type == 'bytes': + # TODO gochaudh: Check for min size config + # Is there a common place to put configs shared b/w host and worker? + return True + else: + return False + + def get_bytes(self, mmap_name: str, offset: int, count: int) -> bytes: + """ + Reads data from the given Memory Mapped File with the provided name, + starting at the provided offset and reading a total of count bytes. + Returns a tuple containing the binary data read from shared memory + if successful, None otherwise. + """ + logger.info('Reading from shared memory: %s', mmap_name) + data = FileReader.read_content_as_bytes(mmap_name, offset) + return data + + def put_bytes(self, data: bytes, invocation_id: str) -> str: + """ + Writes the given data into shared memory. + Returns the name of the Memory Mapped File into which the data was + written if succesful, None otherwise. 
+ """ + mmap_name = str(uuid.uuid4()) + logger.info('Writing to shared memory: %s', mmap_name) + mmap = FileWriter.create_with_content_bytes(mmap_name, data) +>>>>>>> Working for blob shared memory data transfer; needs clean up, comments and tests def put_bytes(self, data: bytes) -> Optional[str]: """ diff --git a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto index 825c937d7..403156e24 100644 --- a/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto +++ b/azure_functions_worker/protos/_src/src/proto/FunctionRpc.proto @@ -330,7 +330,6 @@ message TypedData { CollectionString collection_string = 9; CollectionDouble collection_double = 10; CollectionSInt64 collection_sint64 = 11; - RpcSharedMemoryInfo rpc_shared_memory_info = 12; } } From af9faebc1dbf497eace850c0420b8141257b51e2 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 23 Oct 2020 03:12:38 +0500 Subject: [PATCH 24/76] Changing message for closing mmaps --- azure_functions_worker/bindings/datumdef.py | 3 +- azure_functions_worker/bindings/meta.py | 12 +- .../bindings/shared_memory_manager.py | 124 +++--------------- azure_functions_worker/dispatcher.py | 2 +- 4 files changed, 26 insertions(+), 115 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 45da37bfd..d76322e24 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -161,8 +161,7 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, for k, v in datum.value['headers'].items() }, enable_content_negotiation=False, - body=datum_as_proto(datum.value['body'], shmem_mgr, - invocation_id), + body=datum_as_proto(datum.value['body'], shmem_mgr), )) raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 
4e90ff4f8..580d02c30 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -112,17 +112,15 @@ def get_datum(binding: str, obj: typing.Any, def to_outgoing_proto(binding: str, obj: typing.Any, *, pytype: typing.Optional[type], - shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.TypedData: + shmem_mgr: SharedMemoryManager) -> protos.TypedData: datum = get_datum(binding, obj, pytype) - return datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + return datumdef.datum_as_proto(datum, shmem_mgr) def to_outgoing_param_binding(binding: str, obj: typing.Any, *, pytype: typing.Optional[type], out_name: str, - shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.ParameterBinding: + shmem_mgr: SharedMemoryManager) -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) # TODO gochaudh: IMPORTANT: Right now we set the AppSetting to disable this # However that takes impact only for data coming from host -> worker @@ -132,7 +130,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): if datum.type == 'bytes': value = datum.value - map_name = shmem_mgr.put_bytes(value, invocation_id) + map_name = shmem_mgr.put_bytes(value) if map_name is not None: shmem = protos.RpcSharedMemory( name=map_name, @@ -152,7 +150,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, ) if param_binding is None: - rpc_val = datumdef.datum_as_proto(datum, shmem_mgr, invocation_id) + rpc_val = datumdef.datum_as_proto(datum, shmem_mgr) param_binding = protos.ParameterBinding( name=out_name, data=rpc_val) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index e7faaf288..1834b08ed 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -1,78 +1,23 @@ # Copyright (c) Microsoft 
Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations import uuid -from typing import Dict -from typing import Optional from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader -from ..mmap_handler.file_accessor_factory import FileAccessorFactory - +from ..mmap_handler.file_accessor import FileAccessor class SharedMemoryManager: """ - Performs all operations related to reading/writing data from/to shared memory. - This is used for transferring input/output data of the function from/to the functions host over - shared memory as opposed to RPC to improve the rate of data transfer and the function's - end-to-end latency. + Performs all operations related to reading/writing data from/to Shared + Memory. """ def __init__(self): - # The allocated memory maps are tracked here so that a reference to them is kept open until - # they have been used (e.g. if they contain a function's output, it is read by the - # functions host). - # Having a mapping of the name and the memory map is then later used to close a given - # memory map by its name, after it has been used. - # key: map_name, val: mmap.mmap - self.allocated_mem_maps: Dict[str, mmap.mmap] = {} - self.file_accessor = FileAccessorFactory.create_file_accessor() - self.file_reader = FileReader() - self.file_writer = FileWriter() + self.allocated_mmaps = {} # type dict[map_name, mmap] def is_enabled(self) -> bool: """ -<<<<<<< HEAD - Whether supported types should be transferred between functions host and the worker using - shared memory. - """ - return True - - def is_supported(self, datum: Datum) -> bool: - """ - Whether the given Datum object can be transferred to the functions host using shared - memory. - """ - if datum.type == 'bytes': - # TODO gochaudh: Check for min size config - # Is there a common place to put configs shared b/w host and worker? - # Env variable? App Setting? 
- return True - elif datum.type == 'string': - return True - return False - - def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: - """ - Reads data from the given memory map with the provided name, starting at the provided - offset and reading a total of count bytes. - Returns the data read from shared memory as bytes if successful, None otherwise. - """ - logger.info('Reading bytes from shared memory: %s', map_name) - data = self.file_reader.read_content_as_bytes(map_name, offset, count) - return data - - def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: - """ - Reads data from the given memory map with the provided name, starting at the provided - offset and reading a total of count bytes. - Returns the data read from shared memory as a string if successful, None otherwise. - """ - logger.info('Reading string from shared memory: %s', map_name) - data = self.file_reader.read_content_as_string(map_name, offset, count) - return data -======= Whether supported types should be transferred between Functions host and the worker using shared memory. """ @@ -90,68 +35,37 @@ def is_supported(self, datum) -> bool: else: return False - def get_bytes(self, mmap_name: str, offset: int, count: int) -> bytes: + def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: """ Reads data from the given Memory Mapped File with the provided name, starting at the provided offset and reading a total of count bytes. Returns a tuple containing the binary data read from shared memory if successful, None otherwise. """ - logger.info('Reading from shared memory: %s', mmap_name) - data = FileReader.read_content_as_bytes(mmap_name, offset) + logger.info('Reading from shared memory: %s', map_name) + data = FileReader.read_content_as_bytes(map_name, offset) return data - def put_bytes(self, data: bytes, invocation_id: str) -> str: + def put_bytes(self, data: bytes) -> str: """ Writes the given data into shared memory. 
Returns the name of the Memory Mapped File into which the data was written if succesful, None otherwise. """ - mmap_name = str(uuid.uuid4()) - logger.info('Writing to shared memory: %s', mmap_name) - mmap = FileWriter.create_with_content_bytes(mmap_name, data) ->>>>>>> Working for blob shared memory data transfer; needs clean up, comments and tests - - def put_bytes(self, data: bytes) -> Optional[str]: - """ - Writes the given bytes into shared memory. - Returns the name of the memory map into which the data was written if successful, None - otherwise. - """ map_name = str(uuid.uuid4()) - logger.info('Writing bytes to shared memory: %s', map_name) - mem_map = self.file_writer.create_with_content_bytes(map_name, data) - if mem_map is not None: - self.allocated_mem_maps[map_name] = mem_map - return map_name + logger.info('Writing to shared memory: %s', map_name) + mmap = FileWriter.create_with_content_bytes(map_name, data) + + # Hold a reference to the mmap to prevent it from closing before the + # host has read it. + self.allocated_mmaps[map_name] = mmap - def put_string(self, data: str) -> Optional[str]: - """ - Writes the given string into shared memory. - Returns the name of the memory map into which the data was written if succesful, None - otherwise. - """ - map_name = str(uuid.uuid4()) - logger.info('Writing string to shared memory: %s', map_name) - mem_map = self.file_writer.create_with_content_string(map_name, data) - if mem_map is not None: - self.allocated_mem_maps[map_name] = mem_map return map_name - def free_mem_map(self, map_name: str): + def free_map(self, map_name: str): """ - Frees the memory map and any backing resources (e.g. file in the case of Linux) associated - with it. - If there is no memory map with the given name being tracked, then no action is performed. - Returns True if the memory map was freed successfully, False otherwise. 
""" - if map_name not in self.allocated_mem_maps: - # TODO Log Error - return False - mem_map = self.allocated_mem_maps[map_name] - success = self.file_accessor.delete_mem_map(map_name, mem_map) - del self.allocated_mem_maps[map_name] - if not success: - # TODO Log Error - return False - return True + if map_name in self.allocated_mmaps: + mmap = self.allocated_mmaps[map_name] + FileAccessor.delete_mmap(map_name, mmap) + del self.allocated_mmaps[map_name] diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index d1b97d972..865466fbf 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -402,7 +402,7 @@ async def _handle__invocation_request(self, req): return_value = bindings.to_outgoing_proto( fi.return_type.binding_name, call_result, pytype=fi.return_type.pytype, - shmem_mgr=self._shmem_mgr, invocation_id=invocation_id) + shmem_mgr=self._shmem_mgr) # Actively flush customer print() function to console sys.stdout.flush() From ea3fbaeea5419faabfe49de1f2e1b7c0beb87a3c Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 26 Oct 2020 12:30:47 +0500 Subject: [PATCH 25/76] Support for string datatype for shared memory data transfer --- azure_functions_worker/bindings/datumdef.py | 2 +- azure_functions_worker/bindings/meta.py | 21 ++++++++++++-- .../bindings/shared_memory_manager.py | 29 +++++++++++++++++-- azure_functions_worker/dispatcher.py | 3 +- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index d76322e24..b072503a5 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -161,7 +161,7 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, for k, v in datum.value['headers'].items() }, enable_content_negotiation=False, - body=datum_as_proto(datum.value['body'], shmem_mgr), + 
body=datum_as_proto(datum.value['body']), )) raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 580d02c30..c49c2584f 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -111,10 +111,9 @@ def get_datum(binding: str, obj: typing.Any, def to_outgoing_proto(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type], - shmem_mgr: SharedMemoryManager) -> protos.TypedData: + pytype: typing.Optional[type]) -> protos.TypedData: datum = get_datum(binding, obj, pytype) - return datumdef.datum_as_proto(datum, shmem_mgr) + return datumdef.datum_as_proto(datum) def to_outgoing_param_binding(binding: str, obj: typing.Any, *, @@ -144,6 +143,22 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, raise Exception( 'cannot write datum value into shared memory' ) + elif datum.type == 'string': + value = datum.value + map_name = shmem_mgr.put_string(value) + if map_name is not None: + shmem = protos.RpcSharedMemory( + name=map_name, + offset=0, + count=len(value), + type=protos.RpcSharedMemoryDataType.string) + param_binding = protos.ParameterBinding( + name=out_name, + rpc_shared_memory=shmem) + else: + raise Exception( + 'cannot write datum value into shared memory' + ) else: raise Exception( 'unsupported datum type for shared memory' diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 1834b08ed..a2c4a9e33 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -32,6 +32,8 @@ def is_supported(self, datum) -> bool: # TODO gochaudh: Check for min size config # Is there a common place to put configs shared b/w host and worker? 
return True + elif datum.type == 'string': + return True else: return False @@ -42,18 +44,23 @@ def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: Returns a tuple containing the binary data read from shared memory if successful, None otherwise. """ - logger.info('Reading from shared memory: %s', map_name) + logger.info('Reading bytes from shared memory: %s', map_name) data = FileReader.read_content_as_bytes(map_name, offset) return data + def get_string(self, map_name: str, offset: int, count: int) -> str: + logger.info('Reading string from shared memory: %s', map_name) + data = FileReader.read_content_as_string(map_name, offset) + return data + def put_bytes(self, data: bytes) -> str: """ - Writes the given data into shared memory. + Writes the given bytes into shared memory. Returns the name of the Memory Mapped File into which the data was written if succesful, None otherwise. """ map_name = str(uuid.uuid4()) - logger.info('Writing to shared memory: %s', map_name) + logger.info('Writing bytes to shared memory: %s', map_name) mmap = FileWriter.create_with_content_bytes(map_name, data) # Hold a reference to the mmap to prevent it from closing before the @@ -62,6 +69,22 @@ def put_bytes(self, data: bytes) -> str: return map_name + def put_string(self, data: str) -> str: + """ + Writes the given string into shared memory. + Returns the name of the Memory Mapped File into which the data was + written if succesful, None otherwise. + """ + map_name = str(uuid.uuid4()) + logger.info('Writing string to shared memory: %s', map_name) + mmap = FileWriter.create_with_content_string(map_name, data) + + # Hold a reference to the mmap to prevent it from closing before the + # host has read it. 
+ self.allocated_mmaps[map_name] = mmap + + return map_name + def free_map(self, map_name: str): """ """ diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 865466fbf..03f2f3da1 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -401,8 +401,7 @@ async def _handle__invocation_request(self, req): if fi.return_type is not None: return_value = bindings.to_outgoing_proto( fi.return_type.binding_name, call_result, - pytype=fi.return_type.pytype, - shmem_mgr=self._shmem_mgr) + pytype=fi.return_type.pytype) # Actively flush customer print() function to console sys.stdout.flush() From eb337011c4434cae9b56f230ea5d62a03877d61e Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 12 Nov 2020 01:20:34 +0500 Subject: [PATCH 26/76] Change to oneof --- azure_functions_worker/bindings/meta.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c49c2584f..474a45e6c 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -60,16 +60,6 @@ def from_incoming_proto( pytype: typing.Optional[type], trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]], shmem_mgr: SharedMemoryManager) -> typing.Any: - # TODO gochaudh: - # Ideally, we should use WhichOneOf (if back compat issue is not there) - # Otherwise, a None check is not applicable as even if rpc_shared_memory is - # not set, its not None - datum = None - if pb.rpc_shared_memory.name is not '': - # Data was sent over shared memory, attempt to read - datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) - # TODO gochaudh: check trigger_metadata (try with blob triggered func) - binding = get_binding(binding) if trigger_metadata: metadata = { @@ -79,9 +69,16 @@ def from_incoming_proto( else: metadata = {} - if datum is None: + pb_type = 
pb.WhichOneof('binding') + if pb_type == 'rpc_shared_memory': + # Data was sent over shared memory, attempt to read + datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) + # TODO gochaudh: check trigger_metadata (try with blob triggered func) + elif pb_type == 'data': val = pb.data datum = datumdef.Datum.from_typed_data(val) + else: + raise TypeError(f'Unknown ParameterBindingType: {pb_type}') try: return binding.decode(datum, trigger_metadata=metadata) From e0fd812fdff3746ae618fc161dcc779d8d8c8cc3 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 15 Dec 2020 13:04:37 -1000 Subject: [PATCH 27/76] Refactoring mmap_handler --- azure_functions_worker/bindings/meta.py | 6 +- .../bindings/shared_memory_manager.py | 20 +++--- .../mmap_handler/file_accessor_windows.py | 2 +- .../mmap_handler/file_reader.py | 61 ++++++------------- 4 files changed, 34 insertions(+), 55 deletions(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 474a45e6c..c8bf8fa15 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -69,7 +69,7 @@ def from_incoming_proto( else: metadata = {} - pb_type = pb.WhichOneof('binding') + pb_type = pb.WhichOneof('rpc_data') if pb_type == 'rpc_shared_memory': # Data was sent over shared memory, attempt to read datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) @@ -132,7 +132,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, name=map_name, offset=0, count=len(value), - type=protos.RpcSharedMemoryDataType.bytes) + type=protos.RpcDataType.bytes) param_binding = protos.ParameterBinding( name=out_name, rpc_shared_memory=shmem) @@ -148,7 +148,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, name=map_name, offset=0, count=len(value), - type=protos.RpcSharedMemoryDataType.string) + type=protos.RpcDataType.string) param_binding = protos.ParameterBinding( 
name=out_name, rpc_shared_memory=shmem) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index a2c4a9e33..311060ee0 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -5,7 +5,7 @@ from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader -from ..mmap_handler.file_accessor import FileAccessor +from ..mmap_handler.file_accessor_factory import FileAccessorFactory class SharedMemoryManager: @@ -15,6 +15,9 @@ class SharedMemoryManager: """ def __init__(self): self.allocated_mmaps = {} # type dict[map_name, mmap] + self.file_accessor = FileAccessorFactory.create_file_accessor() + self.file_reader = FileReader() + self.file_writer = FileWriter() def is_enabled(self) -> bool: """ @@ -31,6 +34,7 @@ def is_supported(self, datum) -> bool: if datum.type == 'bytes': # TODO gochaudh: Check for min size config # Is there a common place to put configs shared b/w host and worker? + # Env variable? App Setting? return True elif datum.type == 'string': return True @@ -45,12 +49,12 @@ def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: if successful, None otherwise. 
""" logger.info('Reading bytes from shared memory: %s', map_name) - data = FileReader.read_content_as_bytes(map_name, offset) + data = self.file_reader.read_content_as_bytes(map_name, offset) return data def get_string(self, map_name: str, offset: int, count: int) -> str: logger.info('Reading string from shared memory: %s', map_name) - data = FileReader.read_content_as_string(map_name, offset) + data = self.file_reader.read_content_as_string(map_name, offset) return data def put_bytes(self, data: bytes) -> str: @@ -61,12 +65,10 @@ def put_bytes(self, data: bytes) -> str: """ map_name = str(uuid.uuid4()) logger.info('Writing bytes to shared memory: %s', map_name) - mmap = FileWriter.create_with_content_bytes(map_name, data) - + mmap = self.file_writer.create_with_content_bytes(map_name, data) # Hold a reference to the mmap to prevent it from closing before the # host has read it. self.allocated_mmaps[map_name] = mmap - return map_name def put_string(self, data: str) -> str: @@ -77,12 +79,10 @@ def put_string(self, data: str) -> str: """ map_name = str(uuid.uuid4()) logger.info('Writing string to shared memory: %s', map_name) - mmap = FileWriter.create_with_content_string(map_name, data) - + mmap = self.file_writer.create_with_content_string(map_name, data) # Hold a reference to the mmap to prevent it from closing before the # host has read it. 
self.allocated_mmaps[map_name] = mmap - return map_name def free_map(self, map_name: str): @@ -90,5 +90,5 @@ def free_map(self, map_name: str): """ if map_name in self.allocated_mmaps: mmap = self.allocated_mmaps[map_name] - FileAccessor.delete_mmap(map_name, mmap) + self.file_accessor.delete_mmap(map_name, mmap) del self.allocated_mmaps[map_name] diff --git a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/mmap_handler/file_accessor_windows.py index c8c3909d1..22c6f4172 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_windows.py +++ b/azure_functions_worker/mmap_handler/file_accessor_windows.py @@ -30,4 +30,4 @@ def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: def delete_mem_map(self, map_name: str, mmap) -> bool: mmap.close() - return True \ No newline at end of file + return True diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 138504db2..0ba132c7b 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -4,78 +4,59 @@ import mmap import os import struct -from typing import Optional from .memorymappedfile_constants import MemoryMappedFileConstants as consts -<<<<<<< HEAD from .file_accessor_factory import FileAccessorFactory -======= -from .file_accessor import FileAccessor ->>>>>>> Removed control flag from mmap header class FileReader: """ - For reading data from memory maps in shared memory. - Assumes a particular format when reading data (i.e. particular header before the content). - For writing data that could be read by the FileReader, use FileWriter. """ def __init__(self): self.file_accessor = FileAccessorFactory.create_file_accessor() - def _bytes_to_long(self, input_bytes) -> int: - """ - Decode a set of bytes representing a long. - This uses the format that the functions host (i.e. C#) uses. 
+ def _bytes_to_long(self, input_bytes): + """Decode a set of bytes representing a long. + This uses the format that C# uses. """ return struct.unpack(" Optional[int]: - """ - Read the header of the memory map to determine the length of content contained in that - memory map. - Returns the content length as a non-negative integer if successful, None otherwise. + def _get_content_length(self, map_name): + """Read the first header from a shared memory. + These bytes contains the length of the rest of the shared memory. + TODO throw exceptions in case of failures as opposed to special values like -1. """ try: - map_content_length = self.file_accessor.open_mem_map( + map_content_length = self.file_accessor.open_mmap( map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) except FileNotFoundError: - return None + return -1 if map_content_length is None: - return None + return -1 try: header_bytes = map_content_length.read(consts.CONTENT_HEADER_TOTAL_BYTES) content_length = self._bytes_to_long(header_bytes) return content_length except ValueError as value_error: print("Cannot get content length for memory map '%s': %s" % (map_name, value_error)) - return None + return 0 finally: map_content_length.close() - def read_content_as_bytes(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[bytes]: - """ - Read content from the memory map with the given name and starting at the given offset. - content_offset = 0 means read from the beginning of the content. - bytes_to_read = 0 means read the entire content. - Returns the content as bytes if successful, None otherwise. + def read_content_as_bytes(self, map_name: str, content_offset: int = 0): + """Read content from a memory mapped file as bytes. 
""" content_length = self._get_content_length(map_name) - if content_length is None: + if content_length < 0: return None map_length = content_length + consts.CONTENT_HEADER_TOTAL_BYTES try: - map_content = self.file_accessor.open_mem_map(map_name, map_length, mmap.ACCESS_READ) + map_content = self.file_accessor.open_mmap(map_name, map_length, mmap.ACCESS_READ) if map_content is not None: try: map_content.seek(consts.CONTENT_HEADER_TOTAL_BYTES) if content_offset > 0: map_content.seek(content_offset, os.SEEK_CUR) - if bytes_to_read > 0: - # Read up to the specified number of bytes to read - content = map_content.read(bytes_to_read) - else: - # Read the entire content - content = map_content.read() + content = map_content.read() return content except ValueError as value_error: print("Cannot get content for memory map '%s': %s" % (map_name, value_error)) @@ -87,13 +68,11 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0, bytes_to # If we cannot get the content return None return None - def read_content_as_string(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[str]: - """ - Read content from the memory map with the given name and starting at the given offset. - Returns the content as a string if successful, None otherwise. + def read_content_as_string(self, map_name: str, content_offset: int = 0): + """Read content from a memory mapped file as a string. 
""" - content_bytes = self.read_content_as_bytes(map_name, content_offset, bytes_to_read) + content_bytes = self.read_content_as_bytes(map_name, content_offset) if content_bytes is None: return None content_str = content_bytes.decode('utf-8') - return content_str + return content_str \ No newline at end of file From beb1d235c820a0d3e6fab83f635e08172f7a3d6b Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 10:01:08 -1000 Subject: [PATCH 28/76] Refactoring, cleaning up and adding docstrings --- azure_functions_worker/bindings/datumdef.py | 2 +- azure_functions_worker/bindings/meta.py | 60 +++++------------ .../bindings/shared_memory_manager.py | 67 ++++++++++++------- .../mmap_handler/file_reader.py | 40 ++++++----- 4 files changed, 83 insertions(+), 86 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index b072503a5..efcc1a5d4 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -165,4 +165,4 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, )) raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) - ) \ No newline at end of file + ) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c8bf8fa15..4be97c19f 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -93,7 +93,7 @@ def from_incoming_proto( def get_datum(binding: str, obj: typing.Any, - pytype: typing.Optional[type]): + pytype: typing.Optional[type]) -> datumdef.Datum: binding = get_binding(binding) try: @@ -122,49 +122,21 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, # However that takes impact only for data coming from host -> worker # Is there a way to check the AppSetting here so that this does not respond back # with shared memory? 
- param_binding = None + shared_mem_value = None + parameter_binding = None + # If shared memory is enabled, try to transfer to host over shared memory if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): - if datum.type == 'bytes': - value = datum.value - map_name = shmem_mgr.put_bytes(value) - if map_name is not None: - shmem = protos.RpcSharedMemory( - name=map_name, - offset=0, - count=len(value), - type=protos.RpcDataType.bytes) - param_binding = protos.ParameterBinding( - name=out_name, - rpc_shared_memory=shmem) - else: - raise Exception( - 'cannot write datum value into shared memory' - ) - elif datum.type == 'string': - value = datum.value - map_name = shmem_mgr.put_string(value) - if map_name is not None: - shmem = protos.RpcSharedMemory( - name=map_name, - offset=0, - count=len(value), - type=protos.RpcDataType.string) - param_binding = protos.ParameterBinding( - name=out_name, - rpc_shared_memory=shmem) - else: - raise Exception( - 'cannot write datum value into shared memory' - ) - else: - raise Exception( - 'unsupported datum type for shared memory' - ) - - if param_binding is None: - rpc_val = datumdef.datum_as_proto(datum, shmem_mgr) - param_binding = protos.ParameterBinding( + shared_mem_value = datum.to_rpc_shared_memory(shmem_mgr) + if shared_mem_value is not None: + # Check if data was transferred over shared memory. + # If it was, then use the rpc_shared_memory field in the response message. 
+ parameter_binding = protos.ParameterBinding( + name=out_name, + rpc_shared_memory=shared_mem_value) + else: + # If data was not trasnferred over shared memory, send it as part of the response message + rpc_val = datumdef.datum_as_proto(datum) + parameter_binding = protos.ParameterBinding( name=out_name, data=rpc_val) - - return param_binding \ No newline at end of file + return parameter_binding diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 311060ee0..cb01848b7 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -2,6 +2,8 @@ # Licensed under the MIT License. import uuid +from typing import Optional +from . import datumdef from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader @@ -10,26 +12,34 @@ class SharedMemoryManager: """ - Performs all operations related to reading/writing data from/to Shared - Memory. + Performs all operations related to reading/writing data from/to shared memory. + This is used for transferring input/output data of the function from/to the functions host over + shared memory as opposed to RPC to improve the rate of data transfer and the function's + end-to-end latency. """ def __init__(self): - self.allocated_mmaps = {} # type dict[map_name, mmap] + # The allocated memory maps are tracked here so that a reference to them is kept open until + # they have been used (e.g. if they contain a function's output, it is read by the + # functions host). + # Having a mapping of the name and the memory map is then later used to close a given + # memory map by its name, after it has been used. 
+ # Type: dict[map_name, mmap] + self.allocated_mmaps = {} self.file_accessor = FileAccessorFactory.create_file_accessor() self.file_reader = FileReader() self.file_writer = FileWriter() def is_enabled(self) -> bool: """ - Whether supported types should be transferred between Functions host - and the worker using shared memory. + Whether supported types should be transferred between functions host and the worker using + shared memory. """ return True - def is_supported(self, datum) -> bool: + def is_supported(self, datum: datumdef.Datum) -> bool: """ - Whether the given Datum object can be transferred to the Functions host - using shared memory. + Whether the given Datum object can be transferred to the functions host using shared + memory. """ if datum.type == 'bytes': # TODO gochaudh: Check for min size config @@ -41,52 +51,57 @@ def is_supported(self, datum) -> bool: else: return False - def get_bytes(self, map_name: str, offset: int, count: int) -> bytes: + def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: """ - Reads data from the given Memory Mapped File with the provided name, - starting at the provided offset and reading a total of count bytes. - Returns a tuple containing the binary data read from shared memory - if successful, None otherwise. + Reads data from the given memory map with the provided name, starting at the provided + offset and reading a total of count bytes. + Returns the data read from shared memory as bytes if successful, None otherwise. """ logger.info('Reading bytes from shared memory: %s', map_name) data = self.file_reader.read_content_as_bytes(map_name, offset) return data - def get_string(self, map_name: str, offset: int, count: int) -> str: + def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: + """ + Reads data from the given memory map with the provided name, starting at the provided + offset and reading a total of count bytes. 
+ Returns the data read from shared memory as a string if successful, None otherwise. + """ logger.info('Reading string from shared memory: %s', map_name) data = self.file_reader.read_content_as_string(map_name, offset) return data - def put_bytes(self, data: bytes) -> str: + def put_bytes(self, data: bytes) -> Optional[str]: """ Writes the given bytes into shared memory. - Returns the name of the Memory Mapped File into which the data was - written if succesful, None otherwise. + Returns the name of the memory map into which the data was written if successful, None + otherwise. """ map_name = str(uuid.uuid4()) logger.info('Writing bytes to shared memory: %s', map_name) mmap = self.file_writer.create_with_content_bytes(map_name, data) - # Hold a reference to the mmap to prevent it from closing before the - # host has read it. - self.allocated_mmaps[map_name] = mmap + if mmap is not None: + self.allocated_mmaps[map_name] = mmap return map_name - def put_string(self, data: str) -> str: + def put_string(self, data: str) -> Optional[str]: """ Writes the given string into shared memory. - Returns the name of the Memory Mapped File into which the data was - written if succesful, None otherwise. + Returns the name of the memory map into which the data was written if succesful, None + otherwise. """ map_name = str(uuid.uuid4()) logger.info('Writing string to shared memory: %s', map_name) mmap = self.file_writer.create_with_content_string(map_name, data) - # Hold a reference to the mmap to prevent it from closing before the - # host has read it. - self.allocated_mmaps[map_name] = mmap + if mmap is not None: + self.allocated_mmaps[map_name] = mmap return map_name def free_map(self, map_name: str): """ + Frees the memory map and any backing resources (e.g. file in the case of Linux) associated + with it. + If there is no memory map with the given name being tracked, then no action is performed. 
""" if map_name in self.allocated_mmaps: mmap = self.allocated_mmaps[map_name] diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index 0ba132c7b..b464328a7 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -4,49 +4,57 @@ import mmap import os import struct +from typing import Optional from .memorymappedfile_constants import MemoryMappedFileConstants as consts from .file_accessor_factory import FileAccessorFactory class FileReader: """ + For reading data from memory maps in shared memory. + Assumes a particular format when reading data (i.e. particular header before the content). + For writing data that could be read by the FileReader, use FileWriter. """ def __init__(self): self.file_accessor = FileAccessorFactory.create_file_accessor() - def _bytes_to_long(self, input_bytes): - """Decode a set of bytes representing a long. - This uses the format that C# uses. + def _bytes_to_long(self, input_bytes) -> int: + """ + Decode a set of bytes representing a long. + This uses the format that the functions host (i.e. C#) uses. """ return struct.unpack(" Optional[int]: + """ + Read the header of the memory map to determine the length of content contained in that + memory map. + Returns the content length as a non-negative integer if successful, None otherwise. 
""" try: map_content_length = self.file_accessor.open_mmap( map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) except FileNotFoundError: - return -1 + return None if map_content_length is None: - return -1 + return None try: header_bytes = map_content_length.read(consts.CONTENT_HEADER_TOTAL_BYTES) content_length = self._bytes_to_long(header_bytes) return content_length except ValueError as value_error: print("Cannot get content length for memory map '%s': %s" % (map_name, value_error)) - return 0 + return None finally: map_content_length.close() - def read_content_as_bytes(self, map_name: str, content_offset: int = 0): - """Read content from a memory mapped file as bytes. + def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optional[bytes]: + """ + Read content from the memory map with the given name and starting at the given offset. + Returns the content as bytes if successful, None otherwise. """ content_length = self._get_content_length(map_name) - if content_length < 0: + if content_length is None: return None map_length = content_length + consts.CONTENT_HEADER_TOTAL_BYTES try: @@ -68,8 +76,10 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0): # If we cannot get the content return None return None - def read_content_as_string(self, map_name: str, content_offset: int = 0): - """Read content from a memory mapped file as a string. + def read_content_as_string(self, map_name: str, content_offset: int = 0) -> Optional[str]: + """ + Read content from the memory map with the given name and starting at the given offset. + Returns the content as a string if successful, None otherwise. 
""" content_bytes = self.read_content_as_bytes(map_name, content_offset) if content_bytes is None: From b5a7e20ef99975f0d5259cf70dc94e1cb5f10340 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 13:33:24 -1000 Subject: [PATCH 29/76] Updating CloseSharedMemoryResourcesResponse usage --- azure_functions_worker/bindings/datumdef.py | 1 - azure_functions_worker/bindings/meta.py | 2 +- .../bindings/shared_memory_manager.py | 42 +++++++++++-------- .../mmap_handler/file_reader.py | 19 ++++++--- tests/endtoend/blob_functions/ping/main.py | 7 ++++ .../endtoend/cosmosdb_functions/ping/main.py | 7 ++++ .../endtoend/eventgrid_functions/ping/main.py | 7 ++++ .../eventhub_batch_functions/ping/main.py | 7 ++++ .../endtoend/eventhub_functions/ping/main.py | 7 ++++ tests/endtoend/queue_functions/ping/main.py | 7 ++++ .../servicebus_functions/ping/main.py | 7 ++++ tests/endtoend/table_functions/ping/main.py | 7 ++++ tests/unittests/http_functions/ping/main.py | 7 ++++ tests/unittests/load_functions/ping/main.py | 7 ++++ .../log_filtering_functions/ping/main.py | 7 ++++ 15 files changed, 116 insertions(+), 25 deletions(-) create mode 100644 tests/endtoend/blob_functions/ping/main.py create mode 100644 tests/endtoend/cosmosdb_functions/ping/main.py create mode 100644 tests/endtoend/eventgrid_functions/ping/main.py create mode 100644 tests/endtoend/eventhub_batch_functions/ping/main.py create mode 100644 tests/endtoend/eventhub_functions/ping/main.py create mode 100644 tests/endtoend/queue_functions/ping/main.py create mode 100644 tests/endtoend/servicebus_functions/ping/main.py create mode 100644 tests/endtoend/table_functions/ping/main.py create mode 100644 tests/unittests/http_functions/ping/main.py create mode 100644 tests/unittests/load_functions/ping/main.py create mode 100644 tests/unittests/log_filtering_functions/ping/main.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 
efcc1a5d4..e4839a670 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -6,7 +6,6 @@ from typing import Optional import json from .. import protos -from .shared_memory_manager import SharedMemoryManager class Datum: diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 4be97c19f..c27fb9ea4 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -126,7 +126,7 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, parameter_binding = None # If shared memory is enabled, try to transfer to host over shared memory if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): - shared_mem_value = datum.to_rpc_shared_memory(shmem_mgr) + shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) if shared_mem_value is not None: # Check if data was transferred over shared memory. # If it was, then use the rpc_shared_memory field in the response message. diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index cb01848b7..5ac62a0ba 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -1,9 +1,10 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import uuid +from typing import Dict from typing import Optional -from . import datumdef from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader @@ -23,8 +24,8 @@ def __init__(self): # functions host). # Having a mapping of the name and the memory map is then later used to close a given # memory map by its name, after it has been used. 
- # Type: dict[map_name, mmap] - self.allocated_mmaps = {} + # key: map_name, val: mmap.mmap + self.allocated_mem_maps: Dict[str, mmap.mmap] = {} self.file_accessor = FileAccessorFactory.create_file_accessor() self.file_reader = FileReader() self.file_writer = FileWriter() @@ -36,7 +37,7 @@ def is_enabled(self) -> bool: """ return True - def is_supported(self, datum: datumdef.Datum) -> bool: + def is_supported(self, datum: Datum) -> bool: """ Whether the given Datum object can be transferred to the functions host using shared memory. @@ -58,7 +59,7 @@ def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: Returns the data read from shared memory as bytes if successful, None otherwise. """ logger.info('Reading bytes from shared memory: %s', map_name) - data = self.file_reader.read_content_as_bytes(map_name, offset) + data = self.file_reader.read_content_as_bytes(map_name, offset, count) return data def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: @@ -68,7 +69,7 @@ def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: Returns the data read from shared memory as a string if successful, None otherwise. 
""" logger.info('Reading string from shared memory: %s', map_name) - data = self.file_reader.read_content_as_string(map_name, offset) + data = self.file_reader.read_content_as_string(map_name, offset, count) return data def put_bytes(self, data: bytes) -> Optional[str]: @@ -79,9 +80,9 @@ def put_bytes(self, data: bytes) -> Optional[str]: """ map_name = str(uuid.uuid4()) logger.info('Writing bytes to shared memory: %s', map_name) - mmap = self.file_writer.create_with_content_bytes(map_name, data) - if mmap is not None: - self.allocated_mmaps[map_name] = mmap + mem_map = self.file_writer.create_with_content_bytes(map_name, data) + if mem_map is not None: + self.allocated_mem_maps[map_name] = mem_map return map_name def put_string(self, data: str) -> Optional[str]: @@ -92,18 +93,25 @@ def put_string(self, data: str) -> Optional[str]: """ map_name = str(uuid.uuid4()) logger.info('Writing string to shared memory: %s', map_name) - mmap = self.file_writer.create_with_content_string(map_name, data) - if mmap is not None: - self.allocated_mmaps[map_name] = mmap + mem_map = self.file_writer.create_with_content_string(map_name, data) + if mem_map is not None: + self.allocated_mem_maps[map_name] = mem_map return map_name - def free_map(self, map_name: str): + def free_mem_map(self, map_name: str): """ Frees the memory map and any backing resources (e.g. file in the case of Linux) associated with it. If there is no memory map with the given name being tracked, then no action is performed. + Returns True if the memory map was freed successfully, False otherwise. 
""" - if map_name in self.allocated_mmaps: - mmap = self.allocated_mmaps[map_name] - self.file_accessor.delete_mmap(map_name, mmap) - del self.allocated_mmaps[map_name] + if map_name not in self.allocated_mem_maps: + # TODO Log Error + return False + mem_map = self.allocated_mem_maps[map_name] + success = self.file_accessor.delete_mem_map(map_name, mem_map) + del self.allocated_mem_maps[map_name] + if not success: + # TODO Log Error + return False + return True diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index b464328a7..dde98cdb5 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -32,7 +32,7 @@ def _get_content_length(self, map_name) -> Optional[int]: Returns the content length as a non-negative integer if successful, None otherwise. """ try: - map_content_length = self.file_accessor.open_mmap( + map_content_length = self.file_accessor.open_mem_map( map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) except FileNotFoundError: return None @@ -48,9 +48,11 @@ def _get_content_length(self, map_name) -> Optional[int]: finally: map_content_length.close() - def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optional[bytes]: + def read_content_as_bytes(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[bytes]: """ Read content from the memory map with the given name and starting at the given offset. + content_offset = 0 means read from the beginning of the content. + bytes_to_read = 0 means read the entire content. Returns the content as bytes if successful, None otherwise. 
""" content_length = self._get_content_length(map_name) @@ -58,13 +60,18 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optio return None map_length = content_length + consts.CONTENT_HEADER_TOTAL_BYTES try: - map_content = self.file_accessor.open_mmap(map_name, map_length, mmap.ACCESS_READ) + map_content = self.file_accessor.open_mem_map(map_name, map_length, mmap.ACCESS_READ) if map_content is not None: try: map_content.seek(consts.CONTENT_HEADER_TOTAL_BYTES) if content_offset > 0: map_content.seek(content_offset, os.SEEK_CUR) - content = map_content.read() + if bytes_to_read > 0: + # Read up to the specified number of bytes to read + content = map_content.read(bytes_to_read) + else: + # Read the entire content + content = map_content.read() return content except ValueError as value_error: print("Cannot get content for memory map '%s': %s" % (map_name, value_error)) @@ -76,12 +83,12 @@ def read_content_as_bytes(self, map_name: str, content_offset: int = 0) -> Optio # If we cannot get the content return None return None - def read_content_as_string(self, map_name: str, content_offset: int = 0) -> Optional[str]: + def read_content_as_string(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[str]: """ Read content from the memory map with the given name and starting at the given offset. Returns the content as a string if successful, None otherwise. """ - content_bytes = self.read_content_as_bytes(map_name, content_offset) + content_bytes = self.read_content_as_bytes(map_name, content_offset, bytes_to_read) if content_bytes is None: return None content_str = content_bytes.decode('utf-8') diff --git a/tests/endtoend/blob_functions/ping/main.py b/tests/endtoend/blob_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/blob_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/cosmosdb_functions/ping/main.py b/tests/endtoend/cosmosdb_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/cosmosdb_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/eventgrid_functions/ping/main.py b/tests/endtoend/eventgrid_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/eventgrid_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/eventhub_batch_functions/ping/main.py b/tests/endtoend/eventhub_batch_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/eventhub_batch_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/eventhub_functions/ping/main.py b/tests/endtoend/eventhub_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/eventhub_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/queue_functions/ping/main.py b/tests/endtoend/queue_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/queue_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/servicebus_functions/ping/main.py b/tests/endtoend/servicebus_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/servicebus_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/endtoend/table_functions/ping/main.py b/tests/endtoend/table_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/endtoend/table_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/unittests/http_functions/ping/main.py b/tests/unittests/http_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/unittests/http_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/unittests/load_functions/ping/main.py b/tests/unittests/load_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/unittests/load_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. 
All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return diff --git a/tests/unittests/log_filtering_functions/ping/main.py b/tests/unittests/log_filtering_functions/ping/main.py new file mode 100644 index 000000000..dd96ce158 --- /dev/null +++ b/tests/unittests/log_filtering_functions/ping/main.py @@ -0,0 +1,7 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +import azure.functions as func + + +def main(req: func.HttpRequest): + return From b0e645e0d64740309f601cf89d7f2c576d94053f Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 13:36:43 -1000 Subject: [PATCH 30/76] Fixing accidental changes to tests/* --- tests/endtoend/blob_functions/ping/main.py | 7 ------- tests/endtoend/cosmosdb_functions/ping/main.py | 7 ------- tests/endtoend/eventgrid_functions/ping/main.py | 7 ------- tests/endtoend/eventhub_batch_functions/ping/main.py | 7 ------- tests/endtoend/eventhub_functions/ping/main.py | 7 ------- tests/endtoend/queue_functions/ping/main.py | 7 ------- tests/endtoend/servicebus_functions/ping/main.py | 7 ------- tests/endtoend/table_functions/ping/main.py | 7 ------- tests/unittests/http_functions/ping/main.py | 7 ------- tests/unittests/load_functions/ping/main.py | 7 ------- tests/unittests/log_filtering_functions/ping/main.py | 7 ------- 11 files changed, 77 deletions(-) delete mode 100644 tests/endtoend/blob_functions/ping/main.py delete mode 100644 tests/endtoend/cosmosdb_functions/ping/main.py delete mode 100644 tests/endtoend/eventgrid_functions/ping/main.py delete mode 100644 tests/endtoend/eventhub_batch_functions/ping/main.py delete mode 100644 tests/endtoend/eventhub_functions/ping/main.py delete mode 100644 tests/endtoend/queue_functions/ping/main.py delete mode 100644 tests/endtoend/servicebus_functions/ping/main.py delete mode 100644 tests/endtoend/table_functions/ping/main.py delete mode 100644 
tests/unittests/http_functions/ping/main.py delete mode 100644 tests/unittests/load_functions/ping/main.py delete mode 100644 tests/unittests/log_filtering_functions/ping/main.py diff --git a/tests/endtoend/blob_functions/ping/main.py b/tests/endtoend/blob_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/blob_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/cosmosdb_functions/ping/main.py b/tests/endtoend/cosmosdb_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/cosmosdb_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/eventgrid_functions/ping/main.py b/tests/endtoend/eventgrid_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/eventgrid_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/eventhub_batch_functions/ping/main.py b/tests/endtoend/eventhub_batch_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/eventhub_batch_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/eventhub_functions/ping/main.py b/tests/endtoend/eventhub_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/eventhub_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/queue_functions/ping/main.py b/tests/endtoend/queue_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/queue_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/servicebus_functions/ping/main.py b/tests/endtoend/servicebus_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/servicebus_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/endtoend/table_functions/ping/main.py b/tests/endtoend/table_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/endtoend/table_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/unittests/http_functions/ping/main.py b/tests/unittests/http_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/unittests/http_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/unittests/load_functions/ping/main.py b/tests/unittests/load_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/unittests/load_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. -import azure.functions as func - - -def main(req: func.HttpRequest): - return diff --git a/tests/unittests/log_filtering_functions/ping/main.py b/tests/unittests/log_filtering_functions/ping/main.py deleted file mode 100644 index dd96ce158..000000000 --- a/tests/unittests/log_filtering_functions/ping/main.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
-import azure.functions as func - - -def main(req: func.HttpRequest): - return From a04982d8cf42f78dadee4ea6a6d72361199558fc Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 16 Dec 2020 15:44:00 -1000 Subject: [PATCH 31/76] Addressing comments --- azure_functions_worker/bindings/meta.py | 4 +++- azure_functions_worker/bindings/shared_memory_manager.py | 3 +-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c27fb9ea4..0430efa20 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -94,8 +94,10 @@ def from_incoming_proto( def get_datum(binding: str, obj: typing.Any, pytype: typing.Optional[type]) -> datumdef.Datum: + """ + Convert an object to a datum with the specified type. + """ binding = get_binding(binding) - try: datum = binding.encode(obj, expected_type=pytype) except NotImplementedError: diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 5ac62a0ba..3ca69d553 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -49,8 +49,7 @@ def is_supported(self, datum: Datum) -> bool: return True elif datum.type == 'string': return True - else: - return False + return False def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: """ From 84f08a218c368624f21e46fe0a1e0df1cf25212d Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 18 Feb 2021 11:22:17 -0800 Subject: [PATCH 32/76] Cleaning up, addressing comments --- azure_functions_worker/bindings/datumdef.py | 64 +++++---- azure_functions_worker/bindings/meta.py | 12 +- .../bindings/shared_memory_manager.py | 59 ++++---- azure_functions_worker/constants.py | 3 + .../mmap_handler/__init__.py | 2 + .../mmap_handler/file_accessor.py | 61 ++++++--- 
.../mmap_handler/file_accessor_factory.py | 12 +- .../mmap_handler/file_accessor_linux.py | 107 --------------- .../mmap_handler/file_accessor_unix.py | 128 ++++++++++++++++++ .../mmap_handler/file_accessor_windows.py | 27 ++-- .../mmap_handler/file_reader.py | 75 +++++----- .../mmap_handler/file_writer.py | 13 +- .../memorymappedfile_constants.py | 28 ++-- 13 files changed, 332 insertions(+), 259 deletions(-) delete mode 100644 azure_functions_worker/mmap_handler/file_accessor_linux.py create mode 100644 azure_functions_worker/mmap_handler/file_accessor_unix.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index e4839a670..a7bba7e54 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -2,10 +2,10 @@ # Licensed under the MIT License. from __future__ import annotations -from typing import Any -from typing import Optional +from typing import Any, Optional import json from .. import protos +from ..logging import logger class Datum: @@ -100,49 +100,52 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared Reads the specified shared memory region and converts the read data into a datum object of the corresponding type. 
""" - mmap_name = shmem.name + mem_map_name = shmem.name offset = shmem.offset count = shmem.count data_type = shmem.type + ret_val = None if data_type == protos.RpcDataType.bytes: - val = shmem_mgr.get_bytes(mmap_name, offset, count) + val = shmem_mgr.get_bytes(mem_map_name, offset, count) if val is not None: - return cls(val, 'bytes') + ret_val = cls(val, 'bytes') elif data_type == protos.RpcDataType.string: - val = shmem_mgr.get_string(mmap_name, offset, count) + val = shmem_mgr.get_string(mem_map_name, offset, count) if val is not None: - return cls(val, 'string') + ret_val = cls(val, 'string') + if ret_val is not None: + logger.info(f'Read {count} bytes from memory map {mem_map_name} for data type {data_type}') + return ret_val return None @classmethod - def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> protos.RpcSharedMemory: + def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> Optional[protos.RpcSharedMemory]: """ Writes the given value to shared memory and returns the corresponding RpcSharedMemory object which can be sent back to the functions host over RPC. 
""" if datum.type == 'bytes': value = datum.value - map_name = shmem_mgr.put_bytes(value) - if map_name is not None: - shmem = protos.RpcSharedMemory( - name=map_name, - offset=0, - count=len(value), - type=protos.RpcDataType.bytes) - return shmem + mem_map_name = shmem_mgr.put_bytes(value) + data_type = protos.RpcDataType.bytes elif datum.type == 'string': value = datum.value - map_name = shmem_mgr.put_string(value) - if map_name is not None: - shmem = protos.RpcSharedMemory( - name=map_name, - offset=0, - count=len(value), - type=protos.RpcDataType.string) - return shmem - raise NotImplementedError( - f'unsupported datum type ({datum.type}) for shared memory' - ) + mem_map_name = shmem_mgr.put_string(value) + data_type = protos.RpcDataType.string + else: + raise NotImplementedError( + f'Unsupported datum type ({datum.type}) for shared memory' + ) + if mem_map_name is None: + return None + content_size = len(value) + shmem = protos.RpcSharedMemory( + name=mem_map_name, + offset=0, + count=content_size, + type=data_type) + logger.info(f'Wrote {content_size} bytes to memory map {mem_map_name} for data type {data_type}') + return shmem def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, invocation_id: str) -> protos.TypedData: @@ -162,6 +165,13 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, enable_content_negotiation=False, body=datum_as_proto(datum.value['body']), )) +<<<<<<< HEAD raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) ) +======= + else: + raise NotImplementedError( + 'unexpected Datum type: {!r}'.format(datum.type) + ) +>>>>>>> Cleaning up, addressing comments diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 0430efa20..4d5d39805 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -85,7 +85,6 @@ def from_incoming_proto( except NotImplementedError: # Binding does not support the data. 
dt = val.WhichOneof('data') - raise TypeError( f'unable to decode incoming TypedData: ' f'unsupported combination of TypedData field {dt!r} ' @@ -120,24 +119,21 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, out_name: str, shmem_mgr: SharedMemoryManager) -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) - # TODO gochaudh: IMPORTANT: Right now we set the AppSetting to disable this - # However that takes impact only for data coming from host -> worker - # Is there a way to check the AppSetting here so that this does not respond back - # with shared memory? shared_mem_value = None parameter_binding = None # If shared memory is enabled, try to transfer to host over shared memory if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) + # Check if data was written into shared memory if shared_mem_value is not None: - # Check if data was transferred over shared memory. - # If it was, then use the rpc_shared_memory field in the response message. 
+ # If it was, then use the rpc_shared_memory field in the response message parameter_binding = protos.ParameterBinding( name=out_name, rpc_shared_memory=shared_mem_value) else: - # If data was not trasnferred over shared memory, send it as part of the response message + # If not, send it as part of the response message over RPC rpc_val = datumdef.datum_as_proto(datum) + assert rpc_val is not None parameter_binding = protos.ParameterBinding( name=out_name, data=rpc_val) diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 3ca69d553..88b7643e1 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -3,12 +3,13 @@ from __future__ import annotations import uuid -from typing import Dict -from typing import Optional +from typing import Dict, Optional from ..logging import logger from ..mmap_handler.file_writer import FileWriter from ..mmap_handler.file_reader import FileReader from ..mmap_handler.file_accessor_factory import FileAccessorFactory +from ..utils.common import is_envvar_true +from ..constants import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED class SharedMemoryManager: @@ -24,7 +25,7 @@ def __init__(self): # functions host). # Having a mapping of the name and the memory map is then later used to close a given # memory map by its name, after it has been used. - # key: map_name, val: mmap.mmap + # key: mem_map_name, val: mmap.mmap self.allocated_mem_maps: Dict[str, mmap.mmap] = {} self.file_accessor = FileAccessorFactory.create_file_accessor() self.file_reader = FileReader() @@ -35,7 +36,7 @@ def is_enabled(self) -> bool: Whether supported types should be transferred between functions host and the worker using shared memory. 
""" - return True + return is_envvar_true(FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) def is_supported(self, datum: Datum) -> bool: """ @@ -51,24 +52,22 @@ def is_supported(self, datum: Datum) -> bool: return True return False - def get_bytes(self, map_name: str, offset: int, count: int) -> Optional[bytes]: + def get_bytes(self, mem_map_name: str, offset: int, count: int) -> Optional[bytes]: """ Reads data from the given memory map with the provided name, starting at the provided offset and reading a total of count bytes. Returns the data read from shared memory as bytes if successful, None otherwise. """ - logger.info('Reading bytes from shared memory: %s', map_name) - data = self.file_reader.read_content_as_bytes(map_name, offset, count) + data = self.file_reader.read_content_as_bytes(mem_map_name, offset, count) return data - def get_string(self, map_name: str, offset: int, count: int) -> Optional[str]: + def get_string(self, mem_map_name: str, offset: int, count: int) -> Optional[str]: """ Reads data from the given memory map with the provided name, starting at the provided offset and reading a total of count bytes. Returns the data read from shared memory as a string if successful, None otherwise. """ - logger.info('Reading string from shared memory: %s', map_name) - data = self.file_reader.read_content_as_string(map_name, offset, count) + data = self.file_reader.read_content_as_string(mem_map_name, offset, count) return data def put_bytes(self, data: bytes) -> Optional[str]: @@ -77,12 +76,12 @@ def put_bytes(self, data: bytes) -> Optional[str]: Returns the name of the memory map into which the data was written if successful, None otherwise. 
""" - map_name = str(uuid.uuid4()) - logger.info('Writing bytes to shared memory: %s', map_name) - mem_map = self.file_writer.create_with_content_bytes(map_name, data) - if mem_map is not None: - self.allocated_mem_maps[map_name] = mem_map - return map_name + mem_map_name = str(uuid.uuid4()) + mem_map = self.file_writer.create_with_content_bytes(mem_map_name, data) + if mem_map is None: + return None + self.allocated_mem_maps[mem_map_name] = mem_map + return mem_map_name def put_string(self, data: str) -> Optional[str]: """ @@ -90,27 +89,27 @@ def put_string(self, data: str) -> Optional[str]: Returns the name of the memory map into which the data was written if succesful, None otherwise. """ - map_name = str(uuid.uuid4()) - logger.info('Writing string to shared memory: %s', map_name) - mem_map = self.file_writer.create_with_content_string(map_name, data) - if mem_map is not None: - self.allocated_mem_maps[map_name] = mem_map - return map_name + mem_map_name = str(uuid.uuid4()) + mem_map = self.file_writer.create_with_content_string(mem_map_name, data) + if mem_map is None: + return None + self.allocated_mem_maps[mem_map_name] = mem_map + return mem_map_name - def free_mem_map(self, map_name: str): + def free_mem_map(self, mem_map_name: str): """ - Frees the memory map and any backing resources (e.g. file in the case of Linux) associated + Frees the memory map and any backing resources (e.g. file in the case of Unix) associated with it. If there is no memory map with the given name being tracked, then no action is performed. Returns True if the memory map was freed successfully, False otherwise. 
""" - if map_name not in self.allocated_mem_maps: - # TODO Log Error + if mem_map_name not in self.allocated_mem_maps: + logger.error('Cannot find shared memory in list of allocations: %s', mem_map_name) return False - mem_map = self.allocated_mem_maps[map_name] - success = self.file_accessor.delete_mem_map(map_name, mem_map) - del self.allocated_mem_maps[map_name] + mem_map = self.allocated_mem_maps[mem_map_name] + success = self.file_accessor.delete_mem_map(mem_map_name, mem_map) + del self.allocated_mem_maps[mem_map_name] if not success: - # TODO Log Error + logger.error('Cannot delete shared memory: %s', mem_map_name) return False return True diff --git a/azure_functions_worker/constants.py b/azure_functions_worker/constants.py index 177827aed..bce5b6a34 100644 --- a/azure_functions_worker/constants.py +++ b/azure_functions_worker/constants.py @@ -31,3 +31,6 @@ # External Site URLs MODULE_NOT_FOUND_TS_URL = "https://aka.ms/functions-modulenotfound" + +# App Settings +FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED = "FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED" \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/__init__.py b/azure_functions_worker/mmap_handler/__init__.py index c5b67f340..a70bd1294 100644 --- a/azure_functions_worker/mmap_handler/__init__.py +++ b/azure_functions_worker/mmap_handler/__init__.py @@ -4,4 +4,6 @@ The initial set of corresponding changes to enable shared memory maps in the functions host can be found in the following Pull Request: https://github.com/Azure/azure-functions-host/pull/6836 +The issue tracking shared memory transfer related changes is: +https://github.com/Azure/azure-functions-host/issues/6791 """ \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index df87e559d..26947f682 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ 
b/azure_functions_worker/mmap_handler/file_accessor.py @@ -1,39 +1,43 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -import abc -import mmap +from __future__ import annotations +from abc import ABCMeta, abstractmethod from typing import Optional from .memorymappedfile_constants import MemoryMappedFileConstants as consts -class FileAccessor(metaclass=abc.ABCMeta): +class FileAccessor(metaclass=ABCMeta): """ For accessing memory maps. This is an interface that must be implemented by sub-classes to provide platform-specific support for accessing memory maps. Currently the following two sub-classes are implemented: 1) FileAccessorWindows - 2) FileAccessorLinux + 2) FileAccessorUnix """ - @abc.abstractmethod - def open_mem_map(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: + @abstractmethod + def open_mem_map( + self, + mem_map_name: str, + mem_map_size: int, + access: int) -> Optional[mmap.mmap]: """ Opens an existing memory map. - Returns the mmap if successful, None otherwise. + Returns the opened mmap if successful, None otherwise. """ raise NotImplementedError - @abc.abstractmethod - def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: + @abstractmethod + def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap.mmap]: """ Creates a new memory map. - Returns the mmap if successful, None otherwise. + Returns the created mmap if successful, None otherwise. """ raise NotImplementedError - @abc.abstractmethod - def delete_mem_map(self, map_name: str, mem_map: mmap.mmap) -> bool: + @abstractmethod + def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: """ Deletes the memory map and any backing resources associated with it. If there is no memory map with the given name, then no action is performed. 
@@ -41,16 +45,33 @@ def delete_mem_map(self, map_name: str, mem_map: mmap.mmap) -> bool: """ raise NotImplementedError - def _verify_new_map_created(self, map_name: str, mem_map) -> bool: - """Checks if the first byte of the memory map is zero. - If it is not, this memory map already existed. + def _is_dirty_bit_set(self, mem_map_name: str, mem_map) -> bool: """ + Checks if the dirty bit of the memory map has been set or not. + This is used to check if a new memory map was created successfully and we don't end up + using an existing one. + """ + # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) + # Read the first byte byte_read = mem_map.read(1) - is_new_mmap = False - if byte_read != consts.ZERO_BYTE: - is_new_mmap = False + # Check if the dirty bit was set or not + if byte_read == consts.DIRTY_BIT_SET: + is_set = True else: - is_new_mmap = True + is_set = False + # Seek back the memory map to the begginging + mem_map.seek(0) + return is_set + + def _set_dirty_bit(self, mem_map_name: str, mem_map): + """ + Sets the dirty bit in the header of the memory map to indicate that this memory map is not + new anymore. + """ + # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) - return is_new_mmap + # Set the dirty bit + mem_map.write(consts.DIRTY_BIT_SET) + # Seek back the memory map to the begginging + mem_map.seek(0) \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_factory.py b/azure_functions_worker/mmap_handler/file_accessor_factory.py index ce94c8def..4af13a7d3 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_factory.py +++ b/azure_functions_worker/mmap_handler/file_accessor_factory.py @@ -2,14 +2,18 @@ # Licensed under the MIT License. 
import os -from .file_accessor_linux import FileAccessorLinux +from .file_accessor_unix import FileAccessorUnix from .file_accessor_windows import FileAccessorWindows class FileAccessorFactory: + """ + For creating the platform-appropriate instance of FileAccessor to perform memory map related + operations. + """ @staticmethod def create_file_accessor(): - if os.name == 'posix': - return FileAccessorLinux() + if os.name == 'nt': + return FileAccessorWindows() else: - return FileAccessorWindows() \ No newline at end of file + return FileAccessorUnix() \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_linux.py b/azure_functions_worker/mmap_handler/file_accessor_linux.py deleted file mode 100644 index 0f578739f..000000000 --- a/azure_functions_worker/mmap_handler/file_accessor_linux.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import os -import mmap -import urllib.parse -from typing import Optional -from .memorymappedfile_constants import MemoryMappedFileConstants as consts -from .file_accessor import FileAccessor - - -class FileAccessorLinux(FileAccessor): - """ - For accessing memory maps. - This implements the FileAccessor interface for Linux. 
- """ - def open_mem_map(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: - try: - fd = self._open_mem_map_file(map_name) - mem_map = mmap.mmap(fd.fileno(), map_size, access=access) - mem_map.seek(0) - return mem_map - except Exception as e: - # TODO Log Error - print(e) - return None - - def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: - fd = self._create_mem_map_file(map_name, map_size) - mem_map = mmap.mmap(fd, map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) - if not self._verify_new_map_created(map_name, mem_map): - raise Exception("Memory map '%s' already exists" % (map_name)) - return mem_map - - def delete_mem_map(self, map_name: str, mem_map: mmap.mmap) -> bool: - try: - fd = self._open_mem_map_file(map_name) - os.remove(fd.name) - except FileNotFoundError: - # TODO log debug - return False - mem_map.close() - return True - - def _open_mem_map_file(self, map_name: str): - """ - Get the file descriptor of an existing memory map. - """ - escaped_map_name = urllib.parse.quote_plus(map_name) - for mmap_temp_dir in consts.TEMP_DIRS: - filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) - try: - fd = open(filename, "r+b") - return fd - except FileNotFoundError: - # TODO log debug - pass - raise FileNotFoundError("File for '%s' does not exist" % (map_name)) - - def _create_mem_map_dir(self): - """ - Create a directory to create memory maps. - """ - for mmap_temp_dir in consts.TEMP_DIRS: - dirname = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) - if os.path.isdir(dirname): - # One of the directories already exists, no need - return - try: - os.makedirs(dirname) - return - except Exception as ex: - print("Cannot create dir '%s': %s" % (dirname, str(ex))) - - def _create_mem_map_file(self, map_name: str, map_size: int): - """ - Get the file descriptor for a new memory map. 
- """ - escaped_map_name = urllib.parse.quote_plus(map_name) - dir_exists = False - for mmap_temp_dir in consts.TEMP_DIRS: - # Check if the file already exists - filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) - if os.path.exists(filename): - raise Exception("File '%s' for memory map '%s' already exists" % - (filename, map_name)) - # Check if the parent directory exists - dir_name = "%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX) - if os.path.isdir(dir_name): - dir_exists = True - # Check if any of the parent directories exists - if not dir_exists: - self._create_mem_map_dir() - # Create the file - for mmap_temp_dir in consts.TEMP_DIRS: - filename = "%s/%s/%s" % (mmap_temp_dir, consts.TEMP_DIR_SUFFIX, escaped_map_name) - try: - fd = os.open(filename, os.O_CREAT | os.O_TRUNC | os.O_RDWR) - # Write 0s to allocate - bytes_written = os.write(fd, b'\x00' * map_size) - if bytes_written != map_size: - print("Cannot write 0s into new memory map file '%s': %d != %d" % - (filename, bytes_written, map_size)) - return fd - except Exception as ex: - print("Cannot create memory map file '%s': %s" % (filename, ex)) - raise Exception("Cannot create memory map file for '%s'" % (map_name)) \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_unix.py b/azure_functions_worker/mmap_handler/file_accessor_unix.py new file mode 100644 index 000000000..5da4e3184 --- /dev/null +++ b/azure_functions_worker/mmap_handler/file_accessor_unix.py @@ -0,0 +1,128 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from __future__ import annotations +import os +import mmap +from typing import Optional +from io import BufferedRandom +from .memorymappedfile_constants import MemoryMappedFileConstants as consts +from .file_accessor import FileAccessor +from ..logging import logger + + +class FileAccessorUnix(FileAccessor): + """ + For accessing memory maps. 
+ This implements the FileAccessor interface for Unix platforms. + """ + def open_mem_map( + self, + mem_map_name: str, + mem_map_size: int, + access: int) -> Optional[mmap.mmap]: + fd = self._open_mem_map_file(mem_map_name) + if fd is None: + return None + mem_map = mmap.mmap(fd.fileno(), mem_map_size, access=access) + return mem_map + + def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap.mmap]: + fd = self._create_mem_map_file(mem_map_name, mem_map_size) + if fd is None: + return None + mem_map = mmap.mmap(fd, mem_map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) + if self._is_dirty_bit_set(mem_map_name, mem_map): + raise Exception(f'Memory map {mem_map_name} already exists') + self._set_dirty_bit(mem_map_name, mem_map) + return mem_map + + def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: + try: + fd = self._open_mem_map_file(mem_map_name) + os.remove(fd.name) + except Exception as e: + # In this case, we don't want to fail right away but log that deletion was unsuccessful. + # These logs can help identify if we may be leaking memory and not cleaning up the + # created memory maps. + logger.error(f'Cannot delete memory map {mem_map_name} - {e}', exc_info=True) + return False + mem_map.close() + return True + + def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: + """ + Get the file descriptor of an existing memory map. + Returns the BufferedRandom stream to the file. + """ + # Iterate over all the possible directories where the memory map could be present and try + # to open it. 
+ for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: + file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + try: + fd = open(file_path, 'r+b') + return fd + except FileNotFoundError: + pass + # The memory map was not found in any of the known directories + logger.warn(f'Cannot open memory map {mem_map_name}') + return None + + def _create_mem_map_dir(self): + """ + Create a directory to create memory maps. + """ + # Iterate over all the possible directories where the memory map could be created and try + # to create in one of them. + for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: + dir_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) + if os.path.isdir(dir_path): + # One of the directories already exists, no need + return + try: + os.makedirs(dir_path) + return + except: + # We try to create a directory in each of the applicable directory paths until we + # successfully create one or one that already exists is found. + # Even if this fails, we keep trying others. + pass + # Could not create a directory in any of the applicable directory paths. + # We will not be able to create any memory maps so we fail. + raise Exception(f'Cannot create directory for memory maps') + + def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) -> Optional[int]: + """ + Get the file descriptor for a new memory map. + Returns the file descriptor. 
+ """ + dir_exists = False + for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: + # Check if the file already exists + file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + if os.path.exists(file_path): + raise Exception(f'File {file_path} for memory map {mem_map_name} already exists') + # Check if the parent directory exists + dir_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFI) + if os.path.isdir(dir_path): + dir_exists = True + # Check if any of the parent directories exists + if not dir_exists: + self._create_mem_map_dir() + # Create the file + for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: + file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + try: + fd = os.open(file_path, os.O_CREAT | os.O_TRUNC | os.O_RDWR) + # Write 0s to allocate + # TODO use truncate here instead of zeroeing out the memory + bytes_written = os.write(fd, b'\x00' * mem_mem_map_size) + if bytes_written != mem_mem_map_size: + raise Exception( + f'Cannot write 0s into new memory map {file_path} ' + f'({bytes_written} != {mem_mem_map_size})') + return fd + except: + pass + logger.warn(f'Cannot create memory map {mem_map_name} with size {mem_mem_map_size}') + return None \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/mmap_handler/file_accessor_windows.py index 22c6f4172..b81d46b75 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_windows.py +++ b/azure_functions_worker/mmap_handler/file_accessor_windows.py @@ -1,9 +1,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations import mmap from typing import Optional from .file_accessor import FileAccessor +from ..logging import logger class FileAccessorWindows(FileAccessor): @@ -11,23 +13,28 @@ class FileAccessorWindows(FileAccessor): For accessing memory maps. 
This implements the FileAccessor interface for Windows. """ - def open_mem_map(self, map_name: str, map_size: int , access: int) -> Optional[mmap.mmap]: + def open_mem_map( + self, + mem_map_name: str, + mem_map_size: int, + access: int) -> Optional[mmap.mmap]: try: - mmap_ret = mmap.mmap(-1, map_size, map_name, access=access) - mmap_ret.seek(0) + mmap_ret = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) return mmap_ret except Exception as e: - # TODO Log Error - print(e) + logger.warn(f'Cannot open memory map {mem_map_name} with size {mem_map_size} - {e}') return None - def create_mem_map(self, map_name: str, map_size: int) -> Optional[mmap.mmap]: + def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap.mmap]: # Windows also creates the mmap when trying to open it, if it does not already exist. - mem_map = self.open_mem_map(map_name, map_size, mmap.ACCESS_WRITE) - if not self._verify_new_map_created(map_name, mem_map): - raise Exception("Memory map '%s' already exists" % (map_name)) + mem_map = self.open_mem_map(mem_map_name, mem_map_size, mmap.ACCESS_WRITE) + if mem_map is None: + return None + if self._is_dirty_bit_set(mem_map_name, mem_map): + raise Exception(f'Cannot create memory map {mem_map_name} as it already exists') + self._set_dirty_bit(mem_map_name, mem_map) return mem_map - def delete_mem_map(self, map_name: str, mmap) -> bool: + def delete_mem_map(self, mem_map_name: str, mmap) -> bool: mmap.close() return True diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py index dde98cdb5..61be42195 100644 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ b/azure_functions_worker/mmap_handler/file_reader.py @@ -25,70 +25,67 @@ def _bytes_to_long(self, input_bytes) -> int: """ return struct.unpack(" Optional[int]: + def _get_content_length(self, mem_map_name) -> Optional[int]: """ Read the header of the memory map to determine the length of content 
contained in that memory map. Returns the content length as a non-negative integer if successful, None otherwise. """ - try: - map_content_length = self.file_accessor.open_mem_map( - map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) - except FileNotFoundError: - return None - if map_content_length is None: + mem_map_content_length = self.file_accessor.open_mem_map( + mem_map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) + if mem_map_content_length is None: return None try: - header_bytes = map_content_length.read(consts.CONTENT_HEADER_TOTAL_BYTES) + mem_map_content_length.seek(consts.DIRTY_BIT_FLAG_NUM_BYTES) + header_bytes = mem_map_content_length.read(consts.CONTENT_LENGTH_NUM_BYTES) content_length = self._bytes_to_long(header_bytes) return content_length - except ValueError as value_error: - print("Cannot get content length for memory map '%s': %s" % (map_name, value_error)) - return None finally: - map_content_length.close() + mem_map_content_length.close() - def read_content_as_bytes(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[bytes]: + def read_content_as_bytes( + self, + mem_map_name: str, + content_offset: int = 0, + bytes_to_read: int = 0) -> Optional[bytes]: """ Read content from the memory map with the given name and starting at the given offset. content_offset = 0 means read from the beginning of the content. bytes_to_read = 0 means read the entire content. Returns the content as bytes if successful, None otherwise. 
""" - content_length = self._get_content_length(map_name) + content_length = self._get_content_length(mem_map_name) if content_length is None: return None - map_length = content_length + consts.CONTENT_HEADER_TOTAL_BYTES + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_length + mem_map_content = self.file_accessor.open_mem_map(mem_map_name, mem_map_size, + mmap.ACCESS_READ) + if mem_map_content is None: + return None try: - map_content = self.file_accessor.open_mem_map(map_name, map_length, mmap.ACCESS_READ) - if map_content is not None: - try: - map_content.seek(consts.CONTENT_HEADER_TOTAL_BYTES) - if content_offset > 0: - map_content.seek(content_offset, os.SEEK_CUR) - if bytes_to_read > 0: - # Read up to the specified number of bytes to read - content = map_content.read(bytes_to_read) - else: - # Read the entire content - content = map_content.read() - return content - except ValueError as value_error: - print("Cannot get content for memory map '%s': %s" % (map_name, value_error)) - finally: - map_content.close() - except FileNotFoundError: - #print("Cannot get content for '%s'" % (map_name)) - pass - # If we cannot get the content return None - return None + mem_map_content.seek(consts.CONTENT_HEADER_TOTAL_BYTES) + if content_offset > 0: + mem_map_content.seek(content_offset, os.SEEK_CUR) + if bytes_to_read > 0: + # Read up to the specified number of bytes to read + content = mem_map_content.read(bytes_to_read) + else: + # Read the entire content + content = mem_map_content.read() + return content + finally: + mem_map_content.close() - def read_content_as_string(self, map_name: str, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[str]: + def read_content_as_string( + self, + mem_map_name: str, + content_offset: int = 0, + bytes_to_read: int = 0) -> Optional[str]: """ Read content from the memory map with the given name and starting at the given offset. Returns the content as a string if successful, None otherwise. 
""" - content_bytes = self.read_content_as_bytes(map_name, content_offset, bytes_to_read) + content_bytes = self.read_content_as_bytes(mem_map_name, content_offset, bytes_to_read) if content_bytes is None: return None content_str = content_bytes.decode('utf-8') diff --git a/azure_functions_worker/mmap_handler/file_writer.py b/azure_functions_worker/mmap_handler/file_writer.py index 5048279e6..cf1a3f0e2 100644 --- a/azure_functions_worker/mmap_handler/file_writer.py +++ b/azure_functions_worker/mmap_handler/file_writer.py @@ -4,7 +4,6 @@ import sys import mmap from typing import Optional -from typing import Union from .file_accessor_factory import FileAccessorFactory from .memorymappedfile_constants import MemoryMappedFileConstants as consts @@ -19,7 +18,7 @@ class FileWriter: def __init__(self): self.file_accessor = FileAccessorFactory.create_file_accessor() - def create_with_content_bytes(self, map_name: str, content: bytes) -> Optional[mmap.mmap]: + def create_with_content_bytes(self, mem_map_name: str, content: bytes) -> Optional[mmap.mmap]: """ Create a new memory map with the given name and content (as bytes). Returns the newly created memory map if successful, None otherwise. 
@@ -27,15 +26,17 @@ def create_with_content_bytes(self, map_name: str, content: bytes) -> Optional[m if content is None: return None content_size = len(content) - map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size - mem_map = self.file_accessor.create_mem_map(map_name, map_size) + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size + mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) + if mem_map is None: + return None content_size_bytes = content_size.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) mem_map.write(content_size_bytes) mem_map.write(content) mem_map.flush() return mem_map - def create_with_content_string(self, map_name: str, content: str) -> Optional[mmap.mmap]: + def create_with_content_string(self, mem_map_name: str, content: str) -> Optional[mmap.mmap]: """ Create a new memory map with the given name and content (as a string). Returns the newly created memory map if successful, None otherwise. @@ -43,4 +44,4 @@ def create_with_content_string(self, map_name: str, content: str) -> Optional[mm if content is None: return None content_bytes = content.encode('utf-8') - return self.create_with_content_bytes(map_name, content_bytes) + return self.create_with_content_bytes(mem_map_name, content_bytes) diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py index 8e3c6919a..1a34ff3de 100644 --- a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py +++ b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py @@ -1,17 +1,29 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -# TODO use protobuf to define these constants between C# and Python? 
+ class MemoryMappedFileConstants: - # Directories in Linux where the memory maps can be found - TEMP_DIRS = ["/dev/shm"] - # Suffix for the temp directories containing memory maps - TEMP_DIR_SUFFIX = "AzureFunctions" + # Directories in Unix where the memory maps can be found + UNIX_TEMP_DIRS = ["/dev/shm"] + + # Suffix for the temp directories containing memory maps in Unix + UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" + + # The length of a bool which is the length of the part of the header flag specifying if the + # memory map is already created and used. + # This is to distinguish between new memory maps and ones that were previously created and may + # be in use already. + DIRTY_BIT_FLAG_NUM_BYTES = 1 - # The length of a long which is the length of the header in the content memory map + # The length of a long which is the length of the part of the header specifying content length + # in the memory map. CONTENT_LENGTH_NUM_BYTES = 8 - # The length of the header: content length - CONTENT_HEADER_TOTAL_BYTES = CONTENT_LENGTH_NUM_BYTES + + # The total length of the header + CONTENT_HEADER_TOTAL_BYTES = DIRTY_BIT_FLAG_NUM_BYTES + CONTENT_LENGTH_NUM_BYTES + + # A flag to indicate that the memory map has been created, may be in use and is not new. + DIRTY_BIT_SET = b'\x01' # Zero byte. # E.g. 
Used to compare the first byte of a newly created memory map against this; if it is a From 72b4446f8bb1066d4a200507af218b79c183f401 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 18 Feb 2021 14:53:09 -0800 Subject: [PATCH 33/76] Following same class structure as the shared memory changes made for the host --- azure_functions_worker/bindings/datumdef.py | 15 +- .../bindings/shared_memory_manager.py | 135 ++++++++++++------ .../bindings/shared_memory_metadata.py | 12 ++ .../mmap_handler/file_accessor.py | 13 +- .../mmap_handler/file_accessor_unix.py | 12 +- .../mmap_handler/file_accessor_windows.py | 6 +- .../mmap_handler/file_reader.py | 92 ------------ .../mmap_handler/file_writer.py | 47 ------ .../memorymappedfile_constants.py | 31 ---- .../mmap_handler/shared_memory_constants.py | 53 +++++++ .../mmap_handler/shared_memory_map.py | 100 +++++++++++++ 11 files changed, 277 insertions(+), 239 deletions(-) create mode 100644 azure_functions_worker/bindings/shared_memory_metadata.py delete mode 100644 azure_functions_worker/mmap_handler/file_reader.py delete mode 100644 azure_functions_worker/mmap_handler/file_writer.py delete mode 100644 azure_functions_worker/mmap_handler/memorymappedfile_constants.py create mode 100644 azure_functions_worker/mmap_handler/shared_memory_constants.py create mode 100644 azure_functions_worker/mmap_handler/shared_memory_map.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index a7bba7e54..b9f9076bc 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -126,25 +126,26 @@ def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> O """ if datum.type == 'bytes': value = datum.value - mem_map_name = shmem_mgr.put_bytes(value) + shared_mem_meta = shmem_mgr.put_bytes(value) data_type = protos.RpcDataType.bytes elif datum.type == 'string': value = datum.value - mem_map_name = 
shmem_mgr.put_string(value) + shared_mem_meta = shmem_mgr.put_string(value) data_type = protos.RpcDataType.string else: raise NotImplementedError( f'Unsupported datum type ({datum.type}) for shared memory' ) - if mem_map_name is None: + if shared_mem_meta is None: return None - content_size = len(value) shmem = protos.RpcSharedMemory( - name=mem_map_name, + name=shared_mem_meta.mem_map_name, offset=0, - count=content_size, + count=shared_mem_meta.count, type=data_type) - logger.info(f'Wrote {content_size} bytes to memory map {mem_map_name} for data type {data_type}') + logger.info( + f'Wrote {shared_mem_meta.count} bytes to memory map {shared_mem_meta.mem_map_name} ' + f'for data type {data_type}') return shmem def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_manager.py index 88b7643e1..cde6515ca 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_manager.py @@ -2,12 +2,13 @@ # Licensed under the MIT License. from __future__ import annotations +from azure_functions_worker.bindings.shared_memory_metadata import SharedMemoryMetadata import uuid from typing import Dict, Optional from ..logging import logger -from ..mmap_handler.file_writer import FileWriter -from ..mmap_handler.file_reader import FileReader from ..mmap_handler.file_accessor_factory import FileAccessorFactory +from ..mmap_handler.shared_memory_constants import SharedMemoryConstants as consts +from ..mmap_handler.shared_memory_map import SharedMemoryMap from ..utils.common import is_envvar_true from ..constants import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED @@ -25,11 +26,9 @@ def __init__(self): # functions host). # Having a mapping of the name and the memory map is then later used to close a given # memory map by its name, after it has been used. 
- # key: mem_map_name, val: mmap.mmap - self.allocated_mem_maps: Dict[str, mmap.mmap] = {} + # key: mem_map_name, val: SharedMemoryMap + self.allocated_mem_maps: Dict[str, SharedMemoryMap] = {} self.file_accessor = FileAccessorFactory.create_file_accessor() - self.file_reader = FileReader() - self.file_writer = FileWriter() def is_enabled(self) -> bool: """ @@ -42,24 +41,70 @@ def is_supported(self, datum: Datum) -> bool: """ Whether the given Datum object can be transferred to the functions host using shared memory. + This logic is kept consistent with the host's which can be found in SharedMemoryManager.cs """ if datum.type == 'bytes': - # TODO gochaudh: Check for min size config - # Is there a common place to put configs shared b/w host and worker? - # Env variable? App Setting? - return True + num_bytes = len(datum.value) + if num_bytes >= consts.MIN_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER and \ + num_bytes <= consts.MAX_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER: + return True elif datum.type == 'string': - return True + num_bytes = len(datum.value) * consts.SIZE_OF_CHAR_BYTES + if num_bytes >= consts.MIN_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER and \ + num_bytes <= consts.MAX_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER: + return True return False + def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]: + """ + Writes the given bytes into shared memory. + Returns metadata about the shared memory region to which the content was written if + successful, None otherwise. 
+ """ + if content is None: + return None + mem_map_name = str(uuid.uuid4()) + content_length = len(content) + shared_mem_map = self.create(mem_map_name, content_length) + if shared_mem_map is None: + return None + num_bytes_written = shared_mem_map.put_bytes(content) + if num_bytes_written != content_length: + logger.error( + f'Cannot write data into shared memory {mem_map_name} ' + f'({num_bytes_written} != {content_length})') + return None + self.allocated_mem_maps[mem_map_name] = shared_mem_map + return SharedMemoryMetadata(mem_map_name, content_length) + + def put_string(self, content: str) -> Optional[SharedMemoryMetadata]: + """ + Writes the given string into shared memory. + Returns the name of the memory map into which the data was written if succesful, None + otherwise. + """ + if content is None: + return None + content_bytes = content.encode('utf-8') + return self.put_bytes(content_bytes) + def get_bytes(self, mem_map_name: str, offset: int, count: int) -> Optional[bytes]: """ Reads data from the given memory map with the provided name, starting at the provided offset and reading a total of count bytes. Returns the data read from shared memory as bytes if successful, None otherwise. """ - data = self.file_reader.read_content_as_bytes(mem_map_name, offset, count) - return data + if offset != 0: + logger.error(f'Cannot read bytes. Non-zero offset ({offset}) not supported.') + return None + shared_mem_map = self.open(mem_map_name, count) + if shared_mem_map is None: + return None + try: + content = shared_mem_map.get_bytes(content_offset=0, bytes_to_read=count) + finally: + shared_mem_map.dispose(is_delete_file=False) + return content def get_string(self, mem_map_name: str, offset: int, count: int) -> Optional[str]: """ @@ -67,34 +112,11 @@ def get_string(self, mem_map_name: str, offset: int, count: int) -> Optional[str offset and reading a total of count bytes. Returns the data read from shared memory as a string if successful, None otherwise. 
""" - data = self.file_reader.read_content_as_string(mem_map_name, offset, count) - return data - - def put_bytes(self, data: bytes) -> Optional[str]: - """ - Writes the given bytes into shared memory. - Returns the name of the memory map into which the data was written if successful, None - otherwise. - """ - mem_map_name = str(uuid.uuid4()) - mem_map = self.file_writer.create_with_content_bytes(mem_map_name, data) - if mem_map is None: - return None - self.allocated_mem_maps[mem_map_name] = mem_map - return mem_map_name - - def put_string(self, data: str) -> Optional[str]: - """ - Writes the given string into shared memory. - Returns the name of the memory map into which the data was written if succesful, None - otherwise. - """ - mem_map_name = str(uuid.uuid4()) - mem_map = self.file_writer.create_with_content_string(mem_map_name, data) - if mem_map is None: + content_bytes = self.get_bytes(mem_map_name, offset, count) + if content_bytes is None: return None - self.allocated_mem_maps[mem_map_name] = mem_map - return mem_map_name + content_str = content_bytes.decode('utf-8') + return content_str def free_mem_map(self, mem_map_name: str): """ @@ -104,12 +126,31 @@ def free_mem_map(self, mem_map_name: str): Returns True if the memory map was freed successfully, False otherwise. 
""" if mem_map_name not in self.allocated_mem_maps: - logger.error('Cannot find shared memory in list of allocations: %s', mem_map_name) + logger.error(f'Cannot find memory map in list of allocations {mem_map_name}') return False - mem_map = self.allocated_mem_maps[mem_map_name] - success = self.file_accessor.delete_mem_map(mem_map_name, mem_map) + shared_mem_map = self.allocated_mem_maps[mem_map_name] + success = shared_mem_map.dispose() del self.allocated_mem_maps[mem_map_name] - if not success: - logger.error('Cannot delete shared memory: %s', mem_map_name) - return False - return True + return success + + def create(self, mem_map_name: str, content_length: int) -> Optional[SharedMemoryMap]: + """ + Creates a new SharedMemoryMap with the given name and content length. + Returns the SharedMemoryMap object if successful, None otherwise. + """ + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_length + mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) + if mem_map is None: + return None + return SharedMemoryMap(self.file_accessor, mem_map_name, mem_map) + + def open(self, mem_map_name: str, content_length: int) -> Optional[SharedMemoryMap]: + """ + Opens an existing SharedMemoryMap with the given name and content length. + Returns the SharedMemoryMap object if successful, None otherwise. + """ + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_length + mem_map = self.file_accessor.open_mem_map(mem_map_name, mem_map_size) + if mem_map is None: + return None + return SharedMemoryMap(self.file_accessor, mem_map_name, mem_map) \ No newline at end of file diff --git a/azure_functions_worker/bindings/shared_memory_metadata.py b/azure_functions_worker/bindings/shared_memory_metadata.py new file mode 100644 index 000000000..138aa15b8 --- /dev/null +++ b/azure_functions_worker/bindings/shared_memory_metadata.py @@ -0,0 +1,12 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +class SharedMemoryMetadata: + """ + Information about a shared memory region. + """ + def __init__(self, mem_map_name, count): + # Name of the memory map + self.mem_map_name = mem_map_name + # Number of bytes of content in the memory map + self.count = count \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/mmap_handler/file_accessor.py index 26947f682..a608f1d51 100644 --- a/azure_functions_worker/mmap_handler/file_accessor.py +++ b/azure_functions_worker/mmap_handler/file_accessor.py @@ -2,9 +2,10 @@ # Licensed under the MIT License. from __future__ import annotations +import mmap from abc import ABCMeta, abstractmethod from typing import Optional -from .memorymappedfile_constants import MemoryMappedFileConstants as consts +from .shared_memory_constants import SharedMemoryConstants as consts class FileAccessor(metaclass=ABCMeta): @@ -21,7 +22,7 @@ def open_mem_map( self, mem_map_name: str, mem_map_size: int, - access: int) -> Optional[mmap.mmap]: + access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: """ Opens an existing memory map. Returns the opened mmap if successful, None otherwise. @@ -45,7 +46,7 @@ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: """ raise NotImplementedError - def _is_dirty_bit_set(self, mem_map_name: str, mem_map) -> bool: + def _is_mem_map_initialized(self, mem_map: mmap.mmap) -> bool: """ Checks if the dirty bit of the memory map has been set or not. 
This is used to check if a new memory map was created successfully and we don't end up @@ -56,7 +57,7 @@ def _is_dirty_bit_set(self, mem_map_name: str, mem_map) -> bool: # Read the first byte byte_read = mem_map.read(1) # Check if the dirty bit was set or not - if byte_read == consts.DIRTY_BIT_SET: + if byte_read == consts.MEM_MAP_INITIALIZED_FLAG: is_set = True else: is_set = False @@ -64,7 +65,7 @@ def _is_dirty_bit_set(self, mem_map_name: str, mem_map) -> bool: mem_map.seek(0) return is_set - def _set_dirty_bit(self, mem_map_name: str, mem_map): + def _set_mem_map_initialized(self, mem_map: mmap.mmap): """ Sets the dirty bit in the header of the memory map to indicate that this memory map is not new anymore. @@ -72,6 +73,6 @@ def _set_dirty_bit(self, mem_map_name: str, mem_map): # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) # Set the dirty bit - mem_map.write(consts.DIRTY_BIT_SET) + mem_map.write(consts.MEM_MAP_INITIALIZED_FLAG) # Seek back the memory map to the begginging mem_map.seek(0) \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_unix.py b/azure_functions_worker/mmap_handler/file_accessor_unix.py index 5da4e3184..3c815d9ea 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_unix.py +++ b/azure_functions_worker/mmap_handler/file_accessor_unix.py @@ -6,7 +6,7 @@ import mmap from typing import Optional from io import BufferedRandom -from .memorymappedfile_constants import MemoryMappedFileConstants as consts +from .shared_memory_constants import SharedMemoryConstants as consts from .file_accessor import FileAccessor from ..logging import logger @@ -20,7 +20,7 @@ def open_mem_map( self, mem_map_name: str, mem_map_size: int, - access: int) -> Optional[mmap.mmap]: + access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: fd = self._open_mem_map_file(mem_map_name) if fd is None: return None @@ -32,9 +32,9 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: 
int) -> Optional[mmap. if fd is None: return None mem_map = mmap.mmap(fd, mem_map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) - if self._is_dirty_bit_set(mem_map_name, mem_map): + if self._is_mem_map_initialized(mem_map): raise Exception(f'Memory map {mem_map_name} already exists') - self._set_dirty_bit(mem_map_name, mem_map) + self._set_mem_map_initialized(mem_map) return mem_map def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: @@ -65,7 +65,7 @@ def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: except FileNotFoundError: pass # The memory map was not found in any of the known directories - logger.warn(f'Cannot open memory map {mem_map_name}') + logger.error(f'Cannot open memory map {mem_map_name}') return None def _create_mem_map_dir(self): @@ -124,5 +124,5 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) -> Opti return fd except: pass - logger.warn(f'Cannot create memory map {mem_map_name} with size {mem_mem_map_size}') + logger.error(f'Cannot create memory map {mem_map_name} with size {mem_mem_map_size}') return None \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/mmap_handler/file_accessor_windows.py index b81d46b75..d50c6c814 100644 --- a/azure_functions_worker/mmap_handler/file_accessor_windows.py +++ b/azure_functions_worker/mmap_handler/file_accessor_windows.py @@ -17,7 +17,7 @@ def open_mem_map( self, mem_map_name: str, mem_map_size: int, - access: int) -> Optional[mmap.mmap]: + access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: try: mmap_ret = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) return mmap_ret @@ -30,9 +30,9 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap. 
mem_map = self.open_mem_map(mem_map_name, mem_map_size, mmap.ACCESS_WRITE) if mem_map is None: return None - if self._is_dirty_bit_set(mem_map_name, mem_map): + if self._is_mem_map_initialized(mem_map): raise Exception(f'Cannot create memory map {mem_map_name} as it already exists') - self._set_dirty_bit(mem_map_name, mem_map) + self._set_mem_map_initialized(mem_map) return mem_map def delete_mem_map(self, mem_map_name: str, mmap) -> bool: diff --git a/azure_functions_worker/mmap_handler/file_reader.py b/azure_functions_worker/mmap_handler/file_reader.py deleted file mode 100644 index 61be42195..000000000 --- a/azure_functions_worker/mmap_handler/file_reader.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import mmap -import os -import struct -from typing import Optional -from .memorymappedfile_constants import MemoryMappedFileConstants as consts -from .file_accessor_factory import FileAccessorFactory - - -class FileReader: - """ - For reading data from memory maps in shared memory. - Assumes a particular format when reading data (i.e. particular header before the content). - For writing data that could be read by the FileReader, use FileWriter. - """ - def __init__(self): - self.file_accessor = FileAccessorFactory.create_file_accessor() - - def _bytes_to_long(self, input_bytes) -> int: - """ - Decode a set of bytes representing a long. - This uses the format that the functions host (i.e. C#) uses. - """ - return struct.unpack(" Optional[int]: - """ - Read the header of the memory map to determine the length of content contained in that - memory map. - Returns the content length as a non-negative integer if successful, None otherwise. 
- """ - mem_map_content_length = self.file_accessor.open_mem_map( - mem_map_name, consts.CONTENT_HEADER_TOTAL_BYTES, mmap.ACCESS_READ) - if mem_map_content_length is None: - return None - try: - mem_map_content_length.seek(consts.DIRTY_BIT_FLAG_NUM_BYTES) - header_bytes = mem_map_content_length.read(consts.CONTENT_LENGTH_NUM_BYTES) - content_length = self._bytes_to_long(header_bytes) - return content_length - finally: - mem_map_content_length.close() - - def read_content_as_bytes( - self, - mem_map_name: str, - content_offset: int = 0, - bytes_to_read: int = 0) -> Optional[bytes]: - """ - Read content from the memory map with the given name and starting at the given offset. - content_offset = 0 means read from the beginning of the content. - bytes_to_read = 0 means read the entire content. - Returns the content as bytes if successful, None otherwise. - """ - content_length = self._get_content_length(mem_map_name) - if content_length is None: - return None - mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_length - mem_map_content = self.file_accessor.open_mem_map(mem_map_name, mem_map_size, - mmap.ACCESS_READ) - if mem_map_content is None: - return None - try: - mem_map_content.seek(consts.CONTENT_HEADER_TOTAL_BYTES) - if content_offset > 0: - mem_map_content.seek(content_offset, os.SEEK_CUR) - if bytes_to_read > 0: - # Read up to the specified number of bytes to read - content = mem_map_content.read(bytes_to_read) - else: - # Read the entire content - content = mem_map_content.read() - return content - finally: - mem_map_content.close() - - def read_content_as_string( - self, - mem_map_name: str, - content_offset: int = 0, - bytes_to_read: int = 0) -> Optional[str]: - """ - Read content from the memory map with the given name and starting at the given offset. - Returns the content as a string if successful, None otherwise. 
- """ - content_bytes = self.read_content_as_bytes(mem_map_name, content_offset, bytes_to_read) - if content_bytes is None: - return None - content_str = content_bytes.decode('utf-8') - return content_str \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/file_writer.py b/azure_functions_worker/mmap_handler/file_writer.py deleted file mode 100644 index cf1a3f0e2..000000000 --- a/azure_functions_worker/mmap_handler/file_writer.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import sys -import mmap -from typing import Optional -from .file_accessor_factory import FileAccessorFactory -from .memorymappedfile_constants import MemoryMappedFileConstants as consts - - -class FileWriter: - """ - For writing data into memory maps in shared memory. - Follows a particular format for writing data (i.e. particular header before appending the - content). - For reading data as written by the FileWriter, use the FileReader class. - """ - def __init__(self): - self.file_accessor = FileAccessorFactory.create_file_accessor() - - def create_with_content_bytes(self, mem_map_name: str, content: bytes) -> Optional[mmap.mmap]: - """ - Create a new memory map with the given name and content (as bytes). - Returns the newly created memory map if successful, None otherwise. - """ - if content is None: - return None - content_size = len(content) - mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size - mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) - if mem_map is None: - return None - content_size_bytes = content_size.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) - mem_map.write(content_size_bytes) - mem_map.write(content) - mem_map.flush() - return mem_map - - def create_with_content_string(self, mem_map_name: str, content: str) -> Optional[mmap.mmap]: - """ - Create a new memory map with the given name and content (as a string). 
- Returns the newly created memory map if successful, None otherwise. - """ - if content is None: - return None - content_bytes = content.encode('utf-8') - return self.create_with_content_bytes(mem_map_name, content_bytes) diff --git a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py b/azure_functions_worker/mmap_handler/memorymappedfile_constants.py deleted file mode 100644 index 1a34ff3de..000000000 --- a/azure_functions_worker/mmap_handler/memorymappedfile_constants.py +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - - -class MemoryMappedFileConstants: - # Directories in Unix where the memory maps can be found - UNIX_TEMP_DIRS = ["/dev/shm"] - - # Suffix for the temp directories containing memory maps in Unix - UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" - - # The length of a bool which is the length of the part of the header flag specifying if the - # memory map is already created and used. - # This is to distinguish between new memory maps and ones that were previously created and may - # be in use already. - DIRTY_BIT_FLAG_NUM_BYTES = 1 - - # The length of a long which is the length of the part of the header specifying content length - # in the memory map. - CONTENT_LENGTH_NUM_BYTES = 8 - - # The total length of the header - CONTENT_HEADER_TOTAL_BYTES = DIRTY_BIT_FLAG_NUM_BYTES + CONTENT_LENGTH_NUM_BYTES - - # A flag to indicate that the memory map has been created, may be in use and is not new. - DIRTY_BIT_SET = b'\x01' - - # Zero byte. - # E.g. Used to compare the first byte of a newly created memory map against this; if it is a - # non-zero byte then the memory map was already created. 
- ZERO_BYTE = b'\x00' diff --git a/azure_functions_worker/mmap_handler/shared_memory_constants.py b/azure_functions_worker/mmap_handler/shared_memory_constants.py new file mode 100644 index 000000000..f95cd0753 --- /dev/null +++ b/azure_functions_worker/mmap_handler/shared_memory_constants.py @@ -0,0 +1,53 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + + +class SharedMemoryConstants: + # Directories in Unix where the memory maps can be found + UNIX_TEMP_DIRS = ["/dev/shm"] + + # Suffix for the temp directories containing memory maps in Unix + UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" + + # The length of a bool which is the length of the part of the header flag specifying if the + # memory map is already created and used. + # This is to distinguish between new memory maps and ones that were previously created and may + # be in use already. + MEM_MAP_INITIALIZED_FLAG_NUM_BYTES = 1 + + # The length of a long which is the length of the part of the header specifying content length + # in the memory map. + CONTENT_LENGTH_NUM_BYTES = 8 + + # The total length of the header + CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + CONTENT_LENGTH_NUM_BYTES + + # A flag to indicate that the memory map has been initialized, may be in use and is not new. + # This represents a boolean value of True. + MEM_MAP_INITIALIZED_FLAG = b'\x01' + + # A flag to indicate that the memory map has not yet been initialized. + # This represents a boolean value of False. + MEM_MAP_UNINITIALIZED_FLAG = b'\x00' + + # Minimum size (in number of bytes) an object must be in order for it to be transferred over + # shared memory. + # If the object is smaller than this, gRPC is used. + # Note: This needs to be consistent among the host and workers. + # e.g. 
in the host, it is defined in SharedMemoryConstants.cs + MIN_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER = 1024 * 1024 # 1 MB + + # Maximum size (in number of bytes) an object must be in order for it to be transferred over + # shared memory. + # This limit is imposed because initializing objects like greater than 2GB is not allowed in + # DotNet. + # Ref: https://stackoverflow.com/a/3944336/3132415 + # Note: This needs to be consistent among the host and workers. + # e.g. in the host, it is defined in SharedMemoryConstants.cs + MAX_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER = 1024 * 1024 # 1 MB + + # This is what the size of a character is in DotNet. Can be verified by doing "sizeof(char)". + # To keep the limits consistent, when determining if a string can be transferred over shared + # memory, we multiply the number of characters by this constant. + # Corresponding logic in the host can be found in SharedMemoryManager.cs + SIZE_OF_CHAR_BYTES = 2 \ No newline at end of file diff --git a/azure_functions_worker/mmap_handler/shared_memory_map.py b/azure_functions_worker/mmap_handler/shared_memory_map.py new file mode 100644 index 000000000..6768df47c --- /dev/null +++ b/azure_functions_worker/mmap_handler/shared_memory_map.py @@ -0,0 +1,100 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from __future__ import annotations +import mmap +import os +import struct +import sys +from typing import Optional +from .shared_memory_constants import SharedMemoryConstants as consts +from ..logging import logger + + +class SharedMemoryMap: + """ + Shared memory region to read/write data from. + """ + def __init__(self, file_accessor: FileAccessor, mem_map_name: str, mem_map: mmap.mmap): + self.file_accessor = file_accessor + self.mem_map_name = mem_map_name + self.mem_map = mem_map + + def put_bytes(self, content: bytes) -> int: + """ + Writes the given content bytes into this SharedMemoryMap. 
+ The number of bytes written must be less than or equal to the size of the SharedMemoryMap. + Returns the number of bytes of content written. + """ + if content is None: + return None + content_length = len(content) + # Seek past the MemoryMapInitialized flag section of the header + self.mem_map.seek(consts.MEM_MAP_INITIALIZED_FLAG_NUM_BYTES) + # Write the content length into the header + content_length_bytes = content_length.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, + byteorder=sys.byteorder) + num_content_length_bytes = len(content_length_bytes) + num_content_length_bytes_written = self.mem_map.write(content_length_bytes) + if num_content_length_bytes_written != num_content_length_bytes: + logger.error( + f'Cannot write content size into memory map {self.mem_map_name} ' + f'({num_content_length_bytes_written} != {num_content_length_bytes})') + return 0 + # Write the content + num_content_bytes_written = self.mem_map.write(content) + self.mem_map.flush() + return num_content_bytes_written + + def get_bytes(self, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[bytes]: + """ + Read content from this SharedMemoryMap with the given name and starting at the given offset. + content_offset = 0 means read from the beginning of the content. + bytes_to_read = 0 means read the entire content. + Returns the content as bytes if successful, None otherwise. + """ + content_length = self._get_content_length() + if content_length is None: + return None + # Seek past the header and get to the content + self.mem_map.seek(consts.CONTENT_HEADER_TOTAL_BYTES) + if content_offset > 0: + self.mem_map.seek(content_offset, os.SEEK_CUR) + if bytes_to_read > 0: + # Read up to the specified number of bytes to read + content = self.mem_map.read(bytes_to_read) + else: + # Read the entire content + content = self.mem_map.read() + return content + + def dispose(self, is_delete_file: bool = True) -> bool: + """ + Close the underlying memory map. 
+ Returns True if the resources were disposed, False otherwise. + """ + success = True + if is_delete_file: + if not self.file_accessor.delete_mem_map(self.mem_map_name, self.mem_map): + success = False + mem_map = self.mem_map + mem_map.close() + return success + + def _bytes_to_long(self, input_bytes) -> int: + """ + Decode a set of bytes representing a long. + This uses the format that the functions host (i.e. C#) uses. + """ + return struct.unpack(" Optional[int]: + """ + Read the header of the memory map to determine the length of content contained in that + memory map. + Returns the content length as a non-negative integer if successful, None otherwise. + """ + self.mem_map.seek(consts.MEM_MAP_INITIALIZED_FLAG_NUM_BYTES) + header_bytes = self.mem_map.read(consts.CONTENT_LENGTH_NUM_BYTES) + content_length = self._bytes_to_long(header_bytes) + return content_length \ No newline at end of file From afa115b312a129ddbdf4f3dd8cdf635be26ece16 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 18 Feb 2021 16:49:47 -0800 Subject: [PATCH 34/76] Moving shared memory data transfer related changes into separate directory --- azure_functions_worker/bindings/datumdef.py | 13 ++++++++++--- azure_functions_worker/bindings/meta.py | 2 +- azure_functions_worker/dispatcher.py | 2 +- .../__init__.py | 0 .../file_accessor.py | 0 .../file_accessor_factory.py | 0 .../file_accessor_unix.py | 0 .../file_accessor_windows.py | 0 .../shared_memory_constants.py | 0 .../shared_memory_manager.py | 8 ++++---- .../shared_memory_map.py | 1 + .../shared_memory_metadata.py | 0 12 files changed, 17 insertions(+), 9 deletions(-) rename azure_functions_worker/{mmap_handler => shared_memory_data_transfer}/__init__.py (100%) rename azure_functions_worker/{mmap_handler => shared_memory_data_transfer}/file_accessor.py (100%) rename azure_functions_worker/{mmap_handler => shared_memory_data_transfer}/file_accessor_factory.py (100%) rename azure_functions_worker/{mmap_handler => 
shared_memory_data_transfer}/file_accessor_unix.py (100%) rename azure_functions_worker/{mmap_handler => shared_memory_data_transfer}/file_accessor_windows.py (100%) rename azure_functions_worker/{mmap_handler => shared_memory_data_transfer}/shared_memory_constants.py (100%) rename azure_functions_worker/{bindings => shared_memory_data_transfer}/shared_memory_manager.py (95%) rename azure_functions_worker/{mmap_handler => shared_memory_data_transfer}/shared_memory_map.py (99%) rename azure_functions_worker/{bindings => shared_memory_data_transfer}/shared_memory_metadata.py (100%) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index b9f9076bc..5aa6ffd3b 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -95,7 +95,10 @@ def from_typed_data(cls, td: protos.TypedData): return cls(val, tt) @classmethod - def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: SharedMemoryManager) -> Optional[Datum]: + def from_rpc_shared_memory( + cls, + shmem: protos.RpcSharedMemory, + shmem_mgr: SharedMemoryManager) -> Optional[Datum]: """ Reads the specified shared memory region and converts the read data into a datum object of the corresponding type. 
@@ -114,12 +117,16 @@ def from_rpc_shared_memory(cls, shmem: protos.RpcSharedMemory, shmem_mgr: Shared if val is not None: ret_val = cls(val, 'string') if ret_val is not None: - logger.info(f'Read {count} bytes from memory map {mem_map_name} for data type {data_type}') + logger.info( + f'Read {count} bytes from memory map {mem_map_name} for data type {data_type}') return ret_val return None @classmethod - def to_rpc_shared_memory(cls, datum: Datum, shmem_mgr: SharedMemoryManager) -> Optional[protos.RpcSharedMemory]: + def to_rpc_shared_memory( + cls, + datum: Datum, + shmem_mgr: SharedMemoryManager) -> Optional[protos.RpcSharedMemory]: """ Writes the given value to shared memory and returns the corresponding RpcSharedMemory object which can be sent back to the functions host over RPC. diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 4d5d39805..2bcde4403 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -7,7 +7,7 @@ from . import datumdef from . 
import generic -from .shared_memory_manager import SharedMemoryManager +from ..shared_memory_data_transfer.shared_memory_manager import SharedMemoryManager def get_binding_registry(): diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 03f2f3da1..f948d9592 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -33,7 +33,7 @@ from .utils.tracing import marshall_exception_trace from .utils.dependency import DependencyManager from .utils.wrappers import disable_feature_by -from .bindings.shared_memory_manager import SharedMemoryManager +from .shared_memory_data_transfer.shared_memory_manager import SharedMemoryManager _TRUE = "true" diff --git a/azure_functions_worker/mmap_handler/__init__.py b/azure_functions_worker/shared_memory_data_transfer/__init__.py similarity index 100% rename from azure_functions_worker/mmap_handler/__init__.py rename to azure_functions_worker/shared_memory_data_transfer/__init__.py diff --git a/azure_functions_worker/mmap_handler/file_accessor.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py similarity index 100% rename from azure_functions_worker/mmap_handler/file_accessor.py rename to azure_functions_worker/shared_memory_data_transfer/file_accessor.py diff --git a/azure_functions_worker/mmap_handler/file_accessor_factory.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py similarity index 100% rename from azure_functions_worker/mmap_handler/file_accessor_factory.py rename to azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py diff --git a/azure_functions_worker/mmap_handler/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py similarity index 100% rename from azure_functions_worker/mmap_handler/file_accessor_unix.py rename to azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py diff --git 
a/azure_functions_worker/mmap_handler/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py similarity index 100% rename from azure_functions_worker/mmap_handler/file_accessor_windows.py rename to azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py diff --git a/azure_functions_worker/mmap_handler/shared_memory_constants.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py similarity index 100% rename from azure_functions_worker/mmap_handler/shared_memory_constants.py rename to azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py diff --git a/azure_functions_worker/bindings/shared_memory_manager.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py similarity index 95% rename from azure_functions_worker/bindings/shared_memory_manager.py rename to azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py index cde6515ca..4627a587c 100644 --- a/azure_functions_worker/bindings/shared_memory_manager.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py @@ -2,13 +2,13 @@ # Licensed under the MIT License. 
from __future__ import annotations -from azure_functions_worker.bindings.shared_memory_metadata import SharedMemoryMetadata import uuid from typing import Dict, Optional +from .shared_memory_constants import SharedMemoryConstants as consts +from .file_accessor_factory import FileAccessorFactory +from .shared_memory_metadata import SharedMemoryMetadata +from .shared_memory_map import SharedMemoryMap from ..logging import logger -from ..mmap_handler.file_accessor_factory import FileAccessorFactory -from ..mmap_handler.shared_memory_constants import SharedMemoryConstants as consts -from ..mmap_handler.shared_memory_map import SharedMemoryMap from ..utils.common import is_envvar_true from ..constants import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED diff --git a/azure_functions_worker/mmap_handler/shared_memory_map.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py similarity index 99% rename from azure_functions_worker/mmap_handler/shared_memory_map.py rename to azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py index 6768df47c..b8a0e95fd 100644 --- a/azure_functions_worker/mmap_handler/shared_memory_map.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py @@ -8,6 +8,7 @@ import sys from typing import Optional from .shared_memory_constants import SharedMemoryConstants as consts +from .file_accessor import FileAccessor from ..logging import logger diff --git a/azure_functions_worker/bindings/shared_memory_metadata.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py similarity index 100% rename from azure_functions_worker/bindings/shared_memory_metadata.py rename to azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py From 26e2919904e2d5aadf8b8330f85ccfb4e64d2aea Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 18 Feb 2021 17:14:13 -0800 Subject: [PATCH 35/76] Input error checks for shared memory map ctor --- 
.../shared_memory_data_transfer/shared_memory_map.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py index b8a0e95fd..87f67e147 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py @@ -17,6 +17,10 @@ class SharedMemoryMap: Shared memory region to read/write data from. """ def __init__(self, file_accessor: FileAccessor, mem_map_name: str, mem_map: mmap.mmap): + if mem_map is None: + raise Exception(f'Cannot initialize SharedMemoryMap. Invalid memory map provided') + if mem_map_name is None or mem_map_name == '': + raise Exception(f'Cannot initialize SharedMemoryMap. Invalid name {mem_map_name}') self.file_accessor = file_accessor self.mem_map_name = mem_map_name self.mem_map = mem_map From 6001f6c1b49a8dc6b9b4355010b92e86aa12dac6 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 18 Feb 2021 17:58:32 -0800 Subject: [PATCH 36/76] Rebase fix --- azure_functions_worker/bindings/datumdef.py | 11 ++--------- azure_functions_worker/bindings/meta.py | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 5aa6ffd3b..f0dfe1430 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -155,8 +155,7 @@ def to_rpc_shared_memory( f'for data type {data_type}') return shmem -def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, - invocation_id: str) -> protos.TypedData: +def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) elif datum.type == 'bytes': @@ -173,13 +172,7 @@ def datum_as_proto(datum: Datum, shmem_mgr: SharedMemoryManager, enable_content_negotiation=False, 
body=datum_as_proto(datum.value['body']), )) -<<<<<<< HEAD - raise NotImplementedError( - 'unexpected Datum type: {!r}'.format(datum.type) - ) -======= else: raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) - ) ->>>>>>> Cleaning up, addressing comments + ) \ No newline at end of file diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 2bcde4403..24d35cd73 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -137,4 +137,4 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, parameter_binding = protos.ParameterBinding( name=out_name, data=rpc_val) - return parameter_binding + return parameter_binding \ No newline at end of file From 289255c58f1851ddc07c180a238c95c64f17c6c4 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Thu, 18 Feb 2021 19:39:16 -0800 Subject: [PATCH 37/76] Trying to make lint happy --- azure_functions_worker/bindings/__init__.py | 4 +- azure_functions_worker/bindings/datumdef.py | 2 +- azure_functions_worker/dispatcher.py | 16 ++-- .../file_accessor.py | 22 +++-- .../file_accessor_factory.py | 4 +- .../file_accessor_unix.py | 53 ++++++---- .../file_accessor_windows.py | 18 ++-- .../shared_memory_constants.py | 42 ++++---- .../shared_memory_manager.py | 96 +++++++++++-------- .../shared_memory_map.py | 50 ++++++---- tests/unittests/test_file_accessor.py | 17 ++++ 11 files changed, 199 insertions(+), 125 deletions(-) create mode 100644 tests/unittests/test_file_accessor.py diff --git a/azure_functions_worker/bindings/__init__.py b/azure_functions_worker/bindings/__init__.py index d0a268031..bfc9070c0 100644 --- a/azure_functions_worker/bindings/__init__.py +++ b/azure_functions_worker/bindings/__init__.py @@ -6,7 +6,8 @@ from .meta import check_output_type_annotation from .meta import has_implicit_output from .meta import is_trigger_binding -from .meta import from_incoming_proto, to_outgoing_proto, 
to_outgoing_param_binding +from .meta import from_incoming_proto, to_outgoing_proto, \ + to_outgoing_param_binding from .out import Out @@ -16,4 +17,5 @@ 'check_input_type_annotation', 'check_output_type_annotation', 'has_implicit_output', 'from_incoming_proto', 'to_outgoing_proto', 'TraceContext', + 'to_outgoing_param_binding' ) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index f0dfe1430..895d1126e 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -1,11 +1,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations from typing import Any, Optional import json from .. import protos from ..logging import logger +from ..shared_memory_data_transfer.shared_memory_manager import SharedMemoryManager class Datum: diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index f948d9592..5533a2ef4 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -33,7 +33,8 @@ from .utils.tracing import marshall_exception_trace from .utils.dependency import DependencyManager from .utils.wrappers import disable_feature_by -from .shared_memory_data_transfer.shared_memory_manager import SharedMemoryManager +from .shared_memory_data_transfer.shared_memory_manager import \ + SharedMemoryManager _TRUE = "true" @@ -488,14 +489,16 @@ async def _handle__function_environment_reload_request(self, req): async def _handle__close_shared_memory_resources_request(self, req): """ - Frees any memory maps that were produced as output for a given invocation. - This is called after the functions host is done reading the output from the worker and - wants the worker to free up those resources. + Frees any memory maps that were produced as output for a given + invocation. 
+ This is called after the functions host is done reading the output from + the worker and wants the worker to free up those resources. """ close_request = req.close_shared_memory_resources_request map_names = close_request.map_names # Assign default value of False to all result values. - # If we are successfully able to close a memory map, its result will be set to True. + # If we are successfully able to close a memory map, its result will be + # set to True. results = {map_name: False for map_name in map_names} try: @@ -506,7 +509,8 @@ async def _handle__close_shared_memory_resources_request(self, req): # TODO log exception print(str(ex)) finally: - response = protos.CloseSharedMemoryResourcesResponse(close_map_results=results) + response = protos.CloseSharedMemoryResourcesResponse( + close_map_results=results) return protos.StreamingMessage( request_id=self.request_id, close_shared_memory_resources_response=response) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py index a608f1d51..eb586ed86 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations import mmap from abc import ABCMeta, abstractmethod from typing import Optional @@ -11,8 +10,8 @@ class FileAccessor(metaclass=ABCMeta): """ For accessing memory maps. - This is an interface that must be implemented by sub-classes to provide platform-specific - support for accessing memory maps. + This is an interface that must be implemented by sub-classes to provide + platform-specific support for accessing memory maps. 
Currently the following two sub-classes are implemented: 1) FileAccessorWindows 2) FileAccessorUnix @@ -30,7 +29,8 @@ def open_mem_map( raise NotImplementedError @abstractmethod - def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap.mmap]: + def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ + -> Optional[mmap.mmap]: """ Creates a new memory map. Returns the created mmap if successful, None otherwise. @@ -41,16 +41,18 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap. def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: """ Deletes the memory map and any backing resources associated with it. - If there is no memory map with the given name, then no action is performed. - Returns True if the memory map was successfully deleted, False otherwise. + If there is no memory map with the given name, then no action is + performed. + Returns True if the memory map was successfully deleted, False + otherwise. """ raise NotImplementedError def _is_mem_map_initialized(self, mem_map: mmap.mmap) -> bool: """ Checks if the dirty bit of the memory map has been set or not. - This is used to check if a new memory map was created successfully and we don't end up - using an existing one. + This is used to check if a new memory map was created successfully and + we don't end up using an existing one. """ # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) @@ -67,8 +69,8 @@ def _is_mem_map_initialized(self, mem_map: mmap.mmap) -> bool: def _set_mem_map_initialized(self, mem_map: mmap.mmap): """ - Sets the dirty bit in the header of the memory map to indicate that this memory map is not - new anymore. + Sets the dirty bit in the header of the memory map to indicate that this + memory map is not new anymore. 
""" # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py index 4af13a7d3..4a9c03275 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py @@ -8,8 +8,8 @@ class FileAccessorFactory: """ - For creating the platform-appropriate instance of FileAccessor to perform memory map related - operations. + For creating the platform-appropriate instance of FileAccessor to perform + memory map related operations. """ @staticmethod def create_file_accessor(): diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index 3c815d9ea..17ea51154 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations import os import mmap from typing import Optional @@ -27,7 +26,8 @@ def open_mem_map( mem_map = mmap.mmap(fd.fileno(), mem_map_size, access=access) return mem_map - def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap.mmap]: + def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ + -> Optional[mmap.mmap]: fd = self._create_mem_map_file(mem_map_name, mem_map_size) if fd is None: return None @@ -42,10 +42,12 @@ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: fd = self._open_mem_map_file(mem_map_name) os.remove(fd.name) except Exception as e: - # In this case, we don't want to fail right away but log that deletion was unsuccessful. 
- # These logs can help identify if we may be leaking memory and not cleaning up the - # created memory maps. - logger.error(f'Cannot delete memory map {mem_map_name} - {e}', exc_info=True) + # In this case, we don't want to fail right away but log that + # deletion was unsuccessful. + # These logs can help identify if we may be leaking memory and not + # cleaning up the created memory maps. + logger.error(f'Cannot delete memory map {mem_map_name} - {e}', + exc_info=True) return False mem_map.close() return True @@ -55,10 +57,11 @@ def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: Get the file descriptor of an existing memory map. Returns the BufferedRandom stream to the file. """ - # Iterate over all the possible directories where the memory map could be present and try - # to open it. + # Iterate over all the possible directories where the memory map could + # be present and try to open it. for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + file_path = os.path.join(mem_map_temp_dir, + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) try: fd = open(file_path, 'r+b') return fd @@ -72,10 +75,11 @@ def _create_mem_map_dir(self): """ Create a directory to create memory maps. """ - # Iterate over all the possible directories where the memory map could be created and try - # to create in one of them. + # Iterate over all the possible directories where the memory map could + # be created and try to create in one of them. 
for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - dir_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) + dir_path = os.path.join(mem_map_temp_dir, + consts.UNIX_TEMP_DIR_SUFFIX) if os.path.isdir(dir_path): # One of the directories already exists, no need return @@ -83,15 +87,17 @@ def _create_mem_map_dir(self): os.makedirs(dir_path) return except: - # We try to create a directory in each of the applicable directory paths until we - # successfully create one or one that already exists is found. + # We try to create a directory in each of the applicable + # directory paths until we successfully create one or one that + # already exists is found. # Even if this fails, we keep trying others. pass # Could not create a directory in any of the applicable directory paths. # We will not be able to create any memory maps so we fail. raise Exception(f'Cannot create directory for memory maps') - def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) -> Optional[int]: + def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ + -> Optional[int]: """ Get the file descriptor for a new memory map. Returns the file descriptor. 
@@ -99,11 +105,15 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) -> Opti dir_exists = False for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: # Check if the file already exists - file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + file_path = os.path.join(mem_map_temp_dir, + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) if os.path.exists(file_path): - raise Exception(f'File {file_path} for memory map {mem_map_name} already exists') + raise Exception( + f'File {file_path} for memory map {mem_map_name} ' + f'already exists') # Check if the parent directory exists - dir_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFI) + dir_path = os.path.join(mem_map_temp_dir, + consts.UNIX_TEMP_DIR_SUFFIX) if os.path.isdir(dir_path): dir_exists = True # Check if any of the parent directories exists @@ -111,7 +121,8 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) -> Opti self._create_mem_map_dir() # Create the file for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + file_path = os.path.join(mem_map_temp_dir, + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) try: fd = os.open(file_path, os.O_CREAT | os.O_TRUNC | os.O_RDWR) # Write 0s to allocate @@ -124,5 +135,7 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) -> Opti return fd except: pass - logger.error(f'Cannot create memory map {mem_map_name} with size {mem_mem_map_size}') + logger.error( + f'Cannot create memory map {mem_map_name} with size ' + f'{mem_mem_map_size}') return None \ No newline at end of file diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py index d50c6c814..b4ea69310 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ 
b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations import mmap from typing import Optional from .file_accessor import FileAccessor @@ -22,16 +21,23 @@ def open_mem_map( mmap_ret = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) return mmap_ret except Exception as e: - logger.warn(f'Cannot open memory map {mem_map_name} with size {mem_map_size} - {e}') + logger.warn( + f'Cannot open memory map {mem_map_name} with size ' + f'{mem_map_size} - {e}') return None - def create_mem_map(self, mem_map_name: str, mem_map_size: int) -> Optional[mmap.mmap]: - # Windows also creates the mmap when trying to open it, if it does not already exist. - mem_map = self.open_mem_map(mem_map_name, mem_map_size, mmap.ACCESS_WRITE) + def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ + -> Optional[mmap.mmap]: + # Windows also creates the mmap when trying to open it, if it does not + # already exist. 
+ mem_map = self.open_mem_map(mem_map_name, mem_map_size, + mmap.ACCESS_WRITE) if mem_map is None: return None if self._is_mem_map_initialized(mem_map): - raise Exception(f'Cannot create memory map {mem_map_name} as it already exists') + raise Exception( + f'Cannot create memory map {mem_map_name} as it ' + f'already exists') self._set_mem_map_initialized(mem_map) return mem_map diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py index f95cd0753..e3e5f7023 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py @@ -9,20 +9,22 @@ class SharedMemoryConstants: # Suffix for the temp directories containing memory maps in Unix UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" - # The length of a bool which is the length of the part of the header flag specifying if the - # memory map is already created and used. - # This is to distinguish between new memory maps and ones that were previously created and may - # be in use already. + # The length of a bool which is the length of the part of the header flag + # specifying if the memory map is already created and used. + # This is to distinguish between new memory maps and ones that were + # previously created and may be in use already. MEM_MAP_INITIALIZED_FLAG_NUM_BYTES = 1 - # The length of a long which is the length of the part of the header specifying content length - # in the memory map. + # The length of a long which is the length of the part of the header + # specifying content length in the memory map. 
CONTENT_LENGTH_NUM_BYTES = 8 # The total length of the header - CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + CONTENT_LENGTH_NUM_BYTES + CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + \ + CONTENT_LENGTH_NUM_BYTES - # A flag to indicate that the memory map has been initialized, may be in use and is not new. + # A flag to indicate that the memory map has been initialized, may be in use + # and is not new. # This represents a boolean value of True. MEM_MAP_INITIALIZED_FLAG = b'\x01' @@ -30,24 +32,26 @@ class SharedMemoryConstants: # This represents a boolean value of False. MEM_MAP_UNINITIALIZED_FLAG = b'\x00' - # Minimum size (in number of bytes) an object must be in order for it to be transferred over - # shared memory. + # Minimum size (in number of bytes) an object must be in order for it to be + # transferred over shared memory. # If the object is smaller than this, gRPC is used. # Note: This needs to be consistent among the host and workers. # e.g. in the host, it is defined in SharedMemoryConstants.cs - MIN_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER = 1024 * 1024 # 1 MB + MIN_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB - # Maximum size (in number of bytes) an object must be in order for it to be transferred over - # shared memory. - # This limit is imposed because initializing objects like greater than 2GB is not allowed in - # DotNet. + # Maximum size (in number of bytes) an object must be in order for it to be + # transferred over shared memory. + # This limit is imposed because initializing objects like greater than 2GB + # is not allowed in DotNet. # Ref: https://stackoverflow.com/a/3944336/3132415 # Note: This needs to be consistent among the host and workers. # e.g. in the host, it is defined in SharedMemoryConstants.cs - MAX_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER = 1024 * 1024 # 1 MB + MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB - # This is what the size of a character is in DotNet. 
Can be verified by doing "sizeof(char)". - # To keep the limits consistent, when determining if a string can be transferred over shared - # memory, we multiply the number of characters by this constant. + # This is what the size of a character is in DotNet. Can be verified by + # doing "sizeof(char)". + # To keep the limits consistent, when determining if a string can be + # transferred over shared memory, we multiply the number of characters + # by this constant. # Corresponding logic in the host can be found in SharedMemoryManager.cs SIZE_OF_CHAR_BYTES = 2 \ No newline at end of file diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py index 4627a587c..dfb713ae2 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations import uuid from typing import Dict, Optional from .shared_memory_constants import SharedMemoryConstants as consts @@ -15,51 +14,54 @@ class SharedMemoryManager: """ - Performs all operations related to reading/writing data from/to shared memory. - This is used for transferring input/output data of the function from/to the functions host over - shared memory as opposed to RPC to improve the rate of data transfer and the function's - end-to-end latency. + Performs all operations related to reading/writing data from/to shared + memory. + This is used for transferring input/output data of the function from/to the + functions host over shared memory as opposed to RPC to improve the rate of + data transfer and the function's end-to-end latency. 
""" def __init__(self): - # The allocated memory maps are tracked here so that a reference to them is kept open until - # they have been used (e.g. if they contain a function's output, it is read by the - # functions host). - # Having a mapping of the name and the memory map is then later used to close a given - # memory map by its name, after it has been used. + # The allocated memory maps are tracked here so that a reference to them + # is kept open until they have been used (e.g. if they contain a + # function's output, it is read by the functions host). + # Having a mapping of the name and the memory map is then later used to + # close a given memory map by its name, after it has been used. # key: mem_map_name, val: SharedMemoryMap self.allocated_mem_maps: Dict[str, SharedMemoryMap] = {} self.file_accessor = FileAccessorFactory.create_file_accessor() def is_enabled(self) -> bool: """ - Whether supported types should be transferred between functions host and the worker using - shared memory. + Whether supported types should be transferred between functions host and + the worker using shared memory. """ - return is_envvar_true(FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) + return is_envvar_true( + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) def is_supported(self, datum: Datum) -> bool: """ - Whether the given Datum object can be transferred to the functions host using shared - memory. - This logic is kept consistent with the host's which can be found in SharedMemoryManager.cs + Whether the given Datum object can be transferred to the functions host + using shared memory. 
+ This logic is kept consistent with the host's which can be found in
+ SharedMemoryManager.cs
 """
 if datum.type == 'bytes':
 num_bytes = len(datum.value)
- if num_bytes >= consts.MIN_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER and \
- num_bytes <= consts.MAX_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER:
+ if num_bytes >= consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER and \
+ num_bytes <= consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER:
 return True
 elif datum.type == 'string':
 num_bytes = len(datum.value) * consts.SIZE_OF_CHAR_BYTES
- if num_bytes >= consts.MIN_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER and \
- num_bytes <= consts.MAX_OBJECT_BYTES_FOR_SHARED_MEMORY_TRANSFER:
+ if num_bytes >= consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER and \
+ num_bytes <= consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER:
 return True
 return False
 def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]:
 """
 Writes the given bytes into shared memory.
- Returns metadata about the shared memory region to which the content was written if
- successful, None otherwise.
+ Returns metadata about the shared memory region to which the content was
+ written if successful, None otherwise.
 """
 if content is None:
 return None
@@ -80,37 +82,44 @@ def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]:
 def put_string(self, content: str) -> Optional[SharedMemoryMetadata]:
 """
 Writes the given string into shared memory.
- Returns the name of the memory map into which the data was written if succesful, None
- otherwise.
+ Returns the name of the memory map into which the data was written if
+ successful, None otherwise.
""" if content is None: return None content_bytes = content.encode('utf-8') return self.put_bytes(content_bytes) - def get_bytes(self, mem_map_name: str, offset: int, count: int) -> Optional[bytes]: + def get_bytes(self, mem_map_name: str, offset: int, count: int) \ + -> Optional[bytes]: """ - Reads data from the given memory map with the provided name, starting at the provided - offset and reading a total of count bytes. - Returns the data read from shared memory as bytes if successful, None otherwise. + Reads data from the given memory map with the provided name, starting at + the provided offset and reading a total of count bytes. + Returns the data read from shared memory as bytes if successful, None + otherwise. """ if offset != 0: - logger.error(f'Cannot read bytes. Non-zero offset ({offset}) not supported.') + logger.error( + f'Cannot read bytes. Non-zero offset ({offset}) ' + f'not supported.') return None shared_mem_map = self.open(mem_map_name, count) if shared_mem_map is None: return None try: - content = shared_mem_map.get_bytes(content_offset=0, bytes_to_read=count) + content = shared_mem_map.get_bytes(content_offset=0, + bytes_to_read=count) finally: shared_mem_map.dispose(is_delete_file=False) return content - def get_string(self, mem_map_name: str, offset: int, count: int) -> Optional[str]: + def get_string(self, mem_map_name: str, offset: int, count: int) \ + -> Optional[str]: """ - Reads data from the given memory map with the provided name, starting at the provided - offset and reading a total of count bytes. - Returns the data read from shared memory as a string if successful, None otherwise. + Reads data from the given memory map with the provided name, starting at + the provided offset and reading a total of count bytes. + Returns the data read from shared memory as a string if successful, None + otherwise. 
""" content_bytes = self.get_bytes(mem_map_name, offset, count) if content_bytes is None: @@ -120,20 +129,23 @@ def get_string(self, mem_map_name: str, offset: int, count: int) -> Optional[str def free_mem_map(self, mem_map_name: str): """ - Frees the memory map and any backing resources (e.g. file in the case of Unix) associated - with it. - If there is no memory map with the given name being tracked, then no action is performed. + Frees the memory map and any backing resources (e.g. file in the case of + Unix) associated with it. + If there is no memory map with the given name being tracked, then no + action is performed. Returns True if the memory map was freed successfully, False otherwise. """ if mem_map_name not in self.allocated_mem_maps: - logger.error(f'Cannot find memory map in list of allocations {mem_map_name}') + logger.error( + f'Cannot find memory map in list of allocations {mem_map_name}') return False shared_mem_map = self.allocated_mem_maps[mem_map_name] success = shared_mem_map.dispose() del self.allocated_mem_maps[mem_map_name] return success - def create(self, mem_map_name: str, content_length: int) -> Optional[SharedMemoryMap]: + def create(self, mem_map_name: str, content_length: int) \ + -> Optional[SharedMemoryMap]: """ Creates a new SharedMemoryMap with the given name and content length. Returns the SharedMemoryMap object if successful, None otherwise. @@ -144,9 +156,11 @@ def create(self, mem_map_name: str, content_length: int) -> Optional[SharedMemor return None return SharedMemoryMap(self.file_accessor, mem_map_name, mem_map) - def open(self, mem_map_name: str, content_length: int) -> Optional[SharedMemoryMap]: + def open(self, mem_map_name: str, content_length: int) \ + -> Optional[SharedMemoryMap]: """ - Opens an existing SharedMemoryMap with the given name and content length. + Opens an existing SharedMemoryMap with the given name and content + length. Returns the SharedMemoryMap object if successful, None otherwise. 
""" mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_length diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py index 87f67e147..85e2eff42 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations import mmap import os import struct @@ -16,11 +15,19 @@ class SharedMemoryMap: """ Shared memory region to read/write data from. """ - def __init__(self, file_accessor: FileAccessor, mem_map_name: str, mem_map: mmap.mmap): + def __init__( + self, + file_accessor: FileAccessor, + mem_map_name: str, + mem_map: mmap.mmap): if mem_map is None: - raise Exception(f'Cannot initialize SharedMemoryMap. Invalid memory map provided') + raise Exception( + f'Cannot initialize SharedMemoryMap. ' + f'Invalid memory map provided') if mem_map_name is None or mem_map_name == '': - raise Exception(f'Cannot initialize SharedMemoryMap. Invalid name {mem_map_name}') + raise Exception( + f'Cannot initialize SharedMemoryMap. Invalid name ' + f'{mem_map_name}') self.file_accessor = file_accessor self.mem_map_name = mem_map_name self.mem_map = mem_map @@ -28,7 +35,8 @@ def __init__(self, file_accessor: FileAccessor, mem_map_name: str, mem_map: mmap def put_bytes(self, content: bytes) -> int: """ Writes the given content bytes into this SharedMemoryMap. - The number of bytes written must be less than or equal to the size of the SharedMemoryMap. + The number of bytes written must be less than or equal to the size of + the SharedMemoryMap. Returns the number of bytes of content written. 
""" if content is None: @@ -37,23 +45,27 @@ def put_bytes(self, content: bytes) -> int: # Seek past the MemoryMapInitialized flag section of the header self.mem_map.seek(consts.MEM_MAP_INITIALIZED_FLAG_NUM_BYTES) # Write the content length into the header - content_length_bytes = content_length.to_bytes(consts.CONTENT_LENGTH_NUM_BYTES, - byteorder=sys.byteorder) + content_length_bytes = content_length.to_bytes( + consts.CONTENT_LENGTH_NUM_BYTES, byteorder=sys.byteorder) num_content_length_bytes = len(content_length_bytes) - num_content_length_bytes_written = self.mem_map.write(content_length_bytes) + num_content_length_bytes_written = self.mem_map.write( + content_length_bytes) if num_content_length_bytes_written != num_content_length_bytes: logger.error( - f'Cannot write content size into memory map {self.mem_map_name} ' - f'({num_content_length_bytes_written} != {num_content_length_bytes})') + f'Cannot write content size to memory map {self.mem_map_name} ' + f'({num_content_length_bytes_written} != ' + f'{num_content_length_bytes})') return 0 # Write the content num_content_bytes_written = self.mem_map.write(content) self.mem_map.flush() return num_content_bytes_written - def get_bytes(self, content_offset: int = 0, bytes_to_read: int = 0) -> Optional[bytes]: + def get_bytes(self, content_offset: int = 0, bytes_to_read: int = 0) \ + -> Optional[bytes]: """ - Read content from this SharedMemoryMap with the given name and starting at the given offset. + Read content from this SharedMemoryMap with the given name and starting + at the given offset. content_offset = 0 means read from the beginning of the content. bytes_to_read = 0 means read the entire content. Returns the content as bytes if successful, None otherwise. 
@@ -80,10 +92,9 @@ def dispose(self, is_delete_file: bool = True) -> bool: """ success = True if is_delete_file: - if not self.file_accessor.delete_mem_map(self.mem_map_name, self.mem_map): - success = False - mem_map = self.mem_map - mem_map.close() + success = self.file_accessor.delete_mem_map(self.mem_map_name, + self.mem_map) + self.mem_map.close() return success def _bytes_to_long(self, input_bytes) -> int: @@ -95,9 +106,10 @@ def _bytes_to_long(self, input_bytes) -> int: def _get_content_length(self) -> Optional[int]: """ - Read the header of the memory map to determine the length of content contained in that - memory map. - Returns the content length as a non-negative integer if successful, None otherwise. + Read the header of the memory map to determine the length of content + contained in that memory map. + Returns the content length as a non-negative integer if successful, + None otherwise. """ self.mem_map.seek(consts.MEM_MAP_INITIALIZED_FLAG_NUM_BYTES) header_bytes = self.mem_map.read(consts.CONTENT_LENGTH_NUM_BYTES) diff --git a/tests/unittests/test_file_accessor.py b/tests/unittests/test_file_accessor.py new file mode 100644 index 000000000..81dd2dc0c --- /dev/null +++ b/tests/unittests/test_file_accessor.py @@ -0,0 +1,17 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import unittest +import uuid +from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory import FileAccessorFactory + + +class TestFileAccessor(unittest.TestCase): + def setUp(self): + self.file_accessor = FileAccessorFactory.create_file_accessor() + + def test_init_shared_memory_map(self): + mem_map_name = str(uuid.uuid4()) + content_size = 2 * 1024 * 1024 # 2 MB + mem_map = self.file_accessor.create_mem_map(mem_map_name, content_size) + assert mem_map is not None \ No newline at end of file From dca1c2c2ca7e82a336117fceee768bff51ec9b8b Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 06:46:40 -0800 Subject: [PATCH 38/76] Making flake8 happy --- azure_functions_worker/bindings/datumdef.py | 25 +++++++------ azure_functions_worker/bindings/meta.py | 25 +++++++------ azure_functions_worker/constants.py | 3 +- .../shared_memory_data_transfer/__init__.py | 9 ++--- .../file_accessor.py | 2 +- .../file_accessor_factory.py | 2 +- .../file_accessor_unix.py | 36 +++++++++++-------- .../file_accessor_windows.py | 6 ++-- .../shared_memory_constants.py | 6 ++-- .../shared_memory_manager.py | 5 +-- .../shared_memory_map.py | 4 +-- .../shared_memory_metadata.py | 3 +- 12 files changed, 72 insertions(+), 54 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 895d1126e..f55942de7 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -1,11 +1,11 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from __future__ import annotations from typing import Any, Optional import json from .. 
import protos from ..logging import logger -from ..shared_memory_data_transfer.shared_memory_manager import SharedMemoryManager class Datum: @@ -98,10 +98,10 @@ def from_typed_data(cls, td: protos.TypedData): def from_rpc_shared_memory( cls, shmem: protos.RpcSharedMemory, - shmem_mgr: SharedMemoryManager) -> Optional[Datum]: + shmem_mgr) -> Optional[Datum]: """ - Reads the specified shared memory region and converts the read data into a datum object of - the corresponding type. + Reads the specified shared memory region and converts the read data into + a datum object of the corresponding type. """ mem_map_name = shmem.name offset = shmem.offset @@ -118,7 +118,8 @@ def from_rpc_shared_memory( ret_val = cls(val, 'string') if ret_val is not None: logger.info( - f'Read {count} bytes from memory map {mem_map_name} for data type {data_type}') + f'Read {count} bytes from memory map {mem_map_name} ' + f'for data type {data_type}') return ret_val return None @@ -126,10 +127,11 @@ def from_rpc_shared_memory( def to_rpc_shared_memory( cls, datum: Datum, - shmem_mgr: SharedMemoryManager) -> Optional[protos.RpcSharedMemory]: + shmem_mgr) -> Optional[protos.RpcSharedMemory]: """ - Writes the given value to shared memory and returns the corresponding RpcSharedMemory - object which can be sent back to the functions host over RPC. + Writes the given value to shared memory and returns the corresponding + RpcSharedMemory object which can be sent back to the functions host over + RPC. 
""" if datum.type == 'bytes': value = datum.value @@ -151,10 +153,11 @@ def to_rpc_shared_memory( count=shared_mem_meta.count, type=data_type) logger.info( - f'Wrote {shared_mem_meta.count} bytes to memory map {shared_mem_meta.mem_map_name} ' - f'for data type {data_type}') + f'Wrote {shared_mem_meta.count} bytes to memory map ' + f'{shared_mem_meta.mem_map_name} for data type {data_type}') return shmem + def datum_as_proto(datum: Datum) -> protos.TypedData: if datum.type == 'string': return protos.TypedData(string=datum.value) @@ -175,4 +178,4 @@ def datum_as_proto(datum: Datum) -> protos.TypedData: else: raise NotImplementedError( 'unexpected Datum type: {!r}'.format(datum.type) - ) \ No newline at end of file + ) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 24d35cd73..10c18373f 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -7,7 +7,8 @@ from . import datumdef from . import generic -from ..shared_memory_data_transfer.shared_memory_manager import SharedMemoryManager +from ..shared_memory_data_transfer.shared_memory_manager \ + import SharedMemoryManager def get_binding_registry(): @@ -72,7 +73,8 @@ def from_incoming_proto( pb_type = pb.WhichOneof('rpc_data') if pb_type == 'rpc_shared_memory': # Data was sent over shared memory, attempt to read - datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) + datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, + shmem_mgr) # TODO gochaudh: check trigger_metadata (try with blob triggered func) elif pb_type == 'data': val = pb.data @@ -115,9 +117,10 @@ def to_outgoing_proto(binding: str, obj: typing.Any, *, def to_outgoing_param_binding(binding: str, obj: typing.Any, *, - pytype: typing.Optional[type], - out_name: str, - shmem_mgr: SharedMemoryManager) -> protos.ParameterBinding: + pytype: typing.Optional[type], + out_name: str, + shmem_mgr: SharedMemoryManager) \ + -> 
protos.ParameterBinding: datum = get_datum(binding, obj, pytype) shared_mem_value = None parameter_binding = None @@ -126,15 +129,15 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) # Check if data was written into shared memory if shared_mem_value is not None: - # If it was, then use the rpc_shared_memory field in the response message + # If it was, then use the rpc_shared_memory field in response message parameter_binding = protos.ParameterBinding( - name=out_name, - rpc_shared_memory=shared_mem_value) + name=out_name, + rpc_shared_memory=shared_mem_value) else: # If not, send it as part of the response message over RPC rpc_val = datumdef.datum_as_proto(datum) assert rpc_val is not None parameter_binding = protos.ParameterBinding( - name=out_name, - data=rpc_val) - return parameter_binding \ No newline at end of file + name=out_name, + data=rpc_val) + return parameter_binding diff --git a/azure_functions_worker/constants.py b/azure_functions_worker/constants.py index bce5b6a34..0996455c4 100644 --- a/azure_functions_worker/constants.py +++ b/azure_functions_worker/constants.py @@ -33,4 +33,5 @@ MODULE_NOT_FOUND_TS_URL = "https://aka.ms/functions-modulenotfound" # App Settings -FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED = "FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED" \ No newline at end of file +FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED = \ + "FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED" diff --git a/azure_functions_worker/shared_memory_data_transfer/__init__.py b/azure_functions_worker/shared_memory_data_transfer/__init__.py index a70bd1294..ccc438ca9 100644 --- a/azure_functions_worker/shared_memory_data_transfer/__init__.py +++ b/azure_functions_worker/shared_memory_data_transfer/__init__.py @@ -1,9 +1,10 @@ """ This module provides functionality for accessing shared memory maps. 
-These are used for transferring data between functions host and the worker proces. -The initial set of corresponding changes to enable shared memory maps in the functions host can be -found in the following Pull Request: +These are used for transferring data between functions host and the worker +proces. +The initial set of corresponding changes to enable shared memory maps in the +functions host can be found in the following Pull Request: https://github.com/Azure/azure-functions-host/pull/6836 The issue tracking shared memory transfer related changes is: https://github.com/Azure/azure-functions-host/issues/6791 -""" \ No newline at end of file +""" diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py index eb586ed86..fab15357c 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py @@ -77,4 +77,4 @@ def _set_mem_map_initialized(self, mem_map: mmap.mmap): # Set the dirty bit mem_map.write(consts.MEM_MAP_INITIALIZED_FLAG) # Seek back the memory map to the begginging - mem_map.seek(0) \ No newline at end of file + mem_map.seek(0) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py index 4a9c03275..290a2b843 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py @@ -16,4 +16,4 @@ def create_file_accessor(): if os.name == 'nt': return FileAccessorWindows() else: - return FileAccessorUnix() \ No newline at end of file + return FileAccessorUnix() diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index 
17ea51154..adeaecbb0 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -47,7 +47,7 @@ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: # These logs can help identify if we may be leaking memory and not # cleaning up the created memory maps. logger.error(f'Cannot delete memory map {mem_map_name} - {e}', - exc_info=True) + exc_info=True) return False mem_map.close() return True @@ -61,7 +61,7 @@ def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: # be present and try to open it. for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) try: fd = open(file_path, 'r+b') return fd @@ -71,22 +71,24 @@ def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: logger.error(f'Cannot open memory map {mem_map_name}') return None - def _create_mem_map_dir(self): + def _create_mem_map_dir(self) -> bool: """ Create a directory to create memory maps. + Returns True if either a valid directory already exists or one was + created successfully, False otherwise. """ # Iterate over all the possible directories where the memory map could # be created and try to create in one of them. for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: dir_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX) + consts.UNIX_TEMP_DIR_SUFFIX) if os.path.isdir(dir_path): # One of the directories already exists, no need - return + return True try: os.makedirs(dir_path) - return - except: + return True + except Exception: # We try to create a directory in each of the applicable # directory paths until we successfully create one or one that # already exists is found. @@ -94,7 +96,8 @@ def _create_mem_map_dir(self): pass # Could not create a directory in any of the applicable directory paths. 
# We will not be able to create any memory maps so we fail. - raise Exception(f'Cannot create directory for memory maps') + logger.error(f'Cannot create directory for memory maps') + return False def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ -> Optional[int]: @@ -106,23 +109,24 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: # Check if the file already exists file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) if os.path.exists(file_path): raise Exception( f'File {file_path} for memory map {mem_map_name} ' f'already exists') # Check if the parent directory exists dir_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX) + consts.UNIX_TEMP_DIR_SUFFIX) if os.path.isdir(dir_path): dir_exists = True # Check if any of the parent directories exists if not dir_exists: - self._create_mem_map_dir() + if not self._create_mem_map_dir(): + return None # Create the file for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) try: fd = os.open(file_path, os.O_CREAT | os.O_TRUNC | os.O_RDWR) # Write 0s to allocate @@ -133,9 +137,13 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ f'Cannot write 0s into new memory map {file_path} ' f'({bytes_written} != {mem_mem_map_size})') return fd - except: + except Exception: + # If the memory map could not be created in this directory, we + # keep trying in other applicable directories. pass + # Could not create the memory map in any of the applicable directory + # paths so we fail. 
logger.error( f'Cannot create memory map {mem_map_name} with size ' f'{mem_mem_map_size}') - return None \ No newline at end of file + return None diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py index b4ea69310..f92a8fa6d 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py @@ -31,7 +31,7 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ # Windows also creates the mmap when trying to open it, if it does not # already exist. mem_map = self.open_mem_map(mem_map_name, mem_map_size, - mmap.ACCESS_WRITE) + mmap.ACCESS_WRITE) if mem_map is None: return None if self._is_mem_map_initialized(mem_map): @@ -41,6 +41,6 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ self._set_mem_map_initialized(mem_map) return mem_map - def delete_mem_map(self, mem_map_name: str, mmap) -> bool: - mmap.close() + def delete_mem_map(self, mem_map_name: str, mem_map) -> bool: + mem_map.close() return True diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py index e3e5f7023..2f17eac21 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py @@ -37,7 +37,7 @@ class SharedMemoryConstants: # If the object is smaller than this, gRPC is used. # Note: This needs to be consistent among the host and workers. # e.g. in the host, it is defined in SharedMemoryConstants.cs - MIN_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB + MIN_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB # Maximum size (in number of bytes) an object must be in order for it to be # transferred over shared memory. 
@@ -46,7 +46,7 @@ class SharedMemoryConstants: # Ref: https://stackoverflow.com/a/3944336/3132415 # Note: This needs to be consistent among the host and workers. # e.g. in the host, it is defined in SharedMemoryConstants.cs - MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB + MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB # This is what the size of a character is in DotNet. Can be verified by # doing "sizeof(char)". @@ -54,4 +54,4 @@ class SharedMemoryConstants: # transferred over shared memory, we multiply the number of characters # by this constant. # Corresponding logic in the host can be found in SharedMemoryManager.cs - SIZE_OF_CHAR_BYTES = 2 \ No newline at end of file + SIZE_OF_CHAR_BYTES = 2 diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py index dfb713ae2..13e2f5f30 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py @@ -7,6 +7,7 @@ from .file_accessor_factory import FileAccessorFactory from .shared_memory_metadata import SharedMemoryMetadata from .shared_memory_map import SharedMemoryMap +from ..bindings.datumdef import Datum from ..logging import logger from ..utils.common import is_envvar_true from ..constants import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED @@ -108,7 +109,7 @@ def get_bytes(self, mem_map_name: str, offset: int, count: int) \ return None try: content = shared_mem_map.get_bytes(content_offset=0, - bytes_to_read=count) + bytes_to_read=count) finally: shared_mem_map.dispose(is_delete_file=False) return content @@ -167,4 +168,4 @@ def open(self, mem_map_name: str, content_length: int) \ mem_map = self.file_accessor.open_mem_map(mem_map_name, mem_map_size) if mem_map is None: return None - return SharedMemoryMap(self.file_accessor, mem_map_name, mem_map) \ No newline at end of file + 
return SharedMemoryMap(self.file_accessor, mem_map_name, mem_map) diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py index 85e2eff42..e163fe1da 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py @@ -93,7 +93,7 @@ def dispose(self, is_delete_file: bool = True) -> bool: success = True if is_delete_file: success = self.file_accessor.delete_mem_map(self.mem_map_name, - self.mem_map) + self.mem_map) self.mem_map.close() return success @@ -114,4 +114,4 @@ def _get_content_length(self) -> Optional[int]: self.mem_map.seek(consts.MEM_MAP_INITIALIZED_FLAG_NUM_BYTES) header_bytes = self.mem_map.read(consts.CONTENT_LENGTH_NUM_BYTES) content_length = self._bytes_to_long(header_bytes) - return content_length \ No newline at end of file + return content_length diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py index 138aa15b8..8e1c489aa 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py @@ -1,6 +1,7 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. + class SharedMemoryMetadata: """ Information about a shared memory region. 
@@ -9,4 +10,4 @@ def __init__(self, mem_map_name, count): # Name of the memory map self.mem_map_name = mem_map_name # Number of bytes of content in the memory map - self.count = count \ No newline at end of file + self.count = count From ad4def08cfd1959e50484139919b5eaa94ac13f2 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 06:54:34 -0800 Subject: [PATCH 39/76] Fixing more lint issues --- .../shared_memory_data_transfer/file_accessor_unix.py | 2 +- .../shared_memory_data_transfer/shared_memory_map.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index adeaecbb0..b3233879a 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -96,7 +96,7 @@ def _create_mem_map_dir(self) -> bool: pass # Could not create a directory in any of the applicable directory paths. # We will not be able to create any memory maps so we fail. - logger.error(f'Cannot create directory for memory maps') + logger.error('Cannot create directory for memory maps') return False def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py index e163fe1da..bf12e8827 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py @@ -22,8 +22,8 @@ def __init__( mem_map: mmap.mmap): if mem_map is None: raise Exception( - f'Cannot initialize SharedMemoryMap. ' - f'Invalid memory map provided') + 'Cannot initialize SharedMemoryMap. 
Invalid memory map ' + 'provided') if mem_map_name is None or mem_map_name == '': raise Exception( f'Cannot initialize SharedMemoryMap. Invalid name ' From 4be11701eb0f07f93e8fefcb8f7976f389dd7f0c Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 07:37:47 -0800 Subject: [PATCH 40/76] Removing the use of __annotations__ to be compatible with Python 3.6, adding FileAccessor tests --- azure_functions_worker/bindings/datumdef.py | 5 +- .../file_accessor_windows.py | 6 ++- tests/unittests/test_file_accessor.py | 50 +++++++++++++++++-- 3 files changed, 52 insertions(+), 9 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index f55942de7..3831d2e31 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -from __future__ import annotations from typing import Any, Optional import json from .. import protos @@ -98,7 +97,7 @@ def from_typed_data(cls, td: protos.TypedData): def from_rpc_shared_memory( cls, shmem: protos.RpcSharedMemory, - shmem_mgr) -> Optional[Datum]: + shmem_mgr) -> Optional['Datum']: """ Reads the specified shared memory region and converts the read data into a datum object of the corresponding type. 
@@ -126,7 +125,7 @@ def from_rpc_shared_memory( @classmethod def to_rpc_shared_memory( cls, - datum: Datum, + datum: 'Datum', shmem_mgr) -> Optional[protos.RpcSharedMemory]: """ Writes the given value to shared memory and returns the corresponding diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py index f92a8fa6d..2a052e52c 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py @@ -17,6 +17,10 @@ def open_mem_map( mem_map_name: str, mem_map_size: int, access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: + """ + Note: On Windows, an mmap is created if one does not exist even when + attempting to open it. + """ try: mmap_ret = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) return mmap_ret @@ -41,6 +45,6 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ self._set_mem_map_initialized(mem_map) return mem_map - def delete_mem_map(self, mem_map_name: str, mem_map) -> bool: + def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: mem_map.close() return True diff --git a/tests/unittests/test_file_accessor.py b/tests/unittests/test_file_accessor.py index 81dd2dc0c..ef0433669 100644 --- a/tests/unittests/test_file_accessor.py +++ b/tests/unittests/test_file_accessor.py @@ -1,17 +1,57 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import os +import shutil import unittest import uuid -from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory import FileAccessorFactory +from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory \ + import FileAccessorFactory +from azure_functions_worker.shared_memory_data_transfer. 
\ + shared_memory_constants import SharedMemoryConstants as consts class TestFileAccessor(unittest.TestCase): def setUp(self): self.file_accessor = FileAccessorFactory.create_file_accessor() - def test_init_shared_memory_map(self): + @unittest.skipIf(os.name == 'nt', + 'Deleting test files applicable only for Unix platform') + def tearDown(self): + for temp_dir in consts.UNIX_TEMP_DIRS: + temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) + shutil.rmtree(temp_dir_path) + + def test_create_and_delete_mem_map(self): + for mem_map_size in [1, 10, 1024, 2 * 1024 * 1024, 10 * 1024 * 1024]: + mem_map_name = str(uuid.uuid4()) + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + self.assertIsNotNone(mem_map) + delete_status = self.file_accessor.delete_mem_map(mem_map_name, + mem_map) + self.assertTrue(delete_status) + + def test_open_existing_mem_map(self): + mem_map_size = 1024 + mem_map_name = str(uuid.uuid4()) + mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) + o_mem_map = self.file_accessor.open_mem_map(mem_map_name, mem_map_size) + self.assertIsNotNone(o_mem_map) + o_mem_map.close() + delete_status = self.file_accessor.delete_mem_map(mem_map_name, mem_map) + self.assertTrue(delete_status) + + @unittest.skipIf(os.name == 'nt', + 'Windows will create an mmap if one does not exist') + def test_open_deleted_mem_map(self): + mem_map_size = 1024 mem_map_name = str(uuid.uuid4()) - content_size = 2 * 1024 * 1024 # 2 MB - mem_map = self.file_accessor.create_mem_map(mem_map_name, content_size) - assert mem_map is not None \ No newline at end of file + mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) + o_mem_map = self.file_accessor.open_mem_map(mem_map_name, mem_map_size) + self.assertIsNotNone(o_mem_map) + o_mem_map.close() + delete_status = self.file_accessor.delete_mem_map(mem_map_name, mem_map) + self.assertTrue(delete_status) + d_mem_map = self.file_accessor.open_mem_map(mem_map_name, 
mem_map_size) + self.assertIsNone(d_mem_map) From bc10c1df5c78344e69376bb52f0c3f1dd335151f Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 10:09:26 -0800 Subject: [PATCH 41/76] Added tests for FileAccessor, SharedMemoryMap, FileAccessorFactory --- .../file_accessor_unix.py | 10 +++ .../file_accessor_windows.py | 12 +++- azure_functions_worker/testutils.py | 28 ++++++++ tests/unittests/test_file_accessor.py | 51 ++++++++------ tests/unittests/test_file_accessor_factory.py | 20 ++++++ tests/unittests/test_shared_memory_map.py | 69 +++++++++++++++++++ 6 files changed, 168 insertions(+), 22 deletions(-) create mode 100644 tests/unittests/test_file_accessor_factory.py create mode 100644 tests/unittests/test_shared_memory_map.py diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index b3233879a..2d345503e 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -20,6 +20,10 @@ def open_mem_map( mem_map_name: str, mem_map_size: int, access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: + if mem_map_name is None or mem_map_name == '': + raise Exception('Cannot open memory map. Invalid name.') + if mem_map_size < 0: + raise Exception('Cannot open memory map. Invalid size.') fd = self._open_mem_map_file(mem_map_name) if fd is None: return None @@ -28,6 +32,10 @@ def open_mem_map( def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ -> Optional[mmap.mmap]: + if mem_map_name is None or mem_map_name == '': + raise Exception('Cannot create memory map. Invalid name.') + if mem_map_size <= 0: + raise Exception('Cannot create memory map. 
Invalid size.') fd = self._create_mem_map_file(mem_map_name, mem_map_size) if fd is None: return None @@ -38,6 +46,8 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ return mem_map def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: + if mem_map_name is None or mem_map_name == '': + raise Exception('Cannot delete memory map. Invalid name.') try: fd = self._open_mem_map_file(mem_map_name) os.remove(fd.name) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py index 2a052e52c..985f7b141 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py @@ -21,9 +21,13 @@ def open_mem_map( Note: On Windows, an mmap is created if one does not exist even when attempting to open it. """ + if mem_map_name is None or mem_map_name == '': + raise Exception('Cannot open memory map. Invalid name.') + if mem_map_size < 0: + raise Exception('Cannot open memory map. Invalid size.') try: - mmap_ret = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) - return mmap_ret + mem_map = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) + return mem_map except Exception as e: logger.warn( f'Cannot open memory map {mem_map_name} with size ' @@ -34,6 +38,10 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ -> Optional[mmap.mmap]: # Windows also creates the mmap when trying to open it, if it does not # already exist. + if mem_map_name is None or mem_map_name == '': + raise Exception('Cannot create memory map. Invalid name.') + if mem_map_size <= 0: + raise Exception('Cannot create memory map. 
Invalid size.') mem_map = self.open_mem_map(mem_map_name, mem_map_size, mmap.ACCESS_WRITE) if mem_map is None: diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 71436cabe..337d72b2f 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -18,6 +18,7 @@ import pathlib import platform import queue +import random import re import shutil import socket @@ -33,6 +34,10 @@ import requests from azure_functions_worker._thirdparty import aio_compat +from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory \ + import FileAccessorFactory +from azure_functions_worker.shared_memory_data_transfer. \ + shared_memory_constants import SharedMemoryConstants as consts from . import dispatcher from . import protos from .constants import PYAZURE_WEBHOST_DEBUG @@ -239,6 +244,29 @@ def _run_test(self, test, *args, **kwargs): raise test_exception +class SharedMemoryTestCase(unittest.TestCase): + """ + For tests involving shared memory data transfer usage. + """ + def setUp(self): + self.file_accessor = FileAccessorFactory.create_file_accessor() + + def tearDown(self): + if os.name != 'nt': + self._tearDownUnix() + + def get_new_mem_map_name(self): + return str(uuid.uuid4()) + + def get_random_bytes(self, num_bytes): + return bytearray(random.getrandbits(8) for _ in range(num_bytes)) + + def _tearDownUnix(self): + for temp_dir in consts.UNIX_TEMP_DIRS: + temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) + shutil.rmtree(temp_dir_path) + + class _MockWebHostServicer(protos.FunctionRpcServicer): _STOP = object() diff --git a/tests/unittests/test_file_accessor.py b/tests/unittests/test_file_accessor.py index ef0433669..325b7dcb0 100644 --- a/tests/unittests/test_file_accessor.py +++ b/tests/unittests/test_file_accessor.py @@ -2,29 +2,14 @@ # Licensed under the MIT License. 
import os -import shutil import unittest -import uuid -from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory \ - import FileAccessorFactory -from azure_functions_worker.shared_memory_data_transfer. \ - shared_memory_constants import SharedMemoryConstants as consts +from azure_functions_worker import testutils -class TestFileAccessor(unittest.TestCase): - def setUp(self): - self.file_accessor = FileAccessorFactory.create_file_accessor() - - @unittest.skipIf(os.name == 'nt', - 'Deleting test files applicable only for Unix platform') - def tearDown(self): - for temp_dir in consts.UNIX_TEMP_DIRS: - temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) - shutil.rmtree(temp_dir_path) - +class TestFileAccessor(testutils.SharedMemoryTestCase): def test_create_and_delete_mem_map(self): for mem_map_size in [1, 10, 1024, 2 * 1024 * 1024, 10 * 1024 * 1024]: - mem_map_name = str(uuid.uuid4()) + mem_map_name = self.get_new_mem_map_name() mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) self.assertIsNotNone(mem_map) @@ -32,9 +17,22 @@ def test_create_and_delete_mem_map(self): mem_map) self.assertTrue(delete_status) + def test_create_mem_map_invalid_inputs(self): + mem_map_name = self.get_new_mem_map_name() + inv_mem_map_size = 0 + with self.assertRaisesRegex(Exception, 'Invalid size'): + self.file_accessor.create_mem_map(mem_map_name, inv_mem_map_size) + inv_mem_map_name = None + mem_map_size = 1024 + with self.assertRaisesRegex(Exception, 'Invalid name'): + self.file_accessor.create_mem_map(inv_mem_map_name, mem_map_size) + inv_mem_map_name = '' + with self.assertRaisesRegex(Exception, 'Invalid name'): + self.file_accessor.create_mem_map(inv_mem_map_name, mem_map_size) + def test_open_existing_mem_map(self): mem_map_size = 1024 - mem_map_name = str(uuid.uuid4()) + mem_map_name = self.get_new_mem_map_name() mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) o_mem_map = 
self.file_accessor.open_mem_map(mem_map_name, mem_map_size) self.assertIsNotNone(o_mem_map) @@ -42,11 +40,24 @@ def test_open_existing_mem_map(self): delete_status = self.file_accessor.delete_mem_map(mem_map_name, mem_map) self.assertTrue(delete_status) + def test_open_mem_map_invalid_inputs(self): + mem_map_name = self.get_new_mem_map_name() + inv_mem_map_size = -1 + with self.assertRaisesRegex(Exception, 'Invalid size'): + self.file_accessor.open_mem_map(mem_map_name, inv_mem_map_size) + inv_mem_map_name = None + mem_map_size = 1024 + with self.assertRaisesRegex(Exception, 'Invalid name'): + self.file_accessor.open_mem_map(inv_mem_map_name, mem_map_size) + inv_mem_map_name = '' + with self.assertRaisesRegex(Exception, 'Invalid name'): + self.file_accessor.open_mem_map(inv_mem_map_name, mem_map_size) + @unittest.skipIf(os.name == 'nt', 'Windows will create an mmap if one does not exist') def test_open_deleted_mem_map(self): mem_map_size = 1024 - mem_map_name = str(uuid.uuid4()) + mem_map_name = self.get_new_mem_map_name() mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) o_mem_map = self.file_accessor.open_mem_map(mem_map_name, mem_map_size) self.assertIsNotNone(o_mem_map) diff --git a/tests/unittests/test_file_accessor_factory.py b/tests/unittests/test_file_accessor_factory.py new file mode 100644 index 000000000..b81921e78 --- /dev/null +++ b/tests/unittests/test_file_accessor_factory.py @@ -0,0 +1,20 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import os +import unittest +from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory \ + import FileAccessorFactory +from azure_functions_worker.shared_memory_data_transfer.file_accessor_unix \ + import FileAccessorUnix +from azure_functions_worker.shared_memory_data_transfer.file_accessor_windows \ + import FileAccessorWindows + + +class TestFileAccessorFactory(unittest.TestCase): + def test_proper_subclass_generated(self): + file_accessor = FileAccessorFactory.create_file_accessor() + if os.name == 'nt': + self.assertTrue(type(file_accessor) is FileAccessorWindows) + else: + self.assertTrue(type(file_accessor) is FileAccessorUnix) diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py new file mode 100644 index 000000000..3fc5abcf9 --- /dev/null +++ b/tests/unittests/test_shared_memory_map.py @@ -0,0 +1,69 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +from azure_functions_worker import testutils +from azure_functions_worker.shared_memory_data_transfer.shared_memory_map \ + import SharedMemoryMap +from azure_functions_worker.shared_memory_data_transfer. 
\ + shared_memory_constants import SharedMemoryConstants as consts + + +class TestSharedMemoryMap(testutils.SharedMemoryTestCase): + def test_init(self): + mem_map_name = self.get_new_mem_map_name() + mem_map_size = 1024 + mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + self.assertIsNotNone(shared_mem_map) + dispose_status = shared_mem_map.dispose() + self.assertTrue(dispose_status) + + def test_init_with_invalid_inputs(self): + inv_mem_map_name = None + mem_map_name = self.get_new_mem_map_name() + mem_map_size = 1024 + mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) + with self.assertRaisesRegex(Exception, 'Invalid name'): + SharedMemoryMap(self.file_accessor, inv_mem_map_name, mem_map) + inv_mem_map_name = '' + with self.assertRaisesRegex(Exception, 'Invalid name'): + SharedMemoryMap(self.file_accessor, inv_mem_map_name, mem_map) + with self.assertRaisesRegex(Exception, 'Invalid memory map'): + SharedMemoryMap(self.file_accessor, mem_map_name, None) + + def test_put_bytes(self): + for content_size in [1, 10, 1024, 2 * 1024 * 1024, 20 * 1024 * 1024]: + mem_map_name = self.get_new_mem_map_name() + mem_map_size = content_size + consts.CONTENT_HEADER_TOTAL_BYTES + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + content = self.get_random_bytes(content_size) + num_bytes_written = shared_mem_map.put_bytes(content) + self.assertEqual(content_size, num_bytes_written) + dispose_status = shared_mem_map.dispose() + self.assertTrue(dispose_status) + + def test_get_bytes(self): + for content_size in [1, 10, 1024, 2 * 1024 * 1024, 20 * 1024 * 1024]: + mem_map_name = self.get_new_mem_map_name() + mem_map_size = content_size + consts.CONTENT_HEADER_TOTAL_BYTES + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + 
shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + content = self.get_random_bytes(content_size) + num_bytes_written = shared_mem_map.put_bytes(content) + self.assertEqual(content_size, num_bytes_written) + read_content = shared_mem_map.get_bytes() + self.assertEqual(content, read_content) + dispose_status = shared_mem_map.dispose() + self.assertTrue(dispose_status) + + def test_put_bytes_more_than_capacity(self): + pass + + def test_dispose_without_delete_file(self): + pass \ No newline at end of file From b07cf6ac0c4aa152de666d680b8b7d51f4ad9992 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 10:17:28 -0800 Subject: [PATCH 42/76] Adding shared memory test setup for Unix --- azure_functions_worker/testutils.py | 8 ++++++++ tests/unittests/test_shared_memory_map.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 337d72b2f..f3cd77caf 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -250,6 +250,8 @@ class SharedMemoryTestCase(unittest.TestCase): """ def setUp(self): self.file_accessor = FileAccessorFactory.create_file_accessor() + if os.name != 'nt': + self._setUpUnix() def tearDown(self): if os.name != 'nt': @@ -261,6 +263,12 @@ def get_new_mem_map_name(self): def get_random_bytes(self, num_bytes): return bytearray(random.getrandbits(8) for _ in range(num_bytes)) + def _setUpUnix(self): + for temp_dir in consts.UNIX_TEMP_DIRS: + temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) + if not os.path.exists(temp_dir_path): + os.makedirs(temp_dir_path) + def _tearDownUnix(self): for temp_dir in consts.UNIX_TEMP_DIRS: temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index 3fc5abcf9..d308a543b 100644 --- 
a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -66,4 +66,4 @@ def test_put_bytes_more_than_capacity(self): pass def test_dispose_without_delete_file(self): - pass \ No newline at end of file + pass From 292b98085f0cf7c7a1a52eb3dfeadf0a1134e14c Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 12:00:26 -0800 Subject: [PATCH 43/76] Adding tests for SharedMemoryManager --- azure_functions_worker/bindings/datumdef.py | 5 + .../file_accessor_unix.py | 15 +- .../file_accessor_windows.py | 12 +- .../shared_memory_constants.py | 2 +- .../shared_memory_manager.py | 11 +- azure_functions_worker/testutils.py | 5 + tests/unittests/test_shared_memory_manager.py | 174 ++++++++++++++++++ 7 files changed, 209 insertions(+), 15 deletions(-) create mode 100644 tests/unittests/test_shared_memory_manager.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 3831d2e31..5cd63a9eb 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -107,6 +107,7 @@ def from_rpc_shared_memory( count = shmem.count data_type = shmem.type ret_val = None + if data_type == protos.RpcDataType.bytes: val = shmem_mgr.get_bytes(mem_map_name, offset, count) if val is not None: @@ -115,6 +116,7 @@ def from_rpc_shared_memory( val = shmem_mgr.get_string(mem_map_name, offset, count) if val is not None: ret_val = cls(val, 'string') + if ret_val is not None: logger.info( f'Read {count} bytes from memory map {mem_map_name} ' @@ -144,13 +146,16 @@ def to_rpc_shared_memory( raise NotImplementedError( f'Unsupported datum type ({datum.type}) for shared memory' ) + if shared_mem_meta is None: return None + shmem = protos.RpcSharedMemory( name=shared_mem_meta.mem_map_name, offset=0, count=shared_mem_meta.count, type=data_type) + logger.info( f'Wrote {shared_mem_meta.count} bytes to memory map ' f'{shared_mem_meta.mem_map_name} for data type 
{data_type}') diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index 2d345503e..d19ae3f6d 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -21,9 +21,11 @@ def open_mem_map( mem_map_size: int, access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: if mem_map_name is None or mem_map_name == '': - raise Exception('Cannot open memory map. Invalid name.') + raise Exception( + f'Cannot open memory map. Invalid name {mem_map_name}') if mem_map_size < 0: - raise Exception('Cannot open memory map. Invalid size.') + raise Exception( + f'Cannot open memory map. Invalid size {mem_map_size}') fd = self._open_mem_map_file(mem_map_name) if fd is None: return None @@ -33,9 +35,11 @@ def open_mem_map( def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ -> Optional[mmap.mmap]: if mem_map_name is None or mem_map_name == '': - raise Exception('Cannot create memory map. Invalid name.') + raise Exception( + f'Cannot create memory map. Invalid name {mem_map_name}') if mem_map_size <= 0: - raise Exception('Cannot create memory map. Invalid size.') + raise Exception( + f'Cannot create memory map. Invalid size {mem_map_size}') fd = self._create_mem_map_file(mem_map_name, mem_map_size) if fd is None: return None @@ -47,7 +51,8 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: if mem_map_name is None or mem_map_name == '': - raise Exception('Cannot delete memory map. Invalid name.') + raise Exception( + f'Cannot delete memory map. 
Invalid name {mem_map_name}') try: fd = self._open_mem_map_file(mem_map_name) os.remove(fd.name) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py index 985f7b141..35d0a7f63 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py @@ -22,9 +22,11 @@ def open_mem_map( attempting to open it. """ if mem_map_name is None or mem_map_name == '': - raise Exception('Cannot open memory map. Invalid name.') + raise Exception( + f'Cannot open memory map. Invalid name {mem_map_name}') if mem_map_size < 0: - raise Exception('Cannot open memory map. Invalid size.') + raise Exception( + f'Cannot open memory map. Invalid size {mem_map_size}') try: mem_map = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) return mem_map @@ -39,9 +41,11 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ # Windows also creates the mmap when trying to open it, if it does not # already exist. if mem_map_name is None or mem_map_name == '': - raise Exception('Cannot create memory map. Invalid name.') + raise Exception( + f'Cannot create memory map. Invalid name {mem_map_name}') if mem_map_size <= 0: - raise Exception('Cannot create memory map. Invalid size.') + raise Exception( + f'Cannot create memory map. 
Invalid size {mem_map_size}') mem_map = self.open_mem_map(mem_map_name, mem_map_size, mmap.ACCESS_WRITE) if mem_map is None: diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py index 2f17eac21..040a32ff3 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py @@ -46,7 +46,7 @@ class SharedMemoryConstants: # Ref: https://stackoverflow.com/a/3944336/3132415 # Note: This needs to be consistent among the host and workers. # e.g. in the host, it is defined in SharedMemoryConstants.cs - MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB + MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 2 * 1024 * 1024 * 1024 # 2 GB # This is what the size of a character is in DotNet. Can be verified by # doing "sizeof(char)". diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py index 13e2f5f30..e9442125d 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py @@ -68,7 +68,7 @@ def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]: return None mem_map_name = str(uuid.uuid4()) content_length = len(content) - shared_mem_map = self.create(mem_map_name, content_length) + shared_mem_map = self._create(mem_map_name, content_length) if shared_mem_map is None: return None num_bytes_written = shared_mem_map.put_bytes(content) @@ -76,6 +76,7 @@ def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]: logger.error( f'Cannot write data into shared memory {mem_map_name} ' f'({num_bytes_written} != {content_length})') + shared_mem_map.dispose() return None self.allocated_mem_maps[mem_map_name] = shared_mem_map return 
SharedMemoryMetadata(mem_map_name, content_length) @@ -104,7 +105,7 @@ def get_bytes(self, mem_map_name: str, offset: int, count: int) \ f'Cannot read bytes. Non-zero offset ({offset}) ' f'not supported.') return None - shared_mem_map = self.open(mem_map_name, count) + shared_mem_map = self._open(mem_map_name, count) if shared_mem_map is None: return None try: @@ -128,7 +129,7 @@ def get_string(self, mem_map_name: str, offset: int, count: int) \ content_str = content_bytes.decode('utf-8') return content_str - def free_mem_map(self, mem_map_name: str): + def free_mem_map(self, mem_map_name: str) -> bool: """ Frees the memory map and any backing resources (e.g. file in the case of Unix) associated with it. @@ -145,7 +146,7 @@ def free_mem_map(self, mem_map_name: str): del self.allocated_mem_maps[mem_map_name] return success - def create(self, mem_map_name: str, content_length: int) \ + def _create(self, mem_map_name: str, content_length: int) \ -> Optional[SharedMemoryMap]: """ Creates a new SharedMemoryMap with the given name and content length. 
@@ -157,7 +158,7 @@ def create(self, mem_map_name: str, content_length: int) \ return None return SharedMemoryMap(self.file_accessor, mem_map_name, mem_map) - def open(self, mem_map_name: str, content_length: int) \ + def _open(self, mem_map_name: str, content_length: int) \ -> Optional[SharedMemoryMap]: """ Opens an existing SharedMemoryMap with the given name and content diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index f3cd77caf..7df6a3063 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -22,6 +22,7 @@ import re import shutil import socket +import string import subprocess import sys import tempfile @@ -263,6 +264,10 @@ def get_new_mem_map_name(self): def get_random_bytes(self, num_bytes): return bytearray(random.getrandbits(8) for _ in range(num_bytes)) + def get_random_string(self, num_chars): + return ''.join(random.choices(string.ascii_uppercase + string.digits, + k=num_chars)) + def _setUpUnix(self): for temp_dir in consts.UNIX_TEMP_DIRS: temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) diff --git a/tests/unittests/test_shared_memory_manager.py b/tests/unittests/test_shared_memory_manager.py new file mode 100644 index 000000000..2936a79cc --- /dev/null +++ b/tests/unittests/test_shared_memory_manager.py @@ -0,0 +1,174 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import math +import uuid +from azure.functions import meta as bind_meta +from azure_functions_worker import testutils +from azure_functions_worker.shared_memory_data_transfer.shared_memory_manager \ + import SharedMemoryManager +from azure_functions_worker.shared_memory_data_transfer. 
\ + shared_memory_constants import SharedMemoryConstants as consts + + +class TestSharedMemoryManager(testutils.SharedMemoryTestCase): + def test_is_enabled(self): + pass + + def test_is_disabled(self): + pass + + def test_bytes_input_support(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + content = self.get_random_bytes(content_size) + bytes_datum = bind_meta.Datum(type='bytes', value=content) + is_supported = manager.is_supported(bytes_datum) + self.assertTrue(is_supported) + + def test_string_input_support(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) + content = self.get_random_string(num_chars) + bytes_datum = bind_meta.Datum(type='string', value=content) + is_supported = manager.is_supported(bytes_datum) + self.assertTrue(is_supported) + + def test_large_invalid_bytes_input_support(self): + manager = SharedMemoryManager() + content_size = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + # Not using get_random_bytes to avoid slowing down for creating a large + # random input + content = b'x01' * content_size + bytes_datum = bind_meta.Datum(type='bytes', value=content) + is_supported = manager.is_supported(bytes_datum) + self.assertFalse(is_supported) + + def test_small_invalid_bytes_input_support(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 + content = self.get_random_bytes(content_size) + bytes_datum = bind_meta.Datum(type='bytes', value=content) + is_supported = manager.is_supported(bytes_datum) + self.assertFalse(is_supported) + + def test_large_invalid_string_input_support(self): + manager = SharedMemoryManager() + content_size = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) + # Not using get_random_string to avoid slowing down for creating a large + # 
random input + content = 'a' * num_chars + string_datum = bind_meta.Datum(type='string', value=content) + is_supported = manager.is_supported(string_datum) + self.assertFalse(is_supported) + + def test_small_invalid_string_input_support(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 + num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) + content = self.get_random_string(num_chars) + string_datum = bind_meta.Datum(type='string', value=content) + is_supported = manager.is_supported(string_datum) + self.assertFalse(is_supported) + + def test_put_bytes(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + content = self.get_random_bytes(content_size) + shared_mem_meta = manager.put_bytes(content) + self.assertIsNotNone(shared_mem_meta) + self.assertTrue(self._is_valid_uuid(shared_mem_meta.mem_map_name)) + self.assertEqual(content_size, shared_mem_meta.count) + free_success = manager.free_mem_map(shared_mem_meta.mem_map_name) + self.assertTrue(free_success) + + def test_invalid_put_bytes(self): + manager = SharedMemoryManager() + shared_mem_meta = manager.put_bytes(None) + self.assertIsNone(shared_mem_meta) + + def test_get_bytes(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + content = self.get_random_bytes(content_size) + shared_mem_meta = manager.put_bytes(content) + mem_map_name = shared_mem_meta.mem_map_name + num_bytes_written = shared_mem_meta.count + read_content = manager.get_bytes(mem_map_name, offset=0, + count=num_bytes_written) + self.assertEqual(content, read_content) + free_success = manager.free_mem_map(mem_map_name) + self.assertTrue(free_success) + + def test_put_string(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) + content = 
self.get_random_string(num_chars) + expected_size = len(content.encode('utf-8')) + shared_mem_meta = manager.put_string(content) + self.assertIsNotNone(shared_mem_meta) + self.assertTrue(self._is_valid_uuid(shared_mem_meta.mem_map_name)) + self.assertEqual(expected_size, shared_mem_meta.count) + free_success = manager.free_mem_map(shared_mem_meta.mem_map_name) + self.assertTrue(free_success) + + def test_invalid_put_string(self): + manager = SharedMemoryManager() + shared_mem_meta = manager.put_string(None) + self.assertIsNone(shared_mem_meta) + + def test_get_string(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) + content = self.get_random_string(num_chars) + shared_mem_meta = manager.put_string(content) + mem_map_name = shared_mem_meta.mem_map_name + num_bytes_written = shared_mem_meta.count + read_content = manager.get_string(mem_map_name, offset=0, + count=num_bytes_written) + self.assertEqual(content, read_content) + free_success = manager.free_mem_map(mem_map_name) + self.assertTrue(free_success) + + def test_allocated_mem_maps(self): + manager = SharedMemoryManager() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + content = self.get_random_bytes(content_size) + shared_mem_meta = manager.put_bytes(content) + self.assertIsNotNone(shared_mem_meta) + mem_map_name = shared_mem_meta.mem_map_name + is_mem_map_found = mem_map_name in manager.allocated_mem_maps + self.assertTrue(is_mem_map_found) + self.assertEqual(1, len(manager.allocated_mem_maps.keys())) + free_success = manager.free_mem_map(mem_map_name) + self.assertTrue(free_success) + is_mem_map_found = mem_map_name in manager.allocated_mem_maps + self.assertFalse(is_mem_map_found) + self.assertEqual(0, len(manager.allocated_mem_maps.keys())) + + def test_invalid_put_allocated_mem_maps(self): + manager = SharedMemoryManager() + shared_mem_meta = manager.put_bytes(None) + 
self.assertIsNone(shared_mem_meta) + self.assertEqual(0, len(manager.allocated_mem_maps.keys())) + + def test_invalid_free_mem_map(self): + manager = SharedMemoryManager() + mem_map_name = self.get_new_mem_map_name() + free_success = manager.free_mem_map(mem_map_name) + self.assertFalse(free_success) + + def _is_valid_uuid(self, uuid_to_test: str, version: int = 4) -> bool: + """ + Check if uuid_to_test is a valid UUID. + Reference: https://stackoverflow.com/a/33245493/3132415 + """ + try: + uuid_obj = uuid.UUID(uuid_to_test, version=version) + except ValueError: + return False + return str(uuid_obj) == uuid_to_test From cfa943e6380e8e99fbed63632e8c681fe8d57072 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Feb 2021 17:53:46 -0800 Subject: [PATCH 44/76] Adding tests to ensure the dispatcher can invoke the function and send/receive inputs over shared memory using a mock host --- azure_functions_worker/bindings/meta.py | 6 +- azure_functions_worker/dispatcher.py | 3 +- .../shared_memory_data_transfer/__init__.py | 11 ++ azure_functions_worker/testutils.py | 27 ++- .../blob_functions/blob_trigger/function.json | 2 +- .../function.json | 23 +++ .../main.py | 26 +++ .../get_blob_bytes/function.json | 2 +- .../get_blob_filelike/function.json | 2 +- .../get_blob_return/function.json | 2 +- .../blob_functions/get_blob_str/function.json | 2 +- .../get_blob_triggered/function.json | 2 +- .../function.json | 23 +++ .../main.py | 29 ++++ .../put_blob_bytes/function.json | 2 +- .../put_blob_filelike/function.json | 2 +- .../blob_functions/put_blob_str/function.json | 2 +- .../put_blob_trigger/function.json | 2 +- tests/unittests/test_file_accessor_factory.py | 2 +- .../test_mock_blob_shared_memory_functions.py | 158 ++++++++++++++++++ tests/unittests/test_shared_memory_manager.py | 20 +-- tests/unittests/test_shared_memory_map.py | 7 +- 22 files changed, 314 insertions(+), 41 deletions(-) create mode 100644 
tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/function.json create mode 100644 tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py create mode 100644 tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/function.json create mode 100644 tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py create mode 100644 tests/unittests/test_mock_blob_shared_memory_functions.py diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 10c18373f..33763f69f 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -7,8 +7,6 @@ from . import datumdef from . import generic -from ..shared_memory_data_transfer.shared_memory_manager \ - import SharedMemoryManager def get_binding_registry(): @@ -60,7 +58,7 @@ def from_incoming_proto( pb: protos.ParameterBinding, *, pytype: typing.Optional[type], trigger_metadata: typing.Optional[typing.Dict[str, protos.TypedData]], - shmem_mgr: SharedMemoryManager) -> typing.Any: + shmem_mgr) -> typing.Any: binding = get_binding(binding) if trigger_metadata: metadata = { @@ -119,7 +117,7 @@ def to_outgoing_proto(binding: str, obj: typing.Any, *, def to_outgoing_param_binding(binding: str, obj: typing.Any, *, pytype: typing.Optional[type], out_name: str, - shmem_mgr: SharedMemoryManager) \ + shmem_mgr) \ -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) shared_mem_value = None diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 5533a2ef4..a002fe3c0 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -33,8 +33,7 @@ from .utils.tracing import marshall_exception_trace from .utils.dependency import DependencyManager from .utils.wrappers import disable_feature_by -from .shared_memory_data_transfer.shared_memory_manager import \ - SharedMemoryManager +from .shared_memory_data_transfer 
import SharedMemoryManager _TRUE = "true" diff --git a/azure_functions_worker/shared_memory_data_transfer/__init__.py b/azure_functions_worker/shared_memory_data_transfer/__init__.py index ccc438ca9..b56bf1d67 100644 --- a/azure_functions_worker/shared_memory_data_transfer/__init__.py +++ b/azure_functions_worker/shared_memory_data_transfer/__init__.py @@ -8,3 +8,14 @@ The issue tracking shared memory transfer related changes is: https://github.com/Azure/azure-functions-host/issues/6791 """ + +from .file_accessor_factory import FileAccessorFactory +from .file_accessor import FileAccessor +from .shared_memory_constants import SharedMemoryConstants +from .shared_memory_map import SharedMemoryMap +from .shared_memory_manager import SharedMemoryManager + +__all__ = ( + 'FileAccessorFactory', 'FileAccessor', 'SharedMemoryConstants', + 'SharedMemoryMap', 'SharedMemoryManager' +) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 7df6a3063..95f439d53 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -35,13 +35,14 @@ import requests from azure_functions_worker._thirdparty import aio_compat -from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory \ +from azure_functions_worker.shared_memory_data_transfer \ import FileAccessorFactory -from azure_functions_worker.shared_memory_data_transfer. \ - shared_memory_constants import SharedMemoryConstants as consts +from azure_functions_worker.shared_memory_data_transfer \ + import SharedMemoryConstants as consts from . import dispatcher from . 
import protos -from .constants import PYAZURE_WEBHOST_DEBUG +from .constants import (PYAZURE_WEBHOST_DEBUG, + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) from .utils.common import is_envvar_true PROJECT_ROOT = pathlib.Path(__file__).parent.parent @@ -251,12 +252,19 @@ class SharedMemoryTestCase(unittest.TestCase): """ def setUp(self): self.file_accessor = FileAccessorFactory.create_file_accessor() + self.was_shmem_env_true = is_envvar_true( + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) + os.environ.update( + {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '1'}) if os.name != 'nt': self._setUpUnix() def tearDown(self): if os.name != 'nt': self._tearDownUnix() + if not self.was_shmem_env_true: + os.environ.update( + {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '0'}) def get_new_mem_map_name(self): return str(uuid.uuid4()) @@ -268,6 +276,17 @@ def get_random_string(self, num_chars): return ''.join(random.choices(string.ascii_uppercase + string.digits, k=num_chars)) + def is_valid_uuid(self, uuid_to_test: str, version: int = 4) -> bool: + """ + Check if uuid_to_test is a valid UUID. 
+ Reference: https://stackoverflow.com/a/33245493/3132415 + """ + try: + uuid_obj = uuid.UUID(uuid_to_test, version=version) + except ValueError: + return False + return str(uuid_obj) == uuid_to_test + def _setUpUnix(self): for temp_dir in consts.UNIX_TEMP_DIRS: temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) diff --git a/tests/endtoend/blob_functions/blob_trigger/function.json b/tests/endtoend/blob_functions/blob_trigger/function.json index d66c57391..85f59728d 100644 --- a/tests/endtoend/blob_functions/blob_trigger/function.json +++ b/tests/endtoend/blob_functions/blob_trigger/function.json @@ -14,6 +14,6 @@ "name": "$return", "connection": "AzureWebJobsStorage", "path": "python-worker-tests/test-blob-triggered.txt" - }, + } ] } diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/function.json b/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/function.json new file mode 100644 index 000000000..17d0d2bf1 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "in", + "name": "file", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py new file mode 100644 index 000000000..708dacf72 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py @@ -0,0 +1,26 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import json +import hashlib +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: bytes) -> azf.HttpResponse: + assert isinstance(file, bytes) + + content_size = len(file) + content_md5 = hashlib.md5(file).hexdigest() + + response_dict = { + 'content_size': content_size, + 'content_md5': content_md5 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/endtoend/blob_functions/get_blob_bytes/function.json b/tests/endtoend/blob_functions/get_blob_bytes/function.json index 3f6bddab3..380782d2b 100644 --- a/tests/endtoend/blob_functions/get_blob_bytes/function.json +++ b/tests/endtoend/blob_functions/get_blob_bytes/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/get_blob_filelike/function.json b/tests/endtoend/blob_functions/get_blob_filelike/function.json index 4709e7a01..7365a290d 100644 --- a/tests/endtoend/blob_functions/get_blob_filelike/function.json +++ b/tests/endtoend/blob_functions/get_blob_filelike/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/get_blob_return/function.json b/tests/endtoend/blob_functions/get_blob_return/function.json index b89022445..c4bffff01 100644 --- a/tests/endtoend/blob_functions/get_blob_return/function.json +++ b/tests/endtoend/blob_functions/get_blob_return/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/get_blob_str/function.json b/tests/endtoend/blob_functions/get_blob_str/function.json index c1b43cd28..d4a43c7c0 100644 --- a/tests/endtoend/blob_functions/get_blob_str/function.json +++ b/tests/endtoend/blob_functions/get_blob_str/function.json @@ 
-16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/get_blob_triggered/function.json b/tests/endtoend/blob_functions/get_blob_triggered/function.json index c558580d7..2513fd38a 100644 --- a/tests/endtoend/blob_functions/get_blob_triggered/function.json +++ b/tests/endtoend/blob_functions/get_blob_triggered/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/function.json b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/function.json new file mode 100644 index 000000000..be2691bd8 --- /dev/null +++ b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "out", + "name": "file", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes-out.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py new file mode 100644 index 000000000..53902d22a --- /dev/null +++ b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py @@ -0,0 +1,29 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import random +import json +import hashlib +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: azf.Out[bytes]) -> azf.HttpResponse: + content_size = int(req.params['content_size']) + + content = bytearray(random.getrandbits(8) for _ in range(content_size)) + content_md5 = hashlib.md5(content).hexdigest() + + file.set(content) + + response_dict = { + 'content_size': content_size, + 'content_md5': content_md5 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/endtoend/blob_functions/put_blob_bytes/function.json b/tests/endtoend/blob_functions/put_blob_bytes/function.json index 307fa509b..63a706c79 100644 --- a/tests/endtoend/blob_functions/put_blob_bytes/function.json +++ b/tests/endtoend/blob_functions/put_blob_bytes/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/put_blob_filelike/function.json b/tests/endtoend/blob_functions/put_blob_filelike/function.json index d906c1766..49ff4f6a2 100644 --- a/tests/endtoend/blob_functions/put_blob_filelike/function.json +++ b/tests/endtoend/blob_functions/put_blob_filelike/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/put_blob_str/function.json b/tests/endtoend/blob_functions/put_blob_str/function.json index f96a559a9..b21e51c00 100644 --- a/tests/endtoend/blob_functions/put_blob_str/function.json +++ b/tests/endtoend/blob_functions/put_blob_str/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/endtoend/blob_functions/put_blob_trigger/function.json b/tests/endtoend/blob_functions/put_blob_trigger/function.json index 10a2e5ee7..183cdaa69 100644 --- 
a/tests/endtoend/blob_functions/put_blob_trigger/function.json +++ b/tests/endtoend/blob_functions/put_blob_trigger/function.json @@ -16,7 +16,7 @@ { "type": "http", "direction": "out", - "name": "$return", + "name": "$return" } ] } diff --git a/tests/unittests/test_file_accessor_factory.py b/tests/unittests/test_file_accessor_factory.py index b81921e78..f3ae5e3c4 100644 --- a/tests/unittests/test_file_accessor_factory.py +++ b/tests/unittests/test_file_accessor_factory.py @@ -3,7 +3,7 @@ import os import unittest -from azure_functions_worker.shared_memory_data_transfer.file_accessor_factory \ +from azure_functions_worker.shared_memory_data_transfer \ import FileAccessorFactory from azure_functions_worker.shared_memory_data_transfer.file_accessor_unix \ import FileAccessorUnix diff --git a/tests/unittests/test_mock_blob_shared_memory_functions.py b/tests/unittests/test_mock_blob_shared_memory_functions.py new file mode 100644 index 000000000..52ae2b29d --- /dev/null +++ b/tests/unittests/test_mock_blob_shared_memory_functions.py @@ -0,0 +1,158 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import json +import hashlib +from azure_functions_worker.shared_memory_data_transfer import SharedMemoryMap +from azure_functions_worker.shared_memory_data_transfer \ + import SharedMemoryConstants as consts +from azure_functions_worker import protos +from azure_functions_worker import testutils + + +class TestMockBlobSharedMemoryFunctions(testutils.SharedMemoryTestCase, + testutils.AsyncTestCase): + def setUp(self): + super().setUp() + self.blob_funcs_dir = testutils.E2E_TESTS_FOLDER / 'blob_functions' + + async def test_binary_blob_read_function(self): + func_name = 'get_blob_as_bytes_return_http_response' + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + # Write binary content into shared memory + mem_map_name = self.get_new_mem_map_name() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + content = self.get_random_bytes(content_size) + content_md5 = hashlib.md5(content).hexdigest() + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + num_bytes_written = shared_mem_map.put_bytes(content) + + # Create a message to send to the worker containing info about the + # shared memory region to read input from + value = protos.RpcSharedMemory( + name=mem_map_name, + offset=0, + count=num_bytes_written, + type=protos.RpcDataType.bytes + ) + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, r = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET'))), + protos.ParameterBinding( + name='file', + rpc_shared_memory=value + ) + ]) + + # Dispose the shared memory map since the function is done using it + 
shared_mem_map.dispose() + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + r.response.result.status) + + json_response = json.loads(r.response.return_value.http.body.bytes) + func_received_content_size = json_response['content_size'] + func_received_content_md5 = json_response['content_md5'] + + # Check the function response to ensure that it read the complete + # input that we provided and the md5 matches + self.assertEqual(content_size, func_received_content_size) + self.assertEqual(content_md5, func_received_content_md5) + + async def test_binary_blob_write_function(self): + func_name = 'put_blob_as_bytes_return_http_response' + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + http_params = {'content_size': str(content_size)} + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, r = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET', + query=http_params))), + ]) + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + r.response.result.status) + + # The function responds back in the HTTP body with the md5 digest of + # the output it created along with its size + json_response = json.loads(r.response.return_value.http.body.bytes) + func_created_content_size = json_response['content_size'] + func_created_content_md5 = json_response['content_md5'] + + # Verify if the worker produced an output blob which was written + # in shared memory + output_data = r.response.output_data + self.assertEqual(1, len(output_data)) + + output_binding = output_data[0] + binding_type = output_binding.WhichOneof('rpc_data') + 
self.assertEqual('rpc_shared_memory', binding_type) + + # Get the information about the shared memory region in which the + # worker wrote the function's output blob + shmem = output_binding.rpc_shared_memory + mem_map_name = shmem.name + offset = shmem.offset + count = shmem.count + data_type = shmem.type + + # Verify if the shared memory region's information makes sense + self.assertTrue(self.is_valid_uuid(mem_map_name)) + self.assertEqual(0, offset) + self.assertEqual(func_created_content_size, count) + self.assertEqual(protos.RpcDataType.bytes, data_type) + + # Read data from the shared memory region + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + count + mem_map = self.file_accessor.open_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + read_content = shared_mem_map.get_bytes() + + # Dispose the shared memory map since we have read the function's + # output now + shared_mem_map.dispose() + + # Verify if we were able to read the correct output that the + # function has produced + read_content_md5 = hashlib.md5(read_content).hexdigest() + self.assertEqual(func_created_content_md5, read_content_md5) + self.assertEqual(len(read_content), func_created_content_size) + + async def test_str_blob_read_function(self): + pass + + async def test_str_blob_write_function(self): + pass + + def test_close_shared_memory_maps(self): + pass diff --git a/tests/unittests/test_shared_memory_manager.py b/tests/unittests/test_shared_memory_manager.py index 2936a79cc..16455c80e 100644 --- a/tests/unittests/test_shared_memory_manager.py +++ b/tests/unittests/test_shared_memory_manager.py @@ -2,13 +2,12 @@ # Licensed under the MIT License. 
import math -import uuid from azure.functions import meta as bind_meta from azure_functions_worker import testutils -from azure_functions_worker.shared_memory_data_transfer.shared_memory_manager \ +from azure_functions_worker.shared_memory_data_transfer \ import SharedMemoryManager -from azure_functions_worker.shared_memory_data_transfer. \ - shared_memory_constants import SharedMemoryConstants as consts +from azure_functions_worker.shared_memory_data_transfer \ + import SharedMemoryConstants as consts class TestSharedMemoryManager(testutils.SharedMemoryTestCase): @@ -110,7 +109,7 @@ def test_put_string(self): expected_size = len(content.encode('utf-8')) shared_mem_meta = manager.put_string(content) self.assertIsNotNone(shared_mem_meta) - self.assertTrue(self._is_valid_uuid(shared_mem_meta.mem_map_name)) + self.assertTrue(self.is_valid_uuid(shared_mem_meta.mem_map_name)) self.assertEqual(expected_size, shared_mem_meta.count) free_success = manager.free_mem_map(shared_mem_meta.mem_map_name) self.assertTrue(free_success) @@ -161,14 +160,3 @@ def test_invalid_free_mem_map(self): mem_map_name = self.get_new_mem_map_name() free_success = manager.free_mem_map(mem_map_name) self.assertFalse(free_success) - - def _is_valid_uuid(self, uuid_to_test: str, version: int = 4) -> bool: - """ - Check if uuid_to_test is a valid UUID. - Reference: https://stackoverflow.com/a/33245493/3132415 - """ - try: - uuid_obj = uuid.UUID(uuid_to_test, version=version) - except ValueError: - return False - return str(uuid_obj) == uuid_to_test diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index d308a543b..2148247ab 100644 --- a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -2,10 +2,9 @@ # Licensed under the MIT License. 
from azure_functions_worker import testutils -from azure_functions_worker.shared_memory_data_transfer.shared_memory_map \ - import SharedMemoryMap -from azure_functions_worker.shared_memory_data_transfer. \ - shared_memory_constants import SharedMemoryConstants as consts +from azure_functions_worker.shared_memory_data_transfer import SharedMemoryMap +from azure_functions_worker.shared_memory_data_transfer \ + import SharedMemoryConstants as consts class TestSharedMemoryMap(testutils.SharedMemoryTestCase): From 2825e1109a410aca7b19ce7e91bafc059aeb6b28 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 22 Feb 2021 19:35:50 -0800 Subject: [PATCH 45/76] More tests for worker/dispatcher's use of shared memory --- azure_functions_worker/testutils.py | 15 + .../function.json | 23 ++ .../main.py | 27 ++ .../main.py | 9 +- .../function.json | 23 ++ .../main.py | 34 +++ .../test_mock_blob_shared_memory_functions.py | 264 +++++++++++++++++- 7 files changed, 384 insertions(+), 11 deletions(-) create mode 100644 tests/endtoend/blob_functions/get_blob_as_str_return_http_response/function.json create mode 100644 tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py create mode 100644 tests/endtoend/blob_functions/put_blob_as_str_return_http_response/function.json create mode 100644 tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 95f439d53..958bb48fd 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -470,6 +470,21 @@ async def invoke_function( return invocation_id, r + async def close_shared_memory_resources( + self, + map_names: typing.List[str]): + + request = protos.CloseSharedMemoryResourcesRequest( + map_names=map_names) + + r = await self.communicate( + protos.StreamingMessage( + close_shared_memory_resources_request=request + ), + wait_for='close_shared_memory_resources_response') 
+ + return r + async def reload_environment( self, environment: typing.Dict[str, str], diff --git a/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/function.json b/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/function.json new file mode 100644 index 000000000..3de014ad3 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "in", + "name": "file", + "dataType": "string", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-str.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py new file mode 100644 index 000000000..9455d5e67 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import json +import hashlib +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: str) -> azf.HttpResponse: + assert isinstance(file, str) + + num_chars = len(file) + content_bytes = file.encode('utf-8') + content_md5 = hashlib.md5(content_bytes).hexdigest() + + response_dict = { + 'num_chars': num_chars, + 'content_md5': content_md5 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py index 53902d22a..95236ed23 100644 --- a/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py +++ b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py @@ -10,7 +10,14 @@ def main(req: azf.HttpRequest, file: azf.Out[bytes]) -> azf.HttpResponse: content_size = int(req.params['content_size']) - content = bytearray(random.getrandbits(8) for _ in range(content_size)) + # When this is set, then 0x01 byte is repeated content_size number of + # times to use as input. + # This is to avoid generating random input for large size which can be + # slow. 
+ if 'no_random_input' in req.params: + content = b'\x01' * content_size + else: + content = bytearray(random.getrandbits(8) for _ in range(content_size)) content_md5 = hashlib.md5(content).hexdigest() file.set(content) diff --git a/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/function.json b/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/function.json new file mode 100644 index 000000000..06d875094 --- /dev/null +++ b/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "out", + "name": "file", + "dataType": "string", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-str-out.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py b/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py new file mode 100644 index 000000000..97b3dbd13 --- /dev/null +++ b/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py @@ -0,0 +1,34 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import string +import random +import json +import hashlib +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: azf.Out[str]) -> azf.HttpResponse: + num_chars = int(req.params['num_chars']) + + content = ''.join(random.choices(string.ascii_uppercase + string.digits, + k=num_chars)) + content_bytes = content.encode('utf-8') + content_size = len(content_bytes) + content_md5 = hashlib.md5(content_bytes).hexdigest() + + file.set(content) + + response_dict = { + 'num_chars': num_chars, + 'content_size': content_size, + 'content_md5': content_md5 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/unittests/test_mock_blob_shared_memory_functions.py b/tests/unittests/test_mock_blob_shared_memory_functions.py index 52ae2b29d..57097561b 100644 --- a/tests/unittests/test_mock_blob_shared_memory_functions.py +++ b/tests/unittests/test_mock_blob_shared_memory_functions.py @@ -46,7 +46,7 @@ async def test_binary_blob_read_function(self): # Invoke the function; it should read the input blob from shared # memory and respond back in the HTTP body with the number of bytes # it read in the input - _, r = await host.invoke_function( + _, response_msg = await host.invoke_function( func_name, [ protos.ParameterBinding( name='req', @@ -64,9 +64,10 @@ async def test_binary_blob_read_function(self): # Verify if the function executed successfully self.assertEqual(protos.StatusResult.Success, - r.response.result.status) + response_msg.response.result.status) - json_response = json.loads(r.response.return_value.http.body.bytes) + response_bytes = response_msg.response.return_value.http.body.bytes + json_response = json.loads(response_bytes) func_received_content_size = json_response['content_size'] func_received_content_md5 = json_response['content_md5'] @@ -87,7 +88,7 @@ async def test_binary_blob_write_function(self): # Invoke the 
function; it should read the input blob from shared # memory and respond back in the HTTP body with the number of bytes # it read in the input - _, r = await host.invoke_function( + _, response_msg = await host.invoke_function( func_name, [ protos.ParameterBinding( name='req', @@ -99,17 +100,18 @@ async def test_binary_blob_write_function(self): # Verify if the function executed successfully self.assertEqual(protos.StatusResult.Success, - r.response.result.status) + response_msg.response.result.status) # The function responds back in the HTTP body with the md5 digest of # the output it created along with its size - json_response = json.loads(r.response.return_value.http.body.bytes) + response_bytes = response_msg.response.return_value.http.body.bytes + json_response = json.loads(response_bytes) func_created_content_size = json_response['content_size'] func_created_content_md5 = json_response['content_md5'] # Verify if the worker produced an output blob which was written # in shared memory - output_data = r.response.output_data + output_data = response_msg.response.output_data self.assertEqual(1, len(output_data)) output_binding = output_data[0] @@ -124,7 +126,7 @@ async def test_binary_blob_write_function(self): count = shmem.count data_type = shmem.type - # Verify if the shared memory region's information makes sense + # Verify if the shared memory region's information is valid self.assertTrue(self.is_valid_uuid(mem_map_name)) self.assertEqual(0, offset) self.assertEqual(func_created_content_size, count) @@ -149,10 +151,252 @@ async def test_binary_blob_write_function(self): self.assertEqual(len(read_content), func_created_content_size) async def test_str_blob_read_function(self): - pass + func_name = 'get_blob_as_str_return_http_response' + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + # Write binary content into shared memory + mem_map_name = self.get_new_mem_map_name() + content_size 
= consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + num_chars = int(content_size / consts.SIZE_OF_CHAR_BYTES) + content = self.get_random_string(num_chars) + content_bytes = content.encode('utf-8') + content_md5 = hashlib.md5(content_bytes).hexdigest() + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + num_bytes_written = shared_mem_map.put_bytes(content_bytes) + + # Create a message to send to the worker containing info about the + # shared memory region to read input from + value = protos.RpcSharedMemory( + name=mem_map_name, + offset=0, + count=num_bytes_written, + type=protos.RpcDataType.string + ) + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, response_msg = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET'))), + protos.ParameterBinding( + name='file', + rpc_shared_memory=value + ) + ]) + + # Dispose the shared memory map since the function is done using it + shared_mem_map.dispose() + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + response_msg.response.result.status) + + response_bytes = response_msg.response.return_value.http.body.bytes + json_response = json.loads(response_bytes) + func_received_num_chars = json_response['num_chars'] + func_received_content_md5 = json_response['content_md5'] + + # Check the function response to ensure that it read the complete + # input that we provided and the md5 matches + self.assertEqual(num_chars, func_received_num_chars) + self.assertEqual(content_md5, func_received_content_md5) async def test_str_blob_write_function(self): + func_name = 'put_blob_as_str_return_http_response' + async 
with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + num_chars = int(content_size / consts.SIZE_OF_CHAR_BYTES) + http_params = {'num_chars': str(num_chars)} + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, response_msg = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET', + query=http_params))), + ]) + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + response_msg.response.result.status) + + # The function responds back in the HTTP body with the md5 digest of + # the output it created along with its size + response_bytes = response_msg.response.return_value.http.body.bytes + json_response = json.loads(response_bytes) + func_created_num_chars = json_response['num_chars'] + func_created_content_md5 = json_response['content_md5'] + + # Verify if the worker produced an output blob which was written + # in shared memory + output_data = response_msg.response.output_data + self.assertEqual(1, len(output_data)) + + output_binding = output_data[0] + binding_type = output_binding.WhichOneof('rpc_data') + self.assertEqual('rpc_shared_memory', binding_type) + + # Get the information about the shared memory region in which the + # worker wrote the function's output blob + shmem = output_binding.rpc_shared_memory + mem_map_name = shmem.name + offset = shmem.offset + count = shmem.count + data_type = shmem.type + + # Verify if the shared memory region's information is valid + self.assertTrue(self.is_valid_uuid(mem_map_name)) + self.assertEqual(0, offset) + self.assertEqual(func_created_num_chars, count) + self.assertEqual(protos.RpcDataType.string, data_type) + + # Read data from the shared 
memory region + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + count + mem_map = self.file_accessor.open_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + read_content_bytes = shared_mem_map.get_bytes() + + # Dispose the shared memory map since we have read the function's + # output now + shared_mem_map.dispose() + + # Verify if we were able to read the correct output that the + # function has produced + read_content_md5 = hashlib.md5(read_content_bytes).hexdigest() + self.assertEqual(func_created_content_md5, read_content_md5) + read_content = read_content_bytes.decode('utf-8') + self.assertEqual(len(read_content), func_created_num_chars) + + async def test_close_shared_memory_maps(self): + func_name = 'put_blob_as_bytes_return_http_response' + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + http_params = {'content_size': str(content_size)} + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, response_msg = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET', + query=http_params))), + ]) + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + response_msg.response.result.status) + + # Verify if the worker produced an output blob which was written + # in shared memory + output_data = response_msg.response.output_data + output_binding = output_data[0] + + # Get the information about the shared memory region in which the + # worker wrote the function's output blob + shmem = output_binding.rpc_shared_memory + mem_map_name = shmem.name + + # Request the worker to close the memory maps + mem_map_names = 
[mem_map_name] + response_msg = \ + await host.close_shared_memory_resources(mem_map_names) + + # Verify that the worker responds with a successful status after + # closing the requested memory map + mem_map_statuses = response_msg.response.close_map_results + self.assertEqual(len(mem_map_names), len(mem_map_statuses.keys())) + for mem_map_name in mem_map_names: + self.assertTrue(mem_map_name in mem_map_statuses) + status = mem_map_statuses[mem_map_name] + self.assertTrue(status) + + async def _test_shared_memory_not_used(self, content_size): + func_name = 'put_blob_as_bytes_return_http_response' + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + http_params = { + 'content_size': str(content_size), + 'no_random_input': str('1')} + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, response_msg = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET', + query=http_params))), + ]) + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + response_msg.response.result.status) + + # Verify if the worker produced an output blob which was sent over + # RPC instead of shared memory + output_data = response_msg.response.output_data + self.assertEqual(1, len(output_data)) + + output_binding = output_data[0] + binding_type = output_binding.WhichOneof('rpc_data') + self.assertEqual('data', binding_type) + + async def test_shared_memory_not_used_with_small_output(self): + # TODO + # Type not supported but size within shared memory enabled range + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 + await self._test_shared_memory_not_used(content_size) + + async def test_shared_memory_not_used_with_large_output(self): + content_size = 
consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + await self._test_shared_memory_not_used(content_size) + + def test_blob_input_as_stream(self): + # Use binary, use stream also in func + # TODO + pass + + def test_multiple_input_blobs(self): + # TODO + pass + + def test_multiple_output_blobs(self): + # TODO pass - def test_close_shared_memory_maps(self): + def test_multiple_input_and_output_blobs(self): + # TODO pass From 62deedc04d6f82997799045e5fb038e16e88fe63 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 23 Feb 2021 08:08:26 -0800 Subject: [PATCH 46/76] Using truncate instead of writing 0x00 byte manually upon creating mmap. More tests for multiple input/output blobs. --- .../file_accessor_unix.py | 25 +- .../function.json | 23 ++ .../main.py | 26 ++ .../function.json | 47 +++ .../main.py | 56 +++ .../test_mock_blob_shared_memory_functions.py | 363 +++++++++++++----- 6 files changed, 438 insertions(+), 102 deletions(-) create mode 100644 tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/function.json create mode 100644 tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py create mode 100644 tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/function.json create mode 100644 tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index d19ae3f6d..240da7d25 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -40,10 +40,11 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ if mem_map_size <= 0: raise Exception( f'Cannot create memory map. 
Invalid size {mem_map_size}') - fd = self._create_mem_map_file(mem_map_name, mem_map_size) - if fd is None: + file = self._create_mem_map_file(mem_map_name, mem_map_size) + if file is None: return None - mem_map = mmap.mmap(fd, mem_map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) + mem_map = mmap.mmap(file.fileno(), mem_map_size, mmap.MAP_SHARED, + mmap.PROT_WRITE) if self._is_mem_map_initialized(mem_map): raise Exception(f'Memory map {mem_map_name} already exists') self._set_mem_map_initialized(mem_map) @@ -114,8 +115,8 @@ def _create_mem_map_dir(self) -> bool: logger.error('Cannot create directory for memory maps') return False - def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ - -> Optional[int]: + def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ + -> Optional[BufferedRandom]: """ Get the file descriptor for a new memory map. Returns the file descriptor. @@ -143,15 +144,9 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) try: - fd = os.open(file_path, os.O_CREAT | os.O_TRUNC | os.O_RDWR) - # Write 0s to allocate - # TODO use truncate here instead of zeroeing out the memory - bytes_written = os.write(fd, b'\x00' * mem_mem_map_size) - if bytes_written != mem_mem_map_size: - raise Exception( - f'Cannot write 0s into new memory map {file_path} ' - f'({bytes_written} != {mem_mem_map_size})') - return fd + file = open(file_path, 'wb+') + file.truncate(mem_map_size) + return file except Exception: # If the memory map could not be created in this directory, we # keep trying in other applicable directories. @@ -160,5 +155,5 @@ def _create_mem_map_file(self, mem_map_name: str, mem_mem_map_size: int) \ # paths so we fail. 
logger.error( f'Cannot create memory map {mem_map_name} with size ' - f'{mem_mem_map_size}') + f'{mem_map_size}') return None diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/function.json b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/function.json new file mode 100644 index 000000000..17d0d2bf1 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "in", + "name": "file", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py new file mode 100644 index 000000000..7b9169be6 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py @@ -0,0 +1,26 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import json +import hashlib +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: azf.InputStream) -> azf.HttpResponse: + file_bytes = file.read() + + content_size = len(file_bytes) + content_md5 = hashlib.md5(file_bytes).hexdigest() + + response_dict = { + 'content_size': content_size, + 'content_md5': content_md5 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/function.json b/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/function.json new file mode 100644 index 000000000..52a8f89a7 --- /dev/null +++ b/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/function.json @@ -0,0 +1,47 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "in", + "name": "input_file_1", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes-1.txt" + }, + { + "type": "blob", + "direction": "in", + "name": "input_file_2", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes-2.txt" + }, + { + "type": "blob", + "direction": "out", + "name": "output_file_1", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes-out-1.txt" + }, + { + "type": "blob", + "direction": "out", + "name": "output_file_2", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-bytes-out-2.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py 
b/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py new file mode 100644 index 000000000..3709d6afa --- /dev/null +++ b/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py @@ -0,0 +1,56 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import random +import json +import hashlib +import azure.functions as azf + + +def _generate_content_and_digest(content_size): + content = bytearray(random.getrandbits(8) for _ in range(content_size)) + content_md5 = hashlib.md5(content).hexdigest() + return content, content_md5 + + +def main( + req: azf.HttpRequest, + input_file_1: bytes, + input_file_2: bytes, + output_file_1: azf.Out[bytes], + output_file_2: azf.Out[bytes]) -> azf.HttpResponse: + input_content_size_1 = len(input_file_1) + input_content_size_2 = len(input_file_2) + + input_content_md5_1 = hashlib.md5(input_file_1).hexdigest() + input_content_md5_2 = hashlib.md5(input_file_2).hexdigest() + + output_content_size_1 = int(req.params['output_content_size_1']) + output_content_size_2 = int(req.params['output_content_size_2']) + + output_content_1, output_content_md5_1 = \ + _generate_content_and_digest(output_content_size_1) + output_content_2, output_content_md5_2 = \ + _generate_content_and_digest(output_content_size_2) + + output_file_1.set(output_content_1) + output_file_2.set(output_content_2) + + response_dict = { + 'input_content_size_1': input_content_size_1, + 'input_content_size_2': input_content_size_2, + 'input_content_md5_1': input_content_md5_1, + 'input_content_md5_2': input_content_md5_2, + 'output_content_size_1': output_content_size_1, + 'output_content_size_2': output_content_size_2, + 'output_content_md5_1': output_content_md5_1, + 'output_content_md5_2': output_content_md5_2 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + 
status_code=200 + ) diff --git a/tests/unittests/test_mock_blob_shared_memory_functions.py b/tests/unittests/test_mock_blob_shared_memory_functions.py index 57097561b..1366488bf 100644 --- a/tests/unittests/test_mock_blob_shared_memory_functions.py +++ b/tests/unittests/test_mock_blob_shared_memory_functions.py @@ -16,67 +16,29 @@ def setUp(self): super().setUp() self.blob_funcs_dir = testutils.E2E_TESTS_FOLDER / 'blob_functions' - async def test_binary_blob_read_function(self): + async def test_binary_blob_read_as_bytes_function(self): + """ + Read a blob with binary input that was transferred between the host and + worker over shared memory. + The function's input data type will be bytes. + """ func_name = 'get_blob_as_bytes_return_http_response' - async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ - as host: - await host.load_function(func_name) + await self._test_binary_blob_read_function(func_name) - # Write binary content into shared memory - mem_map_name = self.get_new_mem_map_name() - content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 - content = self.get_random_bytes(content_size) - content_md5 = hashlib.md5(content).hexdigest() - mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size - mem_map = self.file_accessor.create_mem_map(mem_map_name, - mem_map_size) - shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, - mem_map) - num_bytes_written = shared_mem_map.put_bytes(content) - - # Create a message to send to the worker containing info about the - # shared memory region to read input from - value = protos.RpcSharedMemory( - name=mem_map_name, - offset=0, - count=num_bytes_written, - type=protos.RpcDataType.bytes - ) - - # Invoke the function; it should read the input blob from shared - # memory and respond back in the HTTP body with the number of bytes - # it read in the input - _, response_msg = await host.invoke_function( - func_name, [ - protos.ParameterBinding( - name='req', - 
data=protos.TypedData( - http=protos.RpcHttp( - method='GET'))), - protos.ParameterBinding( - name='file', - rpc_shared_memory=value - ) - ]) - - # Dispose the shared memory map since the function is done using it - shared_mem_map.dispose() - - # Verify if the function executed successfully - self.assertEqual(protos.StatusResult.Success, - response_msg.response.result.status) - - response_bytes = response_msg.response.return_value.http.body.bytes - json_response = json.loads(response_bytes) - func_received_content_size = json_response['content_size'] - func_received_content_md5 = json_response['content_md5'] - - # Check the function response to ensure that it read the complete - # input that we provided and the md5 matches - self.assertEqual(content_size, func_received_content_size) - self.assertEqual(content_md5, func_received_content_md5) + async def test_binary_blob_read_as_stream_function(self): + """ + Read a blob with binary input that was transferred between the host and + worker over shared memory. + The function's input data type will be InputStream. + """ + func_name = 'get_blob_as_bytes_stream_return_http_response' + await self._test_binary_blob_read_function(func_name) async def test_binary_blob_write_function(self): + """ + Write a blob with binary output that was transferred between the worker + and host over shared memory. + """ func_name = 'put_blob_as_bytes_return_http_response' async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ as host: @@ -151,6 +113,11 @@ async def test_binary_blob_write_function(self): self.assertEqual(len(read_content), func_created_content_size) async def test_str_blob_read_function(self): + """ + Read a blob with binary input that was transferred between the host and + worker over shared memory. + The function's input data type will be str. 
+ """ func_name = 'get_blob_as_str_return_http_response' async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ as host: @@ -213,6 +180,10 @@ async def test_str_blob_read_function(self): self.assertEqual(content_md5, func_received_content_md5) async def test_str_blob_write_function(self): + """ + Write a blob with string output that was transferred between the worker + and host over shared memory. + """ func_name = 'put_blob_as_str_return_http_response' async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ as host: @@ -289,6 +260,10 @@ async def test_str_blob_write_function(self): self.assertEqual(len(read_content), func_created_num_chars) async def test_close_shared_memory_maps(self): + """ + Close the shared memory maps created by the worker to transfer output + blob to the host after the host is done processing the response. + """ func_name = 'put_blob_as_bytes_return_http_response' async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ as host: @@ -338,15 +313,19 @@ async def test_close_shared_memory_maps(self): status = mem_map_statuses[mem_map_name] self.assertTrue(status) - async def _test_shared_memory_not_used(self, content_size): + async def test_shared_memory_not_used_with_small_output(self): + """ + Even though shared memory is enabled, small inputs will not be + transferred over shared memory (in this case from the worker to the + host.) 
+ """ func_name = 'put_blob_as_bytes_return_http_response' async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ as host: await host.load_function(func_name) - http_params = { - 'content_size': str(content_size), - 'no_random_input': str('1')} + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 + http_params = {'content_size': str(content_size)} # Invoke the function; it should read the input blob from shared # memory and respond back in the HTTP body with the number of bytes @@ -374,29 +353,239 @@ async def _test_shared_memory_not_used(self, content_size): binding_type = output_binding.WhichOneof('rpc_data') self.assertEqual('data', binding_type) - async def test_shared_memory_not_used_with_small_output(self): - # TODO - # Type not supported but size within shared memory enabled range - content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 - await self._test_shared_memory_not_used(content_size) - - async def test_shared_memory_not_used_with_large_output(self): - content_size = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER + 10 - await self._test_shared_memory_not_used(content_size) - - def test_blob_input_as_stream(self): - # Use binary, use stream also in func - # TODO - pass - - def test_multiple_input_blobs(self): - # TODO - pass - - def test_multiple_output_blobs(self): - # TODO - pass - - def test_multiple_input_and_output_blobs(self): - # TODO - pass + async def test_multiple_input_output_blobs(self): + """ + Read two blobs and write two blobs, all over shared memory. 
+ """ + func_name = 'put_get_multiple_blobs_as_bytes_return_http_response' + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + # Input 1 + # Write binary content into shared memory + mem_map_name_1 = self.get_new_mem_map_name() + input_content_size_1 = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + input_content_1 = self.get_random_bytes(input_content_size_1) + input_content_md5_1 = hashlib.md5(input_content_1).hexdigest() + input_mem_map_size_1 = \ + consts.CONTENT_HEADER_TOTAL_BYTES + input_content_size_1 + input_mem_map_1 = \ + self.file_accessor.create_mem_map(mem_map_name_1, + input_mem_map_size_1) + input_shared_mem_map_1 = \ + SharedMemoryMap(self.file_accessor, mem_map_name_1, + input_mem_map_1) + input_num_bytes_written_1 = \ + input_shared_mem_map_1.put_bytes(input_content_1) + + # Create a message to send to the worker containing info about the + # shared memory region to read input from + input_value_1 = protos.RpcSharedMemory( + name=mem_map_name_1, + offset=0, + count=input_num_bytes_written_1, + type=protos.RpcDataType.bytes + ) + + # Input 2 + # Write binary content into shared memory + mem_map_name_2 = self.get_new_mem_map_name() + input_content_size_2 = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 20 + input_content_2 = self.get_random_bytes(input_content_size_2) + input_content_md5_2 = hashlib.md5(input_content_2).hexdigest() + input_mem_map_size_2 = \ + consts.CONTENT_HEADER_TOTAL_BYTES + input_content_size_2 + input_mem_map_2 = \ + self.file_accessor.create_mem_map(mem_map_name_2, + input_mem_map_size_2) + input_shared_mem_map_2 = \ + SharedMemoryMap(self.file_accessor, mem_map_name_2, + input_mem_map_2) + input_num_bytes_written_2 = \ + input_shared_mem_map_2.put_bytes(input_content_2) + + # Outputs + output_content_size_1 = \ + consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 11 + output_content_size_2 = \ + consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 22 + http_params = { + 
'output_content_size_1': str(output_content_size_1), + 'output_content_size_2': str(output_content_size_2)} + + # Create a message to send to the worker containing info about the + # shared memory region to read input from + input_value_2 = protos.RpcSharedMemory( + name=mem_map_name_2, + offset=0, + count=input_num_bytes_written_2, + type=protos.RpcDataType.bytes + ) + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, response_msg = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET', + query=http_params))), + protos.ParameterBinding( + name='input_file_1', + rpc_shared_memory=input_value_1 + ), + protos.ParameterBinding( + name='input_file_2', + rpc_shared_memory=input_value_2 + ) + ]) + + # Dispose the shared memory map since the function is done using it + input_shared_mem_map_1.dispose() + input_shared_mem_map_2.dispose() + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + response_msg.response.result.status) + + response_bytes = response_msg.response.return_value.http.body.bytes + json_response = json.loads(response_bytes) + + func_received_content_size_1 = json_response['input_content_size_1'] + func_received_content_md5_1 = json_response['input_content_md5_1'] + func_received_content_size_2 = json_response['input_content_size_2'] + func_received_content_md5_2 = json_response['input_content_md5_2'] + func_created_content_size_1 = json_response['output_content_size_1'] + func_created_content_size_2 = json_response['output_content_size_2'] + func_created_content_md5_1 = json_response['output_content_md5_1'] + func_created_content_md5_2 = json_response['output_content_md5_2'] + + # Check the function response to ensure that it read the complete + # input that we provided and the md5 matches + 
self.assertEqual(input_content_size_1, func_received_content_size_1) + self.assertEqual(input_content_md5_1, func_received_content_md5_1) + self.assertEqual(input_content_size_2, func_received_content_size_2) + self.assertEqual(input_content_md5_2, func_received_content_md5_2) + + # Verify if the worker produced two output blobs which were written + # in shared memory + output_data = response_msg.response.output_data + self.assertEqual(2, len(output_data)) + + # Output 1 + output_binding_1 = output_data[0] + binding_type = output_binding_1.WhichOneof('rpc_data') + self.assertEqual('rpc_shared_memory', binding_type) + + shmem_1 = output_binding_1.rpc_shared_memory + self._verify_function_output(shmem_1, func_created_content_size_1, + func_created_content_md5_1) + + # Output 2 + output_binding_2 = output_data[1] + binding_type = output_binding_2.WhichOneof('rpc_data') + self.assertEqual('rpc_shared_memory', binding_type) + + shmem_2 = output_binding_2.rpc_shared_memory + self._verify_function_output(shmem_2, func_created_content_size_2, + func_created_content_md5_2) + + async def _test_binary_blob_read_function(self, func_name): + async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ + as host: + await host.load_function(func_name) + + # Write binary content into shared memory + mem_map_name = self.get_new_mem_map_name() + content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 + content = self.get_random_bytes(content_size) + content_md5 = hashlib.md5(content).hexdigest() + mem_map_size = consts.CONTENT_HEADER_TOTAL_BYTES + content_size + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + num_bytes_written = shared_mem_map.put_bytes(content) + + # Create a message to send to the worker containing info about the + # shared memory region to read input from + value = protos.RpcSharedMemory( + name=mem_map_name, + offset=0, + 
count=num_bytes_written, + type=protos.RpcDataType.bytes + ) + + # Invoke the function; it should read the input blob from shared + # memory and respond back in the HTTP body with the number of bytes + # it read in the input + _, response_msg = await host.invoke_function( + func_name, [ + protos.ParameterBinding( + name='req', + data=protos.TypedData( + http=protos.RpcHttp( + method='GET'))), + protos.ParameterBinding( + name='file', + rpc_shared_memory=value + ) + ]) + + # Dispose the shared memory map since the function is done using it + shared_mem_map.dispose() + + # Verify if the function executed successfully + self.assertEqual(protos.StatusResult.Success, + response_msg.response.result.status) + + response_bytes = response_msg.response.return_value.http.body.bytes + json_response = json.loads(response_bytes) + func_received_content_size = json_response['content_size'] + func_received_content_md5 = json_response['content_md5'] + + # Check the function response to ensure that it read the complete + # input that we provided and the md5 matches + self.assertEqual(content_size, func_received_content_size) + self.assertEqual(content_md5, func_received_content_md5) + + def _verify_function_output( + self, + shmem: protos.RpcSharedMemory, + expected_size: int, + expected_md5: str): + output_mem_map_name = shmem.name + output_offset = shmem.offset + output_count = shmem.count + output_data_type = shmem.type + + # Verify if the shared memory region's information is valid + self.assertTrue(self.is_valid_uuid(output_mem_map_name)) + self.assertEqual(0, output_offset) + self.assertEqual(expected_size, output_count) + self.assertEqual(protos.RpcDataType.bytes, output_data_type) + + # Read data from the shared memory region + output_mem_map_size = \ + consts.CONTENT_HEADER_TOTAL_BYTES + output_count + output_mem_map = \ + self.file_accessor.open_mem_map(output_mem_map_name, + output_mem_map_size) + output_shared_mem_map = \ + SharedMemoryMap(self.file_accessor, 
output_mem_map_name, + output_mem_map) + output_read_content = output_shared_mem_map.get_bytes() + + # Dispose the shared memory map since we have read the function's + # output now + output_shared_mem_map.dispose() + + # Verify if we were able to read the correct output that the + # function has produced + output_read_content_md5 = hashlib.md5(output_read_content).hexdigest() + self.assertEqual(expected_md5, output_read_content_md5) + self.assertEqual(len(output_read_content), expected_size) From 66d53e1d9af92ad9b666f6823c2d785c27d07fc6 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 23 Feb 2021 09:32:05 -0800 Subject: [PATCH 47/76] Addressing comments --- .../file_accessor.py | 16 ++++++---- .../file_accessor_unix.py | 3 ++ .../file_accessor_windows.py | 5 ++++ .../shared_memory_constants.py | 29 ++++++++++--------- tests/unittests/test_shared_memory_manager.py | 2 +- 5 files changed, 34 insertions(+), 21 deletions(-) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py index fab15357c..a1e45d749 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor.py @@ -15,6 +15,8 @@ class FileAccessor(metaclass=ABCMeta): Currently the following two sub-classes are implemented: 1) FileAccessorWindows 2) FileAccessorUnix + Note: Platform specific details of mmap can be found in the official docs: + https://docs.python.org/3/library/mmap.html """ @abstractmethod def open_mem_map( @@ -54,17 +56,18 @@ def _is_mem_map_initialized(self, mem_map: mmap.mmap) -> bool: This is used to check if a new memory map was created successfully and we don't end up using an existing one. 
""" + original_pos = mem_map.tell() # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) # Read the first byte byte_read = mem_map.read(1) # Check if the dirty bit was set or not - if byte_read == consts.MEM_MAP_INITIALIZED_FLAG: + if byte_read == consts.HeaderFlags.Initialized: is_set = True else: is_set = False - # Seek back the memory map to the begginging - mem_map.seek(0) + # Seek back the memory map to the original position + mem_map.seek(original_pos) return is_set def _set_mem_map_initialized(self, mem_map: mmap.mmap): @@ -72,9 +75,10 @@ def _set_mem_map_initialized(self, mem_map: mmap.mmap): Sets the dirty bit in the header of the memory map to indicate that this memory map is not new anymore. """ + original_pos = mem_map.tell() # The dirty bit is the first byte of the header so seek to the beginning mem_map.seek(0) # Set the dirty bit - mem_map.write(consts.MEM_MAP_INITIALIZED_FLAG) - # Seek back the memory map to the begginging - mem_map.seek(0) + mem_map.write(consts.HeaderFlags.Initialized) + # Seek back the memory map to the original position + mem_map.seek(original_pos) diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py index 240da7d25..3a8c7644a 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py @@ -20,6 +20,9 @@ def open_mem_map( mem_map_name: str, mem_map_size: int, access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: + """ + Note: mem_map_size = 0 means open the entire mmap. + """ if mem_map_name is None or mem_map_name == '': raise Exception( f'Cannot open memory map. 
Invalid name {mem_map_name}') diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py index 35d0a7f63..8be032641 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py @@ -18,6 +18,7 @@ def open_mem_map( mem_map_size: int, access: int = mmap.ACCESS_READ) -> Optional[mmap.mmap]: """ + Note: mem_map_size = 0 means open the entire mmap. Note: On Windows, an mmap is created if one does not exist even when attempting to open it. """ @@ -58,5 +59,9 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ return mem_map def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: + """ + In Windows, an mmap is not backed by a file so no file needs to be + deleted. + """ mem_map.close() return True diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py index 040a32ff3..593bb131e 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py @@ -3,16 +3,20 @@ class SharedMemoryConstants: - # Directories in Unix where the memory maps can be found - UNIX_TEMP_DIRS = ["/dev/shm"] - - # Suffix for the temp directories containing memory maps in Unix - UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" + class HeaderFlags: + """ + Flags that are present in the header region of the memory maps. + """ + # Indicates that the memory map has been initialized, may be in use and + # is not new. + # This represents a boolean value of True. + Initialized = b'\x01' # The length of a bool which is the length of the part of the header flag # specifying if the memory map is already created and used. 
# This is to distinguish between new memory maps and ones that were # previously created and may be in use already. + # Header flags are defined in the class SharedMemoryConstants.HeaderFlags. MEM_MAP_INITIALIZED_FLAG_NUM_BYTES = 1 # The length of a long which is the length of the part of the header @@ -23,15 +27,6 @@ class SharedMemoryConstants: CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + \ CONTENT_LENGTH_NUM_BYTES - # A flag to indicate that the memory map has been initialized, may be in use - # and is not new. - # This represents a boolean value of True. - MEM_MAP_INITIALIZED_FLAG = b'\x01' - - # A flag to indicate that the memory map has not yet been initialized. - # This represents a boolean value of False. - MEM_MAP_UNINITIALIZED_FLAG = b'\x00' - # Minimum size (in number of bytes) an object must be in order for it to be # transferred over shared memory. # If the object is smaller than this, gRPC is used. @@ -55,3 +50,9 @@ class SharedMemoryConstants: # by this constant. 
# Corresponding logic in the host can be found in SharedMemoryManager.cs SIZE_OF_CHAR_BYTES = 2 + + # Directories in Unix where the memory maps can be found + UNIX_TEMP_DIRS = ["/dev/shm"] + + # Suffix for the temp directories containing memory maps in Unix + UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" diff --git a/tests/unittests/test_shared_memory_manager.py b/tests/unittests/test_shared_memory_manager.py index 16455c80e..24893182f 100644 --- a/tests/unittests/test_shared_memory_manager.py +++ b/tests/unittests/test_shared_memory_manager.py @@ -78,7 +78,7 @@ def test_put_bytes(self): content = self.get_random_bytes(content_size) shared_mem_meta = manager.put_bytes(content) self.assertIsNotNone(shared_mem_meta) - self.assertTrue(self._is_valid_uuid(shared_mem_meta.mem_map_name)) + self.assertTrue(self.is_valid_uuid(shared_mem_meta.mem_map_name)) self.assertEqual(content_size, shared_mem_meta.count) free_success = manager.free_mem_map(shared_mem_meta.mem_map_name) self.assertTrue(free_success) From c4665dbae407a90f45ff7b8f3d6e899d89b84eda Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 23 Feb 2021 11:04:26 -0800 Subject: [PATCH 48/76] Adding missing tests and doc strings for tests and their classes --- .../shared_memory_manager.py | 4 + .../main.py | 4 + .../main.py | 4 + .../main.py | 4 + .../main.py | 5 + .../main.py | 5 + .../main.py | 7 ++ tests/unittests/test_file_accessor.py | 22 ++++ tests/unittests/test_file_accessor_factory.py | 24 +++- .../test_mock_blob_shared_memory_functions.py | 12 ++ tests/unittests/test_shared_memory_manager.py | 108 +++++++++++++++++- tests/unittests/test_shared_memory_map.py | 62 +++++++++- 12 files changed, 252 insertions(+), 9 deletions(-) diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py index e9442125d..584981d69 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py 
+++ b/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py @@ -86,6 +86,8 @@ def put_string(self, content: str) -> Optional[SharedMemoryMetadata]: Writes the given string into shared memory. Returns the name of the memory map into which the data was written if succesful, None otherwise. + Note: The encoding used here must be consistent with what is used by the + host in SharedMemoryManager.cs (GetStringAsync/PutStringAsync). """ if content is None: return None @@ -122,6 +124,8 @@ def get_string(self, mem_map_name: str, offset: int, count: int) \ the provided offset and reading a total of count bytes. Returns the data read from shared memory as a string if successful, None otherwise. + Note: The encoding used here must be consistent with what is used by the + host in SharedMemoryManager.cs (GetStringAsync/PutStringAsync). """ content_bytes = self.get_bytes(mem_map_name, offset, count) if content_bytes is None: diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py index 708dacf72..a10faca86 100644 --- a/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_return_http_response/main.py @@ -7,6 +7,10 @@ def main(req: azf.HttpRequest, file: bytes) -> azf.HttpResponse: + """ + Read a blob (bytes) and respond back (in HTTP response) with the number of + bytes read and the MD5 digest of the content. 
+ """ assert isinstance(file, bytes) content_size = len(file) diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py index 7b9169be6..24b024449 100644 --- a/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py @@ -7,6 +7,10 @@ def main(req: azf.HttpRequest, file: azf.InputStream) -> azf.HttpResponse: + """ + Read a blob (bytes) and respond back (in HTTP response) with the number of + bytes read and the MD5 digest of the content. + """ file_bytes = file.read() content_size = len(file_bytes) diff --git a/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py index 9455d5e67..0e03f7d61 100644 --- a/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py +++ b/tests/endtoend/blob_functions/get_blob_as_str_return_http_response/main.py @@ -7,6 +7,10 @@ def main(req: azf.HttpRequest, file: str) -> azf.HttpResponse: + """ + Read a blob (string) and respond back (in HTTP response) with the number of + characters read and the MD5 digest of the utf-8 encoded content. + """ assert isinstance(file, str) num_chars = len(file) diff --git a/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py index 95236ed23..e6e7109a7 100644 --- a/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py +++ b/tests/endtoend/blob_functions/put_blob_as_bytes_return_http_response/main.py @@ -8,6 +8,11 @@ def main(req: azf.HttpRequest, file: azf.Out[bytes]) -> azf.HttpResponse: + """ + Write a blob (bytes) and respond back (in HTTP response) with the number of + bytes written and the MD5 digest of the content. 
+ The number of bytes to write are specified in the input HTTP request. + """ content_size = int(req.params['content_size']) # When this is set, then 0x01 byte is repeated content_size number of diff --git a/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py b/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py index 97b3dbd13..901a7f3f1 100644 --- a/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py +++ b/tests/endtoend/blob_functions/put_blob_as_str_return_http_response/main.py @@ -9,6 +9,11 @@ def main(req: azf.HttpRequest, file: azf.Out[str]) -> azf.HttpResponse: + """ + Write a blob (string) and respond back (in HTTP response) with the number of + characters written and the MD5 digest of the utf-8 encoded content. + The number of characters to write are specified in the input HTTP request. + """ num_chars = int(req.params['num_chars']) content = ''.join(random.choices(string.ascii_uppercase + string.digits, diff --git a/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py b/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py index 3709d6afa..224d33d02 100644 --- a/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py +++ b/tests/endtoend/blob_functions/put_get_multiple_blobs_as_bytes_return_http_response/main.py @@ -19,6 +19,13 @@ def main( input_file_2: bytes, output_file_1: azf.Out[bytes], output_file_2: azf.Out[bytes]) -> azf.HttpResponse: + """ + Read two blobs (bytes) and respond back (in HTTP response) with the number + of bytes read from each blob and the MD5 digest of the content of each. + Write two blobs (bytes) and respond back (in HTTP response) with the number + bytes written in each blob and the MD5 digest of the content of each. + The number of bytes to write are specified in the input HTTP request. 
+ """ input_content_size_1 = len(input_file_1) input_content_size_2 = len(input_file_2) diff --git a/tests/unittests/test_file_accessor.py b/tests/unittests/test_file_accessor.py index 325b7dcb0..798f0ec56 100644 --- a/tests/unittests/test_file_accessor.py +++ b/tests/unittests/test_file_accessor.py @@ -7,7 +7,13 @@ class TestFileAccessor(testutils.SharedMemoryTestCase): + """ + Tests for FileAccessor. + """ def test_create_and_delete_mem_map(self): + """ + Verify if memory maps were created and deleted. + """ for mem_map_size in [1, 10, 1024, 2 * 1024 * 1024, 10 * 1024 * 1024]: mem_map_name = self.get_new_mem_map_name() mem_map = self.file_accessor.create_mem_map(mem_map_name, @@ -18,6 +24,10 @@ def test_create_and_delete_mem_map(self): self.assertTrue(delete_status) def test_create_mem_map_invalid_inputs(self): + """ + Attempt to create memory maps with invalid inputs (size and name) and + verify that an Exception is raised. + """ mem_map_name = self.get_new_mem_map_name() inv_mem_map_size = 0 with self.assertRaisesRegex(Exception, 'Invalid size'): @@ -31,6 +41,9 @@ def test_create_mem_map_invalid_inputs(self): self.file_accessor.create_mem_map(inv_mem_map_name, mem_map_size) def test_open_existing_mem_map(self): + """ + Verify that an existing memory map can be opened. + """ mem_map_size = 1024 mem_map_name = self.get_new_mem_map_name() mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) @@ -41,6 +54,10 @@ def test_open_existing_mem_map(self): self.assertTrue(delete_status) def test_open_mem_map_invalid_inputs(self): + """ + Attempt to open a memory map with invalid inputs (size and name) and + verify that an Exception is raised. 
+ """ mem_map_name = self.get_new_mem_map_name() inv_mem_map_size = -1 with self.assertRaisesRegex(Exception, 'Invalid size'): @@ -56,6 +73,11 @@ def test_open_mem_map_invalid_inputs(self): @unittest.skipIf(os.name == 'nt', 'Windows will create an mmap if one does not exist') def test_open_deleted_mem_map(self): + """ + Attempt to open a deleted memory map and verify that it fails. + Note: Windows creates a new memory map if one does not exist when + opening a memory map, so we skip this test on Windows. + """ mem_map_size = 1024 mem_map_name = self.get_new_mem_map_name() mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) diff --git a/tests/unittests/test_file_accessor_factory.py b/tests/unittests/test_file_accessor_factory.py index f3ae5e3c4..06dd0208f 100644 --- a/tests/unittests/test_file_accessor_factory.py +++ b/tests/unittests/test_file_accessor_factory.py @@ -12,9 +12,23 @@ class TestFileAccessorFactory(unittest.TestCase): - def test_proper_subclass_generated(self): + """ + Tests for FileAccessorFactory. + """ + @unittest.skipIf(os.name != 'nt', + 'FileAccessorWindows is only valid on Windows') + def test_file_accessor_windows_created(self): + """ + Verify that FileAccessorWindows was created when running on Windows. + """ file_accessor = FileAccessorFactory.create_file_accessor() - if os.name == 'nt': - self.assertTrue(type(file_accessor) is FileAccessorWindows) - else: - self.assertTrue(type(file_accessor) is FileAccessorUnix) + self.assertTrue(type(file_accessor) is FileAccessorWindows) + + @unittest.skipIf(os.name == 'nt', + 'FileAccessorUnix is only valid on Unix') + def test_file_accessor_unix_created(self): + """ + Verify that FileAccessorUnix was created when running on Windows. 
+ """ + file_accessor = FileAccessorFactory.create_file_accessor() + self.assertTrue(type(file_accessor) is FileAccessorUnix) diff --git a/tests/unittests/test_mock_blob_shared_memory_functions.py b/tests/unittests/test_mock_blob_shared_memory_functions.py index 1366488bf..712f6158a 100644 --- a/tests/unittests/test_mock_blob_shared_memory_functions.py +++ b/tests/unittests/test_mock_blob_shared_memory_functions.py @@ -12,6 +12,10 @@ class TestMockBlobSharedMemoryFunctions(testutils.SharedMemoryTestCase, testutils.AsyncTestCase): + """ + Test the use of shared memory to transfer input and output data to and from + the host/worker. + """ def setUp(self): super().setUp() self.blob_funcs_dir = testutils.E2E_TESTS_FOLDER / 'blob_functions' @@ -495,6 +499,10 @@ async def test_multiple_input_output_blobs(self): func_created_content_md5_2) async def _test_binary_blob_read_function(self, func_name): + """ + Verify that the function executed successfully when the worker received + inputs for the function over shared memory. + """ async with testutils.start_mockhost(script_root=self.blob_funcs_dir) \ as host: await host.load_function(func_name) @@ -558,6 +566,10 @@ def _verify_function_output( shmem: protos.RpcSharedMemory, expected_size: int, expected_md5: str): + """ + Verify if the output produced by the worker is what we expect it to be + based on the size and MD5 digest. + """ output_mem_map_name = shmem.name output_offset = shmem.offset output_count = shmem.count diff --git a/tests/unittests/test_shared_memory_manager.py b/tests/unittests/test_shared_memory_manager.py index 24893182f..2946091fe 100644 --- a/tests/unittests/test_shared_memory_manager.py +++ b/tests/unittests/test_shared_memory_manager.py @@ -2,22 +2,62 @@ # Licensed under the MIT License. 
import math +import os +from azure_functions_worker.utils.common import is_envvar_true from azure.functions import meta as bind_meta from azure_functions_worker import testutils from azure_functions_worker.shared_memory_data_transfer \ import SharedMemoryManager from azure_functions_worker.shared_memory_data_transfer \ import SharedMemoryConstants as consts +from azure_functions_worker.constants \ + import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED class TestSharedMemoryManager(testutils.SharedMemoryTestCase): + """ + Tests for SharedMemoryManager. + """ def test_is_enabled(self): - pass + """ + Verify that when the AppSetting is enabled, SharedMemoryManager is + enabled. + """ + # Make sure shared memory data transfer is enabled + was_shmem_env_true = is_envvar_true( + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) + os.environ.update( + {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '1'}) + manager = SharedMemoryManager() + self.assertTrue(manager.is_enabled()) + # Restore the env variable to original value + if not was_shmem_env_true: + os.environ.update( + {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '0'}) def test_is_disabled(self): - pass + """ + Verify that when the AppSetting is disabled, SharedMemoryManager is + disabled. + """ + # Make sure shared memory data transfer is disabled + was_shmem_env_true = is_envvar_true( + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) + os.environ.update( + {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '0'}) + manager = SharedMemoryManager() + self.assertFalse(manager.is_enabled()) + # Restore the env variable to original value + if was_shmem_env_true: + os.environ.update( + {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '1'}) def test_bytes_input_support(self): + """ + Verify that the given input is supported by SharedMemoryManager to be + transfered over shared memory. + The input is bytes. 
+ """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 content = self.get_random_bytes(content_size) @@ -26,6 +66,11 @@ def test_bytes_input_support(self): self.assertTrue(is_supported) def test_string_input_support(self): + """ + Verify that the given input is supported by SharedMemoryManager to be + transfered over shared memory. + The input is string. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) @@ -35,6 +80,11 @@ def test_string_input_support(self): self.assertTrue(is_supported) def test_large_invalid_bytes_input_support(self): + """ + Verify that the given input is NOT supported by SharedMemoryManager to + be transfered over shared memory. + The input is bytes of larger than the allowed size. + """ manager = SharedMemoryManager() content_size = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER + 10 # Not using get_random_bytes to avoid slowing down for creating a large @@ -45,6 +95,11 @@ def test_large_invalid_bytes_input_support(self): self.assertFalse(is_supported) def test_small_invalid_bytes_input_support(self): + """ + Verify that the given input is NOT supported by SharedMemoryManager to + be transfered over shared memory. + The input is bytes of smaller than the allowed size. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 content = self.get_random_bytes(content_size) @@ -53,6 +108,11 @@ def test_small_invalid_bytes_input_support(self): self.assertFalse(is_supported) def test_large_invalid_string_input_support(self): + """ + Verify that the given input is NOT supported by SharedMemoryManager to + be transfered over shared memory. + The input is string of larger than the allowed size. 
+ """ manager = SharedMemoryManager() content_size = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER + 10 num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) @@ -64,6 +124,11 @@ def test_large_invalid_string_input_support(self): self.assertFalse(is_supported) def test_small_invalid_string_input_support(self): + """ + Verify that the given input is NOT supported by SharedMemoryManager to + be transfered over shared memory. + The input is string of smaller than the allowed size. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER - 10 num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) @@ -73,6 +138,10 @@ def test_small_invalid_string_input_support(self): self.assertFalse(is_supported) def test_put_bytes(self): + """ + Verify that the given input was successfully put into shared memory. + The input is bytes. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 content = self.get_random_bytes(content_size) @@ -84,11 +153,19 @@ def test_put_bytes(self): self.assertTrue(free_success) def test_invalid_put_bytes(self): + """ + Attempt to put bytes using an invalid input and verify that it fails. + """ manager = SharedMemoryManager() shared_mem_meta = manager.put_bytes(None) self.assertIsNone(shared_mem_meta) def test_get_bytes(self): + """ + Verify that the output object was successfully gotten from shared + memory. + The output is bytes. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 content = self.get_random_bytes(content_size) @@ -102,6 +179,10 @@ def test_get_bytes(self): self.assertTrue(free_success) def test_put_string(self): + """ + Verify that the given input was successfully put into shared memory. + The input is string. 
+ """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) @@ -115,11 +196,19 @@ def test_put_string(self): self.assertTrue(free_success) def test_invalid_put_string(self): + """ + Attempt to put a string using an invalid input and verify that it fails. + """ manager = SharedMemoryManager() shared_mem_meta = manager.put_string(None) self.assertIsNone(shared_mem_meta) def test_get_string(self): + """ + Verify that the output object was successfully gotten from shared + memory. + The output is string. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 num_chars = math.floor(content_size / consts.SIZE_OF_CHAR_BYTES) @@ -134,6 +223,12 @@ def test_get_string(self): self.assertTrue(free_success) def test_allocated_mem_maps(self): + """ + Verify that the SharedMemoryManager is tracking the shared memory maps + it has allocated after put operations. + Verify that those shared memory maps are freed and no longer tracked + after attempting to free them. + """ manager = SharedMemoryManager() content_size = consts.MIN_BYTES_FOR_SHARED_MEM_TRANSFER + 10 content = self.get_random_bytes(content_size) @@ -150,12 +245,21 @@ def test_allocated_mem_maps(self): self.assertEqual(0, len(manager.allocated_mem_maps.keys())) def test_invalid_put_allocated_mem_maps(self): + """ + Verify that after an invalid put operation, no shared memory maps were + added to the list of allocated/tracked shared memory maps. + i.e. no resources were leaked for invalid operations. + """ manager = SharedMemoryManager() shared_mem_meta = manager.put_bytes(None) self.assertIsNone(shared_mem_meta) self.assertEqual(0, len(manager.allocated_mem_maps.keys())) def test_invalid_free_mem_map(self): + """ + Attempt to free a shared memory map that does not exist in the list of + allocated/tracked shared memory maps and verify that it fails. 
+ """ manager = SharedMemoryManager() mem_map_name = self.get_new_mem_map_name() free_success = manager.free_mem_map(mem_map_name) diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index 2148247ab..9b382c7d5 100644 --- a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -1,6 +1,8 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +import os +import unittest from azure_functions_worker import testutils from azure_functions_worker.shared_memory_data_transfer import SharedMemoryMap from azure_functions_worker.shared_memory_data_transfer \ @@ -8,7 +10,13 @@ class TestSharedMemoryMap(testutils.SharedMemoryTestCase): + """ + Tests for SharedMemoryMap. + """ def test_init(self): + """ + Verify the initialization of a SharedMemoryMap. + """ mem_map_name = self.get_new_mem_map_name() mem_map_size = 1024 mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) @@ -19,6 +27,10 @@ def test_init(self): self.assertTrue(dispose_status) def test_init_with_invalid_inputs(self): + """ + Attempt to initialize a SharedMemoryMap from invalid inputs (name and + mmap) and verify that an Exception is raised. + """ inv_mem_map_name = None mem_map_name = self.get_new_mem_map_name() mem_map_size = 1024 @@ -32,6 +44,9 @@ def test_init_with_invalid_inputs(self): SharedMemoryMap(self.file_accessor, mem_map_name, None) def test_put_bytes(self): + """ + Create a SharedMemoryMap and write bytes to it. + """ for content_size in [1, 10, 1024, 2 * 1024 * 1024, 20 * 1024 * 1024]: mem_map_name = self.get_new_mem_map_name() mem_map_size = content_size + consts.CONTENT_HEADER_TOTAL_BYTES @@ -46,6 +61,10 @@ def test_put_bytes(self): self.assertTrue(dispose_status) def test_get_bytes(self): + """ + Create a SharedMemoryMap, write bytes to it and then read them back. + Verify that the bytes written and read match. 
+ """ for content_size in [1, 10, 1024, 2 * 1024 * 1024, 20 * 1024 * 1024]: mem_map_name = self.get_new_mem_map_name() mem_map_size = content_size + consts.CONTENT_HEADER_TOTAL_BYTES @@ -62,7 +81,46 @@ def test_get_bytes(self): self.assertTrue(dispose_status) def test_put_bytes_more_than_capacity(self): - pass + """ + Attempt to put more bytes into the created SharedMemoryMap than the + size with which it was created. Verify that an Exception is raised. + """ + mem_map_name = self.get_new_mem_map_name() + mem_map_size = 1024 + consts.CONTENT_HEADER_TOTAL_BYTES + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + # Attempt to write more bytes than the size of the memory map we created + # earlier (1024). + content_size = 2048 + content = self.get_random_bytes(content_size) + with self.assertRaisesRegex(Exception, 'out of range'): + shared_mem_map.put_bytes(content) + dispose_status = shared_mem_map.dispose() + self.assertTrue(dispose_status) + @unittest.skipIf(os.name == 'nt', + 'Windows will create an mmap if one does not exist') def test_dispose_without_delete_file(self): - pass + """ + Dispose a SharedMemoryMap without making it dispose the backing file + resources (on Unix). Verify that the same memory map can be opened again + as the backing file was still present. 
+ """ + mem_map_name = self.get_new_mem_map_name() + mem_map_size = 1024 + consts.CONTENT_HEADER_TOTAL_BYTES + mem_map = self.file_accessor.create_mem_map(mem_map_name, + mem_map_size) + shared_mem_map = SharedMemoryMap(self.file_accessor, mem_map_name, + mem_map) + # Close the memory map but do not delete the backing file + dispose_status = shared_mem_map.dispose(is_delete_file=False) + self.assertTrue(dispose_status) + # Attempt to open the memory map again, it should still open since the + # backing file is present + mem_map_op = self.file_accessor.open_mem_map(mem_map_name, mem_map_size) + self.assertIsNotNone(mem_map_op) + delete_status = \ + self.file_accessor.delete_mem_map(mem_map_name, mem_map_op) + self.assertEqual(delete_status) From dc84cf54fae04a4df8b86080cf34c10e062a4d7e Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 23 Feb 2021 11:10:22 -0800 Subject: [PATCH 49/76] assertEqual -> assertTrue in test_dispose_without_delete_file --- tests/unittests/test_shared_memory_map.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index 9b382c7d5..bb5ec4b57 100644 --- a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -123,4 +123,4 @@ def test_dispose_without_delete_file(self): self.assertIsNotNone(mem_map_op) delete_status = \ self.file_accessor.delete_mem_map(mem_map_name, mem_map_op) - self.assertEqual(delete_status) + self.assertTrue(delete_status) From 47cffb9b8c68a22fc8c23fca6f6a96f366bf8403 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Tue, 23 Feb 2021 13:26:08 -0800 Subject: [PATCH 50/76] Tried with blob trigger function - removed TODO --- azure_functions_worker/bindings/meta.py | 1 - 1 file changed, 1 deletion(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 33763f69f..404ea89cc 100644 --- 
a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -73,7 +73,6 @@ def from_incoming_proto( # Data was sent over shared memory, attempt to read datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) - # TODO gochaudh: check trigger_metadata (try with blob triggered func) elif pb_type == 'data': val = pb.data datum = datumdef.Datum.from_typed_data(val) From 52a870111285b845d743b5dc5330f8c46dd95669 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 3 Mar 2021 15:44:58 -0800 Subject: [PATCH 51/76] Addressing comments --- azure_functions_worker/bindings/meta.py | 20 ++++++++++--------- .../shared_memory_data_transfer/__init__.py | 3 +++ .../file_accessor.py | 0 .../file_accessor_factory.py | 0 .../file_accessor_unix.py | 2 +- .../file_accessor_windows.py | 2 +- .../shared_memory_constants.py | 0 .../shared_memory_manager.py | 8 ++++---- .../shared_memory_map.py | 2 +- .../shared_memory_metadata.py | 0 azure_functions_worker/constants.py | 6 ++---- azure_functions_worker/dispatcher.py | 2 +- azure_functions_worker/testutils.py | 4 ++-- tests/unittests/test_file_accessor_factory.py | 10 +++++----- .../test_mock_blob_shared_memory_functions.py | 5 +++-- tests/unittests/test_shared_memory_manager.py | 4 ++-- tests/unittests/test_shared_memory_map.py | 5 +++-- 17 files changed, 39 insertions(+), 34 deletions(-) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/__init__.py (90%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/file_accessor.py (100%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/file_accessor_factory.py (100%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/file_accessor_unix.py (99%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/file_accessor_windows.py (98%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/shared_memory_constants.py 
(100%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/shared_memory_manager.py (97%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/shared_memory_map.py (99%) rename azure_functions_worker/{ => bindings}/shared_memory_data_transfer/shared_memory_metadata.py (100%) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 404ea89cc..c3dc7a621 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -8,6 +8,10 @@ from . import datumdef from . import generic +PB_TYPE = 'rpc_data' +PB_TYPE_DATA = 'data' +PB_TYPE_SHARED_MEMORY = 'rpc_shared_memory' + def get_binding_registry(): func = sys.modules.get('azure.functions') @@ -68,14 +72,14 @@ def from_incoming_proto( else: metadata = {} - pb_type = pb.WhichOneof('rpc_data') - if pb_type == 'rpc_shared_memory': + pb_type = pb.WhichOneof(PB_TYPE) + if pb_type == PB_TYPE_DATA: + val = pb.data + datum = datumdef.Datum.from_typed_data(val) + elif pb_type == PB_TYPE_RPC_SHARED_MEMORY: # Data was sent over shared memory, attempt to read datum = datumdef.Datum.from_rpc_shared_memory(pb.rpc_shared_memory, shmem_mgr) - elif pb_type == 'data': - val = pb.data - datum = datumdef.Datum.from_typed_data(val) else: raise TypeError(f'Unknown ParameterBindingType: {pb_type}') @@ -120,21 +124,19 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) shared_mem_value = None - parameter_binding = None # If shared memory is enabled, try to transfer to host over shared memory if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) # Check if data was written into shared memory if shared_mem_value is not None: # If it was, then use the rpc_shared_memory field in response message - parameter_binding = protos.ParameterBinding( + return 
protos.ParameterBinding( name=out_name, rpc_shared_memory=shared_mem_value) else: # If not, send it as part of the response message over RPC rpc_val = datumdef.datum_as_proto(datum) assert rpc_val is not None - parameter_binding = protos.ParameterBinding( + return protos.ParameterBinding( name=out_name, data=rpc_val) - return parameter_binding diff --git a/azure_functions_worker/shared_memory_data_transfer/__init__.py b/azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py similarity index 90% rename from azure_functions_worker/shared_memory_data_transfer/__init__.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py index b56bf1d67..9c7eba113 100644 --- a/azure_functions_worker/shared_memory_data_transfer/__init__.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + """ This module provides functionality for accessing shared memory maps. 
These are used for transferring data between functions host and the worker diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor.py similarity index 100% rename from azure_functions_worker/shared_memory_data_transfer/file_accessor.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor.py diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_factory.py similarity index 100% rename from azure_functions_worker/shared_memory_data_transfer/file_accessor_factory.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_factory.py diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py similarity index 99% rename from azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index 3a8c7644a..2b5e99801 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -7,7 +7,7 @@ from io import BufferedRandom from .shared_memory_constants import SharedMemoryConstants as consts from .file_accessor import FileAccessor -from ..logging import logger +from ...logging import logger class FileAccessorUnix(FileAccessor): diff --git a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py similarity index 98% rename from azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py rename to 
azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py index 8be032641..6b1b9bced 100644 --- a/azure_functions_worker/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py @@ -4,7 +4,7 @@ import mmap from typing import Optional from .file_accessor import FileAccessor -from ..logging import logger +from ...logging import logger class FileAccessorWindows(FileAccessor): diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py similarity index 100% rename from azure_functions_worker/shared_memory_data_transfer/shared_memory_constants.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py similarity index 97% rename from azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py index 584981d69..fa0838a56 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py @@ -7,10 +7,10 @@ from .file_accessor_factory import FileAccessorFactory from .shared_memory_metadata import SharedMemoryMetadata from .shared_memory_map import SharedMemoryMap -from ..bindings.datumdef import Datum -from ..logging import logger -from ..utils.common import is_envvar_true -from ..constants import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED +from ..datumdef import Datum +from ...logging import logger +from ...utils.common import is_envvar_true +from ...constants import 
FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED class SharedMemoryManager: diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py similarity index 99% rename from azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py index bf12e8827..383b7a8a9 100644 --- a/azure_functions_worker/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py @@ -8,7 +8,7 @@ from typing import Optional from .shared_memory_constants import SharedMemoryConstants as consts from .file_accessor import FileAccessor -from ..logging import logger +from ...logging import logger class SharedMemoryMap: diff --git a/azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_metadata.py similarity index 100% rename from azure_functions_worker/shared_memory_data_transfer/shared_memory_metadata.py rename to azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_metadata.py diff --git a/azure_functions_worker/constants.py b/azure_functions_worker/constants.py index 0996455c4..dfe4b97df 100644 --- a/azure_functions_worker/constants.py +++ b/azure_functions_worker/constants.py @@ -21,6 +21,8 @@ PYTHON_ROLLBACK_CWD_PATH = "PYTHON_ROLLBACK_CWD_PATH" PYTHON_THREADPOOL_THREAD_COUNT = "PYTHON_THREADPOOL_THREAD_COUNT" PYTHON_ISOLATE_WORKER_DEPENDENCIES = "PYTHON_ISOLATE_WORKER_DEPENDENCIES" +FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED = \ + "FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED" # Setting Defaults PYTHON_THREADPOOL_THREAD_COUNT_DEFAULT = 1 @@ -31,7 +33,3 @@ # External Site URLs MODULE_NOT_FOUND_TS_URL = "https://aka.ms/functions-modulenotfound" - -# App Settings 
-FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED = \ - "FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED" diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index a002fe3c0..02d12c524 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -33,7 +33,7 @@ from .utils.tracing import marshall_exception_trace from .utils.dependency import DependencyManager from .utils.wrappers import disable_feature_by -from .shared_memory_data_transfer import SharedMemoryManager +from .bindings.shared_memory_data_transfer import SharedMemoryManager _TRUE = "true" diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 958bb48fd..a9de60873 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -35,9 +35,9 @@ import requests from azure_functions_worker._thirdparty import aio_compat -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ import FileAccessorFactory -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ import SharedMemoryConstants as consts from . import dispatcher from . 
import protos diff --git a/tests/unittests/test_file_accessor_factory.py b/tests/unittests/test_file_accessor_factory.py index 06dd0208f..e3bc225a2 100644 --- a/tests/unittests/test_file_accessor_factory.py +++ b/tests/unittests/test_file_accessor_factory.py @@ -3,12 +3,12 @@ import os import unittest -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ import FileAccessorFactory -from azure_functions_worker.shared_memory_data_transfer.file_accessor_unix \ - import FileAccessorUnix -from azure_functions_worker.shared_memory_data_transfer.file_accessor_windows \ - import FileAccessorWindows +from azure_functions_worker.bindings.\ + shared_memory_data_transfer.file_accessor_unix import FileAccessorUnix +from azure_functions_worker.bindings.\ + shared_memory_data_transfer.file_accessor_windows import FileAccessorWindows class TestFileAccessorFactory(unittest.TestCase): diff --git a/tests/unittests/test_mock_blob_shared_memory_functions.py b/tests/unittests/test_mock_blob_shared_memory_functions.py index 712f6158a..487b665bd 100644 --- a/tests/unittests/test_mock_blob_shared_memory_functions.py +++ b/tests/unittests/test_mock_blob_shared_memory_functions.py @@ -3,8 +3,9 @@ import json import hashlib -from azure_functions_worker.shared_memory_data_transfer import SharedMemoryMap -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ + import SharedMemoryMap +from azure_functions_worker.bindings.shared_memory_data_transfer \ import SharedMemoryConstants as consts from azure_functions_worker import protos from azure_functions_worker import testutils diff --git a/tests/unittests/test_shared_memory_manager.py b/tests/unittests/test_shared_memory_manager.py index 2946091fe..8f2e02aff 100644 --- a/tests/unittests/test_shared_memory_manager.py +++ b/tests/unittests/test_shared_memory_manager.py @@ -6,9 +6,9 @@ from 
azure_functions_worker.utils.common import is_envvar_true from azure.functions import meta as bind_meta from azure_functions_worker import testutils -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ import SharedMemoryManager -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ import SharedMemoryConstants as consts from azure_functions_worker.constants \ import FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index bb5ec4b57..7e5d2be0e 100644 --- a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -4,8 +4,9 @@ import os import unittest from azure_functions_worker import testutils -from azure_functions_worker.shared_memory_data_transfer import SharedMemoryMap -from azure_functions_worker.shared_memory_data_transfer \ +from azure_functions_worker.bindings.shared_memory_data_transfer \ + import SharedMemoryMap +from azure_functions_worker.bindings.shared_memory_data_transfer \ import SharedMemoryConstants as consts From 848de3de1c912525b7e44db83ed3744b30a41c17 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 3 Mar 2021 16:06:04 -0800 Subject: [PATCH 52/76] Minor const fix --- azure_functions_worker/bindings/meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c3dc7a621..c3993587c 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -10,7 +10,7 @@ PB_TYPE = 'rpc_data' PB_TYPE_DATA = 'data' -PB_TYPE_SHARED_MEMORY = 'rpc_shared_memory' +PB_TYPE_RPC_SHARED_MEMORY = 'rpc_shared_memory' def get_binding_registry(): From cd75b8557ea6ccc8a1dfaeb593d02e38a340be40 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry 
Date: Mon, 8 Mar 2021 08:29:28 -0800 Subject: [PATCH 53/76] Addressing comments --- azure_functions_worker/bindings/datumdef.py | 10 ++- azure_functions_worker/bindings/meta.py | 3 +- .../shared_memory_data_transfer/__init__.py | 3 +- .../file_accessor_unix.py | 62 +++++++------ .../file_accessor_windows.py | 11 +-- .../shared_memory_constants.py | 80 ++++++++++------- .../shared_memory_exception.py | 10 +++ .../shared_memory_manager.py | 20 ++++- .../shared_memory_map.py | 5 +- .../shared_memory_metadata.py | 4 +- azure_functions_worker/dispatcher.py | 15 ++-- .../blob_trigger_bytes/function.json | 21 +++++ .../blob_functions/blob_trigger_bytes/main.py | 22 +++++ .../main.py | 4 +- .../get_blob_triggered_bytes/function.json | 23 +++++ .../get_blob_triggered_bytes/main.py | 16 ++++ .../put_blob_trigger_bytes/function.json | 23 +++++ .../put_blob_trigger_bytes/main.py | 35 ++++++++ tests/unittests/test_file_accessor.py | 18 ++-- tests/unittests/test_shared_memory_manager.py | 87 ++++++++++++++++++- tests/unittests/test_shared_memory_map.py | 16 ++-- 21 files changed, 386 insertions(+), 102 deletions(-) create mode 100644 azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_exception.py create mode 100644 tests/endtoend/blob_functions/blob_trigger_bytes/function.json create mode 100644 tests/endtoend/blob_functions/blob_trigger_bytes/main.py create mode 100644 tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json create mode 100644 tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py create mode 100644 tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json create mode 100644 tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 5cd63a9eb..9691cff09 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -102,6 +102,11 @@ def 
from_rpc_shared_memory( Reads the specified shared memory region and converts the read data into a datum object of the corresponding type. """ + if shmem is None: + logger.warn('Cannot read from shared memory. ' + 'RpcSharedMemory is None.') + return None + mem_map_name = shmem.name offset = shmem.offset count = shmem.count @@ -148,16 +153,17 @@ def to_rpc_shared_memory( ) if shared_mem_meta is None: + logger.warn(f'Cannot write to shared memory for type: {datum.type}') return None shmem = protos.RpcSharedMemory( name=shared_mem_meta.mem_map_name, offset=0, - count=shared_mem_meta.count, + count=shared_mem_meta.count_bytes, type=data_type) logger.info( - f'Wrote {shared_mem_meta.count} bytes to memory map ' + f'Wrote {shared_mem_meta.count_bytes} bytes to memory map ' f'{shared_mem_meta.mem_map_name} for data type {data_type}') return shmem diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index c3993587c..332178f6e 100644 --- a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -124,7 +124,8 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, -> protos.ParameterBinding: datum = get_datum(binding, obj, pytype) shared_mem_value = None - # If shared memory is enabled, try to transfer to host over shared memory + # If shared memory is enabled and supported for the given datum, try to + # transfer to host over shared memory as a default if shmem_mgr.is_enabled() and shmem_mgr.is_supported(datum): shared_mem_value = datumdef.Datum.to_rpc_shared_memory(datum, shmem_mgr) # Check if data was written into shared memory diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py b/azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py index 9c7eba113..bc98616a4 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/__init__.py @@ -15,10 +15,11 
@@ from .file_accessor_factory import FileAccessorFactory from .file_accessor import FileAccessor from .shared_memory_constants import SharedMemoryConstants +from .shared_memory_exception import SharedMemoryException from .shared_memory_map import SharedMemoryMap from .shared_memory_manager import SharedMemoryManager __all__ = ( 'FileAccessorFactory', 'FileAccessor', 'SharedMemoryConstants', - 'SharedMemoryMap', 'SharedMemoryManager' + 'SharedMemoryException', 'SharedMemoryMap', 'SharedMemoryManager' ) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index 2b5e99801..2ef8c812c 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -6,6 +6,7 @@ from typing import Optional from io import BufferedRandom from .shared_memory_constants import SharedMemoryConstants as consts +from .shared_memory_exception import SharedMemoryException from .file_accessor import FileAccessor from ...logging import logger @@ -24,13 +25,14 @@ def open_mem_map( Note: mem_map_size = 0 means open the entire mmap. """ if mem_map_name is None or mem_map_name == '': - raise Exception( + raise SharedMemoryException( f'Cannot open memory map. Invalid name {mem_map_name}') if mem_map_size < 0: - raise Exception( + raise SharedMemoryException( f'Cannot open memory map. Invalid size {mem_map_size}') fd = self._open_mem_map_file(mem_map_name) if fd is None: + logger.warn(f'Cannot open file: {mem_map_name}') return None mem_map = mmap.mmap(fd.fileno(), mem_map_size, access=access) return mem_map @@ -38,18 +40,20 @@ def open_mem_map( def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ -> Optional[mmap.mmap]: if mem_map_name is None or mem_map_name == '': - raise Exception( + raise SharedMemoryException( f'Cannot create memory map. 
Invalid name {mem_map_name}') if mem_map_size <= 0: - raise Exception( + raise SharedMemoryException( f'Cannot create memory map. Invalid size {mem_map_size}') file = self._create_mem_map_file(mem_map_name, mem_map_size) if file is None: + logger.warn(f'Cannot create file: {mem_map_name}') return None mem_map = mmap.mmap(file.fileno(), mem_map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) if self._is_mem_map_initialized(mem_map): - raise Exception(f'Memory map {mem_map_name} already exists') + raise SharedMemoryException(f'Memory map {mem_map_name} ' + 'already exists') self._set_mem_map_initialized(mem_map) return mem_map @@ -71,25 +75,6 @@ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: mem_map.close() return True - def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: - """ - Get the file descriptor of an existing memory map. - Returns the BufferedRandom stream to the file. - """ - # Iterate over all the possible directories where the memory map could - # be present and try to open it. - for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) - try: - fd = open(file_path, 'r+b') - return fd - except FileNotFoundError: - pass - # The memory map was not found in any of the known directories - logger.error(f'Cannot open memory map {mem_map_name}') - return None - def _create_mem_map_dir(self) -> bool: """ Create a directory to create memory maps. @@ -118,11 +103,31 @@ def _create_mem_map_dir(self) -> bool: logger.error('Cannot create directory for memory maps') return False + def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: + """ + Get the file descriptor of an existing memory map. + Returns the BufferedRandom stream to the file. + """ + # Iterate over all the possible directories where the memory map could + # be present and try to open it. 
+ for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: + file_path = os.path.join(mem_map_temp_dir, + consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + try: + fd = open(file_path, 'r+b') + return fd + except FileNotFoundError: + pass + # The memory map was not found in any of the known directories + logger.error(f'Cannot open memory map {mem_map_name} in any of the ' + f'following directories: {consts.UNIX_TEMP_DIRS}') + return None + def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ -> Optional[BufferedRandom]: """ - Get the file descriptor for a new memory map. - Returns the file descriptor. + Create the file descriptor for a new memory map. + Returns the BufferedRandom stream to the file. """ dir_exists = False for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: @@ -130,7 +135,7 @@ def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ file_path = os.path.join(mem_map_temp_dir, consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) if os.path.exists(file_path): - raise Exception( + raise SharedMemoryException( f'File {file_path} for memory map {mem_map_name} ' f'already exists') # Check if the parent directory exists @@ -158,5 +163,6 @@ def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ # paths so we fail. 
logger.error( f'Cannot create memory map {mem_map_name} with size ' - f'{mem_map_size}') + f'{mem_map_size} in any of the following directories: ' + f'{consts.UNIX_TEMP_DIRS}') return None diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py index 6b1b9bced..c3ed8c933 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py @@ -3,6 +3,7 @@ import mmap from typing import Optional +from .shared_memory_exception import SharedMemoryException from .file_accessor import FileAccessor from ...logging import logger @@ -23,10 +24,10 @@ def open_mem_map( attempting to open it. """ if mem_map_name is None or mem_map_name == '': - raise Exception( + raise SharedMemoryException( f'Cannot open memory map. Invalid name {mem_map_name}') if mem_map_size < 0: - raise Exception( + raise SharedMemoryException( f'Cannot open memory map. Invalid size {mem_map_size}') try: mem_map = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) @@ -42,17 +43,17 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ # Windows also creates the mmap when trying to open it, if it does not # already exist. if mem_map_name is None or mem_map_name == '': - raise Exception( + raise SharedMemoryException( f'Cannot create memory map. Invalid name {mem_map_name}') if mem_map_size <= 0: - raise Exception( + raise SharedMemoryException( f'Cannot create memory map. 
Invalid size {mem_map_size}') mem_map = self.open_mem_map(mem_map_name, mem_map_size, mmap.ACCESS_WRITE) if mem_map is None: return None if self._is_mem_map_initialized(mem_map): - raise Exception( + raise SharedMemoryException( f'Cannot create memory map {mem_map_name} as it ' f'already exists') self._set_mem_map_initialized(mem_map) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py index 593bb131e..c69f77f22 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py @@ -7,52 +7,70 @@ class HeaderFlags: """ Flags that are present in the header region of the memory maps. """ - # Indicates that the memory map has been initialized, may be in use and - # is not new. - # This represents a boolean value of True. + """ + Indicates that the memory map has been initialized, may be in use and + is not new. + This represents a boolean value of True. + """ Initialized = b'\x01' - # The length of a bool which is the length of the part of the header flag - # specifying if the memory map is already created and used. - # This is to distinguish between new memory maps and ones that were - # previously created and may be in use already. - # Header flags are defined in the class SharedMemoryConstants.HeaderFlags. + """ + The length of a bool which is the length of the part of the header flag + specifying if the memory map is already created and used. + This is to distinguish between new memory maps and ones that were + previously created and may be in use already. + Header flags are defined in the class SharedMemoryConstants.HeaderFlags. + """ MEM_MAP_INITIALIZED_FLAG_NUM_BYTES = 1 - # The length of a long which is the length of the part of the header - # specifying content length in the memory map. 
+ """ + The length of a long which is the length of the part of the header + specifying content length in the memory map. + """ CONTENT_LENGTH_NUM_BYTES = 8 - # The total length of the header + """ + The total length of the header + """ CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + \ CONTENT_LENGTH_NUM_BYTES - # Minimum size (in number of bytes) an object must be in order for it to be - # transferred over shared memory. - # If the object is smaller than this, gRPC is used. - # Note: This needs to be consistent among the host and workers. - # e.g. in the host, it is defined in SharedMemoryConstants.cs + """ + Minimum size (in number of bytes) an object must be in order for it to be + transferred over shared memory. + If the object is smaller than this, gRPC is used. + Note: This needs to be consistent among the host and workers. + e.g. in the host, it is defined in SharedMemoryConstants.cs + """ MIN_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB - # Maximum size (in number of bytes) an object must be in order for it to be - # transferred over shared memory. - # This limit is imposed because initializing objects like greater than 2GB - # is not allowed in DotNet. - # Ref: https://stackoverflow.com/a/3944336/3132415 - # Note: This needs to be consistent among the host and workers. - # e.g. in the host, it is defined in SharedMemoryConstants.cs + """ + Maximum size (in number of bytes) an object must be in order for it to be + transferred over shared memory. + This limit is imposed because initializing objects like greater than 2GB + is not allowed in DotNet. + Ref: https://stackoverflow.com/a/3944336/3132415 + Note: This needs to be consistent among the host and workers. + e.g. in the host, it is defined in SharedMemoryConstants.cs + """ MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 2 * 1024 * 1024 * 1024 # 2 GB - # This is what the size of a character is in DotNet. Can be verified by - # doing "sizeof(char)". 
- # To keep the limits consistent, when determining if a string can be - # transferred over shared memory, we multiply the number of characters - # by this constant. - # Corresponding logic in the host can be found in SharedMemoryManager.cs + """ + This is what the size of a character is in DotNet. Can be verified by + doing "sizeof(char)". + To keep the limits consistent, when determining if a string can be + transferred over shared memory, we multiply the number of characters + by this constant. + Corresponding logic in the host can be found in SharedMemoryManager.cs + """ SIZE_OF_CHAR_BYTES = 2 - # Directories in Unix where the memory maps can be found + """ + Directories in Unix where the memory maps can be found + """ UNIX_TEMP_DIRS = ["/dev/shm"] - # Suffix for the temp directories containing memory maps in Unix + """ + Suffix for the temp directories containing memory maps in Unix + """ UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_exception.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_exception.py new file mode 100644 index 000000000..cf802d336 --- /dev/null +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_exception.py @@ -0,0 +1,10 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + + +class SharedMemoryException(Exception): + """ + Exception raised when using shared memory. 
+ """ + def __init__(self, msg: str) -> None: + super().__init__(msg) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py index fa0838a56..16f2b1109 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py @@ -19,7 +19,7 @@ class SharedMemoryManager: memory. This is used for transferring input/output data of the function from/to the functions host over shared memory as opposed to RPC to improve the rate of - data transfer and the function's nd-to-end latency. + data transfer and the function's end-to-end latency. """ def __init__(self): # The allocated memory maps are tracked here so that a reference to them @@ -28,8 +28,22 @@ def __init__(self): # Having a mapping of the name and the memory map is then later used to # close a given memory map by its name, after it has been used. # key: mem_map_name, val: SharedMemoryMap - self.allocated_mem_maps: Dict[str, SharedMemoryMap] = {} - self.file_accessor = FileAccessorFactory.create_file_accessor() + self._allocated_mem_maps: Dict[str, SharedMemoryMap] = {} + self._file_accessor = FileAccessorFactory.create_file_accessor() + + @property + def allocated_mem_maps(self): + """ + List of allocated shared memory maps. + """ + return self._allocated_mem_maps + + @property + def file_accessor(self): + """ + FileAccessor instance for accessing memory maps. 
+ """ + return self._file_accessor def is_enabled(self) -> bool: """ diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py index 383b7a8a9..8bd0e977d 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py @@ -7,6 +7,7 @@ import sys from typing import Optional from .shared_memory_constants import SharedMemoryConstants as consts +from .shared_memory_exception import SharedMemoryException from .file_accessor import FileAccessor from ...logging import logger @@ -21,11 +22,11 @@ def __init__( mem_map_name: str, mem_map: mmap.mmap): if mem_map is None: - raise Exception( + raise SharedMemoryException( 'Cannot initialize SharedMemoryMap. Invalid memory map ' 'provided') if mem_map_name is None or mem_map_name == '': - raise Exception( + raise SharedMemoryException( f'Cannot initialize SharedMemoryMap. Invalid name ' f'{mem_map_name}') self.file_accessor = file_accessor diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_metadata.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_metadata.py index 8e1c489aa..ee5c50e07 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_metadata.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_metadata.py @@ -6,8 +6,8 @@ class SharedMemoryMetadata: """ Information about a shared memory region. 
""" - def __init__(self, mem_map_name, count): + def __init__(self, mem_map_name, count_bytes): # Name of the memory map self.mem_map_name = mem_map_name # Number of bytes of content in the memory map - self.count = count + self.count_bytes = count_bytes diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 34beb85ec..f20e7b00e 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -498,15 +498,16 @@ async def _handle__close_shared_memory_resources_request(self, req): # Assign default value of False to all result values. # If we are successfully able to close a memory map, its result will be # set to True. - results = {map_name: False for map_name in map_names} + results = {mem_map_name: False for mem_map_name in map_names} try: - for map_name in map_names: - success = self._shmem_mgr.free_mem_map(map_name) - results[map_name] = success - except Exception as ex: - # TODO log exception - print(str(ex)) + for mem_map_name in map_names: + try: + success = self._shmem_mgr.free_mem_map(mem_map_name) + results[mem_map_name] = success + except Exception as e: + logger.error(f'Cannot free memory map {mem_map_name} - {e}', + exc_info=True) finally: response = protos.CloseSharedMemoryResourcesResponse( close_map_results=results) diff --git a/tests/endtoend/blob_functions/blob_trigger_bytes/function.json b/tests/endtoend/blob_functions/blob_trigger_bytes/function.json new file mode 100644 index 000000000..b8efe6509 --- /dev/null +++ b/tests/endtoend/blob_functions/blob_trigger_bytes/function.json @@ -0,0 +1,21 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "blobTrigger", + "direction": "in", + "name": "file", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-blob-trigger-bytes.txt" + }, + { + "type": "blob", + "direction": "out", + "name": "$return", + "dataType": "string", + "connection": "AzureWebJobsStorage", + "path": 
"python-worker-tests/shmem-test-blob-triggered-bytes.json" + } + ] +} diff --git a/tests/endtoend/blob_functions/blob_trigger_bytes/main.py b/tests/endtoend/blob_functions/blob_trigger_bytes/main.py new file mode 100644 index 000000000..7a596577b --- /dev/null +++ b/tests/endtoend/blob_functions/blob_trigger_bytes/main.py @@ -0,0 +1,22 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import json +import hashlib + + +def main(file: bytes) -> str: + """ + Reads an input file (bytes) and writes the number of bytes read and the MD5 + digest of the read content into an output file, in JSON format. + """ + content_size = len(file) + content_md5 = hashlib.md5(file).hexdigest() + + output_content = { + 'content_size': content_size, + 'content_md5': content_md5 + } + + output_json = json.dumps(output_content) + return output_json diff --git a/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py index 24b024449..6b49a9423 100644 --- a/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py +++ b/tests/endtoend/blob_functions/get_blob_as_bytes_stream_return_http_response/main.py @@ -8,8 +8,8 @@ def main(req: azf.HttpRequest, file: azf.InputStream) -> azf.HttpResponse: """ - Read a blob (bytes) and respond back (in HTTP response) with the number of - bytes read and the MD5 digest of the content. + Read a blob (as azf.InputStream) and respond back (in HTTP response) with + the number of bytes read and the MD5 digest of the content. 
""" file_bytes = file.read() diff --git a/tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json b/tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json new file mode 100644 index 000000000..66d57e99c --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "in", + "name": "file", + "dataType": "string", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-blob-triggered-bytes.json" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py b/tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py new file mode 100644 index 000000000..6f086c703 --- /dev/null +++ b/tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py @@ -0,0 +1,16 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: str) -> azf.HttpResponse: + """ + Read the given file (assumed to be in JSON format) and respond back with its + content in the HTTP response. 
+ """ + return azf.HttpResponse( + body=file, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json b/tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json new file mode 100644 index 000000000..b60abd39e --- /dev/null +++ b/tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json @@ -0,0 +1,23 @@ +{ + "scriptFile": "main.py", + "bindings": [ + { + "type": "httpTrigger", + "direction": "in", + "name": "req" + }, + { + "type": "blob", + "direction": "out", + "name": "file", + "dataType": "binary", + "connection": "AzureWebJobsStorage", + "path": "python-worker-tests/shmem-test-blob-trigger-bytes.txt" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py b/tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py new file mode 100644 index 000000000..d7cda92a5 --- /dev/null +++ b/tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py @@ -0,0 +1,35 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + +import json +import random +import hashlib +import azure.functions as azf + + +def main(req: azf.HttpRequest, file: azf.Out[bytes]) -> azf.HttpResponse: + """ + Write a blob (bytes) and respond back (in HTTP response) with the number of + bytes written and the MD5 digest of the content. + The number of bytes to write are specified in the input HTTP request. 
+ This function's output blob triggers another function: blob_trigger_bytes + """ + content_size = int(req.params['content_size']) + + content = bytearray(random.getrandbits(8) for _ in range(content_size)) + content_md5 = hashlib.md5(content).hexdigest() + + file.set(content) + + response_dict = { + 'content_size': content_size, + 'content_md5': content_md5 + } + + response_body = json.dumps(response_dict, indent=2) + + return azf.HttpResponse( + body=response_body, + mimetype="application/json", + status_code=200 + ) diff --git a/tests/unittests/test_file_accessor.py b/tests/unittests/test_file_accessor.py index 798f0ec56..238ce9e52 100644 --- a/tests/unittests/test_file_accessor.py +++ b/tests/unittests/test_file_accessor.py @@ -4,6 +4,8 @@ import os import unittest from azure_functions_worker import testutils +from azure_functions_worker.bindings.shared_memory_data_transfer \ + import SharedMemoryException class TestFileAccessor(testutils.SharedMemoryTestCase): @@ -26,18 +28,18 @@ def test_create_and_delete_mem_map(self): def test_create_mem_map_invalid_inputs(self): """ Attempt to create memory maps with invalid inputs (size and name) and - verify that an Exception is raised. + verify that an SharedMemoryException is raised. 
""" mem_map_name = self.get_new_mem_map_name() inv_mem_map_size = 0 - with self.assertRaisesRegex(Exception, 'Invalid size'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid size'): self.file_accessor.create_mem_map(mem_map_name, inv_mem_map_size) inv_mem_map_name = None mem_map_size = 1024 - with self.assertRaisesRegex(Exception, 'Invalid name'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid name'): self.file_accessor.create_mem_map(inv_mem_map_name, mem_map_size) inv_mem_map_name = '' - with self.assertRaisesRegex(Exception, 'Invalid name'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid name'): self.file_accessor.create_mem_map(inv_mem_map_name, mem_map_size) def test_open_existing_mem_map(self): @@ -56,18 +58,18 @@ def test_open_existing_mem_map(self): def test_open_mem_map_invalid_inputs(self): """ Attempt to open a memory map with invalid inputs (size and name) and - verify that an Exception is raised. + verify that an SharedMemoryException is raised. 
""" mem_map_name = self.get_new_mem_map_name() inv_mem_map_size = -1 - with self.assertRaisesRegex(Exception, 'Invalid size'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid size'): self.file_accessor.open_mem_map(mem_map_name, inv_mem_map_size) inv_mem_map_name = None mem_map_size = 1024 - with self.assertRaisesRegex(Exception, 'Invalid name'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid name'): self.file_accessor.open_mem_map(inv_mem_map_name, mem_map_size) inv_mem_map_name = '' - with self.assertRaisesRegex(Exception, 'Invalid name'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid name'): self.file_accessor.open_mem_map(inv_mem_map_name, mem_map_size) @unittest.skipIf(os.name == 'nt', diff --git a/tests/unittests/test_shared_memory_manager.py b/tests/unittests/test_shared_memory_manager.py index 8f2e02aff..9193b20e9 100644 --- a/tests/unittests/test_shared_memory_manager.py +++ b/tests/unittests/test_shared_memory_manager.py @@ -3,6 +3,7 @@ import math import os +import json from azure_functions_worker.utils.common import is_envvar_true from azure.functions import meta as bind_meta from azure_functions_worker import testutils @@ -79,6 +80,84 @@ def test_string_input_support(self): is_supported = manager.is_supported(bytes_datum) self.assertTrue(is_supported) + def test_int_input_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is int. + """ + manager = SharedMemoryManager() + datum = bind_meta.Datum(type='int', value=1) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + + def test_double_input_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is double. 
+ """ + manager = SharedMemoryManager() + datum = bind_meta.Datum(type='double', value=1.0) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + + def test_json_input_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is json. + """ + manager = SharedMemoryManager() + content = { + 'name': 'foo', + 'val': 'bar' + } + datum = bind_meta.Datum(type='json', value=json.dumps(content)) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + + def test_collection_string_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is collection_string. + """ + manager = SharedMemoryManager() + content = ['foo', 'bar'] + datum = bind_meta.Datum(type='collection_string', value=content) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + + def test_collection_bytes_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is collection_bytes. + """ + manager = SharedMemoryManager() + content = [b'x01', b'x02'] + datum = bind_meta.Datum(type='collection_bytes', value=content) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + + def test_collection_double_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is collection_double. + """ + manager = SharedMemoryManager() + content = [1.0, 2.0] + datum = bind_meta.Datum(type='collection_double', value=content) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + + def test_collection_sint64_unsupported(self): + """ + Verify that the given input is unsupported by SharedMemoryManager. + This input is collection_sint64. 
+ """ + manager = SharedMemoryManager() + content = [1, 2] + datum = bind_meta.Datum(type='collection_sint64', value=content) + is_supported = manager.is_supported(datum) + self.assertFalse(is_supported) + def test_large_invalid_bytes_input_support(self): """ Verify that the given input is NOT supported by SharedMemoryManager to @@ -148,7 +227,7 @@ def test_put_bytes(self): shared_mem_meta = manager.put_bytes(content) self.assertIsNotNone(shared_mem_meta) self.assertTrue(self.is_valid_uuid(shared_mem_meta.mem_map_name)) - self.assertEqual(content_size, shared_mem_meta.count) + self.assertEqual(content_size, shared_mem_meta.count_bytes) free_success = manager.free_mem_map(shared_mem_meta.mem_map_name) self.assertTrue(free_success) @@ -171,7 +250,7 @@ def test_get_bytes(self): content = self.get_random_bytes(content_size) shared_mem_meta = manager.put_bytes(content) mem_map_name = shared_mem_meta.mem_map_name - num_bytes_written = shared_mem_meta.count + num_bytes_written = shared_mem_meta.count_bytes read_content = manager.get_bytes(mem_map_name, offset=0, count=num_bytes_written) self.assertEqual(content, read_content) @@ -191,7 +270,7 @@ def test_put_string(self): shared_mem_meta = manager.put_string(content) self.assertIsNotNone(shared_mem_meta) self.assertTrue(self.is_valid_uuid(shared_mem_meta.mem_map_name)) - self.assertEqual(expected_size, shared_mem_meta.count) + self.assertEqual(expected_size, shared_mem_meta.count_bytes) free_success = manager.free_mem_map(shared_mem_meta.mem_map_name) self.assertTrue(free_success) @@ -215,7 +294,7 @@ def test_get_string(self): content = self.get_random_string(num_chars) shared_mem_meta = manager.put_string(content) mem_map_name = shared_mem_meta.mem_map_name - num_bytes_written = shared_mem_meta.count + num_bytes_written = shared_mem_meta.count_bytes read_content = manager.get_string(mem_map_name, offset=0, count=num_bytes_written) self.assertEqual(content, read_content) diff --git 
a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index 7e5d2be0e..30ba09ed2 100644 --- a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -8,6 +8,8 @@ import SharedMemoryMap from azure_functions_worker.bindings.shared_memory_data_transfer \ import SharedMemoryConstants as consts +from azure_functions_worker.bindings.shared_memory_data_transfer \ + import SharedMemoryException class TestSharedMemoryMap(testutils.SharedMemoryTestCase): @@ -30,18 +32,19 @@ def test_init(self): def test_init_with_invalid_inputs(self): """ Attempt to initialize a SharedMemoryMap from invalid inputs (name and - mmap) and verify that an Exception is raised. + mmap) and verify that an SharedMemoryException is raised. """ inv_mem_map_name = None mem_map_name = self.get_new_mem_map_name() mem_map_size = 1024 mem_map = self.file_accessor.create_mem_map(mem_map_name, mem_map_size) - with self.assertRaisesRegex(Exception, 'Invalid name'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid name'): SharedMemoryMap(self.file_accessor, inv_mem_map_name, mem_map) inv_mem_map_name = '' - with self.assertRaisesRegex(Exception, 'Invalid name'): + with self.assertRaisesRegex(SharedMemoryException, 'Invalid name'): SharedMemoryMap(self.file_accessor, inv_mem_map_name, mem_map) - with self.assertRaisesRegex(Exception, 'Invalid memory map'): + with self.assertRaisesRegex(SharedMemoryException, + 'Invalid memory map'): SharedMemoryMap(self.file_accessor, mem_map_name, None) def test_put_bytes(self): @@ -84,7 +87,8 @@ def test_get_bytes(self): def test_put_bytes_more_than_capacity(self): """ Attempt to put more bytes into the created SharedMemoryMap than the - size with which it was created. Verify that an Exception is raised. + size with which it was created. Verify that an SharedMemoryException is + raised. 
""" mem_map_name = self.get_new_mem_map_name() mem_map_size = 1024 + consts.CONTENT_HEADER_TOTAL_BYTES @@ -96,7 +100,7 @@ def test_put_bytes_more_than_capacity(self): # earlier (1024). content_size = 2048 content = self.get_random_bytes(content_size) - with self.assertRaisesRegex(Exception, 'out of range'): + with self.assertRaisesRegex(SharedMemoryException, 'out of range'): shared_mem_map.put_bytes(content) dispose_status = shared_mem_map.dispose() self.assertTrue(dispose_status) From 691a7747391f01efd98d80f4166af86d743cc918 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 8 Mar 2021 09:05:02 -0800 Subject: [PATCH 54/76] Fixed test - removed unused test functions --- .../shared_memory_manager.py | 8 ++++- .../blob_trigger_bytes/function.json | 21 ----------- .../blob_functions/blob_trigger_bytes/main.py | 22 ------------ .../get_blob_triggered_bytes/function.json | 23 ------------ .../get_blob_triggered_bytes/main.py | 16 --------- .../put_blob_trigger_bytes/function.json | 23 ------------ .../put_blob_trigger_bytes/main.py | 35 ------------------- tests/unittests/test_shared_memory_map.py | 2 +- 8 files changed, 8 insertions(+), 142 deletions(-) delete mode 100644 tests/endtoend/blob_functions/blob_trigger_bytes/function.json delete mode 100644 tests/endtoend/blob_functions/blob_trigger_bytes/main.py delete mode 100644 tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json delete mode 100644 tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py delete mode 100644 tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json delete mode 100644 tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py index 16f2b1109..bdad282c6 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py +++ 
b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py @@ -85,7 +85,13 @@ def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]: shared_mem_map = self._create(mem_map_name, content_length) if shared_mem_map is None: return None - num_bytes_written = shared_mem_map.put_bytes(content) + try: + num_bytes_written = shared_mem_map.put_bytes(content) + except Exception as e: + logger.warn(f'Cannot write {content_length} bytes into shared ' + f'memory {mem_map_name} - {e}') + shared_mem_map.dispose() + return None if num_bytes_written != content_length: logger.error( f'Cannot write data into shared memory {mem_map_name} ' diff --git a/tests/endtoend/blob_functions/blob_trigger_bytes/function.json b/tests/endtoend/blob_functions/blob_trigger_bytes/function.json deleted file mode 100644 index b8efe6509..000000000 --- a/tests/endtoend/blob_functions/blob_trigger_bytes/function.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "scriptFile": "main.py", - "bindings": [ - { - "type": "blobTrigger", - "direction": "in", - "name": "file", - "dataType": "binary", - "connection": "AzureWebJobsStorage", - "path": "python-worker-tests/shmem-test-blob-trigger-bytes.txt" - }, - { - "type": "blob", - "direction": "out", - "name": "$return", - "dataType": "string", - "connection": "AzureWebJobsStorage", - "path": "python-worker-tests/shmem-test-blob-triggered-bytes.json" - } - ] -} diff --git a/tests/endtoend/blob_functions/blob_trigger_bytes/main.py b/tests/endtoend/blob_functions/blob_trigger_bytes/main.py deleted file mode 100644 index 7a596577b..000000000 --- a/tests/endtoend/blob_functions/blob_trigger_bytes/main.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. 
- -import json -import hashlib - - -def main(file: bytes) -> str: - """ - Reads an input file (bytes) and writes the number of bytes read and the MD5 - digest of the read content into an output file, in JSON format. - """ - content_size = len(file) - content_md5 = hashlib.md5(file).hexdigest() - - output_content = { - 'content_size': content_size, - 'content_md5': content_md5 - } - - output_json = json.dumps(output_content) - return output_json diff --git a/tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json b/tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json deleted file mode 100644 index 66d57e99c..000000000 --- a/tests/endtoend/blob_functions/get_blob_triggered_bytes/function.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "scriptFile": "main.py", - "bindings": [ - { - "type": "httpTrigger", - "direction": "in", - "name": "req" - }, - { - "type": "blob", - "direction": "in", - "name": "file", - "dataType": "string", - "connection": "AzureWebJobsStorage", - "path": "python-worker-tests/shmem-test-blob-triggered-bytes.json" - }, - { - "type": "http", - "direction": "out", - "name": "$return" - } - ] -} diff --git a/tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py b/tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py deleted file mode 100644 index 6f086c703..000000000 --- a/tests/endtoend/blob_functions/get_blob_triggered_bytes/main.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import azure.functions as azf - - -def main(req: azf.HttpRequest, file: str) -> azf.HttpResponse: - """ - Read the given file (assumed to be in JSON format) and respond back with its - content in the HTTP response. 
- """ - return azf.HttpResponse( - body=file, - mimetype="application/json", - status_code=200 - ) diff --git a/tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json b/tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json deleted file mode 100644 index b60abd39e..000000000 --- a/tests/endtoend/blob_functions/put_blob_trigger_bytes/function.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "scriptFile": "main.py", - "bindings": [ - { - "type": "httpTrigger", - "direction": "in", - "name": "req" - }, - { - "type": "blob", - "direction": "out", - "name": "file", - "dataType": "binary", - "connection": "AzureWebJobsStorage", - "path": "python-worker-tests/shmem-test-blob-trigger-bytes.txt" - }, - { - "type": "http", - "direction": "out", - "name": "$return" - } - ] -} diff --git a/tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py b/tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py deleted file mode 100644 index d7cda92a5..000000000 --- a/tests/endtoend/blob_functions/put_blob_trigger_bytes/main.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. - -import json -import random -import hashlib -import azure.functions as azf - - -def main(req: azf.HttpRequest, file: azf.Out[bytes]) -> azf.HttpResponse: - """ - Write a blob (bytes) and respond back (in HTTP response) with the number of - bytes written and the MD5 digest of the content. - The number of bytes to write are specified in the input HTTP request. 
- This function's output blob triggers another function: blob_trigger_bytes - """ - content_size = int(req.params['content_size']) - - content = bytearray(random.getrandbits(8) for _ in range(content_size)) - content_md5 = hashlib.md5(content).hexdigest() - - file.set(content) - - response_dict = { - 'content_size': content_size, - 'content_md5': content_md5 - } - - response_body = json.dumps(response_dict, indent=2) - - return azf.HttpResponse( - body=response_body, - mimetype="application/json", - status_code=200 - ) diff --git a/tests/unittests/test_shared_memory_map.py b/tests/unittests/test_shared_memory_map.py index 30ba09ed2..86680474c 100644 --- a/tests/unittests/test_shared_memory_map.py +++ b/tests/unittests/test_shared_memory_map.py @@ -100,7 +100,7 @@ def test_put_bytes_more_than_capacity(self): # earlier (1024). content_size = 2048 content = self.get_random_bytes(content_size) - with self.assertRaisesRegex(SharedMemoryException, 'out of range'): + with self.assertRaisesRegex(ValueError, 'out of range'): shared_mem_map.put_bytes(content) dispose_status = shared_mem_map.dispose() self.assertTrue(dispose_status) From a399f2ea53c171b00e930c1ffb82475776639beb Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 10 Mar 2021 07:07:06 -0800 Subject: [PATCH 55/76] Addressing comments; caching list of valid directories for later use instead of checking every time --- .../file_accessor_factory.py | 3 +- .../file_accessor_unix.py | 106 +++++++++--------- .../shared_memory_constants.py | 4 +- 3 files changed, 57 insertions(+), 56 deletions(-) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_factory.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_factory.py index 290a2b843..e3b528132 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_factory.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_factory.py @@ -15,5 +15,4 @@ 
class FileAccessorFactory: def create_file_accessor(): if os.name == 'nt': return FileAccessorWindows() - else: - return FileAccessorUnix() + return FileAccessorUnix() diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index 2ef8c812c..ab9c79936 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -16,6 +16,12 @@ class FileAccessorUnix(FileAccessor): For accessing memory maps. This implements the FileAccessor interface for Unix platforms. """ + def __init__(self): + # From the list of configured directories where memory maps can be + # stored, get the list of directories which are valid (either existed + # already or have been created successfully for use). + self.valid_dirs = self._get_valid_mem_map_dirs() + def open_mem_map( self, mem_map_name: str, @@ -75,33 +81,36 @@ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: mem_map.close() return True - def _create_mem_map_dir(self) -> bool: + def _get_valid_mem_map_dirs(self) -> bool: """ - Create a directory to create memory maps. - Returns True if either a valid directory already exists or one was - created successfully, False otherwise. + From the configured list of allowed directories where memory maps can be + stored, return all those that either already existed or were created + successfully for use. + Returns list of directories, in decreasing order of preference, where + memory maps can be created. """ # Iterate over all the possible directories where the memory map could - # be created and try to create in one of them. 
- for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - dir_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX) - if os.path.isdir(dir_path): - # One of the directories already exists, no need - return True - try: - os.makedirs(dir_path) - return True - except Exception: - # We try to create a directory in each of the applicable - # directory paths until we successfully create one or one that - # already exists is found. - # Even if this fails, we keep trying others. - pass - # Could not create a directory in any of the applicable directory paths. - # We will not be able to create any memory maps so we fail. - logger.error('Cannot create directory for memory maps') - return False + # be created and try to create each of them if they don't exist already. + valid_dirs = [] + for temp_dir in consts.UNIX_TEMP_DIRS: + dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) + if os.path.exists(dir_path): + # A valid directory already exists + valid_dirs.append(dir_path) + logger.debug(f'Found directory {dir_path} to store memory maps') + else: + try: + os.makedirs(dir_path) + valid_dirs.append(dir_path) + except Exception as e: + logger.warn(f'Cannot create directory {dir_path} to store ' + f' memory maps - {e}', exc_info=True) + # We keep trying to check/create others + continue + if len(valid_dirs) == 0: + logger.error('No valid directory for memory maps in ' + f'{consts.UNIX_TEMP_DIRS}') + return valid_dirs def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: """ @@ -110,17 +119,18 @@ def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: """ # Iterate over all the possible directories where the memory map could # be present and try to open it. 
- for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) - try: - fd = open(file_path, 'r+b') - return fd - except FileNotFoundError: - pass + for temp_dir in self.valid_dirs: + file_path = os.path.join(temp_dir, mem_map_name) + if os.path.exists(file_path): + try: + fd = open(file_path, 'r+b') + return fd + except Exception as e: + logger.error(f'Cannot open file {file_path} - {e}', + exc_info=True) # The memory map was not found in any of the known directories logger.error(f'Cannot open memory map {mem_map_name} in any of the ' - f'following directories: {consts.UNIX_TEMP_DIRS}') + f'following directories: {self.valid_dirs}') return None def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ @@ -129,40 +139,30 @@ def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ Create the file descriptor for a new memory map. Returns the BufferedRandom stream to the file. """ - dir_exists = False - for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - # Check if the file already exists - file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + # Ensure that the file does not already exist + for temp_dir in self.valid_dirs: + file_path = os.path.join(temp_dir, mem_map_name) if os.path.exists(file_path): raise SharedMemoryException( f'File {file_path} for memory map {mem_map_name} ' f'already exists') - # Check if the parent directory exists - dir_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX) - if os.path.isdir(dir_path): - dir_exists = True - # Check if any of the parent directories exists - if not dir_exists: - if not self._create_mem_map_dir(): - return None # Create the file - for mem_map_temp_dir in consts.UNIX_TEMP_DIRS: - file_path = os.path.join(mem_map_temp_dir, - consts.UNIX_TEMP_DIR_SUFFIX, mem_map_name) + for temp_dir in self.valid_dirs: + file_path = os.path.join(temp_dir, mem_map_name) try: file = 
open(file_path, 'wb+') file.truncate(mem_map_size) return file - except Exception: + except Exception as e: # If the memory map could not be created in this directory, we # keep trying in other applicable directories. - pass + logger.warn(f'Cannot create memory map in {file_path} - {e}. ' + 'Trying other directories.', exc_info=True) + continue # Could not create the memory map in any of the applicable directory # paths so we fail. logger.error( f'Cannot create memory map {mem_map_name} with size ' f'{mem_map_size} in any of the following directories: ' - f'{consts.UNIX_TEMP_DIRS}') + f'{self.valid_dirs}') return None diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py index c69f77f22..c282ddfe9 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py @@ -66,7 +66,9 @@ class HeaderFlags: SIZE_OF_CHAR_BYTES = 2 """ - Directories in Unix where the memory maps can be found + Directories in Unix where the memory maps can be found. + These list is in order of preference, starting with the highest preference + directory. 
""" UNIX_TEMP_DIRS = ["/dev/shm"] From 33aab5e076e8845fc5e574e2359f59b9463adce2 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 10 Mar 2021 07:21:49 -0800 Subject: [PATCH 56/76] Whitespace fix --- azure_functions_worker/dispatcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure_functions_worker/dispatcher.py b/azure_functions_worker/dispatcher.py index 807ac1c06..453559855 100644 --- a/azure_functions_worker/dispatcher.py +++ b/azure_functions_worker/dispatcher.py @@ -262,7 +262,7 @@ async def _handle__worker_init_request(self, req): constants.RAW_HTTP_BODY_BYTES: _TRUE, constants.TYPED_DATA_COLLECTION: _TRUE, constants.RPC_HTTP_BODY_ONLY: _TRUE, - constants.RPC_HTTP_TRIGGER_METADATA_REMOVED: _TRUE, + constants.RPC_HTTP_TRIGGER_METADATA_REMOVED: _TRUE, constants.WORKER_STATUS: _TRUE, constants.SHARED_MEMORY_DATA_TRANSFER: _TRUE, } From 3c0c8a9d8e254e84f27ec219f4abcabc430404ee Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Wed, 10 Mar 2021 08:59:16 -0800 Subject: [PATCH 57/76] Cleanup --- .../shared_memory_data_transfer/file_accessor_unix.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index ab9c79936..b09345b98 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -103,10 +103,9 @@ def _get_valid_mem_map_dirs(self) -> bool: os.makedirs(dir_path) valid_dirs.append(dir_path) except Exception as e: + # We keep trying to check/create others logger.warn(f'Cannot create directory {dir_path} to store ' f' memory maps - {e}', exc_info=True) - # We keep trying to check/create others - continue if len(valid_dirs) == 0: logger.error('No valid directory for memory maps in ' f'{consts.UNIX_TEMP_DIRS}') @@ 
-158,7 +157,6 @@ def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ # keep trying in other applicable directories. logger.warn(f'Cannot create memory map in {file_path} - {e}. ' 'Trying other directories.', exc_info=True) - continue # Could not create the memory map in any of the applicable directory # paths so we fail. logger.error( From 94671708142aa82c3e248d0314178b77f6e8c263 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 16:06:06 -0800 Subject: [PATCH 58/76] Adding AppSetting to override shared memory directory for Unix - test setup for macOS --- azure_functions_worker/bindings/datumdef.py | 7 +- .../file_accessor_unix.py | 36 ++++++--- .../file_accessor_windows.py | 2 +- .../shared_memory_constants.py | 4 +- .../shared_memory_manager.py | 4 +- azure_functions_worker/constants.py | 5 ++ azure_functions_worker/testutils.py | 75 ++++++++++++++++--- 7 files changed, 106 insertions(+), 27 deletions(-) diff --git a/azure_functions_worker/bindings/datumdef.py b/azure_functions_worker/bindings/datumdef.py index 9691cff09..2257d38b0 100644 --- a/azure_functions_worker/bindings/datumdef.py +++ b/azure_functions_worker/bindings/datumdef.py @@ -103,8 +103,8 @@ def from_rpc_shared_memory( a datum object of the corresponding type. """ if shmem is None: - logger.warn('Cannot read from shared memory. ' - 'RpcSharedMemory is None.') + logger.warning('Cannot read from shared memory. 
' + 'RpcSharedMemory is None.') return None mem_map_name = shmem.name @@ -153,7 +153,8 @@ def to_rpc_shared_memory( ) if shared_mem_meta is None: - logger.warn(f'Cannot write to shared memory for type: {datum.type}') + logger.warning('Cannot write to shared memory for type: ' + f'{datum.type}') return None shmem = protos.RpcSharedMemory( diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index b09345b98..6eb1a22b6 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -1,13 +1,15 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. +from azure_functions_worker import constants import os import mmap -from typing import Optional +from typing import Optional, List from io import BufferedRandom from .shared_memory_constants import SharedMemoryConstants as consts from .shared_memory_exception import SharedMemoryException from .file_accessor import FileAccessor +from ...utils.common import get_app_setting from ...logging import logger @@ -38,7 +40,7 @@ def open_mem_map( f'Cannot open memory map. Invalid size {mem_map_size}') fd = self._open_mem_map_file(mem_map_name) if fd is None: - logger.warn(f'Cannot open file: {mem_map_name}') + logger.warning(f'Cannot open file: {mem_map_name}') return None mem_map = mmap.mmap(fd.fileno(), mem_map_size, access=access) return mem_map @@ -53,7 +55,7 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ f'Cannot create memory map. 
Invalid size {mem_map_size}') file = self._create_mem_map_file(mem_map_name, mem_map_size) if file is None: - logger.warn(f'Cannot create file: {mem_map_name}') + logger.warning(f'Cannot create file: {mem_map_name}') return None mem_map = mmap.mmap(file.fileno(), mem_map_size, mmap.MAP_SHARED, mmap.PROT_WRITE) @@ -81,7 +83,20 @@ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: mem_map.close() return True - def _get_valid_mem_map_dirs(self) -> bool: + def _get_allowed_mem_map_dirs(self) -> List[str]: + """ + Get the list of directories where memory maps can be created. + If specified in AppSetting, that list will be used. + Otherwise, the default value will be used. + """ + allowed_mem_map_dirs_str = get_app_setting( + constants.UNIX_SHARED_MEMORY_DIRECTORIES) + allowed_mem_map_dirs = allowed_mem_map_dirs_str.split(',') + if allowed_mem_map_dirs is None: + allowed_mem_map_dirs = consts.UNIX_TEMP_DIR_SUFFIX + return allowed_mem_map_dirs + + def _get_valid_mem_map_dirs(self) -> List[str]: """ From the configured list of allowed directories where memory maps can be stored, return all those that either already existed or were created @@ -89,10 +104,11 @@ def _get_valid_mem_map_dirs(self) -> bool: Returns list of directories, in decreasing order of preference, where memory maps can be created. """ + allowed_dirs = self._get_allowed_mem_map_dirs() # Iterate over all the possible directories where the memory map could # be created and try to create each of them if they don't exist already. 
valid_dirs = [] - for temp_dir in consts.UNIX_TEMP_DIRS: + for temp_dir in allowed_dirs: dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) if os.path.exists(dir_path): # A valid directory already exists @@ -104,11 +120,11 @@ def _get_valid_mem_map_dirs(self) -> bool: valid_dirs.append(dir_path) except Exception as e: # We keep trying to check/create others - logger.warn(f'Cannot create directory {dir_path} to store ' - f' memory maps - {e}', exc_info=True) + logger.warning(f'Cannot create directory {dir_path} to ' + f'store memory maps - {e}', exc_info=True) if len(valid_dirs) == 0: logger.error('No valid directory for memory maps in ' - f'{consts.UNIX_TEMP_DIRS}') + f'{allowed_dirs}') return valid_dirs def _open_mem_map_file(self, mem_map_name: str) -> Optional[BufferedRandom]: @@ -155,8 +171,8 @@ def _create_mem_map_file(self, mem_map_name: str, mem_map_size: int) \ except Exception as e: # If the memory map could not be created in this directory, we # keep trying in other applicable directories. - logger.warn(f'Cannot create memory map in {file_path} - {e}. ' - 'Trying other directories.', exc_info=True) + logger.warning(f'Cannot create memory map in {file_path} - {e}.' + ' Trying other directories.', exc_info=True) # Could not create the memory map in any of the applicable directory # paths so we fail. 
logger.error( diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py index c3ed8c933..50ea20501 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_windows.py @@ -33,7 +33,7 @@ def open_mem_map( mem_map = mmap.mmap(-1, mem_map_size, mem_map_name, access=access) return mem_map except Exception as e: - logger.warn( + logger.warning( f'Cannot open memory map {mem_map_name} with size ' f'{mem_map_size} - {e}') return None diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py index c282ddfe9..45d227c3e 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py @@ -66,9 +66,11 @@ class HeaderFlags: SIZE_OF_CHAR_BYTES = 2 """ - Directories in Unix where the memory maps can be found. + Default directories in Unix where the memory maps can be found. These list is in order of preference, starting with the highest preference directory. + A user can override this by using the AppSetting: + UNIX_SHARED_MEMORY_DIRECTORIES. 
""" UNIX_TEMP_DIRS = ["/dev/shm"] diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py index bdad282c6..3475f51f4 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py @@ -88,8 +88,8 @@ def put_bytes(self, content: bytes) -> Optional[SharedMemoryMetadata]: try: num_bytes_written = shared_mem_map.put_bytes(content) except Exception as e: - logger.warn(f'Cannot write {content_length} bytes into shared ' - f'memory {mem_map_name} - {e}') + logger.warning(f'Cannot write {content_length} bytes into shared ' + f'memory {mem_map_name} - {e}') shared_mem_map.dispose() return None if num_bytes_written != content_length: diff --git a/azure_functions_worker/constants.py b/azure_functions_worker/constants.py index 28d066f66..75406ad33 100644 --- a/azure_functions_worker/constants.py +++ b/azure_functions_worker/constants.py @@ -24,6 +24,11 @@ PYTHON_ISOLATE_WORKER_DEPENDENCIES = "PYTHON_ISOLATE_WORKER_DEPENDENCIES" FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED = \ "FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED" +""" +Comma-separated list of directories where shared memory maps can be created for +data transfer between host and worker. +""" +UNIX_SHARED_MEMORY_DIRECTORIES = "UNIX_SHARED_MEMORY_DIRECTORIES" # Setting Defaults PYTHON_THREADPOOL_THREAD_COUNT_DEFAULT = 1 diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index fdcda51e6..917b31279 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -42,8 +42,9 @@ from . import dispatcher from . 
import protos from .constants import (PYAZURE_WEBHOST_DEBUG, - FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) -from .utils.common import is_envvar_true + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED, + UNIX_SHARED_MEMORY_DIRECTORIES) +from .utils.common import get_app_setting, is_envvar_true PROJECT_ROOT = pathlib.Path(__file__).parent.parent TESTS_ROOT = PROJECT_ROOT / 'tests' @@ -252,16 +253,34 @@ class SharedMemoryTestCase(unittest.TestCase): """ def setUp(self): self.file_accessor = FileAccessorFactory.create_file_accessor() + self.was_shmem_env_true = is_envvar_true( FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) os.environ.update( {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '1'}) - if os.name != 'nt': - self._setUpUnix() + + os_name = platform.system() + if os_name == 'Darwin': + # If an existing AppSetting is specified, save it so it can be + # restored later + self.was_shmem_dirs = get_app_setting( + UNIX_SHARED_MEMORY_DIRECTORIES + ) + self._setUpDarwin() + elif os_name == 'Linux': + self._setUpLinxx() def tearDown(self): - if os.name != 'nt': - self._tearDownUnix() + os_name = platform.system() + if os_name == 'Darwin': + self._setUpDarwin() + if self.was_shmem_dirs is not None: + # If an AppSetting was set before the tests ran, restore it back + os.environ.update( + {UNIX_SHARED_MEMORY_DIRECTORIES: self.was_shmem_dirs}) + elif os_name == 'Linux': + self._tearDownLinux() + if not self.was_shmem_env_true: os.environ.update( {FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED: '0'}) @@ -287,17 +306,53 @@ def is_valid_uuid(self, uuid_to_test: str, version: int = 4) -> bool: return False return str(uuid_obj) == uuid_to_test - def _setUpUnix(self): - for temp_dir in consts.UNIX_TEMP_DIRS: + def _createSharedMemoryDirectories(self, directories): + for temp_dir in directories: temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) if not os.path.exists(temp_dir_path): os.makedirs(temp_dir_path) - def 
_tearDownUnix(self): - for temp_dir in consts.UNIX_TEMP_DIRS: + def _deleteSharedMemoryDirectories(self, directories): + for temp_dir in directories: temp_dir_path = os.path.join(temp_dir, consts.UNIX_TEMP_DIR_SUFFIX) shutil.rmtree(temp_dir_path) + def _setUpLinux(self): + self._createSharedMemoryDirectories(consts.UNIX_TEMP_DIRS) + + def _tearDownLinux(self): + self._deleteSharedMemoryDirectories(consts.UNIX_TEMP_DIRS) + + def _setUpDarwin(self): + """ + Create a RAM disk on macOS. + Ref: https://stackoverflow.com/a/2033417/3132415 + """ + size_in_mb = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER / (1024 * 1024) + size = 2048 * size_in_mb + # The following command returns the name of the created disk + cmd = ['hdutil', 'attach', '-nomount', f'ram://{size}'] + result = subprocess.run(cmd, stdout=subprocess.PIPE) + if result.returncode != 0: + raise Exception(f'Cannot create ram disk with command: {cmd}') + disk_name = result.stdout + # We create a volume on the disk created above and mount it + volume_name = 'shm' + cmd = ['diskutil', 'eraseVolume', 'HFS+', volume_name, disk_name] + result = subprocess.run(cmd, stdout=subprocess.PIPE) + if result.returncode != 0: + raise Exception(f'Cannot create volume with command: {cmd}') + directory = f'/Volumes/{volume_name}' + self.created_directories = [directory] + self._createSharedMemoryDirectories(self.created_directories) + # Override the AppSetting for the duration of this test so the + # FileAccessorUnix can use these directories for creating memory maps + os.environ.update( + {UNIX_SHARED_MEMORY_DIRECTORIES: self.created_directories}) + + def _tearDownDarwin(self): + self._deleteSharedMemoryDirectories(self.created_directories) + class _MockWebHostServicer(protos.FunctionRpcServicer): From 0fbca317c9f3525252d7593d3ebaf3601c908713 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 16:08:38 -0800 Subject: [PATCH 59/76] Logging subprocess result in Exception --- azure_functions_worker/testutils.py | 
6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 917b31279..cadc60bcb 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -334,14 +334,16 @@ def _setUpDarwin(self): cmd = ['hdutil', 'attach', '-nomount', f'ram://{size}'] result = subprocess.run(cmd, stdout=subprocess.PIPE) if result.returncode != 0: - raise Exception(f'Cannot create ram disk with command: {cmd}') + raise Exception(f'Cannot create ram disk with command: {cmd} - ' + f'{result.stdout} - {result.stderr}') disk_name = result.stdout # We create a volume on the disk created above and mount it volume_name = 'shm' cmd = ['diskutil', 'eraseVolume', 'HFS+', volume_name, disk_name] result = subprocess.run(cmd, stdout=subprocess.PIPE) if result.returncode != 0: - raise Exception(f'Cannot create volume with command: {cmd}') + raise Exception(f'Cannot create volume with command: {cmd} - ' + f'{result.stdout} - {result.stderr}') directory = f'/Volumes/{volume_name}' self.created_directories = [directory] self._createSharedMemoryDirectories(self.created_directories) From 050399deabef7623acf76f16274f806c79d0c3b7 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 16:09:21 -0800 Subject: [PATCH 60/76] Changed Exception -> IOError --- azure_functions_worker/testutils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index cadc60bcb..e427208e8 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -334,16 +334,16 @@ def _setUpDarwin(self): cmd = ['hdutil', 'attach', '-nomount', f'ram://{size}'] result = subprocess.run(cmd, stdout=subprocess.PIPE) if result.returncode != 0: - raise Exception(f'Cannot create ram disk with command: {cmd} - ' - f'{result.stdout} - {result.stderr}') + raise IOError(f'Cannot create ram disk 
with command: {cmd} - ' + f'{result.stdout} - {result.stderr}') disk_name = result.stdout # We create a volume on the disk created above and mount it volume_name = 'shm' cmd = ['diskutil', 'eraseVolume', 'HFS+', volume_name, disk_name] result = subprocess.run(cmd, stdout=subprocess.PIPE) if result.returncode != 0: - raise Exception(f'Cannot create volume with command: {cmd} - ' - f'{result.stdout} - {result.stderr}') + raise IOError(f'Cannot create volume with command: {cmd} - ' + f'{result.stdout} - {result.stderr}') directory = f'/Volumes/{volume_name}' self.created_directories = [directory] self._createSharedMemoryDirectories(self.created_directories) From dd425c5255be9fdd7d39b3749afc3017ed17004b Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 16:11:02 -0800 Subject: [PATCH 61/76] Check shared memory directory AppSetting only for Darwin tests --- azure_functions_worker/testutils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index e427208e8..a47624c15 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -274,10 +274,10 @@ def tearDown(self): os_name = platform.system() if os_name == 'Darwin': self._setUpDarwin() - if self.was_shmem_dirs is not None: - # If an AppSetting was set before the tests ran, restore it back - os.environ.update( - {UNIX_SHARED_MEMORY_DIRECTORIES: self.was_shmem_dirs}) + if self.was_shmem_dirs is not None: + # If an AppSetting was set before the tests ran, restore it back + os.environ.update( + {UNIX_SHARED_MEMORY_DIRECTORIES: self.was_shmem_dirs}) elif os_name == 'Linux': self._tearDownLinux() From 3c75b31c76af41ba317f7887b1dde8595402c39e Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 16:12:16 -0800 Subject: [PATCH 62/76] Fix test cleanup for Darwin --- azure_functions_worker/testutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index a47624c15..1e79e4ffa 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -273,7 +273,7 @@ def setUp(self): def tearDown(self): os_name = platform.system() if os_name == 'Darwin': - self._setUpDarwin() + self._tearDownDarwin() if self.was_shmem_dirs is not None: # If an AppSetting was set before the tests ran, restore it back os.environ.update( From 4563697e94b7caf6dcf2a158dc5f327aa9f5e73a Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 16:24:34 -0800 Subject: [PATCH 63/76] Only split AppSetting list of directories if the AppSetting was found --- .../shared_memory_data_transfer/file_accessor_unix.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index 6eb1a22b6..8c64efa67 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -91,9 +91,10 @@ def _get_allowed_mem_map_dirs(self) -> List[str]: """ allowed_mem_map_dirs_str = get_app_setting( constants.UNIX_SHARED_MEMORY_DIRECTORIES) - allowed_mem_map_dirs = allowed_mem_map_dirs_str.split(',') - if allowed_mem_map_dirs is None: + if allowed_mem_map_dirs_str is None: allowed_mem_map_dirs = consts.UNIX_TEMP_DIR_SUFFIX + else: + allowed_mem_map_dirs = allowed_mem_map_dirs_str.split(',') return allowed_mem_map_dirs def _get_valid_mem_map_dirs(self) -> List[str]: From da436e3e3b1fca9d6d41ae767f18b4bfa0e775c0 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 23:05:03 -0800 Subject: [PATCH 64/76] Fix consts.UNIX_TEMP_DIRS --- .../bindings/shared_memory_data_transfer/file_accessor_unix.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index 8c64efa67..85060c20c 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -92,7 +92,7 @@ def _get_allowed_mem_map_dirs(self) -> List[str]: allowed_mem_map_dirs_str = get_app_setting( constants.UNIX_SHARED_MEMORY_DIRECTORIES) if allowed_mem_map_dirs_str is None: - allowed_mem_map_dirs = consts.UNIX_TEMP_DIR_SUFFIX + allowed_mem_map_dirs = consts.UNIX_TEMP_DIRS else: allowed_mem_map_dirs = allowed_mem_map_dirs_str.split(',') return allowed_mem_map_dirs From ced927543c5591c1d88851e4a101eaad6cd6593a Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 12 Mar 2021 23:34:46 -0800 Subject: [PATCH 65/76] Typo fix --- azure_functions_worker/testutils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 1e79e4ffa..dc1877050 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -268,7 +268,7 @@ def setUp(self): ) self._setUpDarwin() elif os_name == 'Linux': - self._setUpLinxx() + self._setUpLinux() def tearDown(self): os_name = platform.system() From 36797fa43e503b35a958714e47238f93f7c488f2 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 18:17:41 +0000 Subject: [PATCH 66/76] Adding throughput tests for blob input/output using shared memory --- .../dockerfiles/perf_tests.Dockerfile | 5 +- .../SyncGetBlobAsBytesReturnHttpResponse.js | 65 +++++++++++++++++++ .../SyncPutBlobAsBytesReturnHttpResponse.js | 53 +++++++++++++++ .github/workflows/perf-testing-setup.yml | 4 +- 4 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 
.ci/perf_tests/k6scripts/SyncGetBlobAsBytesReturnHttpResponse.js create mode 100644 .ci/perf_tests/k6scripts/SyncPutBlobAsBytesReturnHttpResponse.js diff --git a/.ci/perf_tests/dockerfiles/perf_tests.Dockerfile b/.ci/perf_tests/dockerfiles/perf_tests.Dockerfile index bfe3f5443..8c1320b80 100644 --- a/.ci/perf_tests/dockerfiles/perf_tests.Dockerfile +++ b/.ci/perf_tests/dockerfiles/perf_tests.Dockerfile @@ -1,6 +1,6 @@ ARG PYTHON_VERSION=3.8 -FROM mcr.microsoft.com/azure-functions/python:3.0.14492-python$PYTHON_VERSION +FROM mcr.microsoft.com/azure-functions/python:3.0.15418-python$PYTHON_VERSION # Mounting local machines azure-functions-python-worker and azure-functions-python-library onto it RUN rm -rf /azure-functions-host/workers/python/${PYTHON_VERSION}/LINUX/X64/azure_functions_worker @@ -11,7 +11,8 @@ VOLUME ["/azure-functions-host/workers/python/${PYTHON_VERSION}/LINUX/X64/azure_ ENV AzureWebJobsScriptRoot=/home/site/wwwroot \ AzureFunctionsJobHost__Logging__Console__IsEnabled=true \ FUNCTIONS_WORKER_PROCESS_COUNT=1 \ - AZURE_FUNCTIONS_ENVIRONMENT=Development + AZURE_FUNCTIONS_ENVIRONMENT=Development \ + FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED=1 RUN apt-get --quiet update && \ apt-get install --quiet -y git && \ diff --git a/.ci/perf_tests/k6scripts/SyncGetBlobAsBytesReturnHttpResponse.js b/.ci/perf_tests/k6scripts/SyncGetBlobAsBytesReturnHttpResponse.js new file mode 100644 index 000000000..5fab61fb3 --- /dev/null +++ b/.ci/perf_tests/k6scripts/SyncGetBlobAsBytesReturnHttpResponse.js @@ -0,0 +1,65 @@ +import { check } from "k6"; +import { Rate } from "k6/metrics"; +import http from "k6/http"; + +var HOSTNAME = __ENV.HOSTNAME || 'localhost'; +var PORT = __ENV.PORT || '80'; +var PROTOCOL = __ENV.PROTOCOL || (PORT === '80' ? 
'http' : 'https'); +var INPUT_FILENAME = 'Input_256MB' +var CONTENT_SIZE = 1024 * 1024 * 256; // 256 MB + +// A custom metric to track failure rates +var failureRate = new Rate("check_failure_rate"); + +// Options +export let options = { + stages: [ + // Linearly ramp up from 1 to 20 VUs during first minute + { target: 20, duration: "1m" }, + // Hold at 20 VUs for the next 3 minutes and 45 seconds + { target: 20, duration: "3m45s" }, + // Linearly ramp down from 20 to 0 VUs over the last 15 seconds + { target: 0, duration: "15s" } + // Total execution time will be ~5 minutes + ], + thresholds: { + // We want the 95th percentile of all HTTP request durations to be less than 40s + "http_req_duration": ["p(95)<40000"], + // Thresholds based on the custom metric we defined and use to track application failures + "check_failure_rate": [ + // Global failure rate should be less than 1% + "rate<0.01", + // Abort the test early if it climbs over 5% + { threshold: "rate<=0.05", abortOnFail: true }, + ], + }, +}; + +// Setup function +// This will create a blob which will later be used as an input binding +export function setup() { + let no_random_input = true; + let url = `${PROTOCOL}://${HOSTNAME}:${PORT}/api/SyncPutBlobAsBytesReturnHttpResponse?content_size=${CONTENT_SIZE}&no_random_input=${no_random_input}&outfile=${INPUT_FILENAME}`; + let response = http.get(url); + + // check() returns false if any of the specified conditions fail + let checkRes = check(response, { + "status is 200": (r) => r.status === 200, + "content_size matches": (r) => r.json().content_size === CONTENT_SIZE, + }); +} + +// Main function +export default function () { + let url = `${PROTOCOL}://${HOSTNAME}:${PORT}/api/SyncGetBlobAsBytesReturnHttpResponse?infile=${INPUT_FILENAME}`; + let response = http.get(url); + + // check() returns false if any of the specified conditions fail + let checkRes = check(response, { + "status is 200": (r) => r.status === 200, + "content_size matches": (r) => 
r.json().content_size === CONTENT_SIZE, + }); + + // We reverse the check() result since we want to count the failures + failureRate.add(!checkRes); +} diff --git a/.ci/perf_tests/k6scripts/SyncPutBlobAsBytesReturnHttpResponse.js b/.ci/perf_tests/k6scripts/SyncPutBlobAsBytesReturnHttpResponse.js new file mode 100644 index 000000000..7426e5f7f --- /dev/null +++ b/.ci/perf_tests/k6scripts/SyncPutBlobAsBytesReturnHttpResponse.js @@ -0,0 +1,53 @@ +import { check } from "k6"; +import { Rate } from "k6/metrics"; +import http from "k6/http"; +import { randomIntBetween } from "https://jslib.k6.io/k6-utils/1.0.0/index.js"; + +var HOSTNAME = __ENV.HOSTNAME || 'localhost'; +var PORT = __ENV.PORT || '80'; +var PROTOCOL = __ENV.PROTOCOL || (PORT === '80' ? 'http' : 'https'); + +// A custom metric to track failure rates +var failureRate = new Rate("check_failure_rate"); + +// Options +export let options = { + stages: [ + // Linearly ramp up from 1 to 50 VUs during first minute + { target: 50, duration: "1m" }, + // Hold at 50 VUs for the next 3 minutes and 45 seconds + { target: 50, duration: "3m45s" }, + // Linearly ramp down from 50 to 0 VUs over the last 15 seconds + { target: 0, duration: "15s" } + // Total execution time will be ~5 minutes + ], + thresholds: { + // We want the 95th percentile of all HTTP request durations to be less than 40s + "http_req_duration": ["p(95)<40000"], + // Thresholds based on the custom metric we defined and use to track application failures + "check_failure_rate": [ + // Global failure rate should be less than 1% + "rate<0.01", + // Abort the test early if it climbs over 5% + { threshold: "rate<=0.05", abortOnFail: true }, + ], + }, +}; + +// Main function +export default function () { + let content_size = 1024 * 1024 * 256; // 256 MB + let no_random_input = true; + let outfile = randomIntBetween(1,500000); + let url = 
`${PROTOCOL}://${HOSTNAME}:${PORT}/api/SyncPutBlobAsBytesReturnHttpResponse?content_size=${content_size}&no_random_input=${no_random_input}&outfile=${outfile}`; + let response = http.get(url); + + // check() returns false if any of the specified conditions fail + let checkRes = check(response, { + "status is 200": (r) => r.status === 200, + "content_size matches": (r) => r.json().content_size === content_size, + }); + + // We reverse the check() result since we want to count the failures + failureRate.add(!checkRes); +} diff --git a/.github/workflows/perf-testing-setup.yml b/.github/workflows/perf-testing-setup.yml index bcdce2990..7e47965cb 100644 --- a/.github/workflows/perf-testing-setup.yml +++ b/.github/workflows/perf-testing-setup.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - test_to_run: [ SyncHttpTriggerHelloWorld, SyncHttpTriggerWithSyncRequests, AsyncHttpTriggerWithAsyncRequest, SyncHttpTriggerCPUIntensive ] + test_to_run: [ SyncHttpTriggerHelloWorld, SyncHttpTriggerWithSyncRequests, AsyncHttpTriggerWithAsyncRequest, SyncHttpTriggerCPUIntensive, SyncPutBlobAsBytesReturnHttpResponse, SyncGetBlobAsBytesReturnHttpResponse ] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ env.PYTHON_VERSION }} @@ -43,7 +43,7 @@ jobs: - name: Build and Run the Docker image run: | docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --file .ci/perf_tests/dockerfiles/perf_tests.Dockerfile --tag perfimage:latest . 
- docker run -d --env FUNCTIONS_WORKER_RUNTIME_VERSION=${{ env.PYTHON_VERSION }} -p ${PORT}:80 -v $GITHUB_WORKSPACE/azure_functions_worker:/azure-functions-host/workers/python/${{ env.PYTHON_VERSION }}/LINUX/X64/azure_functions_worker perfimage:latest + docker run -d --shm-size="2g" --env FUNCTIONS_WORKER_RUNTIME_VERSION=${{ env.PYTHON_VERSION }} -p ${PORT}:80 -v $GITHUB_WORKSPACE/azure_functions_worker:/azure-functions-host/workers/python/${{ env.PYTHON_VERSION }}/LINUX/X64/azure_functions_worker perfimage:latest sleep 10 # host needs some time to start. - name: Validate if the functions are now running run: | From 598acf4854f45f3c5be3f9f8d05338781aec4f25 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 18:49:50 +0000 Subject: [PATCH 67/76] Changing branch hardcode to instead use current branch --- .github/workflows/perf-testing-setup.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/perf-testing-setup.yml b/.github/workflows/perf-testing-setup.yml index 7e47965cb..7519dae92 100644 --- a/.github/workflows/perf-testing-setup.yml +++ b/.github/workflows/perf-testing-setup.yml @@ -6,7 +6,7 @@ on: branches: [ dev ] env: - PERF_TESTS_LINK: "https://raw.githubusercontent.com/Azure/azure-functions-python-worker/dev/.ci/perf_tests/k6scripts/" + PERF_TESTS_LINK: "https://raw.githubusercontent.com/Azure/azure-functions-python-worker/${GITHUB_REF##*/}/.ci/perf_tests/k6scripts/" PYTHON_VERSION: "3.8" PORT: 8000 From 4c38ae03e06f9e8a81709b62af7f248fbf88b866 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 19:09:45 +0000 Subject: [PATCH 68/76] Creating tests URL from current branch name --- .github/workflows/perf-testing-setup.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/perf-testing-setup.yml b/.github/workflows/perf-testing-setup.yml index 7519dae92..434e09950 100644 --- a/.github/workflows/perf-testing-setup.yml +++ 
b/.github/workflows/perf-testing-setup.yml @@ -6,7 +6,8 @@ on: branches: [ dev ] env: - PERF_TESTS_LINK: "https://raw.githubusercontent.com/Azure/azure-functions-python-worker/${GITHUB_REF##*/}/.ci/perf_tests/k6scripts/" + REPO_URL: "https://raw.githubusercontent.com/Azure/azure-functions-python-worker" + TESTS_DIR_PATH: ".ci/perf_tests/k6scripts" PYTHON_VERSION: "3.8" PORT: 8000 @@ -48,7 +49,15 @@ jobs: - name: Validate if the functions are now running run: | curl --get http://localhost:${PORT}/api/${{ matrix.test_to_run }} + - name: Extract branch name + run: | + echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" + id: extract_branch + - name: Get Perf Tests Link + run: | + echo "##[set-output name=link;]$(echo ${{ env.REPO_URL }}/${{ steps.extract_branch.outputs.branch }}/${{ env.TESTS_DIR_PATH }})" + id: get_perf_tests_link - name: Run Throughput tests run: | chmod 755 .ci/perf_tests/run-perftests.sh - .ci/perf_tests/run-perftests.sh localhost $PORT $PERF_TESTS_LINK ${{ matrix.test_to_run }} + .ci/perf_tests/run-perftests.sh localhost $PORT ${{ steps.get_perf_tests_link.outputs.link }} ${{ matrix.test_to_run }} From 9bffdded3732db4d8f0d3f3e12c74600a8ba2318 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 19:11:50 +0000 Subject: [PATCH 69/76] Whitespace fixes --- .github/workflows/perf-testing-setup.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/perf-testing-setup.yml b/.github/workflows/perf-testing-setup.yml index 434e09950..2f861c8b8 100644 --- a/.github/workflows/perf-testing-setup.yml +++ b/.github/workflows/perf-testing-setup.yml @@ -49,14 +49,14 @@ jobs: - name: Validate if the functions are now running run: | curl --get http://localhost:${PORT}/api/${{ matrix.test_to_run }} - - name: Extract branch name - run: | - echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" - id: extract_branch - - name: Get Perf Tests Link - run: | - echo 
"##[set-output name=link;]$(echo ${{ env.REPO_URL }}/${{ steps.extract_branch.outputs.branch }}/${{ env.TESTS_DIR_PATH }})" - id: get_perf_tests_link + - name: Extract branch name + run: | + echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" + id: extract_branch + - name: Get Perf Tests Link + run: | + echo "##[set-output name=link;]$(echo ${{ env.REPO_URL }}/${{ steps.extract_branch.outputs.branch }}/${{ env.TESTS_DIR_PATH }})" + id: get_perf_tests_link - name: Run Throughput tests run: | chmod 755 .ci/perf_tests/run-perftests.sh From a43b3d33f2aedfcc1a9bdb499b79f37ca2ad0590 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 19:23:16 +0000 Subject: [PATCH 70/76] Using tests from local dir --- .github/workflows/perf-testing-setup.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/perf-testing-setup.yml b/.github/workflows/perf-testing-setup.yml index 2f861c8b8..205421599 100644 --- a/.github/workflows/perf-testing-setup.yml +++ b/.github/workflows/perf-testing-setup.yml @@ -7,7 +7,7 @@ on: env: REPO_URL: "https://raw.githubusercontent.com/Azure/azure-functions-python-worker" - TESTS_DIR_PATH: ".ci/perf_tests/k6scripts" + TESTS_DIR_PATH: ".ci/perf_tests/k6scripts/" PYTHON_VERSION: "3.8" PORT: 8000 @@ -60,4 +60,4 @@ jobs: - name: Run Throughput tests run: | chmod 755 .ci/perf_tests/run-perftests.sh - .ci/perf_tests/run-perftests.sh localhost $PORT ${{ steps.get_perf_tests_link.outputs.link }} ${{ matrix.test_to_run }} + .ci/perf_tests/run-perftests.sh localhost $PORT ${{ env.TESTS_DIR_PATH }} ${{ matrix.test_to_run }} From 87ee40e96e725869f6d591ddc3251ac3106fa421 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 19:38:22 +0000 Subject: [PATCH 71/76] Removing the need to use a URL for tests; just use from local directory --- .github/workflows/perf-testing-setup.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/perf-testing-setup.yml 
b/.github/workflows/perf-testing-setup.yml index 205421599..9830605a3 100644 --- a/.github/workflows/perf-testing-setup.yml +++ b/.github/workflows/perf-testing-setup.yml @@ -6,7 +6,6 @@ on: branches: [ dev ] env: - REPO_URL: "https://raw.githubusercontent.com/Azure/azure-functions-python-worker" TESTS_DIR_PATH: ".ci/perf_tests/k6scripts/" PYTHON_VERSION: "3.8" PORT: 8000 @@ -49,14 +48,6 @@ jobs: - name: Validate if the functions are now running run: | curl --get http://localhost:${PORT}/api/${{ matrix.test_to_run }} - - name: Extract branch name - run: | - echo "##[set-output name=branch;]$(echo ${GITHUB_REF#refs/heads/})" - id: extract_branch - - name: Get Perf Tests Link - run: | - echo "##[set-output name=link;]$(echo ${{ env.REPO_URL }}/${{ steps.extract_branch.outputs.branch }}/${{ env.TESTS_DIR_PATH }})" - id: get_perf_tests_link - name: Run Throughput tests run: | chmod 755 .ci/perf_tests/run-perftests.sh From 04dba673df01d6c15abddfc9bfe816cab05f3438 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Mon, 15 Mar 2021 14:12:42 -0700 Subject: [PATCH 72/76] Rename env variable to follow convention from host of using FUNCTIONS_ --- azure_functions_worker/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure_functions_worker/constants.py b/azure_functions_worker/constants.py index 75406ad33..d5b2ba674 100644 --- a/azure_functions_worker/constants.py +++ b/azure_functions_worker/constants.py @@ -28,7 +28,7 @@ Comma-separated list of directories where shared memory maps can be created for data transfer between host and worker. 
""" -UNIX_SHARED_MEMORY_DIRECTORIES = "UNIX_SHARED_MEMORY_DIRECTORIES" +UNIX_SHARED_MEMORY_DIRECTORIES = "FUNCTIONS_UNIX_SHARED_MEMORY_DIRECTORIES" # Setting Defaults PYTHON_THREADPOOL_THREAD_COUNT_DEFAULT = 1 From f7df639a1273225100d754db3e2d3995b5746a60 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Mar 2021 10:52:19 -0700 Subject: [PATCH 73/76] Addressing comments --- .../file_accessor_unix.py | 5 ++++- .../shared_memory_constants.py | 20 +++++++++---------- .../shared_memory_manager.py | 4 ++++ .../shared_memory_map.py | 2 +- azure_functions_worker/testutils.py | 14 ++++++++++--- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index 85060c20c..c2e2ac38d 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -24,6 +24,9 @@ def __init__(self): # already or have been created successfully for use). self.valid_dirs = self._get_valid_mem_map_dirs() + def __del__(self): + del self.valid_dirs + def open_mem_map( self, mem_map_name: str, @@ -67,7 +70,7 @@ def create_mem_map(self, mem_map_name: str, mem_map_size: int) \ def delete_mem_map(self, mem_map_name: str, mem_map: mmap.mmap) -> bool: if mem_map_name is None or mem_map_name == '': - raise Exception( + raise SharedMemoryException( f'Cannot delete memory map. 
Invalid name {mem_map_name}') try: fd = self._open_mem_map_file(mem_map_name) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py index 45d227c3e..ac25170b3 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_constants.py @@ -7,13 +7,14 @@ class HeaderFlags: """ Flags that are present in the header region of the memory maps. """ + Initialized = b'\x01' """ Indicates that the memory map has been initialized, may be in use and is not new. This represents a boolean value of True. """ - Initialized = b'\x01' + MEM_MAP_INITIALIZED_FLAG_NUM_BYTES = 1 """ The length of a bool which is the length of the part of the header flag specifying if the memory map is already created and used. @@ -21,20 +22,20 @@ class HeaderFlags: previously created and may be in use already. Header flags are defined in the class SharedMemoryConstants.HeaderFlags. """ - MEM_MAP_INITIALIZED_FLAG_NUM_BYTES = 1 + CONTENT_LENGTH_NUM_BYTES = 8 """ The length of a long which is the length of the part of the header specifying content length in the memory map. """ - CONTENT_LENGTH_NUM_BYTES = 8 + CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + \ + CONTENT_LENGTH_NUM_BYTES """ The total length of the header """ - CONTENT_HEADER_TOTAL_BYTES = MEM_MAP_INITIALIZED_FLAG_NUM_BYTES + \ - CONTENT_LENGTH_NUM_BYTES + MIN_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB """ Minimum size (in number of bytes) an object must be in order for it to be transferred over shared memory. @@ -42,8 +43,8 @@ class HeaderFlags: Note: This needs to be consistent among the host and workers. e.g. 
in the host, it is defined in SharedMemoryConstants.cs """ - MIN_BYTES_FOR_SHARED_MEM_TRANSFER = 1024 * 1024 # 1 MB + MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 2 * 1024 * 1024 * 1024 # 2 GB """ Maximum size (in number of bytes) an object must be in order for it to be transferred over shared memory. @@ -53,8 +54,8 @@ class HeaderFlags: Note: This needs to be consistent among the host and workers. e.g. in the host, it is defined in SharedMemoryConstants.cs """ - MAX_BYTES_FOR_SHARED_MEM_TRANSFER = 2 * 1024 * 1024 * 1024 # 2 GB + SIZE_OF_CHAR_BYTES = 2 """ This is what the size of a character is in DotNet. Can be verified by doing "sizeof(char)". @@ -63,8 +64,8 @@ class HeaderFlags: by this constant. Corresponding logic in the host can be found in SharedMemoryManager.cs """ - SIZE_OF_CHAR_BYTES = 2 + UNIX_TEMP_DIRS = ["/dev/shm"] """ Default directories in Unix where the memory maps can be found. These list is in order of preference, starting with the highest preference @@ -72,9 +73,8 @@ class HeaderFlags: A user can override this by using the AppSetting: UNIX_SHARED_MEMORY_DIRECTORIES. 
""" - UNIX_TEMP_DIRS = ["/dev/shm"] + UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" """ Suffix for the temp directories containing memory maps in Unix """ - UNIX_TEMP_DIR_SUFFIX = "AzureFunctions" diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py index 3475f51f4..b4cba5444 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_manager.py @@ -31,6 +31,10 @@ def __init__(self): self._allocated_mem_maps: Dict[str, SharedMemoryMap] = {} self._file_accessor = FileAccessorFactory.create_file_accessor() + def __del__(self): + del self._file_accessor + del self._allocated_mem_maps + @property def allocated_mem_maps(self): """ diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py index 8bd0e977d..f1dedcb3e 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/shared_memory_map.py @@ -33,7 +33,7 @@ def __init__( self.mem_map_name = mem_map_name self.mem_map = mem_map - def put_bytes(self, content: bytes) -> int: + def put_bytes(self, content: bytes) -> Optional[int]: """ Writes the given content bytes into this SharedMemoryMap. 
The number of bytes written must be less than or equal to the size of diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index dc1877050..668e51211 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -331,12 +331,12 @@ def _setUpDarwin(self): size_in_mb = consts.MAX_BYTES_FOR_SHARED_MEM_TRANSFER / (1024 * 1024) size = 2048 * size_in_mb # The following command returns the name of the created disk - cmd = ['hdutil', 'attach', '-nomount', f'ram://{size}'] + cmd = ['hdiutil', 'attach', '-nomount', f'ram://{size}'] result = subprocess.run(cmd, stdout=subprocess.PIPE) if result.returncode != 0: raise IOError(f'Cannot create ram disk with command: {cmd} - ' f'{result.stdout} - {result.stderr}') - disk_name = result.stdout + disk_name = result.stdout.strip().decode() # We create a volume on the disk created above and mount it volume_name = 'shm' cmd = ['diskutil', 'eraseVolume', 'HFS+', volume_name, disk_name] @@ -350,9 +350,17 @@ def _setUpDarwin(self): # Override the AppSetting for the duration of this test so the # FileAccessorUnix can use these directories for creating memory maps os.environ.update( - {UNIX_SHARED_MEMORY_DIRECTORIES: self.created_directories}) + {UNIX_SHARED_MEMORY_DIRECTORIES: + ','.join(self.created_directories)}) def _tearDownDarwin(self): + volume_name = 'shm' + cmd = ['find', '/Volumes', '-type', 'd', '-name', volume_name, + '-print0', '|', 'xargs', '-0', 'umount'] + result = subprocess.run(cmd, stdout=subprocess.PIPE) + if result.returncode != 0: + raise IOError(f'Cannot delete volume with command: {cmd} - ' + f'{result.stdout} - {result.stderr}') self._deleteSharedMemoryDirectories(self.created_directories) From 00b36ed044c8cdd88c1e0873357f9c5dbbda9448 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Mar 2021 14:10:05 -0700 Subject: [PATCH 74/76] Fixes for running tests on macOS --- azure_functions_worker/testutils.py | 12 +++++++----- 1 file changed, 
7 insertions(+), 5 deletions(-) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 668e51211..73098772a 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -252,8 +252,6 @@ class SharedMemoryTestCase(unittest.TestCase): For tests involving shared memory data transfer usage. """ def setUp(self): - self.file_accessor = FileAccessorFactory.create_file_accessor() - self.was_shmem_env_true = is_envvar_true( FUNCTIONS_WORKER_SHARED_MEMORY_DATA_TRANSFER_ENABLED) os.environ.update( @@ -269,6 +267,7 @@ def setUp(self): self._setUpDarwin() elif os_name == 'Linux': self._setUpLinux() + self.file_accessor = FileAccessorFactory.create_file_accessor() def tearDown(self): os_name = platform.system() @@ -346,6 +345,7 @@ def _setUpDarwin(self): f'{result.stdout} - {result.stderr}') directory = f'/Volumes/{volume_name}' self.created_directories = [directory] + # Create directories in the volume for shared memory maps self._createSharedMemoryDirectories(self.created_directories) # Override the AppSetting for the duration of this test so the # FileAccessorUnix can use these directories for creating memory maps @@ -354,14 +354,16 @@ def _setUpDarwin(self): ','.join(self.created_directories)}) def _tearDownDarwin(self): + # Delete the directories containing shared memory maps + self._deleteSharedMemoryDirectories(self.created_directories) + # Unmount the volume used for shared memory maps volume_name = 'shm' - cmd = ['find', '/Volumes', '-type', 'd', '-name', volume_name, - '-print0', '|', 'xargs', '-0', 'umount'] + cmd = f"find /Volumes -type d -name '{volume_name}*' -print0 " \ + "| xargs -0 umount -f" result = subprocess.run(cmd, stdout=subprocess.PIPE) if result.returncode != 0: raise IOError(f'Cannot delete volume with command: {cmd} - ' f'{result.stdout} - {result.stderr}') - self._deleteSharedMemoryDirectories(self.created_directories) class _MockWebHostServicer(protos.FunctionRpcServicer): From 
ab7d48889dbbb26c5e4fefbe0d110122b5ec3c5c Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Mar 2021 14:21:30 -0700 Subject: [PATCH 75/76] Log which allowed directories for shared memory are being used --- .../shared_memory_data_transfer/file_accessor_unix.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py index c2e2ac38d..5d0315592 100644 --- a/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py +++ b/azure_functions_worker/bindings/shared_memory_data_transfer/file_accessor_unix.py @@ -92,12 +92,16 @@ def _get_allowed_mem_map_dirs(self) -> List[str]: If specified in AppSetting, that list will be used. Otherwise, the default value will be used. """ - allowed_mem_map_dirs_str = get_app_setting( - constants.UNIX_SHARED_MEMORY_DIRECTORIES) + setting = constants.UNIX_SHARED_MEMORY_DIRECTORIES + allowed_mem_map_dirs_str = get_app_setting(setting) if allowed_mem_map_dirs_str is None: allowed_mem_map_dirs = consts.UNIX_TEMP_DIRS + logger.info('Using allowed directories for shared memory: ' + f'{allowed_mem_map_dirs} from App Setting: {setting}') else: allowed_mem_map_dirs = allowed_mem_map_dirs_str.split(',') + logger.info('Using default allowed directories for shared memory: ' + f'{allowed_mem_map_dirs}') return allowed_mem_map_dirs def _get_valid_mem_map_dirs(self) -> List[str]: From 0e56751ba73d55cfc8999724f4d94959817f3a47 Mon Sep 17 00:00:00 2001 From: Gohar Irfan Chaudhry Date: Fri, 19 Mar 2021 14:25:30 -0700 Subject: [PATCH 76/76] Change assert -> raise --- azure_functions_worker/bindings/meta.py | 3 ++- azure_functions_worker/testutils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/azure_functions_worker/bindings/meta.py b/azure_functions_worker/bindings/meta.py index 332178f6e..cbf95fcb0 100644 --- 
a/azure_functions_worker/bindings/meta.py +++ b/azure_functions_worker/bindings/meta.py @@ -137,7 +137,8 @@ def to_outgoing_param_binding(binding: str, obj: typing.Any, *, else: # If not, send it as part of the response message over RPC rpc_val = datumdef.datum_as_proto(datum) - assert rpc_val is not None + if rpc_val is None: + raise TypeError('Cannot convert datum to rpc_val') return protos.ParameterBinding( name=out_name, data=rpc_val) diff --git a/azure_functions_worker/testutils.py b/azure_functions_worker/testutils.py index 47f25dd12..0cd9eae57 100644 --- a/azure_functions_worker/testutils.py +++ b/azure_functions_worker/testutils.py @@ -364,7 +364,7 @@ def _tearDownDarwin(self): volume_name = 'shm' cmd = f"find /Volumes -type d -name '{volume_name}*' -print0 " \ "| xargs -0 umount -f" - result = subprocess.run(cmd, stdout=subprocess.PIPE) + result = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True) if result.returncode != 0: raise IOError(f'Cannot delete volume with command: {cmd} - ' f'{result.stdout} - {result.stderr}')