From da5d9289160a9323a5ac368403c888ab28a3d898 Mon Sep 17 00:00:00 2001 From: Wilko Nienhaus Date: Tue, 17 Jun 2025 09:01:53 +0300 Subject: [PATCH] Fix parsing of integer literals with base prefix MicroPython 1.25.0 introduced a breaking change, aligning the behaviour of the int() function with the behaviour of CPython (a decimal number is assumed unless a base is specified; only if a base of 0 is specified is the base inferred from the string). This commit implements a new custom parsing function `parse_int`. It can correctly parse the following string literals: * 0x[0-9a-fA-F]+ -> treated as hex * 0b[01]+ -> treated as binary * 0o[0-7]+ -> treated as octal (Python style) * 0[0-7]+ -> treated as octal (GNU as style) * anything else parsed as decimal The new function only handles the GNU as style octal case directly, letting the original `int()` function handle the other cases (using base 0). In fact, the GNU as octal case was not handled correctly previously, and this commit fixes that. Some new tests for previous functionality were added to show that both new and previous cases are being handled correctly. Note: GNU as does not actually accept the octal prefix 0o..., but we accept it as a convenience, as this is accepted in Python code. This means, however, that our assembler accepts code which GNU as does not accept. Conversely, we still accept all code that GNU as accepts, which was one of our goals. 
--- esp32_ulp/assemble.py | 6 +++--- esp32_ulp/opcodes.py | 11 +++++++--- esp32_ulp/opcodes_s2.py | 11 +++++++--- esp32_ulp/util.py | 12 +++++++++++ tests/opcodes.py | 15 +++++++++++--- tests/opcodes_s2.py | 15 +++++++++++--- tests/util.py | 45 ++++++++++++++++++++++++++++++++++++++++- 7 files changed, 99 insertions(+), 16 deletions(-) diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 8b79071..33cec42 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -219,13 +219,13 @@ def fill(self, section, amount, fill_byte): raise ValueError('fill in bss section not allowed') if section is TEXT: # TODO: text section should be filled with NOPs raise ValueError('fill/skip/align in text section not supported') - fill = int(fill_byte or 0).to_bytes(1, 'little') * amount + fill = int(self.opcodes.eval_arg(str(fill_byte or 0))).to_bytes(1, 'little') * amount self.offsets[section] += len(fill) if section is not BSS: self.sections[section].append(fill) def d_skip(self, amount, fill=None): - amount = int(amount) + amount = int(self.opcodes.eval_arg(amount)) self.fill(self.section, amount, fill) d_space = d_skip @@ -246,7 +246,7 @@ def d_global(self, symbol): self.symbols.set_global(symbol) def append_data(self, wordlen, args): - data = [int(arg).to_bytes(wordlen, 'little') for arg in args] + data = [int(self.opcodes.eval_arg(arg)).to_bytes(wordlen, 'little') for arg in args] self.append_section(b''.join(data)) def d_byte(self, *args): diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 10fc3d1..03849a3 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -13,7 +13,7 @@ from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN from .soc import * -from .util import split_tokens, validate_expression +from .util import split_tokens, validate_expression, parse_int # XXX dirty hack: use a global for the symbol table symbols = None @@ -285,7 +285,12 @@ def eval_arg(arg): _, _, sym_value = symbols.get_sym(token) 
parts.append(str(sym_value)) else: - parts.append(token) + try: + # attempt to parse, to convert numbers with base prefix correctly + int_token = parse_int(token) + parts.append(str(int_token)) + except ValueError: + parts.append(token) parts = "".join(parts) if not validate_expression(parts): raise ValueError('Unsupported expression: %s' % parts) @@ -311,7 +316,7 @@ def arg_qualify(arg): if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']: return ARG(COND, arg_lower, arg) try: - return ARG(IMM, int(arg), arg) + return ARG(IMM, parse_int(arg), arg) except ValueError: pass try: diff --git a/esp32_ulp/opcodes_s2.py b/esp32_ulp/opcodes_s2.py index 91549af..3a9d643 100644 --- a/esp32_ulp/opcodes_s2.py +++ b/esp32_ulp/opcodes_s2.py @@ -12,7 +12,7 @@ from ucollections import namedtuple from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN -from .util import split_tokens, validate_expression +from .util import split_tokens, validate_expression, parse_int # XXX dirty hack: use a global for the symbol table symbols = None @@ -301,7 +301,12 @@ def eval_arg(arg): _, _, sym_value = symbols.get_sym(token) parts.append(str(sym_value)) else: - parts.append(token) + try: + # attempt to parse, to convert numbers with base prefix correctly + int_token = parse_int(token) + parts.append(str(int_token)) + except ValueError: + parts.append(token) parts = "".join(parts) if not validate_expression(parts): raise ValueError('Unsupported expression: %s' % parts) @@ -327,7 +332,7 @@ def arg_qualify(arg): if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']: return ARG(COND, arg_lower, arg) try: - return ARG(IMM, int(arg), arg) + return ARG(IMM, parse_int(arg), arg) except ValueError: pass try: diff --git a/esp32_ulp/util.py b/esp32_ulp/util.py index 78e7c85..5f1628d 100644 --- a/esp32_ulp/util.py +++ b/esp32_ulp/util.py @@ -77,6 +77,18 @@ def validate_expression(param): return True +def parse_int(literal): + """ + GNU as compatible parsing of 
string literals into integers + Specifically, GNU as treats literals starting with 0 as octal + All other literals are correctly parsed by Python + See: https://sourceware.org/binutils/docs/as/Integers.html + """ + if len(literal) >= 2 and (literal.startswith("0") or literal.startswith("-0")) and literal.lstrip("-0").isdigit(): + return int(literal, 8) + return int(literal, 0) + + def file_exists(filename): try: os.stat(filename) diff --git a/tests/opcodes.py b/tests/opcodes.py index 3dc7453..f8109b5 100644 --- a/tests/opcodes.py +++ b/tests/opcodes.py @@ -7,7 +7,7 @@ from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN from esp32_ulp.opcodes import make_ins, make_ins_struct_def -from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND +from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT import esp32_ulp.opcodes as opcodes @@ -46,6 +46,7 @@ def test_arg_qualify(): assert arg_qualify('-1') == ARG(IMM, -1, '-1') assert arg_qualify('1') == ARG(IMM, 1, '1') assert arg_qualify('0x20') == ARG(IMM, 32, '0x20') + assert arg_qualify('0100') == ARG(IMM, 64, '0100') assert arg_qualify('0o100') == ARG(IMM, 64, '0o100') assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000') assert arg_qualify('eq') == ARG(COND, 'eq', 'eq') @@ -96,6 +97,11 @@ def test_eval_arg(): assert eval_arg('const >> 1') == 21 assert eval_arg('(const|4)&0xf') == 0xe + assert eval_arg('0x7') == 7 + assert eval_arg('010') == 8 + assert eval_arg('-0x7') == -7 # negative + assert eval_arg('~0x7') == -8 # complement + assert_raises(ValueError, eval_arg, 'evil()') assert_raises(ValueError, eval_arg, 'def cafe()') assert_raises(ValueError, eval_arg, '1 ^ 2') @@ -105,14 +111,17 @@ def test_eval_arg(): opcodes.symbols = None -def assert_raises(exception, func, *args): +def assert_raises(exception, func, *args, message=None): try: func(*args) - 
except exception: + except exception as e: raised = True + actual_message = e.args[0] else: raised = False assert raised + if message: + assert actual_message == message, '%s == %s' % (actual_message, message) def test_reg_direct_ulp_addressing(): diff --git a/tests/opcodes_s2.py b/tests/opcodes_s2.py index 4525049..b9d74d3 100644 --- a/tests/opcodes_s2.py +++ b/tests/opcodes_s2.py @@ -7,7 +7,7 @@ from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN from esp32_ulp.opcodes_s2 import make_ins, make_ins_struct_def -from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND +from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT import esp32_ulp.opcodes_s2 as opcodes @@ -46,6 +46,7 @@ def test_arg_qualify(): assert arg_qualify('-1') == ARG(IMM, -1, '-1') assert arg_qualify('1') == ARG(IMM, 1, '1') assert arg_qualify('0x20') == ARG(IMM, 32, '0x20') + assert arg_qualify('0100') == ARG(IMM, 64, '0100') assert arg_qualify('0o100') == ARG(IMM, 64, '0o100') assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000') assert arg_qualify('eq') == ARG(COND, 'eq', 'eq') @@ -96,6 +97,11 @@ def test_eval_arg(): assert eval_arg('const >> 1') == 21 assert eval_arg('(const|4)&0xf') == 0xe + assert eval_arg('0x7') == 7 + assert eval_arg('010') == 8 + assert eval_arg('-0x7') == -7 # negative + assert eval_arg('~0x7') == -8 # complement + assert_raises(ValueError, eval_arg, 'evil()') assert_raises(ValueError, eval_arg, 'def cafe()') assert_raises(ValueError, eval_arg, '1 ^ 2') @@ -105,14 +111,17 @@ def test_eval_arg(): opcodes.symbols = None -def assert_raises(exception, func, *args): +def assert_raises(exception, func, *args, message=None): try: func(*args) - except exception: + except exception as e: raised = True + actual_message = e.args[0] else: raised = False assert raised + if message: + assert actual_message == 
message, '%s == %s' % (actual_message, message) def test_reg_direct_ulp_addressing(): diff --git a/tests/util.py b/tests/util.py index 6aadc8b..5f487ae 100644 --- a/tests/util.py +++ b/tests/util.py @@ -6,7 +6,7 @@ # SPDX-License-Identifier: MIT import os -from esp32_ulp.util import split_tokens, validate_expression, file_exists +from esp32_ulp.util import split_tokens, validate_expression, parse_int, file_exists tests = [] @@ -18,6 +18,19 @@ def test(param): tests.append(param) +def assert_raises(exception, func, *args, message=None): + try: + func(*args) + except exception as e: + raised = True + actual_message = e.args[0] + else: + raised = False + assert raised + if message: + assert actual_message == message, '%s == %s' % (actual_message, message) + + @test def test_split_tokens(): assert split_tokens("") == [] @@ -69,6 +82,36 @@ def test_validate_expression(): assert validate_expression('def CAFE()') is False +@test +def test_parse_int(): + # decimal + assert parse_int("0") == 0, "0 == 0" + assert parse_int("5") == 5, "5 == 5" + assert parse_int("-0") == 0, "-0 == 0" + assert parse_int("-5") == -5, "-5 == -5" + # hex + assert parse_int("0x5") == 5, "0x5 == 5" + assert parse_int("0x5a") == 90, "0x5a == 90" + assert parse_int("-0x5a") == -90, "-0x5a == -90" + # binary + assert parse_int("0b1001") == 9, "0b1001 == 9" + assert parse_int("-0b1001") == -9, "-0b1001 == 9" + # octal + assert parse_int("07") == 7, "07 == 7" + assert parse_int("0100") == 64, "0100 == 64" + assert parse_int("0o210") == 136, "0o210 == 136" + assert parse_int("00000010") == 8, "00000010 == 8" + assert parse_int("-07") == -7, "-07 == -7" + assert parse_int("-0100") == -64, "-0100 == -64" + assert parse_int("-0o210") == -136, "-0o210 == -136" + assert parse_int("-00000010") == -8, "-00000010 == -8" + # negative cases + assert_raises(ValueError, parse_int, '0b123', message="invalid syntax for integer with base 2: '123'") + assert_raises(ValueError, parse_int, '0900', message="invalid syntax 
for integer with base 8: '0900'") + assert_raises(ValueError, parse_int, '0o900', message="invalid syntax for integer with base 8: '900'") + assert_raises(ValueError, parse_int, '0xg', message="invalid syntax for integer with base 16: 'g'") + + @test def test_file_exists(): testfile = '.testfile'