Skip to content

Commit e82e2d7

Browse files
authored
Merge 9fdfdf2 into 8726c8a
2 parents 8726c8a + 9fdfdf2 commit e82e2d7

File tree

2 files changed

+60
-29
lines changed

2 files changed

+60
-29
lines changed

source/NVDAObjects/IAccessible/adobeAcrobat.py

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -119,61 +119,91 @@ def _isEqual(self, other):
119119
return self.accID == other.accID
120120
return super(AcrobatNode, self)._isEqual(other)
121121

122-
def _getNodeMathMl(self, node):
123-
tag = node.GetTagName()
124-
yield "<%s" % tag
125-
# Output relevant attributes.
126-
if tag == "mfenced":
127-
for attr in "open", "close", "separators":
128-
val = node.GetAttribute(attr, "XML-1.00")
122+
def _getNodeMathMl(self, node) -> str:
123+
"""Traverse the MathML tree and return an XML string representing the math"""
124+
125+
def getMathMLAttributes(element, attrList: list) -> str:
126+
attrValues = ""
127+
for attr in attrList:
128+
val = element.GetAttribute(attr, "NSO")
129129
if val:
130-
yield ' %s="%s"' % (attr, val)
131-
yield ">"
130+
attrValues += f' {attr}="{val}"'
131+
return attrValues
132+
133+
tag = node.GetTagName()
134+
answer = f"<{tag}"
135+
# Output relevant attributes
136+
id = node.GetID()
137+
if id:
138+
answer += f' id="{id}"'
139+
answer += getMathMLAttributes(node, ["intent", "arg"])
140+
match tag:
141+
case "mi" | "mn" | "mo" | "mtext":
142+
answer += getMathMLAttributes(node, ["mathvariant"])
143+
case "mfenced":
144+
answer += getMathMLAttributes(node, ["open", "close", "separators"])
145+
case "menclose":
146+
answer += getMathMLAttributes(node, ["notation", "notationtype"])
147+
case "annotation-xml" | "annotation":
148+
answer += getMathMLAttributes(node, ["encoding"])
149+
case "ms":
150+
answer += getMathMLAttributes(node, ["open", "close"])
151+
case _:
152+
pass
153+
answer += ">"
132154
val = node.GetValue()
133155
if val:
134-
yield val
156+
answer += val
135157
else:
136158
for childNum in range(node.GetChildCount()):
137159
try:
138160
subNode = node.GetChild(childNum).QueryInterface(IPDDomElement)
139161
except COMError:
140162
continue
141163
for sub in self._getNodeMathMl(subNode):
142-
yield sub
143-
yield "</%s>" % tag
164+
answer += sub
165+
return answer + f"</{tag}>"
144166

145167
def _get_mathMl(self) -> str:
146168
"""Return the MathML associated with a Formula tag"""
169+
# There are two ways that MathML can be represented in a PDF:
170+
# 1. As a series of nested tags, each with a MathML element as the value.
171+
# 2. As a Formula tag with MathML as the value (comes from MathML in an Associated File)
147172
if self.pdDomNode is None:
148173
log.debugWarning("_get_mathMl: self.pdDomNode is None!")
149174
raise LookupError
175+
176+
# see if MathML tagging is used
177+
for childNum in range(self.pdDomNode.GetChildCount()):
178+
try:
179+
child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement)
180+
except COMError:
181+
log.debugWarning(f"COMError trying to get childNum={childNum}")
182+
continue
183+
if log.isEnabledFor(log.DEBUG):
184+
log.debug(f"\t(PDF) get_mathMl: tag={child.GetTagName()}")
185+
if child.GetTagName() == "math":
186+
answer = "".join(self._getNodeMathMl(child))
187+
log.debug(f"_get_mathMl (PDF): found tagged MathML = {answer}")
188+
return answer
189+
150190
mathMl = self.pdDomNode.GetValue()
151191
if log.isEnabledFor(log.DEBUG):
152192
log.debug(
153193
(
154-
f"_get_mathMl: math recognized: {mathMl.startswith('<math')}, "
194+
f"_get_mathMl (PDF): math recognized: {mathMl.startswith('<math')}, "
155195
f"child count={self.pdDomNode.GetChildCount()},"
156-
f"\n name='{self.pdDomNode.GetName()}', value='{mathMl}'"
196+
f"\n name='{self.pdDomNode.GetName()}', value found from AF ='{mathMl}'"
157197
),
158198
)
159199
# this test and the replacement don't work if someone uses a namespace tag (which they shouldn't, but...)
160200
if mathMl.startswith("<math"):
161201
return mathMl.replace('xmlns:mml="http://www.w3.org/1998/Math/MathML"', "")
162-
# Alternative for tagging: all the sub expressions are tagged -- gather up the MathML
163-
for childNum in range(self.pdDomNode.GetChildCount()):
164-
try:
165-
child = self.pdDomNode.GetChild(childNum).QueryInterface(IPDDomElement)
166-
except COMError:
167-
log.debugWarning(f"COMError trying to get childNum={childNum}")
168-
continue
169-
if log.isEnabledFor(log.DEBUG):
170-
log.debug(f"\tget_mathMl: tag={child.GetTagName()}")
171-
if child.GetTagName() == "math":
172-
return "".join(self._getNodeMathMl(child))
173-
# fall back to return the contents, which is hopefully alt text
174-
if log.isEnabledFor(log.DEBUG):
175-
log.debug("_get_mathMl: didn't find MathML -- returning value as mtext")
176-
return f"<math><mtext>{self.pdDomNode.GetValue()}</mtext></math>"
202+
203+
# not MathML -- fall back to return the contents, which is hopefully alt text, inside an <mtext>
204+
answer = f"<math><mtext>{mathMl}</mtext></math>"
205+
log.debug(f"_get_mathMl: didn't find MathML -- returning value as mtext: {answer}")
206+
return answer
177207

178208

179209
class RootNode(AcrobatNode):

user_docs/en/changes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,7 @@ There are many minor bug fixes for applications, such as Thunderbird, Adobe Read
598598

599599
### Bug Fixes
600600

601+
* Fixed the reading of math attributes in Adobe Reader; previously this resulted in poor or wrong speech and braille. (#17980)
601602
* Windows 11 fixes:
602603
* NVDA will once again announce hardware keyboard input suggestions. (#16283, @josephsl)
603604
* In Version 24H2 (2024 Update and Windows Server 2025), mouse and touch interaction can be used in quick settings. (#16348, @josephsl)

0 commit comments

Comments
 (0)