Skip to content

Commit dd32f20

Browse files
authored
Alphabetic canonical order (#1685)
@hrueter, this is a proposed reimplementation of #1680 that takes the WMA alphabetic ordering into account. Thanks for noticing this.
1 parent f34137c commit dd32f20

File tree

5 files changed

+89
-5
lines changed

5 files changed

+89
-5
lines changed

mathics/core/atoms/strings.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import sympy
1010

1111
from mathics.core.element import BoxElementMixin
12-
from mathics.core.keycomparable import BASIC_ATOM_STRING_ELT_ORDER
12+
from mathics.core.keycomparable import BASIC_ATOM_STRING_ELT_ORDER, wma_str_sort_key
1313
from mathics.core.symbols import Atom, Symbol, SymbolFalse, SymbolTrue, symbol_set
1414
from mathics.core.systemsymbols import SymbolFullForm, SymbolInputForm
1515

@@ -70,7 +70,7 @@ def element_order(self) -> tuple:
7070
"""
7171
return (
7272
BASIC_ATOM_STRING_ELT_ORDER,
73-
self.value,
73+
wma_str_sort_key(self.value),
7474
0,
7575
1,
7676
)

mathics/core/expression.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
GENERAL_EXPRESSION_ELT_ORDER,
4646
GENERAL_NUMERIC_EXPRESSION_ELT_ORDER,
4747
Monomial,
48+
wma_str_sort_key,
4849
)
4950
from mathics.core.structure import LinkedStructure
5051
from mathics.core.symbols import (
@@ -893,8 +894,9 @@ def element_order(self) -> tuple:
893894
3: tuple: list of Elements
894895
4: 1: No clue...
895896
"""
896-
exps: Dict[str, Union[float, complex]] = {}
897+
exps: Dict[Tuple[str, str], Union[float, complex]] = {}
897898
head = self._head
899+
898900
if head is SymbolTimes:
899901
for element in self.elements:
900902
name = element.get_name()
@@ -904,8 +906,10 @@ def element_order(self) -> tuple:
904906
assert isinstance(expr, (Expression, NumericOperators))
905907
exp = expr.round_to_float()
906908
if var and exp is not None:
909+
var = wma_str_sort_key(var)
907910
exps[var] = exps.get(var, 0) + exp
908911
elif name:
912+
name = wma_str_sort_key(name)
909913
exps[name] = exps.get(name, 0) + 1
910914
elif self.has_form("Power", 2):
911915
var = self.elements[0].get_name()
@@ -917,6 +921,7 @@ def element_order(self) -> tuple:
917921
except AttributeError:
918922
exp = None
919923
if var and exp is not None:
924+
var = wma_str_sort_key(var)
920925
exps[var] = exps.get(var, 0) + exp
921926
if exps:
922927
return (

mathics/core/keycomparable.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
44
"""
55

6+
from typing import Tuple
7+
68

79
class KeyComparable:
810
"""Mathics3/WL defines a "canonical ordering" between elements,
@@ -287,3 +289,38 @@ def __ne__(self, other) -> bool:
287289

288290
BASIC_EXPRESSION_ELT_ORDER = 0x22
289291
GENERAL_EXPRESSION_ELT_ORDER = 0x23
292+
293+
294+
def wma_str_sort_key(s: str) -> Tuple[str, str]:
295+
"""
296+
Return a tuple providing a sort key
297+
that reproduces the order of strings and symbols
298+
in WMA.
299+
For example, the following is a list of sorted
300+
strings in the WMA order:
301+
`{Abeja, ABEJA, AVe, ave de paso, Ave de paso, Ave de Paso}`
302+
The ordering criterion is: first sort case-insensitively; then, to break ties,
303+
at the first character that differs in the original strings,
304+
lower case comes before upper case.
305+
"""
306+
# An alternative to this implementation would be to map the
307+
# characters in a way that
308+
# a -> A
309+
# A -> B
310+
# b -> C
311+
# B -> D
312+
# ...
313+
# m -> Y
314+
# M -> Z
315+
# n -> a
316+
# N -> b
317+
# ...
318+
# z -> y
319+
# Z -> z
320+
# so the result is again a string. Another possibility would be
321+
# to return a wrapper class that implements this special comparison
322+
# on the fly through the method `__lt__`.
323+
return (
324+
s.lower(),
325+
s.swapcase(),
326+
)

mathics/core/symbols.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
BASIC_EXPRESSION_ELT_ORDER,
2020
BASIC_NUMERIC_EXPRESSION_ELT_ORDER,
2121
Monomial,
22+
wma_str_sort_key,
2223
)
2324
from mathics.eval.tracing import trace_evaluate
2425

@@ -556,15 +557,17 @@ def element_order(self) -> tuple:
556557
Return a tuple value that is used in ordering elements
557558
of an expression. The tuple is ultimately compared lexicographically.
558559
"""
560+
name = self.name
561+
name_key = wma_str_sort_key(name)
559562
return (
560563
(
561564
BASIC_NUMERIC_EXPRESSION_ELT_ORDER
562565
if self.is_numeric()
563566
else BASIC_EXPRESSION_ELT_ORDER
564567
),
565-
Monomial({self.name: 1}),
568+
Monomial({name_key: 1}),
566569
0,
567-
self.name,
570+
name,
568571
1,
569572
)
570573

test/builtin/test_sort.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,49 @@
11
# -*- coding: utf-8 -*-
22

3+
from test.helper import check_evaluation
34

45
from mathics.core.expression import Expression
56
from mathics.core.symbols import Symbol, SymbolPlus, SymbolTimes
67

78

9+
def test_sort_wma():
10+
"""Test the alphabetic order in WMA for Strings and Symbols"""
11+
# In Python, str values are ordered as tuples of
12+
# the Unicode code points of their characters. So,
13+
#
14+
# "Abeja" <"Ave"<"aVe"<"abeja"
15+
#
16+
# In WMA, strings and symbols are sorted in alphabetical order, with
17+
# lowercase characters coming before the corresponding uppercase ones.
18+
# Thus, the same words are sorted in WMA as
19+
#
20+
# "abeja"< "Abeja"<"aVe"<"Ave"
21+
#
22+
# Such an order is equivalent to using
23+
# `lambda s: (s.lower(), s.swapcase(),)` as the sort key.
24+
#
25+
# Finally, String atoms come before Symbols. The following test
26+
# enforces this order.
27+
str_expr = (
28+
'{"Ave", "aVe", "abeja", AVe, ave, aVe, "Abeja", "ABEJA", '
29+
'"AVe", "ave del paraíso", "Ave del paraíso", '
30+
'"Ave del Paraíso"} // Sort // InputForm'
31+
)
32+
str_expected = (
33+
'{"abeja", "Abeja", "ABEJA", "aVe", "Ave", "AVe", '
34+
'"ave del paraíso", "Ave del paraíso", "Ave del Paraíso", '
35+
"ave, aVe, AVe}//InputForm"
36+
)
37+
check_evaluation(
38+
str_expr,
39+
str_expected,
40+
# to_string_expr=True,
41+
# to_string_expected=True,
42+
# hold_expected=True,
43+
failure_message="WMA order",
44+
)
45+
46+
847
def test_Expression_sameQ():
948
"""
1049
Test Expression.SameQ

0 commit comments

Comments
 (0)