# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2021-2024, Science and Technology Facilities Council
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author: J. Henrichs, Bureau of Meteorology
# Modified: S. Siso, STFC Daresbury Lab
'''PSyIR backend to create expressions that are handled by SymPy.
'''
import keyword
from sympy import Function, Symbol
from sympy.parsing.sympy_parser import parse_expr
from psyclone.psyir.backend.fortran import FortranWriter
from psyclone.psyir.backend.visitor import VisitorError
from psyclone.psyir.frontend.sympy_reader import SymPyReader
from psyclone.psyir.nodes import DataNode, Range, Reference, IntrinsicCall
from psyclone.psyir.symbols import (ArrayType, ScalarType, SymbolTable)
[docs]class SymPyWriter(FortranWriter):
'''Implements a PSyIR-to-SymPy writer, which is used to create a
representation of the PSyIR tree that can be understood by SymPy. Most
Fortran expressions work as expected without modification. This class
implements special handling for constants (which can have a precision
attached, e.g. 2_4) and some intrinsic functions (e.g. ``MAX``, which SymPy
expects to be ``Max``). Array accesses are converted into functions (while
SymPy supports indexed expression, they cannot be used as expected when
solving, SymPy does not solve component-wise - ``M[x]-M[1]`` would not
result in ``x=1``, while it does for SymPy unknown functions).
Array expressions are supported by the writer: it will convert any array
expression like ``a(i:j:k)`` by using three arguments: ``a(i, j, k)``.
Then simple array accesses like ``b(i,j)`` are converted to
``b(i,i,1,j,j,1)``. Similarly, if ``a`` is known to be an array, then the
writer will use ``a(sympy_lower,sympy_upper,1)``. This makes sure all SymPy
unknown functions that represent an array use the same number of
arguments.
The simple use case of converting a (list of) PSyIR expressions to SymPy
expressions is as follows::
symp_expr_list = SymPyWriter(exp1, exp2, ...)
If additional functionality is required (access to the type map or
to convert a potentially modified SymPy expression back to PSyIR), an
instance of SymPy writer must be created::
writer = SymPyWriter()
symp_expr_list = writer([exp1, exp2, ...])
It additionally supports accesses to structure types. A full description
can be found in the manual:
https://psyclone-dev.readthedocs.io/en/latest/sympy.html#sympy
'''
# This option will disable the lowering of abstract nodes into language
# level nodes, and as a consequence the backend does not need to deep-copy
# the tree and is much faster to execute.
# Be careful not to modify anything from the input tree when this option
# is set to True as the modifications will persist after the Writer!
_DISABLE_LOWERING = True
# A list of all reserved Python keywords (Fortran variables that are the
# same as a reserved name must be renamed, otherwise parsing will fail).
# This class attribute will get initialised in __init__:
_RESERVED_NAMES = set()
def __init__(self):
super().__init__()
# The symbol table is used to create unique names for structure
# members that are being accessed (these need to be defined as
# SymPy functions or symbols, which could clash with other
# references in the expression).
self._symbol_table = None
# The writer will use special names in array expressions to indicate
# the lower and upper bound (e.g. ``a(::)`` becomes
# ``a(sympy_lower, sympy_upper, 1)``). The symbol table will be used
# to resolve a potential name clash with a user variable.
self._lower_bound = "sympy_lower"
self._upper_bound = "sympy_upper"
if not SymPyWriter._RESERVED_NAMES:
# Get the list of all reserved Python words from the
# keyword module:
for reserved in keyword.kwlist:
# Some Python keywords are capitalised (True, False, None).
# Since all symbols in PSyclone are lowercase, these can
# never clash when using SymPy. So only take keywords that
# are in lowercase:
if reserved.lower() == reserved:
SymPyWriter._RESERVED_NAMES.add(reserved)
# This dictionary will be supplied when parsing a string by SymPy
# and defines which symbols in the parsed expressions are scalars
# (SymPy symbols) or arrays (SymPy functions).
self._sympy_type_map = {}
# The set of intrinsic Fortran operations that need a rename or
# are case sensitive in SymPy:
self._intrinsic = set()
self._intrinsic_to_str = {}
# Create the mapping of intrinsics to the name SymPy expects.
for intr, intr_str in [(IntrinsicCall.Intrinsic.MAX, "Max"),
(IntrinsicCall.Intrinsic.MIN, "Min"),
(IntrinsicCall.Intrinsic.FLOOR, "floor"),
(IntrinsicCall.Intrinsic.TRANSPOSE,
"transpose"),
(IntrinsicCall.Intrinsic.MOD, "Mod"),
# exp is needed for a test case only, in
# general the maths functions can just be
# handled as unknown sympy functions.
(IntrinsicCall.Intrinsic.EXP, "exp"),
]:
self._intrinsic.add(intr_str)
self._intrinsic_to_str[intr] = intr_str
# -------------------------------------------------------------------------
[docs] def __new__(cls, *expressions):
'''This function allows the SymPy writer to be used in two
different ways: if only the SymPy expression of the PSyIR expressions
are required, it can be called as::
sympy_expressions = SymPyWriter(exp1, exp2, ...)
But if additional information is needed (e.g. the SymPy type map, or
to convert a SymPy expression back to PSyIR), an instance of the
SymPyWriter must be kept, e.g.::
writer = SymPyWriter()
sympy_expressions = writer([exp1, exp2, ...])
writer.type_map
:param expressions: a (potentially empty) tuple of PSyIR nodes
to be converted to SymPy expressions.
:type expressions: Tuple[:py:class:`psyclone.psyir.nodes.Node`]
:returns: either an instance of SymPyWriter, if no parameter is
specified, or a list of SymPy expressions.
:rtype: Union[:py:class:`psyclone.psyir.backend.SymPyWriter`,
List[:py:class:`sympy.core.basic.Basic`]]
'''
if expressions:
# If we have parameters, create an instance of the writer
# and use it to convert the expressions:
writer = SymPyWriter()
return writer(expressions)
# No parameter, just create an instance and return it:
return super().__new__(cls)
# -------------------------------------------------------------------------
def __getitem__(self, _):
'''This function is only here to trick pylint into thinking that
the object returned from ``__new__`` is subscriptable, meaning that
code like:
``out = SymPyWriter(exp1, exp2); out[1]`` does not trigger
a pylint warning about unsubscriptable-object.
'''
raise NotImplementedError("__getitem__ for a SymPyWriter should "
"never be called.")
# -------------------------------------------------------------------------
def _create_sympy_array_function(self, name, sig=None, num_dims=None):
'''Creates a Function class with the given name to be used for SymPy
parsing. This Function overwrites the conversion to string, and will
replace the triplicated array indices back to the normal Fortran
syntax. If the signature Sig and number of dimensions for each
component of the signature are given, it will add this information
to the object, so that the SymPyReader can recreate the proper
access to a user-defined type.
:param str name: name of the function class to create.
:param sig: the signature of the variable, which is required
to convert user defined types back properly. Only defined for
user-defined types.
:type sig: Optional[:py:class:`psyclone.core.Signature`]
:param num_dims: the number of dimensions for each component of a
user defined type.
:type num_dims: Optional[List[int]]
:returns: a SymPy function, which has a special ``_sympystr`` function
defined as attribute to print user-defined types..
:rtype: :py:class:`sympy.Function`
'''
# Now a new Fortran array is used. Create a new function
# instance, and overwrite how this function is converted back
# into a string by defining the ``_sympystr`` attribute,
# which points to a function that controls how this object
# is converted into a string. Use the ``print_fortran_array``
# function from the SymPyReader for this. Note that we cannot
# create a derived class based on ``Function`` and define
# this function there: SymPy tests internally if the type is a
# Function (not if it is an instance), therefore, SymPy's
# behaviour would change if we used a derived class:
# https://docs.sympy.org/latest/modules/functions/index.html:
# "It [Function class] also serves as a constructor for undefined
# function classes."
new_func = Function(name)
# pylint: disable=protected-access
new_func._sympystr = SymPyReader.print_fortran_array
# Store the signature and the number of dimensions of each
# component, so that SymPyReader.print_fortran_array can match
# the indices back to the user defined types.
new_func._sig = sig
new_func._num_dims = num_dims
# pylint: enable=protected-access
return new_func
# -------------------------------------------------------------------------
def _create_type_map(self, list_of_expressions):
'''This function creates a dictionary mapping each Reference in any
of the expressions to either a SymPy Function (if the reference
is an array reference) or a Symbol (if the reference is not an
array reference). It defines a new SymPy function for each array,
which has a special write method implemented that automatically
converts array indices back by combining each three arguments into
one expression (i. e. ``a(1,9,2)`` would become ``a(1:9:2)``).
A new symbol table is created any time this function is called, so
it is important to provide all expressions at once for the symbol
table to avoid name clashes in any expression.
This function also handles reserved names like 'lambda' which might
occur in one of the Fortran expressions. These reserved names must be
renamed (otherwise SymPy parsing, which uses ``eval`` internally,
fails). A symbol table is used which is initially filled with all
reserved names. Then for each reference accounted, a unique name is
generated using this symbol table - so if the reference is a reserved
name, a new name will be created (e.g. ``lambda`` might become
``lambda_1``). The SymPyWriter (e.g. in ``reference_node``) will
use the renamed value when creating the string representation.
:param list_of_expressions: the list of expressions from which all
references are taken and added to a symbol table to avoid
renaming any symbols (so that only member names will be renamed).
:type list_of_expressions: List[:py:class:`psyclone.psyir.nodes.Node`]
'''
# Create a new symbol table, so previous symbol will not affect this
# new conversion (i.e. this avoids name clashes with a previous
# conversion). First add all reserved names so that these names will
# automatically be renamed. The symbol table is used later to also
# create guaranteed unique names for lower and upper bounds.
self._symbol_table = SymbolTable()
for reserved in SymPyWriter._RESERVED_NAMES:
self._symbol_table.new_symbol(reserved)
# Find each reference in each of the expression, and declare this name
# as either a SymPy Symbol (scalar reference), or a SymPy Function
# (an array).
for expr in list_of_expressions:
for ref in expr.walk(Reference):
name = ref.name
# The reserved Python keywords do not have tags, so they
# will not be found.
if name in self._symbol_table.tags_dict:
continue
# Any symbol from the list of expressions to be handled
# will be created with a tag, so if the same symbol is
# used more than once, the previous test will prevent
# calling new_symbol again. If the name is a Python
# reserved symbol, a new unique name will be created by
# the symbol table.
unique_sym = self._symbol_table.new_symbol(name, tag=name)
# Test if an array or an array expression is used:
if not ref.is_array:
self._sympy_type_map[unique_sym.name] = Symbol(name)
continue
# A Fortran array is used which has not been seen before.
# Declare a new SymPy function for it. This SymPy function
# will convert array expressions back into the original
# Fortran code.
self._sympy_type_map[unique_sym.name] = \
self._create_sympy_array_function(name)
# Now all symbols have been added to the symbol table, create
# unique names for the lower- and upper-bounds using special tags:
self._lower_bound = \
self._symbol_table.new_symbol("sympy_lower",
tag="sympy!lower_bound").name
self._upper_bound = \
self._symbol_table.new_symbol("sympy_upper",
tag="sympy!upper_bound").name
# -------------------------------------------------------------------------
@property
def lower_bound(self):
''':returns: the name to be used for an unspecified lower bound.
:rtype: str
'''
return self._lower_bound
# -------------------------------------------------------------------------
@property
def upper_bound(self):
''':returns: the name to be used for an unspecified upper bound.
:rtype: str
'''
return self._upper_bound
# -------------------------------------------------------------------------
@property
def type_map(self):
''':returns: the mapping of names to SymPy symbols or functions.
:rtype: Dict[str, Union[:py:class:`sympy.core.symbol.Symbol`,
:py:class:`sympy.core.function.Function`]]
'''
return self._sympy_type_map
# -------------------------------------------------------------------------
def _to_str(self, list_of_expressions):
'''Converts PSyIR expressions to strings. It will replace Fortran-
specific expressions with code that can be parsed by SymPy. The
argument can either be a single element (in which case a single string
is returned) or a list/tuple, in which case a list is returned.
:param list_of_expressions: the list of expressions which are to be
converted into SymPy-parsable strings.
:type list_of_expressions: Union[:py:class:`psyclone.psyir.nodes.Node`,
List[:py:class:`psyclone.psyir.nodes.Node`]]
:returns: the converted strings(s).
:rtype: Union[str, List[str]]
'''
is_list = isinstance(list_of_expressions, (tuple, list))
if not is_list:
list_of_expressions = [list_of_expressions]
# Create the type map in `self._sympy_type_map`, which is required
# when converting these strings to SymPy expressions
self._create_type_map(list_of_expressions)
expression_str_list = []
for expr in list_of_expressions:
expression_str_list.append(super().__call__(expr))
# If the argument was a single expression, only return a single
# expression, otherwise return a list
if not is_list:
return expression_str_list[0]
return expression_str_list
# -------------------------------------------------------------------------
def __call__(self, list_of_expressions):
'''
This function takes a list of PSyIR expressions, and converts
them all into Sympy expressions using the SymPy parser.
It takes care of all Fortran specific conversion required (e.g.
constants with kind specification, ...), including the renaming of
member accesses, as described in
https://psyclone-dev.readthedocs.io/en/latest/sympy.html#sympy
:param list_of_expressions: the list of expressions which are to be
converted into SymPy-parsable strings.
:type list_of_expressions: list of
:py:class:`psyclone.psyir.nodes.Node`
:returns: a 2-tuple consisting of the the converted PSyIR
expressions, followed by a dictionary mapping the symbol names
to SymPy Symbols.
:rtype: Union[:py:class:`sympy.core.basic.Basic`,
List[:py:class:`sympy.core.basic.Basic`]]
:raises VisitorError: if an invalid SymPy expression is found.
'''
is_list = isinstance(list_of_expressions, (tuple, list))
if not is_list:
list_of_expressions = [list_of_expressions]
expression_str_list = self._to_str(list_of_expressions)
result = []
for expr in expression_str_list:
try:
result.append(parse_expr(expr, self.type_map))
except SyntaxError as err:
raise VisitorError(f"Invalid SymPy expression: '{expr}'.") \
from err
if is_list:
return result
# We had no list initially, so only convert the one and only
# list member
return result[0]
# -------------------------------------------------------------------------
[docs] def arrayreference_node(self, node):
'''The implementation of the method handling a
ArrayOfStructureReference is generic enough to also handle
non-structure arrays. So just use it.
:param node: a ArrayReference PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.ArrayReference`
:returns: the code as string.
:rtype: str
'''
return self.arrayofstructuresreference_node(node)
# -------------------------------------------------------------------------
[docs] def structurereference_node(self, node):
'''The implementation of the method handling a
ArrayOfStructureReference is generic enough to also handle non-arrays.
So just use it.
:param node: a StructureReference PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.StructureReference`
:returns: the code as string.
:rtype: str
'''
return self.arrayofstructuresreference_node(node)
# -------------------------------------------------------------------------
[docs] def arrayofstructuresreference_node(self, node):
'''This handles ArrayOfStructureReferences (and also simple
StructureReferences). An access like ``a(i)%b(j)`` is converted to
the string ``a_b(i,i,1,j,j,1)`` (also handling name clashes in case
that the user code already contains a symbol ``a_b``). The SymPy
function created for this new symbol will store the original signature
and the number of indices for each member (so in the example above
that would be ``Signature("a%b")`` and ``(1,1)``. This information
is sufficient to convert the SymPy symbol back to the correct Fortran
representation
:param node: a StructureReference PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.StructureReference`
:returns: the code as string.
:rtype: str
'''
sig, indices = node.get_signature_and_indices()
out = []
num_dims = []
all_dims = []
is_array = False
for i, name in enumerate(sig):
num_dims.append(len(indices[i]))
for index in indices[i]:
all_dims.append(index)
is_array = True
out.append(name)
flat_name = "_".join(out)
# Find (or create) a unique variable name:
try:
unique_name = self._symbol_table.lookup_with_tag(str(sig)).name
except KeyError:
unique_name = self._symbol_table.new_symbol(flat_name,
tag=str(sig)).name
if is_array:
indices_str = self.gen_indices(all_dims)
# Create the corresponding SymPy function, which will store
# the signature and num_dims, so that the correct Fortran
# representation can be recreated later.
self._sympy_type_map[unique_name] = \
self._create_sympy_array_function(unique_name, sig, num_dims)
return f"{unique_name}({','.join(indices_str)})"
# Not an array access. We use the unique name for the string,
# but the required symbol is mapped to the original name, which means
# if the SymPy expression is converted to a string (in order to be
# parsed), it will use the original structure reference syntax:
self._sympy_type_map[unique_name] = Symbol(sig.to_language())
return unique_name
# -------------------------------------------------------------------------
[docs] def literal_node(self, node):
'''This method is called when a Literal instance is found in the PSyIR
tree. For SymPy we need to handle booleans (which are expected to
be capitalised: True). Real values work by just ignoring any precision
information (e.g. 2_4, 3.1_wp). Character constants are not supported
and will raise an exception.
:param node: a Literal PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.Literal`
:returns: the SymPy representation for the literal.
:rtype: str
:raises TypeError: if a character constant is found, which
is not supported with SymPy.
'''
if node.datatype.intrinsic == ScalarType.Intrinsic.BOOLEAN:
# Booleans need to be converted to SymPy format
return node.value.capitalize()
if node.datatype.intrinsic == ScalarType.Intrinsic.CHARACTER:
raise TypeError(f"SymPy cannot handle strings "
f"like '{node.value}'.")
# All real (single, double precision) and integer work by just
# using the node value. Single and double precision both use
# 'e' as specification, which SymPy accepts, and precision
# information can be ignored.
return node.value
[docs] def intrinsiccall_node(self, node):
''' This method is called when an IntrinsicCall instance is found in
the PSyIR tree. The Sympy backend will use the exact sympy name for
some math intrinsics (listed in _intrinsic_to_str) and will remove
named arguments.
:param node: an IntrinsicCall PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.IntrinsicCall`
:returns: the SymPy representation for the Intrinsic.
:rtype: str
'''
# Sympy does not support argument names, remove them for now
if any(node.argument_names):
# TODO #2302: This is not totally right without canonical intrinsic
# positions for arguments. One alternative is to refuse it with:
# raise VisitorError(
# f"Named arguments are not supported by SymPy but found: "
# f"'{node.debug_string()}'.")
# but this leaves sympy comparisons almost always giving false when
# out of order arguments are rare, so instead we ignore it for now.
# It makes a copy (of the parent because if matters to the call
# visitor) because we don't want to delete the original arg names
parent = node.parent.copy()
node = parent.children[node.position]
for idx in range(len(node.argument_names)):
# pylint: disable=protected-access
node._argument_names[idx] = (node._argument_names[idx][0],
None)
try:
name = self._intrinsic_to_str[node.intrinsic]
args = self._gen_arguments(node)
return f"{self._nindent}{name}({args})"
except KeyError:
return super().call_node(node)
# -------------------------------------------------------------------------
[docs] def reference_node(self, node):
'''This method is called when a Reference instance is found in the
PSyIR tree. It handles the case that this normal reference might
be an array expression, which in the SymPy writer needs to have
indices added explicitly: it basically converts the array expression
``a`` to ``a(sympy_lower, sympy_upper, 1)``.
:param node: a Reference PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.Reference`
:returns: the text representation of this reference.
:rtype: str
'''
# Support renaming a symbol (e.g. if it is a reserved Python name).
# Look up with the name as tag, which will return the symbol with
# a unique name (e.g. lambda --> lambda_1):
name = self._symbol_table.lookup_with_tag(node.name).name
if not node.is_array:
# This reference is not an array, just return the name
return name
# Now this must be an array expression without parenthesis. Add
# the triple-array indices to represent `lower:upper:1` for each
# dimension:
shape = node.symbol.shape
result = [f"{self.lower_bound},"
f"{self.upper_bound},1"]*len(shape)
return (f"{name}{self.array_parenthesis[0]}"
f"{','.join(result)}{self.array_parenthesis[1]}")
# ------------------------------------------------------------------------
[docs] def gen_indices(self, indices, var_name=None):
'''Given a list of PSyIR nodes representing the dimensions of an
array, return a list of strings representing those array dimensions.
This is used both for array references and array declarations. Note
that 'indices' can also be a shape in case of Fortran. The
implementation here overwrites the one in the base class to convert
each array index into three parameters to support array expressions.
:param indices: list of PSyIR nodes.
:type indices: List[:py:class:`psyclone.psyir.symbols.Node`]
:param str var_name: name of the variable for which the dimensions
are created. Not used in this implementation.
:returns: the Fortran representation of the dimensions.
:rtype: List[str]
:raises NotImplementedError: if the format of the dimension is not
supported.
'''
dims = []
for index in indices:
if isinstance(index, DataNode):
# literal constant, symbol reference, or computed
# dimension
expression = self._visit(index)
dims.extend([expression, expression, "1"])
elif isinstance(index, Range):
# literal constant, symbol reference, or computed
# dimension
expression = self._visit(index)
dims.append(expression)
elif isinstance(index, ArrayType.ArrayBounds):
# Lower and upper bounds of an array declaration specified
# by literal constant, symbol reference, or computed dimension
lower_expression = self._visit(index.lower)
upper_expression = self._visit(index.upper)
dims.extend([lower_expression, upper_expression, "1"])
elif isinstance(index, ArrayType.Extent):
# unknown extent
dims.extend([self.lower_bound, self.upper_bound, "1"])
else:
raise NotImplementedError(
f"unsupported gen_indices index '{index}'")
return dims
# -------------------------------------------------------------------------
[docs] def range_node(self, node):
'''This method is called when a Range instance is found in the PSyIR
tree. This implementation converts a range into three parameters
for the corresponding SymPy function.
:param node: a Range PSyIR node.
:type node: :py:class:`psyclone.psyir.nodes.Range`
:returns: the Fortran code as a string.
:rtype: str
'''
if node.parent and node.parent.is_lower_bound(
node.parent.indices.index(node)):
# The range starts for the first element in this
# dimension, so use the generic name for lower bound:
start = self.lower_bound
else:
start = self._visit(node.start)
if node.parent and node.parent.is_upper_bound(
node.parent.indices.index(node)):
# The range ends with the last element in this
# dimension, so use the generic name for the upper bound:
stop = self.upper_bound
else:
stop = self._visit(node.stop)
result = f"{start},{stop}"
step = self._visit(node.step)
result += f",{step}"
return result