Source code for psyclone.parse.module_info

# -----------------------------------------------------------------------------
# BSD 3-Clause License
#
# Copyright (c) 2023-2024, Science and Technology Facilities Council.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# Author J. Henrichs, Bureau of Meteorology

'''This module contains the ModuleInfo class, which is used to store
and cache information about a module: the filename, source code (if requested)
and the fparser tree (if requested), and information about any routines it
includes, and external symbol usage.
'''

import os

from fparser.common.readfortran import FortranStringReader
from fparser.two.Fortran2003 import (Function_Subprogram, Interface_Block,
                                     Interface_Stmt, Procedure_Stmt,
                                     Subroutine_Subprogram, Use_Stmt)
from fparser.two.parser import ParserFactory
from fparser.two.utils import FortranSyntaxError, walk

from psyclone.errors import InternalError, PSycloneError
from psyclone.psyir.frontend.fparser2 import Fparser2Reader
from psyclone.psyir.nodes import Container, FileContainer
from psyclone.psyir.symbols import SymbolError


# ============================================================================
class ModuleInfoError(PSycloneError):
    '''
    Exception raised when the module manager runs into a problem, which
    usually means that some piece of module information could not be
    located.

    :param str value: the message associated with the error.

    '''
    def __init__(self, value):
        # Let the base class initialise itself first, then replace the
        # message with one carrying the ModuleInfo-specific prefix.
        super().__init__(value)
        self.value = f"ModuleInfo error: {value!s}"


# ============================================================================
class ModuleInfo:
    # pylint: disable=too-many-instance-attributes
    '''This class stores mostly cached information about modules: it stores
    the original filename, if requested it will read the file and then caches
    the plain text file, and if required it will parse the file, and then
    cache the fparser AST.

    All routine names handled by this class are stored in lower case, since
    Fortran names are case insensitive and all queries are lower-cased
    before they are looked up.

    :param str name: the module name.
    :param str filename: the name of the source file that stores this
        module (including path).

    '''

    def __init__(self, name, filename):
        self._name = name
        self._filename = filename

        # A cache for the source code:
        self._source_code = None

        # A cache for the fparser tree
        self._parse_tree = None

        # A cache for the PSyIR representation
        self._psyir = None

        # A cache for the module dependencies: this is just a set
        # of all modules used by this module. Type: set[str]
        self._used_modules = None

        # This is a dictionary containing the sets of symbols imported from
        # each module, indexed by the module names: dict[str, set[str]].
        self._used_symbols_from_module = None

        # This variable will be a set that stores the (lower case) name of
        # all routines (based on fparser), so we can test if a routine is
        # defined without having to convert the AST to PSyIR. It is
        # initialised with None so we avoid trying to parse a file more than
        # once (parsing errors would cause routine_names to be empty, so we
        # can test if routine_names is None vs if routine_names is empty).
        # TODO #2435: To be changed once we have support for interfaces
        self._routine_names = None

        # This map contains the list of (lower case) routine names that are
        # part of the same generic interface, indexed by the lower case
        # name of the interface.
        # TODO #2435: To be changed once we have support for interfaces
        self._generic_interfaces = {}

        self._processor = Fparser2Reader()

    # ------------------------------------------------------------------------
    @property
    def name(self):
        ''':returns: the name of this module.
        :rtype: str

        '''
        return self._name

    # ------------------------------------------------------------------------
    @property
    def filename(self):
        ''':returns: the filename that contains the source code for this
            module.
        :rtype: str

        '''
        return self._filename

    # ------------------------------------------------------------------------
    def get_source_code(self):
        '''Returns the source code for the module. The first time, it
        will be read from the file, but the data is then cached.

        :returns: the source code.
        :rtype: str

        :raises ModuleInfoError: when the file cannot be found.

        '''
        if self._source_code is None:
            try:
                with open(self._filename, "r", encoding='utf-8') as file_in:
                    self._source_code = file_in.read()
            except FileNotFoundError as err:
                raise ModuleInfoError(
                    f"Could not find file '{self._filename}' when trying to "
                    f"read source code for module '{self._name}'") from err

        return self._source_code

    # ------------------------------------------------------------------------
    def get_parse_tree(self):
        '''Returns the fparser AST for this module. The first time, the file
        will be parsed by fparser using the Fortran 2008 standard. The AST is
        then cached for any future uses. As a side effect, the names of all
        routines and generic interfaces found in the tree are recorded (in
        lower case).

        Any exception raised while reading or parsing the file is
        propagated to the caller; in that case the set of known routine
        names is left empty.

        :returns: the fparser AST for this module.
        :rtype: :py:class:`fparser.two.Fortran2003.Program`

        '''
        if self._parse_tree is None:
            # Set routine_names to be an empty set (it was None before).
            # This way we avoid that any other function might trigger to
            # parse this file again (in case of parsing errors).
            self._routine_names = set()

            reader = FortranStringReader(self.get_source_code())
            parser = ParserFactory().create(std="f2008")
            self._parse_tree = parser(reader)

            # First collect information about all subroutines/functions.
            # Store information about generic interface to be handled later
            # (so we only walk the tree once):
            # TODO #2478: once generic interfaces are supported, use PSyIR
            # instead of fparser here.
            all_generic_interfaces = []
            for routine in walk(self._parse_tree, (Function_Subprogram,
                                                   Subroutine_Subprogram,
                                                   Interface_Block)):
                if isinstance(routine, Interface_Block):
                    all_generic_interfaces.append(routine)
                else:
                    # Fortran is case insensitive and contains_routine()
                    # lower-cases its argument, so the name must be stored
                    # in lower case to be found again.
                    routine_name = str(routine.content[0].items[1]).lower()
                    self._routine_names.add(routine_name)

            # Then handle all generic interfaces and add them to
            # _generic_interfaces:
            for interface in all_generic_interfaces:
                # TODO #2422 This code does not support all potential
                # interface statements. After #2422 we can use PSyIR here.
                # Get the name of the interface from the Interface_Stmt:
                name = str(walk(interface,
                                Interface_Stmt)[0].items[0]).lower()
                self._routine_names.add(name)

                # Collect all specific functions for this generic interface.
                # They are lower-cased for consistency with the names that
                # resolve_routine() returns for non-generic routines.
                routine_names = []
                for proc_stmt in walk(interface, Procedure_Stmt):
                    # Convert the items to strings:
                    routine_names.extend(
                        str(i).lower() for i in proc_stmt.items[0].items)
                self._generic_interfaces[name] = routine_names

        return self._parse_tree

    # ------------------------------------------------------------------------
    def contains_routine(self, routine_name):
        '''Checks (case insensitively) whether a routine or generic
        interface of the given name was found in this module's source file.

        :param str routine_name: the name of the routine to look for.

        :returns: whether the specified routine name is part of this
            module or not. It will also return False if the file could
            not be parsed.
        :rtype: bool

        '''
        # TODO #2422 and TODO #2478: Once we parse everything to PSyIR (esp.
        # generic interfaces), this routine can just be replaced with
        # get_psyir().get_routine_psyir(routine_name)
        if self._routine_names is None:
            # This will trigger adding routine information
            try:
                self.get_parse_tree()
            except FortranSyntaxError:
                return False

        return routine_name.lower() in self._routine_names

    # ------------------------------------------------------------------------
    def _extract_import_information(self):
        '''This internal function analyses a given module source file and
        caches which modules are imported (in self._used_modules), and which
        symbol is imported from each of these modules (in
        self._used_symbols_from_module). If the same module is named in
        several USE statements, the symbols of all of them are merged.
        If the file cannot be parsed, a message is printed and both caches
        are left empty.

        '''
        # Initialise the caches:
        self._used_modules = set()
        self._used_symbols_from_module = {}

        try:
            parse_tree = self.get_parse_tree()
        except FortranSyntaxError:
            # TODO #11: Add proper logging
            # TODO #2120: Handle error
            print(f"[ModuleInfo._extract_import_information] Syntax error "
                  f"parsing '{self._filename}' - ignored")
            # Hide syntax errors
            return
        for use in walk(parse_tree, Use_Stmt):
            # Ignore intrinsic modules:
            if str(use.items[0]) == "INTRINSIC":
                continue

            mod_name = str(use.items[2])
            self._used_modules.add(mod_name)

            # A module can be used in more than one USE statement (e.g. in
            # several routines of this file), so add to any set that already
            # exists instead of replacing it.
            all_symbols = self._used_symbols_from_module.setdefault(
                mod_name, set())

            only_list = use.items[4]
            # If there is no only_list, then this statement does not
            # contribute any symbol names.
            if only_list:
                # Parse the only list:
                all_symbols.update(str(symbol)
                                   for symbol in only_list.children)

    # ------------------------------------------------------------------------
    def get_used_modules(self):
        '''This function returns a set of all modules `used` in this
        module. Fortran `intrinsic` modules will be ignored. The information
        is based on the fparser parse tree of the module (since fparser can
        handle more files than PSyir, like LFRic's `constants_mod` which has
        pre-processor directives).

        :returns: a set with all imported module names.
        :rtype: set[str]

        '''
        if self._used_modules is None:
            self._extract_import_information()

        return self._used_modules

    # ------------------------------------------------------------------------
    def get_used_symbols_from_modules(self):
        '''This function returns information about which modules are used by
        this module, and also which symbols are imported. The return value is
        a dictionary with the used module name as key, and a set of all
        imported symbol names as value.

        :returns: a dictionary that gives for each module name the set
            of symbols imported from it.
        :rtype: dict[str, set[str]]

        '''
        if self._used_symbols_from_module is None:
            self._extract_import_information()

        return self._used_symbols_from_module

    # ------------------------------------------------------------------------
    def get_psyir(self):
        '''Returns the PSyIR representation of this module. This is based
        on the fparser tree (see get_parse_tree), and the information is
        cached. If the PSyIR must be modified, it needs to be copied,
        otherwise the modified tree will be returned from the cache in the
        future.
        If the conversion to PSyIR fails, a dummy FileContainer with an
        empty Container (module) is returned, which avoids additional error
        handling in many other subroutines.
        #TODO 2120: This should be revisited when improving on the error
        handling.

        :returns: the first child of the (cached) FileContainer created
            for this file, i.e. the Container representing this module.
        :rtype: :py:class:`psyclone.psyir.nodes.Container`

        '''
        if self._psyir is None:
            try:
                self._psyir = \
                    self._processor.generate_psyir(self.get_parse_tree())
            except (KeyError, SymbolError, InternalError,
                    FortranSyntaxError) as err:
                print(f"Error trying to parse '{self.filename}': '{err}'")
                # TODO #11: Add proper logging
                # TODO #2120: Handle error better. Long term we should not
                # just ignore errors.
                # Create a dummy FileContainer with a dummy module. This
                # avoids additional error handling in other subroutines,
                # since they will all return 'no information', whatever you
                # ask for
                self._psyir = FileContainer(os.path.basename(self._filename))
                module = Container("invalid-module")
                self._psyir.children.append(module)

        # TODO #2462: needs to be fixed to properly support multiple modules
        # in one file
        # Return the actual module Container (not the FileContainer)
        return self._psyir.children[0]

    # ------------------------------------------------------------------------
    def resolve_routine(self, routine_name):
        '''This function returns a list of function names that might be
        actually called when the routine `name` is called. In most cases
        this is exactly name, but in case of a generic subroutine the
        name might change. For now (since we cannot resolve generic
        interfaces yet), we return the list of all possible functions that
        might be called. All returned names are in lower case.

        :param str routine_name: the name of the routine to resolve

        :returns: list of routine name(s) that could be called.
        :rtype: list[str]

        '''
        # TODO #2422: once #2422 is done, this can be moved into the PSyIR
        if self._psyir is None:
            # Creating the PSyIR parses the file, which is what fills in
            # the information about generic interfaces.
            self.get_psyir()
        routine_name = routine_name.lower()
        if routine_name not in self._generic_interfaces:
            return [routine_name]
        # If a generic interface name is queried, return a copy
        # of all possible routine names that might be called:
        return self._generic_interfaces[routine_name][:]