# Source: Navigator / src / navigator / parse / base_parse.py
"""The `base_parse` module provides `AbstractParser`, an abstract base class for parsing data from files through an IParse interface, and `Parser`, its concrete implementation. This module serves as the foundation for implementing specific parsers for various file formats.

Module Contents:
    - AbstractParser: An abstract base class for parsing data from files.
    - Parser: A concrete parser that delegates to the supplied IParse instance.

Usage:
    To create a custom parser for a specific file format, inherit from the `AbstractParser` class and implement the required parsing logic by defining an IParse interface.

Example:
    ```python
    from myparser import MyCustomParser
    from myiparse import MyIParseImplementation

    # Create a parser instance
    custom_parser = MyCustomParser(MyIParseImplementation())

    # Parse data from a file
    parsed_data = custom_parser('datafile.dat')
    ```

Classes:
    - AbstractParser: An abstract base class for parsing data from files.
    - Parser: A concrete parser that delegates to the supplied IParse instance.

"""

import os
import typing as tp
from abc import ABC, abstractmethod
from logging import Logger
from pathlib import Path

import pandas as pd

from ..dispatch.base_dispatch import AbstractDispatcher
from .iparse import IParse  # TODO: add IParse

# Names exported by `from <module> import *`: the abstract base and its concrete subclass.
__all__ = ["AbstractParser", "Parser"]


class AbstractParser(ABC):
    """Abstract base class for parsing a file through an IParse object.

    The class holds an IParse instance and delegates the actual parsing to
    it after validating the file path. Subclasses must override `parse`
    (it is declared abstract) but may reuse the default implementation via
    `super().parse(...)`.

    Attributes:
        iparser (IParse): The object whose `parse` method does the parsing.
            Exposed as a validated property backed by `_iparser`.
        logger (Logger | None): Optional logger. Defaults to None.
        dispatcher (AbstractDispatcher | None): Optional dispatcher.
            Defaults to None. It is stored but not used by this class yet.

    Methods:
        parse(filepath, **kwargs): Validate the path, then delegate to
            `iparser.parse`.
        __call__(filepath, **kwargs): Shorthand for `parse`.
        swap(iparser): Replace the current IParse instance.

    Notes:
        - This class is abstract and cannot be instantiated directly.

    """

    def __init__(
        self,
        iparser: IParse,
        logger: Logger | None = None,
        dispatcher: AbstractDispatcher | None = None,
    ) -> None:
        """Initialize a new AbstractParser instance.

        Args:
            iparser (IParse): The object used to parse file contents.
            logger (Logger | None, optional): Logger for logging operations.
                Defaults to None.
            dispatcher (AbstractDispatcher | None, optional): Task dispatcher.
                Defaults to None.

        Raises:
            TypeError: If iparser is not an instance of IParse.
            TypeError: If logger is provided but is not an instance of Logger.
            TypeError: If dispatcher is provided but is not an instance of
                AbstractDispatcher.

        """
        # Assign through the property so the IParse type check in the
        # setter runs at construction time as well as on later swaps.
        self.iparser = iparser

        if logger is not None and not isinstance(logger, Logger):
            raise TypeError(f"logger must be Logger, not {type(logger)}")
        self.logger = logger

        if dispatcher is not None and not isinstance(dispatcher, AbstractDispatcher):
            raise TypeError(
                f"dispatcher must be subclass AbstractDispatcher, not {type(dispatcher)}"
            )
        self.dispatcher = dispatcher

    def _check_file_integrity(self, filepath: str | Path) -> None:
        """Check that a path points to an existing, readable regular file.

        Args:
            filepath (str | Path): The path to the file to check.

        Raises:
            TypeError: If filepath is not a str or Path.
            FileNotFoundError: If the file does not exist.
            FileNotFoundError: If the path is not a file.
            PermissionError: If the file is not readable.

        """
        if not isinstance(filepath, (str, Path)):
            raise TypeError(f"filepath must be str or Path, not {type(filepath)}")

        # Path() accepts both str and Path, so no branching is needed.
        path = Path(filepath)

        if not path.exists():
            raise FileNotFoundError(f"{path} does not exist")

        if not path.is_file():
            raise FileNotFoundError(f"{path} is not a file")

        # Fail early with a clear error instead of deep inside the iparser.
        if not os.access(path, os.R_OK):
            raise PermissionError(f"{path} is not readable")

    # TODO: def _dispatch(self) -> None:

    @abstractmethod
    def parse(
        self, filepath: str | Path, **kwargs
    ) -> tp.Tuple[pd.Series, pd.DataFrame]:
        """Validate the file path and parse it with the IParse instance.

        Declared abstract so subclasses must override it; the body is a
        usable default that subclasses can call through `super()`.

        Args:
            filepath (str | Path): Path to the file to parse.
            kwargs: Additional keyword arguments forwarded to the iparser.

        Returns:
            tp.Tuple[pd.Series, pd.DataFrame]: The value returned by
            `iparser.parse`, annotated as (metadata, data).

        Raises:
            TypeError: If filepath is not a str or Path.
            FileNotFoundError: If the path does not exist or is not a file.
            PermissionError: If the file is not readable.

        """
        self._check_file_integrity(filepath)

        # The path is forwarded unchanged (str stays str, Path stays Path).
        return self.iparser.parse(filepath, **kwargs)

    def __call__(
        self, filepath: str | Path, **kwargs
    ) -> tp.Tuple[pd.Series, pd.DataFrame]:
        """Parse a file; equivalent to calling `parse` directly.

        Args:
            filepath (str | Path): Path to the file to parse.
            kwargs: Additional keyword arguments forwarded to `parse`.

        Returns:
            tp.Tuple[pd.Series, pd.DataFrame]: The value returned by `parse`.

        """
        return self.parse(filepath, **kwargs)

    def __repr__(self) -> str:
        """Return a string representation of the parser instance.

        Returns:
            str: The class name followed by the iparser, logger and dispatcher.

        """
        return f"{self.__class__.__name__}(iparser={self.iparser}, logger={self.logger}, dispatcher={self.dispatcher})"

    @property
    def iparser(self) -> IParse:
        """Return the IParse object currently used by this parser.

        Returns:
            IParse: The internal IParse instance.
        """
        return self._iparser

    @iparser.setter
    def iparser(self, iparser: IParse) -> None:
        """Set the internal IParse instance after validating its type.

        Args:
            iparser (IParse): The IParse object to use from now on.

        Raises:
            TypeError: If the provided object is not an instance of IParse.
        """
        if not isinstance(iparser, IParse):
            raise TypeError(f"iparser must be subclass IParse, not {type(iparser)}")

        self._iparser = iparser

    def swap(self, iparser: IParse) -> None:
        """Replace the internal IParse instance with the provided one.

        Args:
            iparser (IParse): The IParse object to use from now on.

        Raises:
            TypeError: If the provided object is not an instance of IParse.
        """
        self.iparser = iparser


class Parser(AbstractParser):
    """Concrete parser that delegates parsing to its IParse instance.

    This subclass supplies the `parse` override required by the abstract
    base class and otherwise reuses the base-class behavior unchanged.

    NOTE(review): the previous docstring attributed the abstract/concrete
    split to the builder design pattern; confirm against the wider design.
    """

    def __init__(
        self,
        iparser: IParse,
        logger: Logger | None = None,
        dispatcher: AbstractDispatcher | None = None,
    ) -> None:
        """Initialize a new Parser instance.

        Args:
            iparser (IParse): The object used to parse file contents.
            logger (Logger | None, optional): Logger for logging operations.
                Defaults to None.
            dispatcher (AbstractDispatcher | None, optional): Task dispatcher.
                Defaults to None.

        Raises:
            TypeError: Propagated from `AbstractParser.__init__` when an
                argument fails its type validation.

        """
        super().__init__(iparser, logger, dispatcher)

    def parse(
        self, filepath: str | Path, **kwargs
    ) -> tp.Tuple[pd.Series, pd.DataFrame]:
        """Parse a file using the base-class implementation.

        Args:
            filepath (str | Path): Path to the file to parse.
            kwargs: Additional keyword arguments forwarded to the iparser.

        Returns:
            tp.Tuple[pd.Series, pd.DataFrame]: Tuple of parsed data
            (metadata, data), as returned by `AbstractParser.parse`.
        """
        return super().parse(filepath, **kwargs)