"""The `abstract_parser` module provides the `AbstractParser` class, an abstract base class designed for parsing data from files using an IParse interface. This module serves as the foundation for implementing specific parsers for various file formats. Module Contents: - AbstractParser: An abstract base class for parsing data from files. Usage: To create a custom parser for a specific file format, inherit from the `AbstractParser` class and implement the required parsing logic by defining an IParse interface. Example: ```python from myparser import MyCustomParser from myiparse import MyIParseImplementation # Create a parser instance custom_parser = MyCustomParser(MyIParseImplementation()) # Parse data from a file parsed_data = custom_parser('datafile.dat') ``` Classes: - AbstractParser: An abstract base class for parsing data from files. """ import typing as tp from abc import ABC, abstractmethod from logging import Logger from pathlib import Path import pandas as pd from ..dispatch.base_dispatch import AbstractDispatcher from .iparse import IParse # TODO: add IParse __all__ = ["AbstractParser", "Parser"] class AbstractParser(ABC): """Abstract class for parsing data from a file using an IParse interface. This abstract class serves as a template for creating parsers for specific file formats. It enforces the use of an IParse interface for parsing data and provides common functionality for checking file integrity and handling logging. Attributes: iparser (IParse): An instance of an IParse interface for data parsing. logger (Logger | None): A logger for logging operations. Defaults to None. dispatcher (None): A parallel task dispatcher (optional). Defaults to None. Methods: __init__(self, iparser: IParse, logger: Logger | None = None, dispatcher: None = None) -> None: Initialize a new AbstractParser instance. _check_file_integrity(self, filepath: str | Path) -> None: Check the integrity of a file path to ensure it exists, is a file, and is readable. parse(self, filepath: str | Path) -> tp.Tuple[pd.Series, pd.DataFrame]: Parse data from a file using the provided IParse interface. __call__(self, filepath: str | Path) -> Any: Call method for parsing data from a file. __repr__(self) -> str: Return a string representation of the AbstractParser instance. iparser(self) -> IParse: Returns the IParse object associated with this BaseParse object. iparser(self, iparser: IParse) -> None: Sets the internal IParse instance to the provided IParse object. swap(self, iparser: IParse) -> None: Swaps the internal IParse instance with the provided IParse object. Notes: - This class is an abstract base class and should not be instantiated directly. """ def __init__( self, iparser: IParse, logger: Logger | None = None, dispatcher: AbstractDispatcher | None = None, ) -> None: # TODO: add dispatcher """Initialize a new AbstractParser instance. Args: iparser (IParse): An instance of an IParse interface for data parsing. logger (Logger | None, optional): A logger for logging operations. Defaults to None. dispatcher (None, optional): A parallel task dispatcher (optional). Defaults to None. Raises: TypeError: If iparser is not an instance of IParse. TypeError: If logger is provided but is not an instance of Logger. """ # Verify iparser is IParse self._iparser = iparser # Verify logger is Logger if logger is not None and not isinstance(logger, Logger): raise TypeError(f"logger must be Logger, not {type(logger)}") self.logger = logger # TO DO: Verify dispatcher is Dispatcher if dispatcher is not None and not issubclass( dispatcher.__class__, AbstractDispatcher ): raise TypeError( f"dispatcher must be subclass AbstractDispatcher, not {type(dispatcher)}" ) self.dispatcher = dispatcher return def _check_file_integrity(self, filepath: str | Path) -> None: """Check the integrity of a file path. Args: filepath (str | Path): The path to the file to check. Raises: TypeError: If filepath is not a str or Path. FileNotFoundError: If the file does not exist. FileNotFoundError: If the path is not a file. PermissionError: If the file is not readable. """ # Verify filepath if not isinstance(filepath, (str, Path)): raise TypeError(f"filepath must be str or Path, not {type(filepath)}") if isinstance(filepath, str): filepath = Path(filepath) else: filepath = filepath # If not exists, raise error if not filepath.exists(): raise FileNotFoundError(f"{filepath} does not exist") # If not a file, raise error if not filepath.is_file(): raise FileNotFoundError(f"{filepath} is not a file") return # TO DO : def _dispatch(self) -> None: @abstractmethod def parse( self, filepath: str | Path, **kwargs ) -> tp.Tuple[pd.Series, pd.DataFrame]: """Parse data from a file using the provided IParse interface. Args: filepath (str | Path): Path to the file to parse. kwargs: Additional keyword arguments to pass to the iparser. Returns: tp.Tuple[pd.Series, pd.DataFrame]: Tuple of parsed data (metadata, data). """ # Verify filepath self._check_file_integrity(filepath) # Parse data return self.iparser.parse(filepath, **kwargs) def __call__( self, filepath: str | Path, **kwargs ) -> tp.Tuple[pd.Series, pd.DataFrame]: """Call method for parsing data from a file. Args: filepath (str | Path): Path to the file to parse. kwargs: Additional keyword arguments to pass to the parser. Returns: Any: The parsed data. """ return self.parse(filepath, **kwargs) def __repr__(self) -> str: """Return a string representation of the AbstractParser instance. Returns: str: String representation of the object. """ return f"{self.__class__.__name__}(iparser={self.iparser}, logger={self.logger}, dispatcher={self.dispatcher})" @property def iparser(self) -> IParse: """Returns the IParse object associated with this BaseParse object. Returns: IParse -- The IParse object associated with this BaseParse object. """ return self._iparser @iparser.setter def iparser(self, iparser: IParse) -> None: """Sets the internal IParse instance to the provided IParse object. Args: iparser (IParse): The IParse object to set as the internal IParse instance. Raises: TypeError: If the provided iparser object is not a subclass of IParse. """ # Verify iparser is IParse if not issubclass(iparser.__class__, IParse): raise TypeError(f"iparser must be subclass IParse, not {type(iparser)}") self._iparser = iparser def swap(self, iparser: IParse) -> None: """Swaps the internal IParse instance with the provided IParse object. Args: iparser (IParse): The IParse object to swap with the internal IParse instance. Returns: None """ self.iparser = iparser class Parser(AbstractParser): """Concrete class for parsing data from a file using an IParse interface. Args: AbstractParser (_type_): Abstract class for parsing data from a file using an IParse interface. This is due to the builder design pattern. """ def __init__( self, iparser: IParse, logger: Logger | None = None, dispatcher: None = None ) -> None: """Initialize a new AbstractParser instance. Args: iparser (IParse): An instance of an IParse interface for data parsing. logger (Logger | None, optional): A logger for logging operations. Defaults to None. dispatcher (None, optional): A parallel task dispatcher (optional). Defaults to None. Raises: TypeError: If iparser is not an instance of IParse. TypeError: If logger is provided but is not an instance of Logger. """ super().__init__(iparser, logger, dispatcher) return def parse( self, filepath: str | Path, **kwargs ) -> tp.Tuple[pd.Series, pd.DataFrame]: """Parse data from a file using the provided IParse interface. Args: filepath (str | Path): Path to the file to parse. kwargs: Additional keyword arguments to pass to the iparser. Returns: tp.Tuple[pd.Series, pd.DataFrame]: Tuple of parsed data (metadata, data). """ return super().parse(filepath, **kwargs)