ValueNet4SPARQL / src / preprocessing / test / test_sql2SemQL.py
test_sql2SemQL.py
Raw
import json
import re
from unittest import TestCase

from preprocessing.sql2SemQL import Parser


def _clean(text):
    text = text.strip()
    return re.sub(r'\s+', ' ', text)


class TestParser(TestCase):
    def test_full_parse__AND_OR(self):
        # GIVEN
        with open('src/preprocessing/test/sql2SemQL_data/AND_OR.json', 'r', encoding='utf8') as f:
            example = json.load(f)

        # WHEN
        parser = Parser(example['values'])
        semql_result = parser.full_parse(example)

        # THEN
        actual = ' '.join([str(x) for x in semql_result])

        # One could argue also here if the most outer condition should not be an "OR" instead of an "AND".
        # See test "test_full_parse__four_AND_one_OR()" for an explanation.
        self.assertEqual(_clean('''
        Root1(3) 
            Root(3) 
                Sel(0) 
                    N(0) 
                        A(3) C(0) T(1) 
                Filter(0) 
                    Filter(2) 
                        A(0) C(8) T(2) V(0) 
                    Filter(1) 
                        Filter(2) 
                            A(0) C(4) T(1) V(1) 
                        Filter(2) 
                            A(0) C(4) T(1) V(2)
        '''), actual)

    def test_full_parse__two_AND(self):
        # GIVEN
        with open('src/preprocessing/test/sql2SemQL_data/two_AND.json', 'r', encoding='utf8') as f:
            example = json.load(f)

        # WHEN
        parser = Parser(example['values'])
        semql_result = parser.full_parse(example)

        # THEN
        actual = ' '.join([str(x) for x in semql_result])

        self.assertEqual(_clean('''
        Root1(3) 
            Root(3) 
                Sel(0) 
                    N(0) 
                    A(0) C(22) T(3) 
                Filter(0) 
                    Filter(2) 
                        A(0) C(21) T(3) V(2) 
                    Filter(0) 
                        Filter(3) 
                            A(0) C(43) T(3) V(3) 
                        Filter(3) A(0) C(43) T(3) V(1)
        '''), actual)

    def test_full_parse__three_AND(self):
        # GIVEN
        with open('src/preprocessing/test/sql2SemQL_data/three_AND.json', 'r', encoding='utf8') as f:
            example = json.load(f)

        # WHEN
        parser = Parser(example['values'])
        semql_result = parser.full_parse(example)

        # THEN
        actual = ' '.join([str(x) for x in semql_result])

        self.assertEqual(_clean('''
        Root1(3) 
            Root(3) 
                Sel(0) 
                    N(0) 
                        A(0) C(3) T(0) 
                Filter(0) 
                    Filter(2) 
                        A(0) C(5) T(0) V(2) 
                        Filter(0) 
                            Filter(2) 
                                A(0) C(13) T(1) V(0)
                                Filter(0)
                                    Filter(2) 
                                        A(0) C(13) T(1) V(3)
                                    Filter(2) 
                                        A(0) C(19) T(4) V(1)
        '''), actual)

    def test_full_parse__four_AND_one_OR(self):
        # GIVEN
        with open('src/preprocessing/test/sql2SemQL_data/four_AND_one_OR.json', 'r', encoding='utf8') as f:
            example = json.load(f)

        # WHEN
        parser = Parser(example['values'])
        semql_result = parser.full_parse(example)

        # THEN
        actual = ' '.join([str(x) for x in semql_result])

        # TODO: you would mabye expect here rather something like this: the filter clearly separated by an OR, then in
        # TODO: both sub-filters all the AND concatenation. Unfortunately the SemQL2SQL module is not yet able to handle
        # TODO: such a construct. It would though get produced exactly like this if you change the AND/OR order in _parse_filter_internal()
        # TODO: Therefore: Rewrite the SemQL2SQL module to handle this here properly.
        # self.assertEqual(_clean('''
        # Root1(3)
        #     Root(3)
        #         Sel(0)
        #             N(0)
        #                 A(0) C(3) T(0)
        #         Filter(1)
        #             Filter(0)
        #                 Filter(2)
        #                     A(0) C(5) T(0) V(2)
        #                     Filter(0)
        #                         Filter(2)
        #                             A(0) C(13) T(1) V(0)
        #                             Filter(0)
        #                                 Filter(2)
        #                                     A(0) C(13) T(1) V(3)
        #                                 Filter(2)
        #                                     A(0) C(19) T(4) V(1)
        #             Filter(0)
        #                 Filter(2)
        #                     A(0) C(5) T(0) V(4)
        #                     Filter(0)
        #                         Filter(2)
        #                             A(0) C(13) T(1) V(6)
        #                             Filter(0)
        #                                 Filter(2)
        #                                     A(0) C(13) T(1) V(3)
        #                                 Filter(2)
        #                                     A(0) C(19) T(4) V(5)
        # '''), actual)

        # What we get now is more simple: it is the one-to-one representation of the "WHERE" list in the "sql" variable.
        # Event though it looks wrong regarding the precedence, it will work when translating to SQL as the SemQL2sql module
        # is building up the SQL representation in the exact same order as we see it here.
        self.assertEqual(_clean('''
        Root1(3)
            Root(3)
                Sel(0)
                    N(0)
                        A(0) C(3) T(0)
                Filter(0)
                    Filter(2)
                        A(0) C(5) T(0) V(2)
                    Filter(0)
                        Filter(2)
                            A(0) C(13) T(1) V(0)
                        Filter(0)
                            Filter(2)
                                A(0) C(13) T(1) V(3)
                            Filter(0)
                                Filter(1)
                                    Filter(2)
                                        A(0) C(19) T(4) V(1)
                                    Filter(2)
                                        A(0) C(5) T(0) V(4)
                                Filter(0)
                                    Filter(2)
                                        A(0) C(13) T(1) V(6)
                                    Filter(0)
                                        Filter(2)
                                            A(0) C(13) T(1) V(3)
                                        Filter(2)
                                            A(0) C(19) T(4) V(5)
        '''), actual)


    def test_full_parse__build_up_values_on_the_fly(self):
        # GIVEN
        with open('src/preprocessing/test/sql2SemQL_data/three_AND.json', 'r', encoding='utf8') as f:
            example = json.load(f)

        parser = Parser(build_value_list=True)

        # WHEN
        # We don't care about the values here - we are just interested in the values-list
        _ = parser.full_parse(example)

        # THEN
        self.assertEqual(['Madison', 'Italian', 'restaurant', 'Meadowood'], parser.values)