Source code for ontolearn.utils

# -----------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2024 Ontolearn Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------

"""Ontolearn utils."""
import datetime
import os
import pickle
import random
import time
from typing import Callable, TypeVar, Tuple, Union
from owlapy.class_expression import OWLClass
from owlapy.iri import IRI
from owlapy.meta_classes import HasIRI
from owlapy.owl_individual import OWLNamedIndividual
from ontolearn.utils.log_config import setup_logging  # noqa: F401
import pandas as pd
from .static_funcs import compute_f1_score, f1_set_similarity, concept_reducer, concept_reducer_properties

Factory = Callable
from typing import Set

# DEFAULT_FMT = '[{elapsed:0.8f}s] {name}({args}) -> {result}'
DEFAULT_FMT = 'Func:{name} took {elapsed:0.8f}s'
flag_for_performance = False



[docs]
def jaccard_similarity(y: Set[str], yhat: Set[str]) -> float:
    """
    Compute Jaccard Similarity
    :param y: A set of URIs
    :param yhat: A set of URIs
    :return:
    """
    if len(yhat) == len(y) == 0:
        return 1.0
    if len(yhat) == 0 or len(y) == 0:
        return 0.0
    return len(y.intersection(yhat)) / len(y.union(yhat))



[docs]
def parametrized_performance_debugger(fmt=DEFAULT_FMT): # pragma: no cover
    def decorate(func):
        if flag_for_performance:
            def clocked(*_args):
                t0 = time.time()
                _result = func(*_args)
                elapsed = time.time() - t0
                name = func.__name__
                args = ', '.join(repr(arg) for arg in _args)
                result = repr(_result)
                print(fmt.format(**locals()))
                return _result

            return clocked
        else:
            return func

    return decorate




[docs]
def performance_debugger(func_name): # pragma: no cover
    def function_name_decorator(func):
        def debug(*args, **kwargs):
            start = time.time()
            r = func(*args, **kwargs)
            print(func_name, ' took ', round(time.time() - start, 4), ' seconds')

            return r

        return debug

    return function_name_decorator




[docs]
def create_experiment_folder(folder_name='Log'):
    from ontolearn.utils import log_config
    if log_config.log_dirs:
        path_of_folder = log_config.log_dirs[-1]
    else:
        directory = os.getcwd() + '/' + folder_name + '/'
        folder_name = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        path_of_folder = directory + folder_name
        os.makedirs(path_of_folder)
    return path_of_folder, path_of_folder[:path_of_folder.rfind('/')]




[docs]
def serializer(*, object_: object, path: str, serialized_name: str):  # pragma: no cover
    with open(path + '/' + serialized_name + ".p", "wb") as f:
        pickle.dump(object_, f)
    f.close()




[docs]
def deserializer(*, path: str, serialized_name: str):  # pragma: no cover
    with open(path + "/" + serialized_name + ".p", "rb") as f:
        obj_ = pickle.load(f)
    f.close()
    return obj_




[docs]
def apply_TSNE_on_df(df) -> None:  # pragma: no cover
    from sklearn.manifold import TSNE
    from matplotlib import pyplot as plt
    low_emb = TSNE(n_components=2).fit_transform(df)
    plt.scatter(low_emb[:, 0], low_emb[:, 1])
    plt.title('Instance Representatons via TSNE')
    plt.show()




[docs]
def balanced_sets(a: set, b: set) -> Tuple[Set, Set]:  # pragma: no cover
    """
    Balance given two sets through sampling without replacement.
    Returned sets have the same length.
    @param a:
    @param b:
    @return:
    """

    if len(a) > len(b):
        sampled_a = random.sample(list(a), len(b))
        return set(sampled_a), b
    elif len(b) > len(a):
        sampled_b = random.sample(list(b), len(a))
        return a, set(sampled_b)
    else:
        assert len(a) == len(b)
        return a, b




[docs]
def read_csv(path)->Union[None,pd.DataFrame]:  # pragma: no cover
    """
    Path leads a folder containing embeddings in csv format.
    indexes correspond subjects or predicates or objects in n-triple.
    @param path:
    @return:
    """
    if assertion_path_isfile(path):
        df = pd.read_csv(path, index_col=0)
        assert (df.all()).all()  # all columns and all rows are not none.
        return df
    else:
        return None




[docs]
def assertion_path_isfile(path) -> bool:  # pragma: no cover
    try:
        assert path is not None
    except AssertionError:
        print(f'Path can not be:{path}')
        return False

    try:
        assert os.path.isfile(path)
    except (AssertionError, TypeError):
        print(f'Input:{path} not found.')
        return False
    return True




[docs]
def sanity_checking_args(args):  # pragma: no cover
    try:
        assert os.path.isfile(args.path_knowledge_base)
    except AssertionError:
        print(f'--path_knowledge_base ***{args.path_knowledge_base}*** does not lead to a file.')
        raise

    try:
        assert os.path.isfile(args.path_knowledge_base_embeddings)
    except AssertionError:
        print(f'--path_knowledge_base_embeddings ***{args.path_knowledge_base_embeddings}*** does not lead to a file.')
        raise

    assert args.min_length > 0
    assert args.max_length > 0
    assert args.min_num_concepts > 0
    assert args.min_num_concepts > 0
    assert args.min_num_instances_per_concept > 0
    assert os.path.isfile(args.path_knowledge_base)
    if hasattr(args, 'num_fold_for_k_fold_cv'):
        assert args.num_fold_for_k_fold_cv > 0
    if hasattr(args, 'max_test_time_per_concept'):
        assert args.max_test_time_per_concept > 1

    if hasattr(args, 'num_of_sequential_actions'):
        assert args.num_of_sequential_actions > 0

    if hasattr(args, 'batch_size'):
        assert args.batch_size > 1



_T = TypeVar('_T', bound=HasIRI)


def _read_iri_file(file: str, type_: Factory[[IRI], _T]) -> Set[_T]:  # pragma: no cover
    """Read a text file containing IRIs (one per line) and return the content as a set of instances created by the
    given type

    Args:
        file: path to the text file with the IRIs of the named individuals
        type_: factory or type to create from the IRI

    Returns:
        set of type_ instances with these IRIs
    """

    def optional_angles(iri: str):
        if iri.startswith('<'):
            return iri[1:-1]
        else:
            return iri

    with open(file, 'r') as f:
        inds = map(type_,
                   map(IRI.create,
                       map(optional_angles,
                           f.read().splitlines())))
    return set(inds)



[docs]
def read_individuals_file(file: str) -> Set[OWLNamedIndividual]:  # pragma: no cover
    """Read a text file containing IRIs of Named Individuals (one per line) and return the content as a set of OWL
    Named Individuals

    Args:
        file: path to the text file with the IRIs of the named individuals

    Returns:
        set of OWLNamedIndividual with these IRIs
    """
    return _read_iri_file(file, OWLNamedIndividual)




[docs]
def read_named_classes_file(file: str) -> Set[OWLClass]:  # pragma: no cover
    """Read a text file containing IRIs of OWL Named Classes (one per line) and return the content as a set of OWL
    Classes

    Args:
        file: path to the text file with the IRIs of the classes

    Returns:
        set of OWLNamedIndividual with these IRIs
    """
    return _read_iri_file(file, OWLClass)