Source code for ontolearn.scripts.run

"""


"""
# -----------------------------------------------------------------------------
# MIT License
#
# Copyright (c) 2024 Ontolearn Team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------


import argparse
import glob
from fastapi import FastAPI
import uvicorn
from typing import Dict, Iterable, Union, List
from owlapy.class_expression import OWLClassExpression
from owlapy.iri import IRI
from owlapy.owl_individual import OWLNamedIndividual
from ontolearn.utils import compute_f1_score
from ontolearn.knowledge_base import KnowledgeBase
from ontolearn.triple_store import TripleStore
from ontolearn.learning_problem import PosNegLPStandard
from ontolearn.learners import Drill, TDL
from ontolearn.concept_learner import NCES
from ontolearn.metrics import F1
from ontolearn.verbalizer import LLMVerbalizer
from owlapy import owl_expression_to_dl
import os

app = FastAPI()
args = None
# Knowledge Base Loaded once
kb = None


def get_default_arguments():
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=8000)
    parser.add_argument("--path_knowledge_base", type=str, default=None)
    parser.add_argument("--endpoint_triple_store", type=str, default=None)
    return parser.parse_args()


[docs] @app.get("/") async def root(): global args return {"response": "Ontolearn Service is Running"}
def get_drill(data: dict):
    """ Initialize DRILL """
    # (1) Init DRILL.
    global kb
    drill = Drill(knowledge_base=kb,
                  path_embeddings=data.get("path_embeddings", None),
                  quality_func=F1(),
                  iter_bound=data.get("iter_bound", 10),  # total refinement operations applied
                  max_runtime=data.get("max_runtime", 60),  # seconds
                  num_episode=data.get("num_episode", 2),  # for the training
                  use_inverse=data.get("use_inverse", True),
                  use_data_properties=data.get("use_data_properties", True),
                  use_card_restrictions=data.get("use_card_restrictions", True),
                  use_nominals=data.get("use_nominals", True),
                  verbose=1)
    # (2) Either load the weights of DRILL or train it.
    if data.get("path_to_pretrained_drill", None) and os.path.isdir(data["path_to_pretrained_drill"]):
        drill.load(directory=data["path_to_pretrained_drill"])
    else:
        # Train & Save
        drill.train(num_of_target_concepts=data.get("num_of_target_concepts", 1),
                    num_learning_problems=data.get("num_of_training_learning_problems", 1))
        drill.save(directory=data.get("path_to_pretrained_drill", None))
    return drill


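# For reference, a sketch of a request payload consumed by get_drill above. Every key is
# optional and falls back to the default visible in the data.get(...) calls; the two paths
# are hypothetical placeholders, not files shipped with Ontolearn:
#
#   drill_request = {
#       "model": "Drill",
#       "path_embeddings": "/path/to/embeddings.csv",              # hypothetical path
#       "iter_bound": 10, "max_runtime": 60, "num_episode": 2,
#       "use_inverse": True, "use_data_properties": True,
#       "use_card_restrictions": True, "use_nominals": True,
#       "path_to_pretrained_drill": "/path/to/pretrained_drill",   # hypothetical path
#       "num_of_target_concepts": 1,
#       "num_of_training_learning_problems": 1,
#   }

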
def get_nces(data: dict) -> NCES:
    """ Load NCES """
    global kb
    global args
    assert args.path_knowledge_base.endswith(".owl"), \
        "NCES supports only a knowledge base file with extension .owl"
    # (1) Init NCES.
    nces = NCES(knowledge_base_path=args.path_knowledge_base,
                path_of_embeddings=data.get("path_embeddings", None),
                quality_func=F1(),
                load_pretrained=False,
                learner_names=["SetTransformer", "LSTM", "GRU"],
                num_predictions=64)
    # (2) Either load the weights of NCES or train it.
    if (data.get("path_to_pretrained_nces", None)
            and os.path.isdir(data["path_to_pretrained_nces"])
            and glob.glob(data["path_to_pretrained_nces"] + "/*.pt")):
        nces.refresh(data["path_to_pretrained_nces"])
    else:
        nces.train(epochs=data["nces_train_epochs"],
                   batch_size=data["nces_batch_size"],
                   num_lps=data["num_of_training_learning_problems"])
        nces.refresh(nces.trained_models_path)
    return nces


def get_tdl(data) -> TDL:
    global kb
    return TDL(knowledge_base=kb,
               use_inverse=False,
               use_data_properties=False,
               use_nominals=False,
               use_card_restrictions=data.get("use_card_restrictions", False),
               kwargs_classifier=data.get("kwargs_classifier", None),
               verbose=10)


def get_learner(data: dict) -> Union[Drill, TDL, NCES, None]:
    if data["model"] == "Drill":
        return get_drill(data)
    elif data["model"] == "TDL":
        return get_tdl(data)
    elif data["model"] == "NCES":
        return get_nces(data)
    else:
        return None


[docs] @app.get("/cel") async def cel(data: dict) -> Dict: global args global kb print("######### CEL Arguments ###############") print(f"Knowledgebase/Triplestore: {kb}\n") print(f"Input data: {data}\n") print("######### CEL Arguments ###############\n") # (1) Initialize OWL CEL and verbalizer owl_learner = get_learner(data) if owl_learner is None: return {"Results": f"There is no learner named as {data['model']}. Available models: Drill, TDL, NCES"} # (2) Read Positives and Negatives. positives = {OWLNamedIndividual(IRI.create(i)) for i in data['pos']} negatives = {OWLNamedIndividual(IRI.create(i)) for i in data['neg']} # (5) if len(positives) > 0 and len(negatives) > 0: # () LP lp = PosNegLPStandard(pos=positives, neg=negatives) # Few variable definitions for the sake of the readability. # ()Learning Process. results = [] learned_owl_expression: OWLClassExpression predictions = owl_learner.fit(lp).best_hypotheses(n=data.get("topk", 3)) if not isinstance(predictions, List): predictions = [predictions] verbalizer = LLMVerbalizer() for ith, learned_owl_expression in enumerate(predictions): # () OWL to DL dl_learned_owl_expression: str dl_learned_owl_expression = owl_expression_to_dl(learned_owl_expression) # () Get Individuals print(f"Retrieving individuals of {dl_learned_owl_expression}...") # TODO:CD: With owlapy:1.3.1, we can move the f1 score computation into triple store. # TODO: By this, we do not need to wait for the retrival results to return an answer to the user individuals: Iterable[OWLNamedIndividual] individuals = kb.individuals(learned_owl_expression) # () F1 score training train_f1: float train_f1 = compute_f1_score(individuals=frozenset({i for i in individuals}), pos=lp.pos, neg=lp.neg) results.append({"Rank": ith + 1, "Prediction": dl_learned_owl_expression, "Verbalization": verbalizer(dl_learned_owl_expression), "F1": train_f1}) return {"Results": results} else: return {"Results": "Error no valid learning problem"}
def main():
    global args
    global kb
    args = get_default_arguments()
    # (1) Init knowledge base.
    if args.path_knowledge_base:
        kb = KnowledgeBase(path=args.path_knowledge_base)
    elif args.endpoint_triple_store:
        kb = TripleStore(url=args.endpoint_triple_store)
    else:
        raise RuntimeError("Either --path_knowledge_base or --endpoint_triple_store must be provided")
    uvicorn.run(app, host=args.host, port=args.port)


if __name__ == '__main__':
    main()
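
# Example invocations (a sketch only; the .owl path and the SPARQL endpoint URL are hypothetical
# placeholders, and the module is assumed to be importable as ontolearn.scripts.run):
#
#   python -m ontolearn.scripts.run --path_knowledge_base /path/to/ontology.owl --port 8000
#   python -m ontolearn.scripts.run --endpoint_triple_store http://localhost:3030/dataset/sparql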