Source code for poincare_train

import argparse
import logging
from wordent_utils import word_to_wn
from gensim.models.poincare import PoincareModel, PoincareKeyedVectors, PoincareRelations

logging.basicConfig(level=logging.INFO)


def poincare_train(hypertouple_dataset, size=2, burn_in=0, epochs=5, print_freq=100):
    """
    Build a PoincareModel from the given relations and train it.

    Failures are printed and never propagated to the caller.

    Args:
        hypertouple_dataset (list): Relations handed to the model as ``train_data``
        size (int): Forwarded to the model as ``size``
        burn_in (int): Forwarded to the model as ``burn_in``
        epochs (int): Number of epochs to train
        print_freq (int): Forwarded to ``train`` as ``print_every``

    Returns:
        poincare_model (model object) : The model after training. ``None`` when the
        model could not be constructed; when only the training step raised, the
        constructed model is still returned.
    """
    # Stage 1: construction. Nothing to hand back if this fails.
    try:
        model = PoincareModel(train_data=hypertouple_dataset, size=size, burn_in=burn_in)
    except Exception as err:
        print(err)
        return None

    # Stage 2: training. A failure here still returns the constructed model.
    try:
        model.train(epochs=epochs, print_every=print_freq)
    except Exception as err:
        print(err)
    return model
def poincare_simmilar(poincare_model, word):
    """
    Look up the entries most similar to a word.

    The word is first converted with ``word_to_wn`` and the converted key is
    passed to ``poincare_model.kv.most_similar``. Failures are printed and
    never propagated.

    Args:
        poincare_model (model object): The trained poincare model to use
        word (str): The word used for finding the similar words list

    Returns:
        most_simmilar_set (list) : The result of ``kv.most_similar``, or ``None``
        when any step raised
    """
    try:
        # Convert before touching the model so the failure order stays the same.
        synset_key = word_to_wn(word)
        return poincare_model.kv.most_similar(synset_key)
    except Exception as err:
        print(err)
    return None
def poincare_closer_then(poincare_model, word1, word2):
    """
    Query the model for the entries closer to word1 than word2 is.

    Both words are converted with ``word_to_wn`` (word1 first) and passed, in
    that order, to ``poincare_model.kv.closer_than``. Failures are printed and
    never propagated.

    Args:
        poincare_model (model object): The trained poincare model to use
        word1 (str): first word
        word2 (str): second word

    Returns:
        closer_list (list) : The result of ``kv.closer_than``, or ``None`` when
        any step raised
    """
    try:
        anchor_key = word_to_wn(word1)
        reference_key = word_to_wn(word2)
        return poincare_model.kv.closer_than(anchor_key, reference_key)
    except Exception as err:
        print(err)
    return None
def poincare_word_dist(poincare_model, word1, word2):
    """
    Return the distance the model reports between word1 and word2.

    Both words are converted with ``word_to_wn`` (word1 first) and passed to
    ``poincare_model.kv.distance``. Failures are printed and never propagated.

    Args:
        poincare_model (model object): The trained poincare model to use
        word1 (str): first word
        word2 (str): second word

    Returns:
        dist (float) : The result of ``kv.distance``, or ``None`` when any step
        raised
    """
    try:
        first_key = word_to_wn(word1)
        second_key = word_to_wn(word2)
        return poincare_model.kv.distance(first_key, second_key)
    except Exception as err:
        print(err)
    return None
def poincare_closest_child(poincare_model, word):
    """
    Return the closest child node for a given word.

    The word is converted with ``word_to_wn`` and the converted key is passed
    to ``poincare_model.kv.closest_child``. Failures are printed and never
    propagated.

    Args:
        poincare_model (model object): The trained poincare model to use
        word (str): arbitrary word

    Returns:
        child_word (str) : The result of ``kv.closest_child``, or ``None`` when
        any step raised
    """
    try:
        node_key = word_to_wn(word)
        return poincare_model.kv.closest_child(node_key)
    except Exception as err:
        print(err)
    return None
def poincare_closest_parent(poincare_model, word):
    """
    Return the closest parent node for a given word.

    The word is converted with ``word_to_wn`` and the converted key is passed
    to ``poincare_model.kv.closest_parent``. Failures are printed and never
    propagated.

    Args:
        poincare_model (model object): The trained poincare model to use
        word (str): arbitrary word

    Returns:
        parent_word (str) : The result of ``kv.closest_parent``, or ``None`` when
        any step raised
    """
    try:
        node_key = word_to_wn(word)
        return poincare_model.kv.closest_parent(node_key)
    except Exception as err:
        print(err)
    return None
def main(argv=None):
    """
    Parse the command-line options, load the relations file and train a model.

    Args:
        argv (list): Argument strings to parse. ``None`` lets argparse read
            the process command line.

    Returns:
        The value returned by ``poincare_train`` for the loaded relations.

    Raises:
        SystemExit: Raised by argparse (status 2) when ``-p`` is missing or the
            arguments are otherwise invalid.
    """
    parser = argparse.ArgumentParser(description='Anotator options')
    # The dataset path has no usable default, so argparse rejects a missing -p
    # up front instead of letting None reach the relations reader.
    parser.add_argument('-p', action="store", type=str, dest='dataset_path',
                        help='designated dataset path', required=True)
    parser.add_argument('-d', action="store", type=str, dest='delimiter',
                        help='the split for the hyper touples', default='\t')
    # The options live on the namespace returned by parse_args(), not on the
    # parser object itself.
    args = parser.parse_args(argv)

    # Honour the -d option rather than a hard-coded tab.
    dataset = PoincareRelations(file_path=args.dataset_path, delimiter=args.delimiter)
    return poincare_train(dataset)


if __name__ == '__main__':
    model = main()