Source code for api_utils

import subprocess
import sys
import argparse
import requests
import shutil
import lxml.html as LH
import pandas as pd
import urllib.request
from fast_utils import exact_word_match


class DownloadError(Exception):
    """Error raised when a model download does not report success.

    Args:
        output (str): Text captured from the download process.

    Attributes:
        output (str): The text passed to the constructor.
    """

    def __init__(self, output):
        # Forward the text to ``Exception`` so that ``str(error)`` and
        # ``error.args`` carry it even when ``output`` is passed by keyword
        # (the base ``__new__`` only records positional arguments).
        super(DownloadError, self).__init__(output)
        self.output = output
def spacy_model_download(model_name, timeout=None):
    """Download a spaCy model by running ``<python> -m spacy download``.

    The interpreter command comes from ``python_exec()``. Errors are never
    propagated to the caller: any exception is printed and ``None`` is
    returned.

    Args:
        model_name (str): The model name for download.
        timeout (float, optional): Seconds to wait for the download process.
            ``None`` waits indefinitely. Numeric strings are converted with
            ``float``. Ignored on interpreters older than Python 3.5.

    Returns:
        str or None: On Python 3.5+, the decoded standard output of the
        download process when it contains the word ``Successfully``.
        ``None`` on older interpreters or when any error occurred.
    """
    try:
        arguments = [python_exec(), '-m', 'spacy', 'download', model_name]

        # ``subprocess.run`` only exists from Python 3.5 onwards.
        if sys.version_info < (3, 5):
            # ``communicate`` drains both pipes, so a chatty child process
            # cannot block on a full pipe buffer. The output is discarded:
            # this legacy path performs no success check.
            legacy_process = subprocess.Popen(
                arguments, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            legacy_process.communicate()
            return None

        if timeout is not None:
            # The command line hands the timeout over as text.
            timeout = float(timeout)

        print("Dowload for model {} stared".format(model_name))
        process = subprocess.run(
            arguments,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=timeout,
        )
        output_cont = process.stdout.decode("ISO-8859-1", "ignore")
        print("Dowload for model {} ended".format(model_name))

        if not exact_word_match('Successfully', output_cont):
            # Include stderr so the printed failure carries the diagnostics.
            error_cont = process.stderr.decode("ISO-8859-1", "ignore")
            raise DownloadError(output_cont + error_cont)
        return output_cont
    except Exception as e:
        # Best effort by design: report the problem instead of raising.
        print(e)
        return None
def fasttext_list(timeout=None):
    """Return a dictionary of the downloadable pretrained fastText models.

    The fastText ``pretrained-vectors.md`` page is fetched from GitHub. Every
    link containing ``amazonaws`` but not ``zip`` is paired with a cell of
    the last HTML table on the page. Cells are read row by row across the
    three columns ``Unnamed: 0``, ``Unnamed: 1`` and ``Unnamed: 2``.

    Args:
        timeout (float, optional): Seconds to wait for the HTTP request.
            ``None`` (the default) waits indefinitely.

    Returns:
        dict or None: Mapping of table cell value (the language name,
        according to the page layout this code expects) to model URL.
        ``None`` when the page cannot be fetched or parsed; the error is
        printed rather than raised.
    """
    columns = ('Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2')
    diction_frac = {}
    try:
        response = requests.get(
            "https://github.com/facebookresearch/fastText/blob/master/pretrained-vectors.md",
            timeout=timeout,
        )
        # Fail with a clear HTTP error instead of parsing an error page.
        response.raise_for_status()
        content = response.content

        webpage = LH.fromstring(content)
        all_refs = [
            href for href in webpage.xpath('//a/@href')
            if 'amazonaws' in href and 'zip' not in href
        ]

        table = pd.read_html(content)[-1]

        # Explicit check rather than ``assert``: asserts vanish under ``-O``
        # and print an empty message when they fail.
        expected = sum(len(table[name]) for name in columns)
        if len(all_refs) != expected:
            raise ValueError(
                "Found {} model links but {} table cells".format(
                    len(all_refs), expected))

        # Links appear in row-major order: link ``i`` belongs to row
        # ``i // 3`` and column ``i % 3``.
        for index, ref in enumerate(all_refs):
            row, col = divmod(index, len(columns))
            diction_frac[table[columns[col]][row]] = ref
    except Exception as e:
        print(e)
        return None
    return diction_frac
def fasttext_dowload(language_name, timeout=None):
    """Download the fastText model for a language.

    The model URL is looked up in the dictionary returned by
    ``fasttext_list()``. A case-insensitive exact match on the language name
    is preferred. Otherwise the last entry whose name contains
    ``language_name`` is used. The file is written under the last path
    segment of the URL, relative to the current working directory.

    Errors are never propagated: any exception is printed instead.

    Args:
        language_name (str): The language name for download.
        timeout (float, optional): Seconds passed to ``urlopen`` as its
            ``timeout``. ``None`` keeps the default socket timeout. Numeric
            strings are converted with ``float``.

    Returns:
        None
    """
    try:
        full_lang_dict = fasttext_list()
        if full_lang_dict is None:
            raise ValueError("The fasttext model list could not be retrieved")

        wanted = language_name.lower()
        url = ''
        for key, candidate in full_lang_dict.items():
            lowered = key.lower()
            if lowered == wanted:
                # An exact match must not be overridden by a later entry
                # that merely contains the requested name.
                url = candidate
                break
            if wanted in lowered:
                url = candidate
        if not url:
            raise ValueError(
                "No fasttext model found for language {}".format(language_name))

        file_name = url.split('/')[-1]
        # Only pass ``timeout`` when one was requested, so the global socket
        # default still applies otherwise.
        open_kwargs = {} if timeout is None else {'timeout': float(timeout)}
        with urllib.request.urlopen(url, **open_kwargs) as response, \
                open(file_name, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
    except Exception as e:
        # Best effort by design: report the problem instead of raising.
        print(e)
def python_exec():
    """Return the command used to launch Python in a subprocess.

    The path of the running interpreter (``sys.executable``) is preferred, so
    that anything installed through the subprocess lands in the same
    environment as the caller. It also works where no ``python3`` command
    exists on ``PATH``.

    Returns:
        str: ``sys.executable`` when it is set. Otherwise ``'python'`` on
        Python 2 and ``'python3'`` on Python 3.
    """
    # ``sys.executable`` may be empty or None when the path is unknown.
    if sys.executable:
        return sys.executable
    if sys.version_info < (3,):
        return 'python'
    return 'python3'
if __name__ == "__main__":
    # Command-line entry point: download a model from the chosen provider.
    parser = argparse.ArgumentParser(description='API options')
    # ``-l`` and ``-m`` are required: the dispatch below calls ``.lower()``
    # on the location, and both download functions need a model name.
    parser.add_argument('-l', action="store", dest='location', required=True,
                        help='Location of the model, i.e gensim, spacy, fastText etc etc')
    parser.add_argument('-m', action="store", type=str, dest='model_name',
                        required=True, help='designated model name')
    # Parsed as a number because it is passed on as a timeout in seconds.
    parser.add_argument('-t', action="store", type=float, dest='timeout',
                        help='timeout', default=None)
    results = parser.parse_args()
    print(results)

    location = results.location.lower()
    # Two independent checks, so a location naming both providers runs both.
    if 'spacy' in location:
        spacy_model_download(results.model_name, results.timeout)
    if 'fasttext' in location:
        fasttext_dowload(results.model_name, results.timeout)