############################ Analysis with fastText ######################
#
# Corpus preparation (see file
# "05a Korpusvorbereitung-Fuer-fastText-Calderon.R"):
#   - all 21 identified tragedies
#   - all 21 identified comedies
#   - whitespace, punctuation, proper names and stopwords removed
#   - everything lower-cased
#   - the 21 tragedies and the 21 comedies each merged into a single file
#
# Prerequisite: the working directory must already be the "fastText" folder
# that contains "Tragedies.txt" and "Comedies.txt"; this script does not
# change the working directory itself.

# Load the fastText package.
library(fastText)

# Directory that receives every output file (models, logs, neighbour lists).
output <- getwd()

# Sub-corpora: the name is the one-letter suffix used in all output file
# names ("T" = tragedies, "C" = comedies), the value is the input text file.
corpora <- c("T" = "Tragedies.txt", "C" = "Comedies.txt")

# Fail early with a clear message if an input file is missing.
stopifnot(file.exists(corpora))

# Train models ----

# Train one skipgram model per sub-corpus. The model is written to
# "word_vectors<suffix>" (fastText adds the ".bin" extension that the 'nn'
# queries below load) and the training log to "skipgram_logs<suffix>.txt".
# NOTE(review): minn = maxn = 2 presumably limits subword units to character
# bigrams -- confirm against the fastText option reference.
for (suffix in names(corpora)) {
  list_params <- list(
    command = "skipgram",
    lr = 0.1,
    dim = 50,
    input = corpora[[suffix]],
    output = file.path(output, paste0("word_vectors", suffix)),
    verbose = 2,
    thread = 1,
    minn = 2,
    maxn = 2
  )
  res <- fasttext_interface(
    list_params,
    path_output = file.path(output, paste0("skipgram_logs", suffix, ".txt")),
    MilliSecs = 5
  )
}

# Nearest neighbours ----

# Terms whose 10 nearest neighbours are compared between the two sub-corpora.
# Each term is listed exactly once: the original script queried "celos" and
# "hado" twice, which only rewrote the same output files.
query_words <- c(
  "poder", "amor", "honor",
  "celos", "dios", "guerra", "hado", "justicia", "muera", "villano",
  "amistad", "fineza", "gusto", "vida"
)

# The 'nn' command returns the nearest neighbours of a word in a trained
# model. For every term, query the tragedy model and then the comedy model;
# results are written to "<word><suffix>-nearest.txt".
for (word in query_words) {
  for (suffix in names(corpora)) {
    list_params <- list(
      command = "nn",
      model = file.path(output, paste0("word_vectors", suffix, ".bin")),
      k = 10,
      query_word = word
    )
    res <- fasttext_interface(
      list_params,
      path_output = file.path(output, paste0(word, suffix, "-nearest.txt"))
    )
  }
}