diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 17b314fec9..0af303222c 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -863,33 +863,31 @@ def _load_vectors(self, file_handle): Open file handle to persisted vectors. """ - if self.new_format: - self.struct_unpack(file_handle, '@?') # bool quant_input in fasttext.cc - num_vectors, dim = self.struct_unpack(file_handle, '@2q') - # Vectors stored by [Matrix::save](https://github.com/facebookresearch/fastText/blob/master/src/matrix.cc) - assert self.wv.vector_size == dim, ( - 'mismatch between vector size in model params ({}) and model vectors ({})' - .format(self.wv.vector_size, dim) + self.wv.vectors_ngrams = _load_matrix( + file_handle, + new_format=self.new_format, + expected_vector_size=self.wv.vector_size ) - float_size = struct.calcsize('@f') - if float_size == 4: - dtype = np.dtype(np.float32) - elif float_size == 8: - dtype = np.dtype(np.float64) - - self.num_original_vectors = num_vectors - self.wv.vectors_ngrams = np.fromfile(file_handle, dtype=dtype, count=num_vectors * dim) - self.wv.vectors_ngrams = self.wv.vectors_ngrams.reshape((num_vectors, dim)) - assert self.wv.vectors_ngrams.shape == ( - self.trainables.bucket + len(self.wv.vocab), self.wv.vector_size), \ - 'mismatch between actual weight matrix shape {} and expected shape {}'\ - .format( - self.wv.vectors_ngrams.shape, (self.trainables.bucket + len(self.wv.vocab), self.wv.vector_size) + self.num_original_vectors = self.wv.vectors_ngrams.shape[0] + + expected_shape = (self.trainables.bucket + len(self.wv.vocab), self.wv.vector_size) + assert self.wv.vectors_ngrams.shape == expected_shape, \ + 'mismatch between actual weight matrix shape {} and expected shape {}'.format( + self.wv.vectors_ngrams.shape, expected_shape ) self.trainables.init_ngrams_post_load(self.file_name, self.wv) self._clear_post_train() + # + # FIXME: not sure what to do with this yet, but we will need it. + # + hidden_output = _load_matrix( + file_handle, + new_format=self.new_format, + expected_vector_size=self.wv.vector_size + ) + def struct_unpack(self, file_handle, fmt): """Read a single object from an open file. @@ -906,8 +904,7 @@ def struct_unpack(self, file_handle, fmt): Unpacked structure. """ - num_bytes = struct.calcsize(fmt) - return struct.unpack(fmt, file_handle.read(num_bytes)) + return _struct_unpack(file_handle, fmt) def save(self, *args, **kwargs): """Save the Fasttext model. This saved model can be loaded again using @@ -967,6 +964,62 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_inse return self.wv.accuracy(questions, restrict_vocab, most_similar, case_insensitive) +def _struct_unpack(file_handle, fmt): + num_bytes = struct.calcsize(fmt) + return struct.unpack(fmt, file_handle.read(num_bytes)) + + +def _load_matrix(file_handle, new_format=True, expected_vector_size=None): + """Load a matrix from fastText native format. + + Interprets the matrix dimensions and type from the file stream. + + Parameters + ---------- + file_handle : file + A file handle opened for reading. + new_format : boolean + True if the quant_input variable precedes + the matrix declaration. Should be True for newer versions of fastText. + expected_vector_size : int + The expected dimensionality of each vector. + If you specify this and the matrix's dimensionality is different, + will raise an assertion. + + Returns + ------- + :class:`numpy.array` + The vectors as an array. + Each vector will be a row in the array. + The number of columns of the array will correspond to the vector size. + + See Also + -------- + https://github.com/facebookresearch/fastText/blob/master/src/matrix.cc + + """ + if new_format: + _struct_unpack(file_handle, '@?') # bool quant_input in fasttext.cc + + num_vectors, dim = _struct_unpack(file_handle, '@2q') + assert expected_vector_size is None or expected_vector_size == dim, ( + 'mismatch between vector size in model params ({}) and model vectors ({})' + .format(expected_vector_size, dim) + ) + + float_size = struct.calcsize('@f') + if float_size == 4: + dtype = np.dtype(np.float32) + elif float_size == 8: + dtype = np.dtype(np.float64) + else: + raise ValueError("Incompatible float size: %r" % float_size) + + matrix = np.fromfile(file_handle, dtype=dtype, count=num_vectors * dim) + matrix = matrix.reshape((num_vectors, dim)) + return matrix + + class FastTextVocab(Word2VecVocab): """Vocabulary used by :class:`~gensim.models.fasttext.FastText`.""" def __init__(self, max_vocab_size=None, min_count=5, sample=1e-3, sorted_vocab=True, null_word=0, ns_exponent=0.75): diff --git a/gensim/test/test_data/toy-data.txt b/gensim/test/test_data/toy-data.txt new file mode 100644 index 0000000000..58ac340bec --- /dev/null +++ b/gensim/test/test_data/toy-data.txt @@ -0,0 +1 @@ + anarchism originated as a term of abuse first used against early working class radicals including the diggers of the english revolution and the sans culottes of the french revolution whilst the term is still used in a pejorative way to describe any act that used violent means to destroy the organization of society it has also been taken up as a positive label by self defined anarchists the word anarchism is derived from the greek without archons ruler chief king anarchism as a political philosophy is the belief that rulers are unnecessary and should be abolished although there are differing interpretations of what this means anarchism also refers to related social movements that advocate the elimination of authoritarian institutions particularly the state the word anarchy as most anarchists use it does not imply chaos nihilism or anomie but rather a harmonious anti authoritarian society in place of what are regarded as authoritarian political structures and coercive economic institutions anarchists advocate social relations based upon voluntary association of autonomous individuals mutual aid and self governance while anarchism is most easily defined by what it is against anarchists also offer positive visions of what they believe to be a truly free society however ideas about how an anarchist society might work vary considerably especially with respect to economics there is also disagreement about how a free society might be brought about origins and predecessors kropotkin and others argue that before recorded history human society was organized on anarchist principles most anthropologists follow kropotkin and engels in believing that hunter gatherer bands were egalitarian and lacked division of labour accumulated wealth or decreed law and had equal access to resources william godwin anarchists including the the anarchy organisation and rothbard find anarchist attitudes in taoism from ancient china kropotkin found similar ideas in stoic zeno of citium according to kropotkin zeno repudiated the omnipotence of the state its intervention and regimentation and proclaimed the sovereignty of the moral law of the individual the anabaptists of one six th century europe are sometimes considered to be religious forerunners of modern anarchism bertrand russell in his history of western philosophy writes that the anabaptists repudiated all law since they held that the good man will be guided at every moment by the holy spirit from this premise they arrive at communism the diggers or true levellers were an early communistic movement during the time of the english civil war and are considered by some as forerunners of modern anarchism in the modern era the first to use the term to mean something other than chaos was louis armand baron de lahontan in his nouveaux voyages dans l am rique septentrionale one seven zero three where he described the indigenous american society which had no state laws prisons priests or private property as being in anarchy russell means a libertarian and leader in the american indian movement has repeatedly stated that he is an anarchist and so are all his ancestors in one seven nine three in the thick of the french revolution william godwin published an enquiry concerning political justice although godwin did not use the word anarchism many later anarchists have regarded this book as the first major anarchist text and godwin as the founder of philosophical anarchism but at this point no anarchist movement yet existed and the term anarchiste was known mainly as an insult hurled by the bourgeois girondins at more radical elements in the french revolution the first self labelled anarchist pierre joseph proudhon it is commonly held that it wasn t until pierre joseph proudhon published what is property in one eight four zero that the term anarchist was adopted as a self description it is for this reason that some claim proudhon as the founder of modern anarchist theory in what is property proudhon answers with the famous accusation property is theft in this work he opposed the institution of decreed property p \ No newline at end of file diff --git a/gensim/test/test_data/toy-model.bin b/gensim/test/test_data/toy-model.bin new file mode 100644 index 0000000000..7e1b9819cc Binary files /dev/null and b/gensim/test/test_data/toy-model.bin differ diff --git a/gensim/test/test_data/toy-model.vec b/gensim/test/test_data/toy-model.vec new file mode 100644 index 0000000000..ae8b0c36d2 --- /dev/null +++ b/gensim/test/test_data/toy-model.vec @@ -0,0 +1,23 @@ +22 100 +the 0.11795 0.091151 0.080958 -0.10915 0.10121 0.059092 -0.19102 0.0015307 0.00040477 -0.01392 -0.11906 0.11998 0.097833 0.21086 -0.2983 -0.041993 0.16582 0.14808 0.014526 -0.073218 -0.2483 0.17985 0.069347 -0.18418 -0.10304 0.032945 0.061671 0.025272 -0.024186 0.25927 -0.076794 0.086819 -0.072027 0.13621 -0.19238 0.0098201 0.23451 -0.16532 -0.07339 0.24675 -0.34921 -0.12771 0.20714 -0.0076824 0.15132 -0.11738 0.20811 0.052524 -0.14623 0.086644 -0.10438 0.052601 -0.20899 0.25047 -0.078331 0.0093942 -0.14422 0.21313 0.34173 0.22315 0.2586 -0.042675 0.15711 -0.099053 0.16983 0.025244 -0.010969 0.024829 0.079661 -0.19744 -0.05247 -0.15115 -0.085485 0.13294 -0.17589 0.19305 0.14563 -0.17344 0.12943 -0.18564 -0.01404 0.089734 0.010085 0.015518 -0.14798 0.13217 0.12804 0.10621 -0.096836 0.11842 0.1877 -0.15098 0.19061 -0.13194 0.1031 -0.042321 -0.049258 0.068264 -0.011555 -0.16212 +of 0.075341 0.054132 0.04908 -0.066084 0.066624 0.03997 -0.11775 -0.00088257 0.0022383 -0.0058991 -0.072435 0.071198 0.060748 0.13084 -0.17766 -0.027242 0.10128 0.088913 0.0039799 -0.044991 -0.15075 0.10956 0.045696 -0.11017 -0.062894 0.021787 0.035527 0.013677 -0.016963 0.15635 -0.045489 0.048502 -0.039035 0.087011 -0.12036 0.0082743 0.1397 -0.098176 -0.043923 0.1494 -0.21217 -0.078576 0.12672 -0.0051724 0.095007 -0.079881 0.12076 0.031968 -0.094125 0.052373 -0.060026 0.02521 -0.12034 0.15228 -0.047011 0.0099649 -0.086932 0.12178 0.20693 0.13663 0.16214 -0.029398 0.094377 -0.055589 0.10338 0.014219 -0.0078267 0.013238 0.049496 -0.12249 -0.03178 -0.087354 -0.050306 0.079035 -0.10948 0.11508 0.086727 -0.10528 0.081607 -0.11165 -0.0086579 0.05274 0.004607 0.0046594 -0.089009 0.081926 0.073143 0.061131 -0.063266 0.073349 0.11457 -0.092375 0.11466 -0.078164 0.063544 -0.029748 -0.034002 0.037661 -0.0056996 -0.097617 +and 0.078116 0.058386 0.056487 -0.073178 0.06466 0.039211 -0.12921 -0.00027854 0.0017653 -0.0098805 -0.078851 0.08261 0.068225 0.13977 -0.19904 -0.025525 0.11046 0.091646 0.0085715 -0.044192 -0.16139 0.12208 0.046163 -0.12184 -0.072914 0.024582 0.042762 0.014857 -0.019237 0.17274 -0.049233 0.05741 -0.048577 0.091194 -0.12728 0.0049974 0.1497 -0.10869 -0.04381 0.16727 -0.23535 -0.085609 0.13803 -0.0064574 0.10195 -0.08397 0.1413 0.037087 -0.098394 0.056629 -0.071971 0.031923 -0.13707 0.16878 -0.055636 0.0091506 -0.09773 0.1368 0.22309 0.1454 0.17429 -0.028414 0.10478 -0.062397 0.11258 0.018857 -0.0011591 0.01609 0.056028 -0.13144 -0.029054 -0.096054 -0.055333 0.086362 -0.11603 0.12827 0.097475 -0.11951 0.08885 -0.12542 -0.0038661 0.060459 0.0083025 0.0078055 -0.095529 0.088615 0.086224 0.07226 -0.067246 0.083083 0.1213 -0.10299 0.12617 -0.085734 0.067823 -0.0262 -0.036135 0.048 -0.0042103 -0.10378 +in 0.086669 0.062243 0.054412 -0.073767 0.072139 0.045317 -0.13439 -0.0017746 0.00044335 -0.0096974 -0.081614 0.088114 0.064446 0.14734 -0.20843 -0.030205 0.1085 0.09816 0.013112 -0.045302 -0.16667 0.12973 0.044584 -0.12834 -0.075227 0.026261 0.047247 0.017202 -0.019537 0.17814 -0.052551 0.063582 -0.049734 0.094952 -0.13492 0.0084108 0.16095 -0.11406 -0.051615 0.17439 -0.24113 -0.090479 0.14399 -0.0049879 0.10208 -0.088252 0.14673 0.031961 -0.10017 0.061869 -0.072798 0.034444 -0.14254 0.16777 -0.055828 0.0024859 -0.10045 0.14659 0.23189 0.1546 0.18216 -0.028544 0.10698 -0.070123 0.1166 0.019915 -0.0066732 0.012435 0.058525 -0.13743 -0.032705 -0.099382 -0.053766 0.097017 -0.12322 0.13095 0.1048 -0.11822 0.094615 -0.13153 -0.0062404 0.063022 0.01086 0.013804 -0.096976 0.094258 0.088442 0.069077 -0.067368 0.077237 0.12443 -0.10925 0.13229 -0.090949 0.069971 -0.031434 -0.036609 0.044712 -0.0081178 -0.11471 +as 0.053863 0.042938 0.037241 -0.051183 0.05137 0.027114 -0.084835 1.3728e-05 0.0025314 -0.0069993 -0.050604 0.054771 0.047013 0.10033 -0.13325 -0.023278 0.080855 0.067213 0.0032944 -0.028117 -0.11216 0.081096 0.029206 -0.08644 -0.044203 0.017712 0.031886 0.0091182 -0.0085869 0.12154 -0.032878 0.039264 -0.039498 0.062988 -0.087045 0.0049847 0.10196 -0.075371 -0.028413 0.11865 -0.16172 -0.059862 0.097285 -0.0047555 0.066067 -0.059969 0.095558 0.02544 -0.073157 0.037012 -0.046715 0.024077 -0.089977 0.11319 -0.027823 0.0066428 -0.064607 0.097043 0.15502 0.10629 0.12505 -0.019258 0.070654 -0.044533 0.080667 0.012301 -0.0022248 0.0092332 0.037268 -0.091938 -0.026553 -0.065549 -0.037304 0.064934 -0.077455 0.092462 0.063586 -0.083913 0.059991 -0.087713 -0.0092362 0.043102 -0.00051714 0.0099011 -0.069276 0.056812 0.057576 0.045192 -0.044366 0.059025 0.090698 -0.071062 0.0937 -0.056276 0.046899 -0.016259 -0.022376 0.024875 -0.0067809 -0.073339 +is 0.074437 0.052038 0.051981 -0.067465 0.058618 0.03526 -0.11765 -0.0015289 -0.00015959 -0.0067318 -0.06577 0.076415 0.05829 0.12698 -0.17727 -0.028005 0.096446 0.088592 0.0046257 -0.044585 -0.14641 0.10238 0.038044 -0.10902 -0.060715 0.026365 0.036137 0.0072479 -0.016273 0.15289 -0.051616 0.053147 -0.045661 0.081826 -0.1067 0.0069735 0.13494 -0.10346 -0.048327 0.1467 -0.21133 -0.072159 0.12128 -0.0092351 0.088376 -0.072045 0.11589 0.032887 -0.087238 0.048044 -0.062112 0.031598 -0.12588 0.14592 -0.044971 0.011856 -0.085911 0.1297 0.20461 0.13551 0.14988 -0.029959 0.096274 -0.057667 0.10345 0.014265 -0.0039144 0.010562 0.047673 -0.11893 -0.029959 -0.088578 -0.048333 0.07779 -0.098661 0.11295 0.087307 -0.10624 0.076406 -0.10848 -0.0086265 0.059426 0.0076717 0.010637 -0.08249 0.078295 0.074592 0.05979 -0.055786 0.070062 0.11317 -0.088385 0.11068 -0.075256 0.062661 -0.026641 -0.028308 0.040725 -0.0044682 -0.093382 +that 0.066945 0.047453 0.045943 -0.057811 0.056633 0.031702 -0.10249 0.00024042 0.00079663 -0.0053683 -0.06074 0.065167 0.053823 0.11286 -0.15837 -0.021426 0.089578 0.080272 0.0075319 -0.039696 -0.13106 0.095934 0.036305 -0.096738 -0.057495 0.020537 0.033466 0.010245 -0.014078 0.13905 -0.042811 0.043999 -0.037507 0.078488 -0.1016 0.0056259 0.126 -0.088707 -0.039625 0.13276 -0.1881 -0.0689 0.11293 -0.0058395 0.077197 -0.069944 0.11004 0.029272 -0.078062 0.048065 -0.057099 0.025667 -0.10919 0.13677 -0.039712 0.0037627 -0.077784 0.10986 0.18405 0.11977 0.14268 -0.023658 0.083445 -0.051322 0.093099 0.013567 -0.0049253 0.015867 0.043399 -0.10602 -0.031198 -0.080191 -0.045041 0.072237 -0.095975 0.10266 0.078967 -0.0928 0.071374 -0.099167 -0.0068718 0.049467 0.0041039 0.0062738 -0.075501 0.070375 0.068726 0.058608 -0.054298 0.062562 0.10179 -0.084575 0.10511 -0.067477 0.052601 -0.026853 -0.029131 0.035389 -0.003624 -0.087392 +to 0.060323 0.046111 0.04158 -0.054313 0.057129 0.031363 -0.10041 -0.0033526 -0.0013111 -0.0098172 -0.060896 0.063161 0.05388 0.11195 -0.15321 -0.023402 0.086457 0.078618 0.0053244 -0.037212 -0.13181 0.094091 0.030763 -0.099566 -0.052809 0.015859 0.02956 0.015379 -0.012047 0.13619 -0.042537 0.043199 -0.034664 0.070228 -0.10471 0.0062273 0.12568 -0.087126 -0.035855 0.13071 -0.18209 -0.06096 0.10849 -0.00080616 0.079045 -0.064025 0.11502 0.02744 -0.073819 0.042107 -0.052047 0.024716 -0.10574 0.13205 -0.038193 0.0090317 -0.079089 0.1114 0.18122 0.11757 0.13478 -0.025544 0.083736 -0.051223 0.083238 0.0075664 -0.0044848 0.0086053 0.039882 -0.10386 -0.033724 -0.07977 -0.047524 0.07071 -0.085147 0.10484 0.073396 -0.090302 0.067185 -0.094732 -0.0072977 0.046248 0.0040743 0.0088815 -0.075282 0.068908 0.063497 0.053804 -0.049192 0.063104 0.098934 -0.081646 0.094111 -0.06628 0.05024 -0.022262 -0.031661 0.030206 -0.0022784 -0.084192 +a 0.046421 0.032798 0.039108 -0.038349 0.037766 0.020456 -0.071248 0.0028358 0.00072006 -0.0046159 -0.046781 0.041606 0.033866 0.08403 -0.11053 -0.015627 0.064717 0.06068 0.0048067 -0.023191 -0.095918 0.069243 0.021502 -0.071381 -0.037481 0.013889 0.031352 0.0073825 -0.0086553 0.095833 -0.02385 0.036393 -0.018642 0.050919 -0.071323 0.0091854 0.092882 -0.053973 -0.029557 0.098961 -0.13712 -0.051353 0.075869 0.0015759 0.062333 -0.049722 0.082184 0.019664 -0.058983 0.032148 -0.034979 0.025808 -0.076615 0.099721 -0.030648 0.0031889 -0.055634 0.076594 0.12679 0.086509 0.09253 -0.015524 0.057569 -0.027175 0.06334 0.00085049 0.0069896 0.0061507 0.030771 -0.073703 -0.015627 -0.060113 -0.033121 0.049414 -0.057852 0.072202 0.048984 -0.065439 0.051354 -0.06322 -0.0072296 0.033864 0.00047817 0.0024526 -0.053388 0.051924 0.054311 0.036886 -0.035877 0.040401 0.065778 -0.062867 0.07423 -0.048171 0.037957 -0.015353 -0.01992 0.029231 0.003175 -0.066286 +anarchist 0.10499 0.077077 0.073893 -0.094911 0.09087 0.051778 -0.16896 0.0028592 0.0018984 -0.014372 -0.10235 0.10641 0.083541 0.18568 -0.26289 -0.038413 0.14436 0.12947 0.0094352 -0.061216 -0.21769 0.15579 0.058523 -0.1614 -0.091191 0.032544 0.05609 0.018824 -0.019396 0.22724 -0.067484 0.07504 -0.062237 0.12111 -0.16807 0.0095553 0.20286 -0.14557 -0.064549 0.21793 -0.30632 -0.11071 0.17923 -0.0078681 0.13163 -0.10733 0.18314 0.045805 -0.12732 0.074925 -0.090335 0.043671 -0.18138 0.2206 -0.069132 0.0083824 -0.12894 0.18778 0.29605 0.19819 0.23036 -0.037809 0.13658 -0.083081 0.14884 0.02168 -0.0047173 0.020297 0.071285 -0.17489 -0.04414 -0.13011 -0.074402 0.11847 -0.15176 0.17136 0.1275 -0.15072 0.11277 -0.15911 -0.011503 0.077796 0.0082271 0.013163 -0.12605 0.11575 0.11296 0.089694 -0.087017 0.10286 0.16346 -0.13179 0.17034 -0.11426 0.088749 -0.038251 -0.0476 0.05731 -0.0060445 -0.14348 +anarchism 0.1065 0.077815 0.074661 -0.095396 0.09131 0.051896 -0.17089 0.0018371 0.001516 -0.014661 -0.10193 0.10936 0.084016 0.18756 -0.26431 -0.038648 0.14599 0.1291 0.010227 -0.062496 -0.21948 0.15779 0.057932 -0.16489 -0.092619 0.032062 0.057309 0.017896 -0.021118 0.23073 -0.068301 0.076624 -0.063088 0.12222 -0.16967 0.0083748 0.20363 -0.14697 -0.06487 0.22062 -0.30957 -0.11246 0.18228 -0.0092674 0.1329 -0.10833 0.18368 0.0464 -0.12981 0.074328 -0.091311 0.044441 -0.18397 0.22308 -0.069787 0.0083563 -0.12956 0.18834 0.29892 0.19974 0.23287 -0.039338 0.13794 -0.083537 0.14934 0.022374 -0.0035459 0.019157 0.072989 -0.17655 -0.043628 -0.12885 -0.072803 0.11882 -0.15278 0.17173 0.12802 -0.15251 0.11472 -0.16139 -0.012639 0.078508 0.0075528 0.014397 -0.12621 0.1172 0.11454 0.089934 -0.0884 0.10366 0.16345 -0.13331 0.17023 -0.11475 0.090429 -0.038623 -0.047434 0.058361 -0.0071884 -0.14274 +society 0.073428 0.049574 0.048737 -0.063202 0.060547 0.035932 -0.11166 -0.00075795 0.0021406 -0.011223 -0.068295 0.072499 0.058757 0.12445 -0.17294 -0.027342 0.0949 0.085678 0.0060218 -0.041508 -0.14568 0.10521 0.038354 -0.10717 -0.060493 0.019552 0.035317 0.011879 -0.014087 0.15305 -0.045015 0.051658 -0.040138 0.081434 -0.11234 0.0045097 0.13433 -0.096747 -0.044378 0.14493 -0.20467 -0.073434 0.11926 -0.0040236 0.08655 -0.074813 0.1196 0.031824 -0.088548 0.049545 -0.060813 0.028288 -0.12067 0.14751 -0.045687 0.0064171 -0.083153 0.12286 0.19947 0.13131 0.15433 -0.025985 0.092701 -0.056294 0.098473 0.017526 -0.0027713 0.013787 0.047603 -0.11686 -0.031067 -0.084049 -0.047809 0.080847 -0.098736 0.11137 0.082841 -0.10219 0.074808 -0.10622 -0.0048072 0.053736 0.0044521 0.0096848 -0.084189 0.076183 0.077013 0.059052 -0.057921 0.068045 0.10776 -0.091172 0.11547 -0.075015 0.058444 -0.025836 -0.031412 0.036946 -0.0047835 -0.094459 +what 0.052942 0.036601 0.034723 -0.045054 0.046588 0.027618 -0.084862 -0.00021578 0.0027216 -0.002828 -0.05113 0.054561 0.043064 0.094993 -0.12566 -0.021983 0.073049 0.059352 0.004345 -0.035023 -0.10988 0.078154 0.031004 -0.078389 -0.045783 0.014148 0.028414 0.010738 -0.011168 0.11501 -0.029424 0.035917 -0.028134 0.063023 -0.082968 0.0046163 0.10198 -0.072601 -0.031131 0.10537 -0.14973 -0.058623 0.089927 -0.0021857 0.064213 -0.054243 0.088589 0.022348 -0.062634 0.033833 -0.042919 0.017465 -0.089191 0.1089 -0.03287 0.0044129 -0.063903 0.089537 0.14356 0.09978 0.11865 -0.018592 0.066936 -0.040006 0.076997 0.010735 -0.0057157 0.0090062 0.032184 -0.087512 -0.020585 -0.060808 -0.036373 0.059244 -0.076032 0.084583 0.067458 -0.071997 0.057628 -0.080769 -0.00465 0.0411 0.003916 0.0093105 -0.060377 0.055077 0.052543 0.044956 -0.043094 0.051649 0.083144 -0.068213 0.084732 -0.054745 0.040053 -0.019524 -0.024444 0.025486 -0.002909 -0.072029 +are 0.044979 0.027886 0.031057 -0.039078 0.039544 0.020218 -0.06787 0.00088538 0.0025773 -0.0038006 -0.038958 0.046053 0.036366 0.07762 -0.11042 -0.016477 0.061344 0.053643 0.002494 -0.027408 -0.091474 0.066466 0.021019 -0.06673 -0.039331 0.013811 0.025774 0.0089593 -0.0090281 0.09847 -0.026295 0.031816 -0.029169 0.051287 -0.069851 0.0048476 0.083364 -0.061437 -0.026925 0.095641 -0.12418 -0.048026 0.078329 -0.0048473 0.055662 -0.046907 0.076542 0.019073 -0.053229 0.028687 -0.03836 0.018848 -0.079291 0.090561 -0.031473 0.00058625 -0.053301 0.076623 0.12418 0.083955 0.094803 -0.014172 0.059011 -0.032073 0.061886 0.0090609 -0.0032872 0.0086799 0.029804 -0.073295 -0.022582 -0.051794 -0.027344 0.047928 -0.061035 0.067855 0.04758 -0.062712 0.049954 -0.066819 -0.0008138 0.032022 0.00073033 0.0053074 -0.053486 0.048945 0.050202 0.036256 -0.037922 0.043493 0.063835 -0.053633 0.071947 -0.048897 0.040255 -0.012879 -0.020365 0.025443 -0.0037648 -0.058896 +anarchists 0.099522 0.072045 0.068926 -0.090451 0.085243 0.048677 -0.15981 0.0025417 0.0021009 -0.012668 -0.096492 0.10115 0.078704 0.17486 -0.24675 -0.037135 0.13695 0.12146 0.008434 -0.05683 -0.20527 0.14677 0.055201 -0.15272 -0.086044 0.030467 0.052698 0.01695 -0.018831 0.21493 -0.063893 0.070846 -0.058822 0.11392 -0.15843 0.0084885 0.19094 -0.13761 -0.060324 0.2046 -0.28878 -0.10414 0.16962 -0.0078024 0.12369 -0.10094 0.1712 0.043341 -0.11972 0.071347 -0.08591 0.041418 -0.17143 0.20788 -0.065511 0.0080243 -0.12186 0.17679 0.27873 0.1874 0.2168 -0.036588 0.12885 -0.079483 0.14046 0.019436 -0.0043168 0.019128 0.067365 -0.16554 -0.042365 -0.12127 -0.069515 0.11118 -0.14305 0.16136 0.121 -0.14168 0.10687 -0.1499 -0.010584 0.074033 0.0069971 0.012776 -0.11828 0.11014 0.10639 0.084542 -0.082786 0.097526 0.15445 -0.12288 0.16006 -0.10791 0.083605 -0.036212 -0.044529 0.053515 -0.0061845 -0.13367 +this 0.045778 0.039556 0.03217 -0.043746 0.040279 0.024639 -0.079102 -0.00030707 -0.00064378 -0.0026417 -0.045928 0.048443 0.037168 0.085918 -0.11813 -0.016392 0.063361 0.059686 0.0053919 -0.031646 -0.098872 0.067983 0.02767 -0.073213 -0.043511 0.015412 0.027337 0.008768 -0.0072309 0.10439 -0.03294 0.035632 -0.027657 0.051934 -0.074958 0.0055605 0.092829 -0.068748 -0.027734 0.095587 -0.13734 -0.048223 0.082726 -0.0006984 0.059528 -0.049323 0.081378 0.024843 -0.056656 0.032449 -0.043441 0.019917 -0.085481 0.095227 -0.030461 0.0050606 -0.058062 0.087191 0.1356 0.091672 0.10385 -0.01738 0.06151 -0.040628 0.069794 0.011668 -0.0017514 0.0081234 0.030857 -0.075968 -0.017533 -0.059355 -0.033059 0.054041 -0.068919 0.078617 0.057009 -0.067612 0.048645 -0.07553 -0.00692 0.038983 0.0058949 0.0042996 -0.054315 0.049537 0.053605 0.040284 -0.037143 0.043543 0.074433 -0.057207 0.075567 -0.051073 0.040178 -0.015749 -0.017475 0.026747 0.00035177 -0.061563 +it 0.065768 0.043959 0.039486 -0.063757 0.053133 0.030436 -0.10313 -0.0018257 0.0055686 -0.012154 -0.055456 0.071659 0.049232 0.11131 -0.16144 -0.025814 0.082027 0.078277 0.0056743 -0.035665 -0.12919 0.096767 0.041344 -0.096359 -0.054538 0.020959 0.039109 0.01278 -0.0094326 0.14238 -0.037533 0.045942 -0.037842 0.069764 -0.10194 0.001699 0.12093 -0.085403 -0.037515 0.13376 -0.18464 -0.066151 0.1092 -0.00063888 0.078896 -0.06391 0.11309 0.028978 -0.072086 0.051116 -0.056798 0.028741 -0.10932 0.13163 -0.048367 0.0060891 -0.077308 0.11071 0.17941 0.12014 0.14379 -0.022083 0.085774 -0.055319 0.087777 0.020903 0.0004861 0.010892 0.04564 -0.10823 -0.029483 -0.075261 -0.044079 0.073125 -0.09097 0.10067 0.076785 -0.091885 0.069818 -0.09805 -0.0071615 0.050955 0.002307 0.0071944 -0.074464 0.070171 0.064623 0.049089 -0.048317 0.064493 0.10176 -0.07941 0.10575 -0.069482 0.053976 -0.02536 -0.028804 0.034726 -0.0027013 -0.08897 +property 0.064511 0.04534 0.041742 -0.055481 0.055537 0.031541 -0.10368 0.00019278 0.00017956 -0.0094122 -0.060629 0.067435 0.050486 0.11217 -0.15864 -0.023389 0.08585 0.077658 0.007401 -0.038988 -0.13185 0.094372 0.034909 -0.097542 -0.058451 0.018337 0.034051 0.0108 -0.012071 0.13931 -0.039594 0.046297 -0.035697 0.072832 -0.10118 0.0057964 0.12278 -0.086455 -0.040127 0.13029 -0.18388 -0.066943 0.10733 -0.0026092 0.079796 -0.066461 0.11023 0.027668 -0.077621 0.045145 -0.056854 0.02615 -0.11147 0.13277 -0.040669 0.0034854 -0.077181 0.11241 0.18008 0.11954 0.13936 -0.025069 0.084938 -0.050766 0.08914 0.01525 -0.0015898 0.012432 0.042979 -0.10564 -0.027668 -0.077233 -0.042984 0.071083 -0.09237 0.10063 0.075554 -0.092005 0.068528 -0.09698 -0.0078593 0.048919 0.004507 0.0091544 -0.07546 0.067984 0.06756 0.054913 -0.051434 0.060753 0.09843 -0.083337 0.10414 -0.069871 0.052668 -0.024461 -0.026765 0.033484 -0.0043592 -0.083369 +be 0.079989 0.058416 0.048607 -0.072412 0.062466 0.036554 -0.12382 0.0023295 0.0032995 -0.0076173 -0.074069 0.076657 0.064539 0.1336 -0.18646 -0.021806 0.10013 0.094264 0.0083858 -0.042228 -0.15464 0.11358 0.046591 -0.11252 -0.063445 0.023922 0.040398 0.011449 -0.018156 0.16608 -0.051683 0.050034 -0.041786 0.089461 -0.12329 0.0069861 0.14889 -0.10932 -0.044203 0.16047 -0.22086 -0.081893 0.13407 -0.0061596 0.094363 -0.078614 0.12557 0.03316 -0.093883 0.055331 -0.064186 0.031457 -0.12979 0.15983 -0.049183 0.0098248 -0.093031 0.1375 0.21587 0.14373 0.16984 -0.032541 0.10103 -0.06017 0.1066 0.014696 -0.00071478 0.015812 0.051226 -0.12574 -0.034798 -0.093022 -0.052441 0.086865 -0.11334 0.12047 0.088413 -0.10588 0.083267 -0.11223 -0.0089499 0.056393 0.0068064 0.010615 -0.087858 0.090703 0.084124 0.06684 -0.060813 0.075828 0.123 -0.089453 0.12298 -0.087267 0.061913 -0.030957 -0.038693 0.036571 -0.0052479 -0.098058 +term 0.033615 0.023247 0.026867 -0.026717 0.030987 0.013976 -0.050264 -0.00094697 -0.0018201 -0.0034984 -0.030187 0.031742 0.028637 0.055053 -0.079087 -0.012198 0.043406 0.039886 0.0017805 -0.021358 -0.065835 0.048326 0.016053 -0.049297 -0.025541 0.0092873 0.017387 0.0044436 -0.0061342 0.068247 -0.020261 0.02238 -0.017378 0.040313 -0.051688 0.0039586 0.061393 -0.044528 -0.019722 0.065175 -0.092248 -0.033597 0.054701 -0.0025578 0.039695 -0.031013 0.05537 0.010289 -0.038278 0.022052 -0.028226 0.01365 -0.052969 0.066794 -0.021489 0.0029829 -0.037974 0.056953 0.092194 0.059626 0.069586 -0.013069 0.040775 -0.025132 0.043711 0.0068078 0.0041025 0.0063076 0.022226 -0.054097 -0.015653 -0.039792 -0.022528 0.037051 -0.046657 0.053246 0.035745 -0.04364 0.03748 -0.04913 -0.0026505 0.022082 -0.0001407 0.003254 -0.040581 0.033962 0.031392 0.02845 -0.026258 0.029892 0.049767 -0.042433 0.052026 -0.036117 0.0272 -0.01094 -0.012944 0.01903 -0.0030746 -0.042026 +an 0.12227 0.091419 0.087147 -0.11106 0.10615 0.065026 -0.20167 0.0034929 0.0054287 -0.018122 -0.12445 0.12647 0.10303 0.22294 -0.31601 -0.041159 0.17903 0.15344 0.015156 -0.072081 -0.26406 0.19153 0.073605 -0.19321 -0.11085 0.036275 0.063943 0.029965 -0.028504 0.26909 -0.078123 0.095528 -0.077139 0.14885 -0.20301 0.0094167 0.248 -0.17009 -0.074508 0.26627 -0.36845 -0.13894 0.21988 -0.0079346 0.16368 -0.12255 0.21815 0.059933 -0.15294 0.088891 -0.10997 0.053893 -0.22076 0.26567 -0.085267 0.007004 -0.15082 0.22064 0.35812 0.23683 0.26843 -0.048439 0.16709 -0.10178 0.17912 0.02169 -0.0071486 0.025373 0.086937 -0.20833 -0.055181 -0.15439 -0.089202 0.1416 -0.18333 0.20725 0.14963 -0.18555 0.13482 -0.19519 -0.01282 0.097157 0.010161 0.016978 -0.15359 0.14315 0.13549 0.11412 -0.10544 0.12296 0.19671 -0.15704 0.20211 -0.13389 0.10633 -0.041893 -0.057238 0.069755 -0.014764 -0.17252 +by 0.071151 0.051981 0.045753 -0.063299 0.062093 0.035953 -0.11538 0.00060194 -0.0025281 -0.010701 -0.066507 0.07565 0.055446 0.12051 -0.176 -0.02351 0.097648 0.084868 0.0076767 -0.041328 -0.13606 0.099846 0.035072 -0.10929 -0.056532 0.017203 0.041654 0.011639 -0.0098535 0.15117 -0.039857 0.054444 -0.03698 0.080044 -0.10618 0.0042917 0.13292 -0.093517 -0.040544 0.14206 -0.19758 -0.068867 0.11885 -0.0052796 0.087411 -0.074258 0.12072 0.029028 -0.080058 0.045182 -0.056723 0.030487 -0.12049 0.14077 -0.043298 0.0063719 -0.08324 0.11883 0.19432 0.13002 0.14989 -0.026347 0.093855 -0.051438 0.097086 0.012293 -0.0010578 0.010561 0.052007 -0.1124 -0.028146 -0.083542 -0.047969 0.079448 -0.10448 0.11123 0.080287 -0.097965 0.073639 -0.1023 -0.0063079 0.048546 0.010219 0.011558 -0.085891 0.081609 0.074085 0.061063 -0.05941 0.068312 0.10832 -0.093626 0.10603 -0.080541 0.061834 -0.027813 -0.027721 0.03506 -0.00020315 -0.094649 diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index c9935431e4..24cd60410f 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -842,6 +842,33 @@ def test_sg_hs_against_wrapper(self): self.compare_with_wrapper(model_gensim, model_wrapper) +class NativeTrainingContinuationTest(unittest.TestCase): + def test(self): + + def train_gensim(): + path = datapath('toy-data.txt') + with open(path) as fin: + words = fin.read().strip().split(' ') + + model = FT_gensim() + model.build_vocab(words) + model.train(words, total_examples=len(words), epochs=model.epochs) + return model + + def load_native(): + path = datapath('toy-model.bin') + model = FT_gensim.load_fasttext_format(path) + # model.build_vocab(common_texts, update=True) # this doesn't work, but should. See also https://github.com/RaRe-Technologies/gensim/issues/2139 + return model + + trained = train_gensim() + native = load_native() + + # + # For now, having this test not crash is good enough. + # + + if __name__ == '__main__': logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) unittest.main()