Commit d83ccfba authored by tmikolov

Bugfix in InitUnigramTable(): some words could never be sampled as negative examples.

parent 330c98d0
...@@ -51,17 +51,17 @@ int *table; ...@@ -51,17 +51,17 @@ int *table;
void InitUnigramTable() { void InitUnigramTable() {
int a, i; int a, i;
long long train_words_pow = 0; double train_words_pow = 0;
real d1, power = 0.75; double d1, power = 0.75;
table = (int *)malloc(table_size * sizeof(int)); table = (int *)malloc(table_size * sizeof(int));
for (a = 0; a < vocab_size; a++) train_words_pow += pow(vocab[a].cn, power); for (a = 0; a < vocab_size; a++) train_words_pow += pow(vocab[a].cn, power);
i = 0; i = 0;
d1 = pow(vocab[i].cn, power) / (real)train_words_pow; d1 = pow(vocab[i].cn, power) / train_words_pow;
for (a = 0; a < table_size; a++) { for (a = 0; a < table_size; a++) {
table[a] = i; table[a] = i;
if (a / (real)table_size > d1) { if (a / (double)table_size > d1) {
i++; i++;
d1 += pow(vocab[i].cn, power) / (real)train_words_pow; d1 += pow(vocab[i].cn, power) / train_words_pow;
} }
if (i >= vocab_size) i = vocab_size - 1; if (i >= vocab_size) i = vocab_size - 1;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment