Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
remove printing functions from fasttext class
Browse files Browse the repository at this point in the history
Summary:
This diff removes the print capabilities from fasttext and defines a new API.
- `predictLine` extracts predictions from exactly one line of the input stream.
- the deprecated `printLabelStats` is removed as [js bindings don't use it](https://www.facebook.com/groups/1174547215919768/?multi_permalinks=2328051983902613&comment_id=2360179150689896)
- `ngramVectors` is now deprecated in favor of the new `getNgramVectors`. The `Vector` class remains copy-free, but move semantics have been added.
- `analogies` is now deprecated in favor of `getAnalogies`. When called, the fastText class lazily precomputes word vectors.
- `findNN` is now deprecated in favor of `getNN`. When called, the fastText class lazily precomputes word vectors.
- `trainThread` and `printInfo` functions are now private.
- `supervised`, `cbow`, `skipgram`, `selectEmbeddings`, `precomputeWordVectors` are now deprecated and will be private in the future.
- `saveVectors`, `saveOutput` and `saveModel` without arguments are now deprecated in favor of their equivalents that take the filename as a string argument.

Reviewed By: EdouardGrave

Differential Revision: D13083799

fbshipit-source-id: f557ed7c141a90a6171045fe118ac16c195c824f
  • Loading branch information
Celebio authored and facebook-github-bot committed Nov 23, 2018
1 parent 4aee63d commit 256032b
Show file tree
Hide file tree
Showing 9 changed files with 395 additions and 225 deletions.
12 changes: 8 additions & 4 deletions python/fastText/FastText.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,16 @@ def check(entry):

if type(text) == list:
text = [check(entry) for entry in text]
all_probs, all_labels = self.f.multilinePredict(text, k, threshold)
return all_labels, np.array(all_probs, copy=False)
predictions = self.f.multilinePredict(text, k, threshold)
dt = np.dtype([('probability', 'float64'), ('label', '<U32')])
result_as_pair = np.array(predictions, dtype=dt)

return result_as_pair['label'].tolist(), result_as_pair['probability']
else:
text = check(text)
pairs = self.f.predict(text, k, threshold)
probs, labels = zip(*pairs)
predictions = self.f.predict(text, k, threshold)
probs, labels = zip(*predictions)

return labels, np.array(probs, copy=False)

def get_input_matrix(self):
Expand Down
71 changes: 23 additions & 48 deletions python/fastText/pybind/fasttext_pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -262,23 +262,30 @@ PYBIND11_MODULE(fasttext_pybind, m) {
const std::string text,
int32_t k,
fasttext::real threshold) {
std::vector<std::pair<fasttext::real, int32_t>> predictions;
std::vector<std::pair<fasttext::real, std::string>> all_predictions;
std::stringstream ioss(text);
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
std::vector<int32_t> words, labels;
d->getLine(ioss, words, labels);
m.predict(k, words, predictions, threshold);
std::transform(
predictions.begin(),
predictions.end(),
std::back_inserter(all_predictions),
[&d](const std::pair<fasttext::real, int32_t>& prediction) {
return std::pair<fasttext::real, std::string>(
std::exp(prediction.first),
d->getLabel(prediction.second));
});
return all_predictions;
std::vector<std::pair<fasttext::real, std::string>> predictions;
m.predictLine(ioss, predictions, k, threshold);

return predictions;
})
.def(
"multilinePredict",
// NOTE: text needs to end in a newline
// to exactly mimic the behavior of the cli
[](fasttext::FastText& m,
const std::vector<std::string>& lines,
int32_t k,
fasttext::real threshold) {
std::vector<std::vector<std::pair<fasttext::real, std::string>>>
allPredictions;
std::vector<std::pair<fasttext::real, std::string>> predictions;

for (const std::string& text : lines) {
std::stringstream ioss(text);
m.predictLine(ioss, predictions, k, threshold);
allPredictions.push_back(predictions);
}
return allPredictions;
})
.def(
"testLabel",
Expand All @@ -303,38 +310,6 @@ PYBIND11_MODULE(fasttext_pybind, m) {

return returnedValue;
})
.def(
"multilinePredict",
// NOTE: text needs to end in a newline
// to exactly mimic the behavior of the cli
[](fasttext::FastText& m,
const std::vector<std::string>& lines,
int32_t k,
fasttext::real threshold) {
std::pair<
std::vector<std::vector<fasttext::real>>,
std::vector<std::vector<std::string>>>
all_predictions;
std::vector<std::pair<fasttext::real, int32_t>> predictions;
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
std::vector<int32_t> words, labels;
for (const std::string& text : lines) {
std::stringstream ioss(text);
predictions.clear();
d->getLine(ioss, words, labels);
m.predict(k, words, predictions, threshold);
all_predictions.first.push_back(std::vector<fasttext::real>());
all_predictions.second.push_back(std::vector<std::string>());
for (auto& pair : predictions) {
pair.first = std::exp(pair.first);
all_predictions.first[all_predictions.first.size() - 1]
.push_back(pair.first);
all_predictions.second[all_predictions.second.size() - 1]
.push_back(d->getLabel(pair.second));
}
}
return all_predictions;
})
.def(
"getWordId",
[](fasttext::FastText& m, const std::string word) {
Expand Down
Loading

0 comments on commit 256032b

Please sign in to comment.