Krotos Modules 3
Loading...
Searching...
No Matches
NER.h
Go to the documentation of this file.
1#pragma once
2
3#include <algorithm>
4#include <tuple>
5
6namespace krotos
7{
13 class NER
14 {
15 public:
23 std::unordered_map<String, StringArray> findEntity(String text,
24 const std::unordered_map<String, StringArray>& dictionary,
25 float similarityThreshold = 0.9f);
26
27 private:
34 StringArray ngrams(const StringArray& tokens, int n = 1);
35
40 int levenshteinDistance(const String& str1, const String& str2);
41
46 float stringSimilarity(const String& str1, const String& str2);
47
55 std::tuple<float, String, String> getFuzzySimilarity(String text,
56 const std::unordered_map<String, StringArray>& dictionary,
57 float similarityThreshold);
58
59 // Struct to hold named entity info
60 struct Entity
61 {
62 String name;
63 String category;
64 float score;
67 Entity(String name = String(), String category = String(), float score = 0.0f, int startIndex = 0,
68 int endIndex = 0)
70 {
71 }
72 };
73
79 std::vector<Entity> removeOverlapping(std::vector<Entity> entities);
80 };
81
82} // namespace krotos
NER is a Named Entity Recognition (NER) class designed to identify and extract named entities from un...
Definition NER.h:14
float stringSimilarity(const String &str1, const String &str2)
Compute the string similarity.
Definition NER.cpp:177
int levenshteinDistance(const String &str1, const String &str2)
Compute the Levenshtein distance between strings.
Definition NER.cpp:144
std::unordered_map< String, StringArray > findEntity(String text, const std::unordered_map< String, StringArray > &dictionary, float similarityThreshold=0.9f)
Search text for named entities held in dictionary.
Definition NER.cpp:61
std::tuple< float, String, String > getFuzzySimilarity(String text, const std::unordered_map< String, StringArray > &dictionary, float similarityThreshold)
Search for matching named entities using fuzzy string matching.
Definition NER.cpp:38
std::vector< Entity > removeOverlapping(std::vector< Entity > entities)
Remove overlapping entities (keep longest)
Definition NER.cpp:5
StringArray ngrams(const StringArray &tokens, int n=1)
Compute ngrams for the given StringArray.
Definition NER.cpp:128
Definition AirAbsorptionFilter.cpp:2
Definition NER.h:61
String name
Definition NER.h:62
int endIndex
Definition NER.h:66
Entity(String name=String(), String category=String(), float score=0.0f, int startIndex=0, int endIndex=0)
Definition NER.h:67
float score
Definition NER.h:64
String category
Definition NER.h:63
int startIndex
Definition NER.h:65