-
Notifications
You must be signed in to change notification settings - Fork 414
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Tokens in Swift and Kotlin #227
Changes from 15 commits
240c04e
313ffbd
171c9f2
a0e0793
3b39860
92b0432
d1dfa87
25b708c
24aa98d
3d7c6f5
28d43bc
2b9c69e
48038a7
0a8d370
ccc3ba4
c313653
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -128,15 +128,59 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( | |||||
const auto &text = result.text; | ||||||
|
||||||
auto r = new SherpaOnnxOnlineRecognizerResult; | ||||||
// copy text | ||||||
r->text = new char[text.size() + 1]; | ||||||
std::copy(text.begin(), text.end(), const_cast<char *>(r->text)); | ||||||
const_cast<char *>(r->text)[text.size()] = 0; | ||||||
|
||||||
// copy json | ||||||
const auto &json = result.AsJsonString(); | ||||||
r->json = new char[json.size() + 1]; | ||||||
std::copy(json.begin(), json.end(), const_cast<char *>(r->json)); | ||||||
const_cast<char *>(r->json)[json.size()] = 0; | ||||||
|
||||||
// copy tokens | ||||||
auto count = result.tokens.size(); | ||||||
if (count > 0) { | ||||||
size_t total_length = 0; | ||||||
for (const auto& token : result.tokens) { | ||||||
// +1 for the null character at the end of each token | ||||||
total_length += token.size() + 1; | ||||||
} | ||||||
|
||||||
r->count = count; | ||||||
// Each token ends with a null character ('\0') | ||||||
r->tokens = new char[total_length]; | ||||||
memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0, | ||||||
total_length); | ||||||
r->timestamps = new float[r->count]; | ||||||
char **tokens_temp = new char*[r->count]; | ||||||
int pos = 0; | ||||||
for (int32_t i = 0; i < r->count; ++i) { | ||||||
tokens_temp[i] = const_cast<char*>(r->tokens) + pos; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reason for this casting
|
||||||
memcpy(reinterpret_cast<void *>(const_cast<char *>(r->tokens + pos)), | ||||||
result.tokens[i].c_str(), result.tokens[i].size()); | ||||||
// +1 to move past the null character | ||||||
pos += result.tokens[i].size() + 1; | ||||||
r->timestamps[i] = result.timestamps[i]; | ||||||
} | ||||||
|
||||||
r->tokens_arr = tokens_temp; | ||||||
} else { | ||||||
r->count = 0; | ||||||
r->timestamps = nullptr; | ||||||
r->tokens = nullptr; | ||||||
r->tokens_arr = nullptr; | ||||||
} | ||||||
|
||||||
return r; | ||||||
} | ||||||
|
||||||
void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r) { | ||||||
delete[] r->text; | ||||||
delete[] r->json; | ||||||
delete[] r->tokens; | ||||||
delete[] r->tokens_arr; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please also delete delete[] r->timestamps; |
||||||
delete r; | ||||||
} | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -101,8 +101,36 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { | |
} SherpaOnnxOnlineRecognizerConfig; | ||
|
||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { | ||
// Recognized text | ||
const char *text; | ||
// TODO(fangjun): Add more fields | ||
|
||
// Pointer to contiguous memory which holds string based tokens | ||
// which are separated by \0 | ||
const char *tokens; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest that we define
where And the new In this way, it simplifies users' life as they only need to iterate
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @csukuangfj I added |
||
|
||
// A pointer array containing the address of the first character of each token in tokens | ||
const char *const *tokens_arr; | ||
|
||
// Pointer to contiguous memory which holds the timestamps, | ||
// one float per token | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please update the comment. It is not separated by \0 for timestamps. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
float *timestamps; | ||
|
||
// The number of tokens/timestamps in the above pointers | ||
int32_t count; | ||
|
||
/** Return a json string. | ||
* | ||
* The returned string contains: | ||
* { | ||
* "text": "The recognition result", | ||
* "tokens": [x, x, x], | ||
* "timestamps": [x, x, x], | ||
* "segment": x, | ||
* "start_time": x, | ||
* "is_final": true|false | ||
* } | ||
*/ | ||
const char *json; | ||
} SherpaOnnxOnlineRecognizerResult; | ||
|
||
/// Note: OnlineRecognizer here means StreamingRecognizer. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.