edit chatbot and core

This commit is contained in:
2026-04-29 02:05:59 +07:00
parent 224a6444ef
commit 0a7974260d
9 changed files with 326 additions and 105 deletions
+46
View File
@@ -0,0 +1,46 @@
#include "token.h"

#include <algorithm>
#include <random>
#include <utility>
/// @brief Registers a word in the vocabulary under the next sequential id.
///
/// The id handed out is the number of words registered so far, so ids are
/// dense and start at 0. Both lookup directions (word -> id and id -> word)
/// are updated together.
///
/// Adding a word that is already registered is a no-op: the word keeps its
/// original id. Rebinding it would orphan the old id in idToWord and make the
/// next new word collide with the rebound one, because the id counter is
/// derived from wordToId.size(), which does not grow on a duplicate.
///
/// @param word The vocabulary entry to register.
void Tokenizer::add(std::string word) {
    // The size is read before the insertion happens (arguments are evaluated
    // before the call), so a fresh word gets id == previous vocabulary size.
    const auto [slot, inserted] =
        wordToId.try_emplace(word, static_cast<int>(wordToId.size()));
    if (!inserted) {
        return;  // Already known: keep the existing id mapping intact.
    }
    // The key was copied into wordToId above, so the parameter can be moved.
    idToWord[slot->second] = std::move(word);
}
/// @brief Looks up the word registered under a token id.
/// @param id Token id to resolve.
/// @return A copy of the word stored for @p id, or an empty string when the
///         id is not present in idToWord.
std::string Tokenizer::getWord(int id) {
    const auto entry = idToWord.find(id);
    if (entry == idToWord.end()) {
        return "";
    }
    return entry->second;
}
/// @brief Converts text into token ids using greedy longest-match.
///
/// At each position every vocabulary word is tried, and the longest word that
/// matches the text starting there wins; its id is emitted and the position
/// advances past it. If no word matches, the single character at that
/// position is skipped and produces no token.
///
/// @param text Input text to tokenize.
/// @return Token ids in the order their words occur in @p text.
std::vector<int> Tokenizer::textToTokens(const std::string& text) {
    std::vector<int> result;
    const size_t total = text.length();
    size_t cursor = 0;

    while (cursor < total) {
        int bestId = -1;
        size_t bestLen = 0;

        for (const auto& entry : wordToId) {
            const std::string& candidate = entry.first;
            const size_t candidateLen = candidate.length();

            // Only a strictly longer word can replace the current best, which
            // also means an empty vocabulary entry is never selected.
            if (candidateLen <= bestLen) {
                continue;
            }
            if (text.compare(cursor, candidateLen, candidate) != 0) {
                continue;
            }
            bestLen = candidateLen;
            bestId = entry.second;
        }

        if (bestId == -1) {
            ++cursor;  // Nothing matched here: drop one character and retry.
            continue;
        }
        result.push_back(bestId);
        cursor += bestLen;
    }

    return result;
}
/// @brief Builds a vSize x dim embedding table filled with random values.
///
/// Values are drawn from a uniform real distribution over [-1.0, 1.0) using a
/// Mersenne Twister seeded with the constant 42, so the same sequence of
/// generator outputs is used on every construction. Rows are filled in order,
/// each row left to right.
///
/// @param vSize Number of rows (one per token id).
/// @param dim   Number of components per row.
Embedder::Embedder(int vSize, int dim) {
    std::mt19937 rng(42);
    std::uniform_real_distribution<double> uniform(-1.0, 1.0);

    matrix.resize(vSize, std::vector<double>(dim));

    for (int row = 0; row < vSize; ++row) {
        std::generate_n(matrix[row].begin(), dim,
                        [&rng, &uniform] { return uniform(rng); });
    }
}
/// @brief Returns the embedding vector for a token id.
///
/// @param id Row index into the embedding table.
/// @return A copy of row @p id when it is in range. For an out-of-range id, a
///         zero-filled vector with the same width as the table's rows. If the
///         table has no rows at all, an empty vector.
std::vector<double> Embedder::get(int id) {
    if (id >= 0 && static_cast<size_t>(id) < matrix.size()) {
        return matrix[static_cast<size_t>(id)];
    }
    // The fallback width is taken from the first row; guard against an empty
    // table, where reading matrix[0] would be undefined behaviour.
    const size_t width = matrix.empty() ? 0 : matrix.front().size();
    return std::vector<double>(width, 0.0);
}