diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 0040b88..4257891 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -9,9 +9,11 @@ "-g", "${fileDirname}/main.cpp", "${fileDirname}/Xenith/core.cpp", + "${fileDirname}/Xenith/token/token.cpp", "-o", "${fileDirname}/main", - "-I", "${fileDirname}/Xenith" + "-I", "${fileDirname}/Xenith", + "-I", "${fileDirname}/Xenith/token" ], "options": { "cwd": "${fileDirname}" diff --git a/Xenith/core.cpp b/Xenith/core.cpp index 29203c0..ed1837e 100644 --- a/Xenith/core.cpp +++ b/Xenith/core.cpp @@ -1,82 +1,63 @@ #include "core.h" +#include +#include -NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) { - numLayers = count; - for (int i = 0; i < count; i++) { - layerSizes.push_back(layers[i].size); - } - - // Инициализация весов случайными числами +NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) : numLayers(count) { + for (int i = 0; i < count; i++) sizes.push_back(layers[i].size); for (int i = 0; i < count - 1; i++) { - std::vector> layerWeights; - for (int j = 0; j < layerSizes[i+1]; j++) { - std::vector nodeWeights; - for (int k = 0; k < layerSizes[i]; k++) { - nodeWeights.push_back(((double)rand() / RAND_MAX) * 2 - 1); - } - layerWeights.push_back(nodeWeights); + std::vector> layerW; + double scale = sqrt(2.0 / sizes[i]); + for (int j = 0; j < sizes[i+1]; j++) { + std::vector nodeW; + for (int k = 0; k < sizes[i]; k++) + nodeW.push_back(((double)rand()/RAND_MAX * 2 - 1) * scale); + layerW.push_back(nodeW); } - weights.push_back(layerWeights); - - std::vector layerBiases; - for (int j = 0; j < layerSizes[i+1]; j++) { - layerBiases.push_back(((double)rand() / RAND_MAX) * 2 - 1); - } - biases.push_back(layerBiases); + weights.push_back(layerW); + biases.push_back(std::vector(sizes[i+1], 0.0)); } } -std::vector NeuralNetwork::feedForward(std::vector input) { +std::vector NeuralNetwork::feedForward(const std::vector& input) { outputs.clear(); outputs.push_back(input); - - std::vector current = input; + std::vector curr = input; for (int i = 0; i < numLayers - 1; i++) { std::vector next; - for (int j = 0; j < layerSizes[i+1]; j++) { + for (int j = 0; j < sizes[i+1]; j++) { double sum = biases[i][j]; - for (int k = 0; k < layerSizes[i]; k++) { - sum += current[k] * weights[i][j][k]; - } - next.push_back(sigmoid(sum)); + for (int k = 0; k < (int)curr.size(); k++) sum += curr[k] * weights[i][j][k]; + next.push_back(1.0 / (1.0 + exp(-sum))); } - current = next; - outputs.push_back(current); + curr = next; + outputs.push_back(curr); } - return current; + return curr; } -void NeuralNetwork::train(std::vector input, std::vector target, double lr) { - // 1. Прямой проход - feedForward(input); - - // 2. Вычисление ошибок для выходного слоя +double NeuralNetwork::train(const std::vector& input, const std::vector& target, double lr) { + std::vector pred = feedForward(input); std::vector> errors(numLayers); - errors[numLayers - 1].resize(layerSizes[numLayers - 1]); - for (int i = 0; i < layerSizes[numLayers - 1]; i++) { - double output = outputs[numLayers - 1][i]; - errors[numLayers - 1][i] = (target[i] - output) * sigmoidDerivative(output); + errors[numLayers-1].resize(sizes[numLayers-1]); + double totalErr = 0; + for (int i = 0; i < sizes[numLayers-1]; i++) { + double e = target[i] - pred[i]; + errors[numLayers-1][i] = e * pred[i] * (1.0 - pred[i]); + totalErr += e * e; } - - // 3. Обратное распространение ошибки на скрытые слои for (int i = numLayers - 2; i > 0; i--) { - errors[i].resize(layerSizes[i]); - for (int j = 0; j < layerSizes[i]; j++) { - double error = 0.0; - for (int k = 0; k < layerSizes[i+1]; k++) { - error += errors[i+1][k] * weights[i][k][j]; - } - errors[i][j] = error * sigmoidDerivative(outputs[i][j]); + errors[i].resize(sizes[i]); + for (int j = 0; j < sizes[i]; j++) { + double e = 0; + for (int k = 0; k < sizes[i+1]; k++) e += errors[i+1][k] * weights[i][k][j]; + errors[i][j] = e * outputs[i][j] * (1.0 - outputs[i][j]); } } - - // 4. Обновление весов и смещений for (int i = 0; i < numLayers - 1; i++) { - for (int j = 0; j < layerSizes[i+1]; j++) { - for (int k = 0; k < layerSizes[i]; k++) { - weights[i][j][k] += lr * errors[i+1][j] * outputs[i][k]; - } + for (int j = 0; j < sizes[i+1]; j++) { + for (int k = 0; k < sizes[i]; k++) weights[i][j][k] += lr * errors[i+1][j] * outputs[i][k]; biases[i][j] += lr * errors[i+1][j]; } } + return totalErr; } \ No newline at end of file diff --git a/Xenith/core.h b/Xenith/core.h index 038f768..dc79cd6 100644 --- a/Xenith/core.h +++ b/Xenith/core.h @@ -4,25 +4,22 @@ #include "typedef.h" #include #include -#include -#include class NeuralNetwork { private: int numLayers; - std::vector layerSizes; - std::vector>> weights; // weights[layer][to_node][from_node] - std::vector> biases; // biases[layer][node] - std::vector> outputs; // Храним выходы слоев для backprop + std::vector sizes; + std::vector>> weights; + std::vector> biases; + std::vector> outputs; double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); } - double sigmoidDerivative(double x) { return x * (1.0 - x); } + double sigmoidDeriv(double x) { return x * (1.0 - x); } public: NeuralNetwork(LayerStructure_t layers[], int count); - - std::vector feedForward(std::vector input); - void train(std::vector input, std::vector target, double learningRate); + std::vector feedForward(const std::vector& input); + double train(const std::vector& input, const std::vector& target, double lr); }; #endif \ No newline at end of file diff --git a/Xenith/token/token.cpp b/Xenith/token/token.cpp new file mode 100644 index 0000000..fa24bd7 --- /dev/null +++ b/Xenith/token/token.cpp @@ -0,0 +1,46 @@ +#include "token.h" +#include +#include + +void Tokenizer::add(std::string word) { + int id = wordToId.size(); + wordToId[word] = id; + idToWord[id] = word; +} + +std::string Tokenizer::getWord(int id) { + return idToWord.count(id) ? idToWord[id] : ""; +} + +std::vector Tokenizer::textToTokens(const std::string& text) { + std::vector tokens; + size_t pos = 0; + while (pos < text.length()) { + int longestId = -1; size_t longestLen = 0; + for (auto const& [word, id] : wordToId) { + if (text.compare(pos, word.length(), word) == 0) { + if (word.length() > longestLen) { + longestLen = word.length(); longestId = id; + } + } + } + if (longestId != -1) { + tokens.push_back(longestId); + pos += longestLen; + } else pos++; + } + return tokens; +} + +Embedder::Embedder(int vSize, int dim) { + std::mt19937 gen(42); + std::uniform_real_distribution dist(-1.0, 1.0); + matrix.resize(vSize, std::vector(dim)); + for(int i=0; i Embedder::get(int id) { + if (id >= 0 && id < (int)matrix.size()) return matrix[id]; + return std::vector(matrix[0].size(), 0.0); +} \ No newline at end of file diff --git a/Xenith/token/token.h b/Xenith/token/token.h new file mode 100644 index 0000000..095bb55 --- /dev/null +++ b/Xenith/token/token.h @@ -0,0 +1,42 @@ +#ifndef TOKEN_H +#define TOKEN_H + +#include +#include +#include + +class Tokenizer { +public: + std::map wordToId; + std::map idToWord; + + Tokenizer() { + add(""); // 0 + add("[SYS]"); // 1 + add("[USER]"); // 2 + add("[AI]"); // 3 + add(" "); // 4 + add("\n"); // 5 + add("привет"); // 6 + add("как"); // 7 + add("дела"); // 8 + add("?"); // 9 + add("я"); // 10 + add("робот"); // 11 + add("хорошо"); // 12 + } + + void add(std::string word); + int getID(std::string word); + std::string getWord(int id); + std::vector textToTokens(const std::string& text); +}; + +class Embedder { +public: + std::vector> matrix; + Embedder(int vSize, int dim); + std::vector get(int id); +}; + +#endif \ No newline at end of file diff --git a/Xenith/typedef.h b/Xenith/typedef.h index 31fdc29..348e38c 100644 --- a/Xenith/typedef.h +++ b/Xenith/typedef.h @@ -1,15 +1,11 @@ #ifndef TYPEDEF_H #define TYPEDEF_H -#include +const int MAX_CONTEXT = 4; // Сколько токенов видит сеть +const int EMBED_DIM = 4; // Размер вектора одного токена +const int MAX_VOCAB = 13; // Размер словаря -typedef enum { - SIGMOID -} FunctionActivate_t; - -typedef struct { - int size; - FunctionActivate_t activate; -} LayerStructure_t; +typedef enum { SIGMOID } FunctionActivate_t; +typedef struct { int size; FunctionActivate_t activate; } LayerStructure_t; #endif \ No newline at end of file diff --git a/dataset.txt b/dataset.txt new file mode 100644 index 0000000..38b9c75 --- /dev/null +++ b/dataset.txt @@ -0,0 +1 @@ +[USER]привет[AI]привет как дела? \ No newline at end of file diff --git a/main b/main index cbf00c0..a7ad854 100755 Binary files a/main and b/main differ diff --git a/main.cpp b/main.cpp index c77f4e6..04e8e20 100644 --- a/main.cpp +++ b/main.cpp @@ -1,39 +1,195 @@ #include #include -#include +#include +#include +#include +#include +#include #include "Xenith/core.h" -#include "Xenith/typedef.h" +#include "Xenith/token/token.h" +// Глобальные настройки +std::string currentSystemPrompt = "я робот"; -int main() { - srand(time(NULL)); - - LayerStructure_t layers[] = { - {2, SIGMOID}, // Вход: 2 числа - {3, SIGMOID}, // Скрытый слой - {1, SIGMOID} // Выход: 1 число - }; - - NeuralNetwork nn(layers, 3); - - // Данные для обучения - std::vector> inputs = {{1, 1}, {1, 0}, {0, 0}, {0, 1}}; - std::vector> targets = {{0}, {1}, {1}, {0}}; - - // Цикл обучения - std::cout << "Training..." << std::endl; - for (int epoch = 0; epoch < 20000; epoch++) { - for (int i = 0; i < inputs.size(); i++) { - nn.train(inputs[i], targets[i], 0.5); - } +void printParameterCount(LayerStructure_t layers[], int numLayers) { + long long totalParams = 0; + for (int i = 0; i < numLayers - 1; i++) { + long long weights = (long long)layers[i].size * layers[i + 1].size; + long long biases = (long long)layers[i + 1].size; + totalParams += (weights + biases); } - // Проверка результатов - std::cout << "Results:" << std::endl; - for (int i = 0; i < inputs.size(); i++) { - std::vector res = nn.feedForward(inputs[i]); - std::cout << inputs[i][0] << " " << inputs[i][1] << " -> " - << (res[0] > 0.5 ? 1 : 0) << " (raw: " << res[0] << ")" << std::endl; + std::cout << "--- Xenith AI (Model Size: "; + if (totalParams >= 1000000000000LL) + std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t"; + else if (totalParams >= 1000000000LL) + std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b"; + else if (totalParams >= 1000000LL) + std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m"; + else if (totalParams >= 1000LL) + std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k"; + else + std::cout << totalParams; + + std::cout << " parameters) ---" << std::endl; +} + +std::vector buildNetInput(const std::vector& tokens, Embedder& emb) { + std::vector netInput; + netInput.reserve(MAX_CONTEXT * EMBED_DIM); + int start = (int)tokens.size() - MAX_CONTEXT; + if (start < 0) start = 0; + int count = 0; + for (int i = start; i < (int)tokens.size(); i++) { + std::vector v = emb.get(tokens[i]); + netInput.insert(netInput.end(), v.begin(), v.end()); + count++; + } + while (count < MAX_CONTEXT) { + for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0); + count++; + } + return netInput; +} + +void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std::string& dataset, int epochs, double lr) { + std::vector allTokens = tok.textToTokens(dataset); + if (allTokens.size() < 2) { + std::cout << "Error: Sequence too short for training." << std::endl; + return; + } + + std::cout << "Training logic: Next Token Prediction..." << std::endl; + for (int e = 1; e <= epochs; e++) { + double totalLoss = 0; + for (size_t i = 1; i < allTokens.size(); i++) { + std::vector context; + for (size_t j = 0; j < i; j++) context.push_back(allTokens[j]); + + std::vector target(MAX_VOCAB, 0.0); + target[allTokens[i]] = 1.0; + + totalLoss += nn.train(buildNetInput(context, emb), target, lr); + } + std::cout << "\rEpoch " << e << "/" << epochs << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss << std::flush; + } + std::cout << "\nDone!" << std::endl; +} + +int main() { + Tokenizer tok; + Embedder emb(MAX_VOCAB, EMBED_DIM); + LayerStructure_t layers[] = { + {MAX_CONTEXT * EMBED_DIM, SIGMOID}, + {16, SIGMOID}, + {MAX_VOCAB, SIGMOID} + }; + int numLayers = sizeof(layers) / sizeof(layers[0]); + + NeuralNetwork nn(layers, numLayers); + printParameterCount(layers, numLayers); + + std::cout << "\n--- MENU ---" << std::endl; + std::cout << "/train\n/trainFile\n/help\n/exit\n"; + + while (true) { + std::cout << "\nxentith~$ "; + + std::string cmdIn; + std::getline(std::cin, cmdIn); + + if (cmdIn == "/exit") break; + + if (cmdIn == "/train") { + int epochs; + double lr; + std::cout << "--- Training Setup ---\n"; + std::cout << "Enter number of epochs: "; + std::string epStr; std::getline(std::cin, epStr); + epochs = std::stoi(epStr); + + std::cout << "Enter learning rate (e.g. 0.1): "; + std::string lrStr; std::getline(std::cin, lrStr); + lr = std::stod(lrStr); + + std::cout << "\n--- Example Content ---\n"; + std::cout << "User: "; + std::string userPart; + std::getline(std::cin, userPart); + + std::cout << "AI: "; + std::string aiPart; + std::getline(std::cin, aiPart); + + std::string finalData = "[SYS]" + currentSystemPrompt + + "[USER]" + userPart + + "[AI]" + aiPart + ""; + + std::cout << "\nTraining logic: Pattern Recognition..." << std::endl; + trainOnSequence(nn, tok, emb, finalData, epochs, lr); + } + + else if (cmdIn == "/trainFile") { + std::string content; + std::cout << "Enter filename: "; + std::string filename; + std::getline(std::cin, filename); + std::ifstream file(filename); + if (file.is_open()) { + std::stringstream buffer; + buffer << file.rdbuf(); + content = buffer.str(); + std::cout << "Loaded " << content.length() << " characters from file." << std::endl; + } else { + std::cout << "Could not open file!" << std::endl; + continue; + } + + int epochs; + double lr; + std::cout << "Enter number of epochs: "; + std::string epStr; std::getline(std::cin, epStr); + epochs = std::stoi(epStr); + + std::cout << "Enter learning rate (e.g. 0.1): "; + std::string lrStr; std::getline(std::cin, lrStr); + lr = std::stod(lrStr); + + std::string finalData = "[SYS]" + currentSystemPrompt + content + ""; + trainOnSequence(nn, tok, emb, finalData, epochs, lr); + + } else if (cmdIn == "/sysPrompt") { + std::cout << "Current System Prompt: " << currentSystemPrompt << std::endl; + std::cout << "Enter new System Prompt: "; + std::getline(std::cin, currentSystemPrompt); + std::cout << "System Prompt updated!" << std::endl; + + } else if (cmdIn == "/help") { + std::cout << "\n--- MENU ---" << std::endl; + std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n"; + + } else { + std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]"; + std::vector currentTokens = tok.textToTokens(prompt); + + std::cout << "AI: "; + for (int g = 0; g < 30; g++) { + std::vector out = nn.feedForward(buildNetInput(currentTokens, emb)); + + int bestId = 0; + for (int i = 0; i < MAX_VOCAB; i++) { + if (out[i] > out[bestId]) bestId = i; + } + + if (bestId == 0) break; + + std::string word = tok.getWord(bestId); + std::cout << word << std::flush; + + currentTokens.push_back(bestId); + } + std::cout << std::endl; + } } return 0;