From de1c37a16f88981701446cdcbda42b04cc8b8c3e Mon Sep 17 00:00:00 2001 From: KoDer Date: Wed, 29 Apr 2026 10:54:26 +0300 Subject: [PATCH] =?UTF-8?q?=D0=9E=D0=B1=D0=BD=D0=BE=D0=B2=D0=B8=D1=82?= =?UTF-8?q?=D1=8C=20main.cpp?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.cpp | 164 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 120 insertions(+), 44 deletions(-) diff --git a/main.cpp b/main.cpp index 04e8e20..d6bf2c5 100644 --- a/main.cpp +++ b/main.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -7,32 +8,16 @@ #include #include "Xenith/core.h" #include "Xenith/token/token.h" +#include +#include -// Глобальные настройки -std::string currentSystemPrompt = "я робот"; +std::string currentSystemPrompt = ""; -void printParameterCount(LayerStructure_t layers[], int numLayers) { - long long totalParams = 0; - for (int i = 0; i < numLayers - 1; i++) { - long long weights = (long long)layers[i].size * layers[i + 1].size; - long long biases = (long long)layers[i + 1].size; - totalParams += (weights + biases); - } - - std::cout << "--- Xenith AI (Model Size: "; - if (totalParams >= 1000000000000LL) - std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t"; - else if (totalParams >= 1000000000LL) - std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b"; - else if (totalParams >= 1000000LL) - std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m"; - else if (totalParams >= 1000LL) - std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k"; - else - std::cout << totalParams; - - std::cout << " parameters) ---" << std::endl; -} +LayerStructure_t layers[] = { + {MAX_CONTEXT * EMBED_DIM, SIGMOID}, + {16, SIGMOID}, + {MAX_VOCAB, SIGMOID} +}; std::vector buildNetInput(const std::vector& tokens, Embedder& emb) { std::vector netInput; @@ -58,42 +43,89 @@ void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std std::cout << "Error: Sequence too short for training." << std::endl; return; } + int numLayers = sizeof(layers) / sizeof(layers[0]); + long long totalParams = 0; + for (int i = 0; i < numLayers - 1; i++) { + totalParams += (long long)layers[i].size * layers[i + 1].size + layers[i + 1].size; + } + + std::string modelSizeStr; + { + std::stringstream ss; + if (totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << totalParams / 1e12 << "t"; + else if (totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << totalParams / 1e9 << "b"; + else if (totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << totalParams / 1e6 << "m"; + else if (totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << totalParams / 1e3 << "k"; + else ss << totalParams; + modelSizeStr = ss.str(); + } + + std::string sequenceStr = ""; + for (int tId : allTokens) { + sequenceStr += "{" + tok.getWord(tId) + " (" + std::to_string(tId) + ")} "; + } + + auto startTime = std::chrono::high_resolution_clock::now(); + int trainSteps = 0; + double stepsPerSec = 0, maxLoss = 0; std::cout << "Training logic: Next Token Prediction..." << std::endl; + + std::cout << "\033[s\033[999;1H" << "\033[2K" << "\033[1;30m" << "\033[F" << "\r" + << "DATA: " << (sequenceStr.length() > 100 ? sequenceStr.substr(0, 200) : sequenceStr) << "\033[0m\033[u"; + for (int e = 1; e <= epochs; e++) { double totalLoss = 0; for (size_t i = 1; i < allTokens.size(); i++) { - std::vector context; - for (size_t j = 0; j < i; j++) context.push_back(allTokens[j]); - + std::vector context(allTokens.begin(), allTokens.begin() + i); std::vector target(MAX_VOCAB, 0.0); target[allTokens[i]] = 1.0; - totalLoss += nn.train(buildNetInput(context, emb), target, lr); + + trainSteps++; + auto currentTime = std::chrono::high_resolution_clock::now(); + if (std::chrono::duration(currentTime - startTime).count() >= 1.0) { + stepsPerSec = trainSteps / std::chrono::duration(currentTime - startTime).count(); + trainSteps = 0; + startTime = currentTime; + } + std::cout << "\rEpoch " << std::setw(4) << e << "/" << epochs + << " | Token: " << std::setw(3) << i << "/" << allTokens.size() + << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss + << " | Max Loss: " << std::fixed << std::setprecision(6) << maxLoss << " \033[s"; + + std::cout << "\033[999;1H" << "\r"; + + std::cout << "SPEED: " << std::setw(6) << std::fixed << std::setprecision(1) << stepsPerSec << " st/s" + << " | MODEL: " << std::setw(7) << modelSizeStr + << " | CURRENT: [" << std::left << std::setw(15) << tok.getWord(allTokens[i]) << "] (" + << std::right << std::setw(4) << allTokens[i] << ") "; + + std::cout << "\033[K" << "\033[0m"; + + std::cout << "\033[997;1H" << "\r" << std::flush << "\033[u"; + } - std::cout << "\rEpoch " << e << "/" << epochs << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss << std::flush; + maxLoss = totalLoss; } std::cout << "\nDone!" << std::endl; } + + int main() { + SetConsoleOutputCP(CP_UTF8); + SetConsoleCP(CP_UTF8); + Tokenizer tok; Embedder emb(MAX_VOCAB, EMBED_DIM); - LayerStructure_t layers[] = { - {MAX_CONTEXT * EMBED_DIM, SIGMOID}, - {16, SIGMOID}, - {MAX_VOCAB, SIGMOID} - }; + int numLayers = sizeof(layers) / sizeof(layers[0]); NeuralNetwork nn(layers, numLayers); - printParameterCount(layers, numLayers); - - std::cout << "\n--- MENU ---" << std::endl; - std::cout << "/train\n/trainFile\n/help\n/exit\n"; while (true) { - std::cout << "\nxentith~$ "; + std::cout << "xentith~$ "; std::string cmdIn; std::getline(std::cin, cmdIn); @@ -164,30 +196,74 @@ int main() { std::getline(std::cin, currentSystemPrompt); std::cout << "System Prompt updated!" << std::endl; - } else if (cmdIn == "/help") { + } + else if (cmdIn == "/help") { std::cout << "\n--- MENU ---" << std::endl; std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n"; + } else if (cmdIn == "/clr") { + + std::cout << "\033[2J\033[1;1H"; + } else { std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]"; std::vector currentTokens = tok.textToTokens(prompt); std::cout << "AI: "; - for (int g = 0; g < 30; g++) { + long long totalParams = 0; + for (int i = 0; i < numLayers - 1; i++) { + long long weights = (long long)layers[i].size * layers[i + 1].size; + long long biases = (long long)layers[i + 1].size; + totalParams += (weights + biases); + } + std::string modelSizeStr; + { + std::stringstream ss; + if (totalParams >= 1000000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t"; + else if (totalParams >= 1000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b"; + else if (totalParams >= 1000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m"; + else if (totalParams >= 1000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k"; + else ss << totalParams; + modelSizeStr = ss.str(); + } + + // Переменные для замера скорости + auto startTime = std::chrono::high_resolution_clock::now(); + int tokensInSecond = 0; + double tokensPerSec = 0; + + for (int g = 0; g < 1024; g++) { std::vector out = nn.feedForward(buildNetInput(currentTokens, emb)); - + int bestId = 0; for (int i = 0; i < MAX_VOCAB; i++) { if (out[i] > out[bestId]) bestId = i; } if (bestId == 0) break; - + + tokensInSecond++; + auto currentTime = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = currentTime - startTime; + + if (elapsed.count() >= 0.1) { + tokensPerSec = tokensInSecond / elapsed.count(); + tokensInSecond = 0; + startTime = currentTime; + } + std::string word = tok.getWord(bestId); std::cout << word << std::flush; - + + std::cout << "\033[s" << "\033[999;1H" << "\033[2K" + << "--- [ID: " << bestId << "] | " + << "Speed: " << std::fixed << std::setprecision(1) << tokensPerSec*10 << " t/s | " + << "Model: " << modelSizeStr << " params ---" + << "\033[u" << std::flush; + currentTokens.push_back(bestId); } + // Чтобы курсор не остался внизу после генерации std::cout << std::endl; } }