Обновить main.cpp

This commit is contained in:
2026-04-29 10:54:26 +03:00
parent 55bc00e5dc
commit de1c37a16f
+120 -44
View File
@@ -1,4 +1,5 @@
#include <iostream>
#include <iomanip>
#include <vector>
#include <string>
#include <iomanip>
@@ -7,32 +8,16 @@
#include <algorithm>
#include "Xenith/core.h"
#include "Xenith/token/token.h"
#include <windows.h>
#include <chrono>
// Global settings
std::string currentSystemPrompt = "я робот";
std::string currentSystemPrompt = "";
/**
 * Prints a one-line banner with the model's trainable-parameter count.
 *
 * For every adjacent pair of layers the count adds size[i] * size[i + 1]
 * weights plus size[i + 1] biases. Totals of 1000 or more are written with
 * one decimal place and a k/m/b/t suffix; smaller totals are written as a
 * plain integer.
 *
 * The formatting flags and precision of std::cout are restored before
 * returning, so the banner does not change how later output is formatted.
 *
 * @param layers    Array of layer descriptors; only the `size` field is read.
 * @param numLayers Number of entries in `layers`. With fewer than two
 *                  entries the reported total is 0.
 */
void printParameterCount(LayerStructure_t layers[], int numLayers) {
    long long totalParams = 0;
    for (int i = 0; i + 1 < numLayers; ++i) {
        // Widen before multiplying so large layers do not overflow int.
        const long long fanIn = static_cast<long long>(layers[i].size);
        const long long fanOut = static_cast<long long>(layers[i + 1].size);
        totalParams += fanIn * fanOut + fanOut;
    }

    // Largest unit first: the first scale that fits decides the suffix.
    struct Unit {
        long long scale;
        const char* suffix;
    };
    static const Unit kUnits[] = {
        {1000000000000LL, "t"},
        {1000000000LL, "b"},
        {1000000LL, "m"},
        {1000LL, "k"},
    };

    // std::fixed and std::setprecision are sticky; remember the caller's state.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    const std::streamsize savedPrecision = std::cout.precision();

    std::cout << "--- Xenith AI (Model Size: ";
    bool printed = false;
    for (const Unit& unit : kUnits) {
        if (totalParams >= unit.scale) {
            std::cout << std::fixed << std::setprecision(1)
                      << static_cast<double>(totalParams) / static_cast<double>(unit.scale)
                      << unit.suffix;
            printed = true;
            break;
        }
    }
    if (!printed) {
        std::cout << totalParams;
    }
    std::cout << " parameters) ---" << std::endl;

    std::cout.flags(savedFlags);
    std::cout.precision(savedPrecision);
}
// File-scope network layout, listed in order: an entry sized
// MAX_CONTEXT * EMBED_DIM, an entry of 16 units, and an entry sized MAX_VOCAB.
// The first member of each entry is read elsewhere in this file as `.size`.
// NOTE(review): the second member (SIGMOID) is presumably the activation
// selector — confirm against the LayerStructure_t declaration.
// Keep this a plain array: code in this file derives the layer count with
// sizeof(layers) / sizeof(layers[0]).
LayerStructure_t layers[] = {
{MAX_CONTEXT * EMBED_DIM, SIGMOID},
{16, SIGMOID},
{MAX_VOCAB, SIGMOID}
};
std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
std::vector<double> netInput;
@@ -58,42 +43,89 @@ void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std
std::cout << "Error: Sequence too short for training." << std::endl;
return;
}
int numLayers = sizeof(layers) / sizeof(layers[0]);
long long totalParams = 0;
for (int i = 0; i < numLayers - 1; i++) {
totalParams += (long long)layers[i].size * layers[i + 1].size + layers[i + 1].size;
}
std::string modelSizeStr;
{
std::stringstream ss;
if (totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << totalParams / 1e12 << "t";
else if (totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << totalParams / 1e9 << "b";
else if (totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << totalParams / 1e6 << "m";
else if (totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << totalParams / 1e3 << "k";
else ss << totalParams;
modelSizeStr = ss.str();
}
std::string sequenceStr = "";
for (int tId : allTokens) {
sequenceStr += "{" + tok.getWord(tId) + " (" + std::to_string(tId) + ")} ";
}
auto startTime = std::chrono::high_resolution_clock::now();
int trainSteps = 0;
double stepsPerSec = 0, maxLoss = 0;
std::cout << "Training logic: Next Token Prediction..." << std::endl;
std::cout << "\033[s\033[999;1H" << "\033[2K" << "\033[1;30m" << "\033[F" << "\r"
<< "DATA: " << (sequenceStr.length() > 100 ? sequenceStr.substr(0, 200) : sequenceStr) << "\033[0m\033[u";
for (int e = 1; e <= epochs; e++) {
double totalLoss = 0;
for (size_t i = 1; i < allTokens.size(); i++) {
std::vector<int> context;
for (size_t j = 0; j < i; j++) context.push_back(allTokens[j]);
std::vector<int> context(allTokens.begin(), allTokens.begin() + i);
std::vector<double> target(MAX_VOCAB, 0.0);
target[allTokens[i]] = 1.0;
totalLoss += nn.train(buildNetInput(context, emb), target, lr);
trainSteps++;
auto currentTime = std::chrono::high_resolution_clock::now();
if (std::chrono::duration<double>(currentTime - startTime).count() >= 1.0) {
stepsPerSec = trainSteps / std::chrono::duration<double>(currentTime - startTime).count();
trainSteps = 0;
startTime = currentTime;
}
std::cout << "\rEpoch " << std::setw(4) << e << "/" << epochs
<< " | Token: " << std::setw(3) << i << "/" << allTokens.size()
<< " | Loss: " << std::fixed << std::setprecision(6) << totalLoss
<< " | Max Loss: " << std::fixed << std::setprecision(6) << maxLoss << " \033[s";
std::cout << "\033[999;1H" << "\r";
std::cout << "SPEED: " << std::setw(6) << std::fixed << std::setprecision(1) << stepsPerSec << " st/s"
<< " | MODEL: " << std::setw(7) << modelSizeStr
<< " | CURRENT: [" << std::left << std::setw(15) << tok.getWord(allTokens[i]) << "] ("
<< std::right << std::setw(4) << allTokens[i] << ") ";
std::cout << "\033[K" << "\033[0m";
std::cout << "\033[997;1H" << "\r" << std::flush << "\033[u";
}
std::cout << "\rEpoch " << e << "/" << epochs << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss << std::flush;
maxLoss = totalLoss;
}
std::cout << "\nDone!" << std::endl;
}
int main() {
SetConsoleOutputCP(CP_UTF8);
SetConsoleCP(CP_UTF8);
Tokenizer tok;
Embedder emb(MAX_VOCAB, EMBED_DIM);
LayerStructure_t layers[] = {
{MAX_CONTEXT * EMBED_DIM, SIGMOID},
{16, SIGMOID},
{MAX_VOCAB, SIGMOID}
};
int numLayers = sizeof(layers) / sizeof(layers[0]);
NeuralNetwork nn(layers, numLayers);
printParameterCount(layers, numLayers);
std::cout << "\n--- MENU ---" << std::endl;
std::cout << "/train\n/trainFile\n/help\n/exit\n";
while (true) {
std::cout << "\nxentith~$ ";
std::cout << "xentith~$ ";
std::string cmdIn;
std::getline(std::cin, cmdIn);
@@ -164,30 +196,74 @@ int main() {
std::getline(std::cin, currentSystemPrompt);
std::cout << "System Prompt updated!" << std::endl;
} else if (cmdIn == "/help") {
}
else if (cmdIn == "/help") {
std::cout << "\n--- MENU ---" << std::endl;
std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
} else if (cmdIn == "/clr") {
std::cout << "\033[2J\033[1;1H";
} else {
std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]";
std::vector<int> currentTokens = tok.textToTokens(prompt);
std::cout << "AI: ";
for (int g = 0; g < 30; g++) {
long long totalParams = 0;
for (int i = 0; i < numLayers - 1; i++) {
long long weights = (long long)layers[i].size * layers[i + 1].size;
long long biases = (long long)layers[i + 1].size;
totalParams += (weights + biases);
}
std::string modelSizeStr;
{
std::stringstream ss;
if (totalParams >= 1000000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t";
else if (totalParams >= 1000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b";
else if (totalParams >= 1000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m";
else if (totalParams >= 1000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k";
else ss << totalParams;
modelSizeStr = ss.str();
}
// Variables for measuring generation speed
auto startTime = std::chrono::high_resolution_clock::now();
int tokensInSecond = 0;
double tokensPerSec = 0;
for (int g = 0; g < 1024; g++) {
std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
int bestId = 0;
for (int i = 0; i < MAX_VOCAB; i++) {
if (out[i] > out[bestId]) bestId = i;
}
if (bestId == 0) break;
tokensInSecond++;
auto currentTime = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = currentTime - startTime;
if (elapsed.count() >= 0.1) {
tokensPerSec = tokensInSecond / elapsed.count();
tokensInSecond = 0;
startTime = currentTime;
}
std::string word = tok.getWord(bestId);
std::cout << word << std::flush;
std::cout << "\033[s" << "\033[999;1H" << "\033[2K"
<< "--- [ID: " << bestId << "] | "
<< "Speed: " << std::fixed << std::setprecision(1) << tokensPerSec*10 << " t/s | "
<< "Model: " << modelSizeStr << " params ---"
<< "\033[u" << std::flush;
currentTokens.push_back(bestId);
}
// So the cursor does not stay at the bottom of the screen after generation
std::cout << std::endl;
}
}