FirstVersion
This commit is contained in:
@@ -2,262 +2,104 @@
|
||||
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "Xenith/core.hpp"
#include "Xenith/token/token.hpp"
|
||||
|
||||
|
||||
|
||||
// Global system prompt text; initialised to the empty string.
std::string currentSystemPrompt = "";
|
||||
|
||||
// Layer layout of the network, first entry = input layer, last = output layer.
// Each entry is {size, SIGMOID}; the second field presumably selects the
// activation function (confirm against LayerStructure_t in Xenith/core.hpp).
// The input size MAX_CONTEXT * EMBED_DIM is the capacity buildNetInput()
// reserves for its result, and the output size MAX_VOCAB is the number of
// scores scanned when a token is picked during generation.
LayerStructure_t layers[] = {
    {MAX_CONTEXT * EMBED_DIM, SIGMOID},
    {256, SIGMOID},
    {1024, SIGMOID},
    {MAX_VOCAB, SIGMOID}
};
|
||||
|
||||
/// Formats a duration given in seconds as zero-padded "HH:MM:SS".
///
/// Negative and NaN inputs are rendered as "00:00:00". The fractional part is
/// truncated. Hours are not wrapped at 24, so long durations simply print more
/// than two hour digits.
///
/// @param seconds Duration in seconds.
/// @return The formatted string.
std::string formatTime(double seconds) {
    // `!(x > 0)` is also true for NaN, which a plain `x < 0` test would let
    // through to an undefined double -> integer conversion.
    if (!(seconds > 0)) seconds = 0;

    // Converting a double that does not fit the destination integer type is
    // undefined behaviour, so clamp to a bound that is far inside long long.
    const double kMaxSeconds = 9.0e15;
    if (seconds > kMaxSeconds) seconds = kMaxSeconds;

    const long long total = static_cast<long long>(seconds);
    const long long h = total / 3600;
    const long long m = (total % 3600) / 60;
    const long long s = total % 60;

    std::stringstream ss;
    // The fill character is sticky; the width has to be set before each field.
    ss << std::setfill('0')
       << std::setw(2) << h << ":"
       << std::setw(2) << m << ":"
       << std::setw(2) << s;
    return ss.str();
}
|
||||
|
||||
std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
|
||||
std::vector<double> netInput;
|
||||
netInput.reserve(MAX_CONTEXT * EMBED_DIM);
|
||||
int start = (int)tokens.size() - MAX_CONTEXT;
|
||||
if (start < 0) start = 0;
|
||||
std::vector<double> netInput; netInput.reserve(MAX_CONTEXT * EMBED_DIM);
|
||||
int start = (int)tokens.size() - MAX_CONTEXT; if (start < 0) start = 0;
|
||||
int count = 0;
|
||||
for (int i = start; i < (int)tokens.size(); i++) {
|
||||
std::vector<double> v = emb.get(tokens[i]);
|
||||
netInput.insert(netInput.end(), v.begin(), v.end());
|
||||
count++;
|
||||
}
|
||||
while (count < MAX_CONTEXT) {
|
||||
for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0);
|
||||
count++;
|
||||
netInput.insert(netInput.end(), v.begin(), v.end()); count++;
|
||||
}
|
||||
while (count < MAX_CONTEXT) { for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0); count++; }
|
||||
return netInput;
|
||||
}
|
||||
|
||||
/// Trains `nn` on next-token prediction over `dataset` and shows live progress.
///
/// The text is tokenised once. For every position i >= 1 the tokens before i
/// (limited to the last MAX_CONTEXT, which is all buildNetInput() consumes)
/// form the context, and a one-hot vector of length MAX_VOCAB for token i is
/// the target of one nn.trainVulkan() call. A two-line status display is
/// redrawn in place after every step using ANSI save/restore-cursor sequences.
///
/// @param nn      Network to train.
/// @param tok     Tokenizer used to encode `dataset` and to print the current token.
/// @param emb     Embedder forwarded to buildNetInput().
/// @param dataset Training text; fewer than two tokens prints an error and returns.
/// @param epochs  Number of passes over the sequence; values below 1 train nothing.
/// @param lr      Learning rate passed to trainVulkan().
void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std::string& dataset, int epochs, double lr) {
    const std::vector<int> allTokens = tok.textToTokens(dataset);
    if (allTokens.size() < 2) {
        std::cout << "Error: Sequence too short for training." << std::endl;
        return;
    }

    // Parameter count: weights plus biases for every pair of adjacent layers.
    const int numLayers = sizeof(layers) / sizeof(layers[0]);
    long long totalParams = 0;
    for (int i = 0; i < numLayers - 1; i++) {
        totalParams += (long long)layers[i].size * layers[i + 1].size + layers[i + 1].size;
    }

    // Human-readable size with a k/m/b/t suffix.
    std::string modelSizeStr;
    {
        std::stringstream ss;
        if (totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << totalParams / 1e12 << "t";
        else if (totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << totalParams / 1e9 << "b";
        else if (totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << totalParams / 1e6 << "m";
        else if (totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << totalParams / 1e3 << "k";
        else ss << totalParams;
        modelSizeStr = ss.str();
    }

    // The status lines change std::cout's formatting state; remember it so it
    // can be put back before returning.
    const std::ios_base::fmtflags savedFlags = std::cout.flags();
    const std::streamsize savedPrecision = std::cout.precision();

    auto windowStart = std::chrono::high_resolution_clock::now();
    int windowSteps = 0;       // steps since the speed figure was last refreshed
    double stepsPerSec = 0;
    double prevEpochLoss = 0;  // summed loss of the previous epoch (0 during the first)
    long long skippedSteps = 0;

    std::cout << "Training logic: Next Token Prediction..." << std::endl;

    // Save the cursor position, then reserve the two lines the display uses.
    std::cout << "\033[s\n\n";

    const std::size_t maxContext = static_cast<std::size_t>(MAX_CONTEXT);
    const std::size_t vocabSize = static_cast<std::size_t>(MAX_VOCAB);

    for (int e = 1; e <= epochs; e++) {
        double totalLoss = 0;
        for (std::size_t i = 1; i < allTokens.size(); i++) {
            const int targetId = allTokens[i];
            // An id outside the one-hot vector would be an out-of-bounds write.
            if (targetId < 0 || static_cast<std::size_t>(targetId) >= vocabSize) {
                skippedSteps++;
                continue;
            }

            // buildNetInput() only reads the last MAX_CONTEXT tokens, so copying
            // more than that window is wasted work.
            const std::size_t first = i > maxContext ? i - maxContext : 0;
            const std::vector<int> context(allTokens.begin() + static_cast<std::ptrdiff_t>(first),
                                           allTokens.begin() + static_cast<std::ptrdiff_t>(i));

            std::vector<double> target(vocabSize, 0.0);
            target[static_cast<std::size_t>(targetId)] = 1.0;
            totalLoss += nn.trainVulkan(buildNetInput(context, emb), target, lr);

            // Refresh the speed figure at most every 0.1 s.
            windowSteps++;
            const auto now = std::chrono::high_resolution_clock::now();
            const double elapsed = std::chrono::duration<double>(now - windowStart).count();
            if (elapsed >= 0.1) {
                stepsPerSec = windowSteps / elapsed;
                windowSteps = 0;
                windowStart = now;
            }

            // Jump back to the saved position and overwrite both status lines.
            std::cout << "\033[u";

            std::cout << std::right
                      << "Epoch " << std::setw(4) << e << "/" << epochs
                      << " | Token: " << std::setw(4) << i << "/" << allTokens.size()
                      << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss
                      << " | Prev Ep Loss: " << prevEpochLoss << "\033[K\n";

            // std::left is sticky: switch back to std::right after the token
            // field so the numeric columns stay right-aligned on the next redraw.
            std::cout << "SPEED: " << std::setw(6) << std::fixed << std::setprecision(1) << stepsPerSec
                      << " st/s | MODEL: " << std::setw(7) << modelSizeStr
                      << " | CURRENT: [" << std::left << std::setw(15) << tok.getWord(targetId) << "]"
                      << std::right << "\033[K" << std::flush;
        }
        prevEpochLoss = totalLoss;
    }

    std::cout.flags(savedFlags);
    std::cout.precision(savedPrecision);

    std::cout << "\nDone!" << std::endl;
    if (skippedSteps > 0) {
        std::cout << "Warning: skipped " << skippedSteps
                  << " step(s) whose target token id was outside the vocabulary." << std::endl;
    }
}
|
||||
|
||||
|
||||
|
||||
int main() {
|
||||
Tokenizer tok;
|
||||
Embedder emb(MAX_VOCAB, EMBED_DIM);
|
||||
|
||||
int numLayers = sizeof(layers) / sizeof(layers[0]);
|
||||
|
||||
NeuralNetwork nn(layers, numLayers, true);
|
||||
Tokenizer tok; Embedder emb(MAX_VOCAB, EMBED_DIM);
|
||||
NeuralNetwork nn(layers, sizeof(layers)/sizeof(layers[0]), true);
|
||||
|
||||
while (true) {
|
||||
std::cout << "xentith~$ ";
|
||||
|
||||
std::string cmdIn;
|
||||
std::getline(std::cin, cmdIn);
|
||||
|
||||
std::cout << "\033[1;32mxenith\033[0m~$ ";
|
||||
std::string cmdIn; std::getline(std::cin, cmdIn);
|
||||
if (cmdIn == "/exit") break;
|
||||
|
||||
if (cmdIn == "/train") {
|
||||
int epochs;
|
||||
double lr;
|
||||
std::cout << "--- Training Setup ---\n";
|
||||
std::cout << "Enter number of epochs: ";
|
||||
std::string epStr; std::getline(std::cin, epStr);
|
||||
epochs = std::stoi(epStr);
|
||||
|
||||
std::cout << "Enter learning rate (e.g. 0.1): ";
|
||||
std::string lrStr; std::getline(std::cin, lrStr);
|
||||
lr = std::stod(lrStr);
|
||||
|
||||
std::cout << "\n--- Example Content ---\n";
|
||||
std::cout << "User: ";
|
||||
std::string userPart;
|
||||
std::getline(std::cin, userPart);
|
||||
|
||||
std::cout << "AI: ";
|
||||
std::string aiPart;
|
||||
std::getline(std::cin, aiPart);
|
||||
|
||||
std::string finalData = "[USER]" + userPart +
|
||||
"[AI]" + aiPart + "<EOS>";
|
||||
|
||||
std::cout << "\nTraining logic: Pattern Recognition..." << std::endl;
|
||||
trainOnSequence(nn, tok, emb, finalData, epochs, lr);
|
||||
}
|
||||
|
||||
else if (cmdIn == "/trainFile") {
|
||||
if (cmdIn == "/train" || cmdIn == "/trainFile") {
|
||||
std::string content;
|
||||
std::cout << "Enter filename: ";
|
||||
std::string filename;
|
||||
std::getline(std::cin, filename);
|
||||
std::ifstream file(filename);
|
||||
if (file.is_open()) {
|
||||
std::stringstream buffer;
|
||||
buffer << file.rdbuf();
|
||||
content = buffer.str();
|
||||
std::cout << "Loaded " << content.length() << " characters from file." << std::endl;
|
||||
if (cmdIn == "/trainFile") {
|
||||
std::cout << "Filename: "; std::string fn; std::getline(std::cin, fn);
|
||||
std::ifstream f(fn); std::stringstream ss; ss << f.rdbuf(); content = ss.str();
|
||||
} else {
|
||||
std::cout << "Could not open file!" << std::endl;
|
||||
continue;
|
||||
std::cout << "User: "; std::string u; std::getline(std::cin, u);
|
||||
std::cout << "AI: "; std::string a; std::getline(std::cin, a);
|
||||
content = "[CLR][USER]" + u + "[AI]" + a + "<EOS>";
|
||||
}
|
||||
|
||||
int epochs;
|
||||
double lr;
|
||||
std::cout << "Enter number of epochs: ";
|
||||
std::string epStr; std::getline(std::cin, epStr);
|
||||
epochs = std::stoi(epStr);
|
||||
|
||||
std::cout << "Enter learning rate (e.g. 0.1): ";
|
||||
std::string lrStr; std::getline(std::cin, lrStr);
|
||||
lr = std::stod(lrStr);
|
||||
|
||||
std::string finalData = "[SYS]" + currentSystemPrompt + content + "<EOS>";
|
||||
trainOnSequence(nn, tok, emb, finalData, epochs, lr);
|
||||
|
||||
} else if (cmdIn == "/sysPrompt") {
|
||||
std::cout << "Current System Prompt: " << currentSystemPrompt << std::endl;
|
||||
std::cout << "Enter new System Prompt: ";
|
||||
std::getline(std::cin, currentSystemPrompt);
|
||||
std::cout << "System Prompt updated!" << std::endl;
|
||||
|
||||
} else if (cmdIn == "/help") {
|
||||
std::cout << "\n--- MENU ---" << std::endl;
|
||||
std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
|
||||
|
||||
} else if (cmdIn == "/clr") {
|
||||
|
||||
std::cout << "\033[2J\033[1;1H";
|
||||
|
||||
std::cout << "Epochs: "; std::string ep; std::getline(std::cin, ep);
|
||||
std::cout << "LR: "; std::string lr; std::getline(std::cin, lr);
|
||||
std::cout << "\n\033[s";
|
||||
nn.trainOnSequence(tok, emb, content, std::stoi(ep), std::stod(lr), buildNetInput, [](const TrainStatus& s) {
|
||||
std::stringstream ss;
|
||||
if (s.totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e12 << "t";
|
||||
else if (s.totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e9 << "b";
|
||||
else if (s.totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e6 << "m";
|
||||
else if (s.totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e3 << "k";
|
||||
else ss << s.totalParams;
|
||||
std::cout << "\033[u";
|
||||
int width = 100;
|
||||
int pos = width * (s.percentage / 100.0f);
|
||||
std::cout << "[\033[1;36m";
|
||||
for(int i=0; i<width; i++) std::cout << (i < pos ? "■" : " ");
|
||||
std::cout << "\033[0m] " << std::fixed << std::setprecision(1) << s.percentage << "% | ETA: \033[1;33m" << formatTime(s.eta) << "\033[0m | Params: \033[1;32m" << ss.str() << "\033[0m\n";
|
||||
|
||||
std::cout << "Epoch: " << s.currentEpoch << "/" << s.totalEpochs
|
||||
<< " | Token: " << s.currentToken << "/" << s.totalTokens << "\n";
|
||||
std::cout << "Loss: " << std::fixed << std::setprecision(6) << s.currentLoss
|
||||
<< " | Ep Loss: " << s.epochLoss << "\n";
|
||||
std::cout << "Prev Ep Loss: " << s.lastEpochLoss << "\n";
|
||||
std::cout << "Speed: " << std::fixed << std::setprecision(1) << s.speed << " t/s\033[K" << std::flush;
|
||||
}
|
||||
);
|
||||
std::cout << "\n\nDone.\n";
|
||||
} else {
|
||||
std::string prompt = "[USER]" + cmdIn + "[AI]";
|
||||
std::vector<int> currentTokens = tok.textToTokens(prompt);
|
||||
|
||||
std::cout << "AI: ";
|
||||
long long totalParams = 0;
|
||||
for (int i = 0; i < numLayers - 1; i++) {
|
||||
long long weights = (long long)layers[i].size * layers[i + 1].size;
|
||||
long long biases = (long long)layers[i + 1].size;
|
||||
totalParams += (weights + biases);
|
||||
std::vector<int> ctx = tok.textToTokens(prompt);
|
||||
int eosId = -1; auto s = tok.textToTokens("<EOS>"); if(!s.empty()) eosId = s[0];
|
||||
std::cout << "\033[1;33mAI:\033[0m ";
|
||||
for (int g = 0; g < 256; g++) {
|
||||
std::vector<double> out = nn.feedForward(buildNetInput(ctx, emb));
|
||||
int bId = 0; double mV = -1.0;
|
||||
for (int i = 0; i < MAX_VOCAB; i++) if (out[i] > mV) { mV = out[i]; bId = i; }
|
||||
if (bId == eosId || bId == 0) break;
|
||||
std::string w = tok.getWord(bId);
|
||||
if (w != "[AI]" && w != "[USER]" && w != "[CLR]") std::cout << w << std::flush;
|
||||
ctx.push_back(bId); if (ctx.size() > MAX_CONTEXT) ctx.erase(ctx.begin());
|
||||
}
|
||||
std::string modelSizeStr;
|
||||
{
|
||||
std::stringstream ss;
|
||||
if (totalParams >= 1000000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t";
|
||||
else if (totalParams >= 1000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b";
|
||||
else if (totalParams >= 1000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m";
|
||||
else if (totalParams >= 1000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k";
|
||||
else ss << totalParams;
|
||||
modelSizeStr = ss.str();
|
||||
}
|
||||
|
||||
// Переменные для замера скорости
|
||||
auto startTime = std::chrono::high_resolution_clock::now();
|
||||
int tokensInSecond = 0;
|
||||
double tokensPerSec = 0;
|
||||
|
||||
for (int g = 0; g < 1024; g++) {
|
||||
std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
|
||||
|
||||
int bestId = 0;
|
||||
for (int i = 0; i < MAX_VOCAB; i++) {
|
||||
if (out[i] > out[bestId]) bestId = i;
|
||||
}
|
||||
|
||||
if (bestId == 0) break;
|
||||
|
||||
tokensInSecond++;
|
||||
auto currentTime = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double> elapsed = currentTime - startTime;
|
||||
|
||||
if (elapsed.count() >= 0.1) {
|
||||
tokensPerSec = tokensInSecond / elapsed.count();
|
||||
tokensInSecond = 0;
|
||||
startTime = currentTime;
|
||||
}
|
||||
|
||||
std::string word = tok.getWord(bestId);
|
||||
std::cout << word << std::flush;
|
||||
|
||||
std::cout << "\033[s" << "\033[999;1H" << "\033[2K"
|
||||
<< "--- [ID: " << bestId << "] | "
|
||||
<< "Speed: " << std::fixed << std::setprecision(1) << tokensPerSec*10 << " t/s | "
|
||||
<< "Model: " << modelSizeStr << " params ---"
|
||||
<< "\033[u" << std::flush;
|
||||
|
||||
currentTokens.push_back(bestId);
|
||||
}
|
||||
// Чтобы курсор не остался внизу после генерации
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user