FirstVersion

2026-05-03 21:02:34 +07:00
parent 05875d8aa2
commit 0d6b240099
12 changed files with 632 additions and 566 deletions
@@ -2,262 +2,104 @@
 #include <iomanip>
 #include <vector>
 #include <string>
-#include <iomanip>
 #include <sstream>
 #include <fstream>
-#include <algorithm>
+#include <chrono>
 #include "Xenith/core.hpp"
 #include "Xenith/token/token.hpp"
-#include <chrono>
-


 std::string currentSystemPrompt = ""; 

 LayerStructure_t layers[] = {
    {MAX_CONTEXT * EMBED_DIM, SIGMOID},
-    {256, SIGMOID},
+    {1024, SIGMOID},
    {MAX_VOCAB, SIGMOID}
 };

+// Render a duration given in seconds as zero-padded HH:MM:SS (the hour field widens past two digits when needed).
+std::string formatTime(double seconds) {
+    // Clamp before truncating: converting NaN, infinity or a value beyond the int range to an integer is undefined behaviour.
+    if (!(seconds > 0.0)) seconds = 0.0;  // negative values and NaN both end up as zero
+    const long long total = seconds < 2147483647.0 ? static_cast<long long>(seconds) : 2147483647LL;
+    std::ostringstream out;
+    out << std::setfill('0') << std::setw(2) << total / 3600 << ':' << std::setw(2) << (total % 3600) / 60 << ':' << std::setw(2) << total % 60;
+    return out.str();
+}
+
 std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
-    std::vector<double> netInput;
-    netInput.reserve(MAX_CONTEXT * EMBED_DIM);
-    int start = (int)tokens.size() - MAX_CONTEXT;
-    if (start < 0) start = 0;
+    std::vector<double> netInput; netInput.reserve(MAX_CONTEXT * EMBED_DIM);
+    int start = (int)tokens.size() - MAX_CONTEXT; if (start < 0) start = 0;
    int count = 0;
    for (int i = start; i < (int)tokens.size(); i++) {
        std::vector<double> v = emb.get(tokens[i]);
-        netInput.insert(netInput.end(), v.begin(), v.end());
-        count++;
-    }
-    while (count < MAX_CONTEXT) {
-        for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0);
-        count++;
+        netInput.insert(netInput.end(), v.begin(), v.end()); count++;
    }
+    while (count < MAX_CONTEXT) { for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0); count++; }
    return netInput;
 }

-void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std::string& dataset, int epochs, double lr) {
-    std::vector<int> allTokens = tok.textToTokens(dataset);
-    if (allTokens.size() < 2) {
-        std::cout << "Error: Sequence too short for training." << std::endl;
-        return;
-    }
-    int numLayers = sizeof(layers) / sizeof(layers[0]);
-    long long totalParams = 0;
-    for (int i = 0; i < numLayers - 1; i++) {
-        totalParams += (long long)layers[i].size * layers[i + 1].size + layers[i + 1].size;
-    }
-
-    std::string modelSizeStr;
-    {
-        std::stringstream ss;
-        if (totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << totalParams / 1e12 << "t";
-        else if (totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << totalParams / 1e9 << "b";
-        else if (totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << totalParams / 1e6 << "m";
-        else if (totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << totalParams / 1e3 << "k";
-        else ss << totalParams;
-        modelSizeStr = ss.str();
-    }
-
-    std::string sequenceStr = "";
-    for (int tId : allTokens) {
-        sequenceStr += "{" + tok.getWord(tId) + " (" + std::to_string(tId) + ")} ";
-    }
-
-    auto startTime = std::chrono::high_resolution_clock::now();
-    int trainSteps = 0;
-    double stepsPerSec = 0, maxLoss = 0;
-
-    std::cout << "Training logic: Next Token Prediction..." << std::endl;
-
-    std::cout << "\033[s\n\n"; 
-
-    for (int e = 1; e <= epochs; e++) {
-        double totalLoss = 0;
-        for (size_t i = 1; i < allTokens.size(); i++) {
-            std::vector<int> context(allTokens.begin(), allTokens.begin() + i);
-            std::vector<double> target(MAX_VOCAB, 0.0);
-            target[allTokens[i]] = 1.0;
-            totalLoss += nn.trainVulkan(buildNetInput(context, emb), target, lr);
-
-            trainSteps++;
-            auto currentTime = std::chrono::high_resolution_clock::now();
-            if (std::chrono::duration<double>(currentTime - startTime).count() >= 0.1) {
-                stepsPerSec = trainSteps / std::chrono::duration<double>(currentTime - startTime).count();
-                trainSteps = 0;
-                startTime = currentTime;
-            }
-            std::cout << "\033[u"; 
-
-            std::cout << "Epoch " << std::setw(4) << e << "/" << epochs 
-                    << " | Token: " << std::setw(4) << i << "/" << allTokens.size() 
-                    << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss 
-                    << " | Max Loss: " << maxLoss << "\033[K\n"; 
-
-            std::cout << "SPEED: " << std::setw(6) << std::fixed << std::setprecision(1) << stepsPerSec 
-                    << " st/s | MODEL: " << std::setw(7) << modelSizeStr 
-                    << " | CURRENT: [" << std::left << std::setw(15) << tok.getWord(allTokens[i]) << "]" 
-                    << "\033[K" << std::flush;
-
-        }
-        maxLoss = totalLoss;
-    }
-    std::cout << "\nDone!" << std::endl;
-}
-
-
-
 int main() {
-    Tokenizer tok;
-    Embedder emb(MAX_VOCAB, EMBED_DIM);
-    
-    int numLayers = sizeof(layers) / sizeof(layers[0]);
-
-    NeuralNetwork nn(layers, numLayers, true);
+    Tokenizer tok; Embedder emb(MAX_VOCAB, EMBED_DIM);
+    NeuralNetwork nn(layers, sizeof(layers)/sizeof(layers[0]), true);

    while (true) {
-        std::cout << "xentith~$ ";
-
-        std::string cmdIn;
-        std::getline(std::cin, cmdIn);
-
+        std::cout << "\033[1;32mxenith\033[0m~$ ";
+        std::string cmdIn; std::getline(std::cin, cmdIn);
        if (cmdIn == "/exit") break;

-        if (cmdIn == "/train") {
-            int epochs;
-            double lr;
-            std::cout << "--- Training Setup ---\n";
-            std::cout << "Enter number of epochs: ";
-            std::string epStr; std::getline(std::cin, epStr);
-            epochs = std::stoi(epStr);
-            
-            std::cout << "Enter learning rate (e.g. 0.1): ";
-            std::string lrStr; std::getline(std::cin, lrStr);
-            lr = std::stod(lrStr);
-
-            std::cout << "\n--- Example Content ---\n";
-            std::cout << "User: ";
-            std::string userPart;
-            std::getline(std::cin, userPart);
-
-            std::cout << "AI: ";
-            std::string aiPart;
-            std::getline(std::cin, aiPart);
-
-            std::string finalData = "[USER]" + userPart + 
-                                    "[AI]" + aiPart + "<EOS>";
-
-            std::cout << "\nTraining logic: Pattern Recognition..." << std::endl;
-            trainOnSequence(nn, tok, emb, finalData, epochs, lr);
-        }
-
-        else if (cmdIn == "/trainFile") {
+        if (cmdIn == "/train" || cmdIn == "/trainFile") {
            std::string content;
-            std::cout << "Enter filename: ";
-            std::string filename;
-            std::getline(std::cin, filename);
-            std::ifstream file(filename);
-            if (file.is_open()) {
-                std::stringstream buffer;
-                buffer << file.rdbuf();
-                content = buffer.str();
-                std::cout << "Loaded " << content.length() << " characters from file." << std::endl;
+            if (cmdIn == "/trainFile") {
+                // Read the whole file into `content`; if it cannot be opened, report it and go back to the prompt rather than train on empty text.
+                std::cout << "Filename: "; std::string fn; std::getline(std::cin, fn);
+                std::ifstream f(fn);
+                if (!f) { std::cout << "Could not open file!" << std::endl; continue; }
+                std::stringstream ss; ss << f.rdbuf(); content = ss.str();
            } else {
-                std::cout << "Could not open file!" << std::endl;
-                continue;
+                std::cout << "User: "; std::string u; std::getline(std::cin, u);
+                std::cout << "AI: "; std::string a; std::getline(std::cin, a);
+                content = "[CLR][USER]" + u + "[AI]" + a + "<EOS>";
            }
-
-            int epochs;
-            double lr;
-            std::cout << "Enter number of epochs: ";
-            std::string epStr; std::getline(std::cin, epStr);
-            epochs = std::stoi(epStr);
-            
-            std::cout << "Enter learning rate (e.g. 0.1): ";
-            std::string lrStr; std::getline(std::cin, lrStr);
-            lr = std::stod(lrStr);
-
-            std::string finalData = "[SYS]" + currentSystemPrompt + content + "<EOS>";
-            trainOnSequence(nn, tok, emb, finalData, epochs, lr);
-
-        } else if (cmdIn == "/sysPrompt") {
-            std::cout << "Current System Prompt: " << currentSystemPrompt << std::endl;
-            std::cout << "Enter new System Prompt: ";
-            std::getline(std::cin, currentSystemPrompt);
-            std::cout << "System Prompt updated!" << std::endl;
-
-        } else if (cmdIn == "/help") {
-            std::cout << "\n--- MENU ---" << std::endl;
-            std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
-
-        } else if (cmdIn == "/clr") {
-
-            std::cout << "\033[2J\033[1;1H";
-
+            std::cout << "Epochs: "; std::string ep; std::getline(std::cin, ep);
+            std::cout << "LR: "; std::string lr; std::getline(std::cin, lr);
+            std::cout << "\n\033[s";
+            // Run training; each time it is invoked, the callback repaints a five-line status panel from the cursor position it restores with ESC[u.
+            nn.trainOnSequence(tok, emb, content, std::stoi(ep), std::stod(lr), buildNetInput, [](const TrainStatus& s) {
+                    std::stringstream ss;  // parameter count, abbreviated with a k/m/b/t suffix
+                    if (s.totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e12 << "t";
+                    else if (s.totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e9 << "b";
+                    else if (s.totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e6 << "m";
+                    else if (s.totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << s.totalParams / 1e3 << "k";
+                    else ss << s.totalParams;
+                    // Every panel line ends with ESC[K (erase to end of line) so a shorter redraw cannot leave stale characters behind.
+                    const int width = 100;
+                    const int pos = static_cast<int>(width * (s.percentage / 100.0f));
+                    std::cout << "\033[u" << "[\033[1;36m";
+                    for (int i = 0; i < width; i++) std::cout << (i < pos ? "■" : " ");
+                    std::cout << "\033[0m] " << std::fixed << std::setprecision(1) << s.percentage << "% | ETA: \033[1;33m" << formatTime(s.eta) << "\033[0m | Params: \033[1;32m" << ss.str() << "\033[0m\033[K\n";
+                    std::cout << "Epoch: " << s.currentEpoch << "/" << s.totalEpochs
+                              << " | Token: " << s.currentToken << "/" << s.totalTokens << "\033[K\n";
+                    std::cout << "Loss: " << std::fixed << std::setprecision(6) << s.currentLoss
+                              << " | Ep Loss: " << s.epochLoss << "\033[K\n";
+                    std::cout << "Prev Ep Loss: " << s.lastEpochLoss << "\033[K\n";
+                    std::cout << "Speed: " << std::fixed << std::setprecision(1) << s.speed << " t/s\033[K" << std::flush;
+                }
+            );
+            std::cout << "\n\nDone.\n";
        } else {
            std::string prompt = "[USER]" + cmdIn + "[AI]";
-            std::vector<int> currentTokens = tok.textToTokens(prompt);
-
-            std::cout << "AI: ";
-            long long totalParams = 0;
-            for (int i = 0; i < numLayers - 1; i++) {
-                long long weights = (long long)layers[i].size * layers[i + 1].size;
-                long long biases = (long long)layers[i + 1].size;
-                totalParams += (weights + biases);
+            std::vector<int> ctx = tok.textToTokens(prompt);
+            int eosId = -1; auto s = tok.textToTokens("<EOS>"); if(!s.empty()) eosId = s[0];
+            std::cout << "\033[1;33mAI:\033[0m ";
+            for (int g = 0; g < 256; g++) {
+                std::vector<double> out = nn.feedForward(buildNetInput(ctx, emb));
+                int bId = 0; double mV = -1.0;
+                for (int i = 0; i < MAX_VOCAB; i++) if (out[i] > mV) { mV = out[i]; bId = i; }
+                if (bId == eosId || bId == 0) break;
+                std::string w = tok.getWord(bId);
+                if (w != "[AI]" && w != "[USER]" && w != "[CLR]") std::cout << w << std::flush;
+                ctx.push_back(bId); if (ctx.size() > MAX_CONTEXT) ctx.erase(ctx.begin());
            }
-            std::string modelSizeStr;
-            {
-                std::stringstream ss;
-                if (totalParams >= 1000000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t";
-                else if (totalParams >= 1000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b";
-                else if (totalParams >= 1000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m";
-                else if (totalParams >= 1000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k";
-                else ss << totalParams;
-                modelSizeStr = ss.str();
-            }
-
-            // Переменные для замера скорости
-            auto startTime = std::chrono::high_resolution_clock::now();
-            int tokensInSecond = 0;
-            double tokensPerSec = 0;
-
-            for (int g = 0; g < 1024; g++) {
-                std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
-
-                int bestId = 0;
-                for (int i = 0; i < MAX_VOCAB; i++) {
-                    if (out[i] > out[bestId]) bestId = i;
-                }
-
-                if (bestId == 0) break;
-
-                tokensInSecond++;
-                auto currentTime = std::chrono::high_resolution_clock::now();
-                std::chrono::duration<double> elapsed = currentTime - startTime;
-
-                if (elapsed.count() >= 0.1) {
-                    tokensPerSec = tokensInSecond / elapsed.count();
-                    tokensInSecond = 0;
-                    startTime = currentTime;
-                }
-
-                std::string word = tok.getWord(bestId);
-                std::cout << word << std::flush;
-
-                std::cout << "\033[s" << "\033[999;1H" << "\033[2K"
-                    << "--- [ID: " << bestId << "] | "
-                    << "Speed: " << std::fixed << std::setprecision(1) << tokensPerSec*10 << " t/s | "
-                    << "Model: " << modelSizeStr << " params ---"
-                    << "\033[u" << std::flush;
-
-                currentTokens.push_back(bestId);
-            }
-            // Чтобы курсор не остался внизу после генерации
            std::cout << std::endl;
        }
    }
-
    return 0;
 }