196 lines
7.0 KiB
C++
196 lines
7.0 KiB
C++
#include <iostream>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <iomanip>
|
|
#include <sstream>
|
|
#include <fstream>
|
|
#include <algorithm>
|
|
#include "Xenith/core.h"
|
|
#include "Xenith/token/token.h"
|
|
|
|
// Global settings
|
|
// System prompt text placed right after the "[SYS]" tag of every training
// sequence and inference prompt built in this file. It can be replaced at
// runtime through the /sysPrompt command.
std::string currentSystemPrompt = "я робот";
|
|
|
|
// Prints a one-line banner with the total number of trainable parameters.
//
// For each pair of adjacent layers the count is
// layers[i].size * layers[i + 1].size weights plus layers[i + 1].size biases.
// Totals of 1000 or more are printed with one decimal place and a
// k / m / b / t suffix; smaller totals are printed as a plain integer.
//
// layers    - array of layer descriptors; only the `size` member is read.
// numLayers - number of entries in `layers`; fewer than 2 yields a total of 0.
void printParameterCount(LayerStructure_t layers[], int numLayers) {
    long long totalParams = 0;
    for (int i = 0; i + 1 < numLayers; ++i) {
        const long long fanIn = static_cast<long long>(layers[i].size);
        const long long fanOut = static_cast<long long>(layers[i + 1].size);
        totalParams += fanIn * fanOut + fanOut;  // weights + biases
    }

    struct Unit {
        long long threshold;
        const char* suffix;
    };
    // Ordered from largest to smallest so the first match wins.
    static const Unit kUnits[] = {
        {1000000000000LL, "t"},
        {1000000000LL, "b"},
        {1000000LL, "m"},
        {1000LL, "k"},
    };

    // Format into a local stream: std::fixed and std::setprecision are sticky,
    // so applying them to std::cout directly would change how every later
    // floating-point value in the program is printed.
    std::ostringstream sizeLabel;
    bool scaled = false;
    for (const Unit& unit : kUnits) {
        if (totalParams >= unit.threshold) {
            sizeLabel << std::fixed << std::setprecision(1)
                      << static_cast<double>(totalParams) / static_cast<double>(unit.threshold)
                      << unit.suffix;
            scaled = true;
            break;
        }
    }
    if (!scaled) {
        sizeLabel << totalParams;
    }

    std::cout << "--- Xenith AI (Model Size: " << sizeLabel.str() << " parameters) ---" << std::endl;
}
|
|
|
|
std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
|
|
std::vector<double> netInput;
|
|
netInput.reserve(MAX_CONTEXT * EMBED_DIM);
|
|
int start = (int)tokens.size() - MAX_CONTEXT;
|
|
if (start < 0) start = 0;
|
|
int count = 0;
|
|
for (int i = start; i < (int)tokens.size(); i++) {
|
|
std::vector<double> v = emb.get(tokens[i]);
|
|
netInput.insert(netInput.end(), v.begin(), v.end());
|
|
count++;
|
|
}
|
|
while (count < MAX_CONTEXT) {
|
|
for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0);
|
|
count++;
|
|
}
|
|
return netInput;
|
|
}
|
|
|
|
// Trains the network on next-token prediction over one text sequence.
//
// The text is tokenized once. For every position i >= 1 the network is
// trained with the preceding tokens as input (encoded by buildNetInput) and a
// one-hot vector of length MAX_VOCAB marking token i as the target. After each
// epoch the summed loss returned by nn.train() is printed on a single
// carriage-return-refreshed line.
//
// nn      - network to train.
// tok     - tokenizer used to convert `dataset` into token ids.
// emb     - embedder forwarded to buildNetInput.
// dataset - training text; must tokenize to at least 2 tokens, otherwise an
//           error is printed and nothing is trained.
// epochs  - number of passes over the sequence; values below 1 train nothing.
// lr      - learning rate forwarded to nn.train().
void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std::string& dataset, int epochs, double lr) {
    const std::vector<int> allTokens = tok.textToTokens(dataset);
    if (allTokens.size() < 2) {
        std::cout << "Error: Sequence too short for training." << std::endl;
        return;
    }

    std::cout << "Training logic: Next Token Prediction..." << std::endl;

    // buildNetInput only reads the last MAX_CONTEXT tokens of what it is given,
    // so copying just that window yields the same network input as copying the
    // whole prefix, without the quadratic cost on long sequences.
    const size_t window = static_cast<size_t>(MAX_CONTEXT);

    // Buffers are reused across samples to avoid one allocation per token.
    std::vector<int> context;
    std::vector<double> target;
    size_t skippedTargets = 0;

    for (int e = 1; e <= epochs; e++) {
        double totalLoss = 0.0;
        skippedTargets = 0;

        for (size_t i = 1; i < allTokens.size(); i++) {
            const int nextToken = allTokens[i];
            // The target vector has MAX_VOCAB entries; an id outside that range
            // would write past its end, so such samples are skipped instead.
            if (nextToken < 0 || nextToken >= MAX_VOCAB) {
                ++skippedTargets;
                continue;
            }

            target.assign(MAX_VOCAB, 0.0);
            target[nextToken] = 1.0;

            const size_t first = i > window ? i - window : 0;
            context.assign(allTokens.begin() + first, allTokens.begin() + i);

            totalLoss += nn.train(buildNetInput(context, emb), target, lr);
        }

        std::cout << "\rEpoch " << e << "/" << epochs << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss << std::flush;
    }

    std::cout << "\nDone!" << std::endl;
    if (skippedTargets > 0) {
        std::cout << "Warning: skipped " << skippedTargets
                  << " target token(s) per epoch with ids outside the vocabulary range." << std::endl;
    }
}
|
|
|
|
int main() {
|
|
Tokenizer tok;
|
|
Embedder emb(MAX_VOCAB, EMBED_DIM);
|
|
LayerStructure_t layers[] = {
|
|
{MAX_CONTEXT * EMBED_DIM, SIGMOID},
|
|
{16, SIGMOID},
|
|
{MAX_VOCAB, SIGMOID}
|
|
};
|
|
int numLayers = sizeof(layers) / sizeof(layers[0]);
|
|
|
|
NeuralNetwork nn(layers, numLayers);
|
|
printParameterCount(layers, numLayers);
|
|
|
|
std::cout << "\n--- MENU ---" << std::endl;
|
|
std::cout << "/train\n/trainFile\n/help\n/exit\n";
|
|
|
|
while (true) {
|
|
std::cout << "\nxentith~$ ";
|
|
|
|
std::string cmdIn;
|
|
std::getline(std::cin, cmdIn);
|
|
|
|
if (cmdIn == "/exit") break;
|
|
|
|
if (cmdIn == "/train") {
|
|
int epochs;
|
|
double lr;
|
|
std::cout << "--- Training Setup ---\n";
|
|
std::cout << "Enter number of epochs: ";
|
|
std::string epStr; std::getline(std::cin, epStr);
|
|
epochs = std::stoi(epStr);
|
|
|
|
std::cout << "Enter learning rate (e.g. 0.1): ";
|
|
std::string lrStr; std::getline(std::cin, lrStr);
|
|
lr = std::stod(lrStr);
|
|
|
|
std::cout << "\n--- Example Content ---\n";
|
|
std::cout << "User: ";
|
|
std::string userPart;
|
|
std::getline(std::cin, userPart);
|
|
|
|
std::cout << "AI: ";
|
|
std::string aiPart;
|
|
std::getline(std::cin, aiPart);
|
|
|
|
std::string finalData = "[SYS]" + currentSystemPrompt +
|
|
"[USER]" + userPart +
|
|
"[AI]" + aiPart + "<EOS>";
|
|
|
|
std::cout << "\nTraining logic: Pattern Recognition..." << std::endl;
|
|
trainOnSequence(nn, tok, emb, finalData, epochs, lr);
|
|
}
|
|
|
|
else if (cmdIn == "/trainFile") {
|
|
std::string content;
|
|
std::cout << "Enter filename: ";
|
|
std::string filename;
|
|
std::getline(std::cin, filename);
|
|
std::ifstream file(filename);
|
|
if (file.is_open()) {
|
|
std::stringstream buffer;
|
|
buffer << file.rdbuf();
|
|
content = buffer.str();
|
|
std::cout << "Loaded " << content.length() << " characters from file." << std::endl;
|
|
} else {
|
|
std::cout << "Could not open file!" << std::endl;
|
|
continue;
|
|
}
|
|
|
|
int epochs;
|
|
double lr;
|
|
std::cout << "Enter number of epochs: ";
|
|
std::string epStr; std::getline(std::cin, epStr);
|
|
epochs = std::stoi(epStr);
|
|
|
|
std::cout << "Enter learning rate (e.g. 0.1): ";
|
|
std::string lrStr; std::getline(std::cin, lrStr);
|
|
lr = std::stod(lrStr);
|
|
|
|
std::string finalData = "[SYS]" + currentSystemPrompt + content + "<EOS>";
|
|
trainOnSequence(nn, tok, emb, finalData, epochs, lr);
|
|
|
|
} else if (cmdIn == "/sysPrompt") {
|
|
std::cout << "Current System Prompt: " << currentSystemPrompt << std::endl;
|
|
std::cout << "Enter new System Prompt: ";
|
|
std::getline(std::cin, currentSystemPrompt);
|
|
std::cout << "System Prompt updated!" << std::endl;
|
|
|
|
} else if (cmdIn == "/help") {
|
|
std::cout << "\n--- MENU ---" << std::endl;
|
|
std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
|
|
|
|
} else {
|
|
std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]";
|
|
std::vector<int> currentTokens = tok.textToTokens(prompt);
|
|
|
|
std::cout << "AI: ";
|
|
for (int g = 0; g < 30; g++) {
|
|
std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
|
|
|
|
int bestId = 0;
|
|
for (int i = 0; i < MAX_VOCAB; i++) {
|
|
if (out[i] > out[bestId]) bestId = i;
|
|
}
|
|
|
|
if (bestId == 0) break;
|
|
|
|
std::string word = tok.getWord(bestId);
|
|
std::cout << word << std::flush;
|
|
|
|
currentTokens.push_back(bestId);
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
} |