update console design
This commit is contained in:
+67
-11
@@ -2,6 +2,19 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
|
||||||
|
#define USE_PARALLEL // Закомментируйте, чтобы отключить параллелизм
|
||||||
|
#define MAX_THREADS 0 // Укажите число ядер (0 — использовать все доступные)
|
||||||
|
|
||||||
|
#ifdef USE_PARALLEL
|
||||||
|
#include <omp.h>
|
||||||
|
#define OMP_PARALLEL _Pragma("omp parallel for")
|
||||||
|
#define OMP_SET_THREADS() { if (MAX_THREADS > 0) omp_set_num_threads(MAX_THREADS); }
|
||||||
|
#else
|
||||||
|
#define OMP_PARALLEL
|
||||||
|
#define OMP_SET_THREADS()
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) : numLayers(count) {
|
NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) : numLayers(count) {
|
||||||
for (int i = 0; i < count; i++) sizes.push_back(layers[i].size);
|
for (int i = 0; i < count; i++) sizes.push_back(layers[i].size);
|
||||||
for (int i = 0; i < count - 1; i++) {
|
for (int i = 0; i < count - 1; i++) {
|
||||||
@@ -19,45 +32,88 @@ NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) : numLayers(c
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<double> NeuralNetwork::feedForward(const std::vector<double>& input) {
|
std::vector<double> NeuralNetwork::feedForward(const std::vector<double>& input) {
|
||||||
|
OMP_SET_THREADS(); // Применяем лимит ядер
|
||||||
|
|
||||||
outputs.clear();
|
outputs.clear();
|
||||||
outputs.push_back(input);
|
outputs.push_back(input);
|
||||||
|
|
||||||
std::vector<double> curr = input;
|
std::vector<double> curr = input;
|
||||||
|
|
||||||
for (int i = 0; i < numLayers - 1; i++) {
|
for (int i = 0; i < numLayers - 1; i++) {
|
||||||
std::vector<double> next;
|
// Заранее готовим вектор нужного размера для текущего слоя
|
||||||
for (int j = 0; j < sizes[i+1]; j++) {
|
std::vector<double> next(sizes[i + 1]);
|
||||||
|
|
||||||
|
// Распараллеливаем расчет каждого нейрона в слое
|
||||||
|
OMP_PARALLEL
|
||||||
|
for (int j = 0; j < sizes[i + 1]; j++) {
|
||||||
double sum = biases[i][j];
|
double sum = biases[i][j];
|
||||||
for (int k = 0; k < (int)curr.size(); k++) sum += curr[k] * weights[i][j][k];
|
|
||||||
next.push_back(1.0 / (1.0 + exp(-sum)));
|
// Внутренний цикл обычно оставляем последовательным,
|
||||||
|
// так как создание потоков здесь даст больше тормозов, чем пользы
|
||||||
|
for (int k = 0; k < (int)curr.size(); k++) {
|
||||||
|
sum += curr[k] * weights[i][j][k];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Сигмоида. Теперь пишем по индексу j — это безопасно для потоков
|
||||||
|
next[j] = 1.0 / (1.0 + exp(-sum));
|
||||||
|
}
|
||||||
|
|
||||||
curr = next;
|
curr = next;
|
||||||
outputs.push_back(curr);
|
outputs.push_back(curr);
|
||||||
}
|
}
|
||||||
return curr;
|
return curr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double NeuralNetwork::train(const std::vector<double>& input, const std::vector<double>& target, double lr) {
|
double NeuralNetwork::train(const std::vector<double>& input, const std::vector<double>& target, double lr) {
|
||||||
|
// 0. Устанавливаем количество потоков (если включено в define)
|
||||||
|
OMP_SET_THREADS();
|
||||||
|
|
||||||
|
// 1. Прямой проход (получаем предсказание)
|
||||||
std::vector<double> pred = feedForward(input);
|
std::vector<double> pred = feedForward(input);
|
||||||
|
|
||||||
std::vector<std::vector<double>> errors(numLayers);
|
std::vector<std::vector<double>> errors(numLayers);
|
||||||
errors[numLayers-1].resize(sizes[numLayers-1]);
|
errors[numLayers - 1].resize(sizes[numLayers - 1]);
|
||||||
|
|
||||||
double totalErr = 0;
|
double totalErr = 0;
|
||||||
for (int i = 0; i < sizes[numLayers-1]; i++) {
|
|
||||||
|
// 2. Вычисление ошибки на выходном слое
|
||||||
|
for (int i = 0; i < sizes[numLayers - 1]; i++) {
|
||||||
double e = target[i] - pred[i];
|
double e = target[i] - pred[i];
|
||||||
errors[numLayers-1][i] = e * pred[i] * (1.0 - pred[i]);
|
// Производная функции активации (сигмоиды): pred * (1 - pred)
|
||||||
|
errors[numLayers - 1][i] = e * pred[i] * (1.0 - pred[i]);
|
||||||
totalErr += e * e;
|
totalErr += e * e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 3. Обратное распространение ошибки (Backpropagation) по скрытым слоям
|
||||||
for (int i = numLayers - 2; i > 0; i--) {
|
for (int i = numLayers - 2; i > 0; i--) {
|
||||||
errors[i].resize(sizes[i]);
|
errors[i].resize(sizes[i]);
|
||||||
|
|
||||||
|
// Параллелим вычисления для каждого нейрона в слое
|
||||||
|
OMP_PARALLEL
|
||||||
for (int j = 0; j < sizes[i]; j++) {
|
for (int j = 0; j < sizes[i]; j++) {
|
||||||
double e = 0;
|
double e = 0;
|
||||||
for (int k = 0; k < sizes[i+1]; k++) e += errors[i+1][k] * weights[i][k][j];
|
for (int k = 0; k < sizes[i + 1]; k++) {
|
||||||
|
e += errors[i + 1][k] * weights[i][k][j];
|
||||||
|
}
|
||||||
|
// outputs[i][j] — это сохраненный результат активации этого нейрона из feedForward
|
||||||
errors[i][j] = e * outputs[i][j] * (1.0 - outputs[i][j]);
|
errors[i][j] = e * outputs[i][j] * (1.0 - outputs[i][j]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 4. Обновление весов и смещений (Biases)
|
||||||
for (int i = 0; i < numLayers - 1; i++) {
|
for (int i = 0; i < numLayers - 1; i++) {
|
||||||
for (int j = 0; j < sizes[i+1]; j++) {
|
|
||||||
for (int k = 0; k < sizes[i]; k++) weights[i][j][k] += lr * errors[i+1][j] * outputs[i][k];
|
// Параллелим обновление весов следующего слоя
|
||||||
biases[i][j] += lr * errors[i+1][j];
|
OMP_PARALLEL
|
||||||
|
for (int j = 0; j < sizes[i + 1]; j++) {
|
||||||
|
for (int k = 0; k < sizes[i]; k++) {
|
||||||
|
// Градиентный спуск: прибавляем (lr * ошибка * входной сигнал)
|
||||||
|
weights[i][j][k] += lr * errors[i + 1][j] * outputs[i][k];
|
||||||
|
}
|
||||||
|
biases[i][j] += lr * errors[i + 1][j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return totalErr;
|
return totalErr;
|
||||||
}
|
}
|
||||||
+14
-13
@@ -11,21 +11,22 @@ public:
|
|||||||
std::map<int, std::string> idToWord;
|
std::map<int, std::string> idToWord;
|
||||||
|
|
||||||
Tokenizer() {
|
Tokenizer() {
|
||||||
add("<EOS>"); // 0
|
add("<EOS>"); add("[SYS]"); add("[USER]"); add("[AI]"); add(" "); add("\n");
|
||||||
add("[SYS]"); // 1
|
add("."); add(","); add("!"); add("?"); add(":"); add(";"); add("-"); add("\""); add("("); add(")");
|
||||||
add("[USER]"); // 2
|
add("а"); add("б"); add("в"); add("г"); add("д"); add("е"); add("ё"); add("ж");
|
||||||
add("[AI]"); // 3
|
add("з"); add("и"); add("й"); add("к"); add("л"); add("м"); add("н"); add("о");
|
||||||
add(" "); // 4
|
add("п"); add("р"); add("с"); add("т"); add("у"); add("ф"); add("х"); add("ц");
|
||||||
add("\n"); // 5
|
add("ч"); add("ш"); add("щ"); add("ъ"); add("ы"); add("ь"); add("э"); add("ю"); add("я");
|
||||||
add("привет"); // 6
|
add("и"); add("в"); add("не"); add("на"); add("я"); add("что"); add("тот"); add("быть");
|
||||||
add("как"); // 7
|
add("с"); add("а"); add("весь"); add("это"); add("как"); add("она"); add("по"); add("но");
|
||||||
add("дела"); // 8
|
add("они"); add("к"); add("у"); add("ты"); add("из"); add("мы"); add("за"); add("вы");
|
||||||
add("?"); // 9
|
add("привет"); add("дела"); add("робот"); add("хорошо"); add("спасибо");
|
||||||
add("я"); // 10
|
add("да"); add("нет"); add("могу"); add("помочь"); add("знаю"); add("кто");
|
||||||
add("робот"); // 11
|
add("где"); add("когда"); add("почему"); add("хочу"); add("очень");
|
||||||
add("хорошо"); // 12
|
add("тебя"); add("зовут"); add("BiPy");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void add(std::string word);
|
void add(std::string word);
|
||||||
int getID(std::string word);
|
int getID(std::string word);
|
||||||
std::string getWord(int id);
|
std::string getWord(int id);
|
||||||
|
|||||||
+2
-2
@@ -1,9 +1,9 @@
|
|||||||
#ifndef TYPEDEF_H
|
#ifndef TYPEDEF_H
|
||||||
#define TYPEDEF_H
|
#define TYPEDEF_H
|
||||||
|
|
||||||
const int MAX_CONTEXT = 4; // Сколько токенов видит сеть
|
const int MAX_CONTEXT = 8; // Сколько токенов видит сеть
|
||||||
const int EMBED_DIM = 4; // Размер вектора одного токена
|
const int EMBED_DIM = 4; // Размер вектора одного токена
|
||||||
const int MAX_VOCAB = 13; // Размер словаря
|
const int MAX_VOCAB = 90; // Размер словаря
|
||||||
|
|
||||||
typedef enum { SIGMOID } FunctionActivate_t;
|
typedef enum { SIGMOID } FunctionActivate_t;
|
||||||
typedef struct { int size; FunctionActivate_t activate; } LayerStructure_t;
|
typedef struct { int size; FunctionActivate_t activate; } LayerStructure_t;
|
||||||
|
|||||||
+1
-1
@@ -1 +1 @@
|
|||||||
[USER]привет[AI]привет как дела?<EOS>
|
[SYS]ты полезный робот помощник[USER]привет[AI]привет как дела?
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <iomanip>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
@@ -7,32 +8,16 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "Xenith/core.h"
|
#include "Xenith/core.h"
|
||||||
#include "Xenith/token/token.h"
|
#include "Xenith/token/token.h"
|
||||||
|
#include <windows.h>
|
||||||
|
#include <chrono>
|
||||||
|
|
||||||
// Глобальные настройки
|
std::string currentSystemPrompt = "";
|
||||||
std::string currentSystemPrompt = "я робот";
|
|
||||||
|
|
||||||
void printParameterCount(LayerStructure_t layers[], int numLayers) {
|
LayerStructure_t layers[] = {
|
||||||
long long totalParams = 0;
|
{MAX_CONTEXT * EMBED_DIM, SIGMOID},
|
||||||
for (int i = 0; i < numLayers - 1; i++) {
|
{16, SIGMOID},
|
||||||
long long weights = (long long)layers[i].size * layers[i + 1].size;
|
{MAX_VOCAB, SIGMOID}
|
||||||
long long biases = (long long)layers[i + 1].size;
|
};
|
||||||
totalParams += (weights + biases);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "--- Xenith AI (Model Size: ";
|
|
||||||
if (totalParams >= 1000000000000LL)
|
|
||||||
std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t";
|
|
||||||
else if (totalParams >= 1000000000LL)
|
|
||||||
std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b";
|
|
||||||
else if (totalParams >= 1000000LL)
|
|
||||||
std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m";
|
|
||||||
else if (totalParams >= 1000LL)
|
|
||||||
std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k";
|
|
||||||
else
|
|
||||||
std::cout << totalParams;
|
|
||||||
|
|
||||||
std::cout << " parameters) ---" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
|
std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
|
||||||
std::vector<double> netInput;
|
std::vector<double> netInput;
|
||||||
@@ -58,42 +43,89 @@ void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std
|
|||||||
std::cout << "Error: Sequence too short for training." << std::endl;
|
std::cout << "Error: Sequence too short for training." << std::endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
int numLayers = sizeof(layers) / sizeof(layers[0]);
|
||||||
|
long long totalParams = 0;
|
||||||
|
for (int i = 0; i < numLayers - 1; i++) {
|
||||||
|
totalParams += (long long)layers[i].size * layers[i + 1].size + layers[i + 1].size;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string modelSizeStr;
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
if (totalParams >= 1e12) ss << std::fixed << std::setprecision(1) << totalParams / 1e12 << "t";
|
||||||
|
else if (totalParams >= 1e9) ss << std::fixed << std::setprecision(1) << totalParams / 1e9 << "b";
|
||||||
|
else if (totalParams >= 1e6) ss << std::fixed << std::setprecision(1) << totalParams / 1e6 << "m";
|
||||||
|
else if (totalParams >= 1e3) ss << std::fixed << std::setprecision(1) << totalParams / 1e3 << "k";
|
||||||
|
else ss << totalParams;
|
||||||
|
modelSizeStr = ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string sequenceStr = "";
|
||||||
|
for (int tId : allTokens) {
|
||||||
|
sequenceStr += "{" + tok.getWord(tId) + " (" + std::to_string(tId) + ")} ";
|
||||||
|
}
|
||||||
|
|
||||||
|
auto startTime = std::chrono::high_resolution_clock::now();
|
||||||
|
int trainSteps = 0;
|
||||||
|
double stepsPerSec = 0, maxLoss = 0;
|
||||||
|
|
||||||
std::cout << "Training logic: Next Token Prediction..." << std::endl;
|
std::cout << "Training logic: Next Token Prediction..." << std::endl;
|
||||||
|
|
||||||
|
std::cout << "\033[s\033[999;1H" << "\033[2K" << "\033[1;30m" << "\033[F" << "\r"
|
||||||
|
<< "DATA: " << (sequenceStr.length() > 100 ? sequenceStr.substr(0, 200) : sequenceStr) << "\033[0m\033[u";
|
||||||
|
|
||||||
for (int e = 1; e <= epochs; e++) {
|
for (int e = 1; e <= epochs; e++) {
|
||||||
double totalLoss = 0;
|
double totalLoss = 0;
|
||||||
for (size_t i = 1; i < allTokens.size(); i++) {
|
for (size_t i = 1; i < allTokens.size(); i++) {
|
||||||
std::vector<int> context;
|
std::vector<int> context(allTokens.begin(), allTokens.begin() + i);
|
||||||
for (size_t j = 0; j < i; j++) context.push_back(allTokens[j]);
|
|
||||||
|
|
||||||
std::vector<double> target(MAX_VOCAB, 0.0);
|
std::vector<double> target(MAX_VOCAB, 0.0);
|
||||||
target[allTokens[i]] = 1.0;
|
target[allTokens[i]] = 1.0;
|
||||||
|
|
||||||
totalLoss += nn.train(buildNetInput(context, emb), target, lr);
|
totalLoss += nn.train(buildNetInput(context, emb), target, lr);
|
||||||
|
|
||||||
|
trainSteps++;
|
||||||
|
auto currentTime = std::chrono::high_resolution_clock::now();
|
||||||
|
if (std::chrono::duration<double>(currentTime - startTime).count() >= 1.0) {
|
||||||
|
stepsPerSec = trainSteps / std::chrono::duration<double>(currentTime - startTime).count();
|
||||||
|
trainSteps = 0;
|
||||||
|
startTime = currentTime;
|
||||||
}
|
}
|
||||||
std::cout << "\rEpoch " << e << "/" << epochs << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss << std::flush;
|
std::cout << "\rEpoch " << std::setw(4) << e << "/" << epochs
|
||||||
|
<< " | Token: " << std::setw(3) << i << "/" << allTokens.size()
|
||||||
|
<< " | Loss: " << std::fixed << std::setprecision(6) << totalLoss
|
||||||
|
<< " | Max Loss: " << std::fixed << std::setprecision(6) << maxLoss << " \033[s";
|
||||||
|
|
||||||
|
std::cout << "\033[999;1H" << "\r";
|
||||||
|
|
||||||
|
std::cout << "SPEED: " << std::setw(6) << std::fixed << std::setprecision(1) << stepsPerSec << " st/s"
|
||||||
|
<< " | MODEL: " << std::setw(7) << modelSizeStr
|
||||||
|
<< " | CURRENT: [" << std::left << std::setw(15) << tok.getWord(allTokens[i]) << "] ("
|
||||||
|
<< std::right << std::setw(4) << allTokens[i] << ") ";
|
||||||
|
|
||||||
|
std::cout << "\033[K" << "\033[0m";
|
||||||
|
|
||||||
|
std::cout << "\033[997;1H" << "\r" << std::flush << "\033[u";
|
||||||
|
|
||||||
|
}
|
||||||
|
maxLoss = totalLoss;
|
||||||
}
|
}
|
||||||
std::cout << "\nDone!" << std::endl;
|
std::cout << "\nDone!" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
SetConsoleOutputCP(CP_UTF8);
|
||||||
|
SetConsoleCP(CP_UTF8);
|
||||||
|
|
||||||
Tokenizer tok;
|
Tokenizer tok;
|
||||||
Embedder emb(MAX_VOCAB, EMBED_DIM);
|
Embedder emb(MAX_VOCAB, EMBED_DIM);
|
||||||
LayerStructure_t layers[] = {
|
|
||||||
{MAX_CONTEXT * EMBED_DIM, SIGMOID},
|
|
||||||
{16, SIGMOID},
|
|
||||||
{MAX_VOCAB, SIGMOID}
|
|
||||||
};
|
|
||||||
int numLayers = sizeof(layers) / sizeof(layers[0]);
|
int numLayers = sizeof(layers) / sizeof(layers[0]);
|
||||||
|
|
||||||
NeuralNetwork nn(layers, numLayers);
|
NeuralNetwork nn(layers, numLayers);
|
||||||
printParameterCount(layers, numLayers);
|
|
||||||
|
|
||||||
std::cout << "\n--- MENU ---" << std::endl;
|
|
||||||
std::cout << "/train\n/trainFile\n/help\n/exit\n";
|
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
std::cout << "\nxentith~$ ";
|
std::cout << "xentith~$ ";
|
||||||
|
|
||||||
std::string cmdIn;
|
std::string cmdIn;
|
||||||
std::getline(std::cin, cmdIn);
|
std::getline(std::cin, cmdIn);
|
||||||
@@ -164,16 +196,43 @@ int main() {
|
|||||||
std::getline(std::cin, currentSystemPrompt);
|
std::getline(std::cin, currentSystemPrompt);
|
||||||
std::cout << "System Prompt updated!" << std::endl;
|
std::cout << "System Prompt updated!" << std::endl;
|
||||||
|
|
||||||
} else if (cmdIn == "/help") {
|
}
|
||||||
|
else if (cmdIn == "/help") {
|
||||||
std::cout << "\n--- MENU ---" << std::endl;
|
std::cout << "\n--- MENU ---" << std::endl;
|
||||||
std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
|
std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
|
||||||
|
|
||||||
|
} else if (cmdIn == "/clr") {
|
||||||
|
|
||||||
|
std::cout << "\033[2J\033[1;1H";
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]";
|
std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]";
|
||||||
std::vector<int> currentTokens = tok.textToTokens(prompt);
|
std::vector<int> currentTokens = tok.textToTokens(prompt);
|
||||||
|
|
||||||
std::cout << "AI: ";
|
std::cout << "AI: ";
|
||||||
for (int g = 0; g < 30; g++) {
|
long long totalParams = 0;
|
||||||
|
for (int i = 0; i < numLayers - 1; i++) {
|
||||||
|
long long weights = (long long)layers[i].size * layers[i + 1].size;
|
||||||
|
long long biases = (long long)layers[i + 1].size;
|
||||||
|
totalParams += (weights + biases);
|
||||||
|
}
|
||||||
|
std::string modelSizeStr;
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
if (totalParams >= 1000000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t";
|
||||||
|
else if (totalParams >= 1000000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b";
|
||||||
|
else if (totalParams >= 1000000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m";
|
||||||
|
else if (totalParams >= 1000LL) ss << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k";
|
||||||
|
else ss << totalParams;
|
||||||
|
modelSizeStr = ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Переменные для замера скорости
|
||||||
|
auto startTime = std::chrono::high_resolution_clock::now();
|
||||||
|
int tokensInSecond = 0;
|
||||||
|
double tokensPerSec = 0;
|
||||||
|
|
||||||
|
for (int g = 0; g < 1024; g++) {
|
||||||
std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
|
std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
|
||||||
|
|
||||||
int bestId = 0;
|
int bestId = 0;
|
||||||
@@ -183,11 +242,28 @@ int main() {
|
|||||||
|
|
||||||
if (bestId == 0) break;
|
if (bestId == 0) break;
|
||||||
|
|
||||||
|
tokensInSecond++;
|
||||||
|
auto currentTime = std::chrono::high_resolution_clock::now();
|
||||||
|
std::chrono::duration<double> elapsed = currentTime - startTime;
|
||||||
|
|
||||||
|
if (elapsed.count() >= 0.1) {
|
||||||
|
tokensPerSec = tokensInSecond / elapsed.count();
|
||||||
|
tokensInSecond = 0;
|
||||||
|
startTime = currentTime;
|
||||||
|
}
|
||||||
|
|
||||||
std::string word = tok.getWord(bestId);
|
std::string word = tok.getWord(bestId);
|
||||||
std::cout << word << std::flush;
|
std::cout << word << std::flush;
|
||||||
|
|
||||||
|
std::cout << "\033[s" << "\033[999;1H" << "\033[2K"
|
||||||
|
<< "--- [ID: " << bestId << "] | "
|
||||||
|
<< "Speed: " << std::fixed << std::setprecision(1) << tokensPerSec*10 << " t/s | "
|
||||||
|
<< "Model: " << modelSizeStr << " params ---"
|
||||||
|
<< "\033[u" << std::flush;
|
||||||
|
|
||||||
currentTokens.push_back(bestId);
|
currentTokens.push_back(bestId);
|
||||||
}
|
}
|
||||||
|
// Чтобы курсор не остался внизу после генерации
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user