edit chatbot and core
tasks.json (VS Code build task)  +3 -1
@@ -9,9 +9,11 @@
             "-g",
             "${fileDirname}/main.cpp",
             "${fileDirname}/Xenith/core.cpp",
+            "${fileDirname}/Xenith/token/token.cpp",
             "-o",
             "${fileDirname}/main",
-            "-I", "${fileDirname}/Xenith"
+            "-I", "${fileDirname}/Xenith",
+            "-I", "${fileDirname}/Xenith/token"
         ],
         "options": {
             "cwd": "${fileDirname}"
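With these arguments the build task now compiles token.cpp alongside main.cpp and core.cpp, and puts both Xenith and Xenith/token on the include path, so the new headers resolve from either directory.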
Xenith/core.cpp  +37 -56
@@ -1,82 +1,63 @@
 #include "core.h"
+#include <cmath>
+#include <cstdlib>
 
-NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) {
-    numLayers = count;
-    for (int i = 0; i < count; i++) {
-        layerSizes.push_back(layers[i].size);
-    }
+NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) : numLayers(count) {
+    for (int i = 0; i < count; i++) sizes.push_back(layers[i].size);
 
-    // Initialize the weights with random numbers
     for (int i = 0; i < count - 1; i++) {
-        std::vector<std::vector<double>> layerWeights;
-        for (int j = 0; j < layerSizes[i+1]; j++) {
-            std::vector<double> nodeWeights;
-            for (int k = 0; k < layerSizes[i]; k++) {
-                nodeWeights.push_back(((double)rand() / RAND_MAX) * 2 - 1);
-            }
-            layerWeights.push_back(nodeWeights);
+        std::vector<std::vector<double>> layerW;
+        double scale = sqrt(2.0 / sizes[i]);
+        for (int j = 0; j < sizes[i+1]; j++) {
+            std::vector<double> nodeW;
+            for (int k = 0; k < sizes[i]; k++)
+                nodeW.push_back(((double)rand()/RAND_MAX * 2 - 1) * scale);
+            layerW.push_back(nodeW);
         }
-        weights.push_back(layerWeights);
-
-        std::vector<double> layerBiases;
-        for (int j = 0; j < layerSizes[i+1]; j++) {
-            layerBiases.push_back(((double)rand() / RAND_MAX) * 2 - 1);
-        }
-        biases.push_back(layerBiases);
+        weights.push_back(layerW);
+        biases.push_back(std::vector<double>(sizes[i+1], 0.0));
     }
 }
 
-std::vector<double> NeuralNetwork::feedForward(std::vector<double> input) {
+std::vector<double> NeuralNetwork::feedForward(const std::vector<double>& input) {
     outputs.clear();
     outputs.push_back(input);
-    std::vector<double> current = input;
+    std::vector<double> curr = input;
     for (int i = 0; i < numLayers - 1; i++) {
         std::vector<double> next;
-        for (int j = 0; j < layerSizes[i+1]; j++) {
+        for (int j = 0; j < sizes[i+1]; j++) {
             double sum = biases[i][j];
-            for (int k = 0; k < layerSizes[i]; k++) {
-                sum += current[k] * weights[i][j][k];
-            }
-            next.push_back(sigmoid(sum));
+            for (int k = 0; k < (int)curr.size(); k++) sum += curr[k] * weights[i][j][k];
+            next.push_back(1.0 / (1.0 + exp(-sum)));
         }
-        current = next;
-        outputs.push_back(current);
+        curr = next;
+        outputs.push_back(curr);
     }
-    return current;
+    return curr;
 }
 
-void NeuralNetwork::train(std::vector<double> input, std::vector<double> target, double lr) {
-    // 1. Forward pass
-    feedForward(input);
+double NeuralNetwork::train(const std::vector<double>& input, const std::vector<double>& target, double lr) {
+    std::vector<double> pred = feedForward(input);
 
-    // 2. Compute the errors for the output layer
     std::vector<std::vector<double>> errors(numLayers);
-    errors[numLayers - 1].resize(layerSizes[numLayers - 1]);
-    for (int i = 0; i < layerSizes[numLayers - 1]; i++) {
-        double output = outputs[numLayers - 1][i];
-        errors[numLayers - 1][i] = (target[i] - output) * sigmoidDerivative(output);
+    errors[numLayers-1].resize(sizes[numLayers-1]);
+    double totalErr = 0;
+    for (int i = 0; i < sizes[numLayers-1]; i++) {
+        double e = target[i] - pred[i];
+        errors[numLayers-1][i] = e * pred[i] * (1.0 - pred[i]);
+        totalErr += e * e;
     }
 
-    // 3. Backpropagate the error to the hidden layers
     for (int i = numLayers - 2; i > 0; i--) {
-        errors[i].resize(layerSizes[i]);
-        for (int j = 0; j < layerSizes[i]; j++) {
-            double error = 0.0;
-            for (int k = 0; k < layerSizes[i+1]; k++) {
-                error += errors[i+1][k] * weights[i][k][j];
-            }
-            errors[i][j] = error * sigmoidDerivative(outputs[i][j]);
+        errors[i].resize(sizes[i]);
+        for (int j = 0; j < sizes[i]; j++) {
+            double e = 0;
+            for (int k = 0; k < sizes[i+1]; k++) e += errors[i+1][k] * weights[i][k][j];
+            errors[i][j] = e * outputs[i][j] * (1.0 - outputs[i][j]);
        }
    }
 
-    // 4. Update the weights and biases
     for (int i = 0; i < numLayers - 1; i++) {
-        for (int j = 0; j < layerSizes[i+1]; j++) {
-            for (int k = 0; k < layerSizes[i]; k++) {
-                weights[i][j][k] += lr * errors[i+1][j] * outputs[i][k];
-            }
+        for (int j = 0; j < sizes[i+1]; j++) {
+            for (int k = 0; k < sizes[i]; k++) weights[i][j][k] += lr * errors[i+1][j] * outputs[i][k];
             biases[i][j] += lr * errors[i+1][j];
         }
     }
+    return totalErr;
 }
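The rewritten constructor scales each weight by sqrt(2.0 / sizes[i]), a He-style fan-in scaling applied to the uniform [-1, 1] draw, and starts all biases at zero instead of random values; train() now also returns the summed squared error of the output layer. A minimal sketch of the same delta-rule update, reduced to a single sigmoid neuron (illustrative only, not part of the commit; the names w, b, x, t are made up for the example):

    // One sigmoid neuron trained with the same rule train() applies per weight.
    #include <cmath>
    #include <cstdio>

    int main() {
        double w = 0.5, b = 0.0, x = 1.0, t = 1.0, lr = 0.5;
        for (int step = 0; step < 20; step++) {
            double o = 1.0 / (1.0 + exp(-(w * x + b)));   // forward pass
            double delta = (t - o) * o * (1.0 - o);       // (target - out) * sigmoid'(sum)
            w += lr * delta * x;                          // same update as the weight loop
            b += lr * delta;                              // same update as the bias line
            printf("step %2d  out=%.4f  err=%.6f\n", step, o, (t - o) * (t - o));
        }
    }

The printed squared error shrinks each step, which is exactly the quantity train() now accumulates into totalErr.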
Xenith/core.h  +7 -10
@@ -4,25 +4,22 @@
 #include "typedef.h"
 #include <vector>
 #include <cmath>
-#include <iostream>
-#include <cstdlib>
-
 
 class NeuralNetwork {
 private:
     int numLayers;
-    std::vector<int> layerSizes;
-    std::vector<std::vector<std::vector<double>>> weights; // weights[layer][to_node][from_node]
-    std::vector<std::vector<double>> biases;  // biases[layer][node]
-    std::vector<std::vector<double>> outputs; // Store the layer outputs for backprop
+    std::vector<int> sizes;
+    std::vector<std::vector<std::vector<double>>> weights;
+    std::vector<std::vector<double>> biases;
+    std::vector<std::vector<double>> outputs;
 
     double sigmoid(double x) { return 1.0 / (1.0 + exp(-x)); }
-    double sigmoidDerivative(double x) { return x * (1.0 - x); }
+    double sigmoidDeriv(double x) { return x * (1.0 - x); }
 
 public:
     NeuralNetwork(LayerStructure_t layers[], int count);
-    std::vector<double> feedForward(std::vector<double> input);
-    void train(std::vector<double> input, std::vector<double> target, double learningRate);
+    std::vector<double> feedForward(const std::vector<double>& input);
+    double train(const std::vector<double>& input, const std::vector<double>& target, double lr);
 };
 
 #endif
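Note that sigmoidDeriv takes the already-activated output rather than the pre-activation: since sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)), passing o = sigmoid(z) makes x * (1.0 - x) the correct derivative. A quick self-contained check (not part of the commit):

    // Compare the closed form o*(1-o) against a central-difference derivative.
    #include <cmath>
    #include <cstdio>

    double sigmoid(double z) { return 1.0 / (1.0 + exp(-z)); }

    int main() {
        double z = 0.7, h = 1e-6;
        double o = sigmoid(z);
        double analytic = o * (1.0 - o);                        // what sigmoidDeriv(o) computes
        double numeric = (sigmoid(z + h) - sigmoid(z - h)) / (2 * h);
        printf("analytic=%.8f numeric=%.8f\n", analytic, numeric);
    }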
Xenith/token/token.cpp (new file)
@@ -0,0 +1,46 @@
+#include "token.h"
+#include <algorithm>
+#include <random>
+
+void Tokenizer::add(std::string word) {
+    int id = wordToId.size();
+    wordToId[word] = id;
+    idToWord[id] = word;
+}
+
+std::string Tokenizer::getWord(int id) {
+    return idToWord.count(id) ? idToWord[id] : "";
+}
+
+std::vector<int> Tokenizer::textToTokens(const std::string& text) {
+    std::vector<int> tokens;
+    size_t pos = 0;
+    while (pos < text.length()) {
+        int longestId = -1; size_t longestLen = 0;
+        for (auto const& [word, id] : wordToId) {
+            if (text.compare(pos, word.length(), word) == 0) {
+                if (word.length() > longestLen) {
+                    longestLen = word.length(); longestId = id;
+                }
+            }
+        }
+        if (longestId != -1) {
+            tokens.push_back(longestId);
+            pos += longestLen;
+        } else pos++;
+    }
+    return tokens;
+}
+
+Embedder::Embedder(int vSize, int dim) {
+    std::mt19937 gen(42);
+    std::uniform_real_distribution<double> dist(-1.0, 1.0);
+    matrix.resize(vSize, std::vector<double>(dim));
+    for(int i=0; i<vSize; i++)
+        for(int j=0; j<dim; j++) matrix[i][j] = dist(gen);
+}
+
+std::vector<double> Embedder::get(int id) {
+    if (id >= 0 && id < (int)matrix.size()) return matrix[id];
+    return std::vector<double>(matrix[0].size(), 0.0);
+}
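textToTokens is a greedy longest-match scanner: at each position it tries every vocabulary entry, keeps the longest one that matches, and silently skips one character when nothing matches. A usage sketch (not part of the commit; link it against Xenith/token/token.cpp), tokenizing the line from the new training data file with the vocabulary that token.h builds:

    #include "token.h"
    #include <iostream>

    int main() {
        Tokenizer tok;
        // Longest match wins at each position, so "[USER]" is one token, not six.
        for (int id : tok.textToTokens("[USER]привет[AI]привет как дела?<EOS>"))
            std::cout << id << " ";   // prints: 2 6 3 6 4 7 4 8 9 0
        std::cout << std::endl;
    }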
Xenith/token/token.h (new file)
@@ -0,0 +1,42 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+#include <string>
+#include <vector>
+#include <map>
+
+class Tokenizer {
+public:
+    std::map<std::string, int> wordToId;
+    std::map<int, std::string> idToWord;
+
+    Tokenizer() {
+        add("<EOS>");   // 0
+        add("[SYS]");   // 1
+        add("[USER]");  // 2
+        add("[AI]");    // 3
+        add(" ");       // 4
+        add("\n");      // 5
+        add("привет");  // 6
+        add("как");     // 7
+        add("дела");    // 8
+        add("?");       // 9
+        add("я");       // 10
+        add("робот");   // 11
+        add("хорошо");  // 12
+    }
+
+    void add(std::string word);
+    int getID(std::string word);
+    std::string getWord(int id);
+    std::vector<int> textToTokens(const std::string& text);
+};
+
+class Embedder {
+public:
+    std::vector<std::vector<double>> matrix;
+    Embedder(int vSize, int dim);
+    std::vector<double> get(int id);
+};
+
+#endif
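Because Embedder seeds std::mt19937 with the constant 42, the MAX_VOCAB x EMBED_DIM embedding table is identical on every run, and get() falls back to a zero vector for out-of-range ids, the same zero padding buildNetInput uses for short contexts. A lookup sketch (not part of the commit; compile with the -I paths the build task adds and link token.cpp):

    #include "typedef.h"
    #include "token.h"
    #include <iostream>

    int main() {
        Embedder emb(MAX_VOCAB, EMBED_DIM);
        std::vector<double> v = emb.get(6);        // embedding of "привет" (id 6)
        for (double d : v) std::cout << d << " ";  // EMBED_DIM values in [-1, 1]
        std::cout << std::endl;
    }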
Xenith/typedef.h  +5 -9
@@ -1,15 +1,11 @@
 #ifndef TYPEDEF_H
 #define TYPEDEF_H
 
-#include <vector>
+const int MAX_CONTEXT = 4; // How many tokens the network sees
+const int EMBED_DIM = 4;   // Vector size of a single token
+const int MAX_VOCAB = 13;  // Vocabulary size
 
-typedef enum {
-    SIGMOID
-} FunctionActivate_t;
-
-typedef struct {
-    int size;
-    FunctionActivate_t activate;
-} LayerStructure_t;
+typedef enum { SIGMOID } FunctionActivate_t;
+typedef struct { int size; FunctionActivate_t activate; } LayerStructure_t;
 
 #endif
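These constants pin down the model's shape: the network sees a window of MAX_CONTEXT = 4 tokens, each embedded as EMBED_DIM = 4 doubles, so its input layer has 4 * 4 = 16 units, exactly the {MAX_CONTEXT * EMBED_DIM, SIGMOID} layer that main.cpp constructs; MAX_VOCAB = 13 matches the 13 vocabulary entries (ids 0-12) the Tokenizer constructor registers.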
Training data file (new)
@@ -0,0 +1 @@
+[USER]привет[AI]привет как дела?<EOS>
main.cpp
@@ -1,39 +1,195 @@
 #include <iostream>
 #include <vector>
-#include <ctime>
+#include <string>
+#include <iomanip>
+#include <sstream>
+#include <fstream>
+#include <algorithm>
 #include "Xenith/core.h"
-#include "Xenith/typedef.h"
+#include "Xenith/token/token.h"
 
-int main() {
-    srand(time(NULL));
-
-    LayerStructure_t layers[] = {
-        {2, SIGMOID}, // Input: 2 numbers
-        {3, SIGMOID}, // Hidden layer
-        {1, SIGMOID}  // Output: 1 number
-    };
-
-    NeuralNetwork nn(layers, 3);
-
-    // Training data
-    std::vector<std::vector<double>> inputs = {{1, 1}, {1, 0}, {0, 0}, {0, 1}};
-    std::vector<std::vector<double>> targets = {{0}, {1}, {1}, {0}};
-
-    // Training loop
-    std::cout << "Training..." << std::endl;
-    for (int epoch = 0; epoch < 20000; epoch++) {
-        for (int i = 0; i < inputs.size(); i++) {
-            nn.train(inputs[i], targets[i], 0.5);
-        }
-    }
-
-    // Check the results
-    std::cout << "Results:" << std::endl;
-    for (int i = 0; i < inputs.size(); i++) {
-        std::vector<double> res = nn.feedForward(inputs[i]);
-        std::cout << inputs[i][0] << " " << inputs[i][1] << " -> "
-                  << (res[0] > 0.5 ? 1 : 0) << " (raw: " << res[0] << ")" << std::endl;
-    }
+// Global settings
+std::string currentSystemPrompt = "я робот";
+
+void printParameterCount(LayerStructure_t layers[], int numLayers) {
+    long long totalParams = 0;
+    for (int i = 0; i < numLayers - 1; i++) {
+        long long weights = (long long)layers[i].size * layers[i + 1].size;
+        long long biases = (long long)layers[i + 1].size;
+        totalParams += (weights + biases);
+    }
+
+    std::cout << "--- Xenith AI (Model Size: ";
+    if (totalParams >= 1000000000000LL)
+        std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000000.0 << "t";
+    else if (totalParams >= 1000000000LL)
+        std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000000.0 << "b";
+    else if (totalParams >= 1000000LL)
+        std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000000.0 << "m";
+    else if (totalParams >= 1000LL)
+        std::cout << std::fixed << std::setprecision(1) << (double)totalParams / 1000.0 << "k";
+    else
+        std::cout << totalParams;
+    std::cout << " parameters) ---" << std::endl;
+}
+
+std::vector<double> buildNetInput(const std::vector<int>& tokens, Embedder& emb) {
+    std::vector<double> netInput;
+    netInput.reserve(MAX_CONTEXT * EMBED_DIM);
+    int start = (int)tokens.size() - MAX_CONTEXT;
+    if (start < 0) start = 0;
+    int count = 0;
+    for (int i = start; i < (int)tokens.size(); i++) {
+        std::vector<double> v = emb.get(tokens[i]);
+        netInput.insert(netInput.end(), v.begin(), v.end());
+        count++;
+    }
+    while (count < MAX_CONTEXT) {
+        for (int d = 0; d < EMBED_DIM; d++) netInput.push_back(0.0);
+        count++;
+    }
+    return netInput;
+}
+
+void trainOnSequence(NeuralNetwork& nn, Tokenizer& tok, Embedder& emb, const std::string& dataset, int epochs, double lr) {
+    std::vector<int> allTokens = tok.textToTokens(dataset);
+    if (allTokens.size() < 2) {
+        std::cout << "Error: Sequence too short for training." << std::endl;
+        return;
+    }
+
+    std::cout << "Training logic: Next Token Prediction..." << std::endl;
+    for (int e = 1; e <= epochs; e++) {
+        double totalLoss = 0;
+        for (size_t i = 1; i < allTokens.size(); i++) {
+            std::vector<int> context;
+            for (size_t j = 0; j < i; j++) context.push_back(allTokens[j]);
+
+            std::vector<double> target(MAX_VOCAB, 0.0);
+            target[allTokens[i]] = 1.0;
+
+            totalLoss += nn.train(buildNetInput(context, emb), target, lr);
+        }
+        std::cout << "\rEpoch " << e << "/" << epochs << " | Loss: " << std::fixed << std::setprecision(6) << totalLoss << std::flush;
+    }
+    std::cout << "\nDone!" << std::endl;
+}
+
+int main() {
+    Tokenizer tok;
+    Embedder emb(MAX_VOCAB, EMBED_DIM);
+    LayerStructure_t layers[] = {
+        {MAX_CONTEXT * EMBED_DIM, SIGMOID},
+        {16, SIGMOID},
+        {MAX_VOCAB, SIGMOID}
+    };
+    int numLayers = sizeof(layers) / sizeof(layers[0]);
+
+    NeuralNetwork nn(layers, numLayers);
+    printParameterCount(layers, numLayers);
+
+    std::cout << "\n--- MENU ---" << std::endl;
+    std::cout << "/train\n/trainFile\n/help\n/exit\n";
+
+    while (true) {
+        std::cout << "\nxentith~$ ";
+
+        std::string cmdIn;
+        std::getline(std::cin, cmdIn);
+
+        if (cmdIn == "/exit") break;
+
+        if (cmdIn == "/train") {
+            int epochs;
+            double lr;
+            std::cout << "--- Training Setup ---\n";
+            std::cout << "Enter number of epochs: ";
+            std::string epStr; std::getline(std::cin, epStr);
+            epochs = std::stoi(epStr);
+
+            std::cout << "Enter learning rate (e.g. 0.1): ";
+            std::string lrStr; std::getline(std::cin, lrStr);
+            lr = std::stod(lrStr);
+
+            std::cout << "\n--- Example Content ---\n";
+            std::cout << "User: ";
+            std::string userPart;
+            std::getline(std::cin, userPart);
+
+            std::cout << "AI: ";
+            std::string aiPart;
+            std::getline(std::cin, aiPart);
+
+            std::string finalData = "[SYS]" + currentSystemPrompt +
+                                    "[USER]" + userPart +
+                                    "[AI]" + aiPart + "<EOS>";
+
+            std::cout << "\nTraining logic: Pattern Recognition..." << std::endl;
+            trainOnSequence(nn, tok, emb, finalData, epochs, lr);
+        }
+
+        else if (cmdIn == "/trainFile") {
+            std::string content;
+            std::cout << "Enter filename: ";
+            std::string filename;
+            std::getline(std::cin, filename);
+            std::ifstream file(filename);
+            if (file.is_open()) {
+                std::stringstream buffer;
+                buffer << file.rdbuf();
+                content = buffer.str();
+                std::cout << "Loaded " << content.length() << " characters from file." << std::endl;
+            } else {
+                std::cout << "Could not open file!" << std::endl;
+                continue;
+            }
+
+            int epochs;
+            double lr;
+            std::cout << "Enter number of epochs: ";
+            std::string epStr; std::getline(std::cin, epStr);
+            epochs = std::stoi(epStr);
+
+            std::cout << "Enter learning rate (e.g. 0.1): ";
+            std::string lrStr; std::getline(std::cin, lrStr);
+            lr = std::stod(lrStr);
+
+            std::string finalData = "[SYS]" + currentSystemPrompt + content + "<EOS>";
+            trainOnSequence(nn, tok, emb, finalData, epochs, lr);
+
+        } else if (cmdIn == "/sysPrompt") {
+            std::cout << "Current System Prompt: " << currentSystemPrompt << std::endl;
+            std::cout << "Enter new System Prompt: ";
+            std::getline(std::cin, currentSystemPrompt);
+            std::cout << "System Prompt updated!" << std::endl;
+
+        } else if (cmdIn == "/help") {
+            std::cout << "\n--- MENU ---" << std::endl;
+            std::cout << "/train\n/trainFile\n/sysPrompt\n/help\n/exit\n";
+
+        } else {
+            std::string prompt = "[SYS]" + currentSystemPrompt + "[USER]" + cmdIn + "[AI]";
+            std::vector<int> currentTokens = tok.textToTokens(prompt);
+
+            std::cout << "AI: ";
+            for (int g = 0; g < 30; g++) {
+                std::vector<double> out = nn.feedForward(buildNetInput(currentTokens, emb));
+
+                int bestId = 0;
+                for (int i = 0; i < MAX_VOCAB; i++) {
+                    if (out[i] > out[bestId]) bestId = i;
+                }
+
+                if (bestId == 0) break;
+
+                std::string word = tok.getWord(bestId);
+                std::cout << word << std::flush;
+
+                currentTokens.push_back(bestId);
+            }
+            std::cout << std::endl;
+        }
+    }
 
     return 0;
 }
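For the layer stack built in main(), 16 inputs, 16 hidden and 13 outputs, printParameterCount sums (16 * 16 + 16) + (16 * 13 + 13) = 272 + 221 = 493, so the startup banner reads "--- Xenith AI (Model Size: 493 parameters) ---". The reply loop then greedily emits the highest-scoring token each step, stopping at <EOS> (id 0) or after 30 tokens.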