#include "core.hpp"

// NOTE(review): the original include targets were stripped by text extraction
// (bare `#include` lines); these are reconstructed from usage in this file —
// confirm against the build. vulkan.hpp may already come in via core.hpp.
#include <vulkan/vulkan.hpp>

#include <cmath>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// --- CONSTRUCTOR ---
// Builds the layer topology and He-scaled random weights; optionally brings up
// a Vulkan compute context (instance, device, compute queue, command pool).
// On any Vulkan failure we log the error and fall back to the CPU path.
//
// @param layers         array of layer descriptors; only `.size` is read here
// @param count          number of layers in `layers`
// @param useVulkanParam request GPU acceleration (best effort)
NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count, bool useVulkanParam) {
    this->numLayers = count;
    this->useVulkan = useVulkanParam;
    this->vulkanResourcesInitialized = false;

    if (this->useVulkan) {
        try {
            vk::ApplicationInfo appInfo{"Xenith", 1, nullptr, 0, VK_API_VERSION_1_1};
            instance = vk::createInstance({{}, &appInfo});

            auto physicalDevices = instance.enumeratePhysicalDevices();
            if (physicalDevices.empty()) throw std::runtime_error("GPU не найдены");
            physDev = physicalDevices[0];

            // Pick the first queue family that advertises compute support.
            auto queueProps = physDev.getQueueFamilyProperties();
            int computeFamily = -1;
            for (int i = 0; i < (int)queueProps.size(); i++) {
                if (queueProps[i].queueFlags & vk::QueueFlagBits::eCompute) {
                    computeFamily = i;
                    break;
                }
            }
            if (computeFamily == -1) throw std::runtime_error("Compute не поддерживается");
            this->computeQueueFamilyIndex = (uint32_t)computeFamily;

            float priority = 1.0f;
            vk::DeviceQueueCreateInfo queueInfo({}, computeQueueFamilyIndex, 1, &priority);
            vk::DeviceCreateInfo deviceCreateInfo({}, 1, &queueInfo);
            device = physDev.createDevice(deviceCreateInfo);
            queue = device.getQueue(computeQueueFamilyIndex, 0);

            vk::CommandPoolCreateInfo poolInfo({}, computeQueueFamilyIndex);
            cmdPool = device.createCommandPool(poolInfo);

            std::cout << "Vulkan инициализирован на: "
                      << physDev.getProperties().deviceName << std::endl;
        } catch (const std::exception& e) {
            std::cerr << "Ошибка Vulkan: " << e.what() << ". Переключение на CPU."
                      << std::endl;
            this->useVulkan = false;
        }
    }

    // CPU-side model data: layer sizes, He-scaled uniform random weights
    // (scale = sqrt(2 / fan_in)), zero-initialized biases.
    for (int i = 0; i < count; i++) sizes.push_back(layers[i].size);
    for (int i = 0; i < count - 1; i++) {
        std::vector<std::vector<double>> layerW;
        double scale = sqrt(2.0 / sizes[i]);
        for (int j = 0; j < sizes[i + 1]; j++) {
            std::vector<double> nodeW;
            for (int k = 0; k < sizes[i]; k++)
                nodeW.push_back(((double)rand() / RAND_MAX * 2 - 1) * scale);
            layerW.push_back(nodeW);
        }
        weights.push_back(layerW);
        biases.push_back(std::vector<double>(sizes[i + 1], 0.0));
    }

    if (this->useVulkan) {
        initVulkanResources();
    }
}

// --- DESTRUCTOR ---
// BUGFIX: the original released resources only when BOTH useVulkan and
// vulkanResourcesInitialized were true, leaking the vk::Instance/vk::Device
// when construction partially succeeded and then fell back to CPU (or when
// initVulkanResources was never reached). We now guard on the handles
// themselves; destroying VK_NULL_HANDLE is a no-op per the Vulkan spec, so the
// per-resource destroys need no individual guards once the device exists.
NeuralNetwork::~NeuralNetwork() {
    if (device) {
        device.waitIdle();
        if (vulkanResourcesInitialized) {
            device.destroyPipeline(pipeline);
            device.destroyPipelineLayout(pipeLayout);
            device.destroyShaderModule(shaderModule);
            device.destroyDescriptorPool(descriptorPool);
            device.destroyDescriptorSetLayout(dsLayout);
            device.destroyBuffer(gpuW); device.freeMemory(memW);
            device.destroyBuffer(gpuB); device.freeMemory(memB);
            device.destroyBuffer(gpuO); device.freeMemory(memO);
            device.destroyBuffer(gpuE); device.freeMemory(memE);
        }
        device.destroyCommandPool(cmdPool);
        device.destroy();
    }
    if (instance) instance.destroy();
}

// --- GPU RESOURCE INITIALIZATION ---
// Creates the four host-visible storage buffers (weights, biases, outputs,
// errors), the descriptor set that binds them at bindings 0..3, and the
// compute pipeline loaded from "Xenith/shader.comp.spv". Idempotent: returns
// immediately if already initialized or Vulkan is disabled.
void NeuralNetwork::initVulkanResources() {
    if (!useVulkan || vulkanResourcesInitialized) return;

    // Total element counts across all layers (flattened layout).
    size_t wSize = 0, bSize = 0, oSize = 0;
    for (int i = 0; i < numLayers - 1; i++) {
        wSize += (size_t)sizes[i] * sizes[i + 1];
        bSize += (size_t)sizes[i + 1];
    }
    for (int s : sizes) oSize += s;

    // Host-visible+coherent so train() can map/memcpy without explicit flushes.
    auto createBuf = [&](size_t size, vk::Buffer& buf, vk::DeviceMemory& mem) {
        buf = device.createBuffer({{}, size * sizeof(float), vk::BufferUsageFlagBits::eStorageBuffer});
        vk::MemoryRequirements req = device.getBufferMemoryRequirements(buf);
        mem = device.allocateMemory({req.size,
                                     findMemoryType(req.memoryTypeBits,
                                                    vk::MemoryPropertyFlagBits::eHostVisible |
                                                    vk::MemoryPropertyFlagBits::eHostCoherent)});
        device.bindBufferMemory(buf, mem, 0);
    };
    createBuf(wSize, gpuW, memW);
    createBuf(bSize, gpuB, memB);
    createBuf(oSize, gpuO, memO);
    createBuf(oSize, gpuE, memE);

    // One storage-buffer binding per buffer: 0=W, 1=B, 2=O, 3=E.
    std::vector<vk::DescriptorSetLayoutBinding> bindings = {
        {0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
        {1, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
        {2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
        {3, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}
    };
    dsLayout = device.createDescriptorSetLayout({{}, (uint32_t)bindings.size(), bindings.data()});

    vk::DescriptorPoolSize poolSize(vk::DescriptorType::eStorageBuffer, 4);
    descriptorPool = device.createDescriptorPool({{}, 1, 1, &poolSize});
    descriptorSet = device.allocateDescriptorSets({descriptorPool, 1, &dsLayout})[0];

    vk::DescriptorBufferInfo bW(gpuW, 0, VK_WHOLE_SIZE), bB(gpuB, 0, VK_WHOLE_SIZE),
                             bO(gpuO, 0, VK_WHOLE_SIZE), bE(gpuE, 0, VK_WHOLE_SIZE);
    device.updateDescriptorSets(
        {{descriptorSet, 0, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &bW},
         {descriptorSet, 1, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &bB},
         {descriptorSet, 2, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &bO},
         {descriptorSet, 3, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &bE}},
        {});

    auto shaderCode = readFile("Xenith/shader.comp.spv");
    shaderModule = device.createShaderModule(
        {{}, shaderCode.size(), reinterpret_cast<const uint32_t*>(shaderCode.data())});

    vk::PushConstantRange pushRange(vk::ShaderStageFlagBits::eCompute, 0, sizeof(TrainParams));
    pipeLayout = device.createPipelineLayout({{}, 1, &dsLayout, 1, &pushRange});

    vk::PipelineShaderStageCreateInfo stageInfo({}, vk::ShaderStageFlagBits::eCompute,
                                                shaderModule, "main");
    pipeline = device.createComputePipeline(nullptr, {{}, stageInfo, pipeLayout}).value;

    vulkanResourcesInitialized = true;
}

// --- VULKAN TRAINING ---
// One SGD step: forward pass and error backpropagation run on the CPU; the
// per-layer weight/bias update runs on the GPU (one dispatch per layer, 256
// threads per workgroup). Falls back to train() when Vulkan is unavailable.
//
// @param input  network input (sizes[0] values)
// @param target expected output (sizes[numLayers-1] values)
// @param lr     learning rate
// @return sum of squared output errors BEFORE the update
double NeuralNetwork::trainVulkan(const std::vector<double>& input,
                                  const std::vector<double>& target, double lr) {
    if (!useVulkan) return train(input, target, lr);
    if (!vulkanResourcesInitialized) initVulkanResources();

    // CPU forward pass + backprop of sigmoid deltas (same math as train()).
    std::vector<double> pred = feedForward(input);
    std::vector<std::vector<double>> errors(numLayers);
    errors[numLayers - 1].resize(sizes[numLayers - 1]);
    double totalErr = 0;
    for (int i = 0; i < sizes[numLayers - 1]; i++) {
        double e = target[i] - pred[i];
        errors[numLayers - 1][i] = e * pred[i] * (1.0 - pred[i]);
        totalErr += e * e;
    }
    for (int i = numLayers - 2; i > 0; i--) {
        errors[i].resize(sizes[i]);
        for (int j = 0; j < sizes[i]; j++) {
            double e = 0;
            for (int k = 0; k < sizes[i + 1]; k++) e += errors[i + 1][k] * weights[i][k][j];
            errors[i][j] = e * outputs[i][j] * (1.0 - outputs[i][j]);
        }
    }
    // BUGFIX: the shader receives one offset per layer that (presumably —
    // confirm against shader.comp) indexes both the O and E buffers, so fE
    // must be index-parallel with fO. The input layer has no deltas; pad it
    // with zeros instead of leaving a hole that shifts every later layer.
    errors[0].resize(sizes[0]);

    // Flatten to float and record per-layer offsets for the push constants.
    std::vector<float> fW, fB, fO, fE;
    std::vector<uint32_t> wOff, bOff, oOff;
    for (int i = 0; i < numLayers - 1; i++) {
        wOff.push_back((uint32_t)fW.size());
        for (auto& row : weights[i])
            for (double v : row) fW.push_back((float)v);
        bOff.push_back((uint32_t)fB.size());
        for (double v : biases[i]) fB.push_back((float)v);
    }
    for (int i = 0; i < numLayers; i++) {
        oOff.push_back((uint32_t)fO.size());
        for (double v : outputs[i]) fO.push_back((float)v);
        for (double v : errors[i]) fE.push_back((float)v);
    }

    // Memory is host-coherent, so a plain map/memcpy/unmap suffices.
    auto upload = [&](vk::DeviceMemory mem, const void* data, size_t size) {
        if (size == 0) return;
        void* mapped = device.mapMemory(mem, 0, size);
        memcpy(mapped, data, size);
        device.unmapMemory(mem);
    };
    upload(memW, fW.data(), fW.size() * sizeof(float));
    upload(memB, fB.data(), fB.size() * sizeof(float));
    upload(memO, fO.data(), fO.size() * sizeof(float));
    upload(memE, fE.data(), fE.size() * sizeof(float));

    vk::CommandBufferAllocateInfo allocInfo(cmdPool, vk::CommandBufferLevel::ePrimary, 1);
    vk::CommandBuffer cmd = device.allocateCommandBuffers(allocInfo)[0];
    cmd.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
    cmd.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
    cmd.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipeLayout, 0, {descriptorSet}, {});
    // BUGFIX: consecutive dispatches touch the same storage buffers; without a
    // barrier the GPU may reorder/overlap layers. A compute->compute memory
    // barrier between dispatches serializes the accesses.
    vk::MemoryBarrier shaderBarrier(vk::AccessFlagBits::eShaderWrite,
                                    vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
    for (int i = 0; i < numLayers - 1; i++) {
        TrainParams p = {(uint32_t)sizes[i], (uint32_t)sizes[i + 1],
                         wOff[i], bOff[i], oOff[i], oOff[i + 1], (float)lr};
        cmd.pushConstants(pipeLayout, vk::ShaderStageFlagBits::eCompute, 0,
                          sizeof(TrainParams), &p);
        cmd.dispatch((sizes[i + 1] + 255) / 256, 1, 1);
        if (i < numLayers - 2)
            cmd.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
                                vk::PipelineStageFlagBits::eComputeShader,
                                {}, {shaderBarrier}, {}, {});
    }
    cmd.end();
    queue.submit(vk::SubmitInfo(0, nullptr, nullptr, 1, &cmd), nullptr);
    queue.waitIdle();  // simple synchronous step; no fences needed
    device.freeCommandBuffers(cmdPool, cmd);

    // Read the updated weights/biases back into the double-precision model.
    void* wPtr = device.mapMemory(memW, 0, fW.size() * sizeof(float));
    memcpy(fW.data(), wPtr, fW.size() * sizeof(float));
    device.unmapMemory(memW);
    void* bPtr = device.mapMemory(memB, 0, fB.size() * sizeof(float));
    memcpy(fB.data(), bPtr, fB.size() * sizeof(float));
    device.unmapMemory(memB);

    int wi = 0, bi = 0;
    for (int i = 0; i < numLayers - 1; i++) {
        for (int j = 0; j < sizes[i + 1]; j++) {
            for (int k = 0; k < sizes[i]; k++) weights[i][j][k] = fW[wi++];
            biases[i][j] = fB[bi++];
        }
    }
    return totalErr;
}

// --- HELPER FUNCTIONS ---

// Forward pass with sigmoid activation on every layer. Side effect: rebuilds
// `outputs` (per-layer activations, including the raw input as layer 0),
// which train()/trainVulkan() rely on for backpropagation.
std::vector<double> NeuralNetwork::feedForward(const std::vector<double>& input) {
    outputs.clear();
    outputs.push_back(input);
    std::vector<double> curr = input;
    for (int i = 0; i < numLayers - 1; i++) {
        std::vector<double> next;
        for (int j = 0; j < sizes[i + 1]; j++) {
            double sum = biases[i][j];
            for (int k = 0; k < (int)curr.size(); k++) sum += curr[k] * weights[i][j][k];
            next.push_back(1.0 / (1.0 + exp(-sum)));  // sigmoid
        }
        curr = next;
        outputs.push_back(curr);
    }
    return curr;
}

// CPU-only SGD step: forward pass, sigmoid-delta backpropagation, then an
// in-place weight/bias update. Returns the sum of squared output errors
// computed BEFORE the update (same contract as trainVulkan).
double NeuralNetwork::train(const std::vector<double>& input,
                            const std::vector<double>& target, double lr) {
    std::vector<double> pred = feedForward(input);
    std::vector<std::vector<double>> errors(numLayers);
    errors[numLayers - 1].resize(sizes[numLayers - 1]);
    double totalErr = 0;
    for (int i = 0; i < sizes[numLayers - 1]; i++) {
        double e = target[i] - pred[i];
        errors[numLayers - 1][i] = e * pred[i] * (1.0 - pred[i]);
        totalErr += e * e;
    }
    for (int i = numLayers - 2; i > 0; i--) {
        errors[i].resize(sizes[i]);
        for (int j = 0; j < sizes[i]; j++) {
            double e = 0;
            for (int k = 0; k < sizes[i + 1]; k++) e += errors[i + 1][k] * weights[i][k][j];
            errors[i][j] = e * outputs[i][j] * (1.0 - outputs[i][j]);
        }
    }
    for (int i = 0; i < numLayers - 1; i++) {
        for (int j = 0; j < sizes[i + 1]; j++) {
            double errT = lr * errors[i + 1][j];
            for (int k = 0; k < sizes[i]; k++) weights[i][j][k] += errT * outputs[i][k];
            biases[i][j] += errT;
        }
    }
    return totalErr;
}

// Finds a device memory type index matching both the buffer's type filter and
// the requested property flags. Throws if no suitable type exists.
uint32_t NeuralNetwork::findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties) {
    vk::PhysicalDeviceMemoryProperties memProperties = physDev.getMemoryProperties();
    for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
        if ((typeFilter & (1 << i)) &&
            (memProperties.memoryTypes[i].propertyFlags & properties) == properties)
            return i;
    }
    throw std::runtime_error("Память не найдена");
}

// Reads an entire binary file (used for the SPIR-V shader blob).
// Throws std::runtime_error when the file cannot be opened.
std::vector<char> NeuralNetwork::readFile(const std::string& filename) {
    std::ifstream file(filename, std::ios::ate | std::ios::binary);
    if (!file.is_open()) throw std::runtime_error("Файл не найден: " + filename);
    size_t fileSize = (size_t)file.tellg();
    std::vector<char> buffer(fileSize);
    file.seekg(0);
    file.read(buffer.data(), fileSize);
    return buffer;
}