the beginning of the implementation of Vulkan
Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
+139
-3
@@ -2,10 +2,46 @@
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
#include <omp.h>
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <chrono>
|
||||
|
||||
#define MAX_CORES 16
|
||||
|
||||
NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count) : numLayers(count) {
|
||||
|
||||
NeuralNetwork::NeuralNetwork(LayerStructure_t layers[], int count, bool useVulkan) : numLayers(count) {
|
||||
|
||||
if (useVulkan) {
|
||||
|
||||
vk::ApplicationInfo appInfo{"Xenith", 1, nullptr, 0, VK_API_VERSION_1_1};
|
||||
instance = vk::createInstance({{}, &appInfo});
|
||||
|
||||
auto physicalDevices = instance.enumeratePhysicalDevices();
|
||||
physDev = physicalDevices[0];
|
||||
auto props = physDev.getProperties();
|
||||
std::cout << "Используем GPU: " << props.deviceName << std::endl;
|
||||
|
||||
// 3. Поиск очереди для вычислений
|
||||
auto queueProps = physDev.getQueueFamilyProperties();
|
||||
int computeFamily = -1;
|
||||
for (int i = 0; i < queueProps.size(); i++) {
|
||||
if (queueProps[i].queueFlags & vk::QueueFlagBits::eCompute) {
|
||||
computeFamily = i; break;
|
||||
}
|
||||
}
|
||||
if (computeFamily == -1) throw std::runtime_error("GPU не поддерживает Compute");
|
||||
|
||||
// 4. Логическое устройство
|
||||
float priority = 1.0f;
|
||||
vk::DeviceQueueCreateInfo queueInfo({}, (uint32_t)computeFamily, 1, &priority);
|
||||
vk::DeviceCreateInfo deviceCreateInfo({}, 1, &queueInfo);
|
||||
device = physDev.createDevice(deviceCreateInfo);
|
||||
|
||||
queue = device.getQueue(computeFamily, 0);
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i < count; i++) sizes.push_back(layers[i].size);
|
||||
for (int i = 0; i < count - 1; i++) {
|
||||
std::vector<std::vector<double>> layerW;
|
||||
@@ -41,8 +77,9 @@ std::vector<double> NeuralNetwork::feedForward(const std::vector<double>& input)
|
||||
}
|
||||
|
||||
|
||||
|
||||
double NeuralNetwork::train(const std::vector<double>& input, const std::vector<double>& target, double lr) {
|
||||
omp_set_num_threads(MAX_CORES);
|
||||
omp_set_num_threads(cpu_count);
|
||||
|
||||
std::vector<double> pred = feedForward(input);
|
||||
std::vector<std::vector<double>> errors(numLayers);
|
||||
@@ -83,3 +120,102 @@ double NeuralNetwork::train(const std::vector<double>& input, const std::vector<
|
||||
|
||||
return totalErr;
|
||||
}
|
||||
|
||||
uint32_t NeuralNetwork::findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties) {
|
||||
vk::PhysicalDeviceMemoryProperties memProperties = physDev.getMemoryProperties();
|
||||
for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
|
||||
if ((typeFilter & (1 << i)) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
throw std::runtime_error("Не удалось найти подходящий тип памяти!");
|
||||
}
|
||||
|
||||
|
||||
double NeuralNetwork::trainVulkan() {
|
||||
// 1. Создание буферов
|
||||
vk::Buffer inputBuffer = device.createBuffer({{}, sizeof(float) * 2, vk::BufferUsageFlagBits::eStorageBuffer});
|
||||
vk::Buffer outputBuffer = device.createBuffer({{}, sizeof(float), vk::BufferUsageFlagBits::eStorageBuffer});
|
||||
|
||||
// 2. Выделение и привязка памяти для ВХОДА
|
||||
vk::MemoryRequirements inReq = device.getBufferMemoryRequirements(inputBuffer);
|
||||
vk::DeviceMemory inputMemory = device.allocateMemory({
|
||||
inReq.size,
|
||||
findMemoryType(inReq.memoryTypeBits, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent)
|
||||
});
|
||||
device.bindBufferMemory(inputBuffer, inputMemory, 0); // КРИТИЧНО: привязываем память к буферу
|
||||
|
||||
// 3. Копирование данных во входной буфер
|
||||
float inputData[2] = {2.51f, 2.32f};
|
||||
void* pIn = device.mapMemory(inputMemory, 0, sizeof(float) * 2);
|
||||
memcpy(pIn, inputData, sizeof(float) * 2);
|
||||
device.unmapMemory(inputMemory);
|
||||
|
||||
// 4. Выделение и привязка памяти для ВЫХОДА
|
||||
vk::MemoryRequirements outReq = device.getBufferMemoryRequirements(outputBuffer);
|
||||
vk::DeviceMemory outputMemory = device.allocateMemory({
|
||||
outReq.size,
|
||||
findMemoryType(outReq.memoryTypeBits, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent)
|
||||
});
|
||||
device.bindBufferMemory(outputBuffer, outputMemory, 0);
|
||||
|
||||
// 5. ДЕСКРИПТОРЫ (Связь C++ -> Шейдер)
|
||||
// Описываем, что у нас есть 2 слота (binding 0 и 1)
|
||||
std::vector<vk::DescriptorSetLayoutBinding> bindings = {
|
||||
{0, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute},
|
||||
{1, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute}
|
||||
};
|
||||
vk::DescriptorSetLayout dsLayout = device.createDescriptorSetLayout({{}, (uint32_t)bindings.size(), bindings.data()});
|
||||
|
||||
// Создаем пул и выделяем сет дескрипторов
|
||||
vk::DescriptorPoolSize poolSize{vk::DescriptorType::eStorageBuffer, 2};
|
||||
vk::DescriptorPool pool = device.createDescriptorPool({{}, 1, 1, &poolSize});
|
||||
vk::DescriptorSet ds = device.allocateDescriptorSets({pool, 1, &dsLayout})[0];
|
||||
|
||||
// Указываем, какие именно буферы в какие слоты вставить
|
||||
vk::DescriptorBufferInfo bInInfo{inputBuffer, 0, VK_WHOLE_SIZE};
|
||||
vk::DescriptorBufferInfo bOutInfo{outputBuffer, 0, VK_WHOLE_SIZE};
|
||||
device.updateDescriptorSets({
|
||||
{ds, 0, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &bInInfo},
|
||||
{ds, 1, 0, 1, vk::DescriptorType::eStorageBuffer, nullptr, &bOutInfo}
|
||||
}, {});
|
||||
|
||||
// 6. ПАЙПЛАЙН (Загрузка шейдера)
|
||||
auto shaderCode = readFile("shader.comp.spv"); // Твоя функция чтения файла
|
||||
vk::ShaderModule shaderModule = device.createShaderModule({{}, shaderCode.size(), (uint32_t*)shaderCode.data()});
|
||||
vk::PipelineLayout pipeLayout = device.createPipelineLayout({{}, 1, &dsLayout});
|
||||
|
||||
vk::ComputePipelineCreateInfo pipeInfo{{}, {{}, vk::ShaderStageFlagBits::eCompute, shaderModule, "main"}, pipeLayout};
|
||||
vk::Pipeline pipeline = device.createComputePipeline(nullptr, pipeInfo).value;
|
||||
|
||||
// 7. КОМАНДЫ И ЗАПУСК (Command Buffer)
|
||||
// (Предполагаем, что cmdPool и queue уже созданы в классе)
|
||||
vk::CommandBufferAllocateInfo cmdAllocInfo(cmdPool, vk::CommandBufferLevel::ePrimary, 1);
|
||||
vk::CommandBuffer cmd = device.allocateCommandBuffers(cmdAllocInfo)[0];
|
||||
|
||||
cmd.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
|
||||
cmd.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline);
|
||||
cmd.bindDescriptorSets(vk::PipelineBindPoint::eCompute, pipeLayout, 0, {ds}, {});
|
||||
cmd.dispatch(1, 1, 1); // Запускаем 1 поток
|
||||
cmd.end();
|
||||
|
||||
queue.submit(vk::SubmitInfo(0, nullptr, nullptr, 1, &cmd), nullptr);
|
||||
queue.waitIdle();
|
||||
|
||||
// 8. ЗАБИРАЕМ РЕЗУЛЬТАТ
|
||||
float result = 0;
|
||||
void* pOut = device.mapMemory(outputMemory, 0, sizeof(float));
|
||||
memcpy(&result, pOut, sizeof(float));
|
||||
device.unmapMemory(outputMemory);
|
||||
|
||||
// Очистка (в реальном коде лучше делать в деструкторе)
|
||||
device.destroyPipeline(pipeline);
|
||||
device.destroyPipelineLayout(pipeLayout);
|
||||
device.destroyShaderModule(shaderModule);
|
||||
device.destroyDescriptorPool(pool);
|
||||
device.destroyDescriptorSetLayout(dsLayout);
|
||||
device.destroyBuffer(inputBuffer); device.freeMemory(inputMemory);
|
||||
device.destroyBuffer(outputBuffer); device.freeMemory(outputMemory);
|
||||
|
||||
return (double)result;
|
||||
}
|
||||
+17
-1
@@ -4,6 +4,12 @@
|
||||
#include "typedef.hpp"
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
#include "core.hpp"
|
||||
#include <cstdlib>
|
||||
#include <omp.h>
|
||||
#include <vulkan/vulkan.hpp>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
class NeuralNetwork {
|
||||
private:
|
||||
@@ -13,13 +19,23 @@ private:
|
||||
std::vector<std::vector<double>> biases;
|
||||
std::vector<std::vector<double>> outputs;
|
||||
|
||||
vk::Instance instance;
|
||||
vk::PhysicalDevice physDev;
|
||||
vk::Device device;
|
||||
vk::Queue queue;
|
||||
vk::CommandPool cmdPool;
|
||||
|
||||
// Returns the index of a device memory type that is permitted by typeFilter and
// has all of the given property flags. Declared without the `NeuralNetwork::`
// prefix: extra qualification on an in-class member declaration is ill-formed.
uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties);
|
||||
|
||||
// Logistic sigmoid: 1 / (1 + e^(-x)).
double sigmoid(double x) {
    const double decay = std::exp(-x);
    return 1.0 / (1.0 + decay);
}
|
||||
// Computes x * (1 - x). This equals the sigmoid's derivative when x is already
// a sigmoid output — presumably callers pass activations; confirm at call sites.
double sigmoidDeriv(double x) {
    const double complement = 1.0 - x;
    return x * complement;
}
|
||||
|
||||
public:
|
||||
NeuralNetwork(LayerStructure_t layers[], int count);
|
||||
int cpu_count = 1;
|
||||
NeuralNetwork(LayerStructure_t layers[], int count, bool useVulkan = false);
|
||||
std::vector<double> feedForward(const std::vector<double>& input);
|
||||
double train(const std::vector<double>& input, const std::vector<double>& target, double lr);
|
||||
double trainVulkan();
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -1,2 +1,16 @@
|
||||
#version 450

// Workgroup size of 1 along x (y and z default to 1): one invocation per workgroup.
layout(local_size_x = 1) in;

// Binding 0: the two input operands.
layout(std430, binding = 0) buffer InputBuffer {
    float a;
    float b;
} inputs;

// Binding 1: the single output value.
layout(std430, binding = 1) buffer OutputBuffer {
    float result;
} outputs;

// Multiplies the two input floats and stores the product in the output buffer.
void main() {
    float product = inputs.a * inputs.b;
    outputs.result = product;
}
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user