From 5b3fa077901f603195da78f5e7286a4b75324021 Mon Sep 17 00:00:00 2001 From: cosmonaut Date: Wed, 30 Dec 2020 20:39:47 -0800 Subject: [PATCH] more compute implementation --- include/Refresh.h | 107 +++++------ src/Refresh.c | 59 +++--- src/Refresh_Driver.h | 35 ++-- src/Refresh_Driver_Vulkan.c | 352 +++++++++++++++++++++++++++--------- 4 files changed, 350 insertions(+), 203 deletions(-) diff --git a/include/Refresh.h b/include/Refresh.h index 8d59dba..f734dbb 100644 --- a/include/Refresh.h +++ b/include/Refresh.h @@ -170,6 +170,15 @@ typedef enum REFRESH_CubeMapFace REFRESH_CUBEMAPFACE_NEGATIVEZ } REFRESH_CubeMapFace; +typedef enum REFRESH_BufferUsageFlagBits +{ + REFRESH_BUFFERUSAGE_VERTEX_BIT = 0x00000001, + REFRESH_BUFFERUSAGE_INDEX_BIT = 0x00000002, + REFRESH_BUFFERUSAGE_STORAGE_BIT = 0x00000004 +} REFRESH_BufferUsageFlagBits; + +typedef uint32_t REFRESH_BufferUsageFlags; + typedef enum REFRESH_VertexElementFormat { REFRESH_VERTEXELEMENTFORMAT_SINGLE, @@ -499,18 +508,12 @@ typedef struct REFRESH_ShaderModuleCreateInfo /* Pipeline state structures */ -typedef struct REFRESH_ComputeShaderStageState -{ - REFRESH_ShaderModule *shaderModule; - const char* entryPointName; -} REFRESH_ComputeShaderStageState; - -typedef struct REFRESH_GraphicsShaderStageState +typedef struct REFRESH_ShaderStageState { REFRESH_ShaderModule *shaderModule; const char* entryPointName; uint64_t uniformBufferSize; -} REFRESH_GraphicsShaderStageState; +} REFRESH_ShaderStageState; typedef struct REFRESH_TopologyState { @@ -559,7 +562,7 @@ typedef struct REFRESH_DepthStencilState typedef struct REFRESH_ColorBlendState { - uint8_t blendOpEnable; + uint8_t logicOpEnable; REFRESH_LogicOp logicOp; const REFRESH_ColorTargetBlendState *blendStates; uint32_t blendStateCount; @@ -568,14 +571,14 @@ typedef struct REFRESH_ColorBlendState typedef struct REFRESH_ComputePipelineCreateInfo { - REFRESH_ComputeShaderStageState computeShaderState; + REFRESH_ShaderStageState computeShaderState; 
REFRESH_ComputePipelineLayoutCreateInfo pipelineLayoutCreateInfo; } REFRESH_ComputePipelineCreateInfo; typedef struct REFRESH_GraphicsPipelineCreateInfo { - REFRESH_GraphicsShaderStageState vertexShaderState; - REFRESH_GraphicsShaderStageState fragmentShaderState; + REFRESH_ShaderStageState vertexShaderState; + REFRESH_ShaderStageState fragmentShaderState; REFRESH_VertexInputState vertexInputState; REFRESH_TopologyState topologyState; REFRESH_ViewportState viewportState; @@ -731,15 +734,17 @@ REFRESHAPI void REFRESH_DrawPrimitives( /* Dispatches work compute items. * - * groupCountX: Number of local workgroups to dispatch in the X dimension. - * groupCountY: Number of local workgroups to dispatch in the Y dimension. - * groupCountZ: Number of local workgroups to dispatch in the Z dimension. + * groupCountX: Number of local workgroups to dispatch in the X dimension. + * groupCountY: Number of local workgroups to dispatch in the Y dimension. + * groupCountZ: Number of local workgroups to dispatch in the Z dimension. + * computeParamOffset: The offset of the compute shader param data. */ REFRESHAPI void REFRESH_DispatchCompute( REFRESH_Device *device, uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ + uint32_t groupCountZ, + uint32_t computeParamOffset ); /* State Creation */ @@ -864,21 +869,14 @@ REFRESHAPI REFRESH_DepthStencilTarget* REFRESH_CreateDepthStencilTarget( REFRESH_DepthFormat format ); -/* Creates a vertex buffer to be used by Draw commands. +/* Creates a buffer. * - * sizeInBytes: The length of the vertex buffer. + * usageFlags: Specifies how the buffer will be used. + * sizeInBytes: The length of the buffer. */ -REFRESHAPI REFRESH_Buffer* REFRESH_CreateVertexBuffer( - REFRESH_Device *device, - uint32_t sizeInBytes -); - -/* Creates an index buffer to be used by Draw commands. - * - * sizeInBytes: The length of the index buffer. 
- */ -REFRESHAPI REFRESH_Buffer* REFRESH_CreateIndexBuffer( +REFRESHAPI REFRESH_Buffer* REFRESH_CreateBuffer( REFRESH_Device *device, + REFRESH_BufferUsageFlags usageFlags, uint32_t sizeInBytes ); @@ -984,41 +982,18 @@ REFRESHAPI void REFRESH_SetTextureDataYUV( uint32_t dataLength ); -/* Sets a region of the vertex buffer with client data. +/* Sets a region of the buffer with client data. * * NOTE: * Calling this function on a buffer after the buffer - * has been bound by BindVertexBuffers without calling - * Submit first is an error. + * has been bound without calling Submit first is an error. * - * buffer: The vertex buffer to be updated. + * buffer: The buffer to be updated. * offsetInBytes: The starting offset of the buffer to write into. - * data: The client data to write into the buffer. - * elementCount: The number of elements from the client buffer to write. - * vertexStride: The size of each element in the client buffer (including padding). + * data: The client data to write into the buffer. + * dataLength: The length (in bytes) of the client data to write. */ -REFRESHAPI void REFRESH_SetVertexBufferData( - REFRESH_Device *device, - REFRESH_Buffer *buffer, - uint32_t offsetInBytes, - void* data, - uint32_t elementCount, - uint32_t vertexStride -); - -/* Sets a region of the index buffer with client data. - * - * NOTE: - * Calling this function on a buffer after the buffer - * has been bound by BindIndexBuffer without calling - * Submit first is an error. - * - * buffer: The index buffer to be updated. - * offsetInBytes: The starting offset of the buffer to write into. - * data: The client data to write into the buffer. - * dataLength: The size (in bytes) of the client data. 
- */ -REFRESHAPI void REFRESH_SetIndexBufferData( +REFRESHAPI void REFRESH_SetBufferData( REFRESH_Device *device, REFRESH_Buffer *buffer, uint32_t offsetInBytes, @@ -1046,7 +1021,7 @@ REFRESHAPI uint32_t REFRESH_PushVertexShaderParams( * Returns a starting offset value to be used with draw calls. * * NOTE: - * A pipeline must be bound. + * A graphics pipeline must be bound. * Will use the block size of the currently bound fragment shader. * * data: The client data to write into the buffer. @@ -1058,6 +1033,22 @@ REFRESHAPI uint32_t REFRESH_PushFragmentShaderParams( uint32_t paramBlockCount ); +/* Pushes compute shader params to the device. + * Returns a starting offset value to be used with dispatch calls. + * + * NOTE: + * A compute pipeline must be bound. + * Will use the block size of the currently bound compute shader. + * + * data: The client data to write into the buffer. + * paramBlockCount: The number of param-sized blocks from the client buffer to write. + */ +REFRESHAPI uint32_t REFRESH_PushComputeShaderParams( + REFRESH_Device *device, + void *data, + uint32_t paramBlockCount +); + /* Sets textures/samplers for use with the currently bound vertex shader. 
* * NOTE: diff --git a/src/Refresh.c b/src/Refresh.c index 31d0ea4..a553c75 100644 --- a/src/Refresh.c +++ b/src/Refresh.c @@ -245,14 +245,16 @@ void REFRESH_DispatchCompute( REFRESH_Device *device, uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ + uint32_t groupCountZ, + uint32_t computeParamOffset ) { NULL_RETURN(device); device->DispatchCompute( device->driverData, groupCountX, groupCountY, - groupCountZ + groupCountZ, + computeParamOffset ); } @@ -407,24 +409,15 @@ REFRESH_DepthStencilTarget* REFRESH_CreateDepthStencilTarget( ); } -REFRESH_Buffer* REFRESH_CreateVertexBuffer( +REFRESH_Buffer* REFRESH_CreateBuffer( REFRESH_Device *device, + REFRESH_BufferUsageFlags usageFlags, uint32_t sizeInBytes ) { NULL_RETURN_NULL(device); - return device->CreateVertexBuffer( - device->driverData, - sizeInBytes - ); -} - -REFRESH_Buffer* REFRESH_CreateIndexBuffer( - REFRESH_Device *device, - uint32_t sizeInBytes -) { - NULL_RETURN_NULL(device); - return device->CreateIndexBuffer( + return device->CreateBuffer( device->driverData, + usageFlags, sizeInBytes ); } @@ -537,26 +530,7 @@ void REFRESH_SetTextureDataYUV( ); } -void REFRESH_SetVertexBufferData( - REFRESH_Device *device, - REFRESH_Buffer *buffer, - uint32_t offsetInBytes, - void* data, - uint32_t elementCount, - uint32_t vertexStride -) { - NULL_RETURN(device); - device->SetVertexBufferData( - device->driverData, - buffer, - offsetInBytes, - data, - elementCount, - vertexStride - ); -} - -void REFRESH_SetIndexBufferData( +void REFRESH_SetBufferData( REFRESH_Device *device, REFRESH_Buffer *buffer, uint32_t offsetInBytes, @@ -564,7 +538,7 @@ void REFRESH_SetIndexBufferData( uint32_t dataLength ) { NULL_RETURN(device); - device->SetIndexBufferData( + device->SetBufferData( device->driverData, buffer, offsetInBytes, @@ -599,6 +573,19 @@ uint32_t REFRESH_PushFragmentShaderParams( ); } +uint32_t REFRESH_PushComputeShaderParams( + REFRESH_Device *device, + void *data, + uint32_t elementCount +) { + if 
(device == NULL) { return 0; } + return device->PushComputeShaderParams( + device->driverData, + data, + elementCount + ); +} + void REFRESH_SetVertexSamplers( REFRESH_Device *device, REFRESH_Texture **pTextures, diff --git a/src/Refresh_Driver.h b/src/Refresh_Driver.h index 6fbe843..ba4d660 100644 --- a/src/Refresh_Driver.h +++ b/src/Refresh_Driver.h @@ -216,7 +216,8 @@ struct REFRESH_Device REFRESH_Renderer *device, uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ + uint32_t groupCountZ, + uint32_t computeParamOffset ); /* State Creation */ @@ -291,13 +292,9 @@ struct REFRESH_Device REFRESH_DepthFormat format ); - REFRESH_Buffer* (*CreateVertexBuffer)( - REFRESH_Renderer *driverData, - uint32_t sizeInBytes - ); - - REFRESH_Buffer* (*CreateIndexBuffer)( + REFRESH_Buffer* (*CreateBuffer)( REFRESH_Renderer *driverData, + REFRESH_BufferUsageFlags usageFlags, uint32_t sizeInBytes ); @@ -355,16 +352,7 @@ struct REFRESH_Device uint32_t dataLength ); - void(*SetVertexBufferData)( - REFRESH_Renderer *driverData, - REFRESH_Buffer *buffer, - uint32_t offsetInBytes, - void* data, - uint32_t elementCount, - uint32_t vertexStride - ); - - void(*SetIndexBufferData)( + void(*SetBufferData)( REFRESH_Renderer *driverData, REFRESH_Buffer *buffer, uint32_t offsetInBytes, @@ -384,6 +372,12 @@ struct REFRESH_Device uint32_t elementCount ); + uint32_t (*PushComputeShaderParams)( + REFRESH_Renderer *driverData, + void *data, + uint32_t elementCount + ); + void(*SetVertexSamplers)( REFRESH_Renderer *driverData, REFRESH_Texture **pTextures, @@ -564,16 +558,15 @@ struct REFRESH_Device ASSIGN_DRIVER_FUNC(CreateTextureCube, name) \ ASSIGN_DRIVER_FUNC(CreateColorTarget, name) \ ASSIGN_DRIVER_FUNC(CreateDepthStencilTarget, name) \ - ASSIGN_DRIVER_FUNC(CreateVertexBuffer, name) \ - ASSIGN_DRIVER_FUNC(CreateIndexBuffer, name) \ + ASSIGN_DRIVER_FUNC(CreateBuffer, name) \ ASSIGN_DRIVER_FUNC(SetTextureData2D, name) \ ASSIGN_DRIVER_FUNC(SetTextureData3D, name) \ 
ASSIGN_DRIVER_FUNC(SetTextureDataCube, name) \ ASSIGN_DRIVER_FUNC(SetTextureDataYUV, name) \ - ASSIGN_DRIVER_FUNC(SetVertexBufferData, name) \ - ASSIGN_DRIVER_FUNC(SetIndexBufferData, name) \ + ASSIGN_DRIVER_FUNC(SetBufferData, name) \ ASSIGN_DRIVER_FUNC(PushVertexShaderParams, name) \ ASSIGN_DRIVER_FUNC(PushFragmentShaderParams, name) \ + ASSIGN_DRIVER_FUNC(PushComputeShaderParams, name) \ ASSIGN_DRIVER_FUNC(SetVertexSamplers, name) \ ASSIGN_DRIVER_FUNC(SetFragmentSamplers, name) \ ASSIGN_DRIVER_FUNC(GetTextureData2D, name) \ diff --git a/src/Refresh_Driver_Vulkan.c b/src/Refresh_Driver_Vulkan.c index 1326eff..df69506 100644 --- a/src/Refresh_Driver_Vulkan.c +++ b/src/Refresh_Driver_Vulkan.c @@ -140,6 +140,7 @@ typedef enum VulkanResourceAccessType RESOURCE_ACCESS_FRAGMENT_SHADER_READ_SAMPLED_IMAGE, RESOURCE_ACCESS_FRAGMENT_SHADER_READ_COLOR_ATTACHMENT, RESOURCE_ACCESS_FRAGMENT_SHADER_READ_DEPTH_STENCIL_ATTACHMENT, + RESOURCE_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER, RESOURCE_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE, RESOURCE_ACCESS_COLOR_ATTACHMENT_READ, RESOURCE_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ, @@ -509,6 +510,13 @@ static const VulkanResourceAccessInfo AccessMap[RESOURCE_ACCESS_TYPES_COUNT] = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL }, + /* RESOURCE_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER */ + { + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_ACCESS_UNIFORM_READ_BIT, + VK_IMAGE_LAYOUT_UNDEFINED + }, + /* RESOURCE_ACCESS_ANY_SHADER_READ_SAMPLED_IMAGE */ { VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, @@ -714,6 +722,9 @@ typedef struct VulkanComputePipeline VulkanComputePipelineLayout *pipelineLayout; VkDescriptorSet bufferDescriptorSet; /* updated by BindComputeBuffers */ VkDescriptorSet imageDescriptorSet; /* updated by BindComputeTextures */ + + VkDescriptorSet computeUBODescriptorSet; /* permanently set in Create function */ + VkDeviceSize computeUBOBlockSize; /* permanently set in Create function */ } VulkanComputePipeline; typedef struct VulkanTexture @@ 
-948,6 +959,7 @@ struct BufferDescriptorSetCache { VkDescriptorSetLayout descriptorSetLayout; uint32_t bindingCount; + VkDescriptorType descriptorType; BufferDescriptorSetHashArray buckets[NUM_DESCRIPTOR_SET_HASH_BUCKETS]; BufferDescriptorSetHashMap *elements; @@ -1049,6 +1061,7 @@ typedef struct ComputePipelineLayoutHash { VkDescriptorSetLayout bufferLayout; VkDescriptorSetLayout imageLayout; + VkDescriptorSetLayout uniformLayout; } ComputePipelineLayoutHash; typedef struct ComputePipelineLayoutHashMap @@ -1075,6 +1088,7 @@ static inline uint64_t ComputePipelineLayoutHashTable_GetHashCode(ComputePipelin uint64_t result = 1; result = result * HASH_FACTOR + (uint64_t) key.bufferLayout; result = result * HASH_FACTOR + (uint64_t) key.imageLayout; + result = result * HASH_FACTOR + (uint64_t) key.uniformLayout; return result; } @@ -1090,7 +1104,8 @@ static inline VulkanComputePipelineLayout* ComputePipelineLayoutHashArray_Fetch( { const ComputePipelineLayoutHash *e = &arr->elements[i].key; if ( key.bufferLayout == e->bufferLayout && - key.imageLayout == e->imageLayout ) + key.imageLayout == e->imageLayout && + key.uniformLayout == e->uniformLayout ) { return arr->elements[i].value; } @@ -1192,15 +1207,19 @@ typedef struct VulkanRenderer VkDescriptorSetLayout emptyVertexSamplerLayout; VkDescriptorSetLayout emptyFragmentSamplerLayout; VkDescriptorSetLayout emptyComputeBufferDescriptorSetLayout; + VkDescriptorSetLayout emptyComputeImageDescriptorSetLayout; VkDescriptorSet emptyVertexSamplerDescriptorSet; VkDescriptorSet emptyFragmentSamplerDescriptorSet; VkDescriptorSet emptyComputeBufferDescriptorSet; + VkDescriptorSet emptyComputeImageDescriptorSet; VkDescriptorSetLayout vertexParamLayout; VkDescriptorSetLayout fragmentParamLayout; + VkDescriptorSetLayout computeParamLayout; VulkanBuffer *dummyVertexUniformBuffer; VulkanBuffer *dummyFragmentUniformBuffer; + VulkanBuffer *dummyComputeUniformBuffer; VulkanBuffer *textureStagingBuffer; @@ -1214,6 +1233,7 @@ typedef struct 
VulkanRenderer VulkanBuffer *vertexUBO; VulkanBuffer *fragmentUBO; + VulkanBuffer *computeUBO; uint32_t minUBOAlignment; uint32_t vertexUBOOffset; @@ -1222,6 +1242,9 @@ typedef struct VulkanRenderer uint32_t fragmentUBOOffset; VkDeviceSize fragmentUBOBlockIncrement; + uint32_t computeUBOOffset; + VkDeviceSize computeUBOBlockIncrement; + uint32_t frameIndex; SDL_mutex *allocatorLock; @@ -3614,8 +3637,8 @@ static void VULKAN_DrawPrimitives( REFRESH_Renderer *driverData, uint32_t vertexStart, uint32_t primitiveCount, - uint32_t vertexUniformBufferOffset, - uint32_t fragmentUniformBufferOffset + uint32_t vertexParamOffset, + uint32_t fragmentParamOffset ) { VulkanRenderer *renderer = (VulkanRenderer*) driverData; VkDescriptorSet descriptorSets[4]; @@ -3626,8 +3649,8 @@ static void VULKAN_DrawPrimitives( descriptorSets[2] = renderer->currentGraphicsPipeline->vertexUBODescriptorSet; descriptorSets[3] = renderer->currentGraphicsPipeline->fragmentUBODescriptorSet; - dynamicOffsets[0] = vertexUniformBufferOffset; - dynamicOffsets[1] = fragmentUniformBufferOffset; + dynamicOffsets[0] = vertexParamOffset; + dynamicOffsets[1] = fragmentParamOffset; RECORD_CMD(renderer->vkCmdBindDescriptorSets( renderer->currentCommandBuffer, @@ -3656,25 +3679,27 @@ static void VULKAN_DispatchCompute( REFRESH_Renderer *driverData, uint32_t groupCountX, uint32_t groupCountY, - uint32_t groupCountZ + uint32_t groupCountZ, + uint32_t computeParamOffset ) { VulkanRenderer *renderer = (VulkanRenderer*) driverData; VulkanComputePipeline *computePipeline = renderer->currentComputePipeline; - VkDescriptorSet descriptorSets[2]; + VkDescriptorSet descriptorSets[3]; descriptorSets[0] = computePipeline->bufferDescriptorSet; descriptorSets[1] = computePipeline->imageDescriptorSet; + descriptorSets[2] = computePipeline->computeUBODescriptorSet; RECORD_CMD(renderer->vkCmdBindDescriptorSets( renderer->currentCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline->pipelineLayout->pipelineLayout, 0, - 2, 
+ 3, descriptorSets, - 0, - NULL + 1, + &computeParamOffset )); RECORD_CMD(renderer->vkCmdDispatch( @@ -4023,6 +4048,7 @@ static ImageDescriptorSetCache* VULKAN_INTERNAL_CreateImageDescriptorSetCache( static BufferDescriptorSetCache* VULKAN_INTERNAL_CreateBufferDescriptorSetCache( VulkanRenderer *renderer, + VkDescriptorType descriptorType, VkDescriptorSetLayout descriptorSetLayout, uint32_t bindingCount ) { @@ -4040,6 +4066,10 @@ static BufferDescriptorSetCache* VULKAN_INTERNAL_CreateBufferDescriptorSetCache( bufferDescriptorSetCache->buckets[i].capacity = 0; } + bufferDescriptorSetCache->descriptorSetLayout = descriptorSetLayout; + bufferDescriptorSetCache->bindingCount = bindingCount; + bufferDescriptorSetCache->descriptorType = descriptorType; + bufferDescriptorSetCache->bufferDescriptorPools = SDL_malloc(sizeof(VkDescriptorPool)); bufferDescriptorSetCache->bufferDescriptorPoolCount = 1; bufferDescriptorSetCache->nextPoolSize = DESCRIPTOR_POOL_STARTING_SIZE * 2; @@ -4096,7 +4126,19 @@ static VkDescriptorSetLayout VULKAN_INTERNAL_FetchDescriptorSetLayout( } else if (shaderStageFlagBit == VK_SHADER_STAGE_COMPUTE_BIT) { - return renderer->emptyComputeBufferDescriptorSetLayout; + if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) + { + return renderer->emptyComputeBufferDescriptorSetLayout; + } + else if (descriptorType == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) + { + return renderer->emptyComputeImageDescriptorSetLayout; + } + else + { + REFRESH_LogError("Invalid descriptor type for compute shader: %d", (int) descriptorType); + return NULL_DESC_LAYOUT; + } } else { @@ -4548,7 +4590,7 @@ static REFRESH_GraphicsPipeline* VULKAN_CreateGraphicsPipeline( colorBlendStateCreateInfo.pNext = NULL; colorBlendStateCreateInfo.flags = 0; colorBlendStateCreateInfo.logicOpEnable = - pipelineCreateInfo->colorBlendState.blendOpEnable; + pipelineCreateInfo->colorBlendState.logicOpEnable; colorBlendStateCreateInfo.logicOp = RefreshToVK_LogicOp[ pipelineCreateInfo->colorBlendState.logicOp ]; 
@@ -4715,7 +4757,7 @@ static VulkanComputePipelineLayout* VULKAN_INTERNAL_FetchComputePipelineLayout( uint32_t imageBindingCount ) { VkResult vulkanResult; - VkDescriptorSetLayout setLayouts[2]; + VkDescriptorSetLayout setLayouts[3]; VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo; ComputePipelineLayoutHash pipelineLayoutHash; VulkanComputePipelineLayout *vulkanComputePipelineLayout; @@ -4734,6 +4776,8 @@ static VulkanComputePipelineLayout* VULKAN_INTERNAL_FetchComputePipelineLayout( VK_SHADER_STAGE_COMPUTE_BIT ); + pipelineLayoutHash.uniformLayout = renderer->computeParamLayout; + vulkanComputePipelineLayout = ComputePipelineLayoutHashArray_Fetch( &renderer->computePipelineLayoutHashTable, pipelineLayoutHash @@ -4748,11 +4792,12 @@ static VulkanComputePipelineLayout* VULKAN_INTERNAL_FetchComputePipelineLayout( setLayouts[0] = pipelineLayoutHash.bufferLayout; setLayouts[1] = pipelineLayoutHash.imageLayout; + setLayouts[2] = pipelineLayoutHash.uniformLayout; pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; pipelineLayoutCreateInfo.pNext = NULL; pipelineLayoutCreateInfo.flags = 0; - pipelineLayoutCreateInfo.setLayoutCount = 2; + pipelineLayoutCreateInfo.setLayoutCount = 3; pipelineLayoutCreateInfo.pSetLayouts = setLayouts; pipelineLayoutCreateInfo.pushConstantRangeCount = 0; pipelineLayoutCreateInfo.pPushConstantRanges = NULL; @@ -4790,6 +4835,7 @@ static VulkanComputePipelineLayout* VULKAN_INTERNAL_FetchComputePipelineLayout( vulkanComputePipelineLayout->bufferDescriptorSetCache = VULKAN_INTERNAL_CreateBufferDescriptorSetCache( renderer, + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, pipelineLayoutHash.bufferLayout, bufferBindingCount ); @@ -4819,7 +4865,10 @@ static REFRESH_ComputePipeline* VULKAN_CreateComputePipeline( ) { VkComputePipelineCreateInfo computePipelineCreateInfo; VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo; - VulkanComputePipelineLayout *computePipelineLayout; + + VkDescriptorSetAllocateInfo 
descriptorSetAllocateInfo; + VkDescriptorBufferInfo uniformBufferInfo; + VkWriteDescriptorSet writeDescriptorSet; VulkanRenderer *renderer = (VulkanRenderer*) driverData; VulkanComputePipeline *vulkanComputePipeline = SDL_malloc(sizeof(VulkanComputePipeline)); @@ -4832,7 +4881,7 @@ static REFRESH_ComputePipeline* VULKAN_CreateComputePipeline( pipelineShaderStageCreateInfo.pName = pipelineCreateInfo->computeShaderState.entryPointName; pipelineShaderStageCreateInfo.pSpecializationInfo = NULL; - computePipelineLayout = VULKAN_INTERNAL_FetchComputePipelineLayout( + vulkanComputePipeline->pipelineLayout = VULKAN_INTERNAL_FetchComputePipelineLayout( renderer, pipelineCreateInfo->pipelineLayoutCreateInfo.bufferBindingCount, pipelineCreateInfo->pipelineLayoutCreateInfo.imageBindingCount @@ -4842,7 +4891,8 @@ static REFRESH_ComputePipeline* VULKAN_CreateComputePipeline( computePipelineCreateInfo.pNext = NULL; computePipelineCreateInfo.flags = 0; computePipelineCreateInfo.stage = pipelineShaderStageCreateInfo; - computePipelineCreateInfo.layout = computePipelineLayout->pipelineLayout; + computePipelineCreateInfo.layout = + vulkanComputePipeline->pipelineLayout->pipelineLayout; computePipelineCreateInfo.basePipelineHandle = NULL; computePipelineCreateInfo.basePipelineIndex = 0; @@ -4855,6 +4905,56 @@ static REFRESH_ComputePipeline* VULKAN_CreateComputePipeline( &vulkanComputePipeline->pipeline ); + vulkanComputePipeline->computeUBOBlockSize = + VULKAN_INTERNAL_NextHighestAlignment( + pipelineCreateInfo->computeShaderState.uniformBufferSize, + renderer->minUBOAlignment + ); + + descriptorSetAllocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptorSetAllocateInfo.pNext = NULL; + descriptorSetAllocateInfo.descriptorPool = renderer->defaultDescriptorPool; + descriptorSetAllocateInfo.descriptorSetCount = 1; + descriptorSetAllocateInfo.pSetLayouts = &renderer->computeParamLayout; + + renderer->vkAllocateDescriptorSets( + renderer->logicalDevice, + 
&descriptorSetAllocateInfo, + &vulkanComputePipeline->computeUBODescriptorSet + ); + + if (vulkanComputePipeline->computeUBOBlockSize == 0) + { + uniformBufferInfo.buffer = renderer->dummyComputeUniformBuffer->subBuffers[0]->buffer; + uniformBufferInfo.offset = 0; + uniformBufferInfo.range = renderer->dummyComputeUniformBuffer->subBuffers[0]->size; + } + else + { + uniformBufferInfo.buffer = renderer->computeUBO->subBuffers[0]->buffer; + uniformBufferInfo.offset = 0; + uniformBufferInfo.range = vulkanComputePipeline->computeUBOBlockSize; + } + + writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSet.pNext = NULL; + writeDescriptorSet.descriptorCount = 1; + writeDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + writeDescriptorSet.dstArrayElement = 0; + writeDescriptorSet.dstBinding = 0; + writeDescriptorSet.dstSet = vulkanComputePipeline->computeUBODescriptorSet; + writeDescriptorSet.pBufferInfo = &uniformBufferInfo; + writeDescriptorSet.pImageInfo = NULL; + writeDescriptorSet.pTexelBufferView = NULL; + + renderer->vkUpdateDescriptorSets( + renderer->logicalDevice, + 1, + &writeDescriptorSet, + 0, + NULL + ); + return (REFRESH_ComputePipeline*) vulkanComputePipeline; } @@ -5449,17 +5549,35 @@ static REFRESH_DepthStencilTarget* VULKAN_CreateDepthStencilTarget( return (REFRESH_DepthStencilTarget*) depthStencilTarget; } -static REFRESH_Buffer* VULKAN_CreateVertexBuffer( +static REFRESH_Buffer* VULKAN_CreateBuffer( REFRESH_Renderer *driverData, + REFRESH_BufferUsageFlags usageFlags, uint32_t sizeInBytes ) { VulkanBuffer *buffer = (VulkanBuffer*) SDL_malloc(sizeof(VulkanBuffer)); + VkBufferUsageFlags vulkanUsageFlags = 0; + + if (usageFlags & REFRESH_BUFFERUSAGE_VERTEX_BIT) + { + vulkanUsageFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + } + + if (usageFlags & REFRESH_BUFFERUSAGE_INDEX_BIT) + { + vulkanUsageFlags |= VK_BUFFER_USAGE_INDEX_BUFFER_BIT; + } + + if (usageFlags & REFRESH_BUFFERUSAGE_STORAGE_BIT) + { + 
vulkanUsageFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + } + if(!VULKAN_INTERNAL_CreateBuffer( (VulkanRenderer*) driverData, sizeInBytes, RESOURCE_ACCESS_VERTEX_BUFFER, - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + vulkanUsageFlags, SUB_BUFFER_COUNT, buffer )) { @@ -5470,27 +5588,6 @@ static REFRESH_Buffer* VULKAN_CreateVertexBuffer( return (REFRESH_Buffer*) buffer; } -static REFRESH_Buffer* VULKAN_CreateIndexBuffer( - REFRESH_Renderer *driverData, - uint32_t sizeInBytes -) { - VulkanBuffer *buffer = (VulkanBuffer*) SDL_malloc(sizeof(VulkanBuffer)); - - if (!VULKAN_INTERNAL_CreateBuffer( - (VulkanRenderer*) driverData, - sizeInBytes, - RESOURCE_ACCESS_INDEX_BUFFER, - VK_BUFFER_USAGE_INDEX_BUFFER_BIT, - SUB_BUFFER_COUNT, - buffer - )) { - REFRESH_LogError("Failed to create index buffer!"); - return NULL; - } - - return (REFRESH_Buffer*) buffer; -} - /* Setters */ static void VULKAN_INTERNAL_MaybeExpandStagingBuffer( @@ -6000,9 +6097,9 @@ static void VULKAN_SetTextureDataYUV( VULKAN_Submit(driverData); } -static void VULKAN_INTERNAL_SetBufferData( - REFRESH_Renderer* driverData, - REFRESH_Buffer* buffer, +static void VULKAN_SetBufferData( + REFRESH_Renderer *driverData, + REFRESH_Buffer *buffer, uint32_t offsetInBytes, void* data, uint32_t dataLength @@ -6066,39 +6163,6 @@ static void VULKAN_INTERNAL_SetBufferData( #undef SUBBUF } -static void VULKAN_SetVertexBufferData( - REFRESH_Renderer *driverData, - REFRESH_Buffer *buffer, - uint32_t offsetInBytes, - void* data, - uint32_t elementCount, - uint32_t vertexStride -) { - VULKAN_INTERNAL_SetBufferData( - driverData, - buffer, - offsetInBytes, - data, - elementCount * vertexStride - ); -} - -static void VULKAN_SetIndexBufferData( - REFRESH_Renderer *driverData, - REFRESH_Buffer *buffer, - uint32_t offsetInBytes, - void* data, - uint32_t dataLength -) { - VULKAN_INTERNAL_SetBufferData( - driverData, - buffer, - offsetInBytes, - data, - dataLength - ); -} - static uint32_t VULKAN_PushVertexShaderParams( REFRESH_Renderer 
*driverData, void *data, @@ -6118,7 +6182,7 @@ static uint32_t VULKAN_PushVertexShaderParams( return 0; } - VULKAN_INTERNAL_SetBufferData( + VULKAN_SetBufferData( driverData, (REFRESH_Buffer*) renderer->vertexUBO, renderer->vertexUBOOffset, @@ -6148,7 +6212,7 @@ static uint32_t VULKAN_PushFragmentShaderParams( return 0; } - VULKAN_INTERNAL_SetBufferData( + VULKAN_SetBufferData( driverData, (REFRESH_Buffer*) renderer->fragmentUBO, renderer->fragmentUBOOffset, @@ -6159,6 +6223,36 @@ static uint32_t VULKAN_PushFragmentShaderParams( return renderer->fragmentUBOOffset; } +static uint32_t VULKAN_PushComputeShaderParams( + REFRESH_Renderer *driverData, + void *data, + uint32_t elementCount +) { + VulkanRenderer* renderer = (VulkanRenderer*)driverData; + + renderer->computeUBOOffset += renderer->computeUBOBlockIncrement; + renderer->computeUBOBlockIncrement = renderer->currentComputePipeline->computeUBOBlockSize; + + if ( + renderer->computeUBOOffset + + renderer->currentComputePipeline->computeUBOBlockSize >= + UBO_BUFFER_SIZE * (renderer->frameIndex + 1) + ) { + REFRESH_LogError("Compute UBO overflow!"); + return 0; + } + + VULKAN_SetBufferData( + driverData, + (REFRESH_Buffer*) renderer->computeUBO, + renderer->computeUBOOffset, + data, + elementCount * renderer->currentComputePipeline->computeUBOBlockSize + ); + + return renderer->computeUBOOffset; +} + static inline uint8_t BufferDescriptorSetDataEqual( BufferDescriptorSetData *a, BufferDescriptorSetData *b, @@ -7040,7 +7134,7 @@ static void VULKAN_BindGraphicsPipeline( VulkanRenderer* renderer = (VulkanRenderer*) driverData; VulkanGraphicsPipeline* pipeline = (VulkanGraphicsPipeline*) graphicsPipeline; - /* bind dummy samplers */ + /* bind dummy sets */ if (pipeline->pipelineLayout->vertexSamplerDescriptorSetCache == NULL) { pipeline->vertexSamplerDescriptorSet = renderer->emptyVertexSamplerDescriptorSet; @@ -7141,13 +7235,24 @@ static void VULKAN_BindComputePipeline( VulkanRenderer *renderer = (VulkanRenderer*) 
driverData; VulkanComputePipeline *vulkanComputePipeline = (VulkanComputePipeline*) computePipeline; + /* bind dummy sets */ + if (vulkanComputePipeline->pipelineLayout->bufferDescriptorSetCache == NULL) + { + vulkanComputePipeline->bufferDescriptorSet = renderer->emptyComputeBufferDescriptorSet; + } + + if (vulkanComputePipeline->pipelineLayout->imageDescriptorSetCache == NULL) + { + vulkanComputePipeline->imageDescriptorSet = renderer->emptyComputeImageDescriptorSet; + } + renderer->vkCmdBindPipeline( renderer->currentCommandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, vulkanComputePipeline->pipeline ); - renderer->currentComputePipeline = NULL; + renderer->currentComputePipeline = vulkanComputePipeline; } static void VULKAN_BindComputeBuffers( @@ -8215,16 +8320,18 @@ static REFRESH_Device* VULKAN_CreateDevice( /* Variables: Descriptor set layouts */ VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo; - VkDescriptorSetLayoutBinding emptyVertexSamplerLayoutBinding; - VkDescriptorSetLayoutBinding emptyFragmentSamplerLayoutBinding; VkDescriptorSetLayoutBinding vertexParamLayoutBinding; VkDescriptorSetLayoutBinding fragmentParamLayoutBinding; + VkDescriptorSetLayoutBinding computeParamLayoutBinding; + VkDescriptorSetLayoutBinding emptyVertexSamplerLayoutBinding; + VkDescriptorSetLayoutBinding emptyFragmentSamplerLayoutBinding; VkDescriptorSetLayoutBinding emptyComputeBufferDescriptorSetLayoutBinding; + VkDescriptorSetLayoutBinding emptyComputeImageDescriptorSetLayoutBinding; /* Variables: UBO Creation */ VkDescriptorPoolCreateInfo defaultDescriptorPoolInfo; - VkDescriptorPoolSize poolSizes[3]; + VkDescriptorPoolSize poolSizes[4]; VkDescriptorSetAllocateInfo descriptorAllocateInfo; result = (REFRESH_Device*) SDL_malloc(sizeof(REFRESH_Device)); @@ -8517,11 +8624,27 @@ static REFRESH_Device* VULKAN_CreateDevice( return NULL; } + renderer->computeUBO = (VulkanBuffer*) SDL_malloc(sizeof(VulkanBuffer)); + + if (!VULKAN_INTERNAL_CreateBuffer( + renderer, + UBO_ACTUAL_SIZE, + 
RESOURCE_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + 1, + renderer->computeUBO + )) { + REFRESH_LogError("Failed to create compute UBO!"); + return NULL; + } + renderer->minUBOAlignment = renderer->physicalDeviceProperties.properties.limits.minUniformBufferOffsetAlignment; renderer->vertexUBOOffset = 0; renderer->vertexUBOBlockIncrement = 0; renderer->fragmentUBOOffset = 0; renderer->fragmentUBOBlockIncrement = 0; + renderer->computeUBOOffset = 0; + renderer->computeUBOBlockIncrement = 0; /* Set up UBO layouts */ @@ -8574,15 +8697,27 @@ static REFRESH_Device* VULKAN_CreateDevice( &renderer->emptyComputeBufferDescriptorSetLayout ); + emptyComputeImageDescriptorSetLayoutBinding.binding = 0; + emptyComputeImageDescriptorSetLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + emptyComputeImageDescriptorSetLayoutBinding.descriptorCount = 0; + emptyComputeImageDescriptorSetLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + emptyComputeImageDescriptorSetLayoutBinding.pImmutableSamplers = NULL; + + setLayoutCreateInfo.pBindings = &emptyComputeImageDescriptorSetLayoutBinding; + + vulkanResult = renderer->vkCreateDescriptorSetLayout( + renderer->logicalDevice, + &setLayoutCreateInfo, + NULL, + &renderer->emptyComputeImageDescriptorSetLayout + ); + vertexParamLayoutBinding.binding = 0; vertexParamLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; vertexParamLayoutBinding.descriptorCount = 1; vertexParamLayoutBinding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; vertexParamLayoutBinding.pImmutableSamplers = NULL; - setLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - setLayoutCreateInfo.pNext = NULL; - setLayoutCreateInfo.flags = 0; setLayoutCreateInfo.bindingCount = 1; setLayoutCreateInfo.pBindings = &vertexParamLayoutBinding; @@ -8621,6 +8756,22 @@ static REFRESH_Device* VULKAN_CreateDevice( return NULL; } + computeParamLayoutBinding.binding = 0; + 
computeParamLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; + computeParamLayoutBinding.descriptorCount = 1; + computeParamLayoutBinding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + computeParamLayoutBinding.pImmutableSamplers = NULL; + + setLayoutCreateInfo.bindingCount = 1; + setLayoutCreateInfo.pBindings = &computeParamLayoutBinding; + + vulkanResult = renderer->vkCreateDescriptorSetLayout( + renderer->logicalDevice, + &setLayoutCreateInfo, + NULL, + &renderer->computeParamLayout + ); + /* Default Descriptors */ /* default empty sampler descriptor sets */ @@ -8634,11 +8785,14 @@ static REFRESH_Device* VULKAN_CreateDevice( poolSizes[2].descriptorCount = 1; poolSizes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + poolSizes[3].descriptorCount = 1; + poolSizes[3].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + defaultDescriptorPoolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; defaultDescriptorPoolInfo.pNext = NULL; defaultDescriptorPoolInfo.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; - defaultDescriptorPoolInfo.maxSets = UBO_POOL_SIZE + 2 + 1; - defaultDescriptorPoolInfo.poolSizeCount = 3; + defaultDescriptorPoolInfo.maxSets = UBO_POOL_SIZE + 2 + 1 + 1; + defaultDescriptorPoolInfo.poolSizeCount = 4; defaultDescriptorPoolInfo.pPoolSizes = poolSizes; renderer->vkCreateDescriptorPool( @@ -8676,6 +8830,14 @@ static REFRESH_Device* VULKAN_CreateDevice( &renderer->emptyComputeBufferDescriptorSet ); + descriptorAllocateInfo.pSetLayouts = &renderer->emptyComputeImageDescriptorSetLayout; + + renderer->vkAllocateDescriptorSets( + renderer->logicalDevice, + &descriptorAllocateInfo, + &renderer->emptyComputeImageDescriptorSet + ); + /* Initialize buffer space */ renderer->buffersInUseCapacity = 32; @@ -8736,6 +8898,20 @@ static REFRESH_Device* VULKAN_CreateDevice( return NULL; } + renderer->dummyComputeUniformBuffer = (VulkanBuffer*) SDL_malloc(sizeof(VulkanBuffer)); + + if (!VULKAN_INTERNAL_CreateBuffer( + renderer, + 16, + 
RESOURCE_ACCESS_COMPUTE_SHADER_READ_UNIFORM_BUFFER, + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + 1, + renderer->dummyComputeUniformBuffer + )) { + REFRESH_LogError("Failed to create dummy compute uniform buffer!"); + return NULL; + } + /* Initialize caches */ for (i = 0; i < NUM_PIPELINE_LAYOUT_BUCKETS; i += 1)