I am using a compute shader to process the input buffer data and store the result in an output texture using imageStore().
After executing the compute shader, I issue 3 render calls sequentially.
Compute Shader Code:
#version 310 es
// Compute shader: each invocation reads one uint from the SSBO at binding 0
// and writes one texel to the rgba32f image at binding 1.
precision mediump image2D;
layout(std430) buffer; // Sets the default layout for SSBOs
layout(local_size_x = 256) in; // 256 threads per work group
layout(binding = 0) readonly buffer InputBuf
{
    uint input_buff[];
} inputbuff;
layout (rgba32f, binding = 1) uniform writeonly image2D out_teximg;
void main()
{
    int idx = int(gl_GlobalInvocationID.x);
    int idy = int(gl_GlobalInvocationID.y);
    // The block is declared with an instance name, so its array member has to
    // be reached through that name. GLSL spells the unsigned type `uint`;
    // `unsigned` is a reserved word.
    uint inputpix = inputbuff.input_buff[1024 * idy + idx];
    // some calculation on inputpix and output is rcolor, bcolor, gcolor
    imageStore(out_teximg, ivec2(idx, idy), vec4(rcolor, bcolor, gcolor, 1.0));
    // No barrier() here: it only synchronises invocations inside one work
    // group and nothing after the store depends on other invocations.
    // Visibility of the image writes to later GL commands is handled on the
    // API side with glMemoryBarrier().
}
Code:
/**
 * Generates the GL object names used by the compute pass: one buffer name
 * stored in inSSBOId and one texture name stored in computeOutTex.
 * Only names are created here; no storage is allocated.
 */
void initCompute()
{
    glGenBuffers(1, &inSSBOId);
    glGenTextures(1, &computeOutTex);
}
// Source data that execute_compute() uploads into the shader storage buffer
// (contents elided in this listing).
// NOTE(review): declared as a scalar here but passed to glBufferData as the
// data pointer — presumably meant to be an array; confirm.
uint inputBuffData = { .... }; // input buffer data
void execute_compute()
{
// compute shader code starts...
glUseProgram(computePgmId);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, computeOutTex);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA32F, width, height);
glBindImageTexture(1, computeOutTex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); // binding is 1
glUniform1i( glGetUniformLocation(computePgmId, "out_teximg"), 0);
uint inputBuffSize = 1024 * 512 * 3;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, inSSBOId);
glBufferData(GL_SHADER_STORAGE_BUFFER, inputBuffSize, inputBuffData, GL_STATIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0 , inSSBOId); // binding is 0
glDispatchCompute(width / 256, height, 1);
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
// glFinish();
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
glBindImageTexture(1, 0, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA32F); // binding is 1
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 0);// binding is 0
}
/**
 * Renders one frame into the offscreen framebuffer m_FBOId.
 *
 * Clears the colour buffer, runs the compute pass, then issues three draw
 * calls; the first one draws with computeOutTex bound to texture unit 0.
 * The default framebuffer is re-bound before returning.
 *
 * @return 0. The original body had no return statement, which is undefined
 *         behaviour for a function declared to return int.
 *         NOTE(review): confirm the value callers expect.
 */
int draw()
{
    glBindFramebuffer(GL_FRAMEBUFFER, m_FBOId); // Offscreen Rendering
    glClear(GL_COLOR_BUFFER_BIT);

    // execute_compute() selects the compute program itself, so no
    // glUseProgram call is needed before it.
    execute_compute();

    // 1st draw call
    glUseProgram(render_pgm1);
    glViewport(0,0,w,h);
    glActiveTexture(GL_TEXTURE0);
    glBindTexture(GL_TEXTURE_2D, computeOutTex);
    glDrawElements(); // Render the texture data

    // 2nd draw call
    glUseProgram(render_pgm2);
....
....
    glDrawElements();

    // 3rd draw call
    glUseProgram(render_pgm3);
....
....
    glDrawElements();

    glBindFramebuffer(GL_FRAMEBUFFER, 0); // unbind FBO
    return 0;
}
Here, only the 2nd draw call takes more time after the compute shader is used.
If glFinish() is called after glMemoryBarrier(), then only the execute_compute() call is slowed down. Why is the compute shader slowing down the subsequent draw calls? Is glFinish() really needed?