Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Capacity (in entries) of the per-thread traversal stack. The same stack is
// shared by top-level (TLAS) and bottom-level (BLAS) traversal.
#define BVH_STACK_SIZE 64

// Test traversal kernel: one thread per viewport pixel casts the primary ray
// stored in Rays[] (ray directions are generated by a separate kernel) through
// a two-level BVH and writes the result to Output[DTid.xy] as
//   (hit barycentric u, hit barycentric v, 0.1 * traversal steps, 1.0).
//
// GI and GTid are unused; they are kept so the entry-point signature is unchanged.
[numthreads(32, 32, 1)]
void RenderPass(uint3 GI : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID)
{
    // Resolution is passed as raw uint bits inside a float vector.
    uint width = asuint(ResolutionParams.x);
    uint height = asuint(ResolutionParams.y);

    // Threads of partially covered groups fall outside the viewport. Without
    // this guard they would fetch a ray belonging to another pixel (or an
    // out-of-range one) and run a complete, useless traversal.
    if (DTid.x >= width || DTid.y >= height)
    {
        return;
    }

    uint ray_index = DTid.y * width + DTid.x;

    // Value stored at the bottom of the stack; popping it ends the traversal.
    const uint STACK_SENTINEL = 0xFFFFFFFF;

    // Fetch the world-space ray and derive the terms used by the slab test:
    // (bound * inv - origin * inv) is the ray parameter at a box plane.
    Ray ray = Rays[ray_index];
    float4 o = ray.Origin;
    float4 d = ray.Direction;
    float4 inv = ray.Inverse;
    float4 oinv = o * inv;

    // Traversal stack; slot 0 holds the sentinel.
    uint node_id = 0;
    uint stack[BVH_STACK_SIZE];
    int stack_ptr = 0;
    stack[stack_ptr] = STACK_SENTINEL;

    // Stack height recorded when a BLAS was entered, or -1 while in the TLAS.
    // Once stack_ptr drops back to this value the BLAS is exhausted.
    int meshbvh_stack_ptr = -1;

    // Traversal results.
    float tmin = 0.0f;
    float tmax = 10000.0f;
    float bU = 0.0f;
    float bV = 0.0f;

    // Grows by 0.1 per loop iteration; written to Output.z as a step-count visualisation.
    float step_heat = 0.0f;

    InstanceNode instance = Instances[0];

    // NOTE (from the original author): logically this is
    // `while (node_id != sentinel)`, but both that form and `for (;;)` were
    // measured at 100-200 ms for 1920x1080 rays, whereas a fixed iteration
    // bound renders in under 15 ms even though the traversal never takes
    // 1000 steps. The literal bound is therefore kept on purpose; the real
    // exit is the sentinel check at the bottom of the loop. Speculative
    // while-while or persistent-thread traversal would give better occupancy
    // but showed the same slowdown when attempted.
    [loop] for (int step = 0; step < 1000; step++)
    {
        step_heat += 0.1f;

        // Set when this iteration must continue with the next stacked node.
        bool pop_next = false;

        // Interior node
        [branch] if (ASTreeData[node_id].PrimitiveCount == 0)
        {
            // Child bounds: LXY / RXY hold (x lo, x hi, y lo, y hi) of the
            // first / second child, LRZ holds (z lo, z hi) of both children.
            float4 n0xy = ASTreeData[node_id].LXY;
            float4 n1xy = ASTreeData[node_id].RXY;
            float4 nz = ASTreeData[node_id].LRZ;

            // Slab test against the first child.
            float c0lox = n0xy.x * inv.x - oinv.x;
            float c0hix = n0xy.y * inv.x - oinv.x;
            float c0loy = n0xy.z * inv.y - oinv.y;
            float c0hiy = n0xy.w * inv.y - oinv.y;
            float c0loz = nz.x * inv.z - oinv.z;
            float c0hiz = nz.y * inv.z - oinv.z;
            float c0min = max(max(min(c0lox, c0hix), min(c0loy, c0hiy)), max(min(c0loz, c0hiz), tmin));
            float c0max = min(min(max(c0lox, c0hix), max(c0loy, c0hiy)), min(max(c0loz, c0hiz), tmax));

            // Slab test against the second child.
            float c1lox = n1xy.x * inv.x - oinv.x;
            float c1hix = n1xy.y * inv.x - oinv.x;
            float c1loy = n1xy.z * inv.y - oinv.y;
            float c1hiy = n1xy.w * inv.y - oinv.y;
            float c1loz = nz.z * inv.z - oinv.z;
            float c1hiz = nz.w * inv.z - oinv.z;
            float c1min = max(max(min(c1lox, c1hix), min(c1loy, c1hiy)), max(min(c1loz, c1hiz), tmin));
            float c1max = min(min(max(c1lox, c1hix), max(c1loy, c1hiy)), min(max(c1loz, c1hiz), tmax));

            bool traverseChild0 = (c0max >= c0min);
            bool traverseChild1 = (c1max >= c1min);

            // The first child is stored right after its parent; the index of
            // the second child is kept in PrimitiveOffset.
            uint first_child = node_id + 1;
            uint second_child = ASTreeData[node_id].PrimitiveOffset;

            if (traverseChild0 && traverseChild1)
            {
                // Both hit: descend into the nearer child, defer the farther one.
                bool second_is_nearer = (c1min < c0min);
                uint far_child = second_is_nearer ? first_child : second_child;
                node_id = second_is_nearer ? second_child : first_child;

                // Never write past the stack. If the tree is deeper than the
                // stack allows, the far subtree is skipped (geometry may be
                // missed) instead of indexing out of bounds.
                if (stack_ptr + 1 < BVH_STACK_SIZE)
                {
                    stack_ptr++;
                    stack[stack_ptr] = far_child;
                }
            }
            else if (traverseChild0)
            {
                node_id = first_child;
            }
            else if (traverseChild1)
            {
                node_id = second_child;
            }
            else
            {
                // Neither child was hit.
                pop_next = true;
            }
        }
        // Leaf node of TLAS
        else if (ASTreeData[node_id].PrimitiveCount == -1)
        {
            // Remember the stack height so we know when the BLAS is exhausted,
            // then continue in the instance's BLAS with the ray transformed by
            // the instance's inverse transform.
            meshbvh_stack_ptr = stack_ptr;
            uint blas_offset = ASTreeData[node_id].PrimitiveOffset;
            uint instance_index = ASIndexData[blas_offset];
            instance = Instances[instance_index];
            node_id = ASTreeNodes[Geometries[instance.GeometryNode].BVHNode + 1].Offset / 64;
            o = mul(ray.Origin, instance.TransformInverse);
            d = mul(ray.Direction, instance.TransformInverse);
            inv = rcp(d);
            oinv = o * inv;
        }
        // Leaf node of BLAS
        else
        {
            // Intersect ALL triangles in the node.
            if (ASTreeData[node_id].PrimitiveCount > 0)
            {
                GeometryNode geom = Geometries[instance.GeometryNode];
                MemoryNode wbo = WoopNodes[geom.WoopBufferNode];

                // Index of this geometry's first float4 in WoopData.
                uint woop_base = wbo.Offset / 16;

                for (uint j = 0; j < ASTreeData[node_id].PrimitiveCount; j++)
                {
                    // Each triangle occupies three consecutive float4 rows.
                    uint tri_idx = ASIndexData[ASTreeData[node_id].PrimitiveOffset + j] * 3;
                    float4 row_t = WoopData[woop_base + tri_idx + 0];
                    float4 row_u = WoopData[woop_base + tri_idx + 1];
                    float4 row_v = WoopData[woop_base + tri_idx + 2];

                    // Ray parameter at the triangle plane. A ray parallel to
                    // the plane yields inf/NaN, which fails the range test.
                    float o_z = row_t.w - o.x * row_t.x - o.y * row_t.y - o.z * row_t.z;
                    float i_z = 1.0f / (d.x * row_t.x + d.y * row_t.y + d.z * row_t.z);
                    float t = o_z * i_z;
                    if (t > tmin && t < tmax)
                    {
                        float o_x = row_u.w + o.x * row_u.x + o.y * row_u.y + o.z * row_u.z;
                        float d_x = d.x * row_u.x + d.y * row_u.y + d.z * row_u.z;
                        float u = o_x + t * d_x;
                        if (u >= 0.0f && u <= 1.0f)
                        {
                            float o_y = row_v.w + o.x * row_v.x + o.y * row_v.y + o.z * row_v.z;
                            float d_y = d.x * row_v.x + d.y * row_v.y + d.z * row_v.z;
                            float v = o_y + t * d_y;
                            if (v >= 0.0f && u + v <= 1.0f)
                            {
                                // Closest hit so far: shrink the ray interval.
                                tmax = t;
                                bU = u;
                                bV = v;
                            }
                        }
                    }
                }
            }
            pop_next = true;
        }

        if (pop_next)
        {
            // The BLAS is exhausted: the entry about to be popped belongs to
            // the TLAS, so switch back to the untransformed ray.
            if (stack_ptr == meshbvh_stack_ptr)
            {
                meshbvh_stack_ptr = -1;
                o = ray.Origin;
                d = ray.Direction;
                inv = ray.Inverse;
                oinv = o * inv;
            }
            node_id = stack[stack_ptr];
            stack_ptr--;
        }

        // Termination condition for the fixed-bound loop.
        if (node_id == STACK_SENTINEL)
        {
            break;
        }
    }

    Output[DTid.xy] = float4(bU, bV, step_heat, 1.0f);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement