From 1d41cc46e4230fc148c39535aefabc68b4f23738 Mon Sep 17 00:00:00 2001 From: Kristi K Date: Thu, 7 May 2026 13:13:17 +0200 Subject: [PATCH 1/6] Parse and load clutter scene objects --- Renderer/Renderer/SceneClutter.cs | 124 ++++++++++++++++++ Renderer/Renderer/World/WorldNodeLoader.cs | 30 +++++ .../Resource/ResourceTypes/Model.cs | 25 ++++ 3 files changed, 179 insertions(+) create mode 100644 Renderer/Renderer/SceneClutter.cs diff --git a/Renderer/Renderer/SceneClutter.cs b/Renderer/Renderer/SceneClutter.cs new file mode 100644 index 0000000000..2d846ba5ac --- /dev/null +++ b/Renderer/Renderer/SceneClutter.cs @@ -0,0 +1,124 @@ +using System.Linq; +using ValveKeyValue; +using ValveResourceFormat.Renderer.SceneNodes; +using ValveResourceFormat.ResourceTypes; +using ValveResourceFormat.Serialization.KeyValues; + +namespace ValveResourceFormat.Renderer +{ + /// + /// Scene node for instanced rendering of clutter objects. + /// + public class SceneClutter : SceneNode + { + /// + /// Clutter tile. + /// + public struct ClutterTile + { + /// Gets or sets the index of the first instance in this tile. + public int FirstInstance { get; set; } + + /// Gets or sets the index of the last instance in this tile. + public int LastInstance { get; set; } + + /// Gets or sets the world-space bounding box for this tile. + public AABB BoundsWs { get; set; } + } + + /// Gets the model scene node used to render this clutter. + public ModelSceneNode InstancedModel { get; } + + /// Gets the list of instance positions. + public List InstancePositions { get; private set; } = []; + + /// Gets the list of instance orientations (packed 32-bit). + public List InstanceOrientations { get; private set; } = []; + + /// Gets the list of instance scales. + public List InstanceScales { get; private set; } = []; + + /// Gets the list of instance tint colors (sRGB). + public List InstanceTints { get; private set; } = []; + + /// The tiles that make up this clutter. + public List Tiles { get; private set; } = []; + + /// The screen size at which each clutter instance begins to fade out. + public float BeginCullSize { get; set; } = 0.02f; + + /// The screen size at which each clutter instance is fully culled. + public float EndCullSize { get; set; } = 0.0125f; + + /// Initializes the scene clutter, loading the model and setting material group. + /// Owning scene. + /// Model resource providing the embedded or referenced mesh. + /// Material group name. + public SceneClutter(Scene scene, Model model, string? materialGroup) + : base(scene) + { + InstancedModel = new ModelSceneNode(scene, model, materialGroup, isWorldPreview: true); + LocalBoundingBox = InstancedModel.LocalBoundingBox; + } + + /// Parses instance data from the scene object. + /// KV3 object describing the clutter's instance data. + public void LoadInstanceData(KVObject clutterSceneObject) + { + // Load bounding box + var bounds = clutterSceneObject.GetSubCollection("m_Bounds"); + var minBounds = bounds.GetSubCollection("m_vMinBounds").ToVector3(); + var maxBounds = bounds.GetSubCollection("m_vMaxBounds").ToVector3(); + LocalBoundingBox = new AABB(minBounds, maxBounds); + + // Load instance positions + var positions = clutterSceneObject.GetArray("m_instancePositions"); + InstancePositions = [.. positions.Select(p => p.ToVector3())]; + + // Load instance orientations + var orientations = clutterSceneObject.GetIntegerArray("m_InstanceOrientations32"); + InstanceOrientations = [.. orientations.Select(o => (uint)o)]; + + // Load instance scales + InstanceScales = [.. clutterSceneObject.GetFloatArray("m_instanceScales")]; + + // Load instance tints + var tints = clutterSceneObject.GetArray("m_instanceTintSrgb"); + InstanceTints = [.. tints.Select(t => t.ToVector3()).Select(v => new Color32(v[0], v[1], v[2], 255))]; + + // Load tiles + var tiles = clutterSceneObject.GetArray("m_tiles"); + Tiles = [.. tiles.Select(tile => + { + var boundsWs = tile.GetSubCollection("m_BoundsWs"); + return new ClutterTile + { + FirstInstance = tile.GetInt32Property("m_nFirstInstance"), + LastInstance = tile.GetInt32Property("m_nLastInstance"), + BoundsWs = new AABB( + boundsWs.GetSubCollection("m_vMinBounds").ToVector3(), + boundsWs.GetSubCollection("m_vMaxBounds").ToVector3() + ) + }; + })]; + + // Load cull sizes + BeginCullSize = clutterSceneObject.GetFloatProperty("m_flBeginCullSize", BeginCullSize); + EndCullSize = clutterSceneObject.GetFloatProperty("m_flEndCullSize", EndCullSize); + } + + /// + public override IEnumerable GetSupportedRenderModes() => InstancedModel.GetSupportedRenderModes(); + +#if DEBUG + /// + public override void UpdateVertexArrayObjects() => InstancedModel.UpdateVertexArrayObjects(); +#endif + + /// + public override void Delete() + { + InstancedModel.Delete(); + } + } +} diff --git a/Renderer/Renderer/World/WorldNodeLoader.cs b/Renderer/Renderer/World/WorldNodeLoader.cs index 901a5b2544..869149d203 100644 --- a/Renderer/Renderer/World/WorldNodeLoader.cs +++ b/Renderer/Renderer/World/WorldNodeLoader.cs @@ -164,6 +164,36 @@ public void Load(Scene scene) aggregate.LoadFragments(sceneObject); } } + + foreach (var sceneObject in node.ClutterSceneObjects) + { + var renderableModel = sceneObject.GetStringProperty("m_renderableModel"); + + if (renderableModel != null) + { + var newResource = RendererContext.FileLoader.LoadFileCompiled(renderableModel); + if (newResource == null) + { + continue; + } + + var model = (Model?)newResource.DataBlock; + Debug.Assert(model != null); + + var layerIndex = sceneObject.GetIntegerProperty("m_nLayer"); + var materialGroup = sceneObject.GetStringProperty("m_materialGroup"); + + var clutter = new SceneClutter(scene, model, materialGroup) + { + LayerName = LayerNames[(int)layerIndex], + Name = renderableModel, + Flags = sceneObject.GetEnumValue("m_flags", normalize: true), + }; + + scene.Add(clutter, false); + clutter.LoadInstanceData(sceneObject); + } + } } } } diff --git a/ValveResourceFormat/Resource/ResourceTypes/Model.cs b/ValveResourceFormat/Resource/ResourceTypes/Model.cs index 2155561c63..69a14042fd 100644 --- a/ValveResourceFormat/Resource/ResourceTypes/Model.cs +++ b/ValveResourceFormat/Resource/ResourceTypes/Model.cs @@ -193,6 +193,31 @@ public void SetExternalMeshData(Mesh mesh) return result; } + /// + /// Gets all meshes. + /// + /// Enumerable of mesh, mesh index, name, and LoD mask tuples. + public IEnumerable<(Mesh Mesh, int MeshIndex, string Name, long LoDMask)> GetAllMeshes(IFileLoader fileLoader) + { + var lastEmbeddedIndex = 0; + foreach (var embedded in GetEmbeddedMeshesAndLoD()) + { + yield return (embedded.Mesh, embedded.MeshIndex, embedded.Name, embedded.LoDMask); + lastEmbeddedIndex = embedded.MeshIndex + 1; + } + + foreach (var referenced in GetReferenceMeshNamesAndLoD()) + { + var meshResource = fileLoader.LoadFileCompiled(referenced.MeshName); + if (meshResource?.DataBlock is not Mesh mesh) + { + continue; + } + + yield return (mesh, lastEmbeddedIndex + referenced.MeshIndex, referenced.MeshName, referenced.LoDMask); + } + } + /// /// Gets embedded meshes with their LoD masks. /// From fb699e2c480683fce914d13d0eec8f42401cab1d Mon Sep 17 00:00:00 2001 From: Kristi K Date: Thu, 7 May 2026 13:20:07 +0200 Subject: [PATCH 2/6] Create cull_clutter.comp.slang --- Renderer/Shaders/cull_clutter.comp.slang | 206 +++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 Renderer/Shaders/cull_clutter.comp.slang diff --git a/Renderer/Shaders/cull_clutter.comp.slang b/Renderer/Shaders/cull_clutter.comp.slang new file mode 100644 index 0000000000..3c6fa676db --- /dev/null +++ b/Renderer/Shaders/cull_clutter.comp.slang @@ -0,0 +1,206 @@ +#version 460 + +// Clutter instance culling compute shader. +// Unpacks clutter instance data and generates indirect draw arguments for visible instances. + +#include "common/ViewConstants.slang" +#include "common/structs.slang" + +layout (local_size_x = 64) in; + +// Packed instance data from CPU +struct ClutterInstanceData +{ + vec3 position; + float scale; + uint orientation32; + uint tintSrgb; +}; + +// Output transform and instance data +struct InstanceTransform +{ + mat3x4 transform; +}; + +struct InstanceData +{ + uint tintAlpha; + uint transformIndex; + uint visibleLPV; + uint identification; + uvec4 envMapVisibility; +}; + +// Input: Packed clutter instance data +layout(std430, binding = 5) readonly buffer ClutterInstanceBuffer +{ + ClutterInstanceData clutterInstances[]; +}; + +// Output: Transforms for visible instances +layout(std430, binding = 6) writeonly buffer TransformBuffer +{ + InstanceTransform instanceTransforms[]; +}; + +// Output: Per-instance data for visible instances +layout(std430, binding = 7) writeonly buffer InstanceBuffer +{ + InstanceData instanceData[]; +}; + +// Output: Indirect draw arguments +layout(std430, binding = 8) buffer IndirectDrawBuffer +{ + DrawCommand indirectDraws[]; +}; + +// Clutter parameters +uniform uint g_nTotalInstances; +uniform uint g_nDrawCommandIndex; +uniform uint g_nBaseTransformIndex; +uniform uint g_nBaseInstanceIndex; +uniform uint g_nIndexCount; +uniform uint g_nFirstIndex; +uniform int g_nBaseVertex; +uniform float g_flBeginCullSize; +uniform float g_flEndCullSize; + +// Unpack smallest-three quaternion from 32-bit packed value +vec4 UnpackOrientation32(uint packed) +{ + uint largestIndex = packed & 0x3u; + uint a10 = (packed >> 2) & 0x3FFu; + uint b10 = (packed >> 12) & 0x3FFu; + uint c10 = (packed >> 22) & 0x3FFu; + + // Convert 10-bit unsigned to signed [-511, 511] + int aSigned = int(a10); + int bSigned = int(b10); + int cSigned = int(c10); + + if (aSigned >= 512) aSigned -= 1024; + if (bSigned >= 512) bSigned -= 1024; + if (cSigned >= 512) cSigned -= 1024; + + // Normalize to [-1, 1] + float a = float(aSigned) / 511.0; + float b = float(bSigned) / 511.0; + float c = float(cSigned) / 511.0; + + // Reconstruct the largest component + float sumSquares = a * a + b * b + c * c; + float d = sqrt(max(0.0, 1.0 - sumSquares)); + + // Reconstruct quaternion based on which component was largest + if (largestIndex == 0u) return vec4(d, a, b, c); + if (largestIndex == 1u) return vec4(a, d, b, c); + if (largestIndex == 2u) return vec4(a, b, d, c); + return vec4(a, b, c, d); +} + +// Convert quaternion to 3x3 rotation matrix +mat3 QuaternionToMatrix(vec4 q) +{ + float xx = q.x * q.x; + float yy = q.y * q.y; + float zz = q.z * q.z; + float xy = q.x * q.y; + float xz = q.x * q.z; + float yz = q.y * q.z; + float wx = q.w * q.x; + float wy = q.w * q.y; + float wz = q.w * q.z; + + return mat3( + 1.0 - 2.0 * (yy + zz), 2.0 * (xy + wz), 2.0 * (xz - wy), + 2.0 * (xy - wz), 1.0 - 2.0 * (xx + zz), 2.0 * (yz + wx), + 2.0 * (xz + wy), 2.0 * (yz - wx), 1.0 - 2.0 * (xx + yy) + ); +} + +// Calculate fade alpha based on screen space size (returns 0-1, or -1 if culled) +float CalculateFadeAlpha(vec3 worldPos, float scale) +{ + if (g_flEndCullSize <= 0.0) + { + return 1.0; // Size culling disabled, full opacity + } + + // Project position to clip space + vec4 clipPos = g_matWorldToProjection * vec4(worldPos, 1.0); + + if (clipPos.w <= 0.0) + { + return -1.0; // Behind camera, cull + } + + // Calculate screen-space size (approximate) + float distance = length(worldPos - g_vCameraPositionWs); + float projectedSize = scale / distance; + + // Map to viewport scale + float screenSize = projectedSize * g_vViewportSize.y; + + if (screenSize < g_flEndCullSize) + { + return -1.0; // Too small, cull + } + + // Fade from 0 to 1 between EndCullSize and BeginCullSize + if (screenSize < g_flBeginCullSize) + { + float fade = (screenSize - g_flEndCullSize) / (g_flBeginCullSize - g_flEndCullSize); + return clamp(fade, 0.0, 1.0); + } + + return 1.0; // Full opacity +} + +void main() +{ + uint instanceIndex = gl_GlobalInvocationID.x; + + if (instanceIndex >= g_nTotalInstances) + { + return; + } + + ClutterInstanceData instance = clutterInstances[instanceIndex]; + + // Calculate fade alpha and test culling + float fadeAlpha = CalculateFadeAlpha(instance.position, instance.scale); + if (fadeAlpha < 0.0) + { + return; // Instance culled + } + + // Unpack orientation quaternion and convert to rotation matrix + vec4 quat = UnpackOrientation32(instance.orientation32); + mat3 rotation = QuaternionToMatrix(quat); + + // Build transform matrix (rotation + scale + translation) + mat3x4 transform; + transform[0] = vec4(rotation[0] * instance.scale, instance.position.x); + transform[1] = vec4(rotation[1] * instance.scale, instance.position.y); + transform[2] = vec4(rotation[2] * instance.scale, instance.position.z); + + // Write transform to buffer + uint transformIndex = g_nBaseTransformIndex + instanceIndex; + instanceTransforms[transformIndex].transform = transform; + + // Unpack tint color (RGB only, ignore source alpha) and apply fade alpha + uint tintRgb = instance.tintSrgb & 0x00FFFFFFu; // Mask out alpha + uint alphaChannel = uint(fadeAlpha * 255.0) << 24; // Convert fade to 0-255 and shift to alpha position + uint tintAlpha = tintRgb | alphaChannel; + + // Write instance data + uint instanceDataIndex = g_nBaseInstanceIndex + instanceIndex; + instanceData[instanceDataIndex].tintAlpha = tintAlpha; + instanceData[instanceDataIndex].transformIndex = transformIndex; + instanceData[instanceDataIndex].visibleLPV = 0u; // TODO: Light probe volume + instanceData[instanceDataIndex].identification = 0u; // TODO: Selection ID + instanceData[instanceDataIndex].envMapVisibility = uvec4(0u); // TODO: Env map visibility +} + From 2e939b5810c11338bd428232551bea78f988156a Mon Sep 17 00:00:00 2001 From: Kristi K Date: Thu, 7 May 2026 13:26:54 +0200 Subject: [PATCH 3/6] Cull using sphere radius --- Renderer/Renderer/SceneClutter.cs | 8 +++++++ Renderer/Shaders/cull_clutter.comp.slang | 30 +++++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Renderer/Renderer/SceneClutter.cs b/Renderer/Renderer/SceneClutter.cs index 2d846ba5ac..659ca664ac 100644 --- a/Renderer/Renderer/SceneClutter.cs +++ b/Renderer/Renderer/SceneClutter.cs @@ -50,6 +50,9 @@ public struct ClutterTile /// The screen size at which each clutter instance is fully culled. public float EndCullSize { get; set; } = 0.0125f; + /// Gets the bounding sphere radius of the instanced model. + public float ModelRadius { get; } + /// Initializes the scene clutter, loading the model and setting material group. /// Owning scene. /// Model resource providing the embedded or referenced mesh. @@ -59,6 +62,11 @@ public SceneClutter(Scene scene, Model model, string? materialGroup) { InstancedModel = new ModelSceneNode(scene, model, materialGroup, isWorldPreview: true); LocalBoundingBox = InstancedModel.LocalBoundingBox; + + // Calculate bounding sphere radius from model's AABB + var modelBounds = InstancedModel.LocalBoundingBox; + var extents = modelBounds.Max - modelBounds.Min; + ModelRadius = extents.Length() * 0.5f; } /// Parses instance data from the scene object. diff --git a/Renderer/Shaders/cull_clutter.comp.slang b/Renderer/Shaders/cull_clutter.comp.slang index 3c6fa676db..ce1ba9609d 100644 --- a/Renderer/Shaders/cull_clutter.comp.slang +++ b/Renderer/Shaders/cull_clutter.comp.slang @@ -8,7 +8,7 @@ layout (local_size_x = 64) in; -// Packed instance data from CPU +// Clutter instance data from CPU struct ClutterInstanceData { vec3 position; @@ -18,12 +18,12 @@ struct ClutterInstanceData }; // Output transform and instance data -struct InstanceTransform +struct DrawTransform { mat3x4 transform; }; -struct InstanceData +struct DrawInstance { uint tintAlpha; uint transformIndex; @@ -41,13 +41,13 @@ layout(std430, binding = 5) readonly buffer ClutterInstanceBuffer // Output: Transforms for visible instances layout(std430, binding = 6) writeonly buffer TransformBuffer { - InstanceTransform instanceTransforms[]; + DrawTransform instanceTransforms[]; }; // Output: Per-instance data for visible instances layout(std430, binding = 7) writeonly buffer InstanceBuffer { - InstanceData instanceData[]; + DrawInstance instanceData[]; }; // Output: Indirect draw arguments @@ -66,6 +66,7 @@ uniform uint g_nFirstIndex; uniform int g_nBaseVertex; uniform float g_flBeginCullSize; uniform float g_flEndCullSize; +uniform float g_flModelRadius; // Bounding sphere radius of the instanced model // Unpack smallest-three quaternion from 32-bit packed value vec4 UnpackOrientation32(uint packed) @@ -136,12 +137,23 @@ float CalculateFadeAlpha(vec3 worldPos, float scale) return -1.0; // Behind camera, cull } - // Calculate screen-space size (approximate) + // Calculate world-space radius of this instance + float worldRadius = g_flModelRadius * scale; + + // Calculate screen-space size using perspective projection + // Project the radius into screen space at this distance float distance = length(worldPos - g_vCameraPositionWs); - float projectedSize = scale / distance; - // Map to viewport scale - float screenSize = projectedSize * g_vViewportSize.y; + if (distance <= 0.0001) + { + return 1.0; // Very close, full opacity + } + + // Screen-space radius in pixels (using perspective projection) + // radius_screen = radius_world * focal_length / distance + // focal_length approximated using viewport height and FOV + float screenRadius = (worldRadius * g_vViewportSize.y) / (distance * 2.0); + float screenSize = screenRadius * 2.0; // Diameter in pixels if (screenSize < g_flEndCullSize) { From 5273cdabfddafbb9c17df6ed9706b908dca5c3e7 Mon Sep 17 00:00:00 2001 From: Kristi K Date: Thu, 7 May 2026 13:34:33 +0200 Subject: [PATCH 4/6] Create clutter gpu instances --- .../Renderer/Buffers/ReservedBufferSlots.cs | 2 + Renderer/Renderer/SceneClutter.cs | 46 +++++++++++++++++++ .../Renderer/UniformBuffers/GpuStructs.cs | 14 ++++++ 3 files changed, 62 insertions(+) diff --git a/Renderer/Renderer/Buffers/ReservedBufferSlots.cs b/Renderer/Renderer/Buffers/ReservedBufferSlots.cs index 2b4d9ba5e2..6a1a4135c8 100644 --- a/Renderer/Renderer/Buffers/ReservedBufferSlots.cs +++ b/Renderer/Renderer/Buffers/ReservedBufferSlots.cs @@ -48,6 +48,8 @@ public enum ReservedBufferSlots BoneTransforms = 11, /// Barn light constants SSBO slot. BarnLights = 12, + /// Clutter packed instance data SSBO slot. + ClutterInstances = 13, /// Guaranteed minimum binding point count in OpenGL 4.6. Max = 8, diff --git a/Renderer/Renderer/SceneClutter.cs b/Renderer/Renderer/SceneClutter.cs index 659ca664ac..4507e92f22 100644 --- a/Renderer/Renderer/SceneClutter.cs +++ b/Renderer/Renderer/SceneClutter.cs @@ -1,6 +1,8 @@ using System.Linq; using ValveKeyValue; +using ValveResourceFormat.Renderer.Buffers; using ValveResourceFormat.Renderer.SceneNodes; +using ValveResourceFormat.Renderer.Utils; using ValveResourceFormat.ResourceTypes; using ValveResourceFormat.Serialization.KeyValues; @@ -53,6 +55,18 @@ public struct ClutterTile /// Gets the bounding sphere radius of the instanced model. public float ModelRadius { get; } + /// GPU storage buffer containing packed instance data for compute culling. + public StorageBuffer? InstanceDataGpu { get; private set; } + + /// Gets or sets the base index into the scene's transform buffer for this clutter's instances. + public uint BaseTransformIndex { get; set; } + + /// Gets or sets the base index into the scene's instance buffer for this clutter's instances. + public uint BaseInstanceIndex { get; set; } + + /// Gets the number of instances in this clutter object. + public int InstanceCount => InstancePositions.Count; + /// Initializes the scene clutter, loading the model and setting material group. /// Owning scene. /// Model resource providing the embedded or referenced mesh. @@ -113,6 +127,36 @@ public void LoadInstanceData(KVObject clutterSceneObject) // Load cull sizes BeginCullSize = clutterSceneObject.GetFloatProperty("m_flBeginCullSize", BeginCullSize); EndCullSize = clutterSceneObject.GetFloatProperty("m_flEndCullSize", EndCullSize); + + UploadInstanceDataToGpu(); + } + + /// Packs instance data and uploads it to GPU storage buffer. + public void UploadInstanceDataToGpu() + { + if (InstancePositions.Count == 0) + { + return; + } + + // Pack instance data into GPU format + var instanceCount = InstancePositions.Count; + var packedData = new ClutterInstanceData[instanceCount]; + + for (var i = 0; i < instanceCount; i++) + { + packedData[i] = new ClutterInstanceData + { + Position = InstancePositions[i], + Scale = InstanceScales[i], + Orientation32 = InstanceOrientations[i], + TintSrgb = InstanceTints[i].PackedValue + }; + } + + // Create or update GPU buffer + InstanceDataGpu ??= new StorageBuffer(ReservedBufferSlots.ClutterInstances); + InstanceDataGpu.Create(packedData, packedData.Length * System.Runtime.InteropServices.Marshal.SizeOf()); } /// @@ -126,6 +170,8 @@ public void LoadInstanceData(KVObject clutterSceneObject) /// public override void Delete() { + InstanceDataGpu?.Delete(); + InstanceDataGpu = null; InstancedModel.Delete(); } } diff --git a/Renderer/Renderer/UniformBuffers/GpuStructs.cs b/Renderer/Renderer/UniformBuffers/GpuStructs.cs index c98b89d08a..19bbee9fc2 100644 --- a/Renderer/Renderer/UniformBuffers/GpuStructs.cs +++ b/Renderer/Renderer/UniformBuffers/GpuStructs.cs @@ -90,3 +90,17 @@ public readonly struct DrawElementsIndirectCommand /// Base instance used to index per-instance data. public readonly uint BaseInstance { get; init; } }; + +/// Packed clutter instance data for GPU compute culling. +[StructLayout(LayoutKind.Sequential)] +public struct ClutterInstanceData +{ + /// Instance world position. + public Vector3 Position; + /// Instance uniform scale. + public float Scale; + /// Instance orientation (smallest-three packed quaternion). + public uint Orientation32; + /// Instance tint color (RGBA8 sRGB). + public uint TintSrgb; +}; From 766a4cda234a76dae2efd7ef68bd3d813860f0a3 Mon Sep 17 00:00:00 2001 From: Kristi K Date: Thu, 7 May 2026 14:36:46 +0200 Subject: [PATCH 5/6] Cull and draw clutter attempt --- Renderer/Renderer.cs | 2 + Renderer/Renderer/Scene.cs | 172 ++++++++++++++++++++++- Renderer/Renderer/SceneClutter.cs | 16 ++- Renderer/Shaders/cull_clutter.comp.slang | 90 ++++++++---- 4 files changed, 242 insertions(+), 38 deletions(-) diff --git a/Renderer/Renderer.cs b/Renderer/Renderer.cs index 6c2e67144d..0923e5f8fe 100644 --- a/Renderer/Renderer.cs +++ b/Renderer/Renderer.cs @@ -346,6 +346,8 @@ void UpdatePerViewGpuBuffers(Scene scene, Camera camera, float deltaTime) scene.MeshletCullGpu(camera.ViewFrustum); } + scene.ClutterCullGpu(); + if (scene.CompactMeshletDraws) { scene.CompactIndirectDraws(); diff --git a/Renderer/Renderer/Scene.cs b/Renderer/Renderer/Scene.cs index 716e850330..0d12ed790f 100644 --- a/Renderer/Renderer/Scene.cs +++ b/Renderer/Renderer/Scene.cs @@ -118,6 +118,7 @@ public struct RenderContext private Shader? FrustumCullShader; private Shader? CompactionShader; + private Shader? ClutterCullShader; private Shader? DepthPyramidShader; private Shader? DepthPyramidNpotShader; @@ -206,6 +207,7 @@ public void Initialize() OutlineShader = RendererContext.ShaderLoader.LoadShader("vrf.outline"); FrustumCullShader = RendererContext.ShaderLoader.LoadShader("vrf.frustum_cull"); CompactionShader = RendererContext.ShaderLoader.LoadShader("vrf.compact_indirect_draws"); + ClutterCullShader = RendererContext.ShaderLoader.LoadShader("vrf.cull_clutter"); DepthPyramidShader = RendererContext.ShaderLoader.LoadShader("vrf.depth_pyramid"); DepthPyramidNpotShader = RendererContext.ShaderLoader.LoadShader("vrf.depth_pyramid", ("D_NPOT_DOWNSAMPLE", 1)); @@ -433,8 +435,12 @@ private void CreateInstanceTransformBuffers(bool deletePrevious = false) var maxId = nodes.Max(n => n.Id); - var instanceData = new ObjectDataStandard[maxId + 1]; - var transformData = new List(capacity: (int)maxId + 2) + // Calculate total clutter instances for buffer sizing + var totalClutterInstances = staticNodes.OfType().Sum(c => c.InstanceCount); + + // Allocate with extra space for clutter instances + var instanceData = new ObjectDataStandard[maxId + 1 + totalClutterInstances]; + var transformData = new List(capacity: (int)maxId + 2 + totalClutterInstances) { // Reserve index 0 for identity transform Matrix4x4.Identity.To3x4() @@ -479,6 +485,19 @@ private void CreateInstanceTransformBuffers(bool deletePrevious = false) }; } + // Assign base indices to clutter nodes after CPU nodes are allocated + var clutterBaseTransformIndex = transformData.Count; + var clutterBaseInstanceIndex = (int)maxId + 1; + + foreach (var clutter in staticNodes.OfType()) + { + clutter.BaseTransformIndex = clutterBaseTransformIndex; + clutter.BaseInstanceIndex = clutterBaseInstanceIndex; + + clutterBaseTransformIndex += clutter.InstanceCount; + clutterBaseInstanceIndex += clutter.InstanceCount; + } + InstanceBufferGpu = new StorageBuffer(ReservedBufferSlots.Objects); TransformBufferGpu = new StorageBuffer(ReservedBufferSlots.Transforms); @@ -492,7 +511,10 @@ private void CreateIndirectDrawBuffers(bool deletePrevious = false) var aggregateDrawCallCount = aggregateSceneNodes.Sum(agg => agg.Fragments.Count); var aggregateMeshletCount = aggregateSceneNodes.Sum(agg => agg.RenderMesh.Meshlets.Count); - if (aggregateMeshletCount == 0) + var clutterSceneNodes = staticNodes.OfType().ToList(); + var clutterDrawCallCount = clutterSceneNodes.Count; // One draw call per clutter object + + if (aggregateMeshletCount == 0 && clutterDrawCallCount == 0) { return; } @@ -530,8 +552,9 @@ private void CreateIndirectDrawBuffers(bool deletePrevious = false) // meshlets { + var totalIndirectDraws = aggregateMeshletCount + clutterDrawCallCount; var meshletDataGpu = new MeshletCullInfo[aggregateMeshletCount]; - var indirectDrawsGpu = new DrawElementsIndirectCommand[aggregateMeshletCount]; + var indirectDrawsGpu = new DrawElementsIndirectCommand[totalIndirectDraws]; var sceneDrawCount = 0; var sceneMeshletCount = 0; @@ -607,6 +630,25 @@ private void CreateIndirectDrawBuffers(bool deletePrevious = false) SceneMeshletCount = sceneMeshletCount; + // Allocate space for clutter draw commands after aggregates + var clutterDrawIndex = sceneMeshletCount; + foreach (var clutter in clutterSceneNodes) + { + clutter.IndirectDrawByteOffset = clutterDrawIndex * Unsafe.SizeOf(); + + // Initialize with zero instance count (will be filled by compute shader) + indirectDrawsGpu[clutterDrawIndex] = new DrawElementsIndirectCommand + { + Count = 0, + InstanceCount = 0, + FirstIndex = 0, + BaseVertex = 0, + BaseInstance = 0, + }; + + clutterDrawIndex++; + } + MeshletDataGpu = new StorageBuffer(ReservedBufferSlots.AggregateMeshlets); IndirectDrawsGpu = new StorageBuffer(ReservedBufferSlots.AggregateDraws); @@ -716,6 +758,9 @@ public List GetFrustumCullResults(Frustum frustum) [RenderPass.Outline] = [], }; + private record struct ClutterDrawRequest(SceneClutter Clutter, DrawCall Call); + private readonly List clutterDrawList = []; + private Dictionary> depthOnlyDraws { get; } = new() { [DepthOnlyProgram.Static] = [], @@ -793,6 +838,8 @@ public void CollectSceneDrawCalls(Camera camera, Frustum? cullFrustum = null) bucket.Clear(); } + clutterDrawList.Clear(); + WantsSceneColor = false; WantsSceneDepth = false; @@ -872,6 +919,16 @@ public void CollectSceneDrawCalls(Camera camera, Frustum? cullFrustum = null) }, RenderPass.OpaqueAggregate); } } + else if (node is SceneClutter clutter) + { + foreach (var mesh in clutter.InstancedModel.RenderableMeshes) + { + foreach (var call in mesh.DrawCallsOpaque) + { + clutterDrawList.Add(new ClutterDrawRequest(clutter, call)); + } + } + } else { var customRender = new MeshBatchRenderer.Request @@ -1184,6 +1241,63 @@ public void CompactIndirectDraws() } + /// + /// Dispatches the GPU clutter culling compute shader for all visible clutter nodes, writing transforms and indirect draw commands. + /// + public void ClutterCullGpu() + { + if (ClutterCullShader == null || clutterDrawList.Count == 0) + { + return; + } + + Debug.Assert(TransformBufferGpu != null); + Debug.Assert(InstanceBufferGpu != null); + Debug.Assert(IndirectDrawsGpu != null); + + using var _ = new GLDebugGroup("Cull Clutter"); + + ClutterCullShader.Use(); + + // Bind shared buffers once + TransformBufferGpu.BindBufferBase(); + InstanceBufferGpu.BindBufferBase(); + IndirectDrawsGpu.BindBufferBase(); + + // Dispatch for each visible clutter node + foreach (var request in clutterDrawList) + { + var clutter = request.Clutter; + var drawCall = request.Call; + + if (clutter.InstanceDataGpu == null || clutter.InstanceCount == 0) + { + continue; + } + + // Set per-clutter uniforms + ClutterCullShader.SetUniform1("g_nTotalInstances", (uint)clutter.InstanceCount); + ClutterCullShader.SetUniform1("g_nBaseTransformIndex", (uint)clutter.BaseTransformIndex); + ClutterCullShader.SetUniform1("g_nBaseInstanceIndex", (uint)clutter.BaseInstanceIndex); + ClutterCullShader.SetUniform1("g_flModelRadius", clutter.ModelRadius); + ClutterCullShader.SetUniform1("g_flBeginCullSize", clutter.BeginCullSize); + ClutterCullShader.SetUniform1("g_flEndCullSize", clutter.EndCullSize); + ClutterCullShader.SetUniform1("g_nDrawIndexCount", (uint)drawCall.IndexCount); + ClutterCullShader.SetUniform1("g_nDrawFirstIndex", (uint)drawCall.StartIndex); + ClutterCullShader.SetUniform1("g_nDrawBaseVertex", drawCall.BaseVertex); + ClutterCullShader.SetUniform1("g_nDrawCommandIndex", (uint)(clutter.IndirectDrawByteOffset / Unsafe.SizeOf())); + + // Bind per-clutter instance data + clutter.InstanceDataGpu.BindBufferBase(); + + // Dispatch compute shader + var workGroups = (clutter.InstanceCount + 63) / 64; + GL.DispatchCompute(workGroups, 1, 1); + } + + GL.MemoryBarrier(MemoryBarrierFlags.ShaderStorageBarrierBit); + } + /// /// Generates the hierarchical depth pyramid from the given depth texture by downsampling through compute shaders. /// @@ -1325,6 +1439,11 @@ public void RenderOpaqueLayer(RenderContext renderContext, Span depthOnl MeshBatchRenderer.Render(renderLists[renderContext.RenderPass], renderContext); } + using (new GLDebugGroup("Clutter Render")) + { + RenderClutter(renderContext); + } + using (new GLDebugGroup("StaticOverlay Render")) { renderContext.RenderPass = RenderPass.StaticOverlay; @@ -1332,6 +1451,51 @@ public void RenderOpaqueLayer(RenderContext renderContext, Span depthOnl } } + /// + /// Renders clutter objects using indirect draw commands. + /// + /// The render context for this pass. + private void RenderClutter(RenderContext renderContext) + { + if (IndirectDrawsGpu == null || clutterDrawList.Count == 0) + { + return; + } + + // Bind the indirect draw buffer + GL.BindBuffer(BufferTarget.DrawIndirectBuffer, IndirectDrawsGpu.Handle); + + foreach (var request in clutterDrawList) + { + // Bind VAO and material + if (request.Call.VertexArrayObject == -1) + { + request.Call.Material.Shader.EnsureLoaded(); + request.Call.UpdateVertexArrayObject(); + } + + GL.BindVertexArray(request.Call.VertexArrayObject); + request.Call.Material.Render(request.Call.Material.Shader); + + // Enable instancing for clutter + request.Call.Material.Shader.SetUniform1("bIsInstancing", 1); + + // Draw using indirect command + GL.MultiDrawElementsIndirect( + request.Call.PrimitiveType, + request.Call.IndexType, + request.Clutter.IndirectDrawByteOffset, + 1, // Draw one command per clutter object + 0 // Stride + ); + + request.Call.Material.PostRender(); + } + + GL.BindVertexArray(0); + GL.BindBuffer(BufferTarget.DrawIndirectBuffer, 0); + } + private bool occlusionDirty; static void ClearOccludedStateRecursive(Octree.Node node) diff --git a/Renderer/Renderer/SceneClutter.cs b/Renderer/Renderer/SceneClutter.cs index 4507e92f22..bd38d2a41b 100644 --- a/Renderer/Renderer/SceneClutter.cs +++ b/Renderer/Renderer/SceneClutter.cs @@ -1,8 +1,9 @@ +using System.Diagnostics; using System.Linq; +using OpenTK.Graphics.OpenGL; using ValveKeyValue; using ValveResourceFormat.Renderer.Buffers; using ValveResourceFormat.Renderer.SceneNodes; -using ValveResourceFormat.Renderer.Utils; using ValveResourceFormat.ResourceTypes; using ValveResourceFormat.Serialization.KeyValues; @@ -58,11 +59,14 @@ public struct ClutterTile /// GPU storage buffer containing packed instance data for compute culling. public StorageBuffer? InstanceDataGpu { get; private set; } - /// Gets or sets the base index into the scene's transform buffer for this clutter's instances. - public uint BaseTransformIndex { get; set; } + /// Gets or sets the byte offset into the indirect draw buffer for this clutter's draw command. + public int IndirectDrawByteOffset { get; set; } - /// Gets or sets the base index into the scene's instance buffer for this clutter's instances. - public uint BaseInstanceIndex { get; set; } + /// Gets or sets the base index into the transform buffer where this clutter's transforms begin. + public int BaseTransformIndex { get; set; } + + /// Gets or sets the base index into the instance buffer where this clutter's instance data begins. + public int BaseInstanceIndex { get; set; } /// Gets the number of instances in this clutter object. public int InstanceCount => InstancePositions.Count; @@ -106,7 +110,7 @@ public void LoadInstanceData(KVObject clutterSceneObject) // Load instance tints var tints = clutterSceneObject.GetArray("m_instanceTintSrgb"); - InstanceTints = [.. tints.Select(t => t.ToVector3()).Select(v => new Color32(v[0], v[1], v[2], 255))]; + InstanceTints = [.. tints.Select(t => t.ToVector3()).Select(v => new Color32((byte)v[0], (byte)v[1], (byte)v[2]))]; // Load tiles var tiles = clutterSceneObject.GetArray("m_tiles"); diff --git a/Renderer/Shaders/cull_clutter.comp.slang b/Renderer/Shaders/cull_clutter.comp.slang index ce1ba9609d..d33b3f7215 100644 --- a/Renderer/Shaders/cull_clutter.comp.slang +++ b/Renderer/Shaders/cull_clutter.comp.slang @@ -17,12 +17,6 @@ struct ClutterInstanceData uint tintSrgb; }; -// Output transform and instance data -struct DrawTransform -{ - mat3x4 transform; -}; - struct DrawInstance { uint tintAlpha; @@ -33,48 +27,55 @@ struct DrawInstance }; // Input: Packed clutter instance data -layout(std430, binding = 5) readonly buffer ClutterInstanceBuffer +layout(std430, binding = 13) readonly buffer ClutterInstanceBuffer { ClutterInstanceData clutterInstances[]; }; // Output: Transforms for visible instances -layout(std430, binding = 6) writeonly buffer TransformBuffer +layout(std430, binding = 1) writeonly buffer TransformBuffer { - DrawTransform instanceTransforms[]; + mat3x4 instanceTransforms[]; }; // Output: Per-instance data for visible instances -layout(std430, binding = 7) writeonly buffer InstanceBuffer +layout(std430, binding = 0) writeonly buffer InstanceBuffer { DrawInstance instanceData[]; }; // Output: Indirect draw arguments -layout(std430, binding = 8) buffer IndirectDrawBuffer +layout(std430, binding = 4) buffer IndirectDrawBuffer { DrawCommand indirectDraws[]; }; +// Atomic counters for dynamic buffer appending (initialized to CPU buffer sizes) +layout(std430, binding = 5) buffer ClutterCounterBuffer +{ + uint transformCounter; + uint instanceCounter; +}; + // Clutter parameters uniform uint g_nTotalInstances; -uniform uint g_nDrawCommandIndex; -uniform uint g_nBaseTransformIndex; -uniform uint g_nBaseInstanceIndex; -uniform uint g_nIndexCount; -uniform uint g_nFirstIndex; -uniform int g_nBaseVertex; uniform float g_flBeginCullSize; uniform float g_flEndCullSize; uniform float g_flModelRadius; // Bounding sphere radius of the instanced model +// Draw command parameters (set per clutter object) +uniform uint g_nDrawIndexCount; +uniform uint g_nDrawFirstIndex; +uniform int g_nDrawBaseVertex; +uniform uint g_nDrawCommandIndex; // Index into the indirect draw buffer + // Unpack smallest-three quaternion from 32-bit packed value -vec4 UnpackOrientation32(uint packed) +vec4 UnpackOrientation32(uint packedValue) { - uint largestIndex = packed & 0x3u; - uint a10 = (packed >> 2) & 0x3FFu; - uint b10 = (packed >> 12) & 0x3FFu; - uint c10 = (packed >> 22) & 0x3FFu; + uint largestIndex = packedValue & 0x3u; + uint a10 = (packedValue >> 2) & 0x3FFu; + uint b10 = (packedValue >> 12) & 0x3FFu; + uint c10 = (packedValue >> 22) & 0x3FFu; // Convert 10-bit unsigned to signed [-511, 511] int aSigned = int(a10); @@ -170,9 +171,20 @@ float CalculateFadeAlpha(vec3 worldPos, float scale) return 1.0; // Full opacity } +// Shared memory for counting visible instances +shared uint visibleCount; + void main() { uint instanceIndex = gl_GlobalInvocationID.x; + uint localIndex = gl_LocalInvocationID.x; + + // Initialize shared counter (first thread in workgroup) + if (localIndex == 0u) + { + visibleCount = 0u; + } + barrier(); if (instanceIndex >= g_nTotalInstances) { @@ -183,11 +195,16 @@ void main() // Calculate fade alpha and test culling float fadeAlpha = CalculateFadeAlpha(instance.position, instance.scale); - if (fadeAlpha < 0.0) + bool visible = fadeAlpha >= 0.0; + + if (!visible) { return; // Instance culled } + // Atomically increment visible instance counter + atomicAdd(visibleCount, 1u); + // Unpack orientation quaternion and convert to rotation matrix vec4 quat = UnpackOrientation32(instance.orientation32); mat3 rotation = QuaternionToMatrix(quat); @@ -198,21 +215,38 @@ void main() transform[1] = vec4(rotation[1] * instance.scale, instance.position.y); transform[2] = vec4(rotation[2] * instance.scale, instance.position.z); - // Write transform to buffer - uint transformIndex = g_nBaseTransformIndex + instanceIndex; - instanceTransforms[transformIndex].transform = transform; + // Atomically allocate next transform slot + uint transformIndex = atomicAdd(transformCounter, 1u); + instanceTransforms[transformIndex] = transform; // Unpack tint color (RGB only, ignore source alpha) and apply fade alpha uint tintRgb = instance.tintSrgb & 0x00FFFFFFu; // Mask out alpha uint alphaChannel = uint(fadeAlpha * 255.0) << 24; // Convert fade to 0-255 and shift to alpha position uint tintAlpha = tintRgb | alphaChannel; - // Write instance data - uint instanceDataIndex = g_nBaseInstanceIndex + instanceIndex; + // Atomically allocate next instance slot + uint instanceDataIndex = atomicAdd(instanceCounter, 1u); instanceData[instanceDataIndex].tintAlpha = tintAlpha; instanceData[instanceDataIndex].transformIndex = transformIndex; instanceData[instanceDataIndex].visibleLPV = 0u; // TODO: Light probe volume instanceData[instanceDataIndex].identification = 0u; // TODO: Selection ID instanceData[instanceDataIndex].envMapVisibility = uvec4(0u); // TODO: Env map visibility + + // Sync workgroup to ensure all threads have processed + barrier(); + + // First thread in workgroup writes the indirect draw command + if (localIndex == 0u && instanceIndex < g_nTotalInstances) + { + // Atomically allocate base instance index for this draw call + uint baseInstanceForDraw = atomicAdd(instanceCounter, 0u) - visibleCount; + + // Write draw command with actual visible instance count + indirectDraws[g_nDrawCommandIndex].indexCount = g_nDrawIndexCount; + indirectDraws[g_nDrawCommandIndex].instanceCount = visibleCount; + indirectDraws[g_nDrawCommandIndex].firstIndex = g_nDrawFirstIndex; + indirectDraws[g_nDrawCommandIndex].baseVertex = g_nDrawBaseVertex; + indirectDraws[g_nDrawCommandIndex].baseInstance = baseInstanceForDraw; + } } From 7a2256779e39364c170ed56830960d75d7868dda Mon Sep 17 00:00:00 2001 From: Kristi K Date: Thu, 7 May 2026 15:14:38 +0200 Subject: [PATCH 6/6] Clean up --- Renderer/Renderer/Scene.cs | 28 ++++------- Renderer/Renderer/SceneClutter.cs | 5 ++ Renderer/Shaders/cull_clutter.comp.slang | 59 +++++------------------- 3 files changed, 27 insertions(+), 65 deletions(-) diff --git a/Renderer/Renderer/Scene.cs b/Renderer/Renderer/Scene.cs index 0d12ed790f..62e23658de 100644 --- a/Renderer/Renderer/Scene.cs +++ b/Renderer/Renderer/Scene.cs @@ -636,14 +636,14 @@ private void CreateIndirectDrawBuffers(bool deletePrevious = false) { clutter.IndirectDrawByteOffset = clutterDrawIndex * Unsafe.SizeOf(); - // Initialize with zero instance count (will be filled by compute shader) + // Initialize draw command with static parameters indirectDrawsGpu[clutterDrawIndex] = new DrawElementsIndirectCommand { - Count = 0, - InstanceCount = 0, - FirstIndex = 0, - BaseVertex = 0, - BaseInstance = 0, + Count = (uint)clutter.DrawCall.IndexCount, + InstanceCount = 0, // Will be atomically incremented by compute shader + FirstIndex = (uint)clutter.DrawCall.StartIndex, + BaseVertex = clutter.DrawCall.BaseVertex, + BaseInstance = (uint)clutter.BaseInstanceIndex, }; clutterDrawIndex++; @@ -921,13 +921,7 @@ public void CollectSceneDrawCalls(Camera camera, Frustum? cullFrustum = null) } else if (node is SceneClutter clutter) { - foreach (var mesh in clutter.InstancedModel.RenderableMeshes) - { - foreach (var call in mesh.DrawCallsOpaque) - { - clutterDrawList.Add(new ClutterDrawRequest(clutter, call)); - } - } + clutterDrawList.Add(new ClutterDrawRequest(clutter, clutter.DrawCall)); } else { @@ -1268,13 +1262,14 @@ public void ClutterCullGpu() foreach (var request in clutterDrawList) { var clutter = request.Clutter; - var drawCall = request.Call; if (clutter.InstanceDataGpu == null || clutter.InstanceCount == 0) { continue; } + var drawCommandIndex = clutter.IndirectDrawByteOffset / Unsafe.SizeOf(); + // Set per-clutter uniforms ClutterCullShader.SetUniform1("g_nTotalInstances", (uint)clutter.InstanceCount); ClutterCullShader.SetUniform1("g_nBaseTransformIndex", (uint)clutter.BaseTransformIndex); @@ -1282,10 +1277,7 @@ public void ClutterCullGpu() ClutterCullShader.SetUniform1("g_flModelRadius", clutter.ModelRadius); ClutterCullShader.SetUniform1("g_flBeginCullSize", clutter.BeginCullSize); ClutterCullShader.SetUniform1("g_flEndCullSize", clutter.EndCullSize); - ClutterCullShader.SetUniform1("g_nDrawIndexCount", (uint)drawCall.IndexCount); - ClutterCullShader.SetUniform1("g_nDrawFirstIndex", (uint)drawCall.StartIndex); - ClutterCullShader.SetUniform1("g_nDrawBaseVertex", drawCall.BaseVertex); - ClutterCullShader.SetUniform1("g_nDrawCommandIndex", (uint)(clutter.IndirectDrawByteOffset / Unsafe.SizeOf())); + ClutterCullShader.SetUniform1("g_nDrawCommandIndex", (uint)drawCommandIndex); // Bind per-clutter instance data clutter.InstanceDataGpu.BindBufferBase(); diff --git a/Renderer/Renderer/SceneClutter.cs b/Renderer/Renderer/SceneClutter.cs index bd38d2a41b..570902c0c9 100644 --- a/Renderer/Renderer/SceneClutter.cs +++ b/Renderer/Renderer/SceneClutter.cs @@ -62,6 +62,9 @@ public struct ClutterTile /// Gets or sets the byte offset into the indirect draw buffer for this clutter's draw command. public int IndirectDrawByteOffset { get; set; } + /// Gets or sets the draw call used for rendering this clutter. + public DrawCall DrawCall { get; set; } + /// Gets or sets the base index into the transform buffer where this clutter's transforms begin. public int BaseTransformIndex { get; set; } @@ -81,6 +84,8 @@ public SceneClutter(Scene scene, Model model, string? materialGroup) InstancedModel = new ModelSceneNode(scene, model, materialGroup, isWorldPreview: true); LocalBoundingBox = InstancedModel.LocalBoundingBox; + DrawCall = InstancedModel.RenderableMeshes[0].DrawCallsOpaque[0]; // todo: more than one? + // Calculate bounding sphere radius from model's AABB var modelBounds = InstancedModel.LocalBoundingBox; var extents = modelBounds.Max - modelBounds.Min; diff --git a/Renderer/Shaders/cull_clutter.comp.slang b/Renderer/Shaders/cull_clutter.comp.slang index d33b3f7215..582d69f649 100644 --- a/Renderer/Shaders/cull_clutter.comp.slang +++ b/Renderer/Shaders/cull_clutter.comp.slang @@ -50,23 +50,13 @@ layout(std430, binding = 4) buffer IndirectDrawBuffer DrawCommand indirectDraws[]; }; -// Atomic counters for dynamic buffer appending (initialized to CPU buffer sizes) -layout(std430, binding = 5) buffer ClutterCounterBuffer -{ - uint transformCounter; - uint instanceCounter; -}; - // Clutter parameters uniform uint g_nTotalInstances; +uniform uint g_nBaseTransformIndex; +uniform uint g_nBaseInstanceIndex; uniform float g_flBeginCullSize; uniform float g_flEndCullSize; uniform float g_flModelRadius; // Bounding sphere radius of the instanced model - -// Draw command parameters (set per clutter object) -uniform uint g_nDrawIndexCount; -uniform uint g_nDrawFirstIndex; -uniform int g_nDrawBaseVertex; uniform uint g_nDrawCommandIndex; // Index into the indirect draw buffer // Unpack smallest-three quaternion from 32-bit packed value @@ -171,20 +161,9 @@ float CalculateFadeAlpha(vec3 worldPos, float scale) return 1.0; // Full opacity } -// Shared memory for counting visible instances -shared uint visibleCount; - void main() { uint instanceIndex = gl_GlobalInvocationID.x; - uint localIndex = gl_LocalInvocationID.x; - - // Initialize shared counter (first thread in workgroup) - if (localIndex == 0u) - { - visibleCount = 0u; - } - barrier(); if (instanceIndex >= g_nTotalInstances) { @@ -202,9 +181,6 @@ void main() return; // Instance culled } - // Atomically increment visible instance counter - atomicAdd(visibleCount, 1u); - // Unpack orientation quaternion and convert to rotation matrix vec4 quat = UnpackOrientation32(instance.orientation32); mat3 rotation = QuaternionToMatrix(quat); @@ -215,8 +191,15 @@ void main() transform[1] = vec4(rotation[1] * instance.scale, instance.position.y); transform[2] = vec4(rotation[2] * instance.scale, instance.position.z); - // Atomically allocate next transform slot - uint transformIndex = atomicAdd(transformCounter, 1u); + // Atomically increment the visible instance count for this draw command + // This returns the OLD value, which gives us our slot index within this clutter object + uint localSlotIndex = atomicAdd(indirectDraws[g_nDrawCommandIndex].instanceCount, 1u); + + // Calculate actual buffer indices by adding base offsets + uint transformIndex = g_nBaseTransformIndex + localSlotIndex; + uint instanceDataIndex = g_nBaseInstanceIndex + localSlotIndex; + + // Write transform to allocated slot instanceTransforms[transformIndex] = transform; // Unpack tint color (RGB only, ignore source alpha) and apply fade alpha @@ -224,29 +207,11 @@ void main() uint alphaChannel = uint(fadeAlpha * 255.0) << 24; // Convert fade to 0-255 and shift to alpha position uint tintAlpha = tintRgb | alphaChannel; - // Atomically allocate next instance slot - uint instanceDataIndex = atomicAdd(instanceCounter, 1u); + // Write instance data to allocated slot instanceData[instanceDataIndex].tintAlpha = tintAlpha; instanceData[instanceDataIndex].transformIndex = transformIndex; instanceData[instanceDataIndex].visibleLPV = 0u; // TODO: Light probe volume instanceData[instanceDataIndex].identification = 0u; // TODO: Selection ID instanceData[instanceDataIndex].envMapVisibility = uvec4(0u); // TODO: Env map visibility - - // Sync workgroup to ensure all threads have processed - barrier(); - - // First thread in workgroup writes the indirect draw command - if (localIndex == 0u && instanceIndex < g_nTotalInstances) - { - // Atomically allocate base instance index for this draw call - uint baseInstanceForDraw = atomicAdd(instanceCounter, 0u) - visibleCount; - - // Write draw command with actual visible instance count - indirectDraws[g_nDrawCommandIndex].indexCount = g_nDrawIndexCount; - indirectDraws[g_nDrawCommandIndex].instanceCount = visibleCount; - indirectDraws[g_nDrawCommandIndex].firstIndex = g_nDrawFirstIndex; - indirectDraws[g_nDrawCommandIndex].baseVertex = g_nDrawBaseVertex; - indirectDraws[g_nDrawCommandIndex].baseInstance = baseInstanceForDraw; - } }