Optimize Animated Model rendering with hardware instancing

All models use the same global buffer for skinned bones, which allows sharing the shader binding for instancing.
Refactor draw call for batching skinned mesh draws.
Remove `SkinnedMeshDrawData` and merge it into `AnimatedModel` internals.
This commit is contained in:
2026-06-15 17:59:41 +02:00
parent 4d1c627900
commit bc36168318
26 changed files with 430 additions and 367 deletions
+19 -38
View File
@@ -20,10 +20,6 @@ Buffer<float4> ObjectsBuffer : register(t0);
#if USE_SKINNING
// The skeletal bones matrix buffer (stored as 4x3, 3 float4 behind each other)
Buffer<float4> BoneMatrices : register(t1);
#if PER_BONE_MOTION_BLUR
// The skeletal bones matrix buffer from the previous frame
Buffer<float4> PrevBoneMatrices : register(t2);
#endif
#endif
// Geometry data passed through the graphics rendering stages up to the pixel shader
@@ -418,32 +414,8 @@ float4 VS_Depth(ModelInput_PosOnly input) : SV_Position
#if USE_SKINNING
#if PER_BONE_MOTION_BLUR
// Loads the matrix of the given bone from the previous frame bones buffer (3 consecutive float4 per bone)
float3x4 GetPrevBoneMatrix(int index)
{
float4 a = PrevBoneMatrices[index * 3];
float4 b = PrevBoneMatrices[index * 3 + 1];
float4 c = PrevBoneMatrices[index * 3 + 2];
return float3x4(a, b, c);
}
// Transforms the vertex position by the blend of its (up to 4) bone matrices taken from the previous frame bones buffer
float3 SkinPrevPosition(ModelInput_Skinned input)
{
float4 position = float4(input.Position.xyz, 1);
float weightsSum = input.BlendWeights.x + input.BlendWeights.y + input.BlendWeights.z + input.BlendWeights.w;
float mainWeight = input.BlendWeights.x + (1.0f - weightsSum); // Re-normalize to account for 16-bit weights encoding errors
// Accumulate the weighted bone matrices (the first bone receives the re-normalization remainder)
float3x4 boneMatrix = mainWeight * GetPrevBoneMatrix(input.BlendIndices.x);
boneMatrix += input.BlendWeights.y * GetPrevBoneMatrix(input.BlendIndices.y);
boneMatrix += input.BlendWeights.z * GetPrevBoneMatrix(input.BlendIndices.z);
boneMatrix += input.BlendWeights.w * GetPrevBoneMatrix(input.BlendIndices.w);
return mul(boneMatrix, position);
}
#endif
// Calculates the transposed transform matrix for the given bone index
float3x4 GetBoneMatrix(int index)
float3x4 GetBoneMatrix(uint index)
{
float4 a = BoneMatrices[index * 3];
float4 b = BoneMatrices[index * 3 + 1];
@@ -452,14 +424,14 @@ float3x4 GetBoneMatrix(int index)
}
// Calculates the transposed transform matrix for the given vertex (uses blending)
float3x4 GetBoneMatrix(ModelInput_Skinned input)
float3x4 GetBoneMatrix(ModelInput_Skinned input, uint skinningOffset)
{
float weightsSum = input.BlendWeights.x + input.BlendWeights.y + input.BlendWeights.z + input.BlendWeights.w;
float mainWeight = input.BlendWeights.x + (1.0f - weightsSum); // Re-normalize to account for 16-bit weights encoding erros
float3x4 boneMatrix = mainWeight * GetBoneMatrix(input.BlendIndices.x);
boneMatrix += input.BlendWeights.y * GetBoneMatrix(input.BlendIndices.y);
boneMatrix += input.BlendWeights.z * GetBoneMatrix(input.BlendIndices.z);
boneMatrix += input.BlendWeights.w * GetBoneMatrix(input.BlendIndices.w);
float3x4 boneMatrix = mainWeight * GetBoneMatrix(input.BlendIndices.x + skinningOffset);
boneMatrix += input.BlendWeights.y * GetBoneMatrix(input.BlendIndices.y + skinningOffset);
boneMatrix += input.BlendWeights.z * GetBoneMatrix(input.BlendIndices.z + skinningOffset);
boneMatrix += input.BlendWeights.w * GetBoneMatrix(input.BlendIndices.w + skinningOffset);
return boneMatrix;
}
@@ -487,31 +459,40 @@ float3x3 SkinTangents(ModelInput_Skinned input, float3x4 boneMatrix)
// Vertex Shader function for GBuffers/Depth Pass (skinned mesh rendering)
META_VS(true, FEATURE_LEVEL_ES2)
META_PERMUTATION_1(USE_SKINNING=1)
META_PERMUTATION_2(USE_SKINNING=1, PER_BONE_MOTION_BLUR=1)
META_PERMUTATION_2(USE_SKINNING=1, USE_INSTANCING=0)
META_PERMUTATION_3(USE_SKINNING=1, USE_INSTANCING=0, PER_BONE_MOTION_BLUR=1)
META_PERMUTATION_2(USE_SKINNING=1, USE_INSTANCING=1)
META_PERMUTATION_3(USE_SKINNING=1, USE_INSTANCING=1, PER_BONE_MOTION_BLUR=1)
META_VS_IN_ELEMENT(POSITION, 0, R32G32B32_FLOAT, 0, 0, PER_VERTEX, 0, true)
META_VS_IN_ELEMENT(TEXCOORD, 0, R16G16_FLOAT, 0, ALIGN, PER_VERTEX, 0, true)
META_VS_IN_ELEMENT(NORMAL, 0, R10G10B10A2_UNORM, 0, ALIGN, PER_VERTEX, 0, true)
META_VS_IN_ELEMENT(TANGENT, 0, R10G10B10A2_UNORM, 0, ALIGN, PER_VERTEX, 0, true)
META_VS_IN_ELEMENT(BLENDINDICES, 0, R8G8B8A8_UINT, 0, ALIGN, PER_VERTEX, 0, true)
META_VS_IN_ELEMENT(BLENDWEIGHTS, 0, R16G16B16A16_FLOAT,0, ALIGN, PER_VERTEX, 0, true)
META_VS_IN_ELEMENT(ATTRIBUTE, 0, R32_UINT, 3, 0, PER_INSTANCE, 1, USE_INSTANCING)
VertexOutput VS_Skinned(ModelInput_Skinned input)
{
VertexOutput output;
// Load object data
#if USE_INSTANCING
output.Geometry.ObjectIndex = input.ObjectIndex;
#else
output.Geometry.ObjectIndex = DrawObjectIndex;
#endif
ObjectData object = LoadObject(ObjectsBuffer, output.Geometry.ObjectIndex);
// Perform skinning
float3x4 boneMatrix = GetBoneMatrix(input);
float3x4 boneMatrix = GetBoneMatrix(input, object.SkinningOffset);
float3 position = SkinPosition(input, boneMatrix);
float3x3 tangentToLocal = SkinTangents(input, boneMatrix);
// Compute world space vertex position
output.Geometry.WorldPosition = mul(float4(position, 1), object.WorldMatrix).xyz;
#if PER_BONE_MOTION_BLUR
float3 prevPosition = SkinPrevPosition(input);
int prevBonesOffset = (int)object.SkinningOffset + object.PrevBonesOffset; // Offset can be negative
float3x4 prevBoneMatrix = GetBoneMatrix(input, (uint)prevBonesOffset);
float3 prevPosition = SkinPosition(input, prevBoneMatrix);
output.Geometry.PrevWorldPosition = mul(float4(prevPosition, 1), object.PrevWorldMatrix).xyz;
#else
output.Geometry.PrevWorldPosition = mul(float4(position, 1), object.PrevWorldMatrix).xyz;
+1 -1
View File
@@ -4,7 +4,7 @@
"Major": 1,
"Minor": 13,
"Revision": 0,
"Build": 7005
"Build": 7006
},
"Company": "Flax",
"Copyright": "Copyright (c) 2012-2026 Wojciech Figat. All rights reserved.",
@@ -84,16 +84,8 @@ void ViewportIconsRenderer::DrawIcons(RenderContext& renderContext, Actor* actor
return;
Mesh::DrawInfo draw;
draw.Lightmap = nullptr;
draw.LightmapUVs = nullptr;
draw.Flags = StaticFlags::Transform;
draw.DrawModes = DrawPass::Forward;
draw.PerInstanceRandom = 0;
draw.StencilValue = 0;
draw.LODBias = 0;
draw.ForcedLOD = -1;
draw.SortOrder = 0;
draw.VertexColors = nullptr;
if (const auto scene = SceneObject::Cast<Scene>(actor))
{
-3
View File
@@ -95,9 +95,6 @@ void AnimationsSystem::Job(int32 index)
ZoneName(*graphName, graphName.Length());
#endif
// Prepare skinning data
animatedModel->SetupSkinningData();
// Animation delta time can be based on a time since last update or the current delta
float dt = animatedModel->UseTimeScale ? DeltaTime : UnscaledDeltaTime;
float t = animatedModel->UseTimeScale ? Time : UnscaledTime;
+2 -15
View File
@@ -163,7 +163,7 @@ void Foliage::DrawInstance(DrawContext& context, FoliageInstance& instance, int3
Matrix::Transformation(transform.Scale, transform.Orientation, translation, instance.CachedDrawWorld);
instance.CachedDrawWorldValid = true;
}
instanceData.Store(instance.CachedDrawWorld, instance.CachedDrawWorld, instance.LightmapUVsArea, drawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor);
instanceData.Store(instance.CachedDrawWorld, instance.CachedDrawWorld, instance.LightmapUVsArea, drawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, (byte)(lodDitherFactor * 255));
}
}
@@ -461,10 +461,6 @@ void Foliage::DrawFoliageJob(int32 i)
Mesh::DrawInfo draw;
draw.Flags = GetStaticFlags();
draw.DrawModes = (DrawPass)(DrawPass::Default & renderContext.View.Pass);
draw.LODBias = 0;
draw.ForcedLOD = -1;
draw.VertexColors = nullptr;
draw.Deformation = nullptr;
DrawType(renderContext, type, draw);
#endif
}
@@ -564,7 +560,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Me
firstInstance.Load(batch.DrawCall);
#if USE_EDITOR
if (renderContext.View.Mode == ViewMode::LightmapUVsDensity)
batch.DrawCall.Surface.LODDitherFactor = type.ScaleInLightmap; // See LightmapUVsDensityMaterialShader
batch.DrawCall.Surface.SkinningBonesOffset = *(uint32*)&type.ScaleInLightmap; // See LightmapUVsDensityMaterialShader
#endif
if (EnumHasAnyFlags(drawModes, DrawPass::Forward))
@@ -1298,16 +1294,11 @@ void Foliage::Draw(RenderContext& renderContext)
drawState.PrevWorld = world;
Mesh::DrawInfo draw;
draw.Flags = GetStaticFlags();
draw.LODBias = 0;
draw.ForcedLOD = -1;
draw.SortOrder = 0;
draw.VertexColors = nullptr;
draw.Lightmap = _scene ? _scene->LightmapsData.GetReadyLightmap(instance.LightmapTextureIndex) : nullptr;
draw.LightmapUVs = &instance.LightmapUVsArea;
draw.Buffer = &type.Entries;
draw.World = &world;
draw.DrawState = &drawState;
draw.Deformation = nullptr;
draw.Bounds = instance.Bounds;
draw.PerInstanceRandom = instance.Random;
draw.DrawModes = type._drawModes & view.Pass & view.GetShadowsDrawPassMask(type.ShadowsMode);
@@ -1321,10 +1312,6 @@ void Foliage::Draw(RenderContext& renderContext)
Mesh::DrawInfo draw;
draw.Flags = GetStaticFlags();
draw.DrawModes = (DrawPass)(DrawPass::Default & view.Pass);
draw.LODBias = 0;
draw.ForcedLOD = -1;
draw.VertexColors = nullptr;
draw.Deformation = nullptr;
#endif
#if FOLIAGE_USE_SINGLE_QUAD_TREE
if (Root)
@@ -10,7 +10,6 @@
#include "Engine/Level/Scene/Lightmap.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/Shaders/GPUConstantBuffer.h"
#include "Engine/Graphics/Models/SkinnedMeshDrawData.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Graphics/GPULimits.h"
@@ -56,19 +55,12 @@ void DeferredMaterialShader::Bind(BindParameters& params)
MaterialParams::Bind(params.ParamsLink, bindMeta);
context->BindSR(0, params.ObjectBuffer);
// Check if using mesh skinning
const bool useSkinning = drawCall.Surface.Skinning != nullptr;
bool perBoneMotionBlur = false;
// Bind skinning buffer
const bool useSkinning = drawCall.Surface.Skinning != DrawCall::SkinningMode::None;
const bool usePerBoneMotionBlur = drawCall.Surface.Skinning == DrawCall::SkinningMode::WithPrevBones;
if (useSkinning)
{
// Bind skinning buffer
ASSERT(drawCall.Surface.Skinning->IsReady());
context->BindSR(1, drawCall.Surface.Skinning->BoneMatrices->View());
if (drawCall.Surface.Skinning->PrevBoneMatrices && drawCall.Surface.Skinning->PrevBoneMatrices->IsAllocated())
{
context->BindSR(2, drawCall.Surface.Skinning->PrevBoneMatrices->View());
perBoneMotionBlur = true;
}
context->BindSR(1, drawCall.Surface.SkinningBones->View());
}
// Bind constants
@@ -90,9 +82,8 @@ void DeferredMaterialShader::Bind(BindParameters& params)
// Invert culling when scale is negative
cullMode = cullMode == CullMode::Normal ? CullMode::Inverted : CullMode::Normal;
}
ASSERT_LOW_LAYER(!(useSkinning && params.Instanced)); // No support for instancing skinned meshes
const auto cache = params.Instanced ? &_cacheInstanced : &_cache;
PipelineStateCache* psCache = cache->GetPS(view.Pass, useLightmap, useSkinning, perBoneMotionBlur);
PipelineStateCache* psCache = cache->GetPS(view.Pass, useLightmap, useSkinning, usePerBoneMotionBlur);
ASSERT(psCache);
GPUPipelineState* state = psCache->GetPS(cullMode, wireframe);
@@ -139,28 +130,35 @@ bool DeferredMaterialShader::Load()
psDesc.StencilReadMask = 0;
psDesc.StencilPassOp = StencilOperation::Replace;
auto vs = _shader->GetVS("VS");
auto vsInstanced = _shader->GetVS("VS", 1);
auto vsSkinned = _shader->GetVS("VS_Skinned");
auto vsSkinnedInstanced = _shader->GetVS("VS_Skinned", 2);
// GBuffer Pass
psDesc.VS = _shader->GetVS("VS");
psDesc.VS = vs;
failed |= psDesc.VS == nullptr;
psDesc.PS = _shader->GetPS("PS_GBuffer");
_cache.Default.Init(psDesc);
psDesc.VS = _shader->GetVS("VS", 1);
psDesc.VS = vsInstanced;
failed |= psDesc.VS == nullptr;
_cacheInstanced.Default.Init(psDesc);
// GBuffer Pass with lightmap (pixel shader permutation for USE_LIGHTMAP=1)
psDesc.VS = _shader->GetVS("VS");
// GBuffer Pass with lightmap (USE_LIGHTMAP=1)
psDesc.VS = vs;
failed |= psDesc.VS == nullptr;
psDesc.PS = _shader->GetPS("PS_GBuffer", 1);
_cache.DefaultLightmap.Init(psDesc);
psDesc.VS = _shader->GetVS("VS", 1);
psDesc.VS = vsInstanced;
failed |= psDesc.VS == nullptr;
_cacheInstanced.DefaultLightmap.Init(psDesc);
// GBuffer Pass with skinning
psDesc.VS = _shader->GetVS("VS_Skinned");
// GBuffer Pass with skinning (USE_SKINNING=1)
psDesc.VS = vsSkinned;
psDesc.PS = _shader->GetPS("PS_GBuffer");
_cache.DefaultSkinned.Init(psDesc);
psDesc.VS = vsSkinnedInstanced;
_cacheInstanced.DefaultSkinned.Init(psDesc);
psDesc.StencilEnable = false;
psDesc.StencilPassOp = StencilOperation::Keep;
@@ -169,13 +167,15 @@ bool DeferredMaterialShader::Load()
if (_shader->HasShader("PS_QuadOverdraw"))
{
// Quad Overdraw
psDesc.VS = _shader->GetVS("VS");
psDesc.VS = vs;
psDesc.PS = _shader->GetPS("PS_QuadOverdraw");
_cache.QuadOverdraw.Init(psDesc);
psDesc.VS = _shader->GetVS("VS", 1);
psDesc.VS = vsInstanced;
_cacheInstanced.Depth.Init(psDesc);
psDesc.VS = _shader->GetVS("VS_Skinned");
psDesc.VS = vsSkinned;
_cache.QuadOverdrawSkinned.Init(psDesc);
psDesc.VS = vsSkinnedInstanced;
_cacheInstanced.QuadOverdrawSkinned.Init(psDesc);
}
#endif
@@ -183,17 +183,22 @@ bool DeferredMaterialShader::Load()
psDesc.DepthWriteEnable = false;
psDesc.DepthEnable = true;
psDesc.DepthFunc = ComparisonFunc::DefaultEqual;
psDesc.VS = _shader->GetVS("VS");
psDesc.VS = vs;
psDesc.PS = _shader->GetPS("PS_MotionVectors");
_cache.MotionVectors.Init(psDesc);
_cacheInstanced.MotionVectors.Init(psDesc);
// Motion Vectors pass with skinning
psDesc.VS = _shader->GetVS("VS_Skinned");
psDesc.VS = vsSkinned;
_cache.MotionVectorsSkinned.Init(psDesc);
psDesc.VS = vsSkinnedInstanced;
_cacheInstanced.MotionVectorsSkinned.Init(psDesc);
// Motion Vectors pass with skinning (with per-bone motion blur)
psDesc.VS = _shader->GetVS("VS_Skinned", 1);
_cache.MotionVectorsSkinnedPerBone.Init(psDesc);
psDesc.VS = _shader->GetVS("VS_Skinned", 3);
_cacheInstanced.MotionVectorsSkinnedPerBone.Init(psDesc);
// Depth Pass
psDesc.CullMode = CullMode::TwoSided;
@@ -209,8 +214,8 @@ bool DeferredMaterialShader::Load()
{
// Materials with masking need full vertex buffer to get texcoord used to sample textures for per pixel masking.
// Materials with world pos offset need full VB to apply offset using texcoord etc.
psDesc.VS = _shader->GetVS("VS");
instancedDepthPassVS = _shader->GetVS("VS", 1);
psDesc.VS = vs;
instancedDepthPassVS = vsInstanced;
psDesc.PS = _shader->GetPS("PS_Depth");
}
else
@@ -224,8 +229,10 @@ bool DeferredMaterialShader::Load()
_cacheInstanced.Depth.Init(psDesc);
// Depth Pass with skinning
psDesc.VS = _shader->GetVS("VS_Skinned");
psDesc.VS = vsSkinned;
_cache.DepthSkinned.Init(psDesc);
psDesc.VS = vsSkinnedInstanced;
_cacheInstanced.DepthSkinned.Init(psDesc);
return failed;
}
@@ -8,7 +8,6 @@
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Graphics/RenderView.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/Models/SkinnedMeshDrawData.h"
#include "Engine/Graphics/Shaders/GPUConstantBuffer.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Renderer/DrawCall.h"
@@ -58,13 +57,11 @@ void ForwardMaterialShader::Bind(BindParameters& params)
MaterialParams::Bind(params.ParamsLink, bindMeta);
context->BindSR(0, params.ObjectBuffer);
// Check if using mesh skinning
const bool useSkinning = drawCall.Surface.Skinning != nullptr;
// Bind skinning buffer
const bool useSkinning = drawCall.Surface.Skinning != DrawCall::SkinningMode::None;
if (useSkinning)
{
// Bind skinning buffer
ASSERT(drawCall.Surface.Skinning->IsReady());
context->BindSR(1, drawCall.Surface.Skinning->BoneMatrices->View());
context->BindSR(1, drawCall.Surface.SkinningBones->View());
}
// Bind constants
@@ -10,7 +10,7 @@
/// <summary>
/// Current materials shader version.
/// </summary>
#define MATERIAL_GRAPH_VERSION 183
#define MATERIAL_GRAPH_VERSION 184
class Material;
class GPUShader;
+1 -1
View File
@@ -32,6 +32,6 @@
#define MAX_BONES_PER_VERTEX MODEL_MAX_BONES_PER_VERTEX
// Defines the maximum allowed amount of skeleton bones to be used with skinned model
#define MODEL_MAX_BONES_PER_MODEL 0xffff
#define MODEL_MAX_BONES_PER_MODEL MAX_int16
// [Deprecated in v1.10] Use MODEL_MAX_BONES_PER_MODEL
#define MAX_BONES_PER_MODEL MODEL_MAX_BONES_PER_MODEL
+4 -4
View File
@@ -306,7 +306,7 @@ void Mesh::Draw(const RenderContext& renderContext, const DrawInfo& info, float
drawCall.Surface.PrevWorld = info.DrawState->PrevWorld;
drawCall.Surface.Lightmap = (info.Flags & StaticFlags::Lightmap) != StaticFlags::None ? info.Lightmap : nullptr;
drawCall.Surface.LightmapUVsArea = info.LightmapUVs ? *info.LightmapUVs : Half4::Zero;
drawCall.Surface.LODDitherFactor = lodDitherFactor;
drawCall.Surface.LODDitherFactor = (byte)(lodDitherFactor * 255);
drawCall.PerInstanceRandom = info.PerInstanceRandom;
drawCall.StencilValue = info.StencilValue;
#if USE_EDITOR
@@ -314,7 +314,7 @@ void Mesh::Draw(const RenderContext& renderContext, const DrawInfo& info, float
if (viewMode == ViewMode::LightmapUVsDensity || viewMode == ViewMode::LODPreview)
GBufferPass::AddIndexBufferToModelLOD(_indexBuffer, &((Model*)_model)->LODs[_lodIndex]);
if (viewMode == ViewMode::LightmapUVsDensity)
drawCall.Surface.LODDitherFactor = info.LightmapScale; // See LightmapUVsDensityMaterialShader
drawCall.Surface.SkinningBonesOffset = *(uint32*)&info.LightmapScale; // See LightmapUVsDensityMaterialShader
#endif
// Push draw call to the render list
@@ -369,7 +369,7 @@ void Mesh::Draw(const RenderContextBatch& renderContextBatch, const DrawInfo& in
drawCall.Surface.PrevWorld = info.DrawState->PrevWorld;
drawCall.Surface.Lightmap = (info.Flags & StaticFlags::Lightmap) != StaticFlags::None ? info.Lightmap : nullptr;
drawCall.Surface.LightmapUVsArea = info.LightmapUVs ? *info.LightmapUVs : Half4::Zero;
drawCall.Surface.LODDitherFactor = lodDitherFactor;
drawCall.Surface.LODDitherFactor = (byte)(lodDitherFactor * 255);
drawCall.PerInstanceRandom = info.PerInstanceRandom;
drawCall.StencilValue = info.StencilValue;
#if USE_EDITOR
@@ -377,7 +377,7 @@ void Mesh::Draw(const RenderContextBatch& renderContextBatch, const DrawInfo& in
if (viewMode == ViewMode::LightmapUVsDensity || viewMode == ViewMode::LODPreview)
GBufferPass::AddIndexBufferToModelLOD(_indexBuffer, &((Model*)_model)->LODs[_lodIndex]);
if (viewMode == ViewMode::LightmapUVsDensity)
drawCall.Surface.LODDitherFactor = info.LightmapScale; // See LightmapUVsDensityMaterialShader
drawCall.Surface.SkinningBonesOffset = *(uint32*)&info.LightmapScale; // See LightmapUVsDensityMaterialShader
#endif
// Push draw call to the render lists
+15 -2
View File
@@ -362,7 +362,10 @@ public:
/// <summary>
/// The skinning.
/// </summary>
SkinnedMeshDrawData* Skinning;
GPUBuffer* SkinningBones;
uint32 SkinningBonesOffset; // In Matrix3x4s
int16 PrevBonesOffset; // In Matrix3x4s, can be negative
bool WithPrevBones;
};
struct
@@ -425,9 +428,19 @@ public:
int8 SortOrder;
#if USE_EDITOR
float LightmapScale = -1.0f;
float LightmapScale;
#endif
// Clears every field to zero, then applies the few defaults that are not zero.
FORCE_INLINE DrawInfo()
{
    Platform::MemoryClear(this, sizeof(*this));
#if USE_EDITOR
    LightmapScale = -1.0f;
#endif
    ForcedLOD = -1;
}
// Packs object layer into the stencil bits.
FORCE_INLINE void SetStencilValue(int32 layer)
{
+12 -4
View File
@@ -334,6 +334,7 @@ void SkinnedMesh::Draw(const RenderContext& renderContext, const DrawInfo& info,
return;
// Setup draw call
ASSERT(info.SkinningBones);
DrawCall drawCall;
drawCall.Geometry.IndexBuffer = _indexBuffer;
drawCall.Geometry.VertexBuffers[0] = _vertexBuffers[0];
@@ -348,8 +349,11 @@ void SkinnedMesh::Draw(const RenderContext& renderContext, const DrawInfo& info,
drawCall.ObjectRadius = (float)info.Bounds.Radius; // TODO: should it be kept in sync with ObjectPosition?
drawCall.Surface.GeometrySize = _box.GetSize();
drawCall.Surface.PrevWorld = info.DrawState->PrevWorld;
drawCall.Surface.Skinning = info.Skinning;
drawCall.Surface.LODDitherFactor = lodDitherFactor;
drawCall.Surface.Skinning = info.WithPrevBones ? DrawCall::SkinningMode::WithPrevBones : DrawCall::SkinningMode::Active;
drawCall.Surface.SkinningBones = info.SkinningBones;
drawCall.Surface.SkinningBonesOffset = info.SkinningBonesOffset;
drawCall.Surface.PrevBonesOffset = info.PrevBonesOffset;
drawCall.Surface.LODDitherFactor = (byte)(lodDitherFactor * 255);
drawCall.PerInstanceRandom = info.PerInstanceRandom;
drawCall.StencilValue = info.StencilValue;
@@ -376,6 +380,7 @@ void SkinnedMesh::Draw(const RenderContextBatch& renderContextBatch, const DrawI
return;
// Setup draw call
ASSERT(info.SkinningBones);
DrawCall drawCall;
drawCall.Geometry.IndexBuffer = _indexBuffer;
drawCall.Geometry.VertexBuffers[0] = _vertexBuffers[0];
@@ -389,8 +394,11 @@ void SkinnedMesh::Draw(const RenderContextBatch& renderContextBatch, const DrawI
drawCall.ObjectRadius = (float)info.Bounds.Radius; // TODO: should it be kept in sync with ObjectPosition?
drawCall.Surface.GeometrySize = _box.GetSize();
drawCall.Surface.PrevWorld = info.DrawState->PrevWorld;
drawCall.Surface.Skinning = info.Skinning;
drawCall.Surface.LODDitherFactor = lodDitherFactor;
drawCall.Surface.Skinning = info.WithPrevBones ? DrawCall::SkinningMode::WithPrevBones : DrawCall::SkinningMode::Active;
drawCall.Surface.SkinningBones = info.SkinningBones;
drawCall.Surface.SkinningBonesOffset = info.SkinningBonesOffset;
drawCall.Surface.PrevBonesOffset = info.PrevBonesOffset;
drawCall.Surface.LODDitherFactor = (byte)(lodDitherFactor * 255);
drawCall.PerInstanceRandom = info.PerInstanceRandom;
drawCall.StencilValue = info.StencilValue;
@@ -1,59 +0,0 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#include "SkinnedMeshDrawData.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Animations/Config.h"
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Matrix.h"
// Deletes both GPU bone buffers (current and previous frame) owned by this instance.
SkinnedMeshDrawData::~SkinnedMeshDrawData()
{
SAFE_DELETE_GPU_RESOURCE(BoneMatrices);
SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices);
}
void SkinnedMeshDrawData::Setup(int32 bonesCount)
{
    // Create the GPU buffer object on first use
    if (!BoneMatrices)
        BoneMatrices = GPUDevice::Instance->CreateBuffer(TEXT("BoneMatrices"));

    // Every bone occupies 3 float4 elements
    const int32 float4Count = bonesCount * 3;
    const auto description = GPUBufferDescription::Typed(float4Count, PixelFormat::R32G32B32A32_Float, false, GPUResourceUsage::Dynamic);
    if (BoneMatrices->Init(description))
    {
        LOG(Error, "Failed to initialize the skinned mesh bones buffer");
        return;
    }

    // Match the CPU-side storage to the GPU buffer and drop any previous frame history
    Data.Resize(BoneMatrices->GetSize());
    SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices);
    BonesCount = bonesCount;
    _isDirty = true;
    _hasValidData = false;
}
void SkinnedMeshDrawData::OnDataChanged(bool dropHistory)
{
    // Previous frame bones are kept only when valid data already exists and the caller did not ask to drop it
    const bool keepHistory = _hasValidData && !dropHistory;
    if (!keepHistory)
    {
        SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices);
    }
    else
    {
        ASSERT(BoneMatrices);

        // Create the previous frame buffer on demand with the same description as the current one
        if (PrevBoneMatrices == nullptr)
        {
            PrevBoneMatrices = GPUDevice::Instance->CreateBuffer(TEXT("BoneMatrices"));
            if (PrevBoneMatrices->Init(BoneMatrices->GetDescription()))
            {
                LOG(Fatal, "Failed to initialize the skinned mesh bones buffer");
            }
        }

        // The buffer used so far becomes the previous frame one
        Swap(PrevBoneMatrices, BoneMatrices);
    }

    _hasValidData = true;
    _isDirty = true;
}
@@ -1,80 +0,0 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Core/Collections/Array.h"
#include "Engine/Graphics/GPUBuffer.h"
/// <summary>
/// Data storage for the skinned meshes rendering
/// </summary>
class FLAXENGINE_API SkinnedMeshDrawData
{
private:
// True once bones data has been provided via OnDataChanged (cleared again by Setup).
bool _hasValidData = false;
// True when the CPU data has changed and has not been flushed to the GPU buffer yet (cleared by OnFlush).
bool _isDirty = false;
public:
/// <summary>
/// The bones count.
/// </summary>
int32 BonesCount = 0;
/// <summary>
/// The bone matrices buffer. Contains prepared skeletal bones transformations (stored as 4x3, 3 Vector4 behind each other).
/// </summary>
GPUBuffer* BoneMatrices = nullptr;
/// <summary>
/// The bone matrices buffer used during the previous update. Used by per-bone motion blur.
/// </summary>
GPUBuffer* PrevBoneMatrices = nullptr;
/// <summary>
/// The CPU data buffer with the bones transformations (ready to be flushed with the GPU).
/// </summary>
Array<byte> Data;
public:
/// <summary>
/// Finalizes an instance of the <see cref="SkinnedMeshDrawData"/> class.
/// </summary>
~SkinnedMeshDrawData();
public:
/// <summary>
/// Determines whether this instance is ready for rendering.
/// </summary>
FORCE_INLINE bool IsReady() const
{
return BoneMatrices != nullptr && BoneMatrices->IsAllocated();
}
/// <summary>
/// Determines whether this instance has been modified and needs to be flushed with GPU buffer.
/// </summary>
FORCE_INLINE bool IsDirty() const
{
return _isDirty;
}
/// <summary>
/// Setups the data container for the specified bones amount.
/// </summary>
/// <param name="bonesCount">The bones count.</param>
void Setup(int32 bonesCount);
/// <summary>
/// After bones Data has been modified externally. Updates the bone matrices data for the GPU buffer. Ensure to call Flush before rendering.
/// </summary>
/// <param name="dropHistory">True if drop previous update bones used for motion blur, otherwise will keep them and do the update.</param>
void OnDataChanged(bool dropHistory);
/// <summary>
/// After bones Data has been sent to the GPU buffer.
/// </summary>
void OnFlush()
{
_isDirty = false;
}
};
+249 -80
View File
@@ -22,71 +22,165 @@
#include "Engine/Level/Scene/Scene.h"
#include "Engine/Level/SceneObjectsFactory.h"
#include "Engine/Profiler/Profiler.h"
#include "Engine/Profiler/ProfilerMemory.h"
#include "Engine/Serialization/Serialization.h"
// Implements efficient skinning data update within a shared GPUMemoryPass with manual resource transitions batched for all animated models.
// Implements efficient skinning data update within a shared GPUBuffer with memory sharing for all animated models.
class AnimatedModelRenderListExtension : public RenderList::IExtension
{
public:
struct Item
// Allocation within the global buffer (offset and size are in bytes)
struct Allocation
{
GPUBuffer* BoneMatrices;
void* Data;
int32 Size;
uint32 Size;
uint32 Offset;
};
RenderListBuffer<Item> Items;
GPUBuffer* GlobalBuffer = nullptr;
RenderListBuffer<Allocation> Updates;
CriticalSection Locker;
Array<Allocation> FreeList;
Array<byte> Data;
uint32 CurrentOffset = 0;
uint32 CurrentSize = 0;
uint32 ReallocateSize = 0; // Lower bound for the new buffer size to copy back from old buffer (in bytes)
volatile int64 UpdateSize = 0;
ReadWriteLock DataLocker; // Ensure to lock data writers when performing reallocation of the global buffer
void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override
// Allocates a new skinned bones data block from the global buffer and returns its offset and size (in bytes).
// A previously freed block is reused when one of exactly the requested size exists. Otherwise the block is
// placed at the end of the used range, growing the CPU-side storage when it does not fit.
// The GPU buffer is not touched here; ReallocateSize records how many bytes of the old GPU buffer have to be
// copied once it gets recreated.
Allocation Allocate(uint32 size)
{
    PROFILE_MEM(Animations);
    ScopeLock lock(Updates.Locker());

    // Check free items list to reuse allocation (exact size match only)
    auto* freeItems = FreeList.Get();
    for (int32 i = 0; i < FreeList.Count(); i++)
    {
        if (freeItems[i].Size == size)
        {
            const Allocation reused = freeItems[i];
            FreeList.RemoveAt(i);
            return reused;
        }
    }

    // Check if need to create/resize the global buffer
    if (CurrentOffset + size > CurrentSize)
    {
        DataLocker.WriteLock(); // Ensure no one is writing to this buffer during resize

        // First allocation sets it (in case multiple reallocs before draw)
        if (ReallocateSize == 0)
            ReallocateSize = CurrentSize;

        // Grow buffer: a single doubling is not always enough (eg. the very first block can exceed the initial 16 kB),
        // so keep doubling until the requested block fits
        uint32 newSize = CurrentSize == 0 ? 16 * 1024 : CurrentSize * 2;
        while (CurrentOffset + size > newSize)
        {
            ASSERT(newSize < 0x80000000u); // Doubling further would overflow 32-bit size
            newSize *= 2;
        }
        CurrentSize = newSize;
        Data.Resize(CurrentSize, true);

        DataLocker.WriteUnlock();
    }

    // Allocate new block at the end of the used range
    const Allocation result = { size, CurrentOffset };
    CurrentOffset += size;
    return result;
}
// Frees allocated memory back to the global buffer.
// The block is only recorded in the free list so a later request of the same size can take it over; the buffer itself never shrinks here.
void Free(Allocation alloc)
{
PROFILE_MEM(Animations);
ScopeLock lock(Updates.Locker()); // Same lock that Allocate takes before touching FreeList
FreeList.Add(alloc);
// TODO: track active allocations count and roll back to offset 0 without free list when all allocations are freed to reduce fragmentation (eg. on scene changing)
}
private:
// Creates a GPU buffer that covers CurrentSize bytes, addressed as float4 elements.
// On initialization failure an error is logged and the buffer is passed to SAFE_DELETE_GPU_RESOURCE before returning.
GPUBuffer* InitBuffer() const
{
    GPUBuffer* result = GPUDevice::Instance->CreateBuffer(TEXT("BoneMatrices"));
    const int32 elementsCount = (int32)(CurrentSize / sizeof(Float4));
    const auto description = GPUBufferDescription::Typed(elementsCount, PixelFormat::R32G32B32A32_Float, false, GPUResourceUsage::Dynamic);
    if (result->Init(description))
    {
        LOG(Error, "Failed to initialize the skinned mesh bones buffer");
        SAFE_DELETE_GPU_RESOURCE(result);
    }
    return result;
}
public:
// [RenderList::IExtension]
void Dispose() override
{
    // Drop the GPU resource and reset all allocation bookkeeping back to the empty state
    SAFE_DELETE_GPU_RESOURCE(GlobalBuffer);
    CurrentOffset = CurrentSize = ReallocateSize = 0;
    FreeList.Clear();
    Updates.Clear();
}
// Resets the pending updates and makes sure the GPU buffer exists and matches the CPU-side storage size.
void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override
{
// Free pending updates to collect them during drawing
Updates.Clear();
UpdateSize = 0;
// Setup global buffer on GPU
if (!CurrentSize)
return;
if (!GlobalBuffer)
{
// First-time creation: there is no older GPU buffer to copy from
GlobalBuffer = InitBuffer();
ReallocateSize = 0;
}
else if (ReallocateSize)
{
// CPU-side storage has grown: create a new buffer and copy the first ReallocateSize bytes of the old one into it
// NOTE(review): InitBuffer logs an error and deletes the buffer when Init fails; if that yields a null pointer it is passed to CopyBuffer and stored as GlobalBuffer - confirm and guard if needed
auto newGlobalBuffer = InitBuffer();
context->CopyBuffer(newGlobalBuffer, GlobalBuffer, ReallocateSize);
GlobalBuffer->DeleteObject(1.0f); // Delay destruction
GlobalBuffer = newGlobalBuffer;
ReallocateSize = 0;
}
}
void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override
{
const int32 count = Items.Count();
const int32 count = Updates.Count();
if (count == 0)
return;
PROFILE_GPU_CPU_NAMED("Update Bones");
GPUMemoryPass pass(context);
Item* items = Items.Get();
ScopeWriteLock lock(DataLocker);
// Special case for D3D11 backend that doesn't need transitions
if (context->GetDevice()->GetRendererType() <= RendererType::DirectX11)
auto* updates = Updates.Get();
auto globalBuffer = GlobalBuffer;
auto globalData = Data.Get();
if (context->GetDevice()->GetRendererType() <= RendererType::DirectX11 || // Dynamic buffer cannot be updated partially on D3D11 (hence D3D11_MAP_WRITE_DISCARD), so update the whole buffer
count >= 1000 || // When updates count is large, it is more efficient to update the whole buffer at once
UpdateSize >= (uint32)(0.7f * CurrentOffset)) // When modified size is large, it is more efficient to update the whole buffer at once
{
for (int32 i = 0; i < count; i++)
{
Item& item = items[i];
context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size);
}
// Update whole buffer at once
context->UpdateBuffer(globalBuffer, globalData, CurrentOffset);
}
else
{
// Batch resource barriers for buffer update
for (int32 i = 0; i < count; i++)
pass.Transition(items[i].BoneMatrices, GPUResourceAccess::CopyWrite);
// Update all buffers within Memory Pass (no barriers between)
// Update all modified chunks of the buffer
for (int32 i = 0; i < count; i++)
{
Item& item = items[i];
context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size);
auto& item = updates[i];
context->UpdateBuffer(globalBuffer, globalData + item.Offset, item.Size, item.Offset);
}
// Batch resource barriers for reading in Vertex Shader
for (int32 i = 0; i < count; i++)
pass.Transition(items[i].BoneMatrices, GPUResourceAccess::ShaderReadGraphics);
pass.Transition(globalBuffer, GPUResourceAccess::ShaderReadGraphics);
}
#if COMPILE_WITH_PROFILER
// Insert amount of kilobytes of data updated into profiler trace
uint32 dataSize = 0;
for (int32 i = 0; i < count; i++)
dataSize += items[i].Size;
ZoneValue(dataSize / 1024);
ZoneValue(UpdateSize / 1024); // Trace amount of kilobytes of data updated
#endif
Items.Clear();
Updates.Clear();
UpdateSize = 0;
}
};
@@ -107,6 +201,94 @@ AnimatedModel::AnimatedModel(const SpawnParams& params)
_sphere = BoundingSphere(Vector3::Zero, 0.0f);
}
// Zero-initializes the whole state by writing the object as a single 64-bit value.
AnimatedModel::SkinnedBones::SkinnedBones()
{
// The raw write below only covers the object when it is exactly 8 bytes, so fail the build if the layout changes
static_assert(sizeof(*this) == sizeof(uint64), "Update size/alignment.");
*(uint64*)this = 0;
}
AnimatedModel::SkinnedBones::~SkinnedBones()
{
    // Nothing to give back when no block was ever allocated
    if (!IsAllocated)
        return;

    // The block holds one set of bones, or two when previous frame bones are stored as well
    const uint32 bonesSets = HasPrevBones ? 2 : 1;
    const uint32 dataSize = BonesCount * sizeof(Matrix3x4) * bonesSets;
    RenderListExtension.Free({ dataSize, GlobalBufferOffset });
}
// Writes the current bone matrices (bone offset matrix * node pose, transposed to 3x4) into this object's block of the shared CPU-side buffer.
// perBoneMotionBlur requests a double-sized block so two sets of bones are kept; reset makes writing start again from the first half of the block.
void AnimatedModel::SkinnedBones::Update(const SkeletonData& skeleton, const Array<Matrix>& nodesPose, bool perBoneMotionBlur, bool reset)
{
const int32 bonesCount = skeleton.Bones.Count();
// Swap between two halves of the buffer for current/previous frame bones
if (HasPrevBones)
{
IsPrevBones = !IsPrevBones;
}
// Lazy-allocate from global buffer (double the size when using prev frame bones for per-bone motion vectors)
if (!IsAllocated || BonesCount != bonesCount || HasPrevBones != perBoneMotionBlur)
{
if (IsAllocated)
{
// Return the old block using its old size (computed from the state that gets overwritten below)
uint32 dataSize = BonesCount * sizeof(Matrix3x4) * (HasPrevBones ? 2 : 1);
RenderListExtension.Free({ dataSize, GlobalBufferOffset });
}
uint32 dataSize = bonesCount * sizeof(Matrix3x4) * (perBoneMotionBlur ? 2 : 1);
auto alloc = RenderListExtension.Allocate(dataSize);
GlobalBufferOffset = alloc.Offset;
BonesCount = bonesCount;
IsAllocated = true;
// A new block starts from the first half with no previous frame data flushed yet
IsPrevBones = false;
IsPrevFlushed = false;
HasPrevBones = perBoneMotionBlur;
}
else if (reset)
{
// Keep the block but restart from the first half
IsPrevBones = false;
IsPrevFlushed = false;
}
// Copy bones transformations to the CPU buffer (including bone offset matrix) and mark it as dirty to be flushed with GPU buffer later
RenderListExtension.DataLocker.ReadLock();
const SkeletonBone* bones = skeleton.Bones.Get();
const Matrix* nodes = nodesPose.Get();
Matrix3x4* output = (Matrix3x4*)(RenderListExtension.Data.Get() + GlobalBufferOffset); // DataLocker ensures it's safe to access (resizing happens within exclusive write-lock)
if (IsPrevBones)
output += BonesCount; // Write to the second half of the allocation
ASSERT(nodesPose.Count() == skeleton.Nodes.Count());
for (int32 boneIndex = 0; boneIndex < bonesCount; boneIndex++)
{
const SkeletonBone& bone = bones[boneIndex];
Matrix matrix;
Matrix::Multiply(bone.OffsetMatrix, nodes[bone.NodeIndex], matrix);
output[boneIndex].SetMatrixTranspose(matrix);
}
RenderListExtension.DataLocker.ReadUnlock();
IsDirty = true;
}
void AnimatedModel::SkinnedBones::Flush()
{
uint32 size = BonesCount * sizeof(Matrix3x4);
uint32 offset = GlobalBufferOffset;
if (IsPrevBones)
{
// Write to the second half of the allocation (1st half will contain previous frame bones)
offset += size;
// Mark initial flush of the previous frame bones
IsPrevFlushed = true;
}
// Add pending buffer update
RenderListExtension.Updates.Add({ size, offset });
Platform::InterlockedAdd(&RenderListExtension.UpdateSize, size);
// Clear dirty flag
IsDirty = false;
}
AnimatedModel::~AnimatedModel()
{
if (_deformation)
@@ -139,15 +321,6 @@ void AnimatedModel::UpdateAnimation()
void AnimatedModel::SetupSkinningData()
{
ASSERT(SkinnedModel && SkinnedModel->IsLoaded());
const int32 targetBonesCount = SkinnedModel->Skeleton.Bones.Count();
const int32 currentBonesCount = _skinningData.BonesCount;
if (targetBonesCount != currentBonesCount)
{
_skinningData.Setup(targetBonesCount);
}
}
void AnimatedModel::PreInitSkinningData()
@@ -158,7 +331,6 @@ void AnimatedModel::PreInitSkinningData()
PROFILE_MEM(Animations);
ScopeLock lock(SkinnedModel->Locker);
SetupSkinningData();
auto& skeleton = SkinnedModel->Skeleton;
const int32 bonesCount = skeleton.Bones.Count();
const int32 nodesCount = skeleton.Nodes.Count();
@@ -180,15 +352,7 @@ void AnimatedModel::PreInitSkinningData()
GraphInstance.RootTransform = nodesCount > 0 ? skeleton.Nodes[0].LocalTransform : Transform::Identity;
// Setup bones transformations including bone offset matrix
Matrix3x4* output = (Matrix3x4*)_skinningData.Data.Get();
const SkeletonBone* bones = skeleton.Bones.Get();
for (int32 boneIndex = 0; boneIndex < bonesCount; boneIndex++)
{
auto& bone = bones[boneIndex];
Matrix identityMatrix = bone.OffsetMatrix * nodesPose[bone.NodeIndex];
output[boneIndex].SetMatrixTranspose(identityMatrix);
}
_skinningData.OnDataChanged(true);
_bones.Update(skeleton, GraphInstance.NodesPose, PerBoneMotionBlur, true);
UpdateBounds();
UpdateSockets();
@@ -926,18 +1090,7 @@ void AnimatedModel::OnAnimationUpdated_Async()
// Calculate the final bones transformations and update skinning
{
ANIM_GRAPH_PROFILE_EVENT("Final Pose");
const int32 bonesCount = skeleton.Bones.Count();
Matrix3x4* output = (Matrix3x4*)_skinningData.Data.Get();
ASSERT(GraphInstance.NodesPose.Count() == skeleton.Nodes.Count());
ASSERT(_skinningData.Data.Count() == bonesCount * sizeof(Matrix3x4));
for (int32 boneIndex = 0; boneIndex < bonesCount; boneIndex++)
{
const SkeletonBone& bone = skeleton.Bones[boneIndex];
Matrix matrix;
Matrix::Multiply(bone.OffsetMatrix, GraphInstance.NodesPose.Get()[bone.NodeIndex], matrix);
output[boneIndex].SetMatrixTranspose(matrix);
}
_skinningData.OnDataChanged(!PerBoneMotionBlur);
_bones.Update(skeleton, GraphInstance.NodesPose, PerBoneMotionBlur);
}
//if (UpdateWhenOffscreen)
@@ -1077,18 +1230,26 @@ void AnimatedModel::Draw(RenderContext& renderContext)
GEOMETRY_DRAW_STATE_EVENT_BEGIN(_drawState, world);
_lastMinDstSqr = Math::Min(_lastMinDstSqr, Vector3::DistanceSquared(_transform.Translation, renderContext.View.WorldPosition));
if (_skinningData.IsReady())
if (_bones.IsAllocated)
{
// Flush skinning data with GPU
if (_skinningData.IsDirty())
{
RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() });
_skinningData.OnFlush();
}
if (_bones.IsDirty)
_bones.Flush();
SkinnedMesh::DrawInfo draw;
draw.Buffer = &Entries;
draw.Skinning = &_skinningData;
draw.SkinningBones = RenderListExtension.GlobalBuffer;
draw.SkinningBonesOffset = _bones.GlobalBufferOffset / sizeof(Matrix3x4);
draw.WithPrevBones = _bones.HasPrevBones && _bones.IsPrevFlushed;
if (draw.WithPrevBones)
{
draw.PrevBonesOffset = _bones.BonesCount;
if (_bones.IsPrevBones)
{
draw.SkinningBonesOffset += draw.PrevBonesOffset;
draw.PrevBonesOffset = -draw.PrevBonesOffset;
}
}
draw.World = &world;
draw.DrawState = &_drawState;
draw.Deformation = _deformation;
@@ -1120,18 +1281,26 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch)
GEOMETRY_DRAW_STATE_EVENT_BEGIN(_drawState, world);
_lastMinDstSqr = Math::Min(_lastMinDstSqr, Vector3::DistanceSquared(_transform.Translation, renderContext.View.WorldPosition));
if (_skinningData.IsReady())
if (_bones.IsAllocated)
{
// Flush skinning data with GPU
if (_skinningData.IsDirty())
{
RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() });
_skinningData.OnFlush();
}
if (_bones.IsDirty)
_bones.Flush();
SkinnedMesh::DrawInfo draw;
draw.Buffer = &Entries;
draw.Skinning = &_skinningData;
draw.SkinningBones = RenderListExtension.GlobalBuffer;
draw.SkinningBonesOffset = _bones.GlobalBufferOffset / sizeof(Matrix3x4);
draw.WithPrevBones = _bones.HasPrevBones && _bones.IsPrevFlushed;
if (draw.WithPrevBones)
{
draw.PrevBonesOffset = _bones.BonesCount;
if (_bones.IsPrevBones)
{
draw.SkinningBonesOffset += draw.PrevBonesOffset;
draw.PrevBonesOffset = -draw.PrevBonesOffset;
}
}
draw.World = &world;
draw.DrawState = &_drawState;
draw.Deformation = _deformation;
+20 -3
View File
@@ -5,7 +5,6 @@
#include "ModelInstanceActor.h"
#include "Engine/Content/Assets/SkinnedModel.h"
#include "Engine/Content/Assets/AnimationGraph.h"
#include "Engine/Graphics/Models/SkinnedMeshDrawData.h"
#include "Engine/Renderer/DrawCall.h"
#include "Engine/Core/Delegate.h"
@@ -80,9 +79,26 @@ private:
uint32 Usages;
};
// Compact state of this model's bone matrices stored inside the shared skinning bones buffer.
// The layout has to stay exactly 8 bytes: the constructor static_asserts the size and zero-initializes the object as a single uint64.
struct SkinnedBones
{
uint32 GlobalBufferOffset; // Offset (in bytes) of this model's allocation within the global bones buffer data
uint16 BonesCount; // Amount of bones (Matrix3x4 entries) stored in each half of the allocation
uint16 IsAllocated : 1; // Set once a range of the global buffer has been allocated for this model
uint16 IsDirty : 1; // Set by Update after writing CPU data, cleared by Flush
uint16 IsPrevBones : 1; // When set, the current frame bones are written to the second half of the allocation (toggled by Update when HasPrevBones is set)
uint16 IsPrevFlushed : 1; // Set by Flush once the second half has been queued for upload; drawing uses previous frame bones only when this is set
uint16 HasPrevBones : 1; // Set when the allocation is doubled to hold both current and previous frame bones (per-bone motion blur)
// Zero-initializes all fields and flags.
SkinnedBones();
// Frees the allocation from the global buffer (if any).
~SkinnedBones();
// Writes the bone matrices for the given pose into the CPU buffer (allocating or reallocating when needed) and marks the data dirty.
void Update(const SkeletonData& skeleton, const Array<Matrix>& nodesPose, bool perBoneMotionBlur, bool reset = false);
// Queues the last written bones for upload to the GPU buffer and clears the dirty flag.
void Flush();
};
GeometryDrawStateData _drawState;
SkinnedMeshDrawData _skinningData;
AnimationUpdateMode _actualMode;
SkinnedBones _bones;
uint32 _counter;
Real _lastMinDstSqr;
bool _isDuringUpdateEvent = false;
@@ -216,8 +232,9 @@ public:
/// <summary>
/// Validates and creates a proper skinning data.
/// [Deprecated in v1.13]
/// </summary>
API_FUNCTION() void SetupSkinningData();
API_FUNCTION() DEPRECATED("Not used anymore. Does nothing.") void SetupSkinningData();
/// <summary>
/// Creates and setups the skinning data (writes the identity bones transformations).
-8
View File
@@ -366,19 +366,11 @@ void Camera::Draw(RenderContext& renderContext)
draw.Buffer = &_previewModelBuffer;
draw.World = &world;
draw.DrawState = &drawState;
draw.Deformation = nullptr;
draw.Lightmap = nullptr;
draw.LightmapUVs = nullptr;
draw.Flags = StaticFlags::Transform;
draw.DrawModes = (DrawPass::Depth | DrawPass::GBuffer | DrawPass::Forward) & renderContext.View.Pass;
BoundingSphere::FromBox(_previewModelBox, draw.Bounds);
draw.Bounds.Center -= renderContext.View.Origin;
draw.PerInstanceRandom = GetPerInstanceRandom();
draw.StencilValue = 0;
draw.LODBias = 0;
draw.ForcedLOD = -1;
draw.SortOrder = 0;
draw.VertexColors = nullptr;
if (draw.DrawModes != DrawPass::None)
{
_previewModel->Draw(renderContext, draw);
+13 -3
View File
@@ -148,6 +148,13 @@ struct DrawCall
/// </summary>
int32 InstanceCount;
// Skinning state of a surface draw call. The numeric value is hashed into the batch key and draw calls
// are batched together only when their modes are equal.
enum class SkinningMode
{
    // No skinning (zero value).
    None = 0,
    // Skinning is used.
    Active = 1,
    // Skinning is used together with previous frame bones.
    WithPrevBones = 2,
};
union
{
struct
@@ -190,9 +197,12 @@ struct DrawCall
{
const Lightmap* Lightmap;
Half4 LightmapUVsArea;
SkinnedMeshDrawData* Skinning;
SkinningMode Skinning;
int16 PrevBonesOffset; // In Matrix3x4s, can be negative
byte LODDitherFactor; // The model LOD transition dither progress.
GPUBuffer* SkinningBones;
Float3 GeometrySize; // Object geometry size in the world (unscaled).
float LODDitherFactor; // The model LOD transition dither progress.
uint32 SkinningBonesOffset; // In Matrix3x4s
Matrix PrevWorld;
} Surface;
@@ -276,7 +286,7 @@ struct DrawCall
uint8 StencilValue;
/// <summary>
/// The world matrix determinant sign (used for geometry that is two sided or has inverse scale - needs to flip normal vectors and change triangles culling).
/// The world matrix determinant sign (used for geometry that is two-sided or has inverse scale - needs to flip normal vectors and change triangles culling).
/// 0 - sign is positive
/// 1 - sign is negative (flips object surfaces)
/// </summary>
@@ -104,7 +104,7 @@ void LightmapUVsDensityMaterialShader::Bind(BindParameters& params)
data.LightmapSize = 1024.0f;
data.LightmapArea = drawCall.Surface.LightmapUVsArea.ToFloat4();
const ModelLOD* drawCallModelLod;
float scaleInLightmap = drawCall.Surface.LODDitherFactor; // Reuse field
float scaleInLightmap = *(float*)&drawCall.Surface.SkinningBonesOffset; // Reuse field (the same bit-depth)
if (scaleInLightmap < 0.0f)
data.LightmapSize = -1.0f; // Not using lightmap
else if (GBufferPass::IndexBufferToModelLOD.TryGet(drawCall.Geometry.IndexBuffer, drawCallModelLod))
+21 -10
View File
@@ -47,7 +47,7 @@ namespace
}
}
void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Half4& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor)
void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Half4& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, byte lodDitherFactor, uint32 skinningOffset, int16 skinningPrevOffset)
{
Float2 lightmapUVsAreaPackedAliased = *(Float2*)&lightmapUVsArea;
Raw[0] = Float4(worldMatrix.M11, worldMatrix.M12, worldMatrix.M13, worldMatrix.M41);
@@ -57,11 +57,15 @@ void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldM
Raw[4] = Float4(prevWorldMatrix.M21, prevWorldMatrix.M22, prevWorldMatrix.M23, prevWorldMatrix.M42);
Raw[5] = Float4(prevWorldMatrix.M31, prevWorldMatrix.M32, prevWorldMatrix.M33, prevWorldMatrix.M43);
Raw[6] = Float4(geometrySize, perInstanceRandom);
Raw[7] = Float4(worldDeterminantSign, lodDitherFactor, lightmapUVsAreaPackedAliased.X, lightmapUVsAreaPackedAliased.Y);
// TODO: pack WorldDeterminantSign and LODDitherFactor
// 0-7 bits: LOD Dither Factor (0-1 range mapped to 0-255)
// 8 bit: World Determinant Sign (0 for normal or 1 for inversed)
// 9-15 bits: unused
// 16-31 bits: Offset in Skinning Bones buffer for previous frame bones (can be negative, stored biased by 32760)
uint32 packed7x = (uint32)lodDitherFactor + (worldDeterminantSign < 0 ? 256 : 0) + ((skinningPrevOffset + 32760) << 16);
Raw[7] = Float4(*(float*)&packed7x, *(float*)&skinningOffset, lightmapUVsAreaPackedAliased.X, lightmapUVsAreaPackedAliased.Y);
}
void ShaderObjectData::Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Half4& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, float& lodDitherFactor) const
void ShaderObjectData::Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Half4& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, byte& lodDitherFactor, uint32& skinningOffset, int16& skinningPrevOffset) const
{
worldMatrix.SetRow1(Float4(Float3(Raw[0]), 0.0f));
worldMatrix.SetRow2(Float4(Float3(Raw[1]), 0.0f));
@@ -73,8 +77,11 @@ void ShaderObjectData::Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Half4&
prevWorldMatrix.SetRow4(Float4(Raw[3].W, Raw[4].W, Raw[5].W, 1.0f));
geometrySize = Float3(Raw[6]);
perInstanceRandom = Raw[6].W;
worldDeterminantSign = Raw[7].X;
lodDitherFactor = Raw[7].Y;
uint32 packed7x = *(uint32*)&Raw[7].X;
lodDitherFactor = packed7x & 255;
worldDeterminantSign = (packed7x & 256) == 256 ? -1.0f : 1.0f;
skinningOffset = *(uint32*)&Raw[7].Y;
skinningPrevOffset = (packed7x >> 16) - 32760;
Float2 lightmapUVsAreaPackedAliased(Raw[7].Z, Raw[7].W);
lightmapUVsArea = *(Half4*)&lightmapUVsAreaPackedAliased;
}
@@ -289,6 +296,11 @@ void RenderList::CleanupCache()
// Don't call it during rendering (data may be already in use)
ASSERT(GPUDevice::Instance == nullptr || GPUDevice::Instance->CurrentTask == nullptr);
// Free extensions
for (IExtension* e : GetExtensions())
e->Dispose();
// Free pooled memory
MemPoolLocker.Lock();
FreeRenderList.ClearDelete();
for (auto& e : MemPool)
@@ -935,7 +947,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
FORCE_INLINE bool CanUseInstancing(DrawPass pass)
{
return pass == DrawPass::GBuffer || pass == DrawPass::Depth;
return pass == DrawPass::GBuffer || pass == DrawPass::Depth || pass == DrawPass::MotionVectors;
}
FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b)
@@ -1217,15 +1229,14 @@ void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey)
{
if (drawCall.Surface.Lightmap)
CombineHash(batchKey, 1313);
if (drawCall.Surface.Skinning)
CombineHash(batchKey, 11);
CombineHash(batchKey, (byte)drawCall.Surface.Skinning);
}
bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, DrawPass pass)
{
// TODO: find reason why batching static meshes with lightmap causes problems with sampling in shader (flickering when meshes in batch order gets changes due to async draw calls collection)
if (a.Surface.Lightmap == nullptr && b.Surface.Lightmap == nullptr &&
a.Surface.Skinning == nullptr && b.Surface.Skinning == nullptr)
a.Surface.Skinning == b.Surface.Skinning)
{
auto& materialInfo = a.Material->GetInfo();
if (a.Material != b.Material)
+6 -4
View File
@@ -359,6 +359,8 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject
IExtension();
virtual ~IExtension();
// Event called when GPU Device is shutting down and the extension is being disposed. Used to clean up GPU resources (before scripting might destroy extension).
virtual void Dispose() {}
// Event called before collecting draw calls. Can be used for initialization.
virtual void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {}
// Event called after collecting draw calls. Can be used for cleanup or to perform additional drawing using collected draw calls data such as batched data processing.
@@ -690,18 +692,18 @@ GPU_CB_STRUCT(ShaderObjectData
{
Float4 Raw[8];
void FLAXENGINE_API Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Half4& lightmapUVsAreaPacked, const Float3& geometrySize, float perInstanceRandom = 0.0f, float worldDeterminantSign = 1.0f, float lodDitherFactor = 0.0f);
void FLAXENGINE_API Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Half4& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, float& lodDitherFactor) const;
void FLAXENGINE_API Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Half4& lightmapUVsAreaPacked, const Float3& geometrySize, float perInstanceRandom = 0.0f, float worldDeterminantSign = 1.0f, byte lodDitherFactor = 0, uint32 skinningOffset = 0, int16 skinningPrevOffset = 0);
void FLAXENGINE_API Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Half4& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, byte& lodDitherFactor, uint32& skinningOffset, int16& prevBonesOffset) const;
FORCE_INLINE void Store(const DrawCall& drawCall)
{
Store(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, drawCall.WorldDeterminant ? -1.0f : 1.0f, drawCall.Surface.LODDitherFactor);
Store(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, drawCall.WorldDeterminant ? -1.0f : 1.0f, drawCall.Surface.LODDitherFactor, drawCall.Surface.SkinningBonesOffset, drawCall.Surface.PrevBonesOffset);
}
FORCE_INLINE void Load(DrawCall& drawCall) const
{
float worldDeterminantSign;
Load(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, worldDeterminantSign, drawCall.Surface.LODDitherFactor);
Load(drawCall.World, drawCall.Surface.PrevWorld, drawCall.Surface.LightmapUVsArea, drawCall.Surface.GeometrySize, drawCall.PerInstanceRandom, worldDeterminantSign, drawCall.Surface.LODDitherFactor, drawCall.Surface.SkinningBonesOffset, drawCall.Surface.PrevBonesOffset);
drawCall.ObjectPosition = drawCall.World.GetTranslation();
drawCall.WorldDeterminant = worldDeterminantSign < 0 ? 1 : 0;
}
+5
View File
@@ -19,6 +19,7 @@
#include "Engine/Renderer/Lightmaps.h"
#endif
#define SHADOWS_USE_CACHE 1
#define SHADOWS_POSITION_ERROR METERS_TO_UNITS(0.1f)
#define SHADOWS_ROTATION_ERROR 0.9999f
#define SHADOWS_MAX_TILES 6
@@ -237,6 +238,7 @@ struct ShadowAtlasLight
void ValidateCache(const RenderView& view, const RenderLightData& light)
{
#if SHADOWS_USE_CACHE
if (!Cache.StaticValid || !Cache.DynamicValid)
return;
if (!Math::NearEqual(Cache.Distance, light.ShadowsDistance) ||
@@ -286,6 +288,9 @@ struct ShadowAtlasLight
Cache.DynamicValid = false;
}
}
#else
Cache.StaticValid = Cache.DynamicValid = false;
#endif
}
};
+8 -2
View File
@@ -126,7 +126,10 @@ void TerrainChunk::Draw(const RenderContext& renderContext) const
drawCall.SetStencilValue(_patch->_terrain->GetLayer());
#if USE_EDITOR
if (renderContext.View.Mode == ViewMode::LightmapUVsDensity)
drawCall.Surface.LODDitherFactor = 1.0f; // See LightmapUVsDensityMaterialShader
{
float lightmapScale = 1.0f;
drawCall.Surface.SkinningBonesOffset = *(uint32*)&lightmapScale; // See LightmapUVsDensityMaterialShader
}
#endif
// Add half-texel offset for heightmap sampling in vertex shader
@@ -187,7 +190,10 @@ void TerrainChunk::Draw(const RenderContext& renderContext, MaterialBase* materi
drawCall.SetStencilValue(_patch->_terrain->GetLayer());
#if USE_EDITOR
if (renderContext.View.Mode == ViewMode::LightmapUVsDensity)
drawCall.Surface.LODDitherFactor = 1.0f; // See LightmapUVsDensityMaterialShader
{
float lightmapScale = 1.0f;
drawCall.Surface.SkinningBonesOffset = *(uint32*)&lightmapScale; // See LightmapUVsDensityMaterialShader
}
#endif
// Add half-texel offset for heightmap sampling in vertex shader
@@ -487,7 +487,7 @@ bool MaterialGenerator::Generate(WriteStream& source, MaterialInfo& materialInfo
switch (baseLayer->Domain)
{
case MaterialDomain::Surface:
srv = 3; // Objects + Skinning Bones + Prev Bones
srv = 2; // Objects + Skinning Bones
break;
case MaterialDomain::Decal:
srv = 2; // Depth buffer + Stencil buffer
+1 -1
View File
@@ -1089,7 +1089,7 @@ bool ModelTool::ImportModel(const String& path, ModelData& data, Options& option
// Special case if imported model has no bones but has valid skeleton and meshes.
// We assume that every mesh uses a single bone. Copy nodes to bones.
if (data.Skeleton.Bones.IsEmpty() && Math::IsInRange(data.Skeleton.Nodes.Count(), 1, MODEL_MAX_BONES_PER_MODEL))
if (data.Skeleton.Bones.IsEmpty() && Math::IsInRange(data.Skeleton.Nodes.Count(), 1, (int32)MODEL_MAX_BONES_PER_MODEL))
{
data.Skeleton.Bones.Resize(data.Skeleton.Nodes.Count());
for (int32 i = 0; i < data.Skeleton.Nodes.Count(); i++)
+11 -3
View File
@@ -81,9 +81,11 @@ struct ObjectData
float4x4 PrevWorldMatrix;
float3 GeometrySize;
float WorldDeterminantSign;
float4 LightmapArea;
float LODDitherFactor;
float PerInstanceRandom;
float4 LightmapArea;
uint SkinningOffset;
int PrevBonesOffset;
};
float2 UnpackHalf2(uint xy)
@@ -115,8 +117,11 @@ ObjectData LoadObject(Buffer<float4> objectsBuffer, uint objectIndex)
object.PrevWorldMatrix[3] = float4(vector3.w, vector4.w, vector5.w, 1.0f);
object.GeometrySize = vector6.xyz;
object.PerInstanceRandom = vector6.w;
object.WorldDeterminantSign = vector7.x;
object.LODDitherFactor = vector7.y;
uint packed7x = asuint(vector7.x);
object.WorldDeterminantSign = (packed7x & 256) == 256 ? -1.0f : 1.0f;
object.LODDitherFactor = (packed7x & 255) / 255.0f;
object.SkinningOffset = asuint(vector7.y);
object.PrevBonesOffset = (int)(packed7x >> 16) - 32760;
object.LightmapArea.xy = UnpackHalf2(asuint(vector7.z));
object.LightmapArea.zw = UnpackHalf2(asuint(vector7.w));
return object;
@@ -247,6 +252,9 @@ struct ModelInput_Skinned
#endif
uint4 BlendIndices : BLENDINDICES;
float4 BlendWeights : BLENDWEIGHTS;
#if USE_INSTANCING
uint ObjectIndex : ATTRIBUTE0;
#endif
};
struct Model_VS2PS