Files
FlaxEngine/Source/Engine/Renderer/RenderList.cpp
T
mafiesto4 bc36168318 Optimize Animated Model rendering with hardware instancing
All models are using the same global buffer for skinned bones which allows to share shader binding for instancing.
Refactor draw call for batching skinned mesh draws.
Remove `SkinnedMeshDrawData` and merge it into `AnimatedModel` internals.
2026-06-15 17:59:41 +02:00

1257 lines
48 KiB
C++

// Copyright (c) Wojciech Figat. All rights reserved.
#include "RenderList.h"
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Half.h"
#include "Engine/Core/Collections/Sorting.h"
#include "Engine/Graphics/Materials/IMaterial.h"
#include "Engine/Graphics/Materials/MaterialShader.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Graphics.h"
#include "Engine/Graphics/PostProcessEffect.h"
#include "Engine/Graphics/Shaders/GPUVertexLayout.h"
#include "Engine/Profiler/Profiler.h"
#include "Engine/Content/Assets/CubeTexture.h"
#include "Engine/Level/Scene/Lightmap.h"
#include "Engine/Level/Actors/PostFxVolume.h"
static_assert(sizeof(DrawCall) <= 288, "Too big draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Terrain), "Wrong draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Particle), "Wrong draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Custom), "Wrong draw call data size.");
static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObjectData::Raw), "Wrong object data.");
namespace
{
    // Render lists released via RenderList::ReturnToPool and waiting to be reused by RenderList::GetFromPool.
    Array<RenderList*> FreeRenderList;
    // Released memory blocks stored as (pointer, size) pairs for reuse by RendererAllocation::Allocate.
    Array<Pair<void*, uintptr>> MemPool;
    // Lock used around accesses to both FreeRenderList and MemPool.
    CriticalSection MemPoolLocker;

    using ExtensionsList = Array<RenderList::IExtension*, FixedAllocation<8>>;

    // Returns the registry of render list extensions (kept in a function-local static).
    ExtensionsList& GetExtensions()
    {
        static ExtensionsList extensions;
        return extensions;
    }

    // Returns true for opaque materials that use no masking, position offset, displacement or wireframe.
    FORCE_INLINE bool IsSimpleMaterial(const MaterialInfo& info)
    {
        const MaterialUsageFlags complexUsage = MaterialUsageFlags::UseMask | MaterialUsageFlags::UsePositionOffset | MaterialUsageFlags::UseDisplacement;
        if (!EnumHasNoneFlags(info.UsageFlags, complexUsage))
            return false;
        if (!EnumHasNoneFlags(info.FeaturesFlags, MaterialFeaturesFlags::Wireframe))
            return false;
        return info.BlendMode == MaterialBlendMode::Opaque;
    }
}
void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Half4& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, byte lodDitherFactor, uint32 skinningOffset, int16 skinningPrevOffset)
{
Float2 lightmapUVsAreaPackedAliased = *(Float2*)&lightmapUVsArea;
Raw[0] = Float4(worldMatrix.M11, worldMatrix.M12, worldMatrix.M13, worldMatrix.M41);
Raw[1] = Float4(worldMatrix.M21, worldMatrix.M22, worldMatrix.M23, worldMatrix.M42);
Raw[2] = Float4(worldMatrix.M31, worldMatrix.M32, worldMatrix.M33, worldMatrix.M43);
Raw[3] = Float4(prevWorldMatrix.M11, prevWorldMatrix.M12, prevWorldMatrix.M13, prevWorldMatrix.M41);
Raw[4] = Float4(prevWorldMatrix.M21, prevWorldMatrix.M22, prevWorldMatrix.M23, prevWorldMatrix.M42);
Raw[5] = Float4(prevWorldMatrix.M31, prevWorldMatrix.M32, prevWorldMatrix.M33, prevWorldMatrix.M43);
Raw[6] = Float4(geometrySize, perInstanceRandom);
// 0-3 bits: LOD Dither Factor (0-1 range mapped to 0-255)
// 4 bit: World Determinant Sign (0 for normal or 1 for inversed)
// 5-15 bits: unused
// 16-31 bits: Offset in Skinning Bones buffer for previous frame bones (can be negative)
uint32 packed7x = (uint32)lodDitherFactor + (worldDeterminantSign < 0 ? 256 : 0) + ((skinningPrevOffset + 32760) << 16);
Raw[7] = Float4(*(float*)&packed7x, *(float*)&skinningOffset, lightmapUVsAreaPackedAliased.X, lightmapUVsAreaPackedAliased.Y);
}
// Unpacks the values written by ShaderObjectData::Store from the Raw array.
// The 4th column of both matrices is rebuilt as (0, 0, 0, 1) since it is not stored.
void ShaderObjectData::Load(Matrix& worldMatrix, Matrix& prevWorldMatrix, Half4& lightmapUVsArea, Float3& geometrySize, float& perInstanceRandom, float& worldDeterminantSign, byte& lodDitherFactor, uint32& skinningOffset, int16& skinningPrevOffset) const
{
    // Current frame world matrix (translation is kept in W of the first three entries)
    worldMatrix.SetRow1(Float4(Float3(Raw[0]), 0.0f));
    worldMatrix.SetRow2(Float4(Float3(Raw[1]), 0.0f));
    worldMatrix.SetRow3(Float4(Float3(Raw[2]), 0.0f));
    worldMatrix.SetRow4(Float4(Raw[0].W, Raw[1].W, Raw[2].W, 1.0f));

    // Previous frame world matrix
    prevWorldMatrix.SetRow1(Float4(Float3(Raw[3]), 0.0f));
    prevWorldMatrix.SetRow2(Float4(Float3(Raw[4]), 0.0f));
    prevWorldMatrix.SetRow3(Float4(Float3(Raw[5]), 0.0f));
    prevWorldMatrix.SetRow4(Float4(Raw[3].W, Raw[4].W, Raw[5].W, 1.0f));

    // Geometry size and per-instance random share a single entry
    perInstanceRandom = Raw[6].W;
    geometrySize = Float3(Raw[6]);

    // Packed flags: low 8 bits = LOD dither factor, bit 8 = inverted determinant, high 16 bits = biased previous skinning offset
    uint32 flagsBits = *(uint32*)&Raw[7].X;
    const bool inversedDeterminant = (flagsBits & 256) == 256;
    lodDitherFactor = flagsBits & 255;
    if (inversedDeterminant)
        worldDeterminantSign = -1.0f;
    else
        worldDeterminantSign = 1.0f;
    skinningOffset = *(uint32*)&Raw[7].Y;
    skinningPrevOffset = (flagsBits >> 16) - 32760;

    // Lightmap UVs area is stored as 4 halfs aliased into 2 floats
    Float2 lightmapBits(Raw[7].Z, Raw[7].W);
    lightmapUVsArea = *(Half4*)&lightmapBits;
}
// Checks whether the light casts shadows in the given view: the shadows casting mode has to match
// the view kind (offline pass or not) and the shadows strength has to be above ZeroTolerance.
bool RenderLightData::CanRenderShadow(const RenderView& view) const
{
    bool modeAllowsShadow;
    if (ShadowsMode == ShadowsCastingMode::All)
        modeAllowsShadow = true;
    else if (ShadowsMode == ShadowsCastingMode::StaticOnly)
        modeAllowsShadow = view.IsOfflinePass;
    else if (ShadowsMode == ShadowsCastingMode::DynamicOnly)
        modeAllowsShadow = !view.IsOfflinePass;
    else
        modeAllowsShadow = false; // Any other mode never casts shadows
    return modeAllowsShadow && ShadowsStrength > ZeroTolerance;
}
#if !BUILD_RELEASE
#include "Engine/Level/Actors/Light.h"
#include "Engine/Scripting/Scripting.h"
// Looks up the Light object by the stored ID via the scripting objects registry (non-release builds only).
Light* RenderLightData::GetActor() const
{
    Light* const actor = Scripting::TryFindObject<Light>(ID);
    return actor;
}
#endif
// Fills the shader light structure for a directional light.
// Position-related and attenuation-related fields are zeroed, the direction is negated.
void RenderDirectionalLightData::SetShaderData(ShaderLightData& data, bool useShadow) const
{
    // Shading inputs
    data.Color = Color;
    data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS);
    data.Direction = -Direction;

    // Light source shape (source angle is passed via the radius slot)
    data.SourceRadius = SourceAngle;
    data.SourceLength = 0;
    data.SpotAngles.X = -2.0f;
    data.SpotAngles.Y = 1.0f;

    // No position nor distance attenuation
    data.Position = Float3::Zero;
    data.Radius = 0;
    data.RadiusInv = 0;
    data.FalloffExponent = 0;
    data.InverseSquared = 0;

    // Shadows
    if (useShadow)
        data.ShadowsBufferAddress = ShadowsBufferAddress;
    else
        data.ShadowsBufferAddress = 0;
}
bool RenderLocalLightData::CanRenderShadow(const RenderView& view) const
{
// Fade shadow on distance
const float fadeDistance = Math::Max(ShadowsFadeDistance, 0.1f);
const float dstLightToView = Float3::Distance(Position, view.Position);
const float fade = 1 - Math::Saturate((dstLightToView - Radius - ShadowsDistance + fadeDistance) / fadeDistance);
return fade > ZeroTolerance && Radius > 10 && RenderLightData::CanRenderShadow(view);
}
// Fills the shader light structure for a spot light.
void RenderSpotLightData::SetShaderData(ShaderLightData& data, bool useShadow) const
{
    // Shading inputs
    data.Color = Color;
    data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS);

    // Placement
    data.Position = Position;
    data.Direction = Direction;

    // Cone and source shape
    data.SpotAngles.X = CosOuterCone;
    data.SpotAngles.Y = InvCosConeDifference;
    data.SourceRadius = SourceRadius;
    data.SourceLength = 0.0f;

    // Attenuation
    data.Radius = Radius;
    data.RadiusInv = 1.0f / Radius;
    data.FalloffExponent = FallOffExponent;
    if (UseInverseSquaredFalloff)
        data.InverseSquared = 1.0f;
    else
        data.InverseSquared = 0.0f;

    // Shadows
    if (useShadow)
        data.ShadowsBufferAddress = ShadowsBufferAddress;
    else
        data.ShadowsBufferAddress = 0;
}
// Fills the shader light structure for a point light.
void RenderPointLightData::SetShaderData(ShaderLightData& data, bool useShadow) const
{
    // Shading inputs
    data.Color = Color;
    data.MinRoughness = Math::Max(MinRoughness, MIN_ROUGHNESS);

    // Placement
    data.Position = Position;
    data.Direction = Direction;

    // Source shape (spot angles use the same constants as the directional light)
    data.SpotAngles.X = -2.0f;
    data.SpotAngles.Y = 1.0f;
    data.SourceRadius = SourceRadius;
    data.SourceLength = SourceLength;

    // Attenuation
    data.Radius = Radius;
    data.RadiusInv = 1.0f / Radius;
    data.FalloffExponent = FallOffExponent;
    if (UseInverseSquaredFalloff)
        data.InverseSquared = 1.0f;
    else
        data.InverseSquared = 0.0f;

    // Shadows
    if (useShadow)
        data.ShadowsBufferAddress = ShadowsBufferAddress;
    else
        data.ShadowsBufferAddress = 0;
}
// Fills the shader light structure for a sky light.
// The additive color is passed through the SpotAngles.XY and SourceRadius slots, and SourceLength
// carries the image mip levels count minus 2 (or 0 when there is no image).
void RenderSkyLightData::SetShaderData(ShaderLightData& data, bool useShadow) const
{
    // Additive color reuses fields that have no other use for this light type
    data.SpotAngles.X = AdditiveColor.X;
    data.SpotAngles.Y = AdditiveColor.Y;
    data.SourceRadius = AdditiveColor.Z;

    // Mips information of the sky image
    if (Image)
        data.SourceLength = Image->StreamingTexture()->TotalMipLevels() - 2.0f;
    else
        data.SourceLength = 0.0f;

    // Shading inputs
    data.Color = Color;
    data.MinRoughness = MIN_ROUGHNESS;

    // Placement and range
    data.Position = Position;
    data.Direction = Float3::Forward;
    data.Radius = Radius;
    data.RadiusInv = 1.0f / Radius;
    data.FalloffExponent = 0;
    data.InverseSquared = 0;

    // Shadows
    if (useShadow)
        data.ShadowsBufferAddress = ShadowsBufferAddress;
    else
        data.ShadowsBufferAddress = 0;
}
// Fills the shader probe structure.
// Data0 = (position, brightness); for box-projected probes the brightness is negated,
// Data1 holds the scaled extents with blend distance and Data2 holds the inverted orientation.
void RenderEnvironmentProbeData::SetShaderData(ShaderEnvProbeData& data) const
{
    data.Data0 = Float4(Position, Brightness);
    if (!BoxProjection)
    {
        // Sphere probe needs only the radius
        data.Data1 = Float4(Radius, 0, 0, 0);
        data.Data2 = Float4::Zero;
        return;
    }

    // Box probe: flip the sign of the brightness stored in W and pass the box transform
    data.Data0.W *= -1;
    data.Data1 = Float4(Scale * Radius, BlendDistance);
    Quaternion inverseOrientation;
    Quaternion::Invert(Orientation, inverseOrientation);
    data.Data2 = *(Float4*)&inverseOrientation;
}
// Initializes the fog data to defaults: no renderer, no volumetric fog texture,
// zeroed exponential height fog data with a few non-zero overrides.
RenderFogData::RenderFogData()
{
    Renderer = nullptr;
    VolumetricFogTexture = nullptr;

    // Zero the whole structure first, then override selected fields
    auto& heightFog = ExponentialHeightFogData;
    Platform::MemoryClear(&heightFog, sizeof(heightFog));
    heightFog.FogMinOpacity = 1.0f;
    heightFog.FogCutoffDistance = 0.1f;
    heightFog.VolumetricFogMaxDistance = -1.0f;

    auto& volumetricFog = VolumetricFogData;
    volumetricFog.GridSliceParameters = Float4::One;
    volumetricFog.VolumeTexelSize = Float2::Zero;
    volumetricFog.ScreenSize = volumetricFog.VolumeTexelSize;
}
// Binds the fog renderer and queries it for the fog data of the given view.
// When volumetric fog is not in use, the volumetric fog max distance is reset to -1.
void RenderFogData::Init(const RenderView& view, IFogRenderer* renderer)
{
    Renderer = renderer;
    renderer->GetExponentialHeightFogData(view, ExponentialHeightFogData);
    renderer->GetVolumetricFogOptions(VolumetricFog);
    const bool volumetricInUse = VolumetricFog.UseVolumetricFog();
    if (volumetricInUse)
        return;
    ExponentialHeightFogData.VolumetricFogMaxDistance = -1;
}
// Allocates a memory block: reuses a pooled block of exactly the same (power-of-2 aligned) size
// if there is one, otherwise allocates a new block with 16-byte alignment.
void* RendererAllocation::Allocate(uintptr size)
{
    PROFILE_CPU();
    size = AllocationUtils::AlignToPowerOf2((int32)size); // Reduce fragmentation by operating on power-of-2 blocks

    // Search the pool under the lock
    void* block = nullptr;
    MemPoolLocker.Lock();
    int32 index = 0;
    while (index < MemPool.Count())
    {
        if (MemPool.Get()[index].Second == size)
        {
            // Take the block out of the pool
            block = MemPool.Get()[index].First;
            MemPool.RemoveAt(index);
            break;
        }
        index++;
    }
    MemPoolLocker.Unlock();

    // Fallback to a fresh allocation
    if (block)
        return block;
    block = Platform::Allocate(size, 16);
    return block;
}
void RendererAllocation::Free(void* ptr, uintptr size)
{
PROFILE_CPU();
size = AllocationUtils::AlignToPowerOf2((int32)size); // Reduce fragmentation by operating on power-of-2 blocks
MemPoolLocker.Lock();
MemPool.Add({ ptr, size });
MemPoolLocker.Unlock();
}
// Gets a render list: takes the most recently returned one from the pool (under the lock)
// or creates a new one if the pool is empty.
RenderList* RenderList::GetFromPool()
{
    RenderList* pooled = nullptr;
    bool reused = false;

    MemPoolLocker.Lock();
    if (FreeRenderList.HasItems())
    {
        pooled = FreeRenderList.Last();
        FreeRenderList.RemoveLast();
        reused = true;
    }
    MemPoolLocker.Unlock();

    if (reused)
        return pooled;
    return New<RenderList>();
}
// Clears the given render list and puts it into the pool for reuse. Null is ignored.
// Asserts that the list is not already in the pool.
void RenderList::ReturnToPool(RenderList* cache)
{
    if (cache == nullptr)
        return;

    // Reset the list contents before it becomes visible to other users of the pool
    cache->Clear();

    MemPoolLocker.Lock();
    ASSERT(!FreeRenderList.Contains(cache));
    FreeRenderList.Add(cache);
    MemPoolLocker.Unlock();
}
void RenderList::CleanupCache()
{
// Don't call it during rendering (data may be already in use)
ASSERT(GPUDevice::Instance == nullptr || GPUDevice::Instance->CurrentTask == nullptr);
// Free extensions
for (IExtension* e : GetExtensions())
e->Dispose();
// Free pooled memory
MemPoolLocker.Lock();
FreeRenderList.ClearDelete();
for (auto& e : MemPool)
Platform::Free(e.First);
MemPool.Clear();
MemPoolLocker.Unlock();
}
// Registers the extension in the global extensions list.
RenderList::IExtension::IExtension()
{
    auto& registry = GetExtensions();
    registry.Add(this);
}
// Unregisters the extension from the global extensions list.
RenderList::IExtension::~IExtension()
{
    auto& registry = GetExtensions();
    registry.Remove(this);
}
// Ordering used when sorting blendable settings: entries with a lower Priority value come first;
// for equal priorities the entry with the larger VolumeSizeSqr comes first.
bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const
{
    const bool samePriority = Priority == other.Priority;
    return samePriority
               ? other.VolumeSizeSqr < VolumeSizeSqr // Tie-break by the volume size
               : Priority < other.Priority;
}
// Queues a post-fx settings provider to be blended later by BlendSettings.
void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weight, int32 priority, float volumeSizeSqr)
{
    BlendableSettings entry;
    entry.VolumeSizeSqr = volumeSizeSqr;
    entry.Priority = priority;
    entry.Weight = weight;
    entry.Provider = provider;
    Blendable.Add(entry);
}
// Queues a draw callback; queued callbacks are invoked and removed by DrainDelayedDraws.
// The callback is moved into the internal list.
void RenderList::AddDelayedDraw(DelayedDraw&& func)
{
_delayedDraws.Add(MoveTemp(func));
}
// Invokes all queued delayed draws (in the order of iteration over the list) and clears the queue.
void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
{
    if (_delayedDraws.Count() != 0)
    {
        PROFILE_CPU();
        for (DelayedDraw& delayedDraw : _delayedDraws)
            delayedDraw(context, renderContextBatch, renderContextIndex);
        _delayedDraws.Clear();
    }
}
#define LOOP_EXTENSIONS() const auto& extensions = GetExtensions(); for (auto* e : extensions)
// Notifies every registered extension before drawing.
void RenderList::PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch)
{
    const auto& registered = GetExtensions();
    for (auto* extension : registered)
        extension->PreDraw(context, renderContextBatch);
}
// Notifies every registered extension after drawing.
void RenderList::PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch)
{
    const auto& registered = GetExtensions();
    for (auto* extension : registered)
        extension->PostDraw(context, renderContextBatch);
}
// Builds the final post-process settings: starts from the global graphics settings and blends
// every queued provider on top of them in the sorted order (see BlendableSettings::operator<).
void RenderList::BlendSettings()
{
    PROFILE_CPU();
    Sorting::QuickSort(Blendable);
    Settings = Graphics::PostProcessSettings;
    for (auto& blend : Blendable)
        blend.Provider->Blend(Settings, blend.Weight);
}
// Runs all material post effects (at locationA) and custom post effects (at locationB) on the given texture.
// inputOutput is both the source and the result: after the call it points to the texture holding the final image
// (it can be a different texture than the one passed in because the effects ping-pong between two targets).
// A temporary render target is taken from the pool only when at least one effect needs a separate output
// (any material effect, or a custom effect that does not use a single target).
void RenderList::RunPostFxPass(GPUContext* context, RenderContext& renderContext, MaterialPostFxLocation locationA, PostProcessEffectLocation locationB, GPUTexture*& inputOutput)
{
    // Note: during this stage engine is using additive rendering to the light buffer (given as inputOutput parameter).
    // Materials PostFx and Custom PostFx prefer sampling the input texture while rendering to the output.
    // So we need to allocate a temporary render target (or reuse from cache) and use it as a ping pong buffer.

    // Check what is going to be rendered (skip the whole pass if nothing, skip temp target if not needed)
    bool skipPass = true;
    bool needTempTarget = false;
    for (int32 i = 0; i < Settings.PostFxMaterials.Materials.Count(); i++)
    {
        const auto material = Settings.PostFxMaterials.Materials[i].Get();
        if (material && material->IsReady() && material->IsPostFx() && material->GetInfo().PostFxLocation == locationA)
        {
            skipPass = false;
            needTempTarget = true; // Materials always render from input to output
        }
    }
    if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::CustomPostProcess))
    {
        for (const PostProcessEffect* fx : renderContext.List->PostFx)
        {
            if (fx->Location == locationB)
            {
                skipPass = false;
                needTempTarget |= !fx->UseSingleTarget;
            }
        }
    }
    if (skipPass)
        return;

    // Setup ping-pong targets (output stays null when no temp target is needed)
    auto tempDesc = inputOutput->GetDescription();
    auto temp = needTempTarget ? RenderTargetPool::Get(tempDesc) : nullptr;
    if (needTempTarget)
    {
        RENDER_TARGET_POOL_SET_NAME(temp, "RenderList.RunPostFxPassTemp");
    }
    auto input = inputOutput;
    auto output = temp;
    context->ResetRenderTarget();

    // Material effects
    MaterialBase::BindParameters bindParams(context, renderContext);
    for (int32 i = 0; i < Settings.PostFxMaterials.Materials.Count(); i++)
    {
        auto material = Settings.PostFxMaterials.Materials[i].Get();
        if (material && material->IsReady() && material->IsPostFx() && material->GetInfo().PostFxLocation == locationA)
        {
            context->ResetSR();
            ASSERT(needTempTarget);
            context->SetRenderTarget(*output);
            bindParams.Input = *input;
            material->Bind(bindParams);
            context->DrawFullscreenTriangle();
            context->ResetRenderTarget();
            Swap(output, input);
        }
    }

    // Custom effects
    if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::CustomPostProcess))
    {
        for (PostProcessEffect* fx : renderContext.List->PostFx)
        {
            if (fx->Location == locationB)
            {
                context->ResetSR();
                context->ResetUA();
                if (fx->UseSingleTarget || output == nullptr)
                {
                    // Effect works in-place on the input
                    fx->Render(context, renderContext, input, nullptr);
                }
                else
                {
                    ASSERT(needTempTarget);
                    fx->Render(context, renderContext, input, output);
                    Swap(input, output);
                }
                context->ResetRenderTarget();
            }
        }
    }

    // The last written texture becomes the result, the other one goes back to the pool
    inputOutput = input;
    if (needTempTarget)
        RenderTargetPool::Release(output);
    context->ResetSR();
}
// Runs all ready post-fx materials assigned to the given location.
// Each material renders from input into output and then both pointers are swapped,
// so after the call input points to the texture with the latest result.
void RenderList::RunMaterialPostFxPass(GPUContext* context, RenderContext& renderContext, MaterialPostFxLocation location, GPUTexture*& input, GPUTexture*& output)
{
    MaterialBase::BindParameters bindParams(context, renderContext);
    auto& materials = Settings.PostFxMaterials.Materials;
    for (int32 materialIndex = 0; materialIndex < materials.Count(); materialIndex++)
    {
        auto material = materials[materialIndex].Get();
        const bool canRender = material && material->IsReady() && material->IsPostFx() && material->GetInfo().PostFxLocation == location;
        if (canRender)
        {
            context->ResetSR();
            context->SetRenderTarget(*output);
            bindParams.Input = *input;
            material->Bind(bindParams);
            context->DrawFullscreenTriangle();
            Swap(output, input);
        }

        // Render target is unbound after every material slot (also for the skipped ones)
        context->ResetRenderTarget();
    }
}
// Runs all custom post effects assigned to the given location (only if the view has custom post-processing enabled).
// Effects using a single target (or when there is no output texture) get a null output;
// other effects render from input into output and then both pointers are swapped.
void RenderList::RunCustomPostFxPass(GPUContext* context, RenderContext& renderContext, PostProcessEffectLocation location, GPUTexture*& input, GPUTexture*& output)
{
    if (!(renderContext.View.Flags & ViewFlags::CustomPostProcess))
        return;
    for (PostProcessEffect* effect : renderContext.List->PostFx)
    {
        if (effect->Location != location)
            continue;

        context->ResetSR();
        const bool singleTarget = effect->UseSingleTarget || output == nullptr;
        if (singleTarget)
        {
            effect->Render(context, renderContext, input, nullptr);
        }
        else
        {
            effect->Render(context, renderContext, input, output);
            Swap(input, output);
        }
        context->ResetRenderTarget();
        context->ResetSR();
    }
}
bool RenderList::HasAnyPostFx(const RenderContext& renderContext, PostProcessEffectLocation postProcess) const
{
if (EnumHasAnyFlags(renderContext.View.Flags, ViewFlags::CustomPostProcess))
{
for (const PostProcessEffect* fx : renderContext.List->PostFx)
{
if (fx->Location == postProcess)
return true;
}
}
return false;
}
bool RenderList::HasAnyPostFx(const RenderContext& renderContext, MaterialPostFxLocation materialPostFx) const
{
for (int32 i = 0; i < Settings.PostFxMaterials.Materials.Count(); i++)
{
auto material = Settings.PostFxMaterials.Materials[i].Get();
if (material && material->IsReady() && material->IsPostFx() && material->GetInfo().PostFxLocation == materialPostFx)
{
return true;
}
}
return false;
}
// Creates an empty batched draw call whose Instances container is constructed with the memory of the given render list.
// The list pointer is dereferenced, so it must not be null.
BatchedDrawCall::BatchedDrawCall(RenderList* list)
: Instances(&list->Memory)
{
}
// Move constructor: the draw call data and objects start index are copied, the instances container is moved.
BatchedDrawCall::BatchedDrawCall(BatchedDrawCall&& other) noexcept
: DrawCall(other.DrawCall)
, ObjectsStartIndex(other.ObjectsStartIndex)
, Instances(MoveTemp(other.Instances))
{
}
// Resets the list: removes all indices, pre-batched draw calls and batches, and re-enables instancing.
void DrawCallsList::Clear()
{
    CanUseInstancing = true;
    Batches.Clear();
    PreBatchedDrawCalls.Clear();
    Indices.Clear();
}
// Returns true if the list has neither draw call indices nor pre-batched draw calls.
bool DrawCallsList::IsEmpty() const
{
    const auto totalCount = Indices.Count() + PreBatchedDrawCalls.Count();
    return totalCount == 0;
}
// Returns the memory to the renderer allocator, but only if it was allocated from it (see Init).
RenderListAlloc::~RenderListAlloc()
{
    // Render List memory doesn't need free (arena allocator)
    if (!NeedFree || !Data)
        return;
    RendererAllocation::Free(Data, Size);
}
// Allocates the memory for this object and returns it (null for zero size).
// Small blocks (below 1024 bytes) and blocks with an alignment other than 16/8/4/1 come from the render list memory;
// the remaining ones come from the pooled renderer allocator and are released in the destructor.
void* RenderListAlloc::Init(RenderList* list, uint32 size, uint32 alignment)
{
    ASSERT_LOW_LAYER(!Data);
    Size = size;
    if (size == 0)
        return nullptr;

    const bool alignmentFitsPool = alignment == 16 || alignment == 8 || alignment == 4 || alignment == 1;
    if (size >= 1024 && alignmentFitsPool)
    {
        NeedFree = true;
        Data = RendererAllocation::Allocate(size);
    }
    else
    {
        Data = list->Memory.Allocate(size, alignment);
    }
    return Data;
}
// Creates an empty render list.
// Containers are constructed with the initial arguments listed below, both object buffers use the
// R32G32B32A32_Float format, and the instance buffer uses a vertex layout with a single R32_UInt Attribute0 element.
RenderList::RenderList(const SpawnParams& params)
: ScriptingObject(params)
, Memory(4 * 1024 * 1024, RendererAllocation::Allocate, RendererAllocation::Free) // 4MB pages, use page pooling via RendererAllocation
, DirectionalLights(4)
, SkyLights(4)
, EnvironmentProbes(32)
, Decals(64)
, Sky(nullptr)
, AtmosphericFog(nullptr)
, Blendable(32)
, ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer"))
// NOTE(review): TempObjectBuffer reuses the "Object Buffer" name of ObjectBuffer - confirm whether a distinct name was intended
, TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer"))
, _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } }))
{
}
// Caches the 8 view frustum corners: as returned by the view frustum (FrustumCornersWs)
// and transformed by the view matrix (FrustumCornersVs).
void RenderList::Init(RenderContext& renderContext)
{
    auto& view = renderContext.View;
    view.Frustum.GetCorners(FrustumCornersWs);
    for (int32 cornerIndex = 0; cornerIndex < 8; cornerIndex++)
    {
        Float3::Transform(FrustumCornersWs[cornerIndex], view.View, FrustumCornersVs[cornerIndex]);
    }
}
// Resets the render list to an empty state so it can be reused (called by ReturnToPool).
// Clears all collected scenes, draw calls, lights, probes, decals and post-fx data, resets fog and settings to defaults.
void RenderList::Clear()
{
Scenes.Clear();
DrawCalls.Clear();
BatchedDrawCalls.Clear();
for (auto& list : DrawCallsLists)
list.Clear();
ShadowDepthDrawCallsList.Clear();
PointLights.Clear();
SpotLights.Clear();
SkyLights.Clear();
DirectionalLights.Clear();
EnvironmentProbes.Clear();
Decals.Clear();
VolumetricFogParticles.Clear();
Sky = nullptr;
AtmosphericFog = nullptr;
Fog = RenderFogData();
PostFx.Clear();
Settings = PostProcessSettings();
Blendable.Clear();
_delayedDraws.Clear();
_instanceBuffer.Clear();
ObjectBuffer.Clear();
TempObjectBuffer.Clear();
// NOTE(review): the list memory is released last - presumably because some containers above allocate from it; keep this order
Memory.Free();
}
// Sorting order: By Sort Order -> By Material -> By Geometry -> By Distance
// 64-bit sort key stored in DrawCall::SortKey (see CalculateSortKey).
// Fields are declared from the least significant (DistanceKey) to the most significant (SortKey)
// when the struct is reinterpreted as uint64 (assumes a little-endian target).
PACK_STRUCT(struct PackedSortKey
{
uint32 DistanceKey;
uint8 GeoKey;
uint16 MaterialKey;
uint8 SortKey;
});
static_assert(sizeof(PackedSortKey) == sizeof(uint64), "Invalid sort key size");
// Sorting order: By Sort Order -> By Distance -> By Material -> By Geometry
// 64-bit sort key variant used for Forward lists sorted with reversed distance (see RenderList::SortDrawCalls),
// where the distance takes precedence over material/geometry batching.
// Fields are declared from the least significant (GeoKey) to the most significant (SortKey)
// when the struct is reinterpreted as uint64 (assumes a little-endian target).
PACK_STRUCT(struct PackedSortKeyForward
{
uint8 GeoKey;
uint16 MaterialKey;
uint32 DistanceKey;
uint8 SortKey;
});
static_assert(sizeof(PackedSortKeyForward) == sizeof(uint64), "Invalid sort key size");
// Sorting order: By Material -> By Geometry -> By Distance
// 64-bit sort key variant used for Depth lists (see RenderList::SortDrawCalls): the sort order is dropped
// and its byte is replaced with a zeroed Dummy field placed below the geometry key.
// Fields are declared from the least significant (DistanceKey) to the most significant (MaterialKey)
// when the struct is reinterpreted as uint64 (assumes a little-endian target).
PACK_STRUCT(struct PackedSortKeyDepth
{
uint32 DistanceKey;
uint8 Dummy;
uint8 GeoKey;
uint16 MaterialKey;
});
static_assert(sizeof(PackedSortKeyDepth) == sizeof(uint64), "Invalid sort key size");
// Computes the 64-bit sort key of the draw call (PackedSortKey layout) and stores it in drawCall.SortKey.
// The key combines: distance along the view direction, material hash (with the instancing handler hash
// and stencil value mixed in), geometry hash (determinant flag, vertex buffers, index buffer) and the sort order.
FORCE_INLINE void CalculateSortKey(const RenderContext& renderContext, DrawCall& drawCall, int8 sortOrder)
{
    // Distance key
    const Float3 viewDirection = renderContext.View.Direction;
    const float planeOffset = -Float3::Dot(viewDirection, renderContext.View.Position);
    const float signedDistance = Float3::Dot(viewDirection, drawCall.ObjectPosition) - planeOffset;
    const uint32 distanceHash = RenderTools::ComputeDistanceSortKey(signedDistance);

    // Material key (instancing handler can contribute to the hash)
    uint32 materialHash = GetHash(drawCall.Material);
    IMaterial::InstancingHandler instancing;
    if (drawCall.Material->CanUseInstancing(renderContext, instancing))
        instancing.GetHash(drawCall, materialHash);
    materialHash = (materialHash * 397) ^ drawCall.StencilValue;

    // Geometry key
    uint32 geometryHash = (uint32)(471 * drawCall.WorldDeterminant);
    for (int32 vbIndex = 0; vbIndex < 3; vbIndex++)
        geometryHash = (geometryHash * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[vbIndex]);
    geometryHash = (geometryHash * 397) ^ GetHash(drawCall.Geometry.IndexBuffer);

    // Pack (hashes are truncated to the key fields size, sort order is shifted into unsigned range)
    PackedSortKey packed;
    packed.DistanceKey = distanceHash;
    packed.GeoKey = (uint8)geometryHash;
    packed.MaterialKey = (uint16)materialHash;
    packed.SortKey = (uint8)(sortOrder - MIN_int8);
    drawCall.SortKey = *(uint64*)&packed;
}
void RenderList::AddDrawCall(const RenderContext& renderContext, DrawPass drawModes, StaticFlags staticFlags, DrawCall& drawCall, bool receivesDecals, int8 sortOrder)
{
#if ENABLE_ASSERTION_LOW_LAYERS
// Ensure that draw modes are non-empty and in conjunction with material draw modes
auto materialDrawModes = drawCall.Material->GetDrawModes();
ASSERT_LOW_LAYER(drawModes != DrawPass::None && ((uint32)drawModes & ~(uint32)materialDrawModes) == 0);
#endif
// Finalize draw call initialization
drawCall.WorldDeterminant = drawCall.World.RotDeterminant() < 0 ? 1 : 0;
CalculateSortKey(renderContext, drawCall, sortOrder);
// Append draw call data
const int32 index = DrawCalls.Add(drawCall);
// Add draw call to proper draw lists
if ((drawModes & DrawPass::Depth) != DrawPass::None)
{
DrawCallsLists[(int32)DrawCallsListType::Depth].Indices.Add(index);
}
if ((drawModes & (DrawPass::GBuffer | DrawPass::GlobalSurfaceAtlas)) != DrawPass::None)
{
if (receivesDecals)
DrawCallsLists[(int32)DrawCallsListType::GBuffer].Indices.Add(index);
else
DrawCallsLists[(int32)DrawCallsListType::GBufferNoDecals].Indices.Add(index);
}
if ((drawModes & DrawPass::Forward) != DrawPass::None)
{
DrawCallsLists[(int32)DrawCallsListType::Forward].Indices.Add(index);
}
if ((drawModes & DrawPass::Distortion) != DrawPass::None)
{
DrawCallsLists[(int32)DrawCallsListType::Distortion].Indices.Add(index);
}
if ((drawModes & DrawPass::MotionVectors) != DrawPass::None &&
(staticFlags & StaticFlags::Transform) == StaticFlags::None &&
RenderTools::ComputeBoundsScreenRadiusSquared(drawCall.ObjectPosition, drawCall.ObjectRadius, renderContext.View) > Math::Square(Graphics::MotionVectors::MinObjectScreenSize))
{
DrawCallsLists[(int32)DrawCallsListType::MotionVectors].Indices.Add(index);
}
}
// Adds the draw call to this list once and registers its index for every render context of the batch that can see it.
// The first context of the batch is treated as the main view: the sort key is computed against it and the
// draw call goes to this list's draw call lists (after pass masking and frustum culling).
// The remaining contexts are expected to be Depth passes (asserted): for them the index is added to the
// ShadowDepthDrawCallsList of the context's own render list (the index still refers to DrawCalls of this list).
void RenderList::AddDrawCall(const RenderContextBatch& renderContextBatch, DrawPass drawModes, StaticFlags staticFlags, ShadowsCastingMode shadowsMode, const BoundingSphere& bounds, DrawCall& drawCall, bool receivesDecals, int8 sortOrder)
{
#if ENABLE_ASSERTION_LOW_LAYERS
// Ensure that draw modes are non-empty and in conjunction with material draw modes
auto materialDrawModes = drawCall.Material->GetDrawModes();
ASSERT_LOW_LAYER(drawModes != DrawPass::None && ((uint32)drawModes & ~(uint32)materialDrawModes) == 0);
#endif
const RenderContext& mainRenderContext = renderContextBatch.Contexts.Get()[0];
// Finalize draw call initialization
drawCall.WorldDeterminant = drawCall.World.RotDeterminant() < 0 ? 1 : 0;
CalculateSortKey(mainRenderContext, drawCall, sortOrder);
// Append draw call data
const int32 index = DrawCalls.Add(drawCall);
// Add draw call to proper draw lists
// 'modes' keeps the draw modes filtered by the shadows casting mode (reused for every context below),
// 'drawModes' is additionally limited to the pass of the currently processed view
DrawPass modes = drawModes & mainRenderContext.View.GetShadowsDrawPassMask(shadowsMode);
drawModes = modes & mainRenderContext.View.Pass;
if (drawModes != DrawPass::None && mainRenderContext.View.CullingFrustum.Intersects(bounds))
{
if ((drawModes & DrawPass::Depth) != DrawPass::None)
{
DrawCallsLists[(int32)DrawCallsListType::Depth].Indices.Add(index);
}
if ((drawModes & (DrawPass::GBuffer | DrawPass::GlobalSurfaceAtlas)) != DrawPass::None)
{
if (receivesDecals)
DrawCallsLists[(int32)DrawCallsListType::GBuffer].Indices.Add(index);
else
DrawCallsLists[(int32)DrawCallsListType::GBufferNoDecals].Indices.Add(index);
}
if ((drawModes & DrawPass::Forward) != DrawPass::None)
{
DrawCallsLists[(int32)DrawCallsListType::Forward].Indices.Add(index);
}
if ((drawModes & DrawPass::Distortion) != DrawPass::None)
{
DrawCallsLists[(int32)DrawCallsListType::Distortion].Indices.Add(index);
}
// Motion vectors only for objects without the static Transform flag that are big enough on the screen
if ((drawModes & DrawPass::MotionVectors) != DrawPass::None &&
(staticFlags & StaticFlags::Transform) == StaticFlags::None &&
RenderTools::ComputeBoundsScreenRadiusSquared(bounds.Center, (float)bounds.Radius, mainRenderContext.View) > Math::Square(Graphics::MotionVectors::MinObjectScreenSize))
{
DrawCallsLists[(int32)DrawCallsListType::MotionVectors].Indices.Add(index);
}
}
// Remaining contexts: filter by pass, static flags of the view, frustum and the minimum object size in pixels
float minObjectPixelSizeSq = Math::Square(Graphics::Shadows::MinObjectPixelSize);
for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++)
{
const RenderContext& renderContext = renderContextBatch.Contexts.Get()[i];
ASSERT_LOW_LAYER(renderContext.View.Pass == DrawPass::Depth);
drawModes = modes & renderContext.View.Pass;
if (drawModes != DrawPass::None &&
(staticFlags & renderContext.View.StaticFlagsMask) == renderContext.View.StaticFlagsCompare &&
renderContext.View.CullingFrustum.Intersects(bounds) &&
RenderTools::ComputeBoundsScreenRadiusSquared(bounds.Center, (float)bounds.Radius, renderContext.View) * (renderContext.View.ScreenSize.X * renderContext.View.ScreenSize.Y) >= minObjectPixelSizeSq)
{
renderContext.List->ShadowDepthDrawCallsList.Indices.Add(index);
}
}
}
void RenderList::BuildObjectsBuffer()
{
int32 count = DrawCalls.Count();
for (const auto& e : BatchedDrawCalls)
count += e.Instances.Count();
ObjectBuffer.Clear();
if (count == 0)
return;
PROFILE_CPU();
PROFILE_MEM(GraphicsCommands);
ObjectBuffer.Data.Resize(count * sizeof(ShaderObjectData));
auto* src = (const DrawCall*)DrawCalls.Get();
auto* dst = (ShaderObjectData*)ObjectBuffer.Data.Get();
for (int32 i = 0; i < DrawCalls.Count(); i++)
{
dst->Store(src[i]);
dst++;
}
int32 startIndex = DrawCalls.Count();
for (auto& batch : BatchedDrawCalls)
{
batch.ObjectsStartIndex = startIndex;
Platform::MemoryCopy(dst, batch.Instances.Get(), batch.Instances.Count() * sizeof(ShaderObjectData));
dst += batch.Instances.Count();
startIndex += batch.Instances.Count();
}
ZoneValue(ObjectBuffer.Data.Count() / 1024); // Objects Buffer size in kB
}
// Sorts the draw call indices of the given list and groups them into batches.
// Steps:
// 1. Builds a 64-bit sort key for every listed draw call from DrawCall::SortKey (the key layout depends on
//    reverseDistance and listType, see PackedSortKey/PackedSortKeyForward/PackedSortKeyDepth).
// 2. Radix-sorts the indices by those keys.
// 3. Merges runs of consecutive draw calls into a single DrawBatch when the material supports instancing
//    and its instancing handler accepts the pair for the given pass.
// 4. For Forward lists additionally merge-sorts the batches.
void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, DrawCallsListType listType, DrawPass pass)
{
PROFILE_CPU();
PROFILE_MEM(GraphicsCommands);
const auto* drawCallsData = drawCalls.Get();
const auto* listData = list.Indices.Get();
const int32 listSize = list.Indices.Count();
ZoneValue(listSize);
// Use shared memory from renderer allocator
RenderListAlloc allocs[3];
uint64* sortedKeys = allocs[0].Init<uint64>(this, listSize);
uint64* tempKeys = allocs[1].Init<uint64>(this, listSize);
int32* tempIndices = allocs[2].Init<int32>(this, listSize);
// Setup sort keys
if (reverseDistance)
{
if (listType == DrawCallsListType::Forward)
{
// Transparency uses distance to take precedence over batching efficiency for correct draw order
for (int32 i = 0; i < listSize; i++)
{
const DrawCall& drawCall = drawCallsData[listData[i]];
PackedSortKey key = *(PackedSortKey*)&drawCall.SortKey;
PackedSortKeyForward forwardKey;
forwardKey.MaterialKey = key.MaterialKey;
forwardKey.DistanceKey = key.DistanceKey ^ MAX_uint32; // Reverse depth
forwardKey.GeoKey = key.GeoKey;
forwardKey.SortKey = key.SortKey ^ MAX_uint8; // Reverse sort order
sortedKeys[i] = *(uint64*)&forwardKey;
}
}
else
{
// Keep the default key layout but invert the distance and the sort order
for (int32 i = 0; i < listSize; i++)
{
const DrawCall& drawCall = drawCallsData[listData[i]];
PackedSortKey key = *(PackedSortKey*)&drawCall.SortKey;
key.DistanceKey ^= MAX_uint32; // Reverse depth
key.SortKey ^= MAX_uint8; // Reverse sort order
sortedKeys[i] = *(uint64*)&key;
}
}
}
else
{
if (listType == DrawCallsListType::Depth)
{
// Reorder keys and drop sort order for better batching/instancing in depth/shadow passes
PackedSortKeyDepth depthKey;
depthKey.Dummy = 0;
for (int32 i = 0; i < listSize; i++)
{
const DrawCall& drawCall = drawCallsData[listData[i]];
PackedSortKey key = *(PackedSortKey*)&drawCall.SortKey;
depthKey.DistanceKey = key.DistanceKey;
depthKey.GeoKey = key.GeoKey;
depthKey.MaterialKey = key.MaterialKey;
sortedKeys[i] = *(uint64*)&depthKey;
}
}
else
{
// Copy keys as-is
for (int32 i = 0; i < listSize; i++)
sortedKeys[i] = drawCallsData[listData[i]].SortKey;
}
}
// Sort draw calls indices
// NOTE(review): the copy below suggests RadixSort may leave the results in the temp buffers and redirect
// the passed pointers to them (sortedKeys is read again when building batches) - confirm against Sorting::RadixSort
int32* resultIndices = list.Indices.Get();
Sorting::RadixSort(sortedKeys, resultIndices, tempKeys, tempIndices, listSize);
if (resultIndices != list.Indices.Get())
Platform::MemoryCopy(list.Indices.Get(), resultIndices, sizeof(int32) * listSize);
// Perform draw calls batching
list.Batches.Clear();
for (int32 i = 0; i < listSize;)
{
const DrawCall& drawCall = drawCallsData[listData[i]];
int32 batchSize = 1;
int32 instanceCount = drawCall.InstanceCount;
IMaterial::InstancingHandler drawCallHandler, otherHandler;
if (instanceCount != 0 && drawCall.Material->CanUseInstancing(renderContext, drawCallHandler))
{
// Check the following draw calls sequence to merge them
for (int32 j = i + 1; j < listSize; j++)
{
const DrawCall& other = drawCallsData[listData[j]];
// Both draw calls have to use the same batching callback, be accepted by it and share the stencil value
const bool canBatch =
other.Material->CanUseInstancing(renderContext, otherHandler) &&
other.InstanceCount != 0 &&
drawCallHandler.CanBatch == otherHandler.CanBatch &&
drawCallHandler.CanBatch(drawCall, other, pass) &&
drawCall.StencilValue == other.StencilValue;
if (!canBatch)
break;
batchSize++;
instanceCount += other.InstanceCount;
}
}
// Batch refers to a range of sorted indices: [StartIndex, StartIndex + BatchSize)
DrawBatch batch;
static_assert(sizeof(DrawBatch) == sizeof(uint64) * 2, "Fix the size of draw batch to optimize memory access.");
batch.SortKey = sortedKeys[i];
batch.StartIndex = i;
batch.BatchSize = batchSize;
batch.InstanceCount = instanceCount;
list.Batches.Add(batch);
i += batchSize;
}
// When using depth buffer draw calls are already almost ideally sorted by Radix Sort but transparency needs more stability to prevent flickering
if (listType == DrawCallsListType::Forward)
{
// Sort draw calls batches by depth
Array<DrawBatch, RendererAllocation> sortingBatches;
Sorting::MergeSort(list.Batches, &sortingBatches);
}
}
// Returns true if the given pass is exactly one of: GBuffer, Depth or MotionVectors.
FORCE_INLINE bool CanUseInstancing(DrawPass pass)
{
    switch (pass)
    {
    case DrawPass::GBuffer:
    case DrawPass::Depth:
    case DrawPass::MotionVectors:
        return true;
    default:
        return false;
    }
}
// Checks if both draw calls use the same geometry: index buffer, indices range, vertex buffers and their offsets.
// Vertex buffers and offsets are compared with a single memory compare that starts at VertexBuffers and spans
// the size of both arrays (relies on VertexBuffersOffsets being placed right after VertexBuffers).
FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b)
{
    if (a->Geometry.IndexBuffer != b->Geometry.IndexBuffer)
        return false;
    if (a->Draw.IndicesCount != b->Draw.IndicesCount)
        return false;
    if (a->Draw.StartIndex != b->Draw.StartIndex)
        return false;
    const auto buffersDataSize = sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets);
    return Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, buffersDataSize) == 0;
}
// Returns a span over the vertex buffers array with the trailing null entries trimmed.
// At least one element is kept (even if it is null) when maxSize is 1 or more.
FORCE_INLINE Span<GPUBuffer*> GetVB(GPUBuffer* const* ptr, int32 maxSize)
{
    // Check the size before reading the element so the array is never indexed below zero
    while (maxSize > 1 && ptr[maxSize - 1] == nullptr)
        maxSize--;
    return ToSpan<GPUBuffer*>(ptr, maxSize);
}
// Submits all draw calls from the given list to the main GPU context.
// Draws the batches (list.Batches), then the pre-batched draw calls (list.PreBatchedDrawCalls) and, in the non-instanced path only,
// falls back to drawing list.Indices one-by-one when the list has not been batched at all.
// Hardware instancing is used when the list, the current view pass and the device allow it and there is at least one instance to write;
// in that case object indices are passed via an additional vertex stream (slot 3). Otherwise the object index is passed per-draw via the constant buffer (slot 2).
// @param renderContext The rendering context (its view and draw pass are used).
// @param list The draw calls list to execute (indices, batches and pre-batched draw calls).
// @param drawCallsList The render list that contains the draw calls data and the objects buffer referenced by the list.
// @param input The input texture view passed to the materials binding.
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input)
{
    if (list.IsEmpty())
        return;
    PROFILE_GPU_CPU("Drawing");
    PROFILE_MEM(GraphicsCommands);
    const auto* drawCallsData = drawCallsList->DrawCalls.Get();
    const auto* listData = list.Indices.Get();
    const auto* batchesData = list.Batches.Get();
    const auto context = GPUDevice::Instance->GetMainContext();
    bool useInstancing = list.CanUseInstancing && CanUseInstancing(renderContext.View.Pass) && GPUDevice::Instance->Limits.HasInstancing;
    TaaJitterRemoveContext taaJitterRemove(renderContext.View);

    // Lazy-init objects buffer (if caller didn't do it)
    if (drawCallsList->ObjectBuffer.Data.IsEmpty())
    {
        drawCallsList->BuildObjectsBuffer();
        drawCallsList->ObjectBuffer.Flush(context);
    }

    // Clear SR slots to prevent any resources binding issues (leftovers from the previous passes)
    context->ResetSR();

    // Prepare instance buffer
    if (useInstancing)
    {
        // Estimate the maximum amount of elements for all instanced draws
        int32 instancesCount = 0;
        for (int32 i = 0; i < list.Batches.Count(); i++)
        {
            const DrawBatch& batch = batchesData[i];
            if (batch.BatchSize > 1)
                instancesCount += batch.BatchSize;
        }
        for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
        {
            const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
            instancesCount += batch.Instances.Count();
        }
        if (instancesCount != 0)
        {
            PROFILE_CPU_NAMED("Build Instancing");
            _instanceBuffer.Clear();
            _instanceBuffer.Data.Resize(instancesCount * sizeof(ShaderObjectDrawInstanceData));
            auto instanceData = (ShaderObjectDrawInstanceData*)_instanceBuffer.Data.Get();

            // Write to instance buffer (the order must match the order of instanced draws issued below)
            for (int32 i = 0; i < list.Batches.Count(); i++)
            {
                const DrawBatch& batch = batchesData[i];
                if (batch.BatchSize > 1)
                {
                    for (int32 j = 0; j < batch.BatchSize; j++)
                    {
                        instanceData->ObjectIndex = listData[batch.StartIndex + j];
                        instanceData++;
                    }
                }
            }
            for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
            {
                const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
                for (int32 j = 0; j < batch.Instances.Count(); j++)
                {
                    instanceData->ObjectIndex = batch.ObjectsStartIndex + j;
                    instanceData++;
                }
            }
            ASSERT((byte*)instanceData == _instanceBuffer.Data.end());

            // Upload data
            _instanceBuffer.Flush(context);
            ZoneValue(instancesCount);
        }
        else
        {
            // No batches so no instancing
            useInstancing = false;
        }
    }

    // Execute draw calls
    int32 materialBinds = list.Batches.Count();
    MaterialBase::BindParameters bindParams(context, renderContext);
    bindParams.ObjectBuffer = drawCallsList->ObjectBuffer.GetBuffer()->View();
    bindParams.Input = input;
    bindParams.BindViewData();
    MaterialShaderDataPerDraw perDraw;
    perDraw.DrawPadding = Float3::Zero;
    GPUConstantBuffer* perDrawCB = IMaterial::BindParameters::PerDrawConstants;
    context->BindCB(2, perDrawCB); // TODO: use rootSignature/pushConstants on D3D12/Vulkan
    constexpr int32 vbMax = ARRAY_COUNT(DrawCall::Geometry.VertexBuffers);
    if (useInstancing)
    {
        context->UpdateCB(perDrawCB, &perDraw);
        GPUBuffer* vb[vbMax + 1];
        uint32 vbOffsets[vbMax + 1];
        vb[3] = _instanceBuffer.GetBuffer(); // Pass object index in a vertex stream at slot 3 (used by VS in Surface.shader)
        vbOffsets[3] = 0;
        int32 instanceBufferOffset = 0; // Location of the first instance of the next instanced draw within the instance buffer
        for (int32 i = 0; i < list.Batches.Count(); i++)
        {
            const DrawBatch& batch = batchesData[i];
            uint32 drawCallIndex = listData[batch.StartIndex];
            const DrawCall& drawCall = drawCallsData[drawCallIndex];
            bindParams.Instanced = batch.BatchSize != 1;
            bindParams.DrawCall = &drawCall;
            bindParams.DrawCall->Material->Bind(bindParams);
            if (bindParams.Instanced)
            {
                // One or more draw calls per batch
                const DrawCall* activeDraw = &drawCall;
                int32 activeCount = 1;
                for (int32 j = 1; j <= batch.BatchSize; j++)
                {
                    // The additional iteration (j == BatchSize) flushes the last group of draws
                    if (j != batch.BatchSize && DrawsEqual(activeDraw, drawCallsData + listData[batch.StartIndex + j]))
                    {
                        // Group two draw calls into active draw call
                        activeCount++;
                        continue;
                    }

                    // Draw whole active draw (instanced)
                    Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
                    Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
                    context->BindIB(activeDraw->Geometry.IndexBuffer);
                    context->BindVB(GetVB(vb, ARRAY_COUNT(vb)), vbOffsets);
                    context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex);
                    instanceBufferOffset += activeCount;

                    // Reset active draw (skipped after the final flush: there is no next draw call in this batch and reading the indices past the batch end could go out of the list bounds)
                    if (j != batch.BatchSize)
                    {
                        activeDraw = drawCallsData + listData[batch.StartIndex + j];
                        activeCount = 1;
                    }
                }
            }
            else
            {
                // Pass object index in constant buffer
                perDraw.DrawObjectIndex = drawCallIndex;
                context->UpdateCB(perDrawCB, &perDraw);

                // Single-draw call batch
                context->BindIB(drawCall.Geometry.IndexBuffer);
                context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
                if (drawCall.InstanceCount == 0)
                {
                    // Zero instances count marks the indirect draw
                    context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
                }
                else
                {
                    context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
                }
            }
        }
        for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
        {
            const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
            const DrawCall& drawCall = batch.DrawCall;
            bindParams.Instanced = true;
            bindParams.DrawCall = &drawCall;
            bindParams.DrawCall->Material->Bind(bindParams);
            Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
            Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
            context->BindIB(drawCall.Geometry.IndexBuffer);
            context->BindVB(GetVB(vb, vbMax + 1), vbOffsets);
            if (drawCall.InstanceCount == 0)
            {
                context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
            }
            else
            {
                context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, batch.Instances.Count(), instanceBufferOffset, 0, drawCall.Draw.StartIndex);
                instanceBufferOffset += batch.Instances.Count();
            }
        }
        materialBinds += list.PreBatchedDrawCalls.Count();
    }
    else
    {
        for (int32 i = 0; i < list.Batches.Count(); i++)
        {
            // Bind material once per batch and issue every draw call from the batch separately
            const DrawBatch& batch = batchesData[i];
            bindParams.DrawCall = drawCallsData + listData[batch.StartIndex];
            bindParams.DrawCall->Material->Bind(bindParams);
            for (int32 j = 0; j < batch.BatchSize; j++)
            {
                perDraw.DrawObjectIndex = listData[batch.StartIndex + j];
                context->UpdateCB(perDrawCB, &perDraw);
                const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex];
                context->BindIB(drawCall.Geometry.IndexBuffer);
                context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
                if (drawCall.InstanceCount == 0)
                {
                    context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
                }
                else
                {
                    context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
                }
            }
        }
        for (int32 i = 0; i < list.PreBatchedDrawCalls.Count(); i++)
        {
            // Geometry is shared by all instances of the pre-batched draw call so bind it once and draw per-object
            const BatchedDrawCall& batch = BatchedDrawCalls.Get()[list.PreBatchedDrawCalls.Get()[i]];
            const DrawCall& drawCall = batch.DrawCall;
            bindParams.DrawCall = &drawCall;
            bindParams.DrawCall->Material->Bind(bindParams);
            context->BindIB(drawCall.Geometry.IndexBuffer);
            context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
            for (int32 j = 0; j < batch.Instances.Count(); j++)
            {
                perDraw.DrawObjectIndex = batch.ObjectsStartIndex + j;
                context->UpdateCB(perDrawCB, &perDraw);
                context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
            }
        }
        materialBinds += list.PreBatchedDrawCalls.Count();
        if (list.Batches.IsEmpty() && list.Indices.Count() != 0)
        {
            // Draw calls list has not been batched so execute draw calls separately
            for (int32 j = 0; j < list.Indices.Count(); j++)
            {
                perDraw.DrawObjectIndex = listData[j];
                context->UpdateCB(perDrawCB, &perDraw);
                const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex];
                bindParams.DrawCall = &drawCall;
                drawCall.Material->Bind(bindParams);
                context->BindIB(drawCall.Geometry.IndexBuffer);
                context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
                if (drawCall.InstanceCount == 0)
                {
                    context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
                }
                else
                {
                    context->DrawIndexedInstanced(drawCall.Draw.IndicesCount, drawCall.InstanceCount, 0, 0, drawCall.Draw.StartIndex);
                }
            }
            materialBinds += list.Indices.Count();
        }
    }
    ZoneValue(materialBinds); // Material shaders bindings count
}
// Mixes the surface-specific draw call state into the batching key: whether a lightmap is assigned and the skinning value.
// @param drawCall The draw call to hash.
// @param batchKey The batch key to update (in-out).
void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey)
{
    const auto& surface = drawCall.Surface;

    // Only the presence of the lightmap affects the key (constant marker value)
    if (surface.Lightmap)
    {
        CombineHash(batchKey, 1313);
    }

    CombineHash(batchKey, (byte)surface.Skinning);
}
// Checks whether two surface draw calls can be merged into a single batch for the given draw pass.
// @param a The first draw call.
// @param b The second draw call.
// @param pass The draw pass being rendered.
// @returns True if both draw calls can be batched together, otherwise false.
bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b, DrawPass pass)
{
    // TODO: find reason why batching static meshes with lightmap causes problems with sampling in shader (flickering when meshes in batch order changes due to async draw calls collection)
    if (a.Surface.Lightmap != nullptr || b.Surface.Lightmap != nullptr)
        return false;

    // Skinning state has to match
    if (a.Surface.Skinning != b.Surface.Skinning)
        return false;

    auto& materialInfo = a.Material->GetInfo();
    if (a.Material == b.Material)
    {
        // World determinant flips the culling mode
        return materialInfo.CullMode == CullMode::TwoSided || a.WorldDeterminant == b.WorldDeterminant;
    }

    // Different materials: batch simple materials during depth-only drawing (when using default vertex shader and no pixel shader)
    if (pass != DrawPass::Depth)
        return false;
    return IsSimpleMaterial(materialInfo) && IsSimpleMaterial(b.Material->GetInfo());
}