#include #include #include #ifdef _DEBUG #define D3D_DEBUG_INFO #endif #include #ifdef USE_CRT_DBG #undef new #endif #include #ifdef USE_CRT_DBG #define new DBG_NEW #endif #include "base/logging.h" #include "math/lin/matrix4x4.h" #include "thin3d/thin3d.h" #include "thin3d/d3dx9_loader.h" #include "gfx/d3d9_state.h" namespace Draw { // Could be declared as u8 static const D3DCMPFUNC compareToD3D9[] = { D3DCMP_NEVER, D3DCMP_LESS, D3DCMP_EQUAL, D3DCMP_LESSEQUAL, D3DCMP_GREATER, D3DCMP_NOTEQUAL, D3DCMP_GREATEREQUAL, D3DCMP_ALWAYS }; // Could be declared as u8 static const D3DBLENDOP blendEqToD3D9[] = { D3DBLENDOP_ADD, D3DBLENDOP_SUBTRACT, D3DBLENDOP_REVSUBTRACT, D3DBLENDOP_MIN, D3DBLENDOP_MAX, }; // Could be declared as u8 static const D3DBLEND blendFactorToD3D9[] = { D3DBLEND_ZERO, D3DBLEND_ONE, D3DBLEND_SRCCOLOR, D3DBLEND_INVSRCCOLOR, D3DBLEND_DESTCOLOR, D3DBLEND_INVDESTCOLOR, D3DBLEND_SRCALPHA, D3DBLEND_INVSRCALPHA, D3DBLEND_DESTALPHA, D3DBLEND_INVDESTALPHA, D3DBLEND_BLENDFACTOR, D3DBLEND_INVBLENDFACTOR, D3DBLEND_BLENDFACTOR, D3DBLEND_INVBLENDFACTOR, D3DBLEND_ZERO, D3DBLEND_ZERO, D3DBLEND_ZERO, D3DBLEND_ZERO, }; static const D3DTEXTUREADDRESS texWrapToD3D9[] = { D3DTADDRESS_WRAP, D3DTADDRESS_MIRROR, D3DTADDRESS_CLAMP, D3DTADDRESS_BORDER, }; static const D3DTEXTUREFILTERTYPE texFilterToD3D9[] = { D3DTEXF_POINT, D3DTEXF_LINEAR, }; static const D3DPRIMITIVETYPE primToD3D9[] = { D3DPT_POINTLIST, D3DPT_LINELIST, D3DPT_LINESTRIP, D3DPT_TRIANGLELIST, D3DPT_TRIANGLESTRIP, D3DPT_TRIANGLEFAN, // These aren't available. D3DPT_POINTLIST, // tess D3DPT_POINTLIST, // geom ... D3DPT_POINTLIST, D3DPT_POINTLIST, D3DPT_POINTLIST, }; static const D3DSTENCILOP stencilOpToD3D9[] = { D3DSTENCILOP_KEEP, D3DSTENCILOP_ZERO, D3DSTENCILOP_REPLACE, D3DSTENCILOP_INCRSAT, D3DSTENCILOP_DECRSAT, D3DSTENCILOP_INVERT, D3DSTENCILOP_INCR, D3DSTENCILOP_DECR, }; static const int primCountDivisor[] = { 1, 2, 3, 3, 3, 1, 1, 1, 1, 1, }; D3DFORMAT FormatToD3DFMT(DataFormat fmt) { switch (fmt) { case DataFormat::R8G8B8A8_UNORM: return D3DFMT_A8R8G8B8; case DataFormat::R4G4B4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // emulated case DataFormat::B4G4R4A4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // native case DataFormat::A4R4G4B4_UNORM_PACK16: return D3DFMT_A4R4G4B4; // emulated case DataFormat::R5G6B5_UNORM_PACK16: return D3DFMT_R5G6B5; case DataFormat::A1R5G5B5_UNORM_PACK16: return D3DFMT_A1R5G5B5; case DataFormat::D24_S8: return D3DFMT_D24S8; case DataFormat::D16: return D3DFMT_D16; default: return D3DFMT_UNKNOWN; } } static int FormatToD3DDeclType(DataFormat type) { switch (type) { case DataFormat::R32_FLOAT: return D3DDECLTYPE_FLOAT1; case DataFormat::R32G32_FLOAT: return D3DDECLTYPE_FLOAT2; case DataFormat::R32G32B32_FLOAT: return D3DDECLTYPE_FLOAT3; case DataFormat::R32G32B32A32_FLOAT: return D3DDECLTYPE_FLOAT4; case DataFormat::R8G8B8A8_UNORM: return D3DDECLTYPE_UBYTE4N; // D3DCOLOR has a different byte ordering. default: return D3DDECLTYPE_UNUSED; } } class D3D9Buffer; class D3D9DepthStencilState : public DepthStencilState { public: BOOL depthTestEnabled; BOOL depthWriteEnabled; D3DCMPFUNC depthCompare; BOOL stencilEnabled; D3DSTENCILOP stencilFail; D3DSTENCILOP stencilZFail; D3DSTENCILOP stencilPass; D3DCMPFUNC stencilCompareOp; uint8_t stencilReference; uint8_t stencilCompareMask; uint8_t stencilWriteMask; void Apply(LPDIRECT3DDEVICE9 device) { device->SetRenderState(D3DRS_ZENABLE, depthTestEnabled); if (depthTestEnabled) { device->SetRenderState(D3DRS_ZWRITEENABLE, depthWriteEnabled); device->SetRenderState(D3DRS_ZFUNC, depthCompare); } device->SetRenderState(D3DRS_STENCILENABLE, stencilEnabled); if (stencilEnabled) { device->SetRenderState(D3DRS_STENCILFAIL, stencilFail); device->SetRenderState(D3DRS_STENCILZFAIL, stencilZFail); device->SetRenderState(D3DRS_STENCILPASS, stencilPass); device->SetRenderState(D3DRS_STENCILFUNC, stencilCompareOp); device->SetRenderState(D3DRS_STENCILMASK, stencilCompareMask); device->SetRenderState(D3DRS_STENCILREF, stencilReference); device->SetRenderState(D3DRS_STENCILWRITEMASK, stencilWriteMask); } } }; class D3D9RasterState : public RasterState { public: DWORD cullMode; void Apply(LPDIRECT3DDEVICE9 device) { device->SetRenderState(D3DRS_CULLMODE, cullMode); device->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE); } }; class D3D9BlendState : public BlendState { public: bool enabled; D3DBLENDOP eqCol, eqAlpha; D3DBLEND srcCol, srcAlpha, dstCol, dstAlpha; uint32_t fixedColor; uint32_t colorMask; void Apply(LPDIRECT3DDEVICE9 device) { device->SetRenderState(D3DRS_ALPHABLENDENABLE, (DWORD)enabled); device->SetRenderState(D3DRS_BLENDOP, eqCol); device->SetRenderState(D3DRS_BLENDOPALPHA, eqAlpha); device->SetRenderState(D3DRS_SRCBLEND, srcCol); device->SetRenderState(D3DRS_DESTBLEND, dstCol); device->SetRenderState(D3DRS_SRCBLENDALPHA, srcAlpha); device->SetRenderState(D3DRS_DESTBLENDALPHA, dstAlpha); device->SetRenderState(D3DRS_COLORWRITEENABLE, colorMask); // device->SetRenderState(, fixedColor); } }; class D3D9SamplerState : public SamplerState { public: D3DTEXTUREADDRESS wrapS, wrapT; D3DTEXTUREFILTERTYPE magFilt, minFilt, mipFilt; void Apply(LPDIRECT3DDEVICE9 device, int index) { device->SetSamplerState(index, D3DSAMP_ADDRESSU, wrapS); device->SetSamplerState(index, D3DSAMP_ADDRESSV, wrapT); device->SetSamplerState(index, D3DSAMP_MAGFILTER, magFilt); device->SetSamplerState(index, D3DSAMP_MINFILTER, minFilt); device->SetSamplerState(index, D3DSAMP_MIPFILTER, mipFilt); } }; class D3D9InputLayout : public InputLayout { public: D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc &desc); ~D3D9InputLayout() { if (decl_) { decl_->Release(); } } int GetStride(int binding) const { return stride_[binding]; } void Apply(LPDIRECT3DDEVICE9 device) { device->SetVertexDeclaration(decl_); } private: LPDIRECT3DVERTEXDECLARATION9 decl_; int stride_[4]; }; class D3D9ShaderModule : public ShaderModule { public: D3D9ShaderModule(ShaderStage stage) : stage_(stage), vshader_(nullptr), pshader_(nullptr) {} ~D3D9ShaderModule() { if (vshader_) vshader_->Release(); if (pshader_) pshader_->Release(); } bool Compile(LPDIRECT3DDEVICE9 device, const uint8_t *data, size_t size); void Apply(LPDIRECT3DDEVICE9 device) { if (stage_ == ShaderStage::FRAGMENT) { device->SetPixelShader(pshader_); } else { device->SetVertexShader(vshader_); } } ShaderStage GetStage() const override { return stage_; } private: ShaderStage stage_; LPDIRECT3DVERTEXSHADER9 vshader_; LPDIRECT3DPIXELSHADER9 pshader_; }; class D3D9Pipeline : public Pipeline { public: D3D9Pipeline(LPDIRECT3DDEVICE9 device) : device_(device) {} ~D3D9Pipeline() { if (depthStencil) depthStencil->Release(); if (blend) blend->Release(); if (raster) raster->Release(); if (inputLayout) inputLayout->Release(); } bool RequiresBuffer() override { return false; } D3D9ShaderModule *vshader; D3D9ShaderModule *pshader; D3DPRIMITIVETYPE prim; int primDivisor; D3D9InputLayout *inputLayout = nullptr; D3D9DepthStencilState *depthStencil = nullptr; D3D9BlendState *blend = nullptr; D3D9RasterState *raster = nullptr; UniformBufferDesc dynamicUniforms; void Apply(LPDIRECT3DDEVICE9 device); private: LPDIRECT3DDEVICE9 device_; }; class D3D9Texture : public Texture { public: D3D9Texture(LPDIRECT3DDEVICE9 device, LPDIRECT3DDEVICE9EX deviceEx, const TextureDesc &desc); ~D3D9Texture(); void SetToSampler(LPDIRECT3DDEVICE9 device, int sampler); private: void SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data); bool Create(const TextureDesc &desc); LPDIRECT3DDEVICE9 device_; LPDIRECT3DDEVICE9EX deviceEx_; TextureType type_; DataFormat format_; D3DFORMAT d3dfmt_; LPDIRECT3DTEXTURE9 tex_; LPDIRECT3DVOLUMETEXTURE9 volTex_; LPDIRECT3DCUBETEXTURE9 cubeTex_; }; D3D9Texture::D3D9Texture(LPDIRECT3DDEVICE9 device, LPDIRECT3DDEVICE9EX deviceEx, const TextureDesc &desc) : device_(device), deviceEx_(deviceEx), tex_(nullptr), volTex_(nullptr), cubeTex_(nullptr) { Create(desc); } D3D9Texture::~D3D9Texture() { if (tex_) { tex_->Release(); } if (volTex_) { volTex_->Release(); } if (cubeTex_) { cubeTex_->Release(); } } bool D3D9Texture::Create(const TextureDesc &desc) { width_ = desc.width; height_ = desc.height; depth_ = desc.depth; type_ = desc.type; format_ = desc.format; tex_ = NULL; d3dfmt_ = FormatToD3DFMT(desc.format); HRESULT hr = E_FAIL; D3DPOOL pool = D3DPOOL_MANAGED; int usage = 0; if (deviceEx_ != nullptr) { pool = D3DPOOL_DEFAULT; usage = D3DUSAGE_DYNAMIC; } if (desc.generateMips) usage |= D3DUSAGE_AUTOGENMIPMAP; switch (type_) { case TextureType::LINEAR1D: case TextureType::LINEAR2D: hr = device_->CreateTexture(desc.width, desc.height, desc.generateMips ? 0 : desc.mipLevels, usage, d3dfmt_, pool, &tex_, NULL); break; case TextureType::LINEAR3D: hr = device_->CreateVolumeTexture(desc.width, desc.height, desc.depth, desc.mipLevels, usage, d3dfmt_, pool, &volTex_, NULL); break; case TextureType::CUBE: hr = device_->CreateCubeTexture(desc.width, desc.mipLevels, usage, d3dfmt_, pool, &cubeTex_, NULL); break; } if (FAILED(hr)) { ELOG("Texture creation failed"); return false; } if (desc.initData.size()) { // In D3D9, after setting D3DUSAGE_AUTOGENMIPS, we can only access the top layer. The rest will be // automatically generated. int maxLevel = desc.generateMips ? 1 : (int)desc.initData.size(); for (int i = 0; i < maxLevel; i++) { SetImageData(0, 0, 0, width_, height_, depth_, i, 0, desc.initData[i]); } } return true; } // Just switches R and G. inline uint32_t Shuffle8888(uint32_t x) { return (x & 0xFF00FF00) | ((x >> 16) & 0xFF) | ((x << 16) & 0xFF0000); } void D3D9Texture::SetImageData(int x, int y, int z, int width, int height, int depth, int level, int stride, const uint8_t *data) { if (!tex_) return; if (level == 0) { width_ = width; height_ = height; depth_ = depth; } if (!stride) { stride = width * (int)DataFormatSizeInBytes(format_); } switch (type_) { case TextureType::LINEAR2D: { D3DLOCKED_RECT rect; if (x == 0 && y == 0) { tex_->LockRect(level, &rect, NULL, D3DLOCK_DISCARD); for (int i = 0; i < height; i++) { uint8_t *dest = (uint8_t *)rect.pBits + rect.Pitch * i; const uint8_t *source = data + stride * i; int j; switch (format_) { case DataFormat::B4G4R4A4_UNORM_PACK16: // We emulate support for this format. for (j = 0; j < width; j++) { uint16_t color = ((const uint16_t *)source)[j]; ((uint16_t *)dest)[j] = (color << 12) | (color >> 4); } break; case DataFormat::A4R4G4B4_UNORM_PACK16: // Native memcpy(dest, source, width * sizeof(uint16_t)); break; case DataFormat::R8G8B8A8_UNORM: for (j = 0; j < width; j++) { ((uint32_t *)dest)[j] = Shuffle8888(((uint32_t *)source)[j]); } break; case DataFormat::B8G8R8A8_UNORM: memcpy(dest, source, sizeof(uint32_t) * width); break; } } tex_->UnlockRect(level); } break; } default: ELOG("Non-LINEAR2D textures not yet supported"); break; } } void D3D9Texture::SetToSampler(LPDIRECT3DDEVICE9 device, int sampler) { switch (type_) { case TextureType::LINEAR1D: case TextureType::LINEAR2D: device->SetTexture(sampler, tex_); break; case TextureType::LINEAR3D: device->SetTexture(sampler, volTex_); break; case TextureType::CUBE: device->SetTexture(sampler, cubeTex_); break; } } class D3D9Context : public DrawContext { public: D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx); ~D3D9Context(); const DeviceCaps &GetDeviceCaps() const override { return caps_; } uint32_t GetSupportedShaderLanguages() const override { return (uint32_t)ShaderLanguage::HLSL_D3D9 | (uint32_t)ShaderLanguage::HLSL_D3D9_BYTECODE; } uint32_t GetDataFormatSupport(DataFormat fmt) const override; ShaderModule *CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t dataSize) override; DepthStencilState *CreateDepthStencilState(const DepthStencilStateDesc &desc) override; BlendState *CreateBlendState(const BlendStateDesc &desc) override; SamplerState *CreateSamplerState(const SamplerStateDesc &desc) override; RasterState *CreateRasterState(const RasterStateDesc &desc) override; Buffer *CreateBuffer(size_t size, uint32_t usageFlags) override; Pipeline *CreateGraphicsPipeline(const PipelineDesc &desc) override; InputLayout *CreateInputLayout(const InputLayoutDesc &desc) override; Texture *CreateTexture(const TextureDesc &desc) override; Framebuffer *CreateFramebuffer(const FramebufferDesc &desc) override; void UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) override; void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits) override {} bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) override; // These functions should be self explanatory. void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) override; // color must be 0, for now. void BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) override; uintptr_t GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) override; void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; void BindTextures(int start, int count, Texture **textures) override; void BindSamplerStates(int start, int count, SamplerState **states) override { for (int i = 0; i < count; ++i) { D3D9SamplerState *s = static_cast(states[start + i]); s->Apply(device_, start + i); } } void BindVertexBuffers(int start, int count, Buffer **buffers, int *offsets) override { for (int i = 0; i < count; i++) { curVBuffers_[i + start] = (D3D9Buffer *)buffers[i]; curVBufferOffsets_[i + start] = offsets ? offsets[i] : 0; } } void BindIndexBuffer(Buffer *indexBuffer, int offset) override { curIBuffer_ = (D3D9Buffer *)indexBuffer; curIBufferOffset_ = offset; } void BindPipeline(Pipeline *pipeline) override { curPipeline_ = (D3D9Pipeline *)pipeline; } void UpdateDynamicUniformBuffer(const void *ub, size_t size) override; // Raster state void SetScissorRect(int left, int top, int width, int height) override; void SetViewports(int count, Viewport *viewports) override; void SetBlendFactor(float color[4]) override; void Draw(int vertexCount, int offset) override; void DrawIndexed(int vertexCount, int offset) override; void DrawUP(const void *vdata, int vertexCount) override; void Clear(int mask, uint32_t colorval, float depthVal, int stencilVal); uintptr_t GetNativeObject(NativeObject obj) override { switch (obj) { case NativeObject::CONTEXT: return (uintptr_t)d3d_; case NativeObject::DEVICE: return (uintptr_t)device_; case NativeObject::DEVICE_EX: return (uintptr_t)deviceEx_; default: return 0; } } std::string GetInfoString(InfoField info) const override { switch (info) { case APIVERSION: return "DirectX 9.0"; case VENDORSTRING: return identifier_.Description; case VENDOR: return ""; case DRIVER: return identifier_.Driver; // eh, sort of case SHADELANGVERSION: return shadeLangVersion_; case APINAME: return "Direct3D 9"; default: return "?"; } } void HandleEvent(Event ev, int width, int height, void *param1, void *param2) override; private: LPDIRECT3D9 d3d_; LPDIRECT3D9EX d3dEx_; LPDIRECT3DDEVICE9 device_; LPDIRECT3DDEVICE9EX deviceEx_; int adapterId_ = -1; D3DADAPTER_IDENTIFIER9 identifier_{}; D3DCAPS9 d3dCaps_; char shadeLangVersion_[64]{}; DeviceCaps caps_{}; // Bound state D3D9Pipeline *curPipeline_ = nullptr; D3D9Buffer *curVBuffers_[4]{}; int curVBufferOffsets_[4]{}; D3D9Buffer *curIBuffer_ = nullptr; int curIBufferOffset_ = 0; // Framebuffer state LPDIRECT3DSURFACE9 deviceRTsurf = 0; LPDIRECT3DSURFACE9 deviceDSsurf = 0; bool supportsINTZ = false; }; #define FB_DIV 1 #define FOURCC_INTZ ((D3DFORMAT)(MAKEFOURCC('I', 'N', 'T', 'Z'))) D3D9Context::D3D9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx) : d3d_(d3d), d3dEx_(d3dEx), adapterId_(adapterId), device_(device), deviceEx_(deviceEx), caps_{} { if (FAILED(d3d->GetAdapterIdentifier(adapterId, 0, &identifier_))) { ELOG("Failed to get adapter identifier: %d", adapterId); } switch (identifier_.VendorId) { case 0x10DE: caps_.vendor = GPUVendor::VENDOR_NVIDIA; break; case 0x1002: case 0x1022: caps_.vendor = GPUVendor::VENDOR_AMD; break; case 0x163C: case 0x8086: case 0x8087: caps_.vendor = GPUVendor::VENDOR_INTEL; break; default: caps_.vendor = GPUVendor::VENDOR_UNKNOWN; } if (!FAILED(device->GetDeviceCaps(&d3dCaps_))) { sprintf(shadeLangVersion_, "PS: %04x VS: %04x", d3dCaps_.PixelShaderVersion & 0xFFFF, d3dCaps_.VertexShaderVersion & 0xFFFF); } else { strcpy(shadeLangVersion_, "N/A"); } caps_.multiViewport = false; caps_.anisoSupported = true; caps_.depthRangeMinusOneToOne = false; caps_.preferredDepthBufferFormat = DataFormat::D24_S8; caps_.dualSourceBlend = false; caps_.tesselationShaderSupported = false; caps_.framebufferBlitSupported = true; caps_.framebufferCopySupported = false; caps_.framebufferDepthBlitSupported = true; caps_.framebufferDepthCopySupported = false; if (d3d) { D3DDISPLAYMODE displayMode; d3d->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &displayMode); // To be safe, make sure both the display format and the FBO format support INTZ. HRESULT displayINTZ = d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, displayMode.Format, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ); HRESULT fboINTZ = d3d->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, D3DFMT_A8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, FOURCC_INTZ); supportsINTZ = SUCCEEDED(displayINTZ) && SUCCEEDED(fboINTZ); } } D3D9Context::~D3D9Context() { } ShaderModule *D3D9Context::CreateShaderModule(ShaderStage stage, ShaderLanguage language, const uint8_t *data, size_t size) { D3D9ShaderModule *shader = new D3D9ShaderModule(stage); if (shader->Compile(device_, data, size)) { return shader; } else { delete shader; return NULL; } } Pipeline *D3D9Context::CreateGraphicsPipeline(const PipelineDesc &desc) { if (!desc.shaders.size()) { ELOG("Pipeline requires at least one shader"); return NULL; } D3D9Pipeline *pipeline = new D3D9Pipeline(device_); for (auto iter : desc.shaders) { if (!iter) { ELOG("NULL shader passed to CreateGraphicsPipeline"); return false; } if (iter->GetStage() == ShaderStage::FRAGMENT) { pipeline->pshader = static_cast(iter); } else if (iter->GetStage() == ShaderStage::VERTEX) { pipeline->vshader = static_cast(iter); } } pipeline->prim = primToD3D9[(int)desc.prim]; pipeline->primDivisor = primCountDivisor[(int)desc.prim]; pipeline->depthStencil = (D3D9DepthStencilState *)desc.depthStencil; pipeline->blend = (D3D9BlendState *)desc.blend; pipeline->raster = (D3D9RasterState *)desc.raster; pipeline->inputLayout = (D3D9InputLayout *)desc.inputLayout; pipeline->depthStencil->AddRef(); pipeline->blend->AddRef(); pipeline->raster->AddRef(); pipeline->inputLayout->AddRef(); if (desc.uniformDesc) pipeline->dynamicUniforms = *desc.uniformDesc; return pipeline; } DepthStencilState *D3D9Context::CreateDepthStencilState(const DepthStencilStateDesc &desc) { D3D9DepthStencilState *ds = new D3D9DepthStencilState(); ds->depthTestEnabled = desc.depthTestEnabled; ds->depthWriteEnabled = desc.depthWriteEnabled; ds->depthCompare = compareToD3D9[(int)desc.depthCompare]; ds->stencilEnabled = desc.stencilEnabled; ds->stencilCompareOp = compareToD3D9[(int)desc.front.compareOp]; ds->stencilPass = stencilOpToD3D9[(int)desc.front.passOp]; ds->stencilFail = stencilOpToD3D9[(int)desc.front.failOp]; ds->stencilZFail = stencilOpToD3D9[(int)desc.front.depthFailOp]; ds->stencilWriteMask = desc.front.writeMask; ds->stencilReference = desc.front.reference; ds->stencilCompareMask = desc.front.compareMask; return ds; } InputLayout *D3D9Context::CreateInputLayout(const InputLayoutDesc &desc) { D3D9InputLayout *fmt = new D3D9InputLayout(device_, desc); return fmt; } BlendState *D3D9Context::CreateBlendState(const BlendStateDesc &desc) { D3D9BlendState *bs = new D3D9BlendState(); bs->enabled = desc.enabled; bs->eqCol = blendEqToD3D9[(int)desc.eqCol]; bs->srcCol = blendFactorToD3D9[(int)desc.srcCol]; bs->dstCol = blendFactorToD3D9[(int)desc.dstCol]; bs->eqAlpha = blendEqToD3D9[(int)desc.eqAlpha]; bs->srcAlpha = blendFactorToD3D9[(int)desc.srcAlpha]; bs->dstAlpha = blendFactorToD3D9[(int)desc.dstAlpha]; bs->colorMask = desc.colorMask; // Ignore logic ops, we don't support them in D3D9 return bs; } SamplerState *D3D9Context::CreateSamplerState(const SamplerStateDesc &desc) { D3D9SamplerState *samps = new D3D9SamplerState(); samps->wrapS = texWrapToD3D9[(int)desc.wrapU]; samps->wrapT = texWrapToD3D9[(int)desc.wrapV]; samps->magFilt = texFilterToD3D9[(int)desc.magFilter]; samps->minFilt = texFilterToD3D9[(int)desc.minFilter]; samps->mipFilt = texFilterToD3D9[(int)desc.mipFilter]; return samps; } RasterState *D3D9Context::CreateRasterState(const RasterStateDesc &desc) { D3D9RasterState *rs = new D3D9RasterState(); rs->cullMode = D3DCULL_NONE; if (desc.cull == CullMode::NONE) { return rs; } switch (desc.frontFace) { case Facing::CW: switch (desc.cull) { case CullMode::FRONT: rs->cullMode = D3DCULL_CCW; break; case CullMode::BACK: rs->cullMode = D3DCULL_CW; break; } case Facing::CCW: switch (desc.cull) { case CullMode::FRONT: rs->cullMode = D3DCULL_CW; break; case CullMode::BACK: rs->cullMode = D3DCULL_CCW; break; } } return rs; } Texture *D3D9Context::CreateTexture(const TextureDesc &desc) { D3D9Texture *tex = new D3D9Texture(device_, deviceEx_, desc); return tex; } void D3D9Context::BindTextures(int start, int count, Texture **textures) { for (int i = start; i < start + count; i++) { D3D9Texture *tex = static_cast(textures[i - start]); if (tex) { tex->SetToSampler(device_, i); } else { device_->SetTexture(i, nullptr); } } } static void SemanticToD3D9UsageAndIndex(int semantic, BYTE *usage, BYTE *index) { *index = 0; switch (semantic) { case SEM_POSITION: *usage = D3DDECLUSAGE_POSITION; break; case SEM_NORMAL: *usage = D3DDECLUSAGE_NORMAL; break; case SEM_TANGENT: *usage = D3DDECLUSAGE_TANGENT; break; case SEM_BINORMAL: *usage = D3DDECLUSAGE_BINORMAL; break; case SEM_COLOR0: *usage = D3DDECLUSAGE_COLOR; break; case SEM_TEXCOORD0: *usage = D3DDECLUSAGE_TEXCOORD; break; case SEM_TEXCOORD1: *usage = D3DDECLUSAGE_TEXCOORD; *index = 1; break; } } D3D9InputLayout::D3D9InputLayout(LPDIRECT3DDEVICE9 device, const InputLayoutDesc &desc) : decl_(NULL) { D3DVERTEXELEMENT9 *elements = new D3DVERTEXELEMENT9[desc.attributes.size() + 1]; size_t i; for (i = 0; i < desc.attributes.size(); i++) { elements[i].Stream = desc.attributes[i].binding; elements[i].Offset = desc.attributes[i].offset; elements[i].Method = D3DDECLMETHOD_DEFAULT; SemanticToD3D9UsageAndIndex(desc.attributes[i].location, &elements[i].Usage, &elements[i].UsageIndex); elements[i].Type = FormatToD3DDeclType(desc.attributes[i].format); } D3DVERTEXELEMENT9 end = D3DDECL_END(); // Zero the last one. memcpy(&elements[i], &end, sizeof(elements[i])); for (i = 0; i < desc.bindings.size(); i++) { stride_[i] = desc.bindings[i].stride; } HRESULT hr = device->CreateVertexDeclaration(elements, &decl_); if (FAILED(hr)) { ELOG("Error creating vertex decl"); } delete[] elements; } // Simulate a simple buffer type like the other backends have, use the usage flags to create the right internal type. class D3D9Buffer : public Buffer { public: D3D9Buffer(LPDIRECT3DDEVICE9 device, size_t size, uint32_t flags) : vbuffer_(nullptr), ibuffer_(nullptr), maxSize_(size) { if (flags & BufferUsageFlag::INDEXDATA) { DWORD usage = D3DUSAGE_DYNAMIC; device->CreateIndexBuffer((UINT)size, usage, D3DFMT_INDEX32, D3DPOOL_DEFAULT, &ibuffer_, NULL); } else { DWORD usage = D3DUSAGE_DYNAMIC; device->CreateVertexBuffer((UINT)size, usage, 0, D3DPOOL_DEFAULT, &vbuffer_, NULL); } } virtual ~D3D9Buffer() override { if (ibuffer_) { ibuffer_->Release(); } if (vbuffer_) { vbuffer_->Release(); } } LPDIRECT3DVERTEXBUFFER9 vbuffer_; LPDIRECT3DINDEXBUFFER9 ibuffer_; size_t maxSize_; }; Buffer *D3D9Context::CreateBuffer(size_t size, uint32_t usageFlags) { return new D3D9Buffer(device_, size, usageFlags); } inline void Transpose4x4(float out[16], const float in[16]) { for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { out[i * 4 + j] = in[j * 4 + i]; } } } void D3D9Context::UpdateDynamicUniformBuffer(const void *ub, size_t size) { if (size != curPipeline_->dynamicUniforms.uniformBufferSize) Crash(); for (auto &uniform : curPipeline_->dynamicUniforms.uniforms) { int count = 0; switch (uniform.type) { case UniformType::FLOAT4: count = 1; break; case UniformType::MATRIX4X4: count = 4; break; } const float *srcPtr = (const float *)((const uint8_t *)ub + uniform.offset); if (uniform.vertexReg != -1) { float transp[16]; Transpose4x4(transp, srcPtr); device_->SetVertexShaderConstantF(uniform.vertexReg, transp, count); } if (uniform.fragmentReg != -1) { device_->SetPixelShaderConstantF(uniform.fragmentReg, srcPtr, count); } } } void D3D9Context::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, size_t size, UpdateBufferFlags flags) { D3D9Buffer *buf = (D3D9Buffer *)buffer; if (!size) return; if (offset + size > buf->maxSize_) { ELOG("Can't SubData with bigger size than buffer was created with"); return; } if (buf->vbuffer_) { void *ptr; HRESULT res = buf->vbuffer_->Lock((UINT)offset, (UINT)size, &ptr, (flags & UPDATE_DISCARD) ? D3DLOCK_DISCARD : 0); if (!FAILED(res)) { memcpy(ptr, data, size); buf->vbuffer_->Unlock(); } } else if (buf->ibuffer_) { void *ptr; HRESULT res = buf->ibuffer_->Lock((UINT)offset, (UINT)size, &ptr, (flags & UPDATE_DISCARD) ? D3DLOCK_DISCARD : 0); if (!FAILED(res)) { memcpy(ptr, data, size); buf->ibuffer_->Unlock(); } } } void D3D9Pipeline::Apply(LPDIRECT3DDEVICE9 device) { vshader->Apply(device); pshader->Apply(device); blend->Apply(device); depthStencil->Apply(device); raster->Apply(device); } void D3D9Context::Draw(int vertexCount, int offset) { device_->SetStreamSource(0, curVBuffers_[0]->vbuffer_, curVBufferOffsets_[0], curPipeline_->inputLayout->GetStride(0)); curPipeline_->Apply(device_); curPipeline_->inputLayout->Apply(device_); device_->DrawPrimitive(curPipeline_->prim, offset, vertexCount / 3); } void D3D9Context::DrawIndexed(int vertexCount, int offset) { D3D9Buffer *vbuf = static_cast(curVBuffers_[0]); D3D9Buffer *ibuf = static_cast(curIBuffer_); curPipeline_->Apply(device_); curPipeline_->inputLayout->Apply(device_); device_->SetStreamSource(0, curVBuffers_[0]->vbuffer_, curVBufferOffsets_[0], curPipeline_->inputLayout->GetStride(0)); device_->SetIndices(curIBuffer_->ibuffer_); device_->DrawIndexedPrimitive(curPipeline_->prim, 0, 0, vertexCount, 0, vertexCount / curPipeline_->primDivisor); } void D3D9Context::DrawUP(const void *vdata, int vertexCount) { curPipeline_->Apply(device_); curPipeline_->inputLayout->Apply(device_); device_->DrawPrimitiveUP(curPipeline_->prim, vertexCount / 3, vdata, curPipeline_->inputLayout->GetStride(0)); } static uint32_t SwapRB(uint32_t c) { return (c & 0xFF00FF00) | ((c >> 16) & 0xFF) | ((c << 16) & 0xFF0000); } void D3D9Context::Clear(int mask, uint32_t colorval, float depthVal, int stencilVal) { UINT d3dMask = 0; if (mask & FBChannel::FB_COLOR_BIT) d3dMask |= D3DCLEAR_TARGET; if (mask & FBChannel::FB_DEPTH_BIT) d3dMask |= D3DCLEAR_ZBUFFER; if (mask & FBChannel::FB_STENCIL_BIT) d3dMask |= D3DCLEAR_STENCIL; device_->Clear(0, NULL, d3dMask, (D3DCOLOR)SwapRB(colorval), depthVal, stencilVal); } void D3D9Context::SetScissorRect(int left, int top, int width, int height) { using namespace DX9; dxstate.scissorRect.set(left, top, left + width, top + height); } void D3D9Context::SetViewports(int count, Viewport *viewports) { using namespace DX9; int x = (int)viewports[0].TopLeftX; int y = (int)viewports[0].TopLeftY; int w = (int)viewports[0].Width; int h = (int)viewports[0].Height; dxstate.viewport.set(x, y, w, h, viewports[0].MinDepth, viewports[0].MaxDepth); } void D3D9Context::SetBlendFactor(float color[4]) { uint32_t r = (uint32_t)(color[0] * 255.0f); uint32_t g = (uint32_t)(color[1] * 255.0f); uint32_t b = (uint32_t)(color[2] * 255.0f); uint32_t a = (uint32_t)(color[3] * 255.0f); device_->SetRenderState(D3DRS_BLENDFACTOR, r | (g << 8) | (b << 16) | (a << 24)); } bool D3D9ShaderModule::Compile(LPDIRECT3DDEVICE9 device, const uint8_t *data, size_t size) { LPD3DXMACRO defines = nullptr; LPD3DXINCLUDE includes = nullptr; DWORD flags = 0; LPD3DXBUFFER codeBuffer = nullptr; LPD3DXBUFFER errorBuffer = nullptr; const char *source = (const char *)data; const char *profile = stage_ == ShaderStage::FRAGMENT ? "ps_2_0" : "vs_2_0"; HRESULT hr = dyn_D3DXCompileShader(source, (UINT)strlen(source), defines, includes, "main", profile, flags, &codeBuffer, &errorBuffer, nullptr); if (FAILED(hr)) { const char *error = errorBuffer ? (const char *)errorBuffer->GetBufferPointer() : "(no errorbuffer returned)"; if (hr == ERROR_MOD_NOT_FOUND) { // No D3D9-compatible shader compiler installed. error = "D3D9 shader compiler not installed"; } OutputDebugStringA(source); OutputDebugStringA(error); if (errorBuffer) errorBuffer->Release(); if (codeBuffer) codeBuffer->Release(); return false; } bool success = false; if (stage_ == ShaderStage::FRAGMENT) { HRESULT result = device->CreatePixelShader((DWORD *)codeBuffer->GetBufferPointer(), &pshader_); success = SUCCEEDED(result); } else { HRESULT result = device->CreateVertexShader((DWORD *)codeBuffer->GetBufferPointer(), &vshader_); success = SUCCEEDED(result); } codeBuffer->Release(); return true; } class D3D9Framebuffer : public Framebuffer { public: ~D3D9Framebuffer(); uint32_t id; LPDIRECT3DSURFACE9 surf; LPDIRECT3DSURFACE9 depthstencil; LPDIRECT3DTEXTURE9 tex; LPDIRECT3DTEXTURE9 depthstenciltex; int width; int height; FBColorDepth colorDepth; }; Framebuffer *D3D9Context::CreateFramebuffer(const FramebufferDesc &desc) { static uint32_t id = 0; D3D9Framebuffer *fbo = new D3D9Framebuffer{}; fbo->width = desc.width; fbo->height = desc.height; fbo->colorDepth = desc.colorDepth; fbo->depthstenciltex = nullptr; HRESULT rtResult = device_->CreateTexture(fbo->width, fbo->height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &fbo->tex, NULL); if (FAILED(rtResult)) { ELOG("Failed to create render target"); delete fbo; return NULL; } fbo->tex->GetSurfaceLevel(0, &fbo->surf); HRESULT dsResult; if (supportsINTZ) { dsResult = device_->CreateTexture(fbo->width, fbo->height, 1, D3DUSAGE_DEPTHSTENCIL, FOURCC_INTZ, D3DPOOL_DEFAULT, &fbo->depthstenciltex, NULL); if (SUCCEEDED(dsResult)) { dsResult = fbo->depthstenciltex->GetSurfaceLevel(0, &fbo->depthstencil); } } else { dsResult = device_->CreateDepthStencilSurface(fbo->width, fbo->height, D3DFMT_D24S8, D3DMULTISAMPLE_NONE, 0, FALSE, &fbo->depthstencil, NULL); } if (FAILED(dsResult)) { ELOG("Failed to create depth buffer"); fbo->surf->Release(); fbo->tex->Release(); if (fbo->depthstenciltex) { fbo->depthstenciltex->Release(); } delete fbo; return NULL; } fbo->id = id++; return fbo; } D3D9Framebuffer::~D3D9Framebuffer() { tex->Release(); surf->Release(); depthstencil->Release(); if (depthstenciltex) { depthstenciltex->Release(); } } void D3D9Context::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp) { using namespace DX9; if (fbo) { D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; device_->SetRenderTarget(0, fb->surf); device_->SetDepthStencilSurface(fb->depthstencil); } else { device_->SetRenderTarget(0, deviceRTsurf); device_->SetDepthStencilSurface(deviceDSsurf); } int clearFlags = 0; if (rp.color == RPAction::CLEAR) { clearFlags |= D3DCLEAR_TARGET; } if (rp.depth == RPAction::CLEAR) { clearFlags |= D3DCLEAR_ZBUFFER; } if (rp.stencil == RPAction::CLEAR) { clearFlags |= D3DCLEAR_STENCIL; } if (clearFlags) { dxstate.scissorTest.force(false); device_->Clear(0, nullptr, clearFlags, (D3DCOLOR)SwapRB(rp.clearColor), rp.clearDepth, rp.clearStencil); dxstate.scissorRect.restore(); } dxstate.scissorRect.restore(); dxstate.viewport.restore(); } uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBits, int attachment) { D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; if (channelBits & FB_SURFACE_BIT) { switch (channelBits & 7) { case FB_DEPTH_BIT: return (uintptr_t)fb->depthstencil; case FB_STENCIL_BIT: return (uintptr_t)fb->depthstencil; case FB_COLOR_BIT: default: return (uintptr_t)fb->surf; } } else { switch (channelBits & 7) { case FB_DEPTH_BIT: return (uintptr_t)fb->depthstenciltex; case FB_STENCIL_BIT: return 0; // Can't texture from stencil case FB_COLOR_BIT: default: return (uintptr_t)fb->tex; } } } LPDIRECT3DSURFACE9 fbo_get_color_for_read(D3D9Framebuffer *fbo) { return fbo->surf; } void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int color) { D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; switch (channelBit) { case FB_DEPTH_BIT: if (fb->depthstenciltex) { device_->SetTexture(binding, fb->depthstenciltex); } break; case FB_COLOR_BIT: default: if (fb->tex) { device_->SetTexture(binding, fb->tex); } break; } } void D3D9Context::GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) { D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; if (fb) { *w = fb->width; *h = fb->height; } else { *w = targetWidth_; *h = targetHeight_; } } bool D3D9Context::BlitFramebuffer(Framebuffer *srcfb, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dstfb, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter) { D3D9Framebuffer *src = (D3D9Framebuffer *)srcfb; D3D9Framebuffer *dst = (D3D9Framebuffer *)dstfb; if (channelBits != FB_COLOR_BIT) return false; RECT srcRect{ (LONG)srcX1, (LONG)srcY1, (LONG)srcX2, (LONG)srcY2 }; RECT dstRect{ (LONG)dstX1, (LONG)dstY1, (LONG)dstX2, (LONG)dstY2 }; LPDIRECT3DSURFACE9 srcSurf = src ? src->surf : deviceRTsurf; LPDIRECT3DSURFACE9 dstSurf = dst ? dst->surf : deviceRTsurf; return SUCCEEDED(device_->StretchRect(srcSurf, &srcRect, dstSurf, &dstRect, filter == FB_BLIT_LINEAR ? D3DTEXF_LINEAR : D3DTEXF_POINT)); } void D3D9Context::HandleEvent(Event ev, int width, int height, void *param1, void *param2) { switch (ev) { case Event::LOST_BACKBUFFER: if (deviceRTsurf) deviceRTsurf->Release(); if (deviceDSsurf) deviceDSsurf->Release(); deviceRTsurf = nullptr; deviceDSsurf = nullptr; break; case Event::GOT_BACKBUFFER: device_->GetRenderTarget(0, &deviceRTsurf); device_->GetDepthStencilSurface(&deviceDSsurf); break; } } DrawContext *T3DCreateDX9Context(IDirect3D9 *d3d, IDirect3D9Ex *d3dEx, int adapterId, IDirect3DDevice9 *device, IDirect3DDevice9Ex *deviceEx) { int d3dx_ver = LoadD3DX9Dynamic(); if (!d3dx_ver) { ELOG("Failed to load D3DX9!"); return NULL; } return new D3D9Context(d3d, d3dEx, adapterId, device, deviceEx); } // Only partial implementation! uint32_t D3D9Context::GetDataFormatSupport(DataFormat fmt) const { switch (fmt) { case DataFormat::B8G8R8A8_UNORM: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_AUTOGEN_MIPS; case DataFormat::R4G4B4A4_UNORM_PACK16: return 0; case DataFormat::B4G4R4A4_UNORM_PACK16: return FMT_TEXTURE; // emulated support case DataFormat::R5G6B5_UNORM_PACK16: case DataFormat::A1R5G5B5_UNORM_PACK16: case DataFormat::A4R4G4B4_UNORM_PACK16: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_AUTOGEN_MIPS; // native support case DataFormat::R8G8B8A8_UNORM: return FMT_RENDERTARGET | FMT_TEXTURE | FMT_INPUTLAYOUT | FMT_AUTOGEN_MIPS; case DataFormat::R32_FLOAT: case DataFormat::R32G32_FLOAT: case DataFormat::R32G32B32_FLOAT: case DataFormat::R32G32B32A32_FLOAT: return FMT_INPUTLAYOUT; case DataFormat::R8_UNORM: return 0; case DataFormat::BC1_RGBA_UNORM_BLOCK: case DataFormat::BC2_UNORM_BLOCK: case DataFormat::BC3_UNORM_BLOCK: return FMT_TEXTURE; default: return 0; } } } // namespace Draw