/*=============================================================================
	D3D9.cpp: Unreal Direct3D9 support implementation for Windows.
	Copyright 1999 Epic Games, Inc. All Rights Reserved.

	OpenGL renderer by Daniel Vogel <vogel@lokigames.com>
	Loki Software, Inc.

	Other URenderDevice subclasses include:
	* USoftwareRenderDevice: Software renderer.
	* UGlideRenderDevice: 3dfx Glide renderer.
	* UDirect3DRenderDevice: Direct3D renderer.
	* UD3D9RenderDevice: Direct3D9 renderer.

	Revision history:
	* Created by Daniel Vogel based on XMesaGLDrv
	* Changes (John Fulmer, Jeroen Janssen)
	* Major changes (Daniel Vogel)
	* Ported back to Win32 (Fredrik Gustafsson)
	* Unification and addition of vertex arrays (Daniel Vogel)
	* Actor triangle caching (Steve Sinclair)
	* One pass fogging (Daniel Vogel)
	* Windows gamma support (Daniel Vogel)
	* 2X blending support (Daniel Vogel)
	* Better detail texture handling (Daniel Vogel)
	* Scalability (Daniel Vogel)
	* Texture LOD bias (Daniel Vogel)
	* RefreshRate support on Windows (Jason Dick)
	* Finer control over gamma (Daniel Vogel)
	* (NOT ALWAYS) Fixed Windows bitdepth switching (Daniel Vogel)

	* Various modifications and additions by Chris Dohnal
	* Initial TruForm based on TruForm renderer modifications by NitroGL
	* Additional TruForm and Deus Ex updates by Leonhard Gruenschloss


	UseTrilinear	whether to use trilinear filtering
	UseAlphaPalette	set to 0 for buggy drivers (GeForce)
	UseS3TC			whether to use compressed textures
	MaxAnisotropy	maximum level of anisotropy used
	MaxTMUnits		maximum number of TMUs UT will try to use
	LODBias			texture lod bias
	RefreshRate		requested refresh rate (Windows only)
	GammaOffset		offset for the gamma correction


TODO:
	- DOCUMENTATION!!! (especially all subtle assumptions)

=============================================================================*/

#include "D3D9Drv.h"
#include "D3D9.h"


/*-----------------------------------------------------------------------------
	Globals.
-----------------------------------------------------------------------------*/

// File name of the Direct3D 9 DLL. Regular builds use "d3d9.dll"; defining
// UTD3D9R_USE_DEBUG_D3D9_DLL at build time selects "d3d9d.dll" instead.
// NOTE(review): presumably handed to the DLL loader elsewhere in this file - confirm.
#if !defined(UTD3D9R_USE_DEBUG_D3D9_DLL)
static const char *g_d3d9DllName = "d3d9.dll";
#else
static const char *g_d3d9DllName = "d3d9d.dll";
#endif

// Only defined when building with UTGLR_UNREAL_BUILD; the value is always zero.
// NOTE(review): presumably stands in for an engine global that this build
// target does not provide - confirm against the engine headers.
#if defined(UTGLR_UNREAL_BUILD)
const DWORD GUglyHackFlags = 0;
#endif

// Section name string for this render device ("D3D9Drv.D3D9RenderDevice").
// NOTE(review): presumably the configuration section the driver's settings
// are read from / written to - confirm at the use sites.
static const TCHAR *g_pSection = TEXT("D3D9Drv.D3D9RenderDevice");


/*-----------------------------------------------------------------------------
	Vertex programs.
-----------------------------------------------------------------------------*/

//Vertex shader definitions
///////////////////////////

// g_vpDefaultRenderingState: basic pass-through vertex shader.
// Per the source below it copies texcoord0 (v7) to oT0 and the color (v5) to
// oD0, and transforms the position (v0) with the 4x4 matrix starting at c0.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode (0xFFFE0101 is
// the vs_1_1 version token, 0x0000FFFF the end token). The bytecode's debug
// ("DBUG") comment block embeds the same source text and the tool string
// "Microsoft (R) D3DX9 Shader Assembler 5.04.00.2904".
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"
	"dcl_texcoord0 v7\n"

	"mov oT0, v7\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpDefaultRenderingState[] = {
	0xFFFE0101, 0x003CFFFE, 0x47554244, 0x00000028, 0x000000B8, 0x00000058, 0x00000000, 0x00000000,
	0x00000006, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000000F8, 0xFFFF0003,
	0x00000104, 0xFFFF0004, 0x00000110, 0xFFFF0005, 0x0000011C, 0xFFFF0006, 0x00000128, 0xFFFF0007,
	0x00000134, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63,
	0x726F6C6F, 0x0A357620, 0x5F6C6364, 0x63786574, 0x64726F6F, 0x37762030, 0x766F6D0A, 0x30546F20,
	0x3776202C, 0x766F6D0A, 0x30446F20, 0x3576202C, 0x78346D0A, 0x506F2034, 0x202C736F, 0x202C3076,
	0x000A3063, 0x7263694D, 0x666F736F, 0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x41207265,
	0x6D657373, 0x72656C62, 0x302E3520, 0x30302E34, 0x3039322E, 0xABAB0034, 0x0000001F, 0x80000000,
	0x900F0000, 0x0000001F, 0x8000000A, 0x900F0005, 0x0000001F, 0x80000005, 0x900F0007, 0x00000001,
	0xE00F0000, 0x90E40007, 0x00000001, 0xD00F0000, 0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000,
	0xA0E40000, 0x0000FFFF
};

// g_vpDefaultRenderingStateWithFog: pass-through vertex shader with a second color.
// Per the source below it copies texcoord0 (v7) to oT0, the primary color (v5)
// to oD0 and the secondary color (v6) to oD1, and transforms the position (v0)
// with the 4x4 matrix starting at c0.
// NOTE(review): going by the name, the secondary color carries the fog
// contribution - confirm at the use sites.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"
	"dcl_color1 v6\n"
	"dcl_texcoord0 v7\n"

	"mov oT0, v7\n"

	"mov oD0, v5\n"
	"mov oD1, v6\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpDefaultRenderingStateWithFog[] = {
	0xFFFE0101, 0x0046FFFE, 0x47554244, 0x00000028, 0x000000E2, 0x00000068, 0x00000000, 0x00000000,
	0x00000008, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000120, 0xFFFF0003,
	0x0000012C, 0xFFFF0004, 0x00000138, 0xFFFF0005, 0x00000144, 0xFFFF0006, 0x00000150, 0xFFFF0007,
	0x0000015C, 0xFFFF0008, 0x00000168, 0xFFFF0009, 0x00000174, 0x312E7376, 0x640A312E, 0x705F6C63,
	0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x5F6C6364, 0x6F6C6F63,
	0x76203172, 0x63640A36, 0x65745F6C, 0x6F6F6378, 0x20306472, 0x6D0A3776, 0x6F20766F, 0x202C3054,
	0x6D0A3776, 0x6F20766F, 0x202C3044, 0x6D0A3576, 0x6F20766F, 0x202C3144, 0x6D0A3676, 0x20347834,
	0x736F506F, 0x3076202C, 0x3063202C, 0x694D000A, 0x736F7263, 0x2074666F, 0x20295228, 0x58443344,
	0x68532039, 0x72656461, 0x73734120, 0x6C626D65, 0x35207265, 0x2E34302E, 0x322E3030, 0x00343039,
	0x0000001F, 0x80000000, 0x900F0000, 0x0000001F, 0x8000000A, 0x900F0005, 0x0000001F, 0x8001000A,
	0x900F0006, 0x0000001F, 0x80000005, 0x900F0007, 0x00000001, 0xE00F0000, 0x90E40007, 0x00000001,
	0xD00F0000, 0x90E40005, 0x00000001, 0xD00F0001, 0x90E40006, 0x00000014, 0xC00F0000, 0x90E40000,
	0xA0E40000, 0x0000FFFF
};

// g_vpDefaultRenderingStateWithLinearFog: pass-through vertex shader that also
// writes the fog register. Only compiled when UTGLR_RUNE_BUILD is defined.
// Per the source below it copies texcoord0 (v7) to oT0 and the color (v5) to
// oD0, writes the input position's z component (v0.z) to oFog, and transforms
// the position (v0) with the 4x4 matrix starting at c0.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#ifdef UTGLR_RUNE_BUILD
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"
	"dcl_texcoord0 v7\n"

	"mov oT0, v7\n"

	"mov oD0, v5\n"

	"mov oFog, v0.z\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpDefaultRenderingStateWithLinearFog[] = {
	0xFFFE0101, 0x0042FFFE, 0x47554244, 0x00000028, 0x000000CF, 0x00000060, 0x00000000, 0x00000000,
	0x00000007, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000110, 0xFFFF0003,
	0x0000011C, 0xFFFF0004, 0x00000128, 0xFFFF0005, 0x00000134, 0xFFFF0006, 0x00000140, 0xFFFF0007,
	0x0000014C, 0xFFFF0008, 0x00000158, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69,
	0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x5F6C6364, 0x63786574, 0x64726F6F, 0x37762030,
	0x766F6D0A, 0x30546F20, 0x3776202C, 0x766F6D0A, 0x30446F20, 0x3576202C, 0x766F6D0A, 0x6F466F20,
	0x76202C67, 0x0A7A2E30, 0x3478346D, 0x6F506F20, 0x76202C73, 0x63202C30, 0x4D000A30, 0x6F726369,
	0x74666F73, 0x29522820, 0x44334420, 0x53203958, 0x65646168, 0x73412072, 0x626D6573, 0x2072656C,
	0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00, 0x0000001F, 0x80000000, 0x900F0000, 0x0000001F,
	0x8000000A, 0x900F0005, 0x0000001F, 0x80000005, 0x900F0007, 0x00000001, 0xE00F0000, 0x90E40007,
	0x00000001, 0xD00F0000, 0x90E40005, 0x00000001, 0xC00F0001, 0x90AA0000, 0x00000014, 0xC00F0000,
	0x90E40000, 0xA0E40000, 0x0000FFFF
};
#endif

// g_vpComplexSurfaceSingleTexture: vertex shader that derives one set of
// texture coordinates from the vertex position.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): c4/c5 presumably hold the surface's texture axes and c6 the
// per-texture offset/scale - confirm where the constants are uploaded.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceSingleTexture[] = {
	0xFFFE0101, 0x005DFFFE, 0x47554244, 0x00000028, 0x0000013B, 0x00000078, 0x00000000, 0x00000000,
	0x0000000A, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x0000017C, 0xFFFF0003,
	0x00000188, 0xFFFF0004, 0x00000194, 0xFFFF0005, 0x000001A4, 0xFFFF0006, 0x000001B4, 0xFFFF0007,
	0x000001C4, 0xFFFF0008, 0x000001D4, 0xFFFF0009, 0x000001E4, 0xFFFF000A, 0x000001F4, 0xFFFF000B,
	0x00000200, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63,
	0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072, 0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220,
	0x76202C79, 0x63202C30, 0x64610A35, 0x30722064, 0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34,
	0x20646461, 0x792E3072, 0x3072202C, 0x202C792E, 0x2E35632D, 0x64610A77, 0x31722064, 0x2C79782E,
	0x2E307220, 0x202C7978, 0x2E36632D, 0x6D0A7978, 0x6F206C75, 0x782E3054, 0x72202C79, 0x79782E31,
	0x3663202C, 0x0A777A2E, 0x20766F6D, 0x2C30446F, 0x0A357620, 0x3478346D, 0x6F506F20, 0x76202C73,
	0x63202C30, 0x4D000A30, 0x6F726369, 0x74666F73, 0x29522820, 0x44334420, 0x53203958, 0x65646168,
	0x73412072, 0x626D6573, 0x2072656C, 0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00, 0x0000001F,
	0x80000000, 0x900F0000, 0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000,
	0xA0E40004, 0x00000008, 0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000,
	0xA1FF0004, 0x00000002, 0x80020000, 0x80550000, 0xA1FF0005, 0x00000002, 0x80030001, 0x80540000,
	0xA1540006, 0x00000005, 0xE0030000, 0x80540001, 0xA0FE0006, 0x00000001, 0xD00F0000, 0x90E40005,
	0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceDualTexture: vertex shader that derives two sets of
// texture coordinates from the vertex position.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): c4/c5 presumably hold the surface's texture axes and c6/c7 the
// per-texture offset/scale - confirm where the constants are uploaded.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceDualTexture[] = {
	0xFFFE0101, 0x006DFFFE, 0x47554244, 0x00000028, 0x0000017D, 0x00000088, 0x00000000, 0x00000000,
	0x0000000C, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001BC, 0xFFFF0003,
	0x000001C8, 0xFFFF0004, 0x000001D4, 0xFFFF0005, 0x000001E4, 0xFFFF0006, 0x000001F4, 0xFFFF0007,
	0x00000204, 0xFFFF0008, 0x00000214, 0xFFFF0009, 0x00000224, 0xFFFF000A, 0x00000234, 0xFFFF000B,
	0x00000244, 0xFFFF000C, 0x00000254, 0xFFFF000D, 0x00000260, 0x312E7376, 0x640A312E, 0x705F6C63,
	0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072,
	0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30, 0x64610A35, 0x30722064,
	0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072, 0x3072202C, 0x202C792E,
	0x2E35632D, 0x64610A77, 0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E36632D, 0x6D0A7978,
	0x6F206C75, 0x782E3054, 0x72202C79, 0x79782E31, 0x3663202C, 0x0A777A2E, 0x20646461, 0x782E3172,
	0x72202C79, 0x79782E30, 0x632D202C, 0x79782E37, 0x6C756D0A, 0x31546F20, 0x2C79782E, 0x2E317220,
	0x202C7978, 0x7A2E3763, 0x6F6D0A77, 0x446F2076, 0x76202C30, 0x346D0A35, 0x6F203478, 0x2C736F50,
	0x2C307620, 0x0A306320, 0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320,
	0x20726564, 0x65737341, 0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0000001F,
	0x80000000, 0x900F0000, 0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000,
	0xA0E40004, 0x00000008, 0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000,
	0xA1FF0004, 0x00000002, 0x80020000, 0x80550000, 0xA1FF0005, 0x00000002, 0x80030001, 0x80540000,
	0xA1540006, 0x00000005, 0xE0030000, 0x80540001, 0xA0FE0006, 0x00000002, 0x80030001, 0x80540000,
	0xA1540007, 0x00000005, 0xE0030001, 0x80540001, 0xA0FE0007, 0x00000001, 0xD00F0000, 0x90E40005,
	0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceTripleTexture: vertex shader that derives three sets of
// texture coordinates from the vertex position.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
//   oT2.xy = (r0.xy - c8.xy) * c8.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): c4/c5 presumably hold the surface's texture axes and c6..c8
// the per-texture offset/scale - confirm where the constants are uploaded.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"add r1.xy, r0.xy, -c8.xy\n"
	"mul oT2.xy, r1.xy, c8.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceTripleTexture[] = {
	0xFFFE0101, 0x007EFFFE, 0x47554244, 0x00000028, 0x000001BF, 0x00000098, 0x00000000, 0x00000000,
	0x0000000E, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000200, 0xFFFF0003,
	0x0000020C, 0xFFFF0004, 0x00000218, 0xFFFF0005, 0x00000228, 0xFFFF0006, 0x00000238, 0xFFFF0007,
	0x00000248, 0xFFFF0008, 0x00000258, 0xFFFF0009, 0x00000268, 0xFFFF000A, 0x00000278, 0xFFFF000B,
	0x00000288, 0xFFFF000C, 0x00000298, 0xFFFF000D, 0x000002A8, 0xFFFF000E, 0x000002B8, 0xFFFF000F,
	0x000002C4, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63,
	0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072, 0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220,
	0x76202C79, 0x63202C30, 0x64610A35, 0x30722064, 0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34,
	0x20646461, 0x792E3072, 0x3072202C, 0x202C792E, 0x2E35632D, 0x64610A77, 0x31722064, 0x2C79782E,
	0x2E307220, 0x202C7978, 0x2E36632D, 0x6D0A7978, 0x6F206C75, 0x782E3054, 0x72202C79, 0x79782E31,
	0x3663202C, 0x0A777A2E, 0x20646461, 0x782E3172, 0x72202C79, 0x79782E30, 0x632D202C, 0x79782E37,
	0x6C756D0A, 0x31546F20, 0x2C79782E, 0x2E317220, 0x202C7978, 0x7A2E3763, 0x64610A77, 0x31722064,
	0x2C79782E, 0x2E307220, 0x202C7978, 0x2E38632D, 0x6D0A7978, 0x6F206C75, 0x782E3254, 0x72202C79,
	0x79782E31, 0x3863202C, 0x0A777A2E, 0x20766F6D, 0x2C30446F, 0x0A357620, 0x3478346D, 0x6F506F20,
	0x76202C73, 0x63202C30, 0x4D000A30, 0x6F726369, 0x74666F73, 0x29522820, 0x44334420, 0x53203958,
	0x65646168, 0x73412072, 0x626D6573, 0x2072656C, 0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00,
	0x0000001F, 0x80000000, 0x900F0000, 0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000,
	0x90E40000, 0xA0E40004, 0x00000008, 0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000,
	0x80000000, 0xA1FF0004, 0x00000002, 0x80020000, 0x80550000, 0xA1FF0005, 0x00000002, 0x80030001,
	0x80540000, 0xA1540006, 0x00000005, 0xE0030000, 0x80540001, 0xA0FE0006, 0x00000002, 0x80030001,
	0x80540000, 0xA1540007, 0x00000005, 0xE0030001, 0x80540001, 0xA0FE0007, 0x00000002, 0x80030001,
	0x80540000, 0xA1540008, 0x00000005, 0xE0030002, 0x80540001, 0xA0FE0008, 0x00000001, 0xD00F0000,
	0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceQuadTexture: vertex shader that derives four sets of
// texture coordinates from the vertex position.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
//   oT2.xy = (r0.xy - c8.xy) * c8.zw
//   oT3.xy = (r0.xy - c9.xy) * c9.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): c4/c5 presumably hold the surface's texture axes and c6..c9
// the per-texture offset/scale - confirm where the constants are uploaded.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"add r1.xy, r0.xy, -c8.xy\n"
	"mul oT2.xy, r1.xy, c8.zw\n"

	"add r1.xy, r0.xy, -c9.xy\n"
	"mul oT3.xy, r1.xy, c9.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceQuadTexture[] = {
	0xFFFE0101, 0x008EFFFE, 0x47554244, 0x00000028, 0x00000201, 0x000000A8, 0x00000000, 0x00000000,
	0x00000010, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000240, 0xFFFF0003,
	0x0000024C, 0xFFFF0004, 0x00000258, 0xFFFF0005, 0x00000268, 0xFFFF0006, 0x00000278, 0xFFFF0007,
	0x00000288, 0xFFFF0008, 0x00000298, 0xFFFF0009, 0x000002A8, 0xFFFF000A, 0x000002B8, 0xFFFF000B,
	0x000002C8, 0xFFFF000C, 0x000002D8, 0xFFFF000D, 0x000002E8, 0xFFFF000E, 0x000002F8, 0xFFFF000F,
	0x00000308, 0xFFFF0010, 0x00000318, 0xFFFF0011, 0x00000324, 0x312E7376, 0x640A312E, 0x705F6C63,
	0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072,
	0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30, 0x64610A35, 0x30722064,
	0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072, 0x3072202C, 0x202C792E,
	0x2E35632D, 0x64610A77, 0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E36632D, 0x6D0A7978,
	0x6F206C75, 0x782E3054, 0x72202C79, 0x79782E31, 0x3663202C, 0x0A777A2E, 0x20646461, 0x782E3172,
	0x72202C79, 0x79782E30, 0x632D202C, 0x79782E37, 0x6C756D0A, 0x31546F20, 0x2C79782E, 0x2E317220,
	0x202C7978, 0x7A2E3763, 0x64610A77, 0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E38632D,
	0x6D0A7978, 0x6F206C75, 0x782E3254, 0x72202C79, 0x79782E31, 0x3863202C, 0x0A777A2E, 0x20646461,
	0x782E3172, 0x72202C79, 0x79782E30, 0x632D202C, 0x79782E39, 0x6C756D0A, 0x33546F20, 0x2C79782E,
	0x2E317220, 0x202C7978, 0x7A2E3963, 0x6F6D0A77, 0x446F2076, 0x76202C30, 0x346D0A35, 0x6F203478,
	0x2C736F50, 0x2C307620, 0x0A306320, 0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433,
	0x61685320, 0x20726564, 0x65737341, 0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430,
	0x0000001F, 0x80000000, 0x900F0000, 0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000,
	0x90E40000, 0xA0E40004, 0x00000008, 0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000,
	0x80000000, 0xA1FF0004, 0x00000002, 0x80020000, 0x80550000, 0xA1FF0005, 0x00000002, 0x80030001,
	0x80540000, 0xA1540006, 0x00000005, 0xE0030000, 0x80540001, 0xA0FE0006, 0x00000002, 0x80030001,
	0x80540000, 0xA1540007, 0x00000005, 0xE0030001, 0x80540001, 0xA0FE0007, 0x00000002, 0x80030001,
	0x80540000, 0xA1540008, 0x00000005, 0xE0030002, 0x80540001, 0xA0FE0008, 0x00000002, 0x80030001,
	0x80540000, 0xA1540009, 0x00000005, 0xE0030003, 0x80540001, 0xA0FE0009, 0x00000001, 0xD00F0000,
	0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceDetailAlpha: vertex shader producing a depth-based
// coordinate in oT0 plus one position-derived texture coordinate set.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.x = v0.z * c6.x,  oT0.y = c6.y
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): going by the name, oT0 presumably indexes a detail-texture
// fade (alpha) lookup by distance - confirm at the use sites.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"mul oT0.x, v0.z, c6.x\n"
	"mov oT0.y, c6.y\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceDetailAlpha[] = {
	0xFFFE0101, 0x006AFFFE, 0x47554244, 0x00000028, 0x00000171, 0x00000088, 0x00000000, 0x00000000,
	0x0000000C, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001B0, 0xFFFF0003,
	0x000001BC, 0xFFFF0004, 0x000001C8, 0xFFFF0005, 0x000001D8, 0xFFFF0006, 0x000001E8, 0xFFFF0007,
	0x000001F8, 0xFFFF0008, 0x00000208, 0xFFFF0009, 0x00000218, 0xFFFF000A, 0x00000224, 0xFFFF000B,
	0x00000234, 0xFFFF000C, 0x00000244, 0xFFFF000D, 0x00000250, 0x312E7376, 0x640A312E, 0x705F6C63,
	0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072,
	0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30, 0x64610A35, 0x30722064,
	0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072, 0x3072202C, 0x202C792E,
	0x2E35632D, 0x756D0A77, 0x546F206C, 0x2C782E30, 0x2E307620, 0x63202C7A, 0x0A782E36, 0x20766F6D,
	0x2E30546F, 0x63202C79, 0x0A792E36, 0x20646461, 0x782E3172, 0x72202C79, 0x79782E30, 0x632D202C,
	0x79782E37, 0x6C756D0A, 0x31546F20, 0x2C79782E, 0x2E317220, 0x202C7978, 0x7A2E3763, 0x6F6D0A77,
	0x446F2076, 0x76202C30, 0x346D0A35, 0x6F203478, 0x2C736F50, 0x2C307620, 0x0A306320, 0x63694D00,
	0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320, 0x20726564, 0x65737341, 0x656C626D,
	0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0000001F, 0x80000000, 0x900F0000, 0x0000001F,
	0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000, 0xA0E40004, 0x00000008, 0x80020000,
	0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000, 0xA1FF0004, 0x00000002, 0x80020000,
	0x80550000, 0xA1FF0005, 0x00000005, 0xE0010000, 0x90AA0000, 0xA0000006, 0x00000001, 0xE0020000,
	0xA0550006, 0x00000002, 0x80030001, 0x80540000, 0xA1540007, 0x00000005, 0xE0030001, 0x80540001,
	0xA0FE0007, 0x00000001, 0xD00F0000, 0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000,
	0x0000FFFF
};

// g_vpComplexSurfaceSingleTextureAndDetailTexture: vertex shader producing a
// depth-based coordinate in oT0 plus two position-derived texture coordinate sets.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.x = v0.z * c6.x,  oT0.y = c6.y
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
//   oT2.xy = (r0.xy - c8.xy) * c8.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): which of oT1/oT2 feeds the base texture and which the detail
// texture is not visible here - confirm at the use sites.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"mul oT0.x, v0.z, c6.x\n"
	"mov oT0.y, c6.y\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"add r1.xy, r0.xy, -c8.xy\n"
	"mul oT2.xy, r1.xy, c8.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceSingleTextureAndDetailTexture[] = {
	0xFFFE0101, 0x007BFFFE, 0x47554244, 0x00000028, 0x000001B3, 0x00000098, 0x00000000, 0x00000000,
	0x0000000E, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001F4, 0xFFFF0003,
	0x00000200, 0xFFFF0004, 0x0000020C, 0xFFFF0005, 0x0000021C, 0xFFFF0006, 0x0000022C, 0xFFFF0007,
	0x0000023C, 0xFFFF0008, 0x0000024C, 0xFFFF0009, 0x0000025C, 0xFFFF000A, 0x00000268, 0xFFFF000B,
	0x00000278, 0xFFFF000C, 0x00000288, 0xFFFF000D, 0x00000298, 0xFFFF000E, 0x000002A8, 0xFFFF000F,
	0x000002B4, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63,
	0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072, 0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220,
	0x76202C79, 0x63202C30, 0x64610A35, 0x30722064, 0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34,
	0x20646461, 0x792E3072, 0x3072202C, 0x202C792E, 0x2E35632D, 0x756D0A77, 0x546F206C, 0x2C782E30,
	0x2E307620, 0x63202C7A, 0x0A782E36, 0x20766F6D, 0x2E30546F, 0x63202C79, 0x0A792E36, 0x20646461,
	0x782E3172, 0x72202C79, 0x79782E30, 0x632D202C, 0x79782E37, 0x6C756D0A, 0x31546F20, 0x2C79782E,
	0x2E317220, 0x202C7978, 0x7A2E3763, 0x64610A77, 0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978,
	0x2E38632D, 0x6D0A7978, 0x6F206C75, 0x782E3254, 0x72202C79, 0x79782E31, 0x3863202C, 0x0A777A2E,
	0x20766F6D, 0x2C30446F, 0x0A357620, 0x3478346D, 0x6F506F20, 0x76202C73, 0x63202C30, 0x4D000A30,
	0x6F726369, 0x74666F73, 0x29522820, 0x44334420, 0x53203958, 0x65646168, 0x73412072, 0x626D6573,
	0x2072656C, 0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00, 0x0000001F, 0x80000000, 0x900F0000,
	0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000, 0xA0E40004, 0x00000008,
	0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000, 0xA1FF0004, 0x00000002,
	0x80020000, 0x80550000, 0xA1FF0005, 0x00000005, 0xE0010000, 0x90AA0000, 0xA0000006, 0x00000001,
	0xE0020000, 0xA0550006, 0x00000002, 0x80030001, 0x80540000, 0xA1540007, 0x00000005, 0xE0030001,
	0x80540001, 0xA0FE0007, 0x00000002, 0x80030001, 0x80540000, 0xA1540008, 0x00000005, 0xE0030002,
	0x80540001, 0xA0FE0008, 0x00000001, 0xD00F0000, 0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000,
	0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceDualTextureAndDetailTexture: vertex shader producing a
// depth-based coordinate in oT0 plus three position-derived texture coordinate sets.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.x = v0.z * c6.x,  oT0.y = c6.y
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
//   oT2.xy = (r0.xy - c8.xy) * c8.zw
//   oT3.xy = (r0.xy - c9.xy) * c9.zw
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
// NOTE(review): which of oT1..oT3 feed the base textures and which the detail
// texture is not visible here - confirm at the use sites.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"mul oT0.x, v0.z, c6.x\n"
	"mov oT0.y, c6.y\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"add r1.xy, r0.xy, -c8.xy\n"
	"mul oT2.xy, r1.xy, c8.zw\n"

	"add r1.xy, r0.xy, -c9.xy\n"
	"mul oT3.xy, r1.xy, c9.zw\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceDualTextureAndDetailTexture[] = {
	0xFFFE0101, 0x008BFFFE, 0x47554244, 0x00000028, 0x000001F5, 0x000000A8, 0x00000000, 0x00000000,
	0x00000010, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000234, 0xFFFF0003,
	0x00000240, 0xFFFF0004, 0x0000024C, 0xFFFF0005, 0x0000025C, 0xFFFF0006, 0x0000026C, 0xFFFF0007,
	0x0000027C, 0xFFFF0008, 0x0000028C, 0xFFFF0009, 0x0000029C, 0xFFFF000A, 0x000002A8, 0xFFFF000B,
	0x000002B8, 0xFFFF000C, 0x000002C8, 0xFFFF000D, 0x000002D8, 0xFFFF000E, 0x000002E8, 0xFFFF000F,
	0x000002F8, 0xFFFF0010, 0x00000308, 0xFFFF0011, 0x00000314, 0x312E7376, 0x640A312E, 0x705F6C63,
	0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072,
	0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30, 0x64610A35, 0x30722064,
	0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072, 0x3072202C, 0x202C792E,
	0x2E35632D, 0x756D0A77, 0x546F206C, 0x2C782E30, 0x2E307620, 0x63202C7A, 0x0A782E36, 0x20766F6D,
	0x2E30546F, 0x63202C79, 0x0A792E36, 0x20646461, 0x782E3172, 0x72202C79, 0x79782E30, 0x632D202C,
	0x79782E37, 0x6C756D0A, 0x31546F20, 0x2C79782E, 0x2E317220, 0x202C7978, 0x7A2E3763, 0x64610A77,
	0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E38632D, 0x6D0A7978, 0x6F206C75, 0x782E3254,
	0x72202C79, 0x79782E31, 0x3863202C, 0x0A777A2E, 0x20646461, 0x782E3172, 0x72202C79, 0x79782E30,
	0x632D202C, 0x79782E39, 0x6C756D0A, 0x33546F20, 0x2C79782E, 0x2E317220, 0x202C7978, 0x7A2E3963,
	0x6F6D0A77, 0x446F2076, 0x76202C30, 0x346D0A35, 0x6F203478, 0x2C736F50, 0x2C307620, 0x0A306320,
	0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320, 0x20726564, 0x65737341,
	0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0000001F, 0x80000000, 0x900F0000,
	0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000, 0xA0E40004, 0x00000008,
	0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000, 0xA1FF0004, 0x00000002,
	0x80020000, 0x80550000, 0xA1FF0005, 0x00000005, 0xE0010000, 0x90AA0000, 0xA0000006, 0x00000001,
	0xE0020000, 0xA0550006, 0x00000002, 0x80030001, 0x80540000, 0xA1540007, 0x00000005, 0xE0030001,
	0x80540001, 0xA0FE0007, 0x00000002, 0x80030001, 0x80540000, 0xA1540008, 0x00000005, 0xE0030002,
	0x80540001, 0xA0FE0008, 0x00000002, 0x80030001, 0x80540000, 0xA1540009, 0x00000005, 0xE0030003,
	0x80540001, 0xA0FE0009, 0x00000001, 0xD00F0000, 0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000,
	0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceSingleTextureWithPos: vertex shader that derives one set
// of texture coordinates from the vertex position and also forwards the raw
// input position in a texture coordinate register.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
//   oT1 = v0
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"mov oT1, v0\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceSingleTextureWithPos[] = {
	0xFFFE0101, 0x0062FFFE, 0x47554244, 0x00000028, 0x0000014F, 0x00000080, 0x00000000, 0x00000000,
	0x0000000B, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000190, 0xFFFF0003,
	0x0000019C, 0xFFFF0004, 0x000001A8, 0xFFFF0005, 0x000001B8, 0xFFFF0006, 0x000001C8, 0xFFFF0007,
	0x000001D8, 0xFFFF0008, 0x000001E8, 0xFFFF0009, 0x000001F8, 0xFFFF000A, 0x00000208, 0xFFFF000B,
	0x00000214, 0xFFFF000C, 0x00000220, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69,
	0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072, 0x3076202C, 0x3463202C,
	0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30, 0x64610A35, 0x30722064, 0x202C782E, 0x782E3072,
	0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072, 0x3072202C, 0x202C792E, 0x2E35632D, 0x64610A77,
	0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E36632D, 0x6D0A7978, 0x6F206C75, 0x782E3054,
	0x72202C79, 0x79782E31, 0x3663202C, 0x0A777A2E, 0x20766F6D, 0x2C31546F, 0x0A307620, 0x20766F6D,
	0x2C30446F, 0x0A357620, 0x3478346D, 0x6F506F20, 0x76202C73, 0x63202C30, 0x4D000A30, 0x6F726369,
	0x74666F73, 0x29522820, 0x44334420, 0x53203958, 0x65646168, 0x73412072, 0x626D6573, 0x2072656C,
	0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00, 0x0000001F, 0x80000000, 0x900F0000, 0x0000001F,
	0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000, 0xA0E40004, 0x00000008, 0x80020000,
	0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000, 0xA1FF0004, 0x00000002, 0x80020000,
	0x80550000, 0xA1FF0005, 0x00000002, 0x80030001, 0x80540000, 0xA1540006, 0x00000005, 0xE0030000,
	0x80540001, 0xA0FE0006, 0x00000001, 0xE00F0001, 0x90E40000, 0x00000001, 0xD00F0000, 0x90E40005,
	0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceDualTextureWithPos: vertex shader that derives two sets
// of texture coordinates from the vertex position and also forwards the raw
// input position in a texture coordinate register.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
//   oT2 = v0
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"mov oT2, v0\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceDualTextureWithPos[] = {
	0xFFFE0101, 0x0072FFFE, 0x47554244, 0x00000028, 0x00000191, 0x00000090, 0x00000000, 0x00000000,
	0x0000000D, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001D0, 0xFFFF0003,
	0x000001DC, 0xFFFF0004, 0x000001E8, 0xFFFF0005, 0x000001F8, 0xFFFF0006, 0x00000208, 0xFFFF0007,
	0x00000218, 0xFFFF0008, 0x00000228, 0xFFFF0009, 0x00000238, 0xFFFF000A, 0x00000248, 0xFFFF000B,
	0x00000258, 0xFFFF000C, 0x00000268, 0xFFFF000D, 0x00000274, 0xFFFF000E, 0x00000280, 0x312E7376,
	0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69, 0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620,
	0x20337064, 0x782E3072, 0x3076202C, 0x3463202C, 0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30,
	0x64610A35, 0x30722064, 0x202C782E, 0x782E3072, 0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072,
	0x3072202C, 0x202C792E, 0x2E35632D, 0x64610A77, 0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978,
	0x2E36632D, 0x6D0A7978, 0x6F206C75, 0x782E3054, 0x72202C79, 0x79782E31, 0x3663202C, 0x0A777A2E,
	0x20646461, 0x782E3172, 0x72202C79, 0x79782E30, 0x632D202C, 0x79782E37, 0x6C756D0A, 0x31546F20,
	0x2C79782E, 0x2E317220, 0x202C7978, 0x7A2E3763, 0x6F6D0A77, 0x546F2076, 0x76202C32, 0x6F6D0A30,
	0x446F2076, 0x76202C30, 0x346D0A35, 0x6F203478, 0x2C736F50, 0x2C307620, 0x0A306320, 0x63694D00,
	0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320, 0x20726564, 0x65737341, 0x656C626D,
	0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0000001F, 0x80000000, 0x900F0000, 0x0000001F,
	0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000, 0xA0E40004, 0x00000008, 0x80020000,
	0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000, 0xA1FF0004, 0x00000002, 0x80020000,
	0x80550000, 0xA1FF0005, 0x00000002, 0x80030001, 0x80540000, 0xA1540006, 0x00000005, 0xE0030000,
	0x80540001, 0xA0FE0006, 0x00000002, 0x80030001, 0x80540000, 0xA1540007, 0x00000005, 0xE0030001,
	0x80540001, 0xA0FE0007, 0x00000001, 0xE00F0002, 0x90E40000, 0x00000001, 0xD00F0000, 0x90E40005,
	0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};

// g_vpComplexSurfaceTripleTextureWithPos: vertex shader that derives three
// sets of texture coordinates from the vertex position and also forwards the
// raw input position in a texture coordinate register.
// Per the source below:
//   r0.x = dp3(v0, c4) - c4.w,  r0.y = dp3(v0, c5) - c5.w
//   oT0.xy = (r0.xy - c6.xy) * c6.zw
//   oT1.xy = (r0.xy - c7.xy) * c7.zw
//   oT2.xy = (r0.xy - c8.xy) * c8.zw
//   oT3 = v0
// The color (v5) is copied to oD0 and the position (v0) is transformed with
// the 4x4 matrix starting at c0.
//
// The disabled string is the vs.1.1 assembly source, kept for reference only;
// the DWORD array that follows is the pre-assembled bytecode, whose debug
// comment block embeds the same source text.
#if 0
static const char *g_tempShaderString =
	"vs.1.1\n"

	"dcl_position v0\n"
	"dcl_color v5\n"

	"dp3 r0.x, v0, c4\n"
	"dp3 r0.y, v0, c5\n"
	"add r0.x, r0.x, -c4.w\n"
	"add r0.y, r0.y, -c5.w\n"

	"add r1.xy, r0.xy, -c6.xy\n"
	"mul oT0.xy, r1.xy, c6.zw\n"

	"add r1.xy, r0.xy, -c7.xy\n"
	"mul oT1.xy, r1.xy, c7.zw\n"

	"add r1.xy, r0.xy, -c8.xy\n"
	"mul oT2.xy, r1.xy, c8.zw\n"

	"mov oT3, v0\n"

	"mov oD0, v5\n"

	"m4x4 oPos, v0, c0\n"
;
#endif
static const DWORD g_vpComplexSurfaceTripleTextureWithPos[] = {
	0xFFFE0101, 0x0083FFFE, 0x47554244, 0x00000028, 0x000001D3, 0x000000A0, 0x00000000, 0x00000000,
	0x0000000F, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000214, 0xFFFF0003,
	0x00000220, 0xFFFF0004, 0x0000022C, 0xFFFF0005, 0x0000023C, 0xFFFF0006, 0x0000024C, 0xFFFF0007,
	0x0000025C, 0xFFFF0008, 0x0000026C, 0xFFFF0009, 0x0000027C, 0xFFFF000A, 0x0000028C, 0xFFFF000B,
	0x0000029C, 0xFFFF000C, 0x000002AC, 0xFFFF000D, 0x000002BC, 0xFFFF000E, 0x000002CC, 0xFFFF000F,
	0x000002D8, 0xFFFF0010, 0x000002E4, 0x312E7376, 0x640A312E, 0x705F6C63, 0x7469736F, 0x206E6F69,
	0x640A3076, 0x635F6C63, 0x726F6C6F, 0x0A357620, 0x20337064, 0x782E3072, 0x3076202C, 0x3463202C,
	0x3370640A, 0x2E307220, 0x76202C79, 0x63202C30, 0x64610A35, 0x30722064, 0x202C782E, 0x782E3072,
	0x632D202C, 0x0A772E34, 0x20646461, 0x792E3072, 0x3072202C, 0x202C792E, 0x2E35632D, 0x64610A77,
	0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E36632D, 0x6D0A7978, 0x6F206C75, 0x782E3054,
	0x72202C79, 0x79782E31, 0x3663202C, 0x0A777A2E, 0x20646461, 0x782E3172, 0x72202C79, 0x79782E30,
	0x632D202C, 0x79782E37, 0x6C756D0A, 0x31546F20, 0x2C79782E, 0x2E317220, 0x202C7978, 0x7A2E3763,
	0x64610A77, 0x31722064, 0x2C79782E, 0x2E307220, 0x202C7978, 0x2E38632D, 0x6D0A7978, 0x6F206C75,
	0x782E3254, 0x72202C79, 0x79782E31, 0x3863202C, 0x0A777A2E, 0x20766F6D, 0x2C33546F, 0x0A307620,
	0x20766F6D, 0x2C30446F, 0x0A357620, 0x3478346D, 0x6F506F20, 0x76202C73, 0x63202C30, 0x4D000A30,
	0x6F726369, 0x74666F73, 0x29522820, 0x44334420, 0x53203958, 0x65646168, 0x73412072, 0x626D6573,
	0x2072656C, 0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00, 0x0000001F, 0x80000000, 0x900F0000,
	0x0000001F, 0x8000000A, 0x900F0005, 0x00000008, 0x80010000, 0x90E40000, 0xA0E40004, 0x00000008,
	0x80020000, 0x90E40000, 0xA0E40005, 0x00000002, 0x80010000, 0x80000000, 0xA1FF0004, 0x00000002,
	0x80020000, 0x80550000, 0xA1FF0005, 0x00000002, 0x80030001, 0x80540000, 0xA1540006, 0x00000005,
	0xE0030000, 0x80540001, 0xA0FE0006, 0x00000002, 0x80030001, 0x80540000, 0xA1540007, 0x00000005,
	0xE0030001, 0x80540001, 0xA0FE0007, 0x00000002, 0x80030001, 0x80540000, 0xA1540008, 0x00000005,
	0xE0030002, 0x80540001, 0xA0FE0008, 0x00000001, 0xE00F0003, 0x90E40000, 0x00000001, 0xD00F0000,
	0x90E40005, 0x00000014, 0xC00F0000, 0x90E40000, 0xA0E40000, 0x0000FFFF
};


//Stream definitions
////////////////////

// Vertex declaration with a position and a single color, both taken from stream 0.
// Element fields are, in order: Stream, Offset (bytes), Type, Method, Usage, UsageIndex.
static const D3DVERTEXELEMENT9 g_oneColorStreamDef[] = {
	// Stream 0, offset 0: three-float position.
	{ 0, 0,  D3DDECLTYPE_FLOAT3,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,	0 },
	// Stream 0, offset 12 (directly after the three floats): packed color, usage index 0.
	{ 0, 12, D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		0 },
	// Terminator required at the end of every declaration array.
	D3DDECL_END()
};

// Vertex declaration with position and color in stream 0 plus one texture coordinate set.
// Element fields are, in order: Stream, Offset (bytes), Type, Method, Usage, UsageIndex.
static const D3DVERTEXELEMENT9 g_standardSingleTextureStreamDef[] = {
	// Stream 0: three-float position at offset 0, packed color at offset 12.
	{ 0, 0,  D3DDECLTYPE_FLOAT3,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,	0 },
	{ 0, 12, D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		0 },
	// Stream 2: two-float texture coordinates, set 0. Stream 1 is not referenced here.
	{ 2, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	0 },
	D3DDECL_END()
};

// Vertex declaration with position and color in stream 0 plus two texture coordinate sets.
// Element fields are, in order: Stream, Offset (bytes), Type, Method, Usage, UsageIndex.
static const D3DVERTEXELEMENT9 g_standardDoubleTextureStreamDef[] = {
	// Stream 0: three-float position at offset 0, packed color at offset 12.
	{ 0, 0,  D3DDECLTYPE_FLOAT3,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,	0 },
	{ 0, 12, D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		0 },
	// Each texture coordinate set has its own stream: set 0 in stream 2, set 1 in stream 3.
	{ 2, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	0 },
	{ 3, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	1 },
	D3DDECL_END()
};

// Vertex declaration with position and color in stream 0 plus three texture coordinate sets.
// Element fields are, in order: Stream, Offset (bytes), Type, Method, Usage, UsageIndex.
static const D3DVERTEXELEMENT9 g_standardTripleTextureStreamDef[] = {
	// Stream 0: three-float position at offset 0, packed color at offset 12.
	{ 0, 0,  D3DDECLTYPE_FLOAT3,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,	0 },
	{ 0, 12, D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		0 },
	// Each texture coordinate set has its own stream: sets 0..2 in streams 2..4.
	{ 2, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	0 },
	{ 3, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	1 },
	{ 4, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	2 },
	D3DDECL_END()
};

// Vertex declaration with position and color in stream 0 plus four texture coordinate sets.
// Element fields are, in order: Stream, Offset (bytes), Type, Method, Usage, UsageIndex.
static const D3DVERTEXELEMENT9 g_standardQuadTextureStreamDef[] = {
	// Stream 0: three-float position at offset 0, packed color at offset 12.
	{ 0, 0,  D3DDECLTYPE_FLOAT3,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,	0 },
	{ 0, 12, D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		0 },
	// Each texture coordinate set has its own stream: sets 0..3 in streams 2..5.
	{ 2, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	0 },
	{ 3, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	1 },
	{ 4, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	2 },
	{ 5, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	3 },
	D3DDECL_END()
};

// Lookup table of the standard vertex declarations: entry i is the declaration
// that carries i + 1 texture coordinate sets (single, double, triple, quad).
// The pointers themselves are const as well, so the table cannot be retargeted
// at run time and the whole array is constant data.
// Only four entries are listed; if MAX_TMUNITS is larger than four, the
// remaining entries are null pointers.
static const D3DVERTEXELEMENT9 * const g_standardNTextureStreamDefs[MAX_TMUNITS] = {
	g_standardSingleTextureStreamDef,
	g_standardDoubleTextureStreamDef,
	g_standardTripleTextureStreamDef,
	g_standardQuadTextureStreamDef
};

// Vertex declaration with position, two colors and one texture coordinate set.
// Element fields are, in order: Stream, Offset (bytes), Type, Method, Usage, UsageIndex.
static const D3DVERTEXELEMENT9 g_twoColorSingleTextureStreamDef[] = {
	// Stream 0: three-float position at offset 0, packed color (usage index 0) at offset 12.
	{ 0, 0,  D3DDECLTYPE_FLOAT3,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION,	0 },
	{ 0, 12, D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		0 },
	// Stream 1: second packed color (usage index 1).
	{ 1, 0,  D3DDECLTYPE_D3DCOLOR,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR,		1 },
	// Stream 2: two-float texture coordinates, set 0.
	{ 2, 0,  D3DDECLTYPE_FLOAT2,	D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD,	0 },
	D3DDECL_END()
};


/*-----------------------------------------------------------------------------
	Fragment programs.
-----------------------------------------------------------------------------*/

//Pixel shader definitions
///////////////////////////

// Reference assembly listing for g_fpDefaultRenderingState (compiled out).
// Samples s0 at t0 and multiplies the texel by the interpolated color v0.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"

	"texld r0, t0, s0\n"

	"mul r0, r0, v0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above. It starts with the
// ps_2_0 version token (0xFFFF0200) and ends with the end token (0x0000FFFF).
// The leading comment block embeds the source text and the assembler version
// string as ASCII, so regenerate the whole array rather than editing tokens.
static const DWORD g_fpDefaultRenderingState[] = {
	0xFFFF0200, 0x0039FFFE, 0x47554244, 0x00000028, 0x000000AC, 0x00000058, 0x00000000, 0x00000000,
	0x00000006, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000000EC, 0xFFFF0003,
	0x000000F8, 0xFFFF0004, 0x00000104, 0xFFFF0005, 0x00000110, 0xFFFF0006, 0x00000120, 0xFFFF0007,
	0x00000130, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162, 0x325F6C63, 0x30732064,
	0x6C63640A, 0x2E307420, 0x740A7978, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320, 0x206C756D,
	0x202C3072, 0x202C3072, 0x6D0A3076, 0x6F20766F, 0x202C3043, 0x000A3072, 0x7263694D, 0x666F736F,
	0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x41207265, 0x6D657373, 0x72656C62, 0x302E3520,
	0x30302E34, 0x3039322E, 0xABAB0034, 0x0200001F, 0x80000000, 0x900F0000, 0x0200001F, 0x90000000,
	0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800,
	0x03000005, 0x800F0000, 0x80E40000, 0x90E40000, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpDefaultRenderingStateWithFog (compiled out).
// Samples s0 at t0, multiplies the texel by v0, then adds v1 to the rgb
// channels only; alpha stays the product of texel alpha and v0 alpha.
// NOTE(review): v1 presumably carries the fog color contribution - confirm against the caller.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl v1.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"

	"texld r0, t0, s0\n"

	"mul r0, r0, v0\n"
	"add r0.rgb, r0, v1\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpDefaultRenderingStateWithFog[] = {
	0xFFFF0200, 0x0045FFFE, 0x47554244, 0x00000028, 0x000000DB, 0x00000068, 0x00000000, 0x00000000,
	0x00000008, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x0000011C, 0xFFFF0003,
	0x00000128, 0xFFFF0004, 0x00000134, 0xFFFF0005, 0x00000140, 0xFFFF0006, 0x0000014C, 0xFFFF0007,
	0x0000015C, 0xFFFF0008, 0x0000016C, 0xFFFF0009, 0x0000017C, 0x325F7370, 0x640A305F, 0x76206C63,
	0x67722E30, 0x640A6162, 0x76206C63, 0x67722E31, 0x640A6162, 0x325F6C63, 0x30732064, 0x6C63640A,
	0x2E307420, 0x740A7978, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320, 0x206C756D, 0x202C3072,
	0x202C3072, 0x610A3076, 0x72206464, 0x67722E30, 0x72202C62, 0x76202C30, 0x6F6D0A31, 0x436F2076,
	0x72202C30, 0x4D000A30, 0x6F726369, 0x74666F73, 0x29522820, 0x44334420, 0x53203958, 0x65646168,
	0x73412072, 0x626D6573, 0x2072656C, 0x34302E35, 0x2E30302E, 0x34303932, 0xABABAB00, 0x0200001F,
	0x80000000, 0x900F0000, 0x0200001F, 0x80000000, 0x900F0001, 0x0200001F, 0x90000000, 0xA00F0800,
	0x0200001F, 0x80000000, 0xB0030000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000005,
	0x800F0000, 0x80E40000, 0x90E40000, 0x03000002, 0x80070000, 0x80E40000, 0x90E40001, 0x02000001,
	0x800F0800, 0x80E40000, 0x0000FFFF
};

// Only built when UTGLR_RUNE_BUILD is defined.
#ifdef UTGLR_RUNE_BUILD
// Reference assembly listing for g_fpDefaultRenderingStateWithLinearFog (compiled out).
// Samples s0 at t0 and multiplies the texel by v0. The listing contains no
// fog instructions and matches the one given for g_fpDefaultRenderingState.
// NOTE(review): linear fog is presumably applied outside the shader - confirm.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"

	"texld r0, t0, s0\n"

	"mul r0, r0, v0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpDefaultRenderingStateWithLinearFog[] = {
	0xFFFF0200, 0x0039FFFE, 0x47554244, 0x00000028, 0x000000AC, 0x00000058, 0x00000000, 0x00000000,
	0x00000006, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000000EC, 0xFFFF0003,
	0x000000F8, 0xFFFF0004, 0x00000104, 0xFFFF0005, 0x00000110, 0xFFFF0006, 0x00000120, 0xFFFF0007,
	0x00000130, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162, 0x325F6C63, 0x30732064,
	0x6C63640A, 0x2E307420, 0x740A7978, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320, 0x206C756D,
	0x202C3072, 0x202C3072, 0x6D0A3076, 0x6F20766F, 0x202C3043, 0x000A3072, 0x7263694D, 0x666F736F,
	0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x41207265, 0x6D657373, 0x72656C62, 0x302E3520,
	0x30302E34, 0x3039322E, 0xABAB0034, 0x0200001F, 0x80000000, 0x900F0000, 0x0200001F, 0x90000000,
	0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800,
	0x03000005, 0x800F0000, 0x80E40000, 0x90E40000, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};
#endif

// Reference assembly listing for g_fpComplexSurfaceSingleTexture (compiled out).
// Samples s0 at t0 and writes the texel to the output unmodified; no vertex
// color is read.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl_2d s0\n"
	"dcl t0.xy\n"

	"texld r0, t0, s0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpComplexSurfaceSingleTexture[] = {
	0xFFFF0200, 0x002EFFFE, 0x47554244, 0x00000028, 0x00000081, 0x00000048, 0x00000000, 0x00000000,
	0x00000004, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000000C0, 0xFFFF0003,
	0x000000CC, 0xFFFF0004, 0x000000D8, 0xFFFF0005, 0x000000E8, 0x325F7370, 0x640A305F, 0x325F6C63,
	0x30732064, 0x6C63640A, 0x2E307420, 0x740A7978, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320,
	0x20766F6D, 0x2C30436F, 0x0A307220, 0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433,
	0x61685320, 0x20726564, 0x65737341, 0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430,
	0x0200001F, 0x90000000, 0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x03000042, 0x800F0000,
	0xB0E40000, 0xA0E40800, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpComplexSurfaceDualTextureModulated (compiled out).
// Samples s0 at t0 and s1 at t1 and outputs the component-wise product of the
// two texels.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"

	"mul r0, r0, r1\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpComplexSurfaceDualTextureModulated[] = {
	0xFFFF0200, 0x0043FFFE, 0x47554244, 0x00000028, 0x000000D5, 0x00000068, 0x00000000, 0x00000000,
	0x00000008, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000114, 0xFFFF0003,
	0x00000120, 0xFFFF0004, 0x0000012C, 0xFFFF0005, 0x00000138, 0xFFFF0006, 0x00000144, 0xFFFF0007,
	0x00000154, 0xFFFF0008, 0x00000164, 0xFFFF0009, 0x00000174, 0x325F7370, 0x640A305F, 0x325F6C63,
	0x30732064, 0x6C63640A, 0x2E307420, 0x640A7978, 0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420,
	0x740A7978, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320, 0x6C786574, 0x31722064, 0x3174202C,
	0x3173202C, 0x6C756D0A, 0x2C307220, 0x2C307220, 0x0A317220, 0x20766F6D, 0x2C30436F, 0x0A307220,
	0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320, 0x20726564, 0x65737341,
	0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0200001F, 0x90000000, 0xA00F0800,
	0x0200001F, 0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F, 0x80000000,
	0xB0030001, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000042, 0x800F0001, 0xB0E40001,
	0xA0E40801, 0x03000005, 0x800F0000, 0x80E40000, 0x80E40001, 0x02000001, 0x800F0800, 0x80E40000,
	0x0000FFFF
};

// Reference assembly listing for g_fpComplexSurfaceDualTextureModulated2X (compiled out).
// Samples s0 at t0 and s1 at t1, multiplies the two texels, then doubles the
// rgb channels by adding the product to itself; alpha is not doubled.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"

	"mul r0, r0, r1\n"
	"add r0.rgb, r0, r0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpComplexSurfaceDualTextureModulated2X[] = {
	0xFFFF0200, 0x004AFFFE, 0x47554244, 0x00000028, 0x000000F0, 0x00000070, 0x00000000, 0x00000000,
	0x00000009, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000130, 0xFFFF0003,
	0x0000013C, 0xFFFF0004, 0x00000148, 0xFFFF0005, 0x00000154, 0xFFFF0006, 0x00000160, 0xFFFF0007,
	0x00000170, 0xFFFF0008, 0x00000180, 0xFFFF0009, 0x00000190, 0xFFFF000A, 0x000001A0, 0x325F7370,
	0x640A305F, 0x325F6C63, 0x30732064, 0x6C63640A, 0x2E307420, 0x640A7978, 0x325F6C63, 0x31732064,
	0x6C63640A, 0x2E317420, 0x740A7978, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320, 0x6C786574,
	0x31722064, 0x3174202C, 0x3173202C, 0x6C756D0A, 0x2C307220, 0x2C307220, 0x0A317220, 0x20646461,
	0x722E3072, 0x202C6267, 0x202C3072, 0x6D0A3072, 0x6F20766F, 0x202C3043, 0x000A3072, 0x7263694D,
	0x666F736F, 0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x41207265, 0x6D657373, 0x72656C62,
	0x302E3520, 0x30302E34, 0x3039322E, 0xABAB0034, 0x0200001F, 0x90000000, 0xA00F0800, 0x0200001F,
	0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F, 0x80000000, 0xB0030001,
	0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000042, 0x800F0001, 0xB0E40001, 0xA0E40801,
	0x03000005, 0x800F0000, 0x80E40000, 0x80E40001, 0x03000002, 0x80070000, 0x80E40000, 0x80E40000,
	0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpComplexSurfaceSingleTextureWithFog (compiled out).
// Samples s0 at t0 and s1 at t1, then computes
//   rgb = texel0.rgb * (1 - texel1.a) + texel1.rgb
// and keeps texel0's alpha. c0 only supplies the constant 1.0.
// NOTE(review): s1 is presumably the fog map, making s0 the "single texture"
// in the name - confirm against the code that binds the samplers.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"def c0, 1.0f, 1.0f, 1.0f, 1.0f\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"

	"sub r1.a, c0.a, r1.a\n"

	"mad r0.rgb, r0, r1.aaaa, r1\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpComplexSurfaceSingleTextureWithFog[] = {
	0xFFFF0200, 0x0057FFFE, 0x47554244, 0x00000028, 0x00000126, 0x00000078, 0x00000000, 0x00000000,
	0x0000000A, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000164, 0xFFFF0003,
	0x00000170, 0xFFFF0004, 0x0000017C, 0xFFFF0005, 0x00000188, 0xFFFF0006, 0x00000194, 0xFFFF0007,
	0x000001AC, 0xFFFF0008, 0x000001BC, 0xFFFF0009, 0x000001CC, 0xFFFF000A, 0x000001DC, 0xFFFF000B,
	0x000001F0, 0x325F7370, 0x640A305F, 0x325F6C63, 0x30732064, 0x6C63640A, 0x2E307420, 0x640A7978,
	0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420, 0x640A7978, 0x63206665, 0x31202C30, 0x2C66302E,
	0x302E3120, 0x31202C66, 0x2C66302E, 0x302E3120, 0x65740A66, 0x20646C78, 0x202C3072, 0x202C3074,
	0x740A3073, 0x646C7865, 0x2C317220, 0x2C317420, 0x0A317320, 0x20627573, 0x612E3172, 0x3063202C,
	0x202C612E, 0x612E3172, 0x64616D0A, 0x2E307220, 0x2C626772, 0x2C307220, 0x2E317220, 0x61616161,
	0x3172202C, 0x766F6D0A, 0x30436F20, 0x3072202C, 0x694D000A, 0x736F7263, 0x2074666F, 0x20295228,
	0x58443344, 0x68532039, 0x72656461, 0x73734120, 0x6C626D65, 0x35207265, 0x2E34302E, 0x322E3030,
	0x00343039, 0x0200001F, 0x90000000, 0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x0200001F,
	0x90000000, 0xA00F0801, 0x0200001F, 0x80000000, 0xB0030001, 0x05000051, 0xA00F0000, 0x3F800000,
	0x3F800000, 0x3F800000, 0x3F800000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000042,
	0x800F0001, 0xB0E40001, 0xA0E40801, 0x03000002, 0x80080001, 0xA0FF0000, 0x81FF0001, 0x04000004,
	0x80070000, 0x80E40000, 0x80FF0001, 0x80E40001, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpComplexSurfaceDualTextureModulatedWithFog (compiled out).
// Samples s0, s1 and s2, multiplies texel0 by texel1, then computes
//   rgb = product.rgb * (1 - texel2.a) + texel2.rgb
// and keeps the product's alpha. c0 only supplies the constant 1.0.
// NOTE(review): s2 is presumably the fog map - confirm against the code that
// binds the samplers.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"dcl_2d s2\n"
	"dcl t2.xy\n"
	"def c0, 1.0f, 1.0f, 1.0f, 1.0f\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"
	"texld r2, t2, s2\n"

	"sub r2.a, c0.a, r2.a\n"

	"mul r0, r0, r1\n"

	"mad r0.rgb, r0, r2.aaaa, r2\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpComplexSurfaceDualTextureModulatedWithFog[] = {
	0xFFFF0200, 0x006CFFFE, 0x47554244, 0x00000028, 0x0000017A, 0x00000098, 0x00000000, 0x00000000,
	0x0000000E, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001B8, 0xFFFF0003,
	0x000001C4, 0xFFFF0004, 0x000001D0, 0xFFFF0005, 0x000001DC, 0xFFFF0006, 0x000001E8, 0xFFFF0007,
	0x000001F4, 0xFFFF0008, 0x00000200, 0xFFFF0009, 0x00000218, 0xFFFF000A, 0x00000228, 0xFFFF000B,
	0x00000238, 0xFFFF000C, 0x00000248, 0xFFFF000D, 0x00000258, 0xFFFF000E, 0x00000268, 0xFFFF000F,
	0x0000027C, 0x325F7370, 0x640A305F, 0x325F6C63, 0x30732064, 0x6C63640A, 0x2E307420, 0x640A7978,
	0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420, 0x640A7978, 0x325F6C63, 0x32732064, 0x6C63640A,
	0x2E327420, 0x640A7978, 0x63206665, 0x31202C30, 0x2C66302E, 0x302E3120, 0x31202C66, 0x2C66302E,
	0x302E3120, 0x65740A66, 0x20646C78, 0x202C3072, 0x202C3074, 0x740A3073, 0x646C7865, 0x2C317220,
	0x2C317420, 0x0A317320, 0x6C786574, 0x32722064, 0x3274202C, 0x3273202C, 0x6275730A, 0x2E327220,
	0x63202C61, 0x2C612E30, 0x2E327220, 0x756D0A61, 0x3072206C, 0x3072202C, 0x3172202C, 0x64616D0A,
	0x2E307220, 0x2C626772, 0x2C307220, 0x2E327220, 0x61616161, 0x3272202C, 0x766F6D0A, 0x30436F20,
	0x3072202C, 0x694D000A, 0x736F7263, 0x2074666F, 0x20295228, 0x58443344, 0x68532039, 0x72656461,
	0x73734120, 0x6C626D65, 0x35207265, 0x2E34302E, 0x322E3030, 0x00343039, 0x0200001F, 0x90000000,
	0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F,
	0x80000000, 0xB0030001, 0x0200001F, 0x90000000, 0xA00F0802, 0x0200001F, 0x80000000, 0xB0030002,
	0x05000051, 0xA00F0000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x03000042, 0x800F0000,
	0xB0E40000, 0xA0E40800, 0x03000042, 0x800F0001, 0xB0E40001, 0xA0E40801, 0x03000042, 0x800F0002,
	0xB0E40002, 0xA0E40802, 0x03000002, 0x80080002, 0xA0FF0000, 0x81FF0002, 0x03000005, 0x800F0000,
	0x80E40000, 0x80E40001, 0x04000004, 0x80070000, 0x80E40000, 0x80FF0002, 0x80E40002, 0x02000001,
	0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpComplexSurfaceDualTextureModulated2XWithFog (compiled out).
// Samples s0, s1 and s2, multiplies texel0 by texel1, doubles the rgb of the
// product, then computes
//   rgb = doubled.rgb * (1 - texel2.a) + texel2.rgb
// and keeps the (undoubled) product alpha. c0 only supplies the constant 1.0.
// NOTE(review): s2 is presumably the fog map - confirm against the code that
// binds the samplers.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"dcl_2d s2\n"
	"dcl t2.xy\n"
	"def c0, 1.0f, 1.0f, 1.0f, 1.0f\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"
	"texld r2, t2, s2\n"

	"sub r2.a, c0.a, r2.a\n"

	"mul r0, r0, r1\n"
	"add r0.rgb, r0, r0\n"

	"mad r0.rgb, r0, r2.aaaa, r2\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpComplexSurfaceDualTextureModulated2XWithFog[] = {
	0xFFFF0200, 0x0073FFFE, 0x47554244, 0x00000028, 0x00000195, 0x000000A0, 0x00000000, 0x00000000,
	0x0000000F, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001D4, 0xFFFF0003,
	0x000001E0, 0xFFFF0004, 0x000001EC, 0xFFFF0005, 0x000001F8, 0xFFFF0006, 0x00000204, 0xFFFF0007,
	0x00000210, 0xFFFF0008, 0x0000021C, 0xFFFF0009, 0x00000234, 0xFFFF000A, 0x00000244, 0xFFFF000B,
	0x00000254, 0xFFFF000C, 0x00000264, 0xFFFF000D, 0x00000274, 0xFFFF000E, 0x00000284, 0xFFFF000F,
	0x00000294, 0xFFFF0010, 0x000002A8, 0x325F7370, 0x640A305F, 0x325F6C63, 0x30732064, 0x6C63640A,
	0x2E307420, 0x640A7978, 0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420, 0x640A7978, 0x325F6C63,
	0x32732064, 0x6C63640A, 0x2E327420, 0x640A7978, 0x63206665, 0x31202C30, 0x2C66302E, 0x302E3120,
	0x31202C66, 0x2C66302E, 0x302E3120, 0x65740A66, 0x20646C78, 0x202C3072, 0x202C3074, 0x740A3073,
	0x646C7865, 0x2C317220, 0x2C317420, 0x0A317320, 0x6C786574, 0x32722064, 0x3274202C, 0x3273202C,
	0x6275730A, 0x2E327220, 0x63202C61, 0x2C612E30, 0x2E327220, 0x756D0A61, 0x3072206C, 0x3072202C,
	0x3172202C, 0x6464610A, 0x2E307220, 0x2C626772, 0x2C307220, 0x0A307220, 0x2064616D, 0x722E3072,
	0x202C6267, 0x202C3072, 0x612E3272, 0x2C616161, 0x0A327220, 0x20766F6D, 0x2C30436F, 0x0A307220,
	0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320, 0x20726564, 0x65737341,
	0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0200001F, 0x90000000, 0xA00F0800,
	0x0200001F, 0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F, 0x80000000,
	0xB0030001, 0x0200001F, 0x90000000, 0xA00F0802, 0x0200001F, 0x80000000, 0xB0030002, 0x05000051,
	0xA00F0000, 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000, 0x03000042, 0x800F0000, 0xB0E40000,
	0xA0E40800, 0x03000042, 0x800F0001, 0xB0E40001, 0xA0E40801, 0x03000042, 0x800F0002, 0xB0E40002,
	0xA0E40802, 0x03000002, 0x80080002, 0xA0FF0000, 0x81FF0002, 0x03000005, 0x800F0000, 0x80E40000,
	0x80E40001, 0x03000002, 0x80070000, 0x80E40000, 0x80E40000, 0x04000004, 0x80070000, 0x80E40000,
	0x80FF0002, 0x80E40002, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpDetailTexture (compiled out).
// Steps, as written in the listing:
//   f = saturate(t1.z * c0.x)            (c0.x = 0.002631578947, roughly 1/380)
//   texkill on (c0.y - f), c0.y = 0.999  (pixel is discarded when f exceeds 0.999)
//   texel = sample s0 at t0
//   output = lerp(texel, v0, f)          (f = 0 gives the texel, f = 1 gives v0)
// NOTE(review): t1.z presumably carries a view distance, so the detail texel
// fades toward v0 with distance - confirm against the vertex setup.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl t1.xyz\n"
	"def c0, 0.002631578947f, 0.999f, 0.0f, 0.0f\n"

	"mul_sat r1.x, t1.z, c0.x\n"
	"sub r0, c0.yyyy, r1.xxxx\n"
	"texkill r0\n"

	"texld r0, t0, s0\n"

	"lrp r2, r1.xxxx, v0, r0\n"

	"mov oC0, r2\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpDetailTexture[] = {
	0xFFFF0200, 0x0062FFFE, 0x47554244, 0x00000028, 0x00000151, 0x00000080, 0x00000000, 0x00000000,
	0x0000000B, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000190, 0xFFFF0003,
	0x0000019C, 0xFFFF0004, 0x000001A8, 0xFFFF0005, 0x000001B4, 0xFFFF0006, 0x000001C0, 0xFFFF0007,
	0x000001D8, 0xFFFF0008, 0x000001E8, 0xFFFF0009, 0x000001F8, 0xFFFF000A, 0x00000200, 0xFFFF000B,
	0x00000210, 0xFFFF000C, 0x00000224, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162,
	0x325F6C63, 0x30732064, 0x6C63640A, 0x2E307420, 0x640A7978, 0x74206C63, 0x79782E31, 0x65640A7A,
	0x30632066, 0x2E30202C, 0x36323030, 0x37353133, 0x37343938, 0x30202C66, 0x3939392E, 0x30202C66,
	0x2C66302E, 0x302E3020, 0x756D0A66, 0x61735F6C, 0x31722074, 0x202C782E, 0x7A2E3174, 0x3063202C,
	0x730A782E, 0x72206275, 0x63202C30, 0x79792E30, 0x202C7979, 0x782E3172, 0x0A787878, 0x6B786574,
	0x206C6C69, 0x740A3072, 0x646C7865, 0x2C307220, 0x2C307420, 0x0A307320, 0x2070726C, 0x202C3272,
	0x782E3172, 0x2C787878, 0x2C307620, 0x0A307220, 0x20766F6D, 0x2C30436F, 0x0A327220, 0x63694D00,
	0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320, 0x20726564, 0x65737341, 0x656C626D,
	0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0200001F, 0x80000000, 0x900F0000, 0x0200001F,
	0x90000000, 0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x0200001F, 0x80000000, 0xB0070001,
	0x05000051, 0xA00F0000, 0x3B2C7692, 0x3F7FBE77, 0x00000000, 0x00000000, 0x03000005, 0x80110001,
	0xB0AA0001, 0xA0000000, 0x03000002, 0x800F0000, 0xA0550000, 0x81000001, 0x01000041, 0x800F0000,
	0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x04000012, 0x800F0002, 0x80000001, 0x90E40000,
	0x80E40000, 0x02000001, 0x800F0800, 0x80E40002, 0x0000FFFF
};

// Reference assembly listing for g_fpDetailTextureTwoLayer (compiled out).
// Steps, as written in the listing:
//   f = saturate(t1.z * c0.x)            (c0.x = 0.002631578947, roughly 1/380)
//   texkill on (c0.y - f), c0.y = 0.999  (pixel is discarded when f exceeds 0.999)
//   layer1 = lerp(sample s0 at t0, v0, f)
//   layer2 = lerp(sample s0 at t0 * c1, v0, saturate(f * c1.x))
//            (c1 = 4.223, 4.223, 0, 1, so x and y of t0 are scaled by 4.223)
//   output = layer1 * layer2 * 2         (all four channels are doubled)
// Both layers read the same sampler s0.
// NOTE(review): t1.z presumably carries a view distance - confirm against the
// vertex setup.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xyzw\n"
	"dcl t1.xyz\n"
	"def c0, 0.002631578947f, 0.999f, 0.0f, 0.0f\n"
	"def c1, 4.223f, 4.223f, 0.0f, 1.0f\n"

	"mul_sat r1.x, t1.z, c0.x\n"
	"sub r0, c0.yyyy, r1.xxxx\n"
	"texkill r0\n"

	"texld r0, t0, s0\n"

	"lrp r2, r1.xxxx, v0, r0\n"

	"mul r0, t0, c1\n"
	"texld r0, r0, s0\n"

	"mul_sat r1.x, r1.x, c1.x\n"
	"lrp r3, r1.xxxx, v0, r0\n"

	"mul r2, r2, r3\n"
	"add r2, r2, r2\n"

	"mov oC0, r2\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpDetailTextureTwoLayer[] = {
	0xFFFF0200, 0x0095FFFE, 0x47554244, 0x00000028, 0x0000021D, 0x000000B8, 0x00000000, 0x00000000,
	0x00000012, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x0000025C, 0xFFFF0003,
	0x00000268, 0xFFFF0004, 0x00000274, 0xFFFF0005, 0x00000280, 0xFFFF0006, 0x0000028C, 0xFFFF0007,
	0x000002A4, 0xFFFF0008, 0x000002BC, 0xFFFF0009, 0x000002CC, 0xFFFF000A, 0x000002DC, 0xFFFF000B,
	0x000002E4, 0xFFFF000C, 0x000002F4, 0xFFFF000D, 0x00000308, 0xFFFF000E, 0x00000318, 0xFFFF000F,
	0x00000328, 0xFFFF0010, 0x00000338, 0xFFFF0011, 0x0000034C, 0xFFFF0012, 0x0000035C, 0xFFFF0013,
	0x0000036C, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162, 0x325F6C63, 0x30732064,
	0x6C63640A, 0x2E307420, 0x777A7978, 0x6C63640A, 0x2E317420, 0x0A7A7978, 0x20666564, 0x202C3063,
	0x30302E30, 0x31333632, 0x39383735, 0x2C663734, 0x392E3020, 0x2C663939, 0x302E3020, 0x30202C66,
	0x0A66302E, 0x20666564, 0x202C3163, 0x32322E34, 0x202C6633, 0x32322E34, 0x202C6633, 0x66302E30,
	0x2E31202C, 0x6D0A6630, 0x735F6C75, 0x72207461, 0x2C782E31, 0x2E317420, 0x63202C7A, 0x0A782E30,
	0x20627573, 0x202C3072, 0x792E3063, 0x2C797979, 0x2E317220, 0x78787878, 0x7865740A, 0x6C6C696B,
	0x0A307220, 0x6C786574, 0x30722064, 0x3074202C, 0x3073202C, 0x70726C0A, 0x2C327220, 0x2E317220,
	0x78787878, 0x3076202C, 0x3072202C, 0x6C756D0A, 0x2C307220, 0x2C307420, 0x0A316320, 0x6C786574,
	0x30722064, 0x3072202C, 0x3073202C, 0x6C756D0A, 0x7461735F, 0x2E317220, 0x72202C78, 0x2C782E31,
	0x2E316320, 0x726C0A78, 0x33722070, 0x3172202C, 0x7878782E, 0x76202C78, 0x72202C30, 0x756D0A30,
	0x3272206C, 0x3272202C, 0x3372202C, 0x6464610A, 0x2C327220, 0x2C327220, 0x0A327220, 0x20766F6D,
	0x2C30436F, 0x0A327220, 0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320,
	0x20726564, 0x65737341, 0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0200001F,
	0x80000000, 0x900F0000, 0x0200001F, 0x90000000, 0xA00F0800, 0x0200001F, 0x80000000, 0xB00F0000,
	0x0200001F, 0x80000000, 0xB0070001, 0x05000051, 0xA00F0000, 0x3B2C7692, 0x3F7FBE77, 0x00000000,
	0x00000000, 0x05000051, 0xA00F0001, 0x408722D1, 0x408722D1, 0x00000000, 0x3F800000, 0x03000005,
	0x80110001, 0xB0AA0001, 0xA0000000, 0x03000002, 0x800F0000, 0xA0550000, 0x81000001, 0x01000041,
	0x800F0000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x04000012, 0x800F0002, 0x80000001,
	0x90E40000, 0x80E40000, 0x03000005, 0x800F0000, 0xB0E40000, 0xA0E40001, 0x03000042, 0x800F0000,
	0x80E40000, 0xA0E40800, 0x03000005, 0x80110001, 0x80000001, 0xA0000001, 0x04000012, 0x800F0003,
	0x80000001, 0x90E40000, 0x80E40000, 0x03000005, 0x800F0002, 0x80E40002, 0x80E40003, 0x03000002,
	0x800F0002, 0x80E40002, 0x80E40002, 0x02000001, 0x800F0800, 0x80E40002, 0x0000FFFF
};

// Reference assembly listing for g_fpSingleTextureAndDetailTexture (compiled out).
// Steps, as written in the listing:
//   base   = sample s0 at t0
//   detail = sample s1 at t1
//   f      = saturate(t2.z * c0.x)       (c0.x = 0.002631578947, roughly 1/380)
//   blend  = lerp(detail, v0, f)         (f = 0 gives detail, f = 1 gives v0)
//   output.rgb = base.rgb * blend.rgb * 2, output.a = base.a
// c0.y (0.999) is defined but not referenced; this listing has no texkill.
// NOTE(review): t2.z presumably carries a view distance - confirm against the
// vertex setup.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"dcl t2.xyz\n"
	"def c0, 0.002631578947f, 0.999f, 0.0f, 0.0f\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"

	"mul_sat r2.x, t2.z, c0.x\n"
	"lrp r3, r2.xxxx, v0, r1\n"

	"mul r0.rgb, r0, r3\n"
	"add r0.rgb, r0, r0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpSingleTextureAndDetailTexture[] = {
	0xFFFF0200, 0x0072FFFE, 0x47554244, 0x00000028, 0x00000190, 0x00000098, 0x00000000, 0x00000000,
	0x0000000E, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000001D0, 0xFFFF0003,
	0x000001DC, 0xFFFF0004, 0x000001E8, 0xFFFF0005, 0x000001F4, 0xFFFF0006, 0x00000200, 0xFFFF0007,
	0x0000020C, 0xFFFF0008, 0x00000218, 0xFFFF0009, 0x00000230, 0xFFFF000A, 0x00000240, 0xFFFF000B,
	0x00000250, 0xFFFF000C, 0x00000260, 0xFFFF000D, 0x00000274, 0xFFFF000E, 0x00000284, 0xFFFF000F,
	0x00000294, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162, 0x325F6C63, 0x30732064,
	0x6C63640A, 0x2E307420, 0x640A7978, 0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420, 0x640A7978,
	0x74206C63, 0x79782E32, 0x65640A7A, 0x30632066, 0x2E30202C, 0x36323030, 0x37353133, 0x37343938,
	0x30202C66, 0x3939392E, 0x30202C66, 0x2C66302E, 0x302E3020, 0x65740A66, 0x20646C78, 0x202C3072,
	0x202C3074, 0x740A3073, 0x646C7865, 0x2C317220, 0x2C317420, 0x0A317320, 0x5F6C756D, 0x20746173,
	0x782E3272, 0x3274202C, 0x202C7A2E, 0x782E3063, 0x70726C0A, 0x2C337220, 0x2E327220, 0x78787878,
	0x3076202C, 0x3172202C, 0x6C756D0A, 0x2E307220, 0x2C626772, 0x2C307220, 0x0A337220, 0x20646461,
	0x722E3072, 0x202C6267, 0x202C3072, 0x6D0A3072, 0x6F20766F, 0x202C3043, 0x000A3072, 0x7263694D,
	0x666F736F, 0x52282074, 0x33442029, 0x20395844, 0x64616853, 0x41207265, 0x6D657373, 0x72656C62,
	0x302E3520, 0x30302E34, 0x3039322E, 0xABAB0034, 0x0200001F, 0x80000000, 0x900F0000, 0x0200001F,
	0x90000000, 0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801,
	0x0200001F, 0x80000000, 0xB0030001, 0x0200001F, 0x80000000, 0xB0070002, 0x05000051, 0xA00F0000,
	0x3B2C7692, 0x3F7FBE77, 0x00000000, 0x00000000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800,
	0x03000042, 0x800F0001, 0xB0E40001, 0xA0E40801, 0x03000005, 0x80110002, 0xB0AA0002, 0xA0000000,
	0x04000012, 0x800F0003, 0x80000002, 0x90E40000, 0x80E40001, 0x03000005, 0x80070000, 0x80E40000,
	0x80E40003, 0x03000002, 0x80070000, 0x80E40000, 0x80E40000, 0x02000001, 0x800F0800, 0x80E40000,
	0x0000FFFF
};

// Reference assembly listing for g_fpSingleTextureAndDetailTextureTwoLayer (compiled out).
// Steps, as written in the listing:
//   base    = sample s0 at t0
//   detail1 = sample s1 at t1
//   f       = saturate(t2.z * c0.x)      (c0.x = 0.002631578947, roughly 1/380)
//   base.rgb = base.rgb * lerp(detail1, v0, f).rgb * 2
//   detail2 = sample s1 at t1.xy * c1.xy (c1.x = c1.y = 4.223)
//   f2      = saturate(f * c1.x)
//   base.rgb = base.rgb * lerp(detail2, v0, f2).rgb * 2
//   output  = base (alpha is the unmodified s0 alpha)
// Both detail layers read the same sampler s1.
// NOTE(review): t2.z presumably carries a view distance - confirm against the
// vertex setup.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"dcl t2.xyz\n"
	"def c0, 0.002631578947f, 0.999f, 0.0f, 0.0f\n"
	"def c1, 4.223f, 4.223f, 0.0f, 1.0f\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"

	"mul_sat r2.x, t2.z, c0.x\n"
	"lrp r3, r2.xxxx, v0, r1\n"

	"mul r0.rgb, r0, r3\n"
	"add r0.rgb, r0, r0\n"

	"mul r1.xy, t1, c1\n"
	"texld r1, r1, s1\n"

	"mul_sat r2.x, r2.x, c1.x\n"
	"lrp r3, r2.xxxx, v0, r1\n"

	"mul r0.rgb, r0, r3\n"
	"add r0.rgb, r0, r0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpSingleTextureAndDetailTextureTwoLayer[] = {
	0xFFFF0200, 0x00A7FFFE, 0x47554244, 0x00000028, 0x00000265, 0x000000D0, 0x00000000, 0x00000000,
	0x00000015, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x000002A4, 0xFFFF0003,
	0x000002B0, 0xFFFF0004, 0x000002BC, 0xFFFF0005, 0x000002C8, 0xFFFF0006, 0x000002D4, 0xFFFF0007,
	0x000002E0, 0xFFFF0008, 0x000002EC, 0xFFFF0009, 0x00000304, 0xFFFF000A, 0x0000031C, 0xFFFF000B,
	0x0000032C, 0xFFFF000C, 0x0000033C, 0xFFFF000D, 0x0000034C, 0xFFFF000E, 0x00000360, 0xFFFF000F,
	0x00000370, 0xFFFF0010, 0x00000380, 0xFFFF0011, 0x00000390, 0xFFFF0012, 0x000003A0, 0xFFFF0013,
	0x000003B0, 0xFFFF0014, 0x000003C4, 0xFFFF0015, 0x000003D4, 0xFFFF0016, 0x000003E4, 0x325F7370,
	0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162, 0x325F6C63, 0x30732064, 0x6C63640A, 0x2E307420,
	0x640A7978, 0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420, 0x640A7978, 0x74206C63, 0x79782E32,
	0x65640A7A, 0x30632066, 0x2E30202C, 0x36323030, 0x37353133, 0x37343938, 0x30202C66, 0x3939392E,
	0x30202C66, 0x2C66302E, 0x302E3020, 0x65640A66, 0x31632066, 0x2E34202C, 0x66333232, 0x2E34202C,
	0x66333232, 0x2E30202C, 0x202C6630, 0x66302E31, 0x7865740A, 0x7220646C, 0x74202C30, 0x73202C30,
	0x65740A30, 0x20646C78, 0x202C3172, 0x202C3174, 0x6D0A3173, 0x735F6C75, 0x72207461, 0x2C782E32,
	0x2E327420, 0x63202C7A, 0x0A782E30, 0x2070726C, 0x202C3372, 0x782E3272, 0x2C787878, 0x2C307620,
	0x0A317220, 0x206C756D, 0x722E3072, 0x202C6267, 0x202C3072, 0x610A3372, 0x72206464, 0x67722E30,
	0x72202C62, 0x72202C30, 0x756D0A30, 0x3172206C, 0x2C79782E, 0x2C317420, 0x0A316320, 0x6C786574,
	0x31722064, 0x3172202C, 0x3173202C, 0x6C756D0A, 0x7461735F, 0x2E327220, 0x72202C78, 0x2C782E32,
	0x2E316320, 0x726C0A78, 0x33722070, 0x3272202C, 0x7878782E, 0x76202C78, 0x72202C30, 0x756D0A31,
	0x3072206C, 0x6267722E, 0x3072202C, 0x3372202C, 0x6464610A, 0x2E307220, 0x2C626772, 0x2C307220,
	0x0A307220, 0x20766F6D, 0x2C30436F, 0x0A307220, 0x63694D00, 0x6F736F72, 0x28207466, 0x44202952,
	0x39584433, 0x61685320, 0x20726564, 0x65737341, 0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30,
	0xAB003430, 0x0200001F, 0x80000000, 0x900F0000, 0x0200001F, 0x90000000, 0xA00F0800, 0x0200001F,
	0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F, 0x80000000, 0xB0030001,
	0x0200001F, 0x80000000, 0xB0070002, 0x05000051, 0xA00F0000, 0x3B2C7692, 0x3F7FBE77, 0x00000000,
	0x00000000, 0x05000051, 0xA00F0001, 0x408722D1, 0x408722D1, 0x00000000, 0x3F800000, 0x03000042,
	0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000042, 0x800F0001, 0xB0E40001, 0xA0E40801, 0x03000005,
	0x80110002, 0xB0AA0002, 0xA0000000, 0x04000012, 0x800F0003, 0x80000002, 0x90E40000, 0x80E40001,
	0x03000005, 0x80070000, 0x80E40000, 0x80E40003, 0x03000002, 0x80070000, 0x80E40000, 0x80E40000,
	0x03000005, 0x80030001, 0xB0E40001, 0xA0E40001, 0x03000042, 0x800F0001, 0x80E40001, 0xA0E40801,
	0x03000005, 0x80110002, 0x80000002, 0xA0000001, 0x04000012, 0x800F0003, 0x80000002, 0x90E40000,
	0x80E40001, 0x03000005, 0x80070000, 0x80E40000, 0x80E40003, 0x03000002, 0x80070000, 0x80E40000,
	0x80E40000, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};

// Reference assembly listing for g_fpDualTextureAndDetailTexture (compiled out).
// Steps, as written in the listing:
//   base   = (sample s0 at t0) * (sample s1 at t1)
//   base.rgb = base.rgb * v0.a + base.rgb        (i.e. rgb scaled by 1 + v0.a)
//   detail = sample s2 at t2
//   f      = saturate(t3.z * c0.x)               (c0.x = 0.002631578947, roughly 1/380)
//   base.rgb = base.rgb * lerp(detail, v0, f).rgb * 2
//   output = base (alpha is the s0 * s1 alpha product)
// NOTE(review): v0.a presumably selects between 1X and 2X modulation, and t3.z
// presumably carries a view distance - confirm both against the caller.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"dcl_2d s2\n"
	"dcl t2.xy\n"
	"dcl t3.xyz\n"
	"def c0, 0.002631578947f, 0.999f, 0.0f, 0.0f\n"

	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"
	"texld r2, t2, s2\n"

	"mul r0, r0, r1\n"
	"mad r0.rgb, r0, v0.aaaa, r0\n"

	"mul_sat r1.x, t3.z, c0.x\n"
	"lrp r3, r1.xxxx, v0, r2\n"

	"mul r0.rgb, r0, r3\n"
	"add r0.rgb, r0, r0\n"

	"mov oC0, r0\n"
;
#endif
// Pre-assembled ps_2_0 token stream for the listing above (version token
// 0xFFFF0200 first, end token 0x0000FFFF last). The leading comment block
// embeds the source text as ASCII; regenerate the array rather than editing it.
static const DWORD g_fpDualTextureAndDetailTexture[] = {
	0xFFFF0200, 0x0090FFFE, 0x47554244, 0x00000028, 0x00000208, 0x000000C0, 0x00000000, 0x00000000,
	0x00000013, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x00000248, 0xFFFF0003,
	0x00000254, 0xFFFF0004, 0x00000260, 0xFFFF0005, 0x0000026C, 0xFFFF0006, 0x00000278, 0xFFFF0007,
	0x00000284, 0xFFFF0008, 0x00000290, 0xFFFF0009, 0x0000029C, 0xFFFF000A, 0x000002A8, 0xFFFF000B,
	0x000002C0, 0xFFFF000C, 0x000002D0, 0xFFFF000D, 0x000002E0, 0xFFFF000E, 0x000002F0, 0xFFFF000F,
	0x00000300, 0xFFFF0010, 0x00000314, 0xFFFF0011, 0x00000324, 0xFFFF0012, 0x00000338, 0xFFFF0013,
	0x00000348, 0xFFFF0014, 0x00000358, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162,
	0x325F6C63, 0x30732064, 0x6C63640A, 0x2E307420, 0x640A7978, 0x325F6C63, 0x31732064, 0x6C63640A,
	0x2E317420, 0x640A7978, 0x325F6C63, 0x32732064, 0x6C63640A, 0x2E327420, 0x640A7978, 0x74206C63,
	0x79782E33, 0x65640A7A, 0x30632066, 0x2E30202C, 0x36323030, 0x37353133, 0x37343938, 0x30202C66,
	0x3939392E, 0x30202C66, 0x2C66302E, 0x302E3020, 0x65740A66, 0x20646C78, 0x202C3072, 0x202C3074,
	0x740A3073, 0x646C7865, 0x2C317220, 0x2C317420, 0x0A317320, 0x6C786574, 0x32722064, 0x3274202C,
	0x3273202C, 0x6C756D0A, 0x2C307220, 0x2C307220, 0x0A317220, 0x2064616D, 0x722E3072, 0x202C6267,
	0x202C3072, 0x612E3076, 0x2C616161, 0x0A307220, 0x5F6C756D, 0x20746173, 0x782E3172, 0x3374202C,
	0x202C7A2E, 0x782E3063, 0x70726C0A, 0x2C337220, 0x2E317220, 0x78787878, 0x3076202C, 0x3272202C,
	0x6C756D0A, 0x2E307220, 0x2C626772, 0x2C307220, 0x0A337220, 0x20646461, 0x722E3072, 0x202C6267,
	0x202C3072, 0x6D0A3072, 0x6F20766F, 0x202C3043, 0x000A3072, 0x7263694D, 0x666F736F, 0x52282074,
	0x33442029, 0x20395844, 0x64616853, 0x41207265, 0x6D657373, 0x72656C62, 0x302E3520, 0x30302E34,
	0x3039322E, 0xABAB0034, 0x0200001F, 0x80000000, 0x900F0000, 0x0200001F, 0x90000000, 0xA00F0800,
	0x0200001F, 0x80000000, 0xB0030000, 0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F, 0x80000000,
	0xB0030001, 0x0200001F, 0x90000000, 0xA00F0802, 0x0200001F, 0x80000000, 0xB0030002, 0x0200001F,
	0x80000000, 0xB0070003, 0x05000051, 0xA00F0000, 0x3B2C7692, 0x3F7FBE77, 0x00000000, 0x00000000,
	0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000042, 0x800F0001, 0xB0E40001, 0xA0E40801,
	0x03000042, 0x800F0002, 0xB0E40002, 0xA0E40802, 0x03000005, 0x800F0000, 0x80E40000, 0x80E40001,
	0x04000004, 0x80070000, 0x80E40000, 0x90FF0000, 0x80E40000, 0x03000005, 0x80110001, 0xB0AA0003,
	0xA0000000, 0x04000012, 0x800F0003, 0x80000001, 0x90E40000, 0x80E40002, 0x03000005, 0x80070000,
	0x80E40000, 0x80E40003, 0x03000002, 0x80070000, 0x80E40000, 0x80E40000, 0x02000001, 0x800F0800,
	0x80E40000, 0x0000FFFF
};

//Assembly listing of a ps_2_0 pixel shader (base texture * second texture, then two detail
//texture layers). It is compiled out with #if 0. AssembleShader(), which is only built when
//UTD3D9R_INCLUDE_SHADER_ASM is defined, reads g_tempShaderString, so this block must be
//enabled as well before that helper can be built.
//NOTE(review): the text embedded in g_fpDualTextureAndDetailTextureTwoLayer below matches this
//listing, so that array looks like the assembled form of this source - confirm before editing either.
#if 0
static const char *g_tempShaderString =
	"ps_2_0\n"

	//Inputs: v0 = vertex color, t0..t2 = 2D texture coordinates for samplers s0..s2, t3 = 3 component value (only .z is read)
	"dcl v0.rgba\n"
	"dcl_2d s0\n"
	"dcl t0.xy\n"
	"dcl_2d s1\n"
	"dcl t1.xy\n"
	"dcl_2d s2\n"
	"dcl t2.xy\n"
	"dcl t3.xyz\n"
	//c0.x scales t3.z into the first layer blend factor
	"def c0, 0.002631578947f, 0.999f, 0.0f, 0.0f\n"
	//c1.xy scales the s2 texture coordinates for the second layer, c1.x also scales the blend factor
	"def c1, 4.223f, 4.223f, 0.0f, 1.0f\n"

	//Sample all three textures
	"texld r0, t0, s0\n"
	"texld r1, t1, s1\n"
	"texld r2, t2, s2\n"

	//r0 = s0 * s1, then r0.rgb = r0 * v0.a + r0
	"mul r0, r0, r1\n"
	"mad r0.rgb, r0, v0.aaaa, r0\n"

	//r1.x = saturate(t3.z * c0.x), r3 = lerp from the s2 sample (factor 0) to v0 (factor 1)
	"mul_sat r1.x, t3.z, c0.x\n"
	"lrp r3, r1.xxxx, v0, r2\n"

	//Modulate by the first layer and double the result
	"mul r0.rgb, r0, r3\n"
	"add r0.rgb, r0, r0\n"

	//Second layer: resample s2 with the coordinates scaled by c1
	"mul r2.xy, t2, c1\n"
	"texld r2, r2, s2\n"

	//Second layer blend factor = saturate(first factor * c1.x)
	"mul_sat r1.x, r1.x, c1.x\n"
	"lrp r3, r1.xxxx, v0, r2\n"

	//Modulate by the second layer and double the result
	"mul r0.rgb, r0, r3\n"
	"add r0.rgb, r0, r0\n"

	//Output color
	"mov oC0, r0\n"
;
#endif
//Direct3D9 pixel shader token stream stored as raw DWORDs. It starts with 0xFFFF0200 and ends
//with 0x0000FFFF. The large block of ASCII data near the start is embedded debug text that
//contains the assembly source (a "ps_2_0" listing) and an assembler identification string.
//NOTE(review): the embedded listing matches the g_tempShaderString text that is compiled out
//just above this array, so this data was presumably produced by AssembleShader() from that
//source - confirm, and regenerate rather than hand edit.
static const DWORD g_fpDualTextureAndDetailTextureTwoLayer[] = {
	0xFFFF0200, 0x00C5FFFE, 0x47554244, 0x00000028, 0x000002DD, 0x000000F8, 0x00000000, 0x00000000,
	0x0000001A, 0x00000028, 0x00000000, 0x00000000, 0x00000000, 0xFFFF0002, 0x0000031C, 0xFFFF0003,
	0x00000328, 0xFFFF0004, 0x00000334, 0xFFFF0005, 0x00000340, 0xFFFF0006, 0x0000034C, 0xFFFF0007,
	0x00000358, 0xFFFF0008, 0x00000364, 0xFFFF0009, 0x00000370, 0xFFFF000A, 0x0000037C, 0xFFFF000B,
	0x00000394, 0xFFFF000C, 0x000003AC, 0xFFFF000D, 0x000003BC, 0xFFFF000E, 0x000003CC, 0xFFFF000F,
	0x000003DC, 0xFFFF0010, 0x000003EC, 0xFFFF0011, 0x00000400, 0xFFFF0012, 0x00000410, 0xFFFF0013,
	0x00000424, 0xFFFF0014, 0x00000434, 0xFFFF0015, 0x00000444, 0xFFFF0016, 0x00000454, 0xFFFF0017,
	0x00000464, 0xFFFF0018, 0x00000474, 0xFFFF0019, 0x00000488, 0xFFFF001A, 0x00000498, 0xFFFF001B,
	0x000004A8, 0x325F7370, 0x640A305F, 0x76206C63, 0x67722E30, 0x640A6162, 0x325F6C63, 0x30732064,
	0x6C63640A, 0x2E307420, 0x640A7978, 0x325F6C63, 0x31732064, 0x6C63640A, 0x2E317420, 0x640A7978,
	0x325F6C63, 0x32732064, 0x6C63640A, 0x2E327420, 0x640A7978, 0x74206C63, 0x79782E33, 0x65640A7A,
	0x30632066, 0x2E30202C, 0x36323030, 0x37353133, 0x37343938, 0x30202C66, 0x3939392E, 0x30202C66,
	0x2C66302E, 0x302E3020, 0x65640A66, 0x31632066, 0x2E34202C, 0x66333232, 0x2E34202C, 0x66333232,
	0x2E30202C, 0x202C6630, 0x66302E31, 0x7865740A, 0x7220646C, 0x74202C30, 0x73202C30, 0x65740A30,
	0x20646C78, 0x202C3172, 0x202C3174, 0x740A3173, 0x646C7865, 0x2C327220, 0x2C327420, 0x0A327320,
	0x206C756D, 0x202C3072, 0x202C3072, 0x6D0A3172, 0x72206461, 0x67722E30, 0x72202C62, 0x76202C30,
	0x61612E30, 0x202C6161, 0x6D0A3072, 0x735F6C75, 0x72207461, 0x2C782E31, 0x2E337420, 0x63202C7A,
	0x0A782E30, 0x2070726C, 0x202C3372, 0x782E3172, 0x2C787878, 0x2C307620, 0x0A327220, 0x206C756D,
	0x722E3072, 0x202C6267, 0x202C3072, 0x610A3372, 0x72206464, 0x67722E30, 0x72202C62, 0x72202C30,
	0x756D0A30, 0x3272206C, 0x2C79782E, 0x2C327420, 0x0A316320, 0x6C786574, 0x32722064, 0x3272202C,
	0x3273202C, 0x6C756D0A, 0x7461735F, 0x2E317220, 0x72202C78, 0x2C782E31, 0x2E316320, 0x726C0A78,
	0x33722070, 0x3172202C, 0x7878782E, 0x76202C78, 0x72202C30, 0x756D0A32, 0x3072206C, 0x6267722E,
	0x3072202C, 0x3372202C, 0x6464610A, 0x2E307220, 0x2C626772, 0x2C307220, 0x0A307220, 0x20766F6D,
	0x2C30436F, 0x0A307220, 0x63694D00, 0x6F736F72, 0x28207466, 0x44202952, 0x39584433, 0x61685320,
	0x20726564, 0x65737341, 0x656C626D, 0x2E352072, 0x302E3430, 0x39322E30, 0xAB003430, 0x0200001F,
	0x80000000, 0x900F0000, 0x0200001F, 0x90000000, 0xA00F0800, 0x0200001F, 0x80000000, 0xB0030000,
	0x0200001F, 0x90000000, 0xA00F0801, 0x0200001F, 0x80000000, 0xB0030001, 0x0200001F, 0x90000000,
	0xA00F0802, 0x0200001F, 0x80000000, 0xB0030002, 0x0200001F, 0x80000000, 0xB0070003, 0x05000051,
	0xA00F0000, 0x3B2C7692, 0x3F7FBE77, 0x00000000, 0x00000000, 0x05000051, 0xA00F0001, 0x408722D1,
	0x408722D1, 0x00000000, 0x3F800000, 0x03000042, 0x800F0000, 0xB0E40000, 0xA0E40800, 0x03000042,
	0x800F0001, 0xB0E40001, 0xA0E40801, 0x03000042, 0x800F0002, 0xB0E40002, 0xA0E40802, 0x03000005,
	0x800F0000, 0x80E40000, 0x80E40001, 0x04000004, 0x80070000, 0x80E40000, 0x90FF0000, 0x80E40000,
	0x03000005, 0x80110001, 0xB0AA0003, 0xA0000000, 0x04000012, 0x800F0003, 0x80000001, 0x90E40000,
	0x80E40002, 0x03000005, 0x80070000, 0x80E40000, 0x80E40003, 0x03000002, 0x80070000, 0x80E40000,
	0x80E40000, 0x03000005, 0x80030002, 0xB0E40002, 0xA0E40001, 0x03000042, 0x800F0002, 0x80E40002,
	0xA0E40802, 0x03000005, 0x80110001, 0x80000001, 0xA0000001, 0x04000012, 0x800F0003, 0x80000001,
	0x90E40000, 0x80E40002, 0x03000005, 0x80070000, 0x80E40000, 0x80E40003, 0x03000002, 0x80070000,
	0x80E40000, 0x80E40000, 0x02000001, 0x800F0800, 0x80E40000, 0x0000FFFF
};


/*-----------------------------------------------------------------------------
	D3D9Drv.
-----------------------------------------------------------------------------*/

//NOTE(review): IMPLEMENT_CLASS is defined outside this file; presumably it emits the engine
//class registration for UD3D9RenderDevice - confirm against the engine headers.
IMPLEMENT_CLASS(UD3D9RenderDevice);


#ifdef UTD3D9R_INCLUDE_SHADER_ASM
/*
	Development helper that assembles g_tempShaderString with D3DXAssembleShader and
	writes the outcome to dout.

	On failure the assembler's error listing is printed (when one was returned).
	On success the compiled token stream is printed as comma separated hex DWORDs,
	eight per line, in a form that can be pasted into a DWORD array initializer.

	Both D3DX buffers are released before returning on every path.

	Note that g_tempShaderString is currently wrapped in #if 0, so it has to be
	enabled too before this function can be built.
*/
void UD3D9RenderDevice::AssembleShader(void) {
	HRESULT hResult;
	LPD3DXBUFFER pBufCompiled = NULL;
	LPD3DXBUFFER pBufErrors = NULL;

	dout << L"Enter shader assembly" << std::endl;

	hResult = D3DXAssembleShader(g_tempShaderString, strlen(g_tempShaderString), NULL, NULL,
		D3DXSHADER_DEBUG, &pBufCompiled, &pBufErrors);
	if (FAILED(hResult)) {
		dout << L"ERROR ASSEMBLING SHADER" << std::endl;

		//A failing call is not guaranteed to hand back an error buffer, so only
		//dereference it when one is actually present
		if (pBufErrors != NULL) {
			DWORD bufSize = pBufErrors->GetBufferSize();
			const char *bufPtr = (const char *)pBufErrors->GetBufferPointer();
			std::string sMsg;

			dout << L"Size = " << bufSize << std::endl;
			if (bufPtr != NULL) {
				sMsg.assign(bufPtr, bufSize);
			}
			dout << L"Data = " << appFromAnsi(sMsg.c_str()) << std::endl;
		}
		else {
			dout << L"No error buffer was returned" << std::endl;
		}

		//Free buffers
		if (pBufCompiled) pBufCompiled->Release();
		if (pBufErrors) pBufErrors->Release();

		return;
	}
	dout << L"SHADER ASSEMBLED OKAY" << std::endl;

	if (pBufCompiled != NULL) {
		DWORD compBufSize = pBufCompiled->GetBufferSize();
		const BYTE *compBufPtr = (const BYTE *)pBufCompiled->GetBufferPointer();
		DWORD u;
		std::basic_string<TCHAR> sMsg;

		dout << L"Compiled Size = " << compBufSize << std::endl;
		dout << L"Data = " << std::endl;
		//Only whole DWORDs are dumped so that a size that is not a multiple of 4
		//can never cause a read past the end of the buffer
		for (u = 0; (compBufPtr != NULL) && ((u + sizeof(DWORD)) <= compBufSize); u += 4) {
			//Start a new output line every 32 bytes (eight DWORDs)
			if ((u % 32) == 0) {
				dout << sMsg << std::endl;
				sMsg.resize(0);
			}
			sMsg += L"0x" + HexString(32, *(const DWORD *)(compBufPtr + u)) + L", ";
		}
		dout << sMsg << std::endl;
	}

	dout << L"Leave shader assembly" << std::endl;

	//Free buffers
	if (pBufCompiled) pBufCompiled->Release();
	if (pBufErrors) pBufErrors->Release();

	return;
}
#endif


/*
	Sets the default value of every configurable option, registers each option as a
	config property through the SC_Add*ConfigParam helpers, sets the driver capability
	flags, and resets all D3D object pointers and related state to their "not created"
	values.

	The order of the SC_Add*ConfigParam calls and the first argument of each
	SC_AddBoolConfigParam call belong together; do not reorder or renumber them
	without checking how the properties are laid out in the class declaration.
*/
void UD3D9RenderDevice::StaticConstructor() {
	unsigned int u;

	guard(UD3D9RenderDevice::StaticConstructor);

	//Default for OneXBlending depends on the build type
	//Note that this macro is not undefined again at the end of the function
#ifdef UTGLR_DX_BUILD
#define UTGLR_DEFAULT_ONEXBLENDING	1
#else
#define UTGLR_DEFAULT_ONEXBLENDING	0
#endif

	//Helpers that expand to the member itself followed by its CPP_PROPERTY description
	//The _DCV form passes the plain member but describes the property as DCV._name
#define CPP_PROPERTY_LOCAL(_name) _name, CPP_PROPERTY(_name)
#define CPP_PROPERTY_LOCAL_DCV(_name) _name, CPP_PROPERTY(DCV._name)

	//Set parameter defaults and add parameters
	//The first argument of SC_AddBoolConfigParam is the bit position used when storing the default value
	//Within each run of consecutive bool params it counts down to 0 and it restarts after every non-bool param
	//NOTE(review): this presumably mirrors the bit field packing of the UBOOL members - confirm against the class declaration
#ifdef UTGLR_UNREAL_BUILD
	SC_AddBoolConfigParam(0,  TEXT("DetailTextures"), CPP_PROPERTY_LOCAL(DetailTextures), 0);
#endif
	SC_AddFloatConfigParam(TEXT("LODBias"), CPP_PROPERTY_LOCAL(LODBias), 0.0f);
	SC_AddFloatConfigParam(TEXT("GammaOffset"), CPP_PROPERTY_LOCAL(GammaOffset), 0.0f);
	SC_AddFloatConfigParam(TEXT("GammaOffsetRed"), CPP_PROPERTY_LOCAL(GammaOffsetRed), 0.0f);
	SC_AddFloatConfigParam(TEXT("GammaOffsetGreen"), CPP_PROPERTY_LOCAL(GammaOffsetGreen), 0.0f);
	SC_AddFloatConfigParam(TEXT("GammaOffsetBlue"), CPP_PROPERTY_LOCAL(GammaOffsetBlue), 0.0f);
	SC_AddBoolConfigParam(1,  TEXT("GammaCorrectScreenshots"), CPP_PROPERTY_LOCAL(GammaCorrectScreenshots), 0);
	SC_AddBoolConfigParam(0,  TEXT("OneXBlending"), CPP_PROPERTY_LOCAL(OneXBlending), UTGLR_DEFAULT_ONEXBLENDING);
	SC_AddIntConfigParam(TEXT("MaxLogUOverV"), CPP_PROPERTY_LOCAL(MaxLogUOverV), 8);
	SC_AddIntConfigParam(TEXT("MaxLogVOverU"), CPP_PROPERTY_LOCAL(MaxLogVOverU), 8);
	SC_AddIntConfigParam(TEXT("MinLogTextureSize"), CPP_PROPERTY_LOCAL(MinLogTextureSize), 0);
	SC_AddIntConfigParam(TEXT("MaxLogTextureSize"), CPP_PROPERTY_LOCAL(MaxLogTextureSize), 8);
	SC_AddBoolConfigParam(7,  TEXT("UseMultiTexture"), CPP_PROPERTY_LOCAL(UseMultiTexture), 1);
	SC_AddBoolConfigParam(6,  TEXT("UsePalette"), CPP_PROPERTY_LOCAL(UsePalette), 0);
	SC_AddBoolConfigParam(5,  TEXT("UsePrecache"), CPP_PROPERTY_LOCAL(UsePrecache), 0);
	SC_AddBoolConfigParam(4,  TEXT("UseTrilinear"), CPP_PROPERTY_LOCAL(UseTrilinear), 0);
	SC_AddBoolConfigParam(3,  TEXT("UseAlphaPalette"), CPP_PROPERTY_LOCAL(UseAlphaPalette), 0);
	SC_AddBoolConfigParam(2,  TEXT("UseS3TC"), CPP_PROPERTY_LOCAL(UseS3TC), 0);
	SC_AddBoolConfigParam(1,  TEXT("Use16BitTextures"), CPP_PROPERTY_LOCAL(Use16BitTextures), 0);
	SC_AddBoolConfigParam(0,  TEXT("Use565Textures"), CPP_PROPERTY_LOCAL(Use565Textures), 0);
	SC_AddIntConfigParam(TEXT("MaxAnisotropy"), CPP_PROPERTY_LOCAL(MaxAnisotropy), 0);
	SC_AddBoolConfigParam(0,  TEXT("NoFiltering"), CPP_PROPERTY_LOCAL(NoFiltering), 0);
	SC_AddIntConfigParam(TEXT("MaxTMUnits"), CPP_PROPERTY_LOCAL(MaxTMUnits), 0);
	SC_AddIntConfigParam(TEXT("RefreshRate"), CPP_PROPERTY_LOCAL(RefreshRate), 0);
	SC_AddIntConfigParam(TEXT("DetailMax"), CPP_PROPERTY_LOCAL(DetailMax), 0);
	SC_AddBoolConfigParam(11, TEXT("UseDetailAlpha"), CPP_PROPERTY_LOCAL(UseDetailAlpha), 1);
	SC_AddBoolConfigParam(10, TEXT("DetailClipping"), CPP_PROPERTY_LOCAL(DetailClipping), 0);
	SC_AddBoolConfigParam(9,  TEXT("ColorizeDetailTextures"), CPP_PROPERTY_LOCAL(ColorizeDetailTextures), 0);
	SC_AddBoolConfigParam(8,  TEXT("SinglePassFog"), CPP_PROPERTY_LOCAL(SinglePassFog), 0);
	SC_AddBoolConfigParam(7,  TEXT("SinglePassDetail"), CPP_PROPERTY_LOCAL_DCV(SinglePassDetail), 0);
	SC_AddBoolConfigParam(6,  TEXT("BufferClippedActorTris"), CPP_PROPERTY_LOCAL(BufferClippedActorTris), 1);
	SC_AddBoolConfigParam(5,  TEXT("BufferTileQuads"), CPP_PROPERTY_LOCAL(BufferTileQuads), 1);
	SC_AddBoolConfigParam(4,  TEXT("UseSSE"), CPP_PROPERTY_LOCAL(UseSSE), 1);
	SC_AddBoolConfigParam(3,  TEXT("UseSSE2"), CPP_PROPERTY_LOCAL(UseSSE2), 1);
	SC_AddBoolConfigParam(2,  TEXT("UseTexIdPool"), CPP_PROPERTY_LOCAL(UseTexIdPool), 1);
	SC_AddBoolConfigParam(1,  TEXT("UseTexPool"), CPP_PROPERTY_LOCAL(UseTexPool), 1);
	SC_AddBoolConfigParam(0,  TEXT("CacheStaticMaps"), CPP_PROPERTY_LOCAL(CacheStaticMaps), 0);
	SC_AddIntConfigParam(TEXT("DynamicTexIdRecycleLevel"), CPP_PROPERTY_LOCAL(DynamicTexIdRecycleLevel), 100);
	SC_AddBoolConfigParam(2,  TEXT("TexDXT1ToDXT3"), CPP_PROPERTY_LOCAL(TexDXT1ToDXT3), 0);
	SC_AddBoolConfigParam(1,  TEXT("UseVertexProgram"), CPP_PROPERTY_LOCAL_DCV(UseVertexProgram), 0);
	SC_AddBoolConfigParam(0,  TEXT("UseFragmentProgram"), CPP_PROPERTY_LOCAL_DCV(UseFragmentProgram), 0);
	SC_AddIntConfigParam(TEXT("SwapInterval"), CPP_PROPERTY_LOCAL(SwapInterval), -1);
	SC_AddIntConfigParam(TEXT("FrameRateLimit"), CPP_PROPERTY_LOCAL(FrameRateLimit), 0);
	SC_AddBoolConfigParam(7,  TEXT("SceneNodeHack"), CPP_PROPERTY_LOCAL(SceneNodeHack), 1);
	SC_AddBoolConfigParam(6,  TEXT("SmoothMaskedTextures"), CPP_PROPERTY_LOCAL(SmoothMaskedTextures), 0);
	SC_AddBoolConfigParam(5,  TEXT("MaskedTextureHack"), CPP_PROPERTY_LOCAL(MaskedTextureHack), 1);
	SC_AddBoolConfigParam(4,  TEXT("UseTripleBuffering"), CPP_PROPERTY_LOCAL(UseTripleBuffering), 0);
	SC_AddBoolConfigParam(3,  TEXT("UsePureDevice"), CPP_PROPERTY_LOCAL(UsePureDevice), 0);
	SC_AddBoolConfigParam(2,  TEXT("UseSoftwareVertexProcessing"), CPP_PROPERTY_LOCAL(UseSoftwareVertexProcessing), 0);
	SC_AddBoolConfigParam(1,  TEXT("RequestHighResolutionZ"), CPP_PROPERTY_LOCAL(RequestHighResolutionZ), 1);
	SC_AddBoolConfigParam(0,  TEXT("UseAA"), CPP_PROPERTY_LOCAL(UseAA), 0);
	SC_AddIntConfigParam(TEXT("NumAASamples"), CPP_PROPERTY_LOCAL(NumAASamples), 0);
	SC_AddBoolConfigParam(1,  TEXT("NoAATiles"), CPP_PROPERTY_LOCAL(NoAATiles), 0);
	SC_AddBoolConfigParam(0,  TEXT("ZRangeHack"), CPP_PROPERTY_LOCAL(ZRangeHack), 0);

#undef CPP_PROPERTY_LOCAL
#undef CPP_PROPERTY_LOCAL_DCV

	//Driver flags
	SpanBased				= 0;
	SupportsFogMaps			= 1;
	//Distance fog is only reported as supported in the Rune build
#ifdef UTGLR_RUNE_BUILD
	SupportsDistanceFog		= 1;
#else
	SupportsDistanceFog		= 0;
#endif
	FullscreenOnly			= 0;

	SupportsLazyTextures	= 0;
	PrefersDeferredLoad		= 0;

	//Mark device pointers as invalid
	m_d3d9 = NULL;
	m_d3dDevice = NULL;

	//Invalidate fixed texture ids
	m_pNoTexObj = NULL;
	m_pAlphaTexObj = NULL;

	//Mark all vertex buffer objects as invalid
	m_d3dVertexColorBuffer = NULL;
	m_d3dSecondaryColorBuffer = NULL;
	for (u = 0; u < MAX_TMUNITS; u++) {
		m_d3dTexCoordBuffer[u] = NULL;
	}

	//Mark all vertex declarations as not created
	m_oneColorVertexDecl = NULL;
	for (u = 0; u < MAX_TMUNITS; u++) {
		m_standardNTextureVertexDecl[u] = NULL;
	}
	m_twoColorSingleTextureVertexDecl = NULL;

	//Mark all vertex shader definitions as not created
	m_vpDefaultRenderingState = NULL;
	m_vpDefaultRenderingStateWithFog = NULL;
#ifdef UTGLR_RUNE_BUILD
	m_vpDefaultRenderingStateWithLinearFog = NULL;
#endif
	for (u = 0; u < MAX_TMUNITS; u++) {
		m_vpComplexSurface[u] = NULL;
	}
	m_vpComplexSurfaceDetailAlpha = NULL;
	m_vpComplexSurfaceSingleTextureAndDetailTexture = NULL;
	m_vpComplexSurfaceDualTextureAndDetailTexture = NULL;
	m_vpComplexSurfaceSingleTextureWithPos = NULL;
	m_vpComplexSurfaceDualTextureWithPos = NULL;
	m_vpComplexSurfaceTripleTextureWithPos = NULL;

	//Mark all fragment shader definitions as not created
	m_fpDefaultRenderingState = NULL;
	m_fpDefaultRenderingStateWithFog = NULL;
#ifdef UTGLR_RUNE_BUILD
	m_fpDefaultRenderingStateWithLinearFog = NULL;
#endif
	m_fpComplexSurfaceSingleTexture = NULL;
	m_fpComplexSurfaceDualTextureModulated = NULL;
	m_fpComplexSurfaceDualTextureModulated2X = NULL;
	m_fpComplexSurfaceSingleTextureWithFog = NULL;
	m_fpComplexSurfaceDualTextureModulatedWithFog = NULL;
	m_fpComplexSurfaceDualTextureModulated2XWithFog = NULL;
	m_fpDetailTexture = NULL;
	m_fpDetailTextureTwoLayer = NULL;
	m_fpSingleTextureAndDetailTexture = NULL;
	m_fpSingleTextureAndDetailTextureTwoLayer = NULL;
	m_fpDualTextureAndDetailTexture = NULL;
	m_fpDualTextureAndDetailTextureTwoLayer = NULL;

	//Reset TMUnits in case resource cleanup code is ever called before this is initialized
	TMUnits = 0;

	//Clear the m_SetRes_isDeviceReset flag
	m_SetRes_isDeviceReset = false;

	unguard;
}


/*
	Stores the default for one boolean option and registers it as a config property
	named pName in the "Options" category.

	The default is written as a single bit at position BitMaskOffset (or 0 when
	defaultValue is zero), so param does not strictly follow the usual UBOOL
	convention of holding only 0 or 1.
*/
void UD3D9RenderDevice::SC_AddBoolConfigParam(DWORD BitMaskOffset, const TCHAR *pName, UBOOL &param, ECppProperty EC_CppProperty, INT InOffset, UBOOL defaultValue) {
	if (defaultValue != 0) {
		param = 1 << BitMaskOffset;
	}
	else {
		param = 0;
	}

	new(GetClass(), pName, RF_Public)
		UBoolProperty(EC_CppProperty, InOffset, TEXT("Options"), CPF_Config);
}

/*
	Stores the default for one integer option and registers it as a config property
	named pName in the "Options" category.
*/
void UD3D9RenderDevice::SC_AddIntConfigParam(const TCHAR *pName, INT &param, ECppProperty EC_CppProperty, INT InOffset, INT defaultValue) {
	//Default value first, then the property object that exposes the member
	param = defaultValue;

	new(GetClass(), pName, RF_Public)
		UIntProperty(EC_CppProperty, InOffset, TEXT("Options"), CPF_Config);
}

/*
	Stores the default for one floating point option and registers it as a config
	property named pName in the "Options" category.
*/
void UD3D9RenderDevice::SC_AddFloatConfigParam(const TCHAR *pName, FLOAT &param, ECppProperty EC_CppProperty, INT InOffset, FLOAT defaultValue) {
	//Default value first, then the property object that exposes the member
	param = defaultValue;

	new(GetClass(), pName, RF_Public)
		UFloatProperty(EC_CppProperty, InOffset, TEXT("Options"), CPF_Config);
}


//Writes one "utd3d9r: <name> = <value>" line for an integer parameter to the debug output stream
void UD3D9RenderDevice::DbgPrintInitParam(const TCHAR *pName, INT value) {
	dout
		<< TEXT("utd3d9r: ")
		<< pName
		<< TEXT(" = ")
		<< value
		<< std::endl;
}

//Writes one "utd3d9r: <name> = <value>" line for a floating point parameter to the debug output stream
void UD3D9RenderDevice::DbgPrintInitParam(const TCHAR *pName, FLOAT value) {
	dout
		<< TEXT("utd3d9r: ")
		<< pName
		<< TEXT(" = ")
		<< value
		<< std::endl;
}


#ifdef UTGLR_INCLUDE_SSE_CODE
/*
	Probes for the cpuid instruction by executing it (function 0) inside a
	structured exception handler.

	Returns true if the instruction executed without raising an exception,
	false if any exception was caught. The values cpuid returns are not used.
*/
bool UD3D9RenderDevice::CPU_DetectCPUID(void) {
	//Check for cpuid instruction support
	__try {
		__asm {
			//CPUID function 0
			xor eax, eax
			cpuid
		}
	}
	__except (EXCEPTION_EXECUTE_HANDLER) {
		//Executing cpuid faulted, report it as unavailable
		return false;
	}

	return true;
}

/*
	Reports whether SSE instructions can be used.

	Three checks are made in order:
	1. cpuid must be available (CPU_DetectCPUID).
	2. cpuid function 1 must report mask 0x02000000 (bit 25) set in edx.
	3. Executing an SSE instruction (xorps) must not raise an exception,
	   which is how missing OS support is detected.

	Returns true only if all three checks pass.
*/
bool UD3D9RenderDevice::CPU_DetectSSE(void) {
	bool bSupportsSSE;

	//Check for cpuid instruction support
	if (CPU_DetectCPUID() != true) {
		return false;
	}

	//Check for SSE support
	bSupportsSSE = false;
	__asm {
		//CPUID function 1
		mov eax, 1
		cpuid

		//Check the SSE bit
		test edx, 0x02000000
		jz l_no_sse

		//Set bSupportsSSE to true
		mov bSupportsSSE, 1

l_no_sse:
	}

	//Return if no CPU SSE support
	if (bSupportsSSE == false) {
		return bSupportsSSE;
	}

	//Check for SSE OS support
	__try {
		__asm {
			//Execute SSE instruction
			xorps xmm0, xmm0
		}
	}
	__except (EXCEPTION_EXECUTE_HANDLER) {
		//Clear SSE support flag
		bSupportsSSE = false;
	}

	return bSupportsSSE;
}

/*
	Reports whether SSE2 instructions can be used.

	Three checks are made in order:
	1. cpuid must be available (CPU_DetectCPUID).
	2. cpuid function 1 must report mask 0x04000000 (bit 26) set in edx.
	3. Executing an SSE2 instruction (xorpd) must not raise an exception,
	   which is how missing OS support is detected.

	Returns true only if all three checks pass.
*/
bool UD3D9RenderDevice::CPU_DetectSSE2(void) {
	bool bSupportsSSE2;

	//Check for cpuid instruction support
	if (CPU_DetectCPUID() != true) {
		return false;
	}

	//Check for SSE2 support
	bSupportsSSE2 = false;
	__asm {
		//CPUID function 1
		mov eax, 1
		cpuid

		//Check the SSE2 bit
		test edx, 0x04000000
		jz l_no_sse2

		//Set bSupportsSSE2 to true
		mov bSupportsSSE2, 1

l_no_sse2:
	}

	//Return if no CPU SSE2 support
	if (bSupportsSSE2 == false) {
		return bSupportsSSE2;
	}

	//Check for SSE2 OS support
	__try {
		__asm {
			//Execute SSE2 instruction
			xorpd xmm0, xmm0
		}
	}
	__except (EXCEPTION_EXECUTE_HANDLER) {
		//Clear SSE2 support flag
		bSupportsSSE2 = false;
	}

	return bSupportsSSE2;
}
#endif //UTGLR_INCLUDE_SSE_CODE


/*
	General purpose triangle buffering routine.

	Appends the three vertices referenced by Pts[0..2] to the texture coordinate and
	vertex/color arrays of pRD, starting at pRD->BufferedVerts, and advances
	BufferedVerts by 3. The color written depends on pRD->m_requestedColorFlags:
	- CF_FOG_MODE: light scaled by 255 * (1 - Fog.W), plus a fog color in the secondary color array
	- CF_COLOR_ARRAY: the light color
	- otherwise: opaque white
*/
static void FASTCALL Buffer3Verts(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	FGLTexCoord *pTexCoordOut = &pRD->m_pTexCoordArray[0][pRD->BufferedVerts];
	FGLVertexColor *pVertexOut = &pRD->m_pVertexColorArray[pRD->BufferedVerts];
	FGLSecondaryColor *pSpecularOut = &pRD->m_pSecondaryColorArray[pRD->BufferedVerts];
	pRD->BufferedVerts += 3;

	for (INT vert = 0; vert < 3; vert++, pTexCoordOut++, pVertexOut++) {
		const FTransTexture* pSrc = Pts[vert];

		//Texture coordinates scaled for texture unit 0
		pTexCoordOut->u = pSrc->U * pRD->TexInfo[0].UMult;
		pTexCoordOut->v = pSrc->V * pRD->TexInfo[0].VMult;

		//Position
		pVertexOut->x = pSrc->Point.X;
		pVertexOut->y = pSrc->Point.Y;
		pVertexOut->z = pSrc->Point.Z;

		//Color
		if (pRD->m_requestedColorFlags & CF_FOG_MODE) {
			FLOAT lightScale = 255.0f * (1.0f - pSrc->Fog.W);

			pVertexOut->color = UD3D9RenderDevice::FPlaneTo_BGRScaled_A255(&pSrc->Light, lightScale);

			//The secondary color array only advances in fog mode
			pSpecularOut->specular = UD3D9RenderDevice::FPlaneTo_BGR_A0(&pSrc->Fog);
			pSpecularOut++;
		}
		else if (pRD->m_requestedColorFlags & CF_COLOR_ARRAY) {
#ifdef UTGLR_RUNE_BUILD
			pVertexOut->color = UD3D9RenderDevice::FPlaneTo_BGR_Aub(&pSrc->Light, pRD->m_gpAlpha);
#else
			pVertexOut->color = UD3D9RenderDevice::FPlaneTo_BGR_A255(&pSrc->Light);
#endif
		}
		else {
			pVertexOut->color = 0xFFFFFFFF;
		}
	}
}

/*
	Triangle buffering routine for uncolored geometry.

	Appends the three vertices referenced by Pts[0..2] to the texture coordinate and
	vertex/color arrays of pRD, starting at pRD->BufferedVerts, and advances
	BufferedVerts by 3. Every vertex gets the color 0xFFFFFFFF.
*/
static void FASTCALL Buffer3BasicVerts(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	FGLTexCoord *pTexCoordOut = &pRD->m_pTexCoordArray[0][pRD->BufferedVerts];
	FGLVertexColor *pVertexOut = &pRD->m_pVertexColorArray[pRD->BufferedVerts];
	pRD->BufferedVerts += 3;

	//Texture coordinate scale factors for texture unit 0
	const FLOAT uScale = pRD->TexInfo[0].UMult;
	const FLOAT vScale = pRD->TexInfo[0].VMult;

	for (INT vert = 0; vert < 3; vert++) {
		const FTransTexture* pSrc = Pts[vert];

		pTexCoordOut[vert].u = pSrc->U * uScale;
		pTexCoordOut[vert].v = pSrc->V * vScale;

		pVertexOut[vert].x = pSrc->Point.X;
		pVertexOut[vert].y = pSrc->Point.Y;
		pVertexOut[vert].z = pSrc->Point.Z;
		pVertexOut[vert].color = 0xFFFFFFFF;
	}
}

/*
	Triangle buffering routine for lit geometry without fog.

	Appends the three vertices referenced by Pts[0..2] to the texture coordinate and
	vertex/color arrays of pRD, starting at pRD->BufferedVerts, and advances
	BufferedVerts by 3. The vertex color is FPlaneTo_BGR_A255 of the light value.
*/
static void FASTCALL Buffer3ColoredVerts(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	FGLTexCoord *pTexCoordOut = &pRD->m_pTexCoordArray[0][pRD->BufferedVerts];
	FGLVertexColor *pVertexOut = &pRD->m_pVertexColorArray[pRD->BufferedVerts];
	pRD->BufferedVerts += 3;

	for (INT vert = 0; vert < 3; vert++) {
		const FTransTexture* pSrc = Pts[vert];

		//Texture coordinates scaled for texture unit 0
		pTexCoordOut[vert].u = pSrc->U * pRD->TexInfo[0].UMult;
		pTexCoordOut[vert].v = pSrc->V * pRD->TexInfo[0].VMult;

		//Position and light color
		pVertexOut[vert].x = pSrc->Point.X;
		pVertexOut[vert].y = pSrc->Point.Y;
		pVertexOut[vert].z = pSrc->Point.Z;
		pVertexOut[vert].color = UD3D9RenderDevice::FPlaneTo_BGR_A255(&pSrc->Light);
	}
}

#ifdef UTGLR_INCLUDE_SSE_CODE
/*
	Hand written SSE version of Buffer3ColoredVerts.

	Buffers the three vertices referenced by Pts[0..2] at pRD->BufferedVerts and
	advances BufferedVerts by 3. For each vertex it writes the texture coordinates
	scaled by TexInfo UMult/VMult, copies the position, and packs the three light
	components (each multiplied by 255) into one DWORD with 0xFF in the top byte.

	The function is naked, so it saves and restores ebx/esi/edi itself and takes its
	arguments in ecx (pRD) and edx (Pts).

	The output addresses are computed with hard coded strides of 8 bytes per
	FGLTexCoord and 16 bytes per FGLVertexColor, while the per vertex advance uses
	TYPE FGLTexCoord / TYPE FGLVertexColor. The struct sizes must match those
	hard coded strides.
*/
__declspec(naked) static void FASTCALL Buffer3ColoredVerts_SSE(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	static float f255 = 255.0f;
	__asm {
		//pRD is in ecx
		//Pts is in edx

		push ebx
		push esi
		push edi

		mov eax, [ecx]UD3D9RenderDevice.BufferedVerts

		//ebx = &m_pTexCoordArray[0][BufferedVerts]
		lea ebx, [eax*8]
		add ebx, [ecx]UD3D9RenderDevice.m_pTexCoordArray[0]

		//edi = &m_pVertexColorArray[BufferedVerts]
		mov edi, eax
		shl edi, 4
		add edi, [ecx]UD3D9RenderDevice.m_pVertexColorArray

		//BufferedVerts += 3
		add eax, 3
		mov [ecx]UD3D9RenderDevice.BufferedVerts, eax

		//xmm0 = UMult, xmm1 = VMult, xmm2 = 255.0f
		//ecx is free to be used as scratch after this point
		lea eax, [ecx]UD3D9RenderDevice.TexInfo
		movss xmm0, [eax]FTexInfo.UMult
		movss xmm1, [eax]FTexInfo.VMult
		movss xmm2, f255

		//Pts in edx
		//Get PtsPlus12B
		//esi marks the end of the three pointer entries and terminates the loop
		lea esi, [edx + 12]

v_loop:
			//eax = current source vertex
			mov eax, [edx]
			add edx, 4

			//Scaled texture coordinates
			movss xmm3, [eax]FTransTexture.U
			mulss xmm3, xmm0
			movss [ebx]FGLTexCoord.u, xmm3
			movss xmm3, [eax]FTransTexture.V
			mulss xmm3, xmm1
			movss [ebx]FGLTexCoord.v, xmm3
			add ebx, TYPE FGLTexCoord

			//Position is copied as raw 32-bit values
			mov ecx, [eax]FOutVector.Point.X
			mov [edi]FGLVertexColor.x, ecx
			mov ecx, [eax]FOutVector.Point.Y
			mov [edi]FGLVertexColor.y, ecx
			mov ecx, [eax]FOutVector.Point.Z
			mov [edi]FGLVertexColor.z, ecx

			//Load and scale all three light components before eax is reused for the result
			movss xmm3, [eax]FTransSample.Light + 0
			mulss xmm3, xmm2
			movss xmm4, [eax]FTransSample.Light + 4
			mulss xmm4, xmm2
			movss xmm5, [eax]FTransSample.Light + 8
			mulss xmm5, xmm2
			//Light + 0 goes to bits 16-23, Light + 4 to bits 8-15, Light + 8 to bits 0-7
			//The first component is not masked, any overflow lands in the top byte which is forced to 0xFF below
			cvtss2si eax, xmm3
			shl eax, 16
			cvtss2si ecx, xmm4
			and ecx, 255
			shl ecx, 8
			or eax, ecx
			cvtss2si ecx, xmm5
			and ecx, 255
			or ecx, 0xFF000000
			or eax, ecx
			mov [edi]FGLVertexColor.color, eax
			add edi, TYPE FGLVertexColor

			cmp edx, esi
			jne v_loop

		pop edi
		pop esi
		pop ebx

		ret
	}
}

/*
	Hand written SSE2 version of Buffer3ColoredVerts.

	Same job as Buffer3ColoredVerts_SSE, but the light color is converted with packed
	instructions: all four light components are loaded at once, components 0 and 2
	are swapped, the result is multiplied by (255, 255, 255, 0), converted to
	integers and packed down to four bytes, and 0xFF000000 is ORed in.

	The pack instructions saturate, whereas the _SSE variant masks each component
	with 255, so out of range light values do not produce the same result in the
	two variants.

	The function is naked, so it saves and restores ebx/esi/edi itself and takes its
	arguments in ecx (pRD) and edx (Pts). The output addresses use hard coded strides
	of 8 bytes per FGLTexCoord and 16 bytes per FGLVertexColor.
*/
__declspec(naked) static void FASTCALL Buffer3ColoredVerts_SSE2(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	static __m128 fColorMul = { 255.0f, 255.0f, 255.0f, 0.0f };
	static DWORD alphaOr = 0xFF000000;
	__asm {
		//pRD is in ecx
		//Pts is in edx

		push ebx
		push esi
		push edi

		mov eax, [ecx]UD3D9RenderDevice.BufferedVerts

		//ebx = &m_pTexCoordArray[0][BufferedVerts]
		lea ebx, [eax*8]
		add ebx, [ecx]UD3D9RenderDevice.m_pTexCoordArray[0]

		//edi = &m_pVertexColorArray[BufferedVerts]
		mov edi, eax
		shl edi, 4
		add edi, [ecx]UD3D9RenderDevice.m_pVertexColorArray

		//BufferedVerts += 3
		add eax, 3
		mov [ecx]UD3D9RenderDevice.BufferedVerts, eax

		//xmm0 = UMult, xmm1 = VMult, xmm2 = color multiplier, xmm3 = alpha OR mask
		//movaps needs fColorMul to be 16 byte aligned
		lea eax, [ecx]UD3D9RenderDevice.TexInfo
		movss xmm0, [eax]FTexInfo.UMult
		movss xmm1, [eax]FTexInfo.VMult
		movaps xmm2, fColorMul
		movd xmm3, alphaOr

		//Pts in edx
		//Get PtsPlus12B
		//esi marks the end of the three pointer entries and terminates the loop
		lea esi, [edx + 12]

v_loop:
			//eax = current source vertex
			mov eax, [edx]
			add edx, 4

			//Scaled texture coordinates
			movss xmm4, [eax]FTransTexture.U
			mulss xmm4, xmm0
			movss [ebx]FGLTexCoord.u, xmm4
			movss xmm4, [eax]FTransTexture.V
			mulss xmm4, xmm1
			movss [ebx]FGLTexCoord.v, xmm4
			add ebx, TYPE FGLTexCoord

			//Position is copied as raw 32-bit values
			mov ecx, [eax]FOutVector.Point.X
			mov [edi]FGLVertexColor.x, ecx
			mov ecx, [eax]FOutVector.Point.Y
			mov [edi]FGLVertexColor.y, ecx
			mov ecx, [eax]FOutVector.Point.Z
			mov [edi]FGLVertexColor.z, ecx

			//Unaligned load of all four light components
			movups xmm4, [eax]FTransSample.Light
			//0xC6 swaps components 0 and 2 so the first light component ends up in bits 16-23
			shufps xmm4, xmm4, 0xC6
			//Scale by 255, the fourth component is multiplied by 0
			mulps xmm4, xmm2
			//Convert to integers and pack down to bytes with saturation
			cvtps2dq xmm4, xmm4
			packssdw xmm4, xmm4
			packuswb xmm4, xmm4
			//Force the top byte to 0xFF
			por xmm4, xmm3
			movd [edi]FGLVertexColor.color, xmm4
			add edi, TYPE FGLVertexColor

			cmp edx, esi
			jne v_loop

		pop edi
		pop esi
		pop ebx

		ret
	}
}
#endif

/*
	Triangle buffering routine for lit geometry with fog.

	Appends the three vertices referenced by Pts[0..2] to the texture coordinate,
	vertex/color and secondary color arrays of pRD, starting at pRD->BufferedVerts,
	and advances BufferedVerts by 3. The vertex color is the light value scaled by
	255 * (1 - Fog.W) and the secondary color is FPlaneTo_BGR_A0 of the fog value.
*/
static void FASTCALL Buffer3FoggedVerts(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	FGLTexCoord *pTexCoordOut = &pRD->m_pTexCoordArray[0][pRD->BufferedVerts];
	FGLVertexColor *pVertexOut = &pRD->m_pVertexColorArray[pRD->BufferedVerts];
	FGLSecondaryColor *pSpecularOut = &pRD->m_pSecondaryColorArray[pRD->BufferedVerts];
	pRD->BufferedVerts += 3;

	for (INT vert = 0; vert < 3; vert++) {
		const FTransTexture* pSrc = Pts[vert];

		//Texture coordinates scaled for texture unit 0
		pTexCoordOut[vert].u = pSrc->U * pRD->TexInfo[0].UMult;
		pTexCoordOut[vert].v = pSrc->V * pRD->TexInfo[0].VMult;

		//Position
		pVertexOut[vert].x = pSrc->Point.X;
		pVertexOut[vert].y = pSrc->Point.Y;
		pVertexOut[vert].z = pSrc->Point.Z;

		//Light attenuated by the fog amount
		FLOAT lightScale = 255.0f * (1.0f - pSrc->Fog.W);
		pVertexOut[vert].color = UD3D9RenderDevice::FPlaneTo_BGRScaled_A255(&pSrc->Light, lightScale);

		//Fog color
		pSpecularOut[vert].specular = UD3D9RenderDevice::FPlaneTo_BGR_A0(&pSrc->Fog);
	}
}

#ifdef UTGLR_INCLUDE_SSE_CODE
/*
	Hand written SSE version of Buffer3FoggedVerts.

	Buffers the three vertices referenced by Pts[0..2] at pRD->BufferedVerts and
	advances BufferedVerts by 3. For each vertex it writes the scaled texture
	coordinates, copies the position, packs the light components multiplied by
	255 * (1 - Fog.W) into the vertex color with 0xFF in the top byte, and packs the
	fog components multiplied by 255 into the secondary color with 0 in the top byte.

	The function is naked, so it saves and restores ebx/esi/edi/ebp itself and takes
	its arguments in ecx (pRD) and edx (Pts). One stack slot is reserved to hold ebx
	while ebx is reused as the color accumulator, because eax must keep pointing at
	the source vertex until the fog components have been loaded.

	The output addresses use hard coded strides of 8 bytes per FGLTexCoord, 16 bytes
	per FGLVertexColor and 4 bytes per FGLSecondaryColor.
*/
__declspec(naked) static void FASTCALL Buffer3FoggedVerts_SSE(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	static float f255 = 255.0f;
	static float f1 = 1.0f;
	__asm {
		//pRD is in ecx
		//Pts is in edx

		push ebx
		push esi
		push edi
		push ebp
		//Reserve one DWORD of stack as a spill slot for ebx
		sub esp, 4

		mov eax, [ecx]UD3D9RenderDevice.BufferedVerts

		//ebx = &m_pTexCoordArray[0][BufferedVerts]
		lea ebx, [eax*8]
		add ebx, [ecx]UD3D9RenderDevice.m_pTexCoordArray[0]

		//edi = &m_pVertexColorArray[BufferedVerts]
		mov edi, eax
		shl edi, 4
		add edi, [ecx]UD3D9RenderDevice.m_pVertexColorArray

		//esi = &m_pSecondaryColorArray[BufferedVerts]
		lea esi, [eax*4]
		add esi, [ecx]UD3D9RenderDevice.m_pSecondaryColorArray

		//BufferedVerts += 3
		add eax, 3
		mov [ecx]UD3D9RenderDevice.BufferedVerts, eax

		//xmm0 = UMult, xmm1 = VMult, xmm2 = 255.0f
		//ecx is free to be used as scratch after this point
		lea eax, [ecx]UD3D9RenderDevice.TexInfo
		movss xmm0, [eax]FTexInfo.UMult
		movss xmm1, [eax]FTexInfo.VMult
		movss xmm2, f255

		//Pts in edx
		//Get PtsPlus12B
		//ebp marks the end of the three pointer entries and terminates the loop
		lea ebp, [edx + 12]

v_loop:
			//eax = current source vertex
			mov eax, [edx]
			add edx, 4

			//Scaled texture coordinates
			movss xmm3, [eax]FTransTexture.U
			mulss xmm3, xmm0
			movss [ebx]FGLTexCoord.u, xmm3
			movss xmm3, [eax]FTransTexture.V
			mulss xmm3, xmm1
			movss [ebx]FGLTexCoord.v, xmm3
			add ebx, TYPE FGLTexCoord

			//Save the texture coordinate pointer, ebx is used as the color accumulator below
			mov [esp], ebx

			//xmm6 = 255 * (1 - Fog.W)
			movss xmm6, f1
			subss xmm6, [eax]FTransSample.Fog + 12
			mulss xmm6, xmm2

			//Position is copied as raw 32-bit values
			mov ecx, [eax]FOutVector.Point.X
			mov [edi]FGLVertexColor.x, ecx
			mov ecx, [eax]FOutVector.Point.Y
			mov [edi]FGLVertexColor.y, ecx
			mov ecx, [eax]FOutVector.Point.Z
			mov [edi]FGLVertexColor.z, ecx

			//Light scaled by the fog factor
			movss xmm3, [eax]FTransSample.Light + 0
			mulss xmm3, xmm6
			movss xmm4, [eax]FTransSample.Light + 4
			mulss xmm4, xmm6
			movss xmm5, [eax]FTransSample.Light + 8
			mulss xmm5, xmm6
			//Light + 0 goes to bits 16-23, Light + 4 to bits 8-15, Light + 8 to bits 0-7
			//The first component is not masked, any overflow lands in the top byte which is forced to 0xFF below
			cvtss2si ebx, xmm3
			shl ebx, 16
			cvtss2si ecx, xmm4
			and ecx, 255
			shl ecx, 8
			or ebx, ecx
			cvtss2si ecx, xmm5
			and ecx, 255
			or ecx, 0xFF000000
			or ebx, ecx
			mov [edi]FGLVertexColor.color, ebx
			add edi, TYPE FGLVertexColor

			//Restore the texture coordinate pointer
			mov ebx, [esp]

			//Fog color scaled by 255, all three components are masked so the top byte stays 0
			movss xmm3, [eax]FTransSample.Fog + 0
			mulss xmm3, xmm2
			movss xmm4, [eax]FTransSample.Fog + 4
			mulss xmm4, xmm2
			movss xmm5, [eax]FTransSample.Fog + 8
			mulss xmm5, xmm2
			cvtss2si eax, xmm3
			and eax, 255
			shl eax, 16
			cvtss2si ecx, xmm4
			and ecx, 255
			shl ecx, 8
			or eax, ecx
			cvtss2si ecx, xmm5
			and ecx, 255
			or eax, ecx
			mov [esi]FGLSecondaryColor.specular, eax
			add esi, TYPE FGLSecondaryColor

			cmp edx, ebp
			jne v_loop

		//Release the spill slot
		add esp, 4
		pop ebp
		pop edi
		pop esi
		pop ebx

		ret
	}
}

/*
	Hand written SSE2 version of Buffer3FoggedVerts.

	Same job as Buffer3FoggedVerts_SSE, but both colors are converted with packed
	instructions. The light components are multiplied by 255 * (1 - Fog.W) broadcast
	to all four lanes and get 0xFF000000 ORed in after packing. The fog components
	are multiplied by (255, 255, 255, 0), so the top byte of the secondary color is 0.

	The pack instructions saturate, whereas the _SSE variant masks components with
	255, so out of range values do not produce the same result in the two variants.

	The function is naked, so it saves and restores ebx/esi/edi/ebp itself and takes
	its arguments in ecx (pRD) and edx (Pts). The output addresses use hard coded
	strides of 8 bytes per FGLTexCoord, 16 bytes per FGLVertexColor and 4 bytes per
	FGLSecondaryColor.
*/
__declspec(naked) static void FASTCALL Buffer3FoggedVerts_SSE2(UD3D9RenderDevice *pRD, FTransTexture** Pts) {
	static __m128 fColorMul = { 255.0f, 255.0f, 255.0f, 0.0f };
	static DWORD alphaOr = 0xFF000000;
	static float f1 = 1.0f;
	__asm {
		//pRD is in ecx
		//Pts is in edx

		push ebx
		push esi
		push edi
		push ebp

		mov eax, [ecx]UD3D9RenderDevice.BufferedVerts

		//ebx = &m_pTexCoordArray[0][BufferedVerts]
		lea ebx, [eax*8]
		add ebx, [ecx]UD3D9RenderDevice.m_pTexCoordArray[0]

		//edi = &m_pVertexColorArray[BufferedVerts]
		mov edi, eax
		shl edi, 4
		add edi, [ecx]UD3D9RenderDevice.m_pVertexColorArray

		//esi = &m_pSecondaryColorArray[BufferedVerts]
		lea esi, [eax*4]
		add esi, [ecx]UD3D9RenderDevice.m_pSecondaryColorArray

		//BufferedVerts += 3
		add eax, 3
		mov [ecx]UD3D9RenderDevice.BufferedVerts, eax

		//xmm0 = UMult, xmm1 = VMult, xmm2 = color multiplier, xmm3 = alpha OR mask
		//movaps needs fColorMul to be 16 byte aligned
		lea eax, [ecx]UD3D9RenderDevice.TexInfo
		movss xmm0, [eax]FTexInfo.UMult
		movss xmm1, [eax]FTexInfo.VMult
		movaps xmm2, fColorMul
		movd xmm3, alphaOr

		//Pts in edx
		//Get PtsPlus12B
		//ebp marks the end of the three pointer entries and terminates the loop
		lea ebp, [edx + 12]

v_loop:
			//eax = current source vertex
			mov eax, [edx]
			add edx, 4

			//Scaled texture coordinates
			movss xmm4, [eax]FTransTexture.U
			mulss xmm4, xmm0
			movss [ebx]FGLTexCoord.u, xmm4
			movss xmm4, [eax]FTransTexture.V
			mulss xmm4, xmm1
			movss [ebx]FGLTexCoord.v, xmm4
			add ebx, TYPE FGLTexCoord

			//xmm6 = 255 * (1 - Fog.W) in all four lanes
			movss xmm6, f1
			subss xmm6, [eax]FTransSample.Fog + 12
			mulss xmm6, xmm2
			shufps xmm6, xmm6, 0x00

			//Position is copied as raw 32-bit values
			mov ecx, [eax]FOutVector.Point.X
			mov [edi]FGLVertexColor.x, ecx
			mov ecx, [eax]FOutVector.Point.Y
			mov [edi]FGLVertexColor.y, ecx
			mov ecx, [eax]FOutVector.Point.Z
			mov [edi]FGLVertexColor.z, ecx

			//Light: swap components 0 and 2, scale by the fog factor, pack to bytes with saturation
			movups xmm4, [eax]FTransSample.Light
			shufps xmm4, xmm4, 0xC6
			mulps xmm4, xmm6
			cvtps2dq xmm4, xmm4
			packssdw xmm4, xmm4
			packuswb xmm4, xmm4
			//Force the top byte to 0xFF
			por xmm4, xmm3
			movd [edi]FGLVertexColor.color, xmm4
			add edi, TYPE FGLVertexColor

			//Fog: swap components 0 and 2, scale by 255 (fourth component by 0), pack to bytes with saturation
			movups xmm4, [eax]FTransSample.Fog
			shufps xmm4, xmm4, 0xC6
			mulps xmm4, xmm2
			cvtps2dq xmm4, xmm4
			packssdw xmm4, xmm4
			packuswb xmm4, xmm4
			movd [esi]FGLSecondaryColor.specular, xmm4
			add esi, TYPE FGLSecondaryColor

			cmp edx, ebp
			jne v_loop

		pop ebp
		pop edi
		pop esi
		pop ebx

		ret
	}
}
#endif


/*
	Buffers the triangles of a clipped polygon beyond the first one.

	For every i from 3 to NumPts - 1 the triangle (Pts[0], Pts[i - 1], Pts[i]) is
	appended at BufferedVerts, which advances by 3 per triangle. The color written
	for each vertex depends on m_requestedColorFlags:
	- CF_FOG_MODE: light scaled by 255 * (1 - Fog.W), plus a fog color in the secondary color array
	- CF_COLOR_ARRAY: the light color
	- otherwise: opaque white

	Must be called with (NumPts > 3). The loop body always runs at least once and
	reads Pts[3].
*/
void UD3D9RenderDevice::BufferAdditionalClippedVerts(FTransTexture** Pts, INT NumPts) {
	INT fanIdx = 3;

	do {
		//The three source vertices of this triangle in output order
		const FTransTexture* const triVerts[3] = { Pts[0], Pts[fanIdx - 1], Pts[fanIdx] };

		for (INT corner = 0; corner < 3; corner++) {
			const FTransTexture* pSrc = triVerts[corner];
			FGLTexCoord &texCoordOut = m_pTexCoordArray[0][BufferedVerts];
			FGLVertexColor &vertexOut = m_pVertexColorArray[BufferedVerts];

			//Texture coordinates scaled for texture unit 0
			texCoordOut.u = pSrc->U * TexInfo[0].UMult;
			texCoordOut.v = pSrc->V * TexInfo[0].VMult;

			//Position
			vertexOut.x = pSrc->Point.X;
			vertexOut.y = pSrc->Point.Y;
			vertexOut.z = pSrc->Point.Z;

			//Color
			if (m_requestedColorFlags & CF_FOG_MODE) {
				FLOAT lightScale = 255.0f * (1.0f - pSrc->Fog.W);
				vertexOut.color = FPlaneTo_BGRScaled_A255(&pSrc->Light, lightScale);

				m_pSecondaryColorArray[BufferedVerts].specular = FPlaneTo_BGR_A0(&pSrc->Fog);
			}
			else if (m_requestedColorFlags & CF_COLOR_ARRAY) {
#ifdef UTGLR_RUNE_BUILD
				vertexOut.color = FPlaneTo_BGR_Aub(&pSrc->Light, m_gpAlpha);
#else
				vertexOut.color = FPlaneTo_BGR_A255(&pSrc->Light);
#endif
			}
			else {
				vertexOut.color = 0xFFFFFFFF;
			}

			BufferedVerts++;
		}
	} while (++fanIdx < NumPts);
}


/*
	Fills a 256 entry, 16-bit per channel gamma ramp.

	brightness is limited to the range -50..50 and added to the entry index, and the
	sum is limited to 0..255. Each channel value is then
	pow(level / 255, 1 / (2.5 * gamma)) * 65535, rounded with appRound.

	The gamma arguments are used as divisors and are not range checked here.
*/
void UD3D9RenderDevice::BuildGammaRamp(float redGamma, float greenGamma, float blueGamma, int brightness, D3DGAMMARAMP &ramp) {
	//Parameter clamping
	if (brightness < -50) {
		brightness = -50;
	}
	else if (brightness > 50) {
		brightness = 50;
	}

	//Per channel exponents
	const float redExponent = 1.0f / (2.5f * redGamma);
	const float greenExponent = 1.0f / (2.5f * greenGamma);
	const float blueExponent = 1.0f / (2.5f * blueGamma);

	for (unsigned int entry = 0; entry < 256; entry++) {
		//Input level with brightness applied, limited to the valid range
		int level = (int)entry + brightness;
		if (level < 0) {
			level = 0;
		}
		else if (level > 255) {
			level = 255;
		}

		//Gamma
		const int redValue = (int)appRound((float)appPow(level / 255.0f, redExponent) * 65535.0f);
		const int greenValue = (int)appRound((float)appPow(level / 255.0f, greenExponent) * 65535.0f);
		const int blueValue = (int)appRound((float)appPow(level / 255.0f, blueExponent) * 65535.0f);

		//Save results
		ramp.red[entry] = (_WORD)redValue;
		ramp.green[entry] = (_WORD)greenValue;
		ramp.blue[entry] = (_WORD)blueValue;
	}
}

void UD3D9RenderDevice::BuildGammaRamp(float redGamma, float greenGamma, float blueGamma, int brightness, FByteGammaRamp &ramp) {
	//Fills all 256 entries of a byte gamma ramp (8-bit output per channel)
	//redGamma/greenGamma/blueGamma: per channel gamma, used as 1 / (2.5 * gamma) exponent
	//brightness: offset added to each ramp index, limited to [-50, 50]
	//ramp: receives the computed red, green, and blue tables

	//Limit brightness offset
	if (brightness < -50) {
		brightness = -50;
	}
	else if (brightness > 50) {
		brightness = 50;
	}

	//Per channel exponents
	const float redExponent = 1.0f / (2.5f * redGamma);
	const float greenExponent = 1.0f / (2.5f * greenGamma);
	const float blueExponent = 1.0f / (2.5f * blueGamma);

	for (unsigned int entry = 0; entry < 256; entry++) {
		//Offset the ramp index by brightness and keep it within [0, 255]
		int level = (int)entry + brightness;
		if (level < 0) level = 0;
		if (level > 255) level = 255;

		//Normalized input shared by all three channels
		const float normalized = level / 255.0f;

		//Gamma curve scaled to the 8-bit output range
		const int redOut = (int)appRound((float)appPow(normalized, redExponent) * 255.0f);
		const int greenOut = (int)appRound((float)appPow(normalized, greenExponent) * 255.0f);
		const int blueOut = (int)appRound((float)appPow(normalized, blueExponent) * 255.0f);

		ramp.red[entry] = (BYTE)redOut;
		ramp.green[entry] = (BYTE)greenOut;
		ramp.blue[entry] = (BYTE)blueOut;
	}
}

void UD3D9RenderDevice::SetGamma(FLOAT GammaCorrection) {
	//Builds a gamma ramp from GammaCorrection plus the configured offsets and
	//hands it to the D3D device. Does nothing if the adjusted gamma is <= zero.
	//The older GDI path (GetDeviceGammaRamp / SetDeviceGammaRamp on m_hDC with a
	//saved original ramp) is no longer used here.

	//Apply global gamma offset
	const FLOAT adjustedGamma = GammaCorrection + GammaOffset;

	//Do not attempt to set gamma if <= zero
	if (adjustedGamma <= 0.0f) {
		return;
	}

	//Build ramp using the per channel offsets and current brightness
	D3DGAMMARAMP deviceRamp;
	BuildGammaRamp(
		adjustedGamma + GammaOffsetRed,
		adjustedGamma + GammaOffsetGreen,
		adjustedGamma + GammaOffsetBlue,
		Brightness,
		deviceRamp);

	//The call result is not checked here, so success is always recorded
	m_setGammaRampSucceeded = false;
	m_d3dDevice->SetGammaRamp(0, D3DSGR_NO_CALIBRATION, &deviceRamp);
	m_setGammaRampSucceeded = true;
	SavedGammaCorrection = adjustedGamma;
}

void UD3D9RenderDevice::ResetGamma(void) {
	//Currently a no-op: the GDI based restore of the original desktop gamma
	//ramp below is disabled and kept only for reference.

/*	//Restore gamma ramp if original was successfully saved
	if (g_haveOriginalGammaRamp) {
		HWND hDesktopWnd;
		HDC hDC;

		hDesktopWnd = GetDesktopWindow();
		hDC = GetDC(hDesktopWnd);

		// vogel: grrr, UClient::destroy is called before this gets called so hDC is invalid
		SetDeviceGammaRamp(hDC, &g_originalGammaRamp);

		ReleaseDC(hDesktopWnd, hDC);
	}*/
}



UBOOL UD3D9RenderDevice::FailedInitf(const TCHAR* Fmt, ...) {
	//Logs a printf style initialization failure message, shuts the device
	//down via Exit(), and returns 0.
	//Fmt: printf style format string followed by its arguments

	//Expand the caller's format and arguments into a local buffer
	TCHAR TempStr[4096];
	GET_VARARGS(TempStr, ARRAY_COUNT(TempStr), Fmt);

	//Log the expanded text as data, not as a format string, so that any '%'
	//characters in the final message are not interpreted a second time
	debugf(NAME_Init, TEXT("%s"), TempStr);

	Exit();
	return 0;
}

void UD3D9RenderDevice::Exit() {
	guard(UD3D9RenderDevice::Exit);
	check(NumDevices > 0);

	//Shutdown D3D
	if (m_d3d9) {
		UnsetRes();
	}

	//Reset gamma ramp
	ResetGamma();

	//Shut down global D3D
	if (--NumDevices == 0) {
#if 0
		//Free modules
		if (hModuleD3d9) {
			verify(FreeLibrary(hModuleD3d9));
			hModuleD3d9 = NULL;
		}
#endif
	}

	unguard;
}

void UD3D9RenderDevice::ShutdownAfterError() {
	guard(UD3D9RenderDevice::ShutdownAfterError);

	//Minimal shutdown path used after an error: logs the event and resets
	//gamma. No D3D objects are released here.

	//Engine log entry
	debugf(NAME_Exit, TEXT("UD3D9RenderDevice::ShutdownAfterError"));

	//Driver debug stream entry, only with basic debugging enabled
	if (DebugBit(DEBUG_BIT_BASIC)) dout << TEXT("utd3d9r: ShutdownAfterError") << std::endl;

	//Display settings are intentionally left alone
	//ChangeDisplaySettings(NULL, 0);

	//Reset gamma ramp
	ResetGamma();

	unguard;
}


//Sets the rendering resolution / mode.
//
//NewX, NewY:     requested size; replaced by the closest matching mode when
//                fullscreen, and by the real viewport size on a windowed resize
//NewColorBytes:  requested color depth in bytes
//Fullscreen:     nonzero to request fullscreen
//
//Returns 1 on success and 0 if the viewport could not be resized. D3D
//failures (object creation, caps query, device creation, device reset) are
//reported through appErrorf.
//
//Two paths exist:
// - Fast path: a device already exists, this is not a device reset request,
//   neither the old nor the new mode is fullscreen, and the color depth is
//   unchanged. The existing device is Reset() with the new back buffer size.
// - Full path: any existing D3D objects are torn down with UnsetRes(), then
//   the D3D9 object and device are created from scratch, capabilities are
//   queried, configuration is validated, and permanent resources are created.
UBOOL UD3D9RenderDevice::SetRes(INT NewX, INT NewY, INT NewColorBytes, UBOOL Fullscreen) {
	guard(UD3D9RenderDevice::SetRes);

	HRESULT hResult;
	bool saved_SetRes_isDeviceReset;

	//Get debug bits
	{
		INT i = 0;
		if (!GConfig->GetInt(g_pSection, TEXT("DebugBits"), i)) i = 0;
		m_debugBits = i;
	}
	//Display debug bits
	if (DebugBit(DEBUG_BIT_ANY)) dout << TEXT("utd3d9r: DebugBits = ") << m_debugBits << std::endl;


	debugf(TEXT("Enter SetRes()"));

	//Save parameters in case need to reset device
	m_SetRes_NewX = NewX;
	m_SetRes_NewY = NewY;
	m_SetRes_NewColorBytes = NewColorBytes;
	m_SetRes_Fullscreen = Fullscreen;

	//Save copy of SetRes is device reset flag
	saved_SetRes_isDeviceReset = m_SetRes_isDeviceReset;
	//Reset SetRes is device reset flag
	m_SetRes_isDeviceReset = false;

	// If not fullscreen, and color bytes hasn't changed, do nothing.
	//If SetRes called due to device reset, do full destroy/recreate
	if (m_d3dDevice && !saved_SetRes_isDeviceReset && !Fullscreen && !WasFullscreen && (NewColorBytes == Viewport->ColorBytes)) {
		//Resize viewport
		if (!Viewport->ResizeViewport(BLIT_HardwarePaint | BLIT_Direct3D, NewX, NewY, NewColorBytes)) {
			return 0;
		}

		//Free old resources if they exist
		//These are recreated by InitPermanentResourcesAndRenderingState after the reset
		FreePermanentResources();

		//Get real viewport size
		NewX = Viewport->SizeX;
		NewY = Viewport->SizeY;

		//Don't break editor and tiny windowed mode
		if (NewX < 16) NewX = 16;
		if (NewY < 16) NewY = 16;

		//Set new size
		m_d3dpp.BackBufferWidth = NewX;
		m_d3dpp.BackBufferHeight = NewY;

		//Reset device
		hResult = m_d3dDevice->Reset(&m_d3dpp);
		if (FAILED(hResult)) {
			appErrorf(TEXT("Failed to create D3D device for new window size"));
		}

		//Initialize permanent rendering state, including allocation of some resources
		InitPermanentResourcesAndRenderingState();

		//Set viewport
		D3DVIEWPORT9 d3dViewport;
		d3dViewport.X = 0;
		d3dViewport.Y = 0;
		d3dViewport.Width = NewX;
		d3dViewport.Height = NewY;
		d3dViewport.MinZ = 0.0f;
		d3dViewport.MaxZ = 1.0f;
		m_d3dDevice->SetViewport(&d3dViewport);

		return 1;
	}


	// Exit res.
	if (m_d3d9) {
		debugf(TEXT("UnSetRes() -> m_d3d9 != NULL"));
		UnsetRes();
	}

	//Search for closest resolution match if fullscreen requested
	//No longer changing resolution here
	//Closest means smallest squared distance among modes with matching bit depth
	if (Fullscreen) {
		INT FindX = NewX, FindY = NewY, BestError = MAXINT;
		for (INT i = 0; i < Modes.Num(); i++) {
			if (Modes(i).Z==NewColorBytes*8) {
				INT Error
				=	(Modes(i).X-FindX)*(Modes(i).X-FindX)
				+	(Modes(i).Y-FindY)*(Modes(i).Y-FindY);
				if (Error < BestError) {
					NewX      = Modes(i).X;
					NewY      = Modes(i).Y;
					BestError = Error;
				}
			}
		}
	}

	// Change window size.
	UBOOL Result = Viewport->ResizeViewport(Fullscreen ? (BLIT_Fullscreen | BLIT_Direct3D) : (BLIT_HardwarePaint | BLIT_Direct3D), NewX, NewY, NewColorBytes);
	if (!Result) {
		return 0;
	}


	//Create main D3D9 object
	m_d3d9 = pDirect3DCreate9(D3D_SDK_VERSION);
	if (!m_d3d9) {
		appErrorf(TEXT("Direct3DCreate9 failed"));
	}


	//Get D3D caps
	hResult = m_d3d9->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &m_d3dCaps);
	if (FAILED(hResult)) {
		appErrorf(TEXT("GetDeviceCaps failed"));
	}


	//Create D3D device

	//Get current display mode
	D3DDISPLAYMODE d3ddm;
	hResult = m_d3d9->GetAdapterDisplayMode(D3DADAPTER_DEFAULT, &d3ddm);
	if (FAILED(hResult)) {
		appErrorf(TEXT("Failed to get current display mode"));
	}

	//Check if SetRes device reset
	//If so, get current bit depth
	//But don't check if was fullscreen
	if (saved_SetRes_isDeviceReset && !Fullscreen) {
		switch (d3ddm.Format) {
		case D3DFMT_R5G6B5: NewColorBytes = 2; break;
		case D3DFMT_X1R5G5B5: NewColorBytes = 2; break;
		case D3DFMT_A1R5G5B5: NewColorBytes = 2; break;
		default:
			NewColorBytes = 4;
		}
	}
	//Update saved NewColorBytes
	m_SetRes_NewColorBytes = NewColorBytes;

	//Don't break editor and tiny windowed mode
	if (NewX < 16) NewX = 16;
	if (NewY < 16) NewY = 16;

	//Set presentation parameters
	//Start from windowed defaults using the current display format
	appMemzero(&m_d3dpp, sizeof(m_d3dpp));
	m_d3dpp.Windowed = TRUE;
	m_d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
	m_d3dpp.BackBufferWidth = NewX;
	m_d3dpp.BackBufferHeight = NewY;
	m_d3dpp.BackBufferFormat = d3ddm.Format;
	m_d3dpp.EnableAutoDepthStencil = TRUE;

	//Check if should be full screen
	if (Fullscreen) {
		m_d3dpp.Windowed = FALSE;
		m_d3dpp.BackBufferFormat = (NewColorBytes <= 2) ? D3DFMT_R5G6B5 : D3DFMT_X8R8G8B8;
	}

	//Choose initial depth buffer format
	if ((NewColorBytes <= 2) && !RequestHighResolutionZ) {
		m_d3dpp.AutoDepthStencilFormat = D3DFMT_D16;
		m_numDepthBits = 16;
	}
	else {
		m_d3dpp.AutoDepthStencilFormat = D3DFMT_D32;
		m_numDepthBits = 32;
	}

	//Reduce depth buffer format if necessary based on what's supported
	//Fallback order is D32 -> D24X8 -> D16
	if (m_d3dpp.AutoDepthStencilFormat == D3DFMT_D32) {
		if (!CheckDepthFormat(d3ddm.Format, m_d3dpp.BackBufferFormat, D3DFMT_D32)) {
			m_d3dpp.AutoDepthStencilFormat = D3DFMT_D24X8;
			m_numDepthBits = 24;
		}
	}
	if (m_d3dpp.AutoDepthStencilFormat == D3DFMT_D24X8) {
		if (!CheckDepthFormat(d3ddm.Format, m_d3dpp.BackBufferFormat, D3DFMT_D24X8)) {
			m_d3dpp.AutoDepthStencilFormat = D3DFMT_D16;
			m_numDepthBits = 16;
		}
	}

	m_usingAA = false;
	m_curAAEnable = true;
	m_defAAEnable = true;

	//Select AA mode
	if (UseAA) {
		D3DMULTISAMPLE_TYPE MultiSampleType;

		switch (NumAASamples) {
		case  0: MultiSampleType = D3DMULTISAMPLE_NONE; break;
		case  1: MultiSampleType = D3DMULTISAMPLE_NONE; break;
		case  2: MultiSampleType = D3DMULTISAMPLE_2_SAMPLES; break;
		case  3: MultiSampleType = D3DMULTISAMPLE_3_SAMPLES; break;
		case  4: MultiSampleType = D3DMULTISAMPLE_4_SAMPLES; break;
		case  5: MultiSampleType = D3DMULTISAMPLE_5_SAMPLES; break;
		case  6: MultiSampleType = D3DMULTISAMPLE_6_SAMPLES; break;
		case  7: MultiSampleType = D3DMULTISAMPLE_7_SAMPLES; break;
		case  8: MultiSampleType = D3DMULTISAMPLE_8_SAMPLES; break;
		case  9: MultiSampleType = D3DMULTISAMPLE_9_SAMPLES; break;
		case 10: MultiSampleType = D3DMULTISAMPLE_10_SAMPLES; break;
		case 11: MultiSampleType = D3DMULTISAMPLE_11_SAMPLES; break;
		case 12: MultiSampleType = D3DMULTISAMPLE_12_SAMPLES; break;
		case 13: MultiSampleType = D3DMULTISAMPLE_13_SAMPLES; break;
		case 14: MultiSampleType = D3DMULTISAMPLE_14_SAMPLES; break;
		case 15: MultiSampleType = D3DMULTISAMPLE_15_SAMPLES; break;
		case 16: MultiSampleType = D3DMULTISAMPLE_16_SAMPLES; break;
		default:
			MultiSampleType = D3DMULTISAMPLE_NONE;
		}
		m_d3dpp.MultiSampleType = MultiSampleType;

		//Both the back buffer format and the depth buffer format must support the sample count
		hResult = m_d3d9->CheckDeviceMultiSampleType(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_d3dpp.BackBufferFormat, m_d3dpp.Windowed, m_d3dpp.MultiSampleType, NULL);
		if (FAILED(hResult)) {
			m_d3dpp.MultiSampleType = D3DMULTISAMPLE_NONE;
		}
		hResult = m_d3d9->CheckDeviceMultiSampleType(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_d3dpp.AutoDepthStencilFormat, m_d3dpp.Windowed, m_d3dpp.MultiSampleType, NULL);
		if (FAILED(hResult)) {
			m_d3dpp.MultiSampleType = D3DMULTISAMPLE_NONE;
		}

		if (m_d3dpp.MultiSampleType != D3DMULTISAMPLE_NONE) {
			m_usingAA = true;
		}
	}

	//Set swap interval
	//Only 0 and 1 are handled, and only if the caps report support
	if (SwapInterval >= 0) {
		switch (SwapInterval) {
		case 0:
			if (m_d3dCaps.PresentationIntervals & D3DPRESENT_INTERVAL_IMMEDIATE) {
				m_d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_IMMEDIATE;
			}
			break;
		case 1:
			if (m_d3dCaps.PresentationIntervals & D3DPRESENT_INTERVAL_ONE) {
				m_d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE;
			}
			break;
		default:
			;
		}
	}


	//Set increased back buffer count if using triple buffering
	if (UseTripleBuffering) {
		m_d3dpp.BackBufferCount = 2;
	}

	//Set initial HW/SW vertex processing preference
	m_doSoftwareVertexInit = false;
	if (UseSoftwareVertexProcessing) {
		m_doSoftwareVertexInit = true;
	}

	//Try HW vertex init if not forcing SW vertex init
	if (!m_doSoftwareVertexInit) {
		bool tryDefaultRefreshRate = true;
		DWORD behaviorFlags = D3DCREATE_HARDWARE_VERTEXPROCESSING;

		//Check if should use pure device
		if (UsePureDevice && (m_d3dCaps.DevCaps & D3DDEVCAPS_PUREDEVICE)) {
			behaviorFlags |= D3DCREATE_PUREDEVICE;
		}

		//Possibly attempt to set refresh rate if fullscreen
		if (!m_d3dpp.Windowed && (RefreshRate > 0)) {
			//Attempt to create with specific refresh rate
			m_d3dpp.FullScreen_RefreshRateInHz = RefreshRate;
			hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			//Try again if triple buffering failed
			//NOTE(review): the retry only runs if BackBufferCount is no longer 2, which
			//presumably relies on CreateDevice updating it on failure - confirm
			if (FAILED(hResult) && UseTripleBuffering && (m_d3dpp.BackBufferCount != 2)) {
				hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			}
			if (FAILED(hResult)) {
				//Fall through to the default refresh rate attempt below
			}
			else {
				tryDefaultRefreshRate = false;
			}
		}

		if (tryDefaultRefreshRate) {
			//Attempt to create with default refresh rate
			m_d3dpp.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
			hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			//Try again if triple buffering failed
			if (FAILED(hResult) && UseTripleBuffering && (m_d3dpp.BackBufferCount != 2)) {
				hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			}
			if (FAILED(hResult)) {
				debugf(NAME_Init, TEXT("Failed to create D3D device with hardware vertex processing"));
				m_doSoftwareVertexInit = true;
			}
		}
	}
	//Try SW vertex init if forced earlier or if HW vertex init failed
	if (m_doSoftwareVertexInit) {
		bool tryDefaultRefreshRate = true;
		DWORD behaviorFlags = D3DCREATE_SOFTWARE_VERTEXPROCESSING;

		//Possibly attempt to set refresh rate if fullscreen
		if (!m_d3dpp.Windowed && (RefreshRate > 0)) {
			//Attempt to create with specific refresh rate
			m_d3dpp.FullScreen_RefreshRateInHz = RefreshRate;
			hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			//Try again if triple buffering failed
			if (FAILED(hResult) && UseTripleBuffering && (m_d3dpp.BackBufferCount != 2)) {
				hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			}
			if (FAILED(hResult)) {
				//Fall through to the default refresh rate attempt below
			}
			else {
				tryDefaultRefreshRate = false;
			}
		}

		if (tryDefaultRefreshRate) {
			//Attempt to create with default refresh rate
			m_d3dpp.FullScreen_RefreshRateInHz = D3DPRESENT_RATE_DEFAULT;
			hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			//Try again if triple buffering failed
			if (FAILED(hResult) && UseTripleBuffering && (m_d3dpp.BackBufferCount != 2)) {
				hResult = m_d3d9->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, m_hWnd, behaviorFlags, &m_d3dpp, &m_d3dDevice);
			}
			if (FAILED(hResult)) {
				appErrorf(TEXT("Failed to create D3D device"));
			}
		}
	}


	//Reset previous SwapBuffers status to okay
	m_prevSwapBuffersStatus = true;

	//Display depth buffer bit depth
	debugf(NAME_Init, TEXT("Depth bits: %u"), m_numDepthBits);

	//Get other defaults
	if (!GConfig->GetInt(g_pSection, TEXT("Brightness"), Brightness)) Brightness = 0;

	//Debug parameter listing
	if (DebugBit(DEBUG_BIT_BASIC)) {
		#define UTGLR_DEBUG_SHOW_PARAM_REG(_name) DbgPrintInitParam(TEXT(#_name), _name)
		#define UTGLR_DEBUG_SHOW_PARAM_DCV(_name) DbgPrintInitParam(TEXT(#_name), DCV._name)

		UTGLR_DEBUG_SHOW_PARAM_REG(LODBias);
		UTGLR_DEBUG_SHOW_PARAM_REG(GammaOffset);
		UTGLR_DEBUG_SHOW_PARAM_REG(GammaOffsetRed);
		UTGLR_DEBUG_SHOW_PARAM_REG(GammaOffsetGreen);
		UTGLR_DEBUG_SHOW_PARAM_REG(GammaOffsetBlue);
		UTGLR_DEBUG_SHOW_PARAM_REG(Brightness);
		UTGLR_DEBUG_SHOW_PARAM_REG(GammaCorrectScreenshots);
		UTGLR_DEBUG_SHOW_PARAM_REG(OneXBlending);
		UTGLR_DEBUG_SHOW_PARAM_REG(MaxLogUOverV);
		UTGLR_DEBUG_SHOW_PARAM_REG(MaxLogVOverU);
		UTGLR_DEBUG_SHOW_PARAM_REG(MinLogTextureSize);
		UTGLR_DEBUG_SHOW_PARAM_REG(MaxLogTextureSize);
		UTGLR_DEBUG_SHOW_PARAM_REG(MaxAnisotropy);
		UTGLR_DEBUG_SHOW_PARAM_REG(TMUnits);
		UTGLR_DEBUG_SHOW_PARAM_REG(MaxTMUnits);
		UTGLR_DEBUG_SHOW_PARAM_REG(RefreshRate);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseMultiTexture);
		UTGLR_DEBUG_SHOW_PARAM_REG(UsePalette);
		UTGLR_DEBUG_SHOW_PARAM_REG(UsePrecache);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseTrilinear);
//		UTGLR_DEBUG_SHOW_PARAM_REG(UseVertexSpecular);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseAlphaPalette);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseS3TC);
		UTGLR_DEBUG_SHOW_PARAM_REG(Use16BitTextures);
		UTGLR_DEBUG_SHOW_PARAM_REG(Use565Textures);
		UTGLR_DEBUG_SHOW_PARAM_REG(NoFiltering);
		UTGLR_DEBUG_SHOW_PARAM_REG(DetailMax);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseDetailAlpha);
		UTGLR_DEBUG_SHOW_PARAM_REG(DetailClipping);
		UTGLR_DEBUG_SHOW_PARAM_REG(ColorizeDetailTextures);
		UTGLR_DEBUG_SHOW_PARAM_REG(SinglePassFog);
		UTGLR_DEBUG_SHOW_PARAM_DCV(SinglePassDetail);
//		UTGLR_DEBUG_SHOW_PARAM_REG(BufferActorTris);
		UTGLR_DEBUG_SHOW_PARAM_REG(BufferClippedActorTris);
		UTGLR_DEBUG_SHOW_PARAM_REG(BufferTileQuads);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseSSE);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseSSE2);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseTexIdPool);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseTexPool);
		UTGLR_DEBUG_SHOW_PARAM_REG(CacheStaticMaps);
		UTGLR_DEBUG_SHOW_PARAM_REG(DynamicTexIdRecycleLevel);
		UTGLR_DEBUG_SHOW_PARAM_REG(TexDXT1ToDXT3);
		UTGLR_DEBUG_SHOW_PARAM_DCV(UseVertexProgram);
		UTGLR_DEBUG_SHOW_PARAM_DCV(UseFragmentProgram);
		UTGLR_DEBUG_SHOW_PARAM_REG(SwapInterval);
		UTGLR_DEBUG_SHOW_PARAM_REG(FrameRateLimit);
		UTGLR_DEBUG_SHOW_PARAM_REG(SceneNodeHack);
		UTGLR_DEBUG_SHOW_PARAM_REG(SmoothMaskedTextures);
		UTGLR_DEBUG_SHOW_PARAM_REG(MaskedTextureHack);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseTripleBuffering);
		UTGLR_DEBUG_SHOW_PARAM_REG(UsePureDevice);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseSoftwareVertexProcessing);
		UTGLR_DEBUG_SHOW_PARAM_REG(RequestHighResolutionZ);
		UTGLR_DEBUG_SHOW_PARAM_REG(UseAA);
		UTGLR_DEBUG_SHOW_PARAM_REG(NumAASamples);
		UTGLR_DEBUG_SHOW_PARAM_REG(NoAATiles);
		UTGLR_DEBUG_SHOW_PARAM_REG(ZRangeHack);

		#undef UTGLR_DEBUG_SHOW_PARAM_REG
		#undef UTGLR_DEBUG_SHOW_PARAM_DCV
	}


	//Disable SSE/SSE2 paths if not compiled in or not detected on this CPU
#ifdef UTGLR_INCLUDE_SSE_CODE
	if (UseSSE) {
		if (!CPU_DetectSSE()) {
			UseSSE = 0;
		}
	}
	if (UseSSE2) {
		if (!CPU_DetectSSE2()) {
			UseSSE2 = 0;
		}
	}
#else
	UseSSE = 0;
	UseSSE2 = 0;
#endif
	if (DebugBit(DEBUG_BIT_BASIC)) dout << TEXT("utd3d9r: UseSSE = ") << UseSSE << std::endl;
	if (DebugBit(DEBUG_BIT_BASIC)) dout << TEXT("utd3d9r: UseSSE2 = ") << UseSSE2 << std::endl;

	SetGamma(Viewport->GetOuterUClient()->Brightness);

	//Restrict dynamic tex id recycle level range
	if (DynamicTexIdRecycleLevel < 10) DynamicTexIdRecycleLevel = 10;

	//Always use vertex specular unless caps check fails later
	UseVertexSpecular = 1;

	SupportsTC = UseS3TC;


	//Set paletted texture capability flag
	m_palettedTextureCap = false;

	//Set DXT texture capability flags
	//Check for DXT1 support
	m_dxt1TextureCap = true;
	hResult = m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_DXT1);
	if (FAILED(hResult)) {
		m_dxt1TextureCap = false;
	}
	//Check for DXT3 support
	m_dxt3TextureCap = true;
	hResult = m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_DXT3);
	if (FAILED(hResult)) {
		m_dxt3TextureCap = false;
	}

	//Set 16-bit texture capability flag
	//All three 16-bit formats must be supported for the flag to stay set
	m_16BitTextureCap = true;
	//Check RGB565
	hResult = m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_R5G6B5);
	if (FAILED(hResult)) {
		m_16BitTextureCap = false;
	}
	//Check RGBA5551
	hResult = m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_A1R5G5B5);
	if (FAILED(hResult)) {
		m_16BitTextureCap = false;
	}
	//Check RGB555
	hResult = m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_X1R5G5B5);
	if (FAILED(hResult)) {
		m_16BitTextureCap = false;
	}

	//Set alpha texture capability flag
	m_alphaTextureCap = true;
	hResult = m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, d3ddm.Format, 0, D3DRTYPE_TEXTURE, D3DFMT_A8);
	if (FAILED(hResult)) {
		m_alphaTextureCap = false;
	}



	// Validate flags.

	//Special extensions validation for init only config pass
	if (!m_dxt1TextureCap) SupportsTC = 0;

	//DCV refresh
	ConfigValidate_RefreshDCV();

	//Required extensions config validation pass
	ConfigValidate_RequiredExtensions();


	if (!MaxTMUnits || (MaxTMUnits > MAX_TMUNITS)) {
		MaxTMUnits = MAX_TMUNITS;
	}

	if (UseMultiTexture) {
		TMUnits = m_d3dCaps.MaxSimultaneousTextures;
		debugf(TEXT("%i Texture Mapping Units found"), TMUnits);
		if (TMUnits > MaxTMUnits) {
			TMUnits = MaxTMUnits;
		}
	}
	else {
		TMUnits = 1;
	}


	//Main config validation pass (after set TMUnits)
	ConfigValidate_Main();


	if (MaxAnisotropy < 0) {
		MaxAnisotropy = 0;
	}
	if (MaxAnisotropy) {
		int iMaxAnisotropyLimit;
		iMaxAnisotropyLimit = m_d3dCaps.MaxAnisotropy;
		debugf(TEXT("MaxAnisotropy = %i"), iMaxAnisotropyLimit);
		if (MaxAnisotropy > iMaxAnisotropyLimit) {
			MaxAnisotropy = iMaxAnisotropyLimit;
		}
	}

	BufferActorTris = 1;

	if (SupportsTC) {
		debugf(TEXT("Trying to use S3TC extension."));
	}

	//Special case MaxLogTextureSize == 0
	if (MaxLogTextureSize == 0) MaxLogTextureSize = 8;

	//Compute floor(log2) of the smaller maximum texture dimension
	INT MaxTextureSize = Min(m_d3dCaps.MaxTextureWidth, m_d3dCaps.MaxTextureHeight);
	INT Dummy = -1;
	while (MaxTextureSize > 0) {
		MaxTextureSize >>= 1;
		Dummy++;
	}

	if ((MaxLogTextureSize > Dummy) || (SupportsTC)) MaxLogTextureSize = Dummy;
	if ((MinLogTextureSize < 2) || (SupportsTC)) MinLogTextureSize = 2;

	if (SupportsTC) {
		MaxLogUOverV = MaxLogTextureSize;
		MaxLogVOverU = MaxLogTextureSize;
	}
	else {
		MaxLogUOverV = 8;
		MaxLogVOverU = 8;

		//Limit by floor(log2) of the reported maximum texture aspect ratio, if any
		INT MaxTextureAspectRatio = m_d3dCaps.MaxTextureAspectRatio;
		if (MaxTextureAspectRatio > 0) {
			INT MaxLogTextureAspectRatio = -1;
			while (MaxTextureAspectRatio > 0) {
				MaxTextureAspectRatio >>= 1;
				MaxLogTextureAspectRatio++;
			}
			if (MaxLogTextureAspectRatio < MaxLogUOverV) MaxLogUOverV = MaxLogTextureAspectRatio;
			if (MaxLogTextureAspectRatio < MaxLogVOverU) MaxLogVOverU = MaxLogTextureAspectRatio;
		}
	}

	debugf(TEXT("MinLogTextureSize = %i"), MinLogTextureSize);
	debugf(TEXT("MaxLogTextureSize = %i"), MaxLogTextureSize);


	//Set pointers to aligned memory
	MapDotArray = (FGLMapDot *)AlignMemPtr(m_MapDotArrayMem, VERTEX_ARRAY_ALIGN);


	// Verify hardware defaults.
	check(MinLogTextureSize >= 0);
	check(MaxLogTextureSize >= 0);
	check(MinLogTextureSize < MaxLogTextureSize);
	check(MinLogTextureSize <= MaxLogTextureSize);

	// Flush textures.
	Flush(1);

	//Invalidate fixed texture ids
	m_pNoTexObj = NULL;
	m_pAlphaTexObj = NULL;

	//Initialize permanent rendering state, including allocation of some resources
	InitPermanentResourcesAndRenderingState();


	//Initialize previous lock variables
	PL_DetailTextures = DetailTextures;
	PL_OneXBlending = OneXBlending;
	PL_MaxLogUOverV = MaxLogUOverV;
	PL_MaxLogVOverU = MaxLogVOverU;
	PL_MinLogTextureSize = MinLogTextureSize;
	PL_MaxLogTextureSize = MaxLogTextureSize;
	PL_NoFiltering = NoFiltering;
	PL_UseTrilinear = UseTrilinear;
	PL_Use16BitTextures = Use16BitTextures;
	PL_Use565Textures = Use565Textures;
	PL_TexDXT1ToDXT3 = TexDXT1ToDXT3;
	PL_MaxAnisotropy = MaxAnisotropy;
	PL_SmoothMaskedTextures = SmoothMaskedTextures;
	PL_MaskedTextureHack = MaskedTextureHack;
	PL_LODBias = LODBias;
	PL_UsePalette = UsePalette;
	PL_UseAlphaPalette = UseAlphaPalette;
	PL_UseDetailAlpha = UseDetailAlpha;
	PL_SinglePassDetail = SinglePassDetail;
	PL_UseVertexProgram = UseVertexProgram;
	PL_UseFragmentProgram = UseFragmentProgram;
	PL_UseSSE = UseSSE;
	PL_UseSSE2 = UseSSE2;


	//Reset current frame count
	m_currentFrameCount = 0;

	// Remember fullscreenness.
	WasFullscreen = Fullscreen;

	return 1;

	unguard;
}

//Tears down everything created by SetRes: flushes textures, releases the
//fixed textures and permanent resources, then releases the D3D device and
//the main D3D9 object. Both m_d3d9 and m_d3dDevice must be non-NULL on entry.
void UD3D9RenderDevice::UnsetRes() {
	guard(UD3D9RenderDevice::UnsetRes);

	//Callers must only invoke this when both objects exist
	check(m_d3d9);
	check(m_d3dDevice);

	//Flush textures
	Flush(1);

	//Free fixed textures if they were allocated
	if (m_pNoTexObj) {
		m_pNoTexObj->Release();
		m_pNoTexObj = NULL;
	}
	if (m_pAlphaTexObj) {
		m_pAlphaTexObj->Release();
		m_pAlphaTexObj = NULL;
	}

	//Free permanent resources
	//Done before the device is released because it issues calls on m_d3dDevice
	FreePermanentResources();

	//Release D3D device
	m_d3dDevice->Release();
	m_d3dDevice = NULL;

	//Release main D3D9 object
	m_d3d9->Release();
	m_d3d9 = NULL;

	unguard;
}


bool UD3D9RenderDevice::CheckDepthFormat(D3DFORMAT adapterFormat, D3DFORMAT backBufferFormat, D3DFORMAT depthBufferFormat) {
	//Returns true only if depthBufferFormat is usable as a depth/stencil
	//surface with adapterFormat and is also compatible with backBufferFormat.
	//The compatibility check is only made when the format check succeeds.
	return
		//Depth format supported at all
		SUCCEEDED(m_d3d9->CheckDeviceFormat(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, adapterFormat, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_SURFACE, depthBufferFormat)) &&
		//Depth format compatible with the back buffer format
		SUCCEEDED(m_d3d9->CheckDepthStencilMatch(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, adapterFormat, backBufferFormat, depthBufferFormat));
}


void UD3D9RenderDevice::ConfigValidate_RefreshDCV(void) {
	//Reloads the working copies of these settings from their DCV values so
	//that later validation passes start from the DCV state
	SinglePassDetail = DCV.SinglePassDetail;
	UseVertexProgram = DCV.UseVertexProgram;
	UseFragmentProgram = DCV.UseFragmentProgram;
}

void UD3D9RenderDevice::ConfigValidate_RequiredExtensions(void) {
	if (m_d3dCaps.PixelShaderVersion < D3DPS_VERSION(2,0)) UseFragmentProgram = 0;
	if (m_d3dCaps.VertexShaderVersion < D3DVS_VERSION(1,1)) UseVertexProgram = 0;
	if (!(m_d3dCaps.TextureOpCaps & D3DTEXOPCAPS_BLENDCURRENTALPHA)) DetailTextures = 0;
	if (!(m_d3dCaps.TextureOpCaps & D3DTEXOPCAPS_BLENDCURRENTALPHA)) UseDetailAlpha = 0;
	if (!m_alphaTextureCap) UseDetailAlpha = 0;
	if (!(m_d3dCaps.TextureFilterCaps & D3DPTFILTERCAPS_MINFANISOTROPIC)) MaxAnisotropy = 0;
	if (!(m_d3dCaps.RasterCaps & D3DPRASTERCAPS_MIPMAPLODBIAS)) LODBias = 0;
	if (!m_palettedTextureCap) UsePalette = 0;
	if (!(m_d3dCaps.TextureCaps & D3DPTEXTURECAPS_ALPHAPALETTE)) UseAlphaPalette = 0;
	if (!m_dxt3TextureCap) TexDXT1ToDXT3 = 0;
	if (!m_16BitTextureCap) Use16BitTextures = 0;

	if (!(m_d3dCaps.TextureOpCaps & D3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR)) SinglePassFog = 0;

	//Force 1X blending if no 2X modulation support
	if (!(m_d3dCaps.TextureOpCaps & D3DTEXOPCAPS_MODULATE2X)) OneXBlending = 0x1;	//Must use proper bit offset for Bool param

	return;
}

void UD3D9RenderDevice::ConfigValidate_Main(void) {
	//Resolves dependencies between configuration options once TMUnits is known

	//Detail alpha requires at least two texture units
	if (TMUnits < 2) {
		UseDetailAlpha = 0;
	}

	//Single pass detail texturing requires at least 4 texture units and detail alpha
	if ((TMUnits < 4) || !UseDetailAlpha) {
		SinglePassDetail = 0;
	}

	//Limit maximum DetailMax
	if (DetailMax > 3) {
		DetailMax = 3;
	}

	//Must use detail alpha for vertex program detail textures
	if (DetailTextures && !UseDetailAlpha) {
		UseVertexProgram = 0;
	}

	//Fragment program mode requires vertex program mode
	if (!UseVertexProgram) {
		UseFragmentProgram = 0;
	}
}


//Puts the D3D device into the renderer's baseline state and creates the
//resources that live as long as the device configuration does:
// - view transform and fixed render / texture stage states
// - the "no texture" texture and, with UseDetailAlpha, the alpha texture
// - dynamic vertex buffers (vertex+color, secondary color, one tex coord
//   buffer per texture unit) and the vertex declarations that use them
// - stream source bindings for those buffers
// - vertex / fragment program mode, when enabled
// - cached state tracking variables
//Any failed D3D create / set call is reported through appErrorf.
void UD3D9RenderDevice::InitPermanentResourcesAndRenderingState(void) {
	guard(InitPermanentResourcesAndRenderingState);

	unsigned int u;
	HRESULT hResult;

#ifdef UTD3D9R_INCLUDE_SHADER_ASM
	AssembleShader();
#endif

	//Set view matrix
	//Identity with the Y and Z axes negated
	D3DMATRIX d3dView = { +1.0f,  0.0f,  0.0f,  0.0f,
						   0.0f, -1.0f,  0.0f,  0.0f,
						   0.0f,  0.0f, -1.0f,  0.0f,
						   0.0f,  0.0f,  0.0f, +1.0f };
	m_d3dDevice->SetTransform(D3DTS_VIEW, &d3dView);

	//Little white texture for no texture operations
	InitNoTextureSafe();

	//Depth test on, depth writes on, pass when less or equal
	m_d3dDevice->SetRenderState(D3DRS_ZENABLE, D3DZB_TRUE);
	m_d3dDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE);
	m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);

	//Alpha test passes for alpha greater than 127
	m_d3dDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_GREATER);
	m_d3dDevice->SetRenderState(D3DRS_ALPHAREF, 127);

	//Default blend factors: source replaces destination
	m_d3dDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);
	m_d3dDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO);

	m_d3dDevice->SetRenderState(D3DRS_SHADEMODE, D3DSHADE_GOURAUD);
	m_d3dDevice->SetRenderState(D3DRS_DITHERENABLE, TRUE);

#ifdef UTGLR_RUNE_BUILD
	//Rune build: linear table fog starting at zero, initially tracked as disabled
	m_d3dDevice->SetRenderState(D3DRS_FOGTABLEMODE, D3DFOG_LINEAR);
	FLOAT fFogStart = 0.0f;
	//Render state values are DWORDs, so the float bits are passed as is
	m_d3dDevice->SetRenderState(D3DRS_FOGSTART, *(DWORD *)&fFogStart);
	m_gpFogEnabled = false;
#endif

	//No fixed function lighting and no back face culling
	m_d3dDevice->SetRenderState(D3DRS_LIGHTING, FALSE);
	m_d3dDevice->SetRenderState(D3DRS_CULLMODE, D3DCULL_NONE);

	//Color and alpha modulation on texEnv0
	m_d3dDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_MODULATE);
	m_d3dDevice->SetTextureStageState(0, D3DTSS_ALPHAOP, D3DTOP_MODULATE);

	//Set default texture stage tracking values
	for (u = 0; u < MAX_TMUNITS; u++) {
		m_curTexStageParams[u] = CT_DEFAULT_TEX_PARAMS;
	}

	//Apply LOD bias and max anisotropy to the texture units in use, if configured
	if (LODBias) {
		SetTexLODBiasState(TMUnits);
	}

	if (MaxAnisotropy) {
		SetTexMaxAnisotropyState(TMUnits);
	}

	if (UseDetailAlpha) {	// vogel: alpha texture for better detail textures (no vertex alpha)
		InitAlphaTextureSafe();
	}

	//Initialize texture environment state
	InitOrInvalidateTexEnvState();

	//Reset current texture ids to hopefully unused values
	for (u = 0; u < MAX_TMUNITS; u++) {
		TexInfo[u].CurrentCacheID = TEX_CACHE_ID_UNUSED;
		TexInfo[u].pBind = NULL;
	}


	//Create vertex buffers
	//All are dynamic, write only buffers with room for VERTEX_ARRAY_SIZE entries
	D3DPOOL vertexBufferPool = D3DPOOL_DEFAULT;

	//Vertex and primary color
	hResult = m_d3dDevice->CreateVertexBuffer(sizeof(FGLVertexColor) * VERTEX_ARRAY_SIZE, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, vertexBufferPool, &m_d3dVertexColorBuffer, NULL);
	if (FAILED(hResult)) {
		appErrorf(TEXT("CreateVertexBuffer failed"));
	}

	//Secondary color
	hResult = m_d3dDevice->CreateVertexBuffer(sizeof(FGLSecondaryColor) * VERTEX_ARRAY_SIZE, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, vertexBufferPool, &m_d3dSecondaryColorBuffer, NULL);
	if (FAILED(hResult)) {
		appErrorf(TEXT("CreateVertexBuffer failed"));
	}

	//TexCoord
	//One buffer per texture unit in use
	for (u = 0; u < TMUnits; u++) {
		hResult = m_d3dDevice->CreateVertexBuffer(sizeof(FGLTexCoord) * VERTEX_ARRAY_SIZE, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, vertexBufferPool, &m_d3dTexCoordBuffer[u], NULL);
		if (FAILED(hResult)) {
			appErrorf(TEXT("CreateVertexBuffer failed"));
		}
	}


	//Create stream definitions

	//Stream definition with vertices and color
	hResult = m_d3dDevice->CreateVertexDeclaration(g_oneColorStreamDef, &m_oneColorVertexDecl);
	if (FAILED(hResult)) {
		appErrorf(TEXT("CreateVertexDeclaration failed"));
	}

	//Standard stream definitions with vertices, color, and a variable number of tex coords
	for (u = 0; u < TMUnits; u++) {
		hResult = m_d3dDevice->CreateVertexDeclaration(g_standardNTextureStreamDefs[u], &m_standardNTextureVertexDecl[u]);
		if (FAILED(hResult)) {
			appErrorf(TEXT("CreateVertexDeclaration failed"));
		}
	}

	//Stream definition with vertices, two colors, and one tex coord
	hResult = m_d3dDevice->CreateVertexDeclaration(g_twoColorSingleTextureStreamDef, &m_twoColorSingleTextureVertexDecl);
	if (FAILED(hResult)) {
		appErrorf(TEXT("CreateVertexDeclaration failed"));
	}


	//Initialize vertex buffer state tracking information
	m_curVertexBufferPos = 0;
	m_vertexColorBufferNeedsDiscard = false;
	m_secondaryColorBufferNeedsDiscard = false;
	for (u = 0; u < MAX_TMUNITS; u++) {
		m_texCoordBufferNeedsDiscard[u] = false;
	}


	//Set stream sources
	//Stream 0 is vertex+color, stream 1 is secondary color, streams 2 and up are tex coords
	//Vertex and primary color
	hResult = m_d3dDevice->SetStreamSource(0, m_d3dVertexColorBuffer, 0, sizeof(FGLVertexColor));
	if (FAILED(hResult)) {
		appErrorf(TEXT("SetStreamSource failed"));
	}

	//Secondary Color
	hResult = m_d3dDevice->SetStreamSource(1, m_d3dSecondaryColorBuffer, 0, sizeof(FGLSecondaryColor));
	if (FAILED(hResult)) {
		appErrorf(TEXT("SetStreamSource failed"));
	}

	//TexCoord
	for (u = 0; u < TMUnits; u++) {
		hResult = m_d3dDevice->SetStreamSource(2 + u, m_d3dTexCoordBuffer[u], 0, sizeof(FGLTexCoord));
		if (FAILED(hResult)) {
			appErrorf(TEXT("SetStreamSource failed"));
		}
	}


	//Setup vertex programs
	if (UseVertexProgram) {
		//Attempt to initialize vertex program mode
		TryInitializeVertexProgramMode();
	}

	//Fragment program mode requires vertex program mode
	//NOTE(review): rechecked here, presumably because TryInitializeVertexProgramMode can clear UseVertexProgram - confirm
	if (!UseVertexProgram) UseFragmentProgram = 0;


	//Setup fragment programs
	if (UseFragmentProgram) {
		//Attempt to initialize fragment program mode
		TryInitializeFragmentProgramMode();
	}


	//Set default stream definition
	hResult = m_d3dDevice->SetVertexDeclaration(m_standardNTextureVertexDecl[0]);
	if (FAILED(hResult)) {
		appErrorf(TEXT("SetVertexDeclaration failed"));
	}
	//Keep the cached current declaration in sync with the device
	m_curVertexDecl = m_standardNTextureVertexDecl[0];

	//No vertex shader current at initialization
	m_curVertexShader = NULL;

	//No pixel shader current at initialization
	m_curPixelShader = NULL;


	//Initialize texture state cache information
	m_texEnableBits = 0x1;

	// Init variables.
	BufferedVerts = 0;
	BufferedTileVerts = 0;

	m_curBlendFlags = PF_Occlude;
	m_smoothMaskedTexturesBit = 0;
	m_lastPolyFlags = 0;

	//Initialize color flags
	m_requestedColorFlags = 0;

	//Initialize Z range hack state
	m_useZRangeHack = false;
	m_nearZRangeHackProjectionActive = false;
	m_requestNearZRangeHackProjection = false;
	m_lastRequestNearZRangeHackProjection = false;


	unguard;
}

//Releases the D3D resources owned for the lifetime of the device configuration:
//shuts down vertex and fragment program modes, detaches all stream sources,
//releases the vertex buffers and releases the vertex declarations.
//A failing SetStreamSource call is reported through appErrorf.
void UD3D9RenderDevice::FreePermanentResources(void) {
	guard(FreePermanentResources);

	//Streams 0 and 1 carry vertex/primary color and secondary color
	//The per TMU tex coord streams follow them
	const unsigned int numFixedStreams = 2;
	unsigned int streamIdx;
	unsigned int tmu;

	//Vertex programs are shut down first, then fragment programs
	//Each call frees its programs if they were allocated and leaves its program mode if necessary
	ShutdownVertexProgramMode();
	ShutdownFragmentProgramMode();


	//Detach the vertex/primary color and secondary color streams
	for (streamIdx = 0; streamIdx < numFixedStreams; streamIdx++) {
		if (FAILED(m_d3dDevice->SetStreamSource(streamIdx, NULL, 0, 0))) {
			appErrorf(TEXT("SetStreamSource failed"));
		}
	}

	//Detach the tex coord stream of every TMU
	for (tmu = 0; tmu < TMUnits; tmu++) {
		if (FAILED(m_d3dDevice->SetStreamSource(numFixedStreams + tmu, NULL, 0, 0))) {
			appErrorf(TEXT("SetStreamSource failed"));
		}
	}


	//Release the vertex buffers, clearing each pointer once released
	if (m_d3dVertexColorBuffer != NULL) {
		m_d3dVertexColorBuffer->Release();
		m_d3dVertexColorBuffer = NULL;
	}
	if (m_d3dSecondaryColorBuffer != NULL) {
		m_d3dSecondaryColorBuffer->Release();
		m_d3dSecondaryColorBuffer = NULL;
	}
	for (tmu = 0; tmu < TMUnits; tmu++) {
		if (m_d3dTexCoordBuffer[tmu] != NULL) {
			m_d3dTexCoordBuffer[tmu]->Release();
			m_d3dTexCoordBuffer[tmu] = NULL;
		}
	}


	//Switch to an FVF so that none of the vertex declarations released below is still current
	m_d3dDevice->SetFVF(D3DFVF_XYZ | D3DFVF_DIFFUSE);

	//Release the vertex declarations
	//Vertices and color only
	if (m_oneColorVertexDecl != NULL) {
		m_oneColorVertexDecl->Release();
		m_oneColorVertexDecl = NULL;
	}
	//Vertices, color, and a variable number of tex coords
	for (tmu = 0; tmu < TMUnits; tmu++) {
		if (m_standardNTextureVertexDecl[tmu] != NULL) {
			m_standardNTextureVertexDecl[tmu]->Release();
			m_standardNTextureVertexDecl[tmu] = NULL;
		}
	}
	//Vertices, two colors, and one tex coord
	if (m_twoColorSingleTextureVertexDecl != NULL) {
		m_twoColorSingleTextureVertexDecl->Release();
		m_twoColorSingleTextureVertexDecl = NULL;
	}

	unguard;
}


//Initializes the render device for a viewport.
//
//Enumerates the available display modes into Modes, loads the D3D9 library and
//resolves Direct3DCreate9 on first use, binds this device to the viewport window
//and its DC, and finally sets the requested resolution through SetRes.
//
//InViewport     viewport that will be rendered to; its window handle is saved
//NewX, NewY     requested resolution, passed to SetRes
//NewColorBytes  requested color depth in bytes, passed to SetRes
//Fullscreen     requested fullscreen state, passed to SetRes
//
//Returns 1 on success. Returns 0 if the D3D9 library or its entry point cannot be
//loaded, and the result of FailedInitf if SetRes fails.
UBOOL UD3D9RenderDevice::Init(UViewport* InViewport, INT NewX, INT NewY, INT NewColorBytes, UBOOL Fullscreen) {
	guard(UD3D9RenderDevice::Init);

	debugf(TEXT("Initializing D3D9Drv..."));

	//Reset global gamma tracking state when no other device exists
	if (NumDevices == 0) {
		g_gammaFirstTime = true;
		g_haveOriginalGammaRamp = false;
	}

	// Get list of device modes.
	//The loop ends when EnumDisplaySettings reports no mode for index i
	for (INT i = 0; ; i++) {
		UBOOL UnicodeOS;

#if defined(NO_UNICODE_OS_SUPPORT) || !defined(UNICODE)
		UnicodeOS = 0;
#elif defined(NO_ANSI_OS_SUPPORT)
		UnicodeOS = 1;
#else
		UnicodeOS = GUnicodeOS;
#endif

		if (!UnicodeOS) {
#if defined(NO_UNICODE_OS_SUPPORT) || !defined(UNICODE) || !defined(NO_ANSI_OS_SUPPORT)
			DEVMODEA Tmp;
			appMemzero(&Tmp, sizeof(Tmp));
			Tmp.dmSize = sizeof(Tmp);
			if (!EnumDisplaySettingsA(NULL, i, &Tmp)) {
				break;
			}
			//Mode stored as (width, height, bits per pixel, refresh rate)
			Modes.AddUniqueItem(FPlane(Tmp.dmPelsWidth, Tmp.dmPelsHeight, Tmp.dmBitsPerPel, Tmp.dmDisplayFrequency));
#endif
		}
		else {
#if !defined(NO_UNICODE_OS_SUPPORT) && defined(UNICODE)
			DEVMODEW Tmp;
			appMemzero(&Tmp, sizeof(Tmp));
			Tmp.dmSize = sizeof(Tmp);
			if (!EnumDisplaySettingsW(NULL, i, &Tmp)) {
				break;
			}
			//Mode stored as (width, height, bits per pixel, refresh rate)
			Modes.AddUniqueItem(FPlane(Tmp.dmPelsWidth, Tmp.dmPelsHeight, Tmp.dmBitsPerPel, Tmp.dmDisplayFrequency));
#endif
		}
	}

	//Load D3D9 library
	if (!hModuleD3d9) {
		hModuleD3d9 = LoadLibraryA(g_d3d9DllName);
		if (!hModuleD3d9) {
			debugf(NAME_Init, TEXT("Failed to load %s"), appFromAnsi(g_d3d9DllName));
			return 0;
		}
		pDirect3DCreate9 = (LPDIRECT3DCREATE9)GetProcAddress(hModuleD3d9, "Direct3DCreate9");
		if (!pDirect3DCreate9) {
			debugf(NAME_Init, TEXT("Failed to load function from %s"), appFromAnsi(g_d3d9DllName));
			//Unload the library and clear the handle
			//Otherwise a later Init call would skip this block and continue with a NULL pDirect3DCreate9
			FreeLibrary(hModuleD3d9);
			hModuleD3d9 = NULL;
			return 0;
		}
	}

	//Only count the device once the library is known to be usable
	NumDevices++;

	// Init this rendering context.
	//Point the texture bind structures at this device's local instances
	m_zeroPrefixBindTrees = m_localZeroPrefixBindTrees;
	m_nonZeroPrefixBindTrees = m_localNonZeroPrefixBindTrees;
	m_nonZeroPrefixBindChain = &m_localNonZeroPrefixBindChain;
	m_RGBA8TexPool = &m_localRGBA8TexPool;

	Viewport = InViewport;

	//Save main window handle
	m_hWnd = (HWND)InViewport->GetWindow();
	check(m_hWnd);
	//Get main window DC
	m_hDC = GetDC(m_hWnd);
	check(m_hDC);

	if (!SetRes(NewX, NewY, NewColorBytes, Fullscreen)) {
		return FailedInitf(LocalizeError("ResFailed"));
	}

	return 1;
	unguard;
}

//Handles console commands directed at the render device.
//
//Recognized commands:
//  DGL BUFFERTRIS  toggles BufferActorTris (forced off without UseVertexSpecular)
//  DGL BUILD       logs the renderer build string
//  DGL AA          toggles the default AA enable state when AA is in use
//  GetRes          writes the list of available resolutions to Ar
//
//Returns 1 if the command was handled, 0 otherwise.
UBOOL UD3D9RenderDevice::Exec(const TCHAR* Cmd, FOutputDevice& Ar) {
	guard(UD3D9RenderDevice::Exec);

#ifndef UTGLR_UNREAL_BUILD
	//Give the base class first chance at the command
	if (URenderDevice::Exec(Cmd, Ar)) {
		return 1;
	}
#endif

	if (ParseCommand(&Cmd, TEXT("DGL"))) {
		if (ParseCommand(&Cmd, TEXT("BUFFERTRIS"))) {
			//Toggle, but never leave it enabled without vertex specular
			BufferActorTris = (UseVertexSpecular) ? !BufferActorTris : 0;
			debugf(TEXT("BUFFERTRIS [%i]"), BufferActorTris);
			return 1;
		}
		if (ParseCommand(&Cmd, TEXT("BUILD"))) {
			debugf(TEXT("D3D9 renderer built: ?????"));
			return 1;
		}
		if (ParseCommand(&Cmd, TEXT("AA"))) {
			//The toggle only applies when AA is in use, but the command is handled either way
			if (m_usingAA) {
				m_defAAEnable = !m_defAAEnable;
				debugf(TEXT("AA Enable [%u]"), (m_defAAEnable) ? 1 : 0);
			}
			return 1;
		}

		//Unknown DGL sub command
		return 0;
	}

	if (ParseCommand(&Cmd, TEXT("GetRes"))) {
		TArray<FPlane> Relevant;

		//Collect the unique resolutions that match the viewport color depth
		for (INT modeIdx = 0; modeIdx < Modes.Num(); modeIdx++) {
			const FPlane &Mode = Modes(modeIdx);

			//Z holds the bit depth of the mode
			if (Mode.Z != (Viewport->ColorBytes * 8)) {
				continue;
			}
			//320x200 and 640x400 are never listed
			if ((Mode.X == 320) && (Mode.Y == 200)) {
				continue;
			}
			if ((Mode.X == 640) && (Mode.Y == 400)) {
				continue;
			}

			Relevant.AddUniqueItem(FPlane(Mode.X, Mode.Y, 0, 0));
		}
		appQsort(&Relevant(0), Relevant.Num(), sizeof(FPlane), (QSORT_COMPARE)CompareRes);

		//Build a space separated "WxH" list and strip the trailing space
		FString Str;
		for (INT resIdx = 0; resIdx < Relevant.Num(); resIdx++) {
			const FPlane &Res = Relevant(resIdx);
			Str += FString::Printf(TEXT("%ix%i "), (INT)Res.X, (INT)Res.Y);
		}
		Ar.Log(*Str.LeftChop(1));
		return 1;
	}

	return 0;
	unguard;
}

//Begins rendering of a frame.
//
//Asserts that the device is not already locked, resets the per frame statistics,
//checks for a lost device (resetting it through SetRes when possible), begins the
//D3D scene and clears the depth buffer. Afterwards the configuration is
//revalidated: settings are compared against their PL_ shadow copies and textures,
//vertex programs, or fragment programs are flushed/reloaded when they changed.
//Finally the function pointers and precalculated values that depend on the
//configuration are set up.
//
//InFlashScale     saved to FlashScale
//InFlashFog       saved to FlashFog
//ScreenClear      color used when the render target is cleared
//RenderLockFlags  LOCKR_ClearScreen requests a clear of the render target
//InHitData        saved to HitData
//InHitSize        saved to HitSize
void UD3D9RenderDevice::Lock(FPlane InFlashScale, FPlane InFlashFog, FPlane ScreenClear, DWORD RenderLockFlags, BYTE* InHitData, INT* InHitSize) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: Lock = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::Lock);
	//Lock calls may not be nested
	check(LockCount == 0);
	++LockCount;


	//Reset stats
	BindCycles = ImageCycles = ComplexCycles = GouraudCycles = TileCycles = 0;

	m_vpEnableCount = 0;
	m_vpSwitchCount = 0;
	m_fpEnableCount = 0;
	m_fpSwitchCount = 0;
	m_AASwitchCount = 0;
	m_sceneNodeCount = 0;
	m_sceneNodeHackCount = 0;
	m_stat0Count = 0;
	m_stat1Count = 0;


	HRESULT hResult;

	//Check for lost device
	hResult = m_d3dDevice->TestCooperativeLevel();
	if (FAILED(hResult)) {
#if 0
{
	dout << L"utd3d9r: Device Lost" << std::endl;
}
#endif
		//Wait for device to become available again
		//Note that every path through the loop body ends in a break or a fatal error,
		//so the body runs at most once
		while (1) {
			//Check if device can be reset and restored
			if (hResult == D3DERR_DEVICENOTRESET) {
				//Set new resolution
				//The previously saved SetRes parameters are reused for the reset
				m_SetRes_isDeviceReset = true;
				if (!SetRes(m_SetRes_NewX, m_SetRes_NewY, m_SetRes_NewColorBytes, m_SetRes_Fullscreen)) {
					appErrorf(TEXT("Failed to reset lost D3D device"));
				}

				//Exit wait loop
				break;
			}
			//If not lost and not ready to be restored, error
			else if (hResult != D3DERR_DEVICELOST) {
				appErrorf(TEXT("Error checking for lost D3D device"));
			}
			//Otherwise, device is lost and cannot be restored yet

			//Wait
			Sleep(100);

			//Don't wait for device to become available here to prevent deadlock
			//Rendering continues below with the device still lost
			break;
		}
	}

	//D3D begin scene
	if (FAILED(m_d3dDevice->BeginScene())) {
		appErrorf(TEXT("BeginScene failed"));
	}

	//Clear the Z-buffer
	//The leading "1 ||" makes this condition always true, so the Z-buffer is cleared on every Lock
	//The render target is additionally cleared only if LOCKR_ClearScreen is set
	if (1 || GIsEditor || (RenderLockFlags & LOCKR_ClearScreen)) {
		SetBlend(PF_Occlude);
		m_d3dDevice->Clear(0, NULL, D3DCLEAR_ZBUFFER | ((RenderLockFlags & LOCKR_ClearScreen) ? D3DCLEAR_TARGET : 0), (DWORD)FColor(ScreenClear).TrueColor(), 1.0f, 0);
	}
	m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);


	//Actions collected while comparing settings against their PL_ copies below
	bool flushTextures = false;
	bool needVertexProgramReload = false;
	bool needFragmentProgramReload = false;


	//DCV refresh
	ConfigValidate_RefreshDCV();

	//Required extensions config validation pass
	ConfigValidate_RequiredExtensions();


	//Main config validation pass
	ConfigValidate_Main();


	//Detect changes in 1X blending setting and force tex env flush if necessary
	if (OneXBlending != PL_OneXBlending) {
		PL_OneXBlending = OneXBlending;
		InitOrInvalidateTexEnvState();
	}

	//Prevent changes to these parameters
	//Any modified value is overwritten with its PL_ copy
	MaxLogUOverV = PL_MaxLogUOverV;
	MaxLogVOverU = PL_MaxLogVOverU;
	MinLogTextureSize = PL_MinLogTextureSize;
	MaxLogTextureSize = PL_MaxLogTextureSize;

	//Detect changes in various texture related options and force texture flush if necessary
	if (NoFiltering != PL_NoFiltering) {
		PL_NoFiltering = NoFiltering;
		flushTextures = true;
	}
	if (UseTrilinear != PL_UseTrilinear) {
		PL_UseTrilinear = UseTrilinear;
		flushTextures = true;
	}
	if (Use16BitTextures != PL_Use16BitTextures) {
		PL_Use16BitTextures = Use16BitTextures;
		flushTextures = true;
	}
	if (Use565Textures != PL_Use565Textures) {
		PL_Use565Textures = Use565Textures;
		flushTextures = true;
	}
	if (TexDXT1ToDXT3 != PL_TexDXT1ToDXT3) {
		PL_TexDXT1ToDXT3 = TexDXT1ToDXT3;
		flushTextures = true;
	}
	//MaxAnisotropy cannot be negative
	if (MaxAnisotropy < 0) {
		MaxAnisotropy = 0;
	}
	//MaxAnisotropy cannot exceed the limit reported in the device caps
	if (MaxAnisotropy > m_d3dCaps.MaxAnisotropy) {
		MaxAnisotropy = m_d3dCaps.MaxAnisotropy;
	}
	if (MaxAnisotropy != PL_MaxAnisotropy) {
		PL_MaxAnisotropy = MaxAnisotropy;
		flushTextures = true;

		SetTexMaxAnisotropyState(TMUnits);
	}

	if (SmoothMaskedTextures != PL_SmoothMaskedTextures) {
		PL_SmoothMaskedTextures = SmoothMaskedTextures;
		//Clear masked blending state if set before adjusting smooth masked textures bit
		SetBlend(PF_Occlude);
	}
	//Set smooth masked textures bit
	m_smoothMaskedTexturesBit = (SmoothMaskedTextures != 0) ? PF_Masked : 0;

	if (MaskedTextureHack != PL_MaskedTextureHack) {
		PL_MaskedTextureHack = MaskedTextureHack;
		flushTextures = true;
	}

	//A LOD bias change updates texture stage state only and does not flush textures
	if (LODBias != PL_LODBias) {
		PL_LODBias = LODBias;
		SetTexLODBiasState(TMUnits);
	}

	if (UsePalette != PL_UsePalette) {
		PL_UsePalette = UsePalette;
		flushTextures = true;
	}
	if (UseAlphaPalette != PL_UseAlphaPalette) {
		PL_UseAlphaPalette = UseAlphaPalette;
		flushTextures = true;
	}

	//The reload flags below are only raised when an option is switched on
	if (DetailTextures != PL_DetailTextures) {
		PL_DetailTextures = DetailTextures;
		flushTextures = true;
		if (DetailTextures) {
			needFragmentProgramReload = true;
		}
	}

	if (UseDetailAlpha != PL_UseDetailAlpha) {
		PL_UseDetailAlpha = UseDetailAlpha;
		if (UseDetailAlpha) {
			InitAlphaTextureSafe();
			needVertexProgramReload = true;
		}
	}

	if (SinglePassDetail != PL_SinglePassDetail) {
		PL_SinglePassDetail = SinglePassDetail;
		if (SinglePassDetail) {
			needVertexProgramReload = true;
		}
	}

	//Extra vertex programs needed in fragment program mode
	//PL_UseFragmentProgram is intentionally not updated here; that happens further below
	if (UseFragmentProgram != PL_UseFragmentProgram) {
		if (UseFragmentProgram) {
			needVertexProgramReload = true;
		}
	}


	if (UseVertexProgram != PL_UseVertexProgram) {
		PL_UseVertexProgram = UseVertexProgram;
		if (UseVertexProgram) {
			//Attempt to initialize vertex program mode
			TryInitializeVertexProgramMode();
			//Initialization was just attempted, so the reload check below is skipped
			needVertexProgramReload = false;
		}
		else {
			//Free vertex programs if they were allocated and leave vertex program mode if necessary
			ShutdownVertexProgramMode();
		}
	}

	//Check if vertex program reload is necessary
	if (UseVertexProgram) {
		if (needVertexProgramReload) {
			//Attempt to initialize vertex program mode
			TryInitializeVertexProgramMode();
		}
	}

	//Fragment program mode requires vertex program mode
	if (!UseVertexProgram) UseFragmentProgram = 0;


	if (UseFragmentProgram != PL_UseFragmentProgram) {
		PL_UseFragmentProgram = UseFragmentProgram;
		if (UseFragmentProgram) {
			//Attempt to initialize fragment program mode
			TryInitializeFragmentProgramMode();
			//Initialization was just attempted, so the reload check below is skipped
			needFragmentProgramReload = false;
		}
		else {
			//Free fragment programs if they were allocated and leave fragment program mode if necessary
			ShutdownFragmentProgramMode();
		}
	}

	//Check if fragment program reload is necessary
	if (UseFragmentProgram) {
		if (needFragmentProgramReload) {
			//Attempt to initialize fragment program mode
			TryInitializeFragmentProgramMode();
		}
	}


	//SSE options are turned back off if CPU detection fails or if SSE code is not compiled in
	if (UseSSE != PL_UseSSE) {
#ifdef UTGLR_INCLUDE_SSE_CODE
		if (UseSSE) {
			if (!CPU_DetectSSE()) {
				UseSSE = 0;
			}
		}
#else
		UseSSE = 0;
#endif
		PL_UseSSE = UseSSE;
	}
	if (UseSSE2 != PL_UseSSE2) {
#ifdef UTGLR_INCLUDE_SSE_CODE
		if (UseSSE2) {
			if (!CPU_DetectSSE2()) {
				UseSSE2 = 0;
			}
		}
#else
		UseSSE2 = 0;
#endif
		PL_UseSSE2 = UseSSE2;
	}


#ifdef UTGLR_UNREAL_BUILD
	//Z range hack is always disabled in this build
	ZRangeHack = 0;
#endif


	//Initialize buffer verts proc pointers
	m_pBuffer3BasicVertsProc = Buffer3BasicVerts;
	m_pBuffer3ColoredVertsProc = Buffer3ColoredVerts;
	m_pBuffer3FoggedVertsProc = Buffer3FoggedVerts;

#ifdef UTGLR_INCLUDE_SSE_CODE
	//Initialize SSE buffer verts proc pointers
	//The SSE2 assignments come last, so SSE2 wins if both options are enabled
	if (UseSSE) {
		m_pBuffer3ColoredVertsProc = Buffer3ColoredVerts_SSE;
		m_pBuffer3FoggedVertsProc = Buffer3FoggedVerts_SSE;
	}
	if (UseSSE2) {
		m_pBuffer3ColoredVertsProc = Buffer3ColoredVerts_SSE2;
		m_pBuffer3FoggedVertsProc = Buffer3FoggedVerts_SSE2;
	}
#endif //UTGLR_INCLUDE_SSE_CODE

	//No buffer verts proc is selected at the start of the frame
	m_pBuffer3VertsProc = NULL;


	//Initialize render passes no check proc pointers
	if (UseVertexProgram) {
		m_pRenderPassesNoCheckSetupProc = &UD3D9RenderDevice::RenderPassesNoCheckSetup_VP;
		m_pRenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTextureProc = &UD3D9RenderDevice::RenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTexture_VP;
		//The fragment program variant replaces the vertex program variant when enabled
		if (UseFragmentProgram) {
			m_pRenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTextureProc = &UD3D9RenderDevice::RenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTexture_FP;
		}
	}
	else {
		m_pRenderPassesNoCheckSetupProc = &UD3D9RenderDevice::RenderPassesNoCheckSetup;
		m_pRenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTextureProc = &UD3D9RenderDevice::RenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTexture;
	}

	//Initialize buffer detail texture data proc pointer
	m_pBufferDetailTextureDataProc = &UD3D9RenderDevice::BufferDetailTextureData;
#ifdef UTGLR_INCLUDE_SSE_CODE
	if (UseSSE2) {
		m_pBufferDetailTextureDataProc = &UD3D9RenderDevice::BufferDetailTextureData_SSE2;
	}
#endif //UTGLR_INCLUDE_SSE_CODE


	//Precalculate the cutoff for buffering actor triangles based on config settings
	if (!BufferActorTris) {
		m_bufferActorTrisCutoff = 0;
	}
	else if (!BufferClippedActorTris) {
		m_bufferActorTrisCutoff = 3;
	}
	else {
		m_bufferActorTrisCutoff = 10;
	}

	//Precalculate detail texture color
	if (ColorizeDetailTextures) {
		m_detailTextureColor4ub = 0x00408040;
	}
	else {
		m_detailTextureColor4ub = 0x00808080;
	}

	//Precalculate mask for MaskedTextureHack based on if it's enabled
	m_maskedTextureHackMask = (MaskedTextureHack) ? TEX_CACHE_ID_FLAG_MASKED : 0;

	// Remember stuff.
	FlashScale = InFlashScale;
	FlashFog   = InFlashFog;
	//HitCount = 0;
	HitData    = InHitData;
	HitSize    = InHitSize;

	//Flush textures if necessary due to config change
	if (flushTextures) {
		Flush(1);
	}

	unguard;
}

//Makes Frame the current scene node.
//
//Flushes buffered geometry, restores default stream and texture state,
//precomputes the projection related values for the frame, sets the D3D viewport
//and selects either the ortho projection or the regular projection.
//
//Frame  scene node providing the viewport origin (XB, YB), size (X, Y / FX, FY)
//       and the viewport used for the ortho check
void UD3D9RenderDevice::SetSceneNode(FSceneNode* Frame) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: SetSceneNode = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::SetSceneNode);

	//Buffered vertices must be drawn before the projection matrix changes
	EndBuffering();

	m_sceneNodeCount++;

	//Default AA state does not need to be set here
	//Default projection state does not need to be set as projection is always initialized below
	SetDefaultStreamState();
	SetDefaultTextureState();

	//Precompute projection related values
	const FLOAT rcpFrameFX = 1.0f / Frame->FX;
	m_Aspect = Frame->FY * rcpFrameFX;
	m_RProjZ = appTan(Viewport->Actor->FovAngle * PI / 360.0);
	m_RFX2 = 2.0f * m_RProjZ * rcpFrameFX;
	m_RFY2 = 2.0f * m_RProjZ * rcpFrameFX;

	//The scene node hack compares later frames against this size
	m_sceneNodeX = Frame->X;
	m_sceneNodeY = Frame->Y;

	//Viewport covers the frame rectangle with the full depth range
	D3DVIEWPORT9 frameViewport;
	frameViewport.X = Frame->XB;
	frameViewport.Y = Frame->YB;
	frameViewport.Width = Frame->X;
	frameViewport.Height = Frame->Y;
	frameViewport.MinZ = 0.0f;
	frameViewport.MaxZ = 1.0f;
	m_d3dDevice->SetViewport(&frameViewport);

	//Z range hack is used only if enabled and the projection is not ortho
	const bool orthoView = (Frame->Viewport->IsOrtho()) ? true : false;
	m_useZRangeHack = (ZRangeHack && !orthoView) ? true : false;

	//Select the projection
	if (orthoView) {
		SetOrthoProjection();
	}
	else {
		SetProjectionStateNoCheck(false);
	}

	//No clip plane setup is performed, including when HitData is set

	unguard;
}

//Ends rendering of a frame.
//
//Flushes buffered geometry, restores default render state, ends the D3D scene
//and optionally presents the back buffer. Afterwards the hit size is reset when
//hit data is present, old pooled textures are scanned for, the frame counter is
//advanced and the optional frame rate limit is applied.
//
//Blit  if nonzero, the frame is presented
void UD3D9RenderDevice::Unlock(UBOOL Blit) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: Unlock = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::Unlock);

	//Draw anything still buffered, then return to default state
	EndBuffering();

	SetDefaultAAState();
	SetDefaultProjectionState();
	SetDefaultStreamState();
	SetDefaultTextureState();

	//Must be paired with exactly one Lock
	check(LockCount == 1);

	//End the D3D scene
	if (FAILED(m_d3dDevice->EndScene())) {
		appErrorf(TEXT("EndScene failed"));
	}

	if (Blit) {
		//Present the frame
		const HRESULT presentResult = m_d3dDevice->Present(NULL, NULL, NULL, NULL);

		//A lost device is not treated as a present failure
		bool swapBuffersStatus = true;
		if (FAILED(presentResult) && (presentResult != D3DERR_DEVICELOST)) {
			swapBuffersStatus = false;
		}

		check(swapBuffersStatus);
		if (!m_prevSwapBuffersStatus) {
			check(swapBuffersStatus);
		}
		m_prevSwapBuffersStatus = swapBuffersStatus;
	}

	--LockCount;

	//No hits are reported
	if (HitData) {
		*HitSize = 0;
	}

	//Look for old textures when the tex id pool is in use
	if (UseTexIdPool) {
		ScanForOldTextures();
	}

	//One more frame completed
	m_currentFrameCount++;

	//Optional frame rate limit, only honored for limits of 20 and above
	if (FrameRateLimit >= 20) {
#if defined UTGLR_DX_BUILD || defined UTGLR_UNREAL_BUILD || defined UTGLR_RUNE_BUILD
		FLOAT frameEndTime;
#else
		FTime frameEndTime;
#endif
		float elapsed;
		float frameBudget;

		//Time elapsed since the previous frame timestamp
		frameEndTime = appSeconds();
		elapsed = frameEndTime - m_prevFrameTimestamp;
		m_prevFrameTimestamp = frameEndTime;

		//Sleep away whatever remains of the time allotted to one frame
		frameBudget = 1.0f / FrameRateLimit;
		if (elapsed < frameBudget) {
			float remaining;

			remaining = frameBudget - elapsed;
			appSleep(remaining);

			//Take the timestamp again so that the sleep is not counted against the next frame
			m_prevFrameTimestamp = appSeconds();
		}
	}


#if 0
	dout << TEXT("VP enable count = ") << m_vpEnableCount << std::endl;
	dout << TEXT("VP switch count = ") << m_vpSwitchCount << std::endl;
	dout << TEXT("FP enable count = ") << m_fpEnableCount << std::endl;
	dout << TEXT("FP switch count = ") << m_fpSwitchCount << std::endl;
	dout << TEXT("AA switch count = ") << m_AASwitchCount << std::endl;
	dout << TEXT("Scene node count = ") << m_sceneNodeCount << std::endl;
	dout << TEXT("Scene node hack count = ") << m_sceneNodeHackCount << std::endl;
	dout << TEXT("Stat 0 count = ") << m_stat0Count << std::endl;
	dout << TEXT("Stat 1 count = ") << m_stat1Count << std::endl;
#endif


	unguard;
}

//Releases all cached textures and resets the texture tracking state.
//
//Does nothing if no device exists. Otherwise unbinds the textures of all TMUs,
//releases the texture objects held in the bind trees and in the RGBA8 texture
//pool, returns pooled nodes to their allocator, resets the per TMU texture info,
//optionally requests a precache and finally reapplies the gamma setting.
//
//AllowPrecache  if nonzero, precaching is requested when UsePrecache is set and
//               the editor is not running
void UD3D9RenderDevice::Flush(UBOOL AllowPrecache) {
	guard(UD3D9RenderDevice::Flush);
	unsigned int idx;

	//Nothing to flush without a device
	if (!m_d3dDevice) {
		return;
	}

	//Unbind the texture of every TMU
	for (idx = 0; idx < TMUnits; idx++) {
		m_d3dDevice->SetTexture(idx, NULL);
	}

	//Release the textures of the zero prefix bind trees, then empty each tree
	for (idx = 0; idx < NUM_CTTree_TREES; idx++) {
		DWORD_CTTree_t &bindTree = m_zeroPrefixBindTrees[idx];
		DWORD_CTTree_t::node_t *pNode = bindTree.begin();
		while (pNode != bindTree.end()) {
			pNode->data.pTexObj->Release();
			pNode = bindTree.next_node(pNode);
		}
		bindTree.clear(&m_DWORD_CTTree_Allocator);
	}

	//Release the textures of the non zero prefix bind trees, then empty each tree
	for (idx = 0; idx < NUM_CTTree_TREES; idx++) {
		QWORD_CTTree_t &bindTree = m_nonZeroPrefixBindTrees[idx];
		QWORD_CTTree_t::node_t *pNode = bindTree.begin();
		while (pNode != bindTree.end()) {
			pNode->data.pTexObj->Release();
			pNode = bindTree.next_node(pNode);
		}
		bindTree.clear(&m_QWORD_CTTree_Allocator);
	}

	m_nonZeroPrefixBindChain->mark_as_clear();

	//Drain every node pool stored in the RGBA8 texture pool
	//Each removed node has its texture released and is then returned to the allocator
	TexPoolMap_t::node_t *pPoolEntry = m_RGBA8TexPool->begin();
	while (pPoolEntry != m_RGBA8TexPool->end()) {
		QWORD_CTTree_NodePool_t::node_t *pPooledNode;
		while ((pPooledNode = pPoolEntry->data.try_remove()) != NULL) {
			pPooledNode->data.pTexObj->Release();
			m_QWORD_CTTree_Allocator.free_node(pPooledNode);
		}
		pPoolEntry = m_RGBA8TexPool->next_node(pPoolEntry);
	}
	m_RGBA8TexPool->clear(&m_TexPoolMap_Allocator);

	//Return the nodes of the non zero prefix node pool to the allocator
	//No texture is released for these nodes
	QWORD_CTTree_NodePool_t::node_t *pSpareNode;
	while ((pSpareNode = m_nonZeroPrefixNodePool.try_remove()) != NULL) {
		m_QWORD_CTTree_Allocator.free_node(pSpareNode);
	}

	AllocatedTextures = 0;

	//Mark every TMU as having no current texture
	for (idx = 0; idx < MAX_TMUNITS; idx++) {
		TexInfo[idx].CurrentCacheID = TEX_CACHE_ID_UNUSED;
		TexInfo[idx].pBind = NULL;
	}

	//Request precaching if allowed, enabled, and not running the editor
	if (AllowPrecache && UsePrecache && !GIsEditor) {
		PrecacheOnFlip = 1;
	}

	//Reapply gamma from the client brightness setting
	SetGamma(Viewport->GetOuterUClient()->Brightness);

	unguard;
}


//Draws a complex (world) surface.
//
//Buffers the geometry of the facet, then queues one render pass for the base
//texture and one for each optional macro texture, light map and fog map. A
//detail texture is drawn if enabled, present, not combined with a fog map, and
//at least one vertex is near enough. In the editor, selected surfaces get a
//highlight overlay. Time spent is accumulated in ComplexCycles.
//
//Frame    current scene node; only used for the scene node hack size check
//Surface  textures and poly flags of the surface; Surface.Texture must be set
//Facet    map coordinates and polygon list of the surface
void UD3D9RenderDevice::DrawComplexSurface(FSceneNode* Frame, FSurfaceInfo& Surface, FSurfaceFacet& Facet) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: DrawComplexSurface = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::DrawComplexSurface);

	EndBuffering();		// vogel: might have still been locked (can happen!)

	//With the scene node hack enabled, switch scene node if the frame size differs from the saved one
	if (SceneNodeHack) {
		if ((Frame->X != m_sceneNodeX) || (Frame->Y != m_sceneNodeY)) {
			m_sceneNodeHackCount++;
			SetSceneNode(Frame);
		}
	}

	SetDefaultAAState();
	SetDefaultProjectionState();
	//This function uses cached stream state information
	//This function uses cached vertex program state information
	//This function uses cached fragment program state information
	//This function uses cached texture state information

	check(Surface.Texture);

	clock(ComplexCycles);

	//Calculate UDot and VDot intermediates for complex surface
	m_csUDot = Facet.MapCoords.XAxis | Facet.MapCoords.Origin;
	m_csVDot = Facet.MapCoords.YAxis | Facet.MapCoords.Origin;

	// Buffer "static" geometry.
	INT Index;
	if (UseVertexProgram) {
		Index = BufferStaticComplexSurfaceGeometry_VP(Facet);
	}
	else {
		Index = BufferStaticComplexSurfaceGeometry(Facet);
	}

	//Reject invalid surfaces early
	if (Index == 0) {
		//Balance the clock call above so that ComplexCycles stays valid on this path
		unclock(ComplexCycles);
		return;
	}

	//Save number of points
	m_csPtCount = Index;

	//See if detail texture should be drawn
	//FogMap and DetailTexture are mutually exclusive effects
	bool drawDetailTexture = false;
	if ((DetailTextures != 0) && Surface.DetailTexture && !Surface.FogMap) {
		drawDetailTexture = true;
	}

	//Check for detail texture
	if (drawDetailTexture == true) {
		DWORD anyIsNearBits;

		//Buffer detail texture data
		//Uses the proc selected in Lock
		anyIsNearBits = (this->*m_pBufferDetailTextureDataProc)(380.0f);

		//Do not draw detail texture if no vertices are near
		if (anyIsNearBits == 0) {
			drawDetailTexture = false;
		}
	}


	DWORD PolyFlags = Surface.PolyFlags;

	//Initialize render passes state information
	m_rpPassCount = 0;
	m_rpTMUnits = TMUnits;
	m_rpForceSingle = false;
	m_rpMasked = ((PolyFlags & PF_Masked) == 0) ? false : true;
	m_rpSetDepthEqual = false;
	m_rpColor = 0xFFFFFFFF;


	//Do static render passes state setup
	if (UseVertexProgram) {
		const FVector &XAxis = Facet.MapCoords.XAxis;
		const FVector &YAxis = Facet.MapCoords.YAxis;

		//Two float4 constants: (XAxis, UDot) and (YAxis, VDot)
		FLOAT vsParams[8] = { XAxis.X, XAxis.Y, XAxis.Z, m_csUDot,
							  YAxis.X, YAxis.Y, YAxis.Z, m_csVDot };

		//Loaded into vertex shader constant registers 4 and 5
		m_d3dDevice->SetVertexShaderConstantF(4, vsParams, 2);
	}

	//Base texture pass, always drawn without PF_FlatShaded
	AddRenderPass(Surface.Texture, PolyFlags & ~PF_FlatShaded, 0.0f);

	if (Surface.MacroTexture) {
		AddRenderPass(Surface.MacroTexture, PF_Modulated, -0.5f);
	}

	if (Surface.LightMap) {
		AddRenderPass(Surface.LightMap, PF_Modulated, -0.5f);
	}

	if (Surface.FogMap) {
		//Check for single pass fog mode
		//Without it, the passes queued so far are rendered before the fog map pass is added
		if (!SinglePassFog) {
			RenderPasses();
		}

		AddRenderPass(Surface.FogMap, PF_Highlighted, -0.5f);
	}

	// Draw detail texture overlaid, in a separate pass.
	if (drawDetailTexture == true) {
		bool singlePassDetail = false;

		//Check if single pass detail mode is enabled and if can use it
		if (SinglePassDetail) {
			//Only attempt single pass detail if single texture rendering wasn't forced earlier
			if (!m_rpForceSingle) {
				//Single pass detail must be done with one or two normal passes
				if ((m_rpPassCount == 1) || (m_rpPassCount == 2)) {
					singlePassDetail = true;
				}
			}
		}

		if (singlePassDetail) {
			RenderPasses_SingleOrDualTextureAndDetailTexture(*Surface.DetailTexture);
		}
		else {
			RenderPasses();

			bool clipDetailTexture = (DetailClipping != 0);

			if (m_rpMasked) {
				//Cannot use detail texture clipping with masked mode
				//It will not work with m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_EQUAL);
				clipDetailTexture = false;

				if (m_rpSetDepthEqual == false) {
					m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_EQUAL);
					m_rpSetDepthEqual = true;
				}
			}

			//This function should only be called if at least one polygon will be detail textured
			//Fragment program mode takes priority over vertex program mode
			if (UseFragmentProgram) {
				DrawDetailTexture_FP(*Surface.DetailTexture);
			}
			else if (UseVertexProgram) {
				DrawDetailTexture_VP(*Surface.DetailTexture);
			}
			else {
				DrawDetailTexture(*Surface.DetailTexture, clipDetailTexture);
			}
		}
	}
	else {
		RenderPasses();
	}

	// UnrealEd selection.
	if (GIsEditor && (PolyFlags & PF_Selected)) {
		//No need to set default AA state here as it is always set on entry to DrawComplexSurface
		//No need to set default projection state here as it is always set on entry to DrawComplexSurface
		SetDefaultStreamState();
		SetDefaultTextureState();

		SetNoTexture(0);
		SetBlend(PF_Highlighted);

		//Draw every polygon of the facet as an untextured triangle fan in the selection color
		for (FSavedPoly* Poly = Facet.Polys; Poly; Poly = Poly->Next) {
			INT NumPts = Poly->NumPts;

			//Make sure at least NumPts entries are left in the vertex buffers
			if ((m_curVertexBufferPos + NumPts) >= VERTEX_ARRAY_SIZE) {
				FlushVertexBuffers();
			}

			//Lock vertexColor and texCoord0 buffers
			LockVertexColorBuffer();
			LockTexCoordBuffer(0);

			FGLTexCoord *pTexCoordArray = m_pTexCoordArray[0];
			FGLVertexColor *pVertexColorArray = m_pVertexColorArray;

			for (INT i = 0; i < Poly->NumPts; i++) {
				pTexCoordArray[i].u = 0.5f;
				pTexCoordArray[i].v = 0.5f;

				pVertexColorArray[i].x = Poly->Pts[i]->Point.X;
				pVertexColorArray[i].y = Poly->Pts[i]->Point.Y;
				pVertexColorArray[i].z = Poly->Pts[i]->Point.Z;
				pVertexColorArray[i].color = 0x7F00007F;
			}

			//Unlock vertexColor and texCoord0 buffers
			UnlockVertexColorBuffer();
			UnlockTexCoordBuffer(0);

			//Draw the triangle fan
			m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, NumPts - 2);

			//Advance vertex buffer position
			m_curVertexBufferPos += NumPts;
		}
	}

	//Restore the default depth function if it was changed for masked detail texturing
	if (m_rpSetDepthEqual == true) {
		m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
	}

	unclock(ComplexCycles);
	unguard;
}

#ifdef UTGLR_RUNE_BUILD
//Prepares render state ahead of DrawFogSurface calls (Rune build only).
//Flushes buffered geometry, restores default AA, projection, stream, and texture
//state, then selects alpha blending with no texture on texture unit 0.
void UD3D9RenderDevice::PreDrawFogSurface() {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: PreDrawFogSurface = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::PreDrawFogSurface);

	//Draw anything still buffered before state changes
	EndBuffering();

	SetDefaultAAState();
	SetDefaultProjectionState();
	SetDefaultStreamState();
	SetDefaultTextureState();

	//Fog surfaces are alpha blended
	SetBlend(PF_AlphaBlend);

	//Fog surfaces are drawn untextured
	SetNoTexture(0);

	unguard;
}

//Called after fog surfaces have been drawn (Rune build only).
//Resets the blend state by requesting a blend with no poly flags set.
void UD3D9RenderDevice::PostDrawFogSurface() {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: PostDrawFogSurface = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::PostDrawFogSurface);

	//Undo the PF_AlphaBlend selection made in PreDrawFogSurface
	SetBlend(0);

	unguard;
}

//Draws the polygons of a fog surface as vertex colored triangle fans (Rune build only).
//
//Every vertex gets the clamped fog color. Its alpha is the vertex depth divided
//by the fog distance, limited to the range 0 to 1. Masked surfaces are drawn
//with an equal depth test, which is restored to less-or-equal afterwards.
//
//Frame    not used by this function
//FogSurf  fog color, fog distance, poly flags, and polygon list to draw
void UD3D9RenderDevice::DrawFogSurface(FSceneNode* Frame, FFogSurf &FogSurf) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: DrawFogSurface = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::DrawFogSurface);

	//Fog color with each component clamped; W is filled in per vertex
	FPlane Modulate(Clamp(FogSurf.FogColor.X, 0.0f, 1.0f), Clamp(FogSurf.FogColor.Y, 0.0f, 1.0f), Clamp(FogSurf.FogColor.Z, 0.0f, 1.0f), 0.0f);

	const FLOAT rcpFogDistance = 1.0f / FogSurf.FogDistance;

	//Masked surfaces are drawn with an equal depth test
	const bool maskedSurface = ((FogSurf.PolyFlags & PF_Masked) != 0);
	if (maskedSurface) {
		m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_EQUAL);
	}

	//Set stream state
	SetDefaultStreamState();

	FSavedPoly* Poly = FogSurf.Polys;
	while (Poly) {
		const INT vertCount = Poly->NumPts;

		//Flush if fewer than vertCount entries are left in the vertex buffers
		if ((m_curVertexBufferPos + vertCount) >= VERTEX_ARRAY_SIZE) {
			FlushVertexBuffers();
		}

		//Lock vertexColor and texCoord0 buffers
		LockVertexColorBuffer();
		LockTexCoordBuffer(0);

		//Walk the destination arrays alongside the source points
		FGLVertexColor *pVertexColor = m_pVertexColorArray;
		FGLTexCoord *pTexCoord = m_pTexCoordArray[0];
		for (INT ptIdx = 0; ptIdx < vertCount; ptIdx++, pVertexColor++, pTexCoord++) {
			const FTransform* pPoint = Poly->Pts[ptIdx];

			//Fog alpha grows with depth and is limited to the range 0 to 1
			Modulate.W = pPoint->Point.Z * rcpFogDistance;
			if (Modulate.W > 1.0f) {
				Modulate.W = 1.0f;
			}
			else if (Modulate.W < 0.0f) {
				Modulate.W = 0.0f;
			}

			pVertexColor->x = pPoint->Point.X;
			pVertexColor->y = pPoint->Point.Y;
			pVertexColor->z = pPoint->Point.Z;
			pVertexColor->color = FPlaneTo_BGRA(&Modulate);

			pTexCoord->u = 0.0f;
			pTexCoord->v = 0.0f;
		}

		//Unlock vertexColor and texCoord0 buffers
		UnlockVertexColorBuffer();
		UnlockTexCoordBuffer(0);

		//Draw the polygon as a triangle fan
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, vertCount - 2);

		//Advance vertex buffer position
		m_curVertexBufferPos += vertCount;

		Poly = Poly->Next;
	}

	//Restore the default depth test
	if (maskedSurface) {
		m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_LESSEQUAL);
	}

	unguard;
}

//Enables fog ahead of gouraud polygon drawing (Rune build only).
//
//Only acts when FogDistance is greater than zero: flushes buffered geometry,
//enables fog and sets the fog color and fog end distance.
//
//Frame        not used by this function
//FogDistance  fog end distance; fog stays untouched unless this is above zero
//FogColor     fog color, clamped when converted for the render state
void UD3D9RenderDevice::PreDrawGouraud(FSceneNode* Frame, FLOAT FogDistance, FPlane FogColor) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: PreDrawGouraud = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::PreDrawGouraud);

	//Nothing to do unless a positive fog distance was requested
	if (!(FogDistance > 0.0f)) {
		return;
	}

	//Buffered geometry must be drawn before fog state changes
	EndBuffering();

	//Turn fog on and remember that it is on
	m_gpFogEnabled = true;
	m_d3dDevice->SetRenderState(D3DRS_FOGENABLE, TRUE);

	//Fog mode and fog start are left at their defaults (LINEAR and 0.0f)
	m_d3dDevice->SetRenderState(D3DRS_FOGCOLOR, FPlaneTo_BGRAClamped(&FogColor));

	//Float render states are passed as the raw bits of the float
	FLOAT fogEnd = FogDistance;
	m_d3dDevice->SetRenderState(D3DRS_FOGEND, *(DWORD *)&fogEnd);

	unguard;
}

//Disables fog after gouraud polygon drawing (Rune build only).
//
//Only acts when FogDistance is greater than zero, mirroring PreDrawGouraud:
//flushes buffered geometry and turns fog back off.
//
//FogDistance  fog end distance that was passed to PreDrawGouraud
void UD3D9RenderDevice::PostDrawGouraud(FLOAT FogDistance) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: PostDrawGouraud = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::PostDrawGouraud);

	//Fog was only enabled for a positive fog distance
	if (!(FogDistance > 0.0f)) {
		return;
	}

	//Buffered geometry must be drawn while fog is still enabled
	EndBuffering();

	//Turn fog off and remember that it is off
	m_gpFogEnabled = false;
	m_d3dDevice->SetRenderState(D3DRS_FOGENABLE, FALSE);

	unguard;
}
#endif

//Draws one Gouraud shaded polygon immediately as a single triangle fan.
//Reads NumPts entries from Pts and submits NumPts - 2 fan triangles.
//Frame and Span are not referenced here.
//Vertex fog is drawn only when PF_RenderFog is the sole flag set among the fog test flags
//and UseVertexSpecular is enabled; otherwise PF_RenderFog is stripped from PolyFlags.
void UD3D9RenderDevice::DrawGouraudPolygonOld(FSceneNode* Frame, FTextureInfo& Info, FTransTexture** Pts, INT NumPts, DWORD PolyFlags, FSpanBuffer* Span) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: DrawGouraudPolygonOld = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::DrawGouraudPolygonOld);
	clock(GouraudCycles);

	//Near Z range hack projection is requested only if the hack is in use and bit 0 of the ugly hack flags is set
	const bool wantNearZRangeHack = (m_useZRangeHack && (GUglyHackFlags & 0x1));
	SetProjectionState(wantNearZRangeHack);

	//Flags examined by the fog test; only PF_RenderFog may be set among them for fog to be drawn
#ifdef UTGLR_RUNE_BUILD
	const DWORD fogTestFlags = PF_RenderFog | PF_Translucent | PF_Modulated | PF_AlphaBlend;
#else
	const DWORD fogTestFlags = PF_RenderFog | PF_Translucent | PF_Modulated;
#endif
	const bool drawFog = (((PolyFlags & fogTestFlags) == PF_RenderFog) && UseVertexSpecular);

	//Without fog, the PF_RenderFog flag must not reach the blend and texture setup
	if (!drawFog) {
		PolyFlags &= ~PF_RenderFog;
	}

	SetBlend(PolyFlags);
	SetTextureNoPanBias(0, Info, PolyFlags);

#ifdef UTGLR_RUNE_BUILD
	//Vertex alpha comes from the texture when alpha blending, and is fully opaque otherwise
	BYTE alpha = 255;
	if (PolyFlags & PF_AlphaBlend) {
		alpha = appRound(Info.Texture->Alpha * 255.0f);
	}
#endif

	{
		IDirect3DVertexDeclaration9 *pDecl;
		IDirect3DVertexShader9 *pVertexShader = NULL;
		IDirect3DPixelShader9 *pPixelShader = NULL;

		//The two color declaration carries the secondary (fog) color stream
		if (drawFog) {
			pDecl = m_twoColorSingleTextureVertexDecl;
		}
		else {
			pDecl = m_standardNTextureVertexDecl[0];
		}

		if (UseVertexProgram) {
			if (drawFog) {
				pVertexShader = m_vpDefaultRenderingStateWithFog;
			}
			else {
				pVertexShader = m_vpDefaultRenderingState;
			}
#ifdef UTGLR_RUNE_BUILD
			//Linear fog takes precedence over either default program
			if (m_gpFogEnabled) {
				pVertexShader = m_vpDefaultRenderingStateWithLinearFog;
			}
#endif
		}
		if (UseFragmentProgram) {
			if (drawFog) {
				pPixelShader = m_fpDefaultRenderingStateWithFog;
			}
			else {
				pPixelShader = m_fpDefaultRenderingState;
			}
#ifdef UTGLR_RUNE_BUILD
			//Linear fog takes precedence over either default program
			if (m_gpFogEnabled) {
				pPixelShader = m_fpDefaultRenderingStateWithLinearFog;
			}
#endif
		}

		//Set stream state
		SetStreamState(pDecl, pVertexShader, pPixelShader);
	}

	//Flush first if fewer than NumPts entries remain in the vertex buffers
	if ((m_curVertexBufferPos + NumPts) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock vertexColor, optional secondary color, and texCoord0 buffers
	LockVertexColorBuffer();
	if (drawFog) {
		LockSecondaryColorBuffer();
	}
	LockTexCoordBuffer(0);

	//Destination arrays are read after the locks above
	FGLTexCoord *pTexCoords = m_pTexCoordArray[0];
	FGLVertexColor *pVerts = m_pVertexColorArray;
	for (INT ptIdx = 0; ptIdx < NumPts; ptIdx++) {
		FTransTexture *pSrc = Pts[ptIdx];

		//Texture coordinates scaled by the multipliers of texture unit 0
		pTexCoords[ptIdx].u = pSrc->U * TexInfo[0].UMult;
		pTexCoords[ptIdx].v = pSrc->V * TexInfo[0].VMult;

		//Position
		pVerts[ptIdx].x = pSrc->Point.X;
		pVerts[ptIdx].y = pSrc->Point.Y;
		pVerts[ptIdx].z = pSrc->Point.Z;

		//Color
		if (PolyFlags & PF_Modulated) {
			//Modulated polygons use plain white
			pVerts[ptIdx].color = 0xFFFFFFFF;
		}
		else if (drawFog) {
			//Light is scaled by (1 - Fog.W); the fog color goes to the secondary color stream
			FLOAT lightScale = 255.0f * (1.0f - pSrc->Fog.W);
			pVerts[ptIdx].color = FPlaneTo_BGRScaled_A255(&pSrc->Light, lightScale);
			m_pSecondaryColorArray[ptIdx].specular = FPlaneTo_BGR_A0(&pSrc->Fog);
		}
		else {
#ifdef UTGLR_RUNE_BUILD
			pVerts[ptIdx].color = FPlaneTo_BGR_Aub(&pSrc->Light, alpha);
#else
			pVerts[ptIdx].color = FPlaneTo_BGR_A255(&pSrc->Light);
#endif
		}
	}

	//Unlock in the same order the buffers were locked
	UnlockVertexColorBuffer();
	if (drawFog) {
		UnlockSecondaryColorBuffer();
	}
	UnlockTexCoordBuffer(0);

#ifdef UTGLR_DEBUG_ACTOR_WIREFRAME
	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
#endif

	//Draw the fan, then step the vertex buffer position past the vertices just used
	m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, NumPts - 2);
	m_curVertexBufferPos += NumPts;

#ifdef UTGLR_DEBUG_ACTOR_WIREFRAME
	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID);
#endif

	unclock(GouraudCycles);
	unguard;
}

//Entry point for Gouraud shaded polygons.
//Polygons with fewer than 3 points are ignored. Polygons with more than m_bufferActorTrisCutoff
//points are drawn immediately through DrawGouraudPolygonOld. All other polygons are appended to the
//current buffered batch via m_pBuffer3VertsProc (and BufferAdditionalClippedVerts when NumPts > 3).
//A new batch is started whenever the poly flags, the near Z range hack request or the texture
//cache id change, when the vertex buffers are close to full, or when nothing is buffered yet.
//Span is only forwarded to DrawGouraudPolygonOld.
void UD3D9RenderDevice::DrawGouraudPolygon(FSceneNode* Frame, FTextureInfo& Info, FTransTexture** Pts, INT NumPts, DWORD PolyFlags, FSpanBuffer* Span) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: DrawGouraudPolygon = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::DrawGouraudPolygon);

	//Any buffered tiles must be drawn before polygons are buffered
	EndTileBuffering();

	//Re-apply the scene node if the frame origin differs from the one last set
	if (SceneNodeHack) {
		if ((Frame->X != m_sceneNodeX) || (Frame->Y != m_sceneNodeY)) {
			m_sceneNodeHackCount++;
			SetSceneNode(Frame);
		}
	}

	//Reject invalid polygons early so that other parts of the code do not have to deal with them
	if (NumPts < 3) {
		return;
	}

	//Polygons above the cutoff bypass the buffering path and are drawn right away
	if (NumPts > m_bufferActorTrisCutoff) {
		EndBuffering();

		SetDefaultAAState();
		//No need to set default projection state here as DrawGouraudPolygonOld will set its own projection state
		//No need to set default stream state here as DrawGouraudPolygonOld will set its own stream state
		SetDefaultTextureState();

		DrawGouraudPolygonOld(Frame, Info, Pts, NumPts, PolyFlags, Span);

		return;
	}

	//Load texture cache id
	QWORD CacheID = Info.CacheID;

	//Only attempt to alter texture cache id on certain textures
	if ((CacheID & 0xFF) == 0xE0) {
		//Alter texture cache id if masked texture hack is enabled and texture is masked
		CacheID |= ((PolyFlags & PF_Masked) ? TEX_CACHE_ID_FLAG_MASKED : 0) & m_maskedTextureHackMask;

		//Check for 16 bit texture option
		if (Use16BitTextures) {
			if (Info.Palette && (Info.Palette[128].A == 255)) {
				CacheID |= TEX_CACHE_ID_FLAG_16BIT;
			}
		}
	}

	//Decide if should request near Z range hack projection
	bool requestNearZRangeHackProjection = false;
	if (m_useZRangeHack && (GUglyHackFlags & 0x1)) {
		requestNearZRangeHackProjection = true;
	}

	//Check if need to start new poly buffering
	//Make sure enough entries are left in the vertex buffers
	//based on the current position when it was locked
	if ((m_lastPolyFlags != PolyFlags) ||
		(requestNearZRangeHackProjection != m_lastRequestNearZRangeHackProjection) ||
		(TexInfo[0].CurrentCacheID != CacheID) ||
		((m_curVertexBufferPos + BufferedVerts + NumPts) >= (VERTEX_ARRAY_SIZE - 14)) ||
		(BufferedVerts == 0))
	{
		//Finish the batch that was being buffered, if any
		EndGouraudPolygonBuffering();

		//Check if vertex buffer flush is required
		if ((m_curVertexBufferPos + BufferedVerts + NumPts) >= (VERTEX_ARRAY_SIZE - 14)) {
			FlushVertexBuffers();
		}

		//Check if should render fog and if vertex specular is supported
		//Also set other color flags
		if (PolyFlags & PF_Modulated) {
			m_requestedColorFlags = 0;
		}
		else {
			m_requestedColorFlags = CF_COLOR_ARRAY;

#ifdef UTGLR_RUNE_BUILD
			if (((PolyFlags & (PF_RenderFog | PF_Translucent | PF_Modulated | PF_AlphaBlend)) == PF_RenderFog) && UseVertexSpecular) {
#else
			if (((PolyFlags & (PF_RenderFog | PF_Translucent | PF_Modulated)) == PF_RenderFog) && UseVertexSpecular) {
#endif
				m_requestedColorFlags = CF_COLOR_ARRAY | CF_FOG_MODE;
			}
		}

		//If not drawing fog, disable the PF_RenderFog flag
		if (!(m_requestedColorFlags & CF_FOG_MODE)) {
			PolyFlags &= ~PF_RenderFog;
		}

		//Default texture state not needed for polygon caching stage

		SetBlend(PolyFlags);
		SetTextureNoPanBias(0, Info, PolyFlags);

		//Set request near Z range hack projection flag
		m_requestNearZRangeHackProjection = requestNearZRangeHackProjection;

		//Save last used poly flags
		//NOTE(review): the flags are saved after PF_RenderFog may have been cleared above, so a following
		//call that passes the same PolyFlags with PF_RenderFog still set compares unequal and starts a
		//new batch. DrawTile saves its flags before local modification. Confirm whether this is intended.
		m_lastPolyFlags = PolyFlags;

		//Save last used request near Z range hack projection
		m_lastRequestNearZRangeHackProjection = m_requestNearZRangeHackProjection;

		//Select a buffer verts proc
		if (m_requestedColorFlags & CF_FOG_MODE) {
			m_pBuffer3VertsProc = m_pBuffer3FoggedVertsProc;
		}
		else if (m_requestedColorFlags & CF_COLOR_ARRAY) {
			m_pBuffer3VertsProc = m_pBuffer3ColoredVertsProc;
		}
		else {
			m_pBuffer3VertsProc = m_pBuffer3BasicVertsProc;
		}
#ifdef UTGLR_RUNE_BUILD
		//Alpha blended polygons take their alpha from the texture and use the Buffer3Verts proc
		m_gpAlpha = 255;
		if (PolyFlags & PF_AlphaBlend) {
			m_gpAlpha = appRound(Info.Texture->Alpha * 255.0f);
			m_pBuffer3VertsProc = Buffer3Verts;
		}
#endif

		//Lock vertexColor and texCoord0 buffers
		//Lock secondary color buffer if fog
		LockVertexColorBuffer();
		if (m_requestedColorFlags & CF_FOG_MODE) {
			LockSecondaryColorBuffer();
		}
		LockTexCoordBuffer(0);

		{
			//The two color declaration is used when fog is written to the secondary color buffer
			IDirect3DVertexDeclaration9 *vertexDecl = (m_requestedColorFlags & CF_FOG_MODE) ? m_twoColorSingleTextureVertexDecl : m_standardNTextureVertexDecl[0];
			IDirect3DVertexShader9 *vertexShader = NULL;
			IDirect3DPixelShader9 *pixelShader = NULL;

			if (UseVertexProgram) {
				vertexShader = m_vpDefaultRenderingState;
				if (m_requestedColorFlags & CF_FOG_MODE) {
					vertexShader = m_vpDefaultRenderingStateWithFog;
				}
#ifdef UTGLR_RUNE_BUILD
				//Linear fog overrides either default program
				if (m_gpFogEnabled) {
					vertexShader = m_vpDefaultRenderingStateWithLinearFog;
				}
#endif
			}
			if (UseFragmentProgram) {
				pixelShader = m_fpDefaultRenderingState;
				if (m_requestedColorFlags & CF_FOG_MODE) {
					pixelShader = m_fpDefaultRenderingStateWithFog;
				}
#ifdef UTGLR_RUNE_BUILD
				//Linear fog overrides either default program
				if (m_gpFogEnabled) {
					pixelShader = m_fpDefaultRenderingStateWithLinearFog;
				}
#endif
			}

			//Set stream state
			SetStreamState(vertexDecl, vertexShader, pixelShader);
		}
	}

	//Buffer 3 vertices from the first (and perhaps only) triangle
	(m_pBuffer3VertsProc)(this, Pts);

	if (NumPts > 3) {
		//Buffer additional vertices from a clipped triangle
		BufferAdditionalClippedVerts(Pts, NumPts);
	}

	unguard;
}

//Draws a textured rectangle (tile) given in screen coordinates.
//X, Y, XL, YL give the rectangle origin and size; U, V, UL, VL give the texture rectangle origin and size;
//Z is the depth the tile is placed at; Color supplies the vertex color unless PF_Modulated is set.
//When BufferTileQuads is set, the tile is appended to the tile buffer as two triangles (6 vertices);
//otherwise it is drawn immediately as a 4 vertex triangle fan.
//Span and Fog are not referenced by this function.
void UD3D9RenderDevice::DrawTile(FSceneNode* Frame, FTextureInfo& Info, FLOAT X, FLOAT Y, FLOAT XL, FLOAT YL, FLOAT U, FLOAT V, FLOAT UL, FLOAT VL, class FSpanBuffer* Span, FLOAT Z, FPlane Color, FPlane Fog, DWORD PolyFlags) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: DrawTile = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::DrawTile);

	//Any buffered Gouraud polygons must be drawn before tiles are handled
	EndGouraudPolygonBuffering();

	//Re-apply the scene node if the frame origin differs from the one last set
	if (SceneNodeHack) {
		if ((Frame->X != m_sceneNodeX) || (Frame->Y != m_sceneNodeY)) {
			m_sceneNodeHackCount++;
			SetSceneNode(Frame);
		}
	}

	//Adjust Z coordinate if Z range hack is active
	//Z values in [0.5, 8.0) are remapped linearly to [4.0, 8.0)
	if (m_useZRangeHack) {
		if ((Z >= 0.5f) && (Z < 8.0f)) {
			Z = (((Z - 0.5f) / 7.5f) * 4.0f) + 4.0f;
		}
	}

	//Rectangle edges relative to the frame center (Frame->FX2, Frame->FY2), shifted by half a pixel
	FLOAT PX1 = X - Frame->FX2 - 0.5f;
	FLOAT PX2 = PX1 + XL;
	FLOAT PY1 = Y - Frame->FY2 - 0.5f;
	FLOAT PY2 = PY1 + YL;

	FLOAT RFX2_Times_Z = m_RFX2 * Z;
	FLOAT RFY2_Times_Z = m_RFY2 * Z;

	//Edge positions scaled by Z and the m_RFX2 / m_RFY2 factors
	FLOAT RPX1 = RFX2_Times_Z * PX1;
	FLOAT RPX2 = RFX2_Times_Z * PX2;
	FLOAT RPY1 = RFY2_Times_Z * PY1;
	FLOAT RPY2 = RFY2_Times_Z * PY2;

	if (BufferTileQuads) {
		//Load texture cache id
		QWORD CacheID = Info.CacheID;

		//Only attempt to alter texture cache id on certain textures
		if ((CacheID & 0xFF) == 0xE0) {
			//Alter texture cache id if masked texture hack is enabled and texture is masked
			CacheID |= ((PolyFlags & PF_Masked) ? TEX_CACHE_ID_FLAG_MASKED : 0) & m_maskedTextureHackMask;

			//Check for 16 bit texture option
			if (Use16BitTextures) {
				if (Info.Palette && (Info.Palette[128].A == 255)) {
					CacheID |= TEX_CACHE_ID_FLAG_16BIT;
				}
			}
		}

		//Check if need to start new tile buffering
		if ((m_lastPolyFlags != PolyFlags) ||
			(TexInfo[0].CurrentCacheID != CacheID) ||
			(m_curVertexBufferPos + BufferedTileVerts >= (VERTEX_ARRAY_SIZE - 6)) ||
			(BufferedTileVerts == 0))
		{
			//Flush any previously buffered tiles
			EndTileBuffering();

			//Check if vertex buffer flush is required
			if (m_curVertexBufferPos + BufferedTileVerts >= (VERTEX_ARRAY_SIZE - 6)) {
				FlushVertexBuffers();
			}

			//Default vertex program state not needed for polygon caching stage
			//Default texture state not needed for polygon caching stage

			//Save last used poly flags before local modification
			m_lastPolyFlags = PolyFlags;

			//Add PF_Highlighted when palette entry 128 is not fully opaque and the tile is not
			//translucent (or alpha blended in the Rune build)
#ifdef UTGLR_RUNE_BUILD
			if (Info.Palette && Info.Palette[128].A != 255 && !(PolyFlags & (PF_Translucent | PF_AlphaBlend))) {
#else
			if (Info.Palette && Info.Palette[128].A != 255 && !(PolyFlags & PF_Translucent)) {
#endif
				PolyFlags |= PF_Highlighted;
			}

			SetBlend(PolyFlags);
			SetTextureNoPanBias(0, Info, PolyFlags);

			if (PolyFlags & PF_Modulated) {
				m_requestedColorFlags = 0;
			}
			else {
				m_requestedColorFlags = CF_COLOR_ARRAY;
			}

			//Lock vertexColor and texCoord0 buffers
			LockVertexColorBuffer();
			LockTexCoordBuffer(0);

			//Set stream state
			SetDefaultStreamState();
		}

		//Get tile color
		//Modulated tiles keep plain white
		DWORD tileColor;
		tileColor = 0xFFFFFFFF;
		if (!(PolyFlags & PF_Modulated)) {
			if (UseSSE2) {
				//NOTE(review): if UseSSE2 is set while UTGLR_INCLUDE_SSE_CODE is not defined, this branch is
				//empty and tileColor stays white; presumably UseSSE2 is never set in such builds - confirm
#ifdef UTGLR_INCLUDE_SSE_CODE
				static __m128 fColorMul = { 255.0f, 255.0f, 255.0f, 0.0f };
				__m128 fColorMulReg;
				__m128 fColor;
				__m128 fAlpha;
				__m128i iColor;

				//Scale X, Y, Z by 255 and W by 0
				fColorMulReg = fColorMul;
				fColor = _mm_loadu_ps(&Color.X);
				fColor = _mm_mul_ps(fColor, fColorMulReg);

				//RGBA to BGRA
				fColor = _mm_shuffle_ps(fColor, fColor,  _MM_SHUFFLE(3, 0, 1, 2));

				//Build an alpha of 255 in the low element, scaled by the texture alpha when alpha blending
				fAlpha = _mm_setzero_ps();
				fAlpha = _mm_move_ss(fAlpha, fColorMulReg);
#ifdef UTGLR_RUNE_BUILD
				if (PolyFlags & PF_AlphaBlend) {
					fAlpha = _mm_mul_ss(fAlpha, _mm_load_ss(&Info.Texture->Alpha));
				}
#endif
				//Move the alpha value to the highest element; the other elements take the zero from element 1
				fAlpha = _mm_shuffle_ps(fAlpha, fAlpha,  _MM_SHUFFLE(0, 1, 1, 1));

				fColor = _mm_or_ps(fColor, fAlpha);

				//Convert to integers and pack down to four bytes with saturation
				iColor = _mm_cvtps_epi32(fColor);
				iColor = _mm_packs_epi32(iColor, iColor);
				iColor = _mm_packus_epi16(iColor, iColor);

				tileColor = _mm_cvtsi128_si32(iColor);
#endif
			}
			else {
#ifdef UTGLR_RUNE_BUILD
				if (PolyFlags & PF_AlphaBlend) {
					Color.W = Info.Texture->Alpha;
					tileColor = FPlaneTo_BGRAClamped(&Color);
				}
				else {
					tileColor = FPlaneTo_BGRClamped_A255(&Color);
				}
#else
				tileColor = FPlaneTo_BGRClamped_A255(&Color);
#endif
			}
		}

		//Buffer the tile
		//Six vertices are written after the tile vertices already buffered: two triangles,
		//(top left, top right, bottom right) and (top left, bottom right, bottom left)
		FGLVertexColor *pVertexColorArray = &m_pVertexColorArray[BufferedTileVerts];
		FGLTexCoord *pTexCoordArray = &m_pTexCoordArray[0][BufferedTileVerts];

		pVertexColorArray[0].x = RPX1;
		pVertexColorArray[0].y = RPY1;
		pVertexColorArray[0].z = Z;
		pVertexColorArray[0].color = tileColor;

		pVertexColorArray[1].x = RPX2;
		pVertexColorArray[1].y = RPY1;
		pVertexColorArray[1].z = Z;
		pVertexColorArray[1].color = tileColor;

		pVertexColorArray[2].x = RPX2;
		pVertexColorArray[2].y = RPY2;
		pVertexColorArray[2].z = Z;
		pVertexColorArray[2].color = tileColor;

		pVertexColorArray[3].x = RPX1;
		pVertexColorArray[3].y = RPY1;
		pVertexColorArray[3].z = Z;
		pVertexColorArray[3].color = tileColor;

		pVertexColorArray[4].x = RPX2;
		pVertexColorArray[4].y = RPY2;
		pVertexColorArray[4].z = Z;
		pVertexColorArray[4].color = tileColor;

		pVertexColorArray[5].x = RPX1;
		pVertexColorArray[5].y = RPY2;
		pVertexColorArray[5].z = Z;
		pVertexColorArray[5].color = tileColor;

		FLOAT TexInfoUMult = TexInfo[0].UMult;
		FLOAT TexInfoVMult = TexInfo[0].VMult;

		//Texture rectangle edges scaled by the multipliers of texture unit 0
		FLOAT SU1 = (U) * TexInfoUMult;
		FLOAT SU2 = (U + UL) * TexInfoUMult;
		FLOAT SV1 = (V) * TexInfoVMult;
		FLOAT SV2 = (V + VL) * TexInfoVMult;

		pTexCoordArray[0].u = SU1;
		pTexCoordArray[0].v = SV1;

		pTexCoordArray[1].u = SU2;
		pTexCoordArray[1].v = SV1;

		pTexCoordArray[2].u = SU2;
		pTexCoordArray[2].v = SV2;

		pTexCoordArray[3].u = SU1;
		pTexCoordArray[3].v = SV1;

		pTexCoordArray[4].u = SU2;
		pTexCoordArray[4].v = SV2;

		pTexCoordArray[5].u = SU1;
		pTexCoordArray[5].v = SV2;

		BufferedTileVerts += 6;
	}
	else {
		//Unbuffered path: set all state and draw the tile right away
		DWORD tileColor;

		EndTileBuffering();

		clock(TileCycles);

		if (NoAATiles) {
			SetDisabledAAState();
		}
		else {
			SetDefaultAAState();
		}
		SetDefaultProjectionState();
		SetDefaultStreamState();
		SetDefaultTextureState();

		//Add PF_Highlighted when palette entry 128 is not fully opaque and the tile is not
		//translucent (or alpha blended in the Rune build)
#ifdef UTGLR_RUNE_BUILD
		if (Info.Palette && Info.Palette[128].A != 255 && !(PolyFlags & (PF_Translucent | PF_AlphaBlend))) {
#else
		if (Info.Palette && Info.Palette[128].A != 255 && !(PolyFlags & PF_Translucent)) {
#endif
			PolyFlags |= PF_Highlighted;
		}

		SetBlend(PolyFlags);
		SetTextureNoPanBias(0, Info, PolyFlags);

		//Modulated tiles keep plain white
		tileColor = 0xFFFFFFFF;
		if (!(PolyFlags & PF_Modulated)) {
#ifdef UTGLR_RUNE_BUILD
			if (PolyFlags & PF_AlphaBlend) {
				Color.W = Info.Texture->Alpha;
				tileColor = FPlaneTo_BGRAClamped(&Color);
			}
			else {
				tileColor = FPlaneTo_BGRClamped_A255(&Color);
			}
#else
			tileColor = FPlaneTo_BGRClamped_A255(&Color);
#endif
		}

		//Make sure at least 4 entries are left in the vertex buffers
		if ((m_curVertexBufferPos + 4) >= VERTEX_ARRAY_SIZE) {
			FlushVertexBuffers();
		}

		//Lock vertexColor and texCoord0 buffers
		LockVertexColorBuffer();
		LockTexCoordBuffer(0);

		FGLTexCoord *pTexCoordArray = m_pTexCoordArray[0];
		FGLVertexColor *pVertexColorArray = m_pVertexColorArray;

		FLOAT TexInfoUMult = TexInfo[0].UMult;
		FLOAT TexInfoVMult = TexInfo[0].VMult;

		//Texture rectangle edges scaled by the multipliers of texture unit 0
		FLOAT SU1 = (U) * TexInfoUMult;
		FLOAT SU2 = (U + UL) * TexInfoUMult;
		FLOAT SV1 = (V) * TexInfoVMult;
		FLOAT SV2 = (V + VL) * TexInfoVMult;

		//Four corners in fan order: top left, top right, bottom right, bottom left
		pTexCoordArray[0].u = SU1;
		pTexCoordArray[0].v = SV1;

		pTexCoordArray[1].u = SU2;
		pTexCoordArray[1].v = SV1;

		pTexCoordArray[2].u = SU2;
		pTexCoordArray[2].v = SV2;

		pTexCoordArray[3].u = SU1;
		pTexCoordArray[3].v = SV2;

		pVertexColorArray[0].x = RPX1;
		pVertexColorArray[0].y = RPY1;
		pVertexColorArray[0].z = Z;
		pVertexColorArray[0].color = tileColor;

		pVertexColorArray[1].x = RPX2;
		pVertexColorArray[1].y = RPY1;
		pVertexColorArray[1].z = Z;
		pVertexColorArray[1].color = tileColor;

		pVertexColorArray[2].x = RPX2;
		pVertexColorArray[2].y = RPY2;
		pVertexColorArray[2].z = Z;
		pVertexColorArray[2].color = tileColor;

		pVertexColorArray[3].x = RPX1;
		pVertexColorArray[3].y = RPY2;
		pVertexColorArray[3].z = Z;
		pVertexColorArray[3].color = tileColor;

		//Unlock vertexColor and texCoord0 buffers
		UnlockVertexColorBuffer();
		UnlockTexCoordBuffer(0);

		//Draw the tile
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, 2);

		//Advance vertex buffer position
		m_curVertexBufferPos += 4;

		unclock(TileCycles);
	}

	unguard;
}

//Draws one untextured line between two points given in screen coordinates, with the depth
//of each end point taken from its Z component.
//LineFlags is not referenced by this function.
void UD3D9RenderDevice::Draw2DLine(FSceneNode* Frame, FPlane Color, DWORD LineFlags, FVector P1, FVector P2) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: Draw2DLine = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::Draw2DLine);

	//Draw anything still buffered, then return to the default states
	EndBuffering();

	SetDefaultAAState();
	SetDefaultProjectionState();
	SetDefaultStreamState();
	SetDefaultTextureState();

	SetNoTexture(0);
	SetBlend(PF_Highlighted);

	const DWORD lineColor = FPlaneTo_BGR_A255(&Color);

	//Two vertex buffer entries are needed; flush if they are not available
	if ((m_curVertexBufferPos + 2) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock vertexColor and texCoord0 buffers
	LockVertexColorBuffer();
	LockTexCoordBuffer(0);

	FGLTexCoord *pTexCoords = m_pTexCoordArray[0];
	FGLVertexColor *pVerts = m_pVertexColorArray;

	//Write both end points; u runs from 0 at the first point to 1 at the second
	const FVector *endPoints[2] = { &P1, &P2 };
	for (INT ptIdx = 0; ptIdx < 2; ptIdx++) {
		const FVector &pt = *endPoints[ptIdx];

		pTexCoords[ptIdx].u = (ptIdx == 0) ? 0.0f : 1.0f;
		pTexCoords[ptIdx].v = 0.0f;

		//Offset from the frame center, scaled by the point depth and the m_RFX2 / m_RFY2 factors
		pVerts[ptIdx].x = m_RFX2 * pt.Z * (pt.X - Frame->FX2 - 0.5f);
		pVerts[ptIdx].y = m_RFY2 * pt.Z * (pt.Y - Frame->FY2 - 0.5f);
		pVerts[ptIdx].z = pt.Z;
		pVerts[ptIdx].color = lineColor;
	}

	//Unlock vertexColor and texCoord0 buffers
	UnlockVertexColorBuffer();
	UnlockTexCoordBuffer(0);

	//Draw the line and step past the two vertices it used
	m_d3dDevice->DrawPrimitive(D3DPT_LINELIST, m_curVertexBufferPos, 1);
	m_curVertexBufferPos += 2;

	unguard;
}

//Draws one untextured line between two points, which are first transformed by Frame->Coords.
//In orthographic viewports the points are converted to screen coordinates and handed to
//Draw2DLine, or to Draw2DPoint when the line is shorter than 0.2 on screen and the viewport
//actor's OrthoZoom is below ORTHO_LOW_DETAIL. Otherwise the line is drawn directly from the
//transformed points.
void UD3D9RenderDevice::Draw3DLine(FSceneNode* Frame, FPlane Color, DWORD LineFlags, FVector P1, FVector P2) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: Draw3DLine = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::Draw3DLine);

	//Draw anything still buffered, then return to the default states
	EndBuffering();

	SetDefaultAAState();
	SetDefaultProjectionState();
	SetDefaultStreamState();
	SetDefaultTextureState();

	P1 = P1.TransformPointBy(Frame->Coords);
	P2 = P2.TransformPointBy(Frame->Coords);

	if (Frame->Viewport->IsOrtho()) {
		//Apply the zoom and move the origin to the frame center; both points are placed at Z = 1
		P1.X = (P1.X) / Frame->Zoom + Frame->FX2;
		P1.Y = (P1.Y) / Frame->Zoom + Frame->FY2;
		P2.X = (P2.X) / Frame->Zoom + Frame->FX2;
		P2.Y = (P2.Y) / Frame->Zoom + Frame->FY2;
		P2.Z = 1;
		P1.Z = P2.Z;

		//A line parallel to the line of sight appears as a dot and is drawn as a point instead
		if (Abs(P2.X - P1.X) + Abs(P2.Y - P1.Y) >= 0.2) {
			Draw2DLine(Frame, Color, LineFlags, P1, P2);
		}
		else if (Frame->Viewport->Actor->OrthoZoom < ORTHO_LOW_DETAIL) {
			Draw2DPoint(Frame, Color, LINE_None, P1.X - 1, P1.Y - 1, P1.X + 1, P1.Y + 1, P1.Z);
		}

		return;
	}

	SetNoTexture(0);
	SetBlend(PF_Highlighted);

	const DWORD lineColor = FPlaneTo_BGR_A255(&Color);

	//Two vertex buffer entries are needed; flush if they are not available
	if ((m_curVertexBufferPos + 2) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock vertexColor and texCoord0 buffers
	LockVertexColorBuffer();
	LockTexCoordBuffer(0);

	FGLTexCoord *pTexCoords = m_pTexCoordArray[0];
	FGLVertexColor *pVerts = m_pVertexColorArray;

	//Write both end points; u runs from 0 at the first point to 1 at the second
	const FVector *endPoints[2] = { &P1, &P2 };
	for (INT ptIdx = 0; ptIdx < 2; ptIdx++) {
		const FVector &pt = *endPoints[ptIdx];

		pTexCoords[ptIdx].u = (ptIdx == 0) ? 0.0f : 1.0f;
		pTexCoords[ptIdx].v = 0.0f;

		pVerts[ptIdx].x = pt.X;
		pVerts[ptIdx].y = pt.Y;
		pVerts[ptIdx].z = pt.Z;
		pVerts[ptIdx].color = lineColor;
	}

	//Unlock vertexColor and texCoord0 buffers
	UnlockVertexColorBuffer();
	UnlockTexCoordBuffer(0);

	//Draw the line and step past the two vertices it used
	m_d3dDevice->DrawPrimitive(D3DPT_LINELIST, m_curVertexBufferPos, 1);
	m_curVertexBufferPos += 2;

	unguard;
}

//Draws an untextured, solid colored rectangle spanning (X1, Y1) to (X2, Y2) in screen
//coordinates at depth Z, as a two triangle fan.
//LineFlags is not referenced by this function.
void UD3D9RenderDevice::Draw2DPoint(FSceneNode* Frame, FPlane Color, DWORD LineFlags, FLOAT X1, FLOAT Y1, FLOAT X2, FLOAT Y2, FLOAT Z) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: Draw2DPoint = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::Draw2DPoint);

	//Draw anything still buffered, then return to the default states
	EndBuffering();

	SetDefaultAAState();
	SetDefaultProjectionState();
	SetDefaultStreamState();
	SetDefaultTextureState();

	SetBlend(PF_Highlighted);
	SetNoTexture(0);

	const DWORD pointColor = FPlaneTo_BGR_A255(&Color);

	//Four vertex buffer entries are needed; flush if they are not available
	if ((m_curVertexBufferPos + 4) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock vertexColor and texCoord0 buffers
	LockVertexColorBuffer();
	LockTexCoordBuffer(0);

	FGLTexCoord *pTexCoords = m_pTexCoordArray[0];
	FGLVertexColor *pVerts = m_pVertexColorArray;

	//Rectangle edges: offset from the frame center, scaled by Z and the m_RFX2 / m_RFY2 factors
	const FLOAT leftPos = m_RFX2 * Z * (X1 - Frame->FX2 - 0.5f);
	const FLOAT topPos = m_RFY2 * Z * (Y1 - Frame->FY2 - 0.5f);
	const FLOAT rightPos = m_RFX2 * Z * (X2 - Frame->FX2 - 0.5f);
	const FLOAT bottomPos = m_RFY2 * Z * (Y2 - Frame->FY2 - 0.5f);

	//Corners in fan order: (X1, Y1), (X2, Y1), (X2, Y2), (X1, Y2)
	const FLOAT cornerX[4] = { leftPos, rightPos, rightPos, leftPos };
	const FLOAT cornerY[4] = { topPos, topPos, bottomPos, bottomPos };
	const FLOAT cornerU[4] = { 0.0f, 1.0f, 1.0f, 0.0f };
	const FLOAT cornerV[4] = { 0.0f, 0.0f, 1.0f, 1.0f };

	for (INT corner = 0; corner < 4; corner++) {
		pTexCoords[corner].u = cornerU[corner];
		pTexCoords[corner].v = cornerV[corner];

		pVerts[corner].x = cornerX[corner];
		pVerts[corner].y = cornerY[corner];
		pVerts[corner].z = Z;
		pVerts[corner].color = pointColor;
	}

	//Unlock vertexColor and texCoord0 buffers
	UnlockVertexColorBuffer();
	UnlockTexCoordBuffer(0);

	//Draw the point and step past the four vertices it used
	m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, 2);
	m_curVertexBufferPos += 4;

	unguard;
}


//Clears the depth buffer to 1.0 after drawing anything still buffered.
//Frame is not referenced by this function.
void UD3D9RenderDevice::ClearZ(FSceneNode* Frame) {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: ClearZ = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::ClearZ);

	EndBuffering();

	//Default AA state not required for Clear
	//Default projection state not required for Clear
	//Default stream state not required for Clear
	//Default vertex program state not required for Clear
	//Default fragment program state not required for Clear
	//Default texture state not required for Clear

	//PF_Occlude blend state is set before the clear
	//NOTE(review): presumably this makes sure depth writes are enabled - confirm against SetBlend
	SetBlend(PF_Occlude);
	//Only the Z buffer is cleared (D3DCLEAR_ZBUFFER); the color and stencil arguments are passed as 0
	m_d3dDevice->Clear(0, NULL, D3DCLEAR_ZBUFFER, 0, 1.0f, 0);

	unguard;
}

//Empty implementation: this device does no work for PushHit and ignores Data and Count.
void UD3D9RenderDevice::PushHit(const BYTE* Data, INT Count) {
	guard(UD3D9RenderDevice::PushHit);
	unguard;
}

//Empty implementation: this device does no work for PopHit and ignores Count and bForce.
void UD3D9RenderDevice::PopHit(INT Count, UBOOL bForce) {
	guard(UD3D9RenderDevice::PopHit);
	unguard;
}

//Formats the bind, image, complex, Gouraud and tile cycle counters into Result as milliseconds.
//Result must point to a buffer large enough for the formatted text; no size is passed or checked here.
void UD3D9RenderDevice::GetStats(TCHAR* Result) {
	guard(UD3D9RenderDevice::GetStats);

	//Conversion factor from cycle counts to milliseconds
	const double msPerCycle = GSecondsPerCycle * 1000.0f;

	const double bindMs = msPerCycle * BindCycles;
	const double imageMs = msPerCycle * ImageCycles;
	const double complexMs = msPerCycle * ComplexCycles;
	const double gouraudMs = msPerCycle * GouraudCycles;
	const double tileMs = msPerCycle * TileCycles;

	appSprintf(
		Result,
		TEXT("D3D9 stats: Bind=%04.1f Image=%04.1f Complex=%04.1f Gouraud=%04.1f Tile=%04.1f"),
		bindMs,
		imageMs,
		complexMs,
		gouraudMs,
		tileMs
	);

	unguard;
}

//Copies the viewport's part of the front buffer into Pixels for screenshots.
//Pixels is written as a row-major array that is Viewport->SizeX pixels wide and Viewport->SizeY rows high.
//In windowed mode only the part of the client area that lies on the screen is copied; it is
//placed at its matching position inside Pixels, and entries outside that region are not written.
//Returns without writing anything if the display mode query, the client to screen conversion,
//the surface creation, the front buffer copy or the surface lock fails, or if no part of the
//client area is on the screen.
void UD3D9RenderDevice::ReadPixels(FColor* Pixels) {
	guard(UD3D9RenderDevice::ReadPixels);

	INT x, y;
	INT SizeX, SizeY;
	INT StartX = 0, StartY = 0;
	//Offset inside Pixels of the first copied pixel; non-zero when the client area starts off screen
	INT DestStartX = 0, DestStartY = 0;
	HRESULT hResult;
	IDirect3DSurface9 *d3dsFrontBuffer = NULL;
	D3DDISPLAYMODE d3ddm;

	SizeX = Viewport->SizeX;
	SizeY = Viewport->SizeY;

	//Get current display mode
	hResult = m_d3dDevice->GetDisplayMode(0, &d3ddm);
	if (FAILED(hResult)) {
		return;
	}

	const INT ScreenSizeX = (INT)d3ddm.Width;
	const INT ScreenSizeY = (INT)d3ddm.Height;

	//Check if windowed
	if (m_d3dpp.Windowed) {
		POINT upperLeft = { 0, 0 };

		//Find where the client area starts on the screen
		if (!ClientToScreen(m_hWnd, &upperLeft)) {
			return;
		}

		//Check if entirely off the screen
		if (upperLeft.x <= -ScreenSizeX) return;
		if (upperLeft.y <= -ScreenSizeY) return;
		if (upperLeft.x >= ScreenSizeX) return;
		if (upperLeft.y >= ScreenSizeY) return;

		//Set start position
		//Columns left of the screen edge cannot be captured, so they are skipped in the
		//destination too, which keeps the visible part at its correct position
		StartX = upperLeft.x;
		if (StartX < 0) {
			DestStartX = -StartX;
			SizeX -= DestStartX;
			StartX = 0;
		}
		if (SizeX <= 0) return;

		//Same for rows above the top screen edge
		StartY = upperLeft.y;
		if (StartY < 0) {
			DestStartY = -StartY;
			SizeY -= DestStartY;
			StartY = 0;
		}
		if (SizeY <= 0) return;

		//Clip against the right and bottom screen edges
		if ((StartX + SizeX) > ScreenSizeX) {
			SizeX = ScreenSizeX - StartX;
		}
		if (SizeX <= 0) return;

		if ((StartY + SizeY) > ScreenSizeY) {
			SizeY = ScreenSizeY - StartY;
		}
		if (SizeY <= 0) return;
	}

	//Create surface to hold screenshot
	//The surface is sized to the display mode
	hResult = m_d3dDevice->CreateOffscreenPlainSurface(d3ddm.Width, d3ddm.Height, D3DFMT_A8R8G8B8, D3DPOOL_SCRATCH, &d3dsFrontBuffer, NULL);
	if (FAILED(hResult)) {
		return;
	}

	//Get copy of front buffer
	hResult = m_d3dDevice->GetFrontBufferData(0, d3dsFrontBuffer);
	if (FAILED(hResult)) {
		//Release surface to hold screenshot
		d3dsFrontBuffer->Release();

		return;
	}

	//Clamp size just in case
	if (SizeX > ScreenSizeX) SizeX = ScreenSizeX;
	if (SizeY > ScreenSizeY) SizeY = ScreenSizeY;

	//Lock screenshot surface
	D3DLOCKED_RECT lockRect;
	hResult = d3dsFrontBuffer->LockRect(&lockRect, NULL, D3DLOCK_NOSYSLOCK | D3DLOCK_READONLY);
	if (FAILED(hResult)) {
		//Release surface to hold screenshot
		d3dsFrontBuffer->Release();

		return;
	}

	INT DestSizeX = Viewport->SizeX;
	INT DestSizeY = Viewport->SizeY;

	//Source rows are lockRect.Pitch bytes apart; destination rows are DestSizeX pixels apart
	BYTE *pSrcRow = (BYTE *)lockRect.pBits + (StartY * lockRect.Pitch);
	FColor *pDestRow = Pixels + (DestStartY * DestSizeX) + DestStartX;
	for (y = 0; y < SizeY; y++) {
		const DWORD *pSrcPixels = (const DWORD *)pSrcRow + StartX;
		for (x = 0; x < SizeX; x++) {
			DWORD dwPixel = pSrcPixels[x];
			//The low, middle and high color bytes are passed as the first, second and third FColor arguments
			pDestRow[x] = FColor(((dwPixel >> 0) & 0xFF), ((dwPixel >> 8) & 0xFF), ((dwPixel >> 16) & 0xFF), 0xFF);
		}
		pSrcRow += lockRect.Pitch;
		pDestRow += DestSizeX;
	}

	//Unlock screenshot surface
	d3dsFrontBuffer->UnlockRect();

	//Release surface to hold screenshot
	d3dsFrontBuffer->Release();

	//Gamma correct screenshots if the option is true and the gamma ramp was set successfully
	//The same gamma value is used for all three channels
	if (GammaCorrectScreenshots && m_setGammaRampSucceeded) {
		FByteGammaRamp gammaByteRamp;
		BuildGammaRamp(SavedGammaCorrection, SavedGammaCorrection, SavedGammaCorrection, Brightness, gammaByteRamp);
		for (y = 0; y < DestSizeY; y++) {
			for (x = 0; x < DestSizeX; x++) {
				Pixels[x + y * DestSizeX].R = gammaByteRamp.red[Pixels[x + y * DestSizeX].R];
				Pixels[x + y * DestSizeX].G = gammaByteRamp.green[Pixels[x + y * DestSizeX].G];
				Pixels[x + y * DestSizeX].B = gammaByteRamp.blue[Pixels[x + y * DestSizeX].B];
			}
		}
	}

	unguard;
}

void UD3D9RenderDevice::EndFlash() {
#ifdef UTGLR_DEBUG_SHOW_CALL_COUNTS
{
	static int si;
	dout << L"utd3d9r: EndFlash = " << si++ << std::endl;
}
#endif
	guard(UD3D9RenderDevice::EndFlash);
	if (FlashScale != FPlane(.5,.5,.5,0) || FlashFog != FPlane(0,0,0,0)) {
		EndBuffering();

		SetDefaultAAState();
		SetDefaultProjectionState();
		SetDefaultStreamState();
		SetDefaultTextureState();

		SetBlend(PF_Highlighted);
		SetNoTexture(0);

		FPlane tempPlane = FPlane(FlashFog.X, FlashFog.Y, FlashFog.Z, 1.0 - Min(FlashScale.X * 2.0f, 1.0f));
		DWORD flashColor = FPlaneTo_BGRA(&tempPlane);

		FLOAT RFX2 = m_RProjZ;
		FLOAT RFY2 = m_RProjZ * m_Aspect;

		//Adjust Z coordinate if Z range hack is active
		FLOAT ZCoord = 1.0f;
		if (m_useZRangeHack) {
			ZCoord = (((ZCoord - 0.5f) / 7.5f) * 4.0f) + 4.0f;
		}

		//Make sure at least 4 entries are left in the vertex buffers
		if ((m_curVertexBufferPos + 4) >= VERTEX_ARRAY_SIZE) {
			FlushVertexBuffers();
		}

		//Lock vertexColor and texCoord0 buffers
		LockVertexColorBuffer();
		LockTexCoordBuffer(0);

		FGLTexCoord *pTexCoordArray = m_pTexCoordArray[0];
		FGLVertexColor *pVertexColorArray = m_pVertexColorArray;

		pTexCoordArray[0].u = 0.0f;
		pTexCoordArray[0].v = 0.0f;

		pTexCoordArray[1].u = 1.0f;
		pTexCoordArray[1].v = 0.0f;

		pTexCoordArray[2].u = 1.0f;
		pTexCoordArray[2].v = 1.0f;

		pTexCoordArray[3].u = 0.0f;
		pTexCoordArray[3].v = 1.0f;

		pVertexColorArray[0].x = RFX2 * (-1.0f * ZCoord);
		pVertexColorArray[0].y = RFY2 * (-1.0f * ZCoord);
		pVertexColorArray[0].z = ZCoord;
		pVertexColorArray[0].color = flashColor;

		pVertexColorArray[1].x = RFX2 * (+1.0f * ZCoord);
		pVertexColorArray[1].y = RFY2 * (-1.0f * ZCoord);
		pVertexColorArray[1].z = ZCoord;
		pVertexColorArray[1].color = flashColor;

		pVertexColorArray[2].x = RFX2 * (+1.0f * ZCoord);
		pVertexColorArray[2].y = RFY2 * (+1.0f * ZCoord);
		pVertexColorArray[2].z = ZCoord;
		pVertexColorArray[2].color = flashColor;

		pVertexColorArray[3].x = RFX2 * (-1.0f * ZCoord);
		pVertexColorArray[3].y = RFY2 * (+1.0f * ZCoord);
		pVertexColorArray[3].z = ZCoord;
		pVertexColorArray[3].color = flashColor;

		//Unlock vertexColor and texCoord0 buffers
		UnlockVertexColorBuffer();
		UnlockTexCoordBuffer(0);

		//Draw the square
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, 2);

		//Advance vertex buffer position
		m_curVertexBufferPos += 4;
	}
	unguard;
}

//Precaches a texture by setting it on texture unit 0 through SetTextureNoPanBias,
//using the same call the draw functions use to set their texture.
void UD3D9RenderDevice::PrecacheTexture(FTextureInfo& Info, DWORD PolyFlags) {
	guard(UD3D9RenderDevice::PrecacheTexture);
	SetTextureNoPanBias(0, Info, PolyFlags);
	unguard;
}


//This function is safe to call multiple times to initialize once
void UD3D9RenderDevice::InitNoTextureSafe(void) {
	guard(UD3D9RenderDevice::InitNoTexture);
	unsigned int u, v;
	HRESULT hResult;
	D3DLOCKED_RECT lockRect;
	DWORD *pTex;

	//Return early if already initialized
	if (m_pNoTexObj != 0) {
		return;
	}

	//Create the texture
	hResult = m_d3dDevice->CreateTexture(4, 4, 1, 0, D3DFMT_A8R8G8B8, D3DPOOL_MANAGED, &m_pNoTexObj, NULL);
	if (FAILED(hResult)) {
		appErrorf(TEXT("CreateTexture (basic RGBA8) failed"));
	}

	//Lock texture level 0
	if (FAILED(m_pNoTexObj->LockRect(0, &lockRect, NULL, D3DLOCK_NOSYSLOCK))) {
		appErrorf(TEXT("Texture lock failed"));
	}

	//Write texture
	pTex = (DWORD *)lockRect.pBits;
	for (u = 0; u < 4; u++) {
		for (v = 0; v < 4; v++) {
			pTex[v] = 0xFFFFFFFF;
		}
		pTex = (DWORD *)((BYTE *)pTex + lockRect.Pitch);
	}

	//Unlock texture level 0
	if (FAILED(m_pNoTexObj->UnlockRect(0))) {
		appErrorf(TEXT("Texture unlock failed"));
	}

	return;
	unguard;
}

//This function is safe to call multiple times to initialize once
void UD3D9RenderDevice::InitAlphaTextureSafe(void) {
	guard(UD3D9RenderDevice::InitAlphaTexture);
	unsigned int u;
	HRESULT hResult;
	D3DLOCKED_RECT lockRect;
	BYTE *pTex;

	//Return early if already initialized
	if (m_pAlphaTexObj != 0) {
		return;
	}

	//Create the texture
	hResult = m_d3dDevice->CreateTexture(256, 1, 1, 0, D3DFMT_A8, D3DPOOL_MANAGED, &m_pAlphaTexObj, NULL);
	if (FAILED(hResult)) {
		appErrorf(TEXT("CreateTexture (alpha) failed"));
	}

	//Lock texture level 0
	if (FAILED(m_pAlphaTexObj->LockRect(0, &lockRect, NULL, D3DLOCK_NOSYSLOCK))) {
		appErrorf(TEXT("Texture lock failed"));
	}

	//Write texture
	pTex = (BYTE *)lockRect.pBits;
	for (u = 0; u < 256; u++) {
		pTex[u] = 255 - u;
	}

	//Unlock texture level 0
	if (FAILED(m_pAlphaTexObj->UnlockRect(0))) {
		appErrorf(TEXT("Texture unlock failed"));
	}

	return;
	unguard;
}

void UD3D9RenderDevice::ScanForOldTextures(void) {
	guard(UD3D9RenderDevice::ScanForOldTextures);

	unsigned int u;
	FCachedTexture *pCT;

	//Prevent currently bound textures from being recycled
	for (u = 0; u < MAX_TMUNITS; u++) {
		FCachedTexture *pBind = TexInfo[u].pBind;
		if (pBind != NULL) {
			//Update last used frame count so that the texture will not be recycled
			pBind->LastUsedFrameCount = m_currentFrameCount;

			//Move node to tail of linked list if in LRU list
			if (pBind->bindType == BIND_TYPE_NON_ZERO_PREFIX_LRU_LIST) {
				m_nonZeroPrefixBindChain->unlink(pBind);
				m_nonZeroPrefixBindChain->link_to_tail(pBind);
			}
		}
	}

	pCT = m_nonZeroPrefixBindChain->begin();
	while (pCT != m_nonZeroPrefixBindChain->end()) {
		DWORD numFramesSinceUsed = m_currentFrameCount - pCT->LastUsedFrameCount;
		if (numFramesSinceUsed > DynamicTexIdRecycleLevel) {
			//See if the tex pool is not enabled, or the tex format is not RGBA8, or the texture has mipmaps
			if (!UseTexPool || (pCT->texFormat != D3DFMT_A8R8G8B8) || (pCT->texParams.filter & CT_HAS_MIPMAPS_BIT)) {
				//Remove node from linked list
				m_nonZeroPrefixBindChain->unlink(pCT);

				//Get pointer to node in bind map
				QWORD_CTTree_t::node_t *pNode = (QWORD_CTTree_t::node_t *)((BYTE *)pCT - (DWORD)&(((QWORD_CTTree_t::node_t *)0)->data));
				//Extract tree index
				BYTE treeIndex = pCT->treeIndex;
				//Advanced cached texture pointer to next entry in linked list
				pCT = pCT->pNext;

				//Remove node from bind map
				m_nonZeroPrefixBindTrees[treeIndex].remove(pNode);

				//Delete the texture
				pNode->data.pTexObj->Release();
#if 0
{
	static int si;
	dout << L"utd3d9r: Texture delete = " << si++ << std::endl;
}
#endif

				continue;
			}
			else {
				TexPoolMap_t::node_t *texPoolPtr;

#if 0
{
	static int si;
	dout << L"utd3d9r: TexPool free = " << si++ << L", Id = 0x" << HexString(32, (DWORD)pCT->pTexObj)
		<< L", u = " << pCT->UBits << L", v = " << pCT->VBits << std::endl;
}
#endif

				//Remove node from linked list
				m_nonZeroPrefixBindChain->unlink(pCT);

				//Create a key from the lg2 width and height of the texture object
				TexPoolMapKey_t texPoolKey = MakeTexPoolMapKey(pCT->UBits, pCT->VBits);

				//Get pointer to node in bind map
				QWORD_CTTree_t::node_t *pNode = (QWORD_CTTree_t::node_t *)((BYTE *)pCT - (DWORD)&(((QWORD_CTTree_t::node_t *)0)->data));
				//Extract tree index
				BYTE treeIndex = pCT->treeIndex;
				//Advanced cached texture pointer to next entry in linked list
				pCT = pCT->pNext;

				//Remove node from bind map
				m_nonZeroPrefixBindTrees[treeIndex].remove(pNode);

				//See if the key does not yet exist
				texPoolPtr = m_RGBA8TexPool->find(texPoolKey);
				//If the key does not yet exist, add an empty vector in its place
				if (texPoolPtr == 0) {
					texPoolPtr = m_TexPoolMap_Allocator.alloc_node();
					texPoolPtr->key = texPoolKey;
					texPoolPtr->data = QWORD_CTTree_NodePool_t();
					m_RGBA8TexPool->insert(texPoolPtr);
				}

				//Add node plus texture id to a list in the tex pool based on its dimensions
				texPoolPtr->data.add(pNode);

				continue;
			}
		}

		//The list is sorted
		//Stop searching on first one not to be recycled
		break;

		pCT = pCT->pNext;
	}

	unguard;
}

//Binds the no-texture placeholder object (m_pNoTexObj) to texture stage Multi
//and records in TexInfo that no cached texture is bound there.
void UD3D9RenderDevice::SetNoTextureNoCheck(INT Multi) {
	guard(UD3D9RenderDevice::SetNoTexture);

	//Time spent here is accounted to BindCycles
	clock(BindCycles);

	//Select the placeholder texture with point min filtering and no mip filtering
	m_d3dDevice->SetTexture(Multi, m_pNoTexObj);
	SetTexFilter(Multi, CT_MIN_FILTER_POINT | CT_MIP_FILTER_NONE);

	//This stage no longer references a cached texture bind
	TexInfo[Multi].pBind = NULL;
	TexInfo[Multi].CurrentCacheID = TEX_CACHE_ID_NO_TEX;

	unclock(BindCycles);

	unguard;
}

//Binds the alpha gradient texture (m_pAlphaTexObj) to texture stage Multi
//and records in TexInfo that no cached texture is bound there.
void UD3D9RenderDevice::SetAlphaTextureNoCheck(INT Multi) {
	guard(UD3D9RenderDevice::SetAlphaTexture);

	//Time spent here is accounted to BindCycles
	clock(BindCycles);

	//Select the alpha texture
	m_d3dDevice->SetTexture(Multi, m_pAlphaTexObj);

	//Linear min and mag filtering, no mip filtering, clamped addressing
	SetTexFilter(Multi, CT_MIN_FILTER_LINEAR | CT_MIP_FILTER_NONE | CT_MAG_FILTER_LINEAR_NOT_POINT_BIT | CT_ADDRESS_CLAMP_NOT_WRAP_BIT);

	//This stage no longer references a cached texture bind
	TexInfo[Multi].pBind = NULL;
	TexInfo[Multi].CurrentCacheID = TEX_CACHE_ID_ALPHA_TEX;

	unclock(BindCycles);

	unguard;
}

//This function must use Tex.CurrentCacheID and NEVER use Info.CacheID to reference the texture cache id
//This makes it work with the masked texture hack code
//
//Binds the cached D3D texture for Tex.CurrentCacheID to texture stage texNum.
//The cached entry and its D3D texture are created on first use, and the mip levels
//are (re)uploaded when the entry is new or Info.bRealtimeChanged is set.
//
//texNum    - texture stage passed to SetTexture and SetTexFilter
//Tex       - per texture unit state; pBind, UMult and VMult are written here
//Info      - source texture; supplies mips, palette and format, and has bRealtimeChanged cleared on upload
//PolyFlags - PF_NoSmooth selects point filtering, PF_Masked forces palette index 0 to
//            transparent black during upload; also forwarded to CacheTextureInfo
void UD3D9RenderDevice::SetTextureNoCheck(DWORD texNum, FTexInfo& Tex, FTextureInfo& Info, DWORD PolyFlags) {
	guard(UD3D9RenderDevice::SetTexture);

	// Make current.
	clock(BindCycles);

	//Cache ids whose upper 32 bits are all zero are kept in the DWORD keyed trees
	//All other cache ids are kept in the QWORD keyed trees
	bool isZeroPrefixCacheID = ((Tex.CurrentCacheID & 0xFFFFFFFF00000000) == 0) ? true : false;

	FCachedTexture *pBind = NULL;
	bool existingBind = false;
	HRESULT hResult;

	if (isZeroPrefixCacheID) {
		DWORD CacheIDSuffix = (Tex.CurrentCacheID & 0x00000000FFFFFFFF);

		DWORD_CTTree_t *zeroPrefixBindTree = &m_zeroPrefixBindTrees[CTZeroPrefixCacheIDSuffixToTreeIndex(CacheIDSuffix)];
		DWORD_CTTree_t::node_t *bindTreePtr = zeroPrefixBindTree->find(CacheIDSuffix);
		if (bindTreePtr != 0) {
			pBind = &bindTreePtr->data;
			existingBind = true;
		}
		else {
			DWORD_CTTree_t::node_t *pNewNode;

			//Insert new texture info
			pNewNode = m_DWORD_CTTree_Allocator.alloc_node();
			pNewNode->key = CacheIDSuffix;
			zeroPrefixBindTree->insert(pNewNode);
			pBind = &pNewNode->data;

			//Set bind type
			pBind->bindType = BIND_TYPE_ZERO_PREFIX;

			//Set default tex params
			pBind->texParams = CT_DEFAULT_TEX_PARAMS;
			pBind->dynamicTexBits = (PolyFlags & PF_NoSmooth) ? DT_NO_SMOOTH_BIT : 0;

			//Cache texture info for the new texture
			CacheTextureInfo(pBind, Info, PolyFlags);

#if 0
{
	static int si;
	dout << L"utd3d9r: Create texture zp = " << si++ << std::endl;
}
#endif
			//Create the texture
			//A single level is created when the source has only one mip
			hResult = m_d3dDevice->CreateTexture(
				1U << pBind->UBits, 1U << pBind->VBits, (Info.NumMips == 1) ? 1 : (pBind->MaxLevel + 1),
				0, pBind->texFormat, D3DPOOL_MANAGED, &pBind->pTexObj, NULL);
			if (FAILED(hResult)) {
				appErrorf(TEXT("CreateTexture failed"));
			}

			//Allocate a new texture id
			AllocatedTextures++;
		}
	}
	else {
		DWORD CacheIDSuffix = (Tex.CurrentCacheID & 0x00000000FFFFFFFF);
		DWORD treeIndex = CTNonZeroPrefixCacheIDSuffixToTreeIndex(CacheIDSuffix);

		QWORD_CTTree_t *nonZeroPrefixBindTree = &m_nonZeroPrefixBindTrees[treeIndex];
		QWORD_CTTree_t::node_t *bindTreePtr = nonZeroPrefixBindTree->find(Tex.CurrentCacheID);
		if (bindTreePtr != 0) {
			pBind = &bindTreePtr->data;
			pBind->LastUsedFrameCount = m_currentFrameCount;

			//Check if texture is in LRU list
			if (pBind->bindType == BIND_TYPE_NON_ZERO_PREFIX_LRU_LIST) {
				//Move node to tail of linked list
				m_nonZeroPrefixBindChain->unlink(pBind);
				m_nonZeroPrefixBindChain->link_to_tail(pBind);
			}

			existingBind = true;
		}
		else {
			QWORD_CTTree_t::node_t *pNewNode;

			//Allocate a new node
			//Use the node pool if it is not empty
			pNewNode = m_nonZeroPrefixNodePool.try_remove();
			if (!pNewNode) {
				pNewNode = m_QWORD_CTTree_Allocator.alloc_node();
			}

			//Insert new texture info
			pNewNode->key = Tex.CurrentCacheID;
			nonZeroPrefixBindTree->insert(pNewNode);
			pBind = &pNewNode->data;
			pBind->LastUsedFrameCount = m_currentFrameCount;

			//Set bind type
			//With CacheStaticMaps enabled, cache ids whose low byte is 0x18 are kept out of the LRU list
			pBind->bindType = BIND_TYPE_NON_ZERO_PREFIX_LRU_LIST;
			if (CacheStaticMaps && ((Tex.CurrentCacheID & 0xFF) == 0x18)) {
				pBind->bindType = BIND_TYPE_NON_ZERO_PREFIX;
			}

			//Save tree index
			pBind->treeIndex = (BYTE)treeIndex;

			//Set default tex params
			pBind->texParams = CT_DEFAULT_TEX_PARAMS;
			pBind->dynamicTexBits = (PolyFlags & PF_NoSmooth) ? DT_NO_SMOOTH_BIT : 0;

			//Check if texture should be in LRU list
			if (pBind->bindType == BIND_TYPE_NON_ZERO_PREFIX_LRU_LIST) {
				//Add node to linked list
				m_nonZeroPrefixBindChain->link_to_tail(pBind);
			}

			//Cache texture info for the new texture
			CacheTextureInfo(pBind, Info, PolyFlags);

			//See if the tex pool is enabled
			bool needTexIdAllocate = true;
			if (UseTexPool) {
				//See if the format will be RGBA8
				//Only textures without mipmaps are stored in the tex pool
				if ((pBind->texType == TEX_TYPE_NORMAL) && (Info.NumMips == 1)) {
					TexPoolMap_t::node_t *texPoolPtr;

					//Create a key from the lg2 width and height of the texture object
					TexPoolMapKey_t texPoolKey = MakeTexPoolMapKey(pBind->UBits, pBind->VBits);

					//Search for the key in the map
					texPoolPtr = m_RGBA8TexPool->find(texPoolKey);
					if (texPoolPtr != 0) {
						QWORD_CTTree_NodePool_t::node_t *texPoolNodePtr;

						//Get a reference to the pool of nodes with tex ids of the right dimension
						QWORD_CTTree_NodePool_t &texPool = texPoolPtr->data;

						//Attempt to get a texture id for the tex pool
						if ((texPoolNodePtr = texPool.try_remove()) != 0) {
							//Use texture id from node in tex pool
							pBind->pTexObj = texPoolNodePtr->data.pTexObj;

							//Use tex params from node in tex pool
							pBind->texParams = texPoolNodePtr->data.texParams;
							pBind->dynamicTexBits = texPoolNodePtr->data.dynamicTexBits;

							//Then add node to free list
							//Only the texture object is taken over; the donor node itself becomes reusable
							m_nonZeroPrefixNodePool.add(texPoolNodePtr);

#if 0
{
	static int si;
	dout << L"utd3d9r: TexPool retrieve = " << si++ << L", Id = 0x" << HexString(32, (DWORD)pBind->pTexObj)
		<< L", u = " << pBind->UBits << L", v = " << pBind->VBits << std::endl;
}
#endif

							//Clear the need tex id allocate flag
							needTexIdAllocate = false;
						}
					}
				}
			}
			if (needTexIdAllocate) {
#if 0
{
	static int si;
	dout << L"utd3d9r: Create texture nzp = " << si++ << std::endl;
}
#endif
				//Create the texture
				hResult = m_d3dDevice->CreateTexture(
					1U << pBind->UBits, 1U << pBind->VBits, (Info.NumMips == 1) ? 1 : (pBind->MaxLevel + 1),
					0, pBind->texFormat, D3DPOOL_MANAGED, &pBind->pTexObj, NULL);
				if (FAILED(hResult)) {
					appErrorf(TEXT("CreateTexture failed"));
				}

				//Allocate a new texture id
				AllocatedTextures++;
			}
		}
	}

	//Save pointer to current texture bind for current texture unit
	Tex.pBind = pBind;

	//Set texture
	m_d3dDevice->SetTexture(texNum, pBind->pTexObj);

	unclock(BindCycles);

	// Account for all the impact on scale normalization.
	Tex.UMult = pBind->UMult;
	Tex.VMult = pBind->VMult;

	//Check for any changes to dynamic texture object parameters
	{
		BYTE desiredDynamicTexBits;

		desiredDynamicTexBits = (PolyFlags & PF_NoSmooth) ? DT_NO_SMOOTH_BIT : 0;
		if (desiredDynamicTexBits != pBind->dynamicTexBits) {
			BYTE dynamicTexBitsXor;

			dynamicTexBitsXor = desiredDynamicTexBits ^ pBind->dynamicTexBits;

			//Update dynamic tex bits early as there are no subsequent dependencies
			pBind->dynamicTexBits = desiredDynamicTexBits;

			if (dynamicTexBitsXor & DT_NO_SMOOTH_BIT) {
				BYTE desiredTexParamsFilter;

				//Set partial desired filter tex params
				//Mip filtering is only selected when the cached filter state says the texture has mipmaps
				desiredTexParamsFilter = 0;
				if (NoFiltering) {
					desiredTexParamsFilter |= CT_MIN_FILTER_POINT | CT_MIP_FILTER_NONE;
				}
				else if (PolyFlags & PF_NoSmooth) {
					desiredTexParamsFilter |= CT_MIN_FILTER_POINT;
					desiredTexParamsFilter |= ((pBind->texParams.filter & CT_HAS_MIPMAPS_BIT) == 0) ? CT_MIP_FILTER_NONE : CT_MIP_FILTER_POINT;
				}
				else {
					desiredTexParamsFilter |= (MaxAnisotropy) ? CT_MIN_FILTER_ANISOTROPIC : CT_MIN_FILTER_LINEAR;
					desiredTexParamsFilter |= ((pBind->texParams.filter & CT_HAS_MIPMAPS_BIT) == 0) ? CT_MIP_FILTER_NONE : (UseTrilinear ? CT_MIP_FILTER_LINEAR : CT_MIP_FILTER_POINT);
					desiredTexParamsFilter |= CT_MAG_FILTER_LINEAR_NOT_POINT_BIT;
				}

				//Store partial updated texture parameter state in cached texture object
				//Only the min, mip and mag filter bits are replaced; all other filter bits are kept
				const BYTE MODIFIED_TEX_PARAMS_FILTER_BITS = CT_MIN_FILTER_MASK | CT_MIP_FILTER_MASK | CT_MAG_FILTER_LINEAR_NOT_POINT_BIT;
				pBind->texParams.filter = (pBind->texParams.filter & ~MODIFIED_TEX_PARAMS_FILTER_BITS) | desiredTexParamsFilter;
			}
		}
	}

	// Upload if needed.
	if (!existingBind || Info.bRealtimeChanged) {
		//Holds the original palette entry 0 while it is temporarily overwritten below
		FColor paletteIndex0;

		// Cleanup texture flags.
		if (SupportsLazyTextures) {
			Info.Load();
		}
		Info.bRealtimeChanged = 0;

		//Set palette index 0 to black for masked paletted textures
		if (Info.Palette && (PolyFlags & PF_Masked)) {
			paletteIndex0 = Info.Palette[0];
			Info.Palette[0] = FColor(0,0,0,0);
		}

		// Download the texture.
		clock(ImageCycles);

		if (pBind->texType == TEX_TYPE_PALETTED) {
//			glColorTableEXT(GL_TEXTURE_2D, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, Info.Palette);
		}

		//The convert functions read the bind and the locked rect from this shared context
		m_texConvertCtx.pBind = pBind;

		UBOOL SkipMipmaps = (Info.NumMips == 1);
		INT MaxLevel = pBind->MaxLevel;

		//Only calculate texture filter parameters for new textures
		if (!existingBind) {
			tex_params_t desiredTexParams;

			//Set desired filter tex params
			desiredTexParams.filter = 0;
			if (NoFiltering) {
				desiredTexParams.filter |= CT_MIN_FILTER_POINT | CT_MIP_FILTER_NONE;
			}
			else if (PolyFlags & PF_NoSmooth) {
				desiredTexParams.filter |= CT_MIN_FILTER_POINT;
				desiredTexParams.filter |= SkipMipmaps ? CT_MIP_FILTER_NONE : CT_MIP_FILTER_POINT;
			}
			else {
				desiredTexParams.filter |= (MaxAnisotropy) ? CT_MIN_FILTER_ANISOTROPIC : CT_MIN_FILTER_LINEAR;
				desiredTexParams.filter |= SkipMipmaps ? CT_MIP_FILTER_NONE : (UseTrilinear ? CT_MIP_FILTER_LINEAR : CT_MIP_FILTER_POINT);
				desiredTexParams.filter |= CT_MAG_FILTER_LINEAR_NOT_POINT_BIT;
			}

			if (!SkipMipmaps) {
				desiredTexParams.filter |= CT_HAS_MIPMAPS_BIT;
			}

			//Store updated texture parameter state in cached texture object
			pBind->texParams = desiredTexParams;
		}


		//Some textures only upload the base texture
		INT MaxUploadLevel = MaxLevel;
		if (SkipMipmaps) {
			MaxUploadLevel = 0;
		}


		//Set initial texture width and height in the context structure
		//Setup code must ensure that both UBits and VBits are greater than or equal to 0
		m_texConvertCtx.texWidthPow2 = 1 << pBind->UBits;
		m_texConvertCtx.texHeightPow2 = 1 << pBind->VBits;

		guard(WriteTexture);
		for (INT Level = 0; Level <= MaxUploadLevel; Level++) {
			// Convert the mipmap.
			//When the source has fewer mips than requested, the last source mip is reused
			//and stepBits records how many extra halvings the convert function must apply
			INT MipIndex = pBind->BaseMip + Level;
			INT stepBits = 0;
			if (MipIndex >= Info.NumMips) {
				stepBits = MipIndex - (Info.NumMips - 1);
				MipIndex = Info.NumMips - 1;
			}
			m_texConvertCtx.stepBits = stepBits;

			FMipmapBase* Mip = Info.Mips[MipIndex];
			if (Mip->DataPtr) {
				//Lock texture level
				if (FAILED(pBind->pTexObj->LockRect(Level, &m_texConvertCtx.lockRect, NULL, D3DLOCK_NOSYSLOCK))) {
					appErrorf(TEXT("Texture lock failed"));
				}

				//Dispatch on the texture type cached by CacheTextureInfo
				switch (pBind->texType) {
				case TEX_TYPE_COMPRESSED_DXT1:
					guard(ConvertDXT1_DXT1);
					ConvertDXT1_DXT1(Mip, Level);
					unguard;
					break;

				case TEX_TYPE_COMPRESSED_DXT1_TO_DXT3:
					guard(ConvertDXT1_DXT3);
					ConvertDXT1_DXT3(Mip, Level);
					unguard;
					break;

				case TEX_TYPE_PALETTED:
					//Both P8 converters are commented out, so nothing is written for this type
					guard(ConvertP8_P8);
					if (stepBits == 0) {
//						ConvertP8_P8_NoStep(Mip, Level);
					}
					else {
//						ConvertP8_P8(Mip, Level);
					}
					unguard;
					break;

				case TEX_TYPE_HAS_PALETTE:
					switch (pBind->texFormat) {
					case D3DFMT_R5G6B5:
						guard(ConvertP8_RGB565);
						if (stepBits == 0) {
							ConvertP8_RGB565_NoStep(Mip, Info.Palette, Level);
						}
						else {
							ConvertP8_RGB565(Mip, Info.Palette, Level);
						}
						unguard;
						break;

					case D3DFMT_X1R5G5B5:
					case D3DFMT_A1R5G5B5:
						guard(ConvertP8_RGBA5551);
						if (stepBits == 0) {
							ConvertP8_RGBA5551_NoStep(Mip, Info.Palette, Level);
						}
						else {
							ConvertP8_RGBA5551(Mip, Info.Palette, Level);
						}
						unguard;
						break;

					default:
						guard(ConvertP8_RGBA8888);
						if (stepBits == 0) {
							ConvertP8_RGBA8888_NoStep(Mip, Info.Palette, Level);
						}
						else {
							ConvertP8_RGBA8888(Mip, Info.Palette, Level);
						}
						unguard;
					}
					break;

				default:
					//Remaining types use the converter selected in CacheTextureInfo
					guard(ConvertBGRA7777);
					(this->*pBind->pConvertBGRA7777)(Mip, Level);
					unguard;
				}

				DWORD texWidth, texHeight;

				//Get current texture width and height
				texWidth = m_texConvertCtx.texWidthPow2;
				texHeight = m_texConvertCtx.texHeightPow2;

				//Calculate and save next texture width and height
				//Both are divided by two down to a floor of 1
				//Texture width and height must be even powers of 2 for the following code to work
				m_texConvertCtx.texWidthPow2 = (texWidth & 0x1) | (texWidth >> 1);
				m_texConvertCtx.texHeightPow2 = (texHeight & 0x1) | (texHeight >> 1);

				//Unlock texture level
				if (FAILED(pBind->pTexObj->UnlockRect(Level))) {
					appErrorf(TEXT("Texture unlock failed"));
				}
			}
		}
		unguard;

		unclock(ImageCycles);

		//Restore palette index 0 for masked paletted textures
		if (Info.Palette && (PolyFlags & PF_Masked)) {
			Info.Palette[0] = paletteIndex0;
		}

		// Cleanup.
		if (SupportsLazyTextures) {
			Info.Unload();
		}
	}

	//Set texture filter parameters
	SetTexFilter(texNum, pBind->texParams.filter);

	unguard;
}

//Fills in the cached texture entry pBind from the source texture Info:
// - log2 dimensions, base mip and max level after applying the aspect ratio and size limits
// - U/V scale multipliers and clamp values
// - texture type, D3D format and, for non-paletted sources, the BGRA7777 converter
//PolyFlags PF_Masked and PF_Memorized influence the type and format selection.
void UD3D9RenderDevice::CacheTextureInfo(FCachedTexture *pBind, const FTextureInfo &Info, DWORD PolyFlags) {
#if 0
{
	dout << L"utd3d9r: CacheId = "
		<< HexString(32, (DWORD)((QWORD)Info.CacheID >> 32)) << L":"
		<< HexString(32, (DWORD)((QWORD)Info.CacheID & 0xFFFFFFFF)) << std::endl;
}
{
	const UTexture *pTexture = Info.Texture;
	const TCHAR *pName = pTexture->GetFullName();
	if (pName) dout << L"utd3d9r: TexName = " << pName << std::endl;
}
{
	dout << L"utd3d9r: NumMips = " << Info.NumMips << std::endl;
}
{
	unsigned int u;

	dout << L"utd3d9r: ZPBindTree Size = ";
	for (u = 0; u < NUM_CTTree_TREES; u++) {
		dout << m_zeroPrefixBindTrees[u].calc_size();
		if (u != (NUM_CTTree_TREES - 1)) dout << L", ";
	}
	dout << std::endl;

	dout << L"utd3d9r: NZPBindTree Size = ";
	for (u = 0; u < NUM_CTTree_TREES; u++) {
		dout << m_nonZeroPrefixBindTrees[u].calc_size();
		if (u != (NUM_CTTree_TREES - 1)) dout << L", ";
	}
	dout << std::endl;
}
#endif

	//Start from the log2 dimensions of the top level mip
	INT firstMip = 0;
	INT uLog2 = Info.Mips[0]->UBits;
	INT vLog2 = Info.Mips[0]->VBits;
	INT uRepeatBits = 0;
	INT vRepeatBits = 0;

	//Limit how much wider than tall the texture may be by growing V
	if ((uLog2 - vLog2) > MaxLogUOverV) {
		vRepeatBits += (uLog2 - vLog2) - MaxLogUOverV;
		vLog2 = uLog2 - MaxLogUOverV;
	}
	//Limit how much taller than wide the texture may be by growing U
	if ((vLog2 - uLog2) > MaxLogVOverU) {
		uRepeatBits += (vLog2 - uLog2) - MaxLogVOverU;
		uLog2 = vLog2 - MaxLogVOverU;
	}

	//Grow either dimension up to the minimum texture size
	if (uLog2 < MinLogTextureSize) {
		const INT uPad = MinLogTextureSize - uLog2;
		uRepeatBits += uPad;
		uLog2 += uPad;
	}
	if (vLog2 < MinLogTextureSize) {
		const INT vPad = MinLogTextureSize - vLog2;
		vRepeatBits += vPad;
		vLog2 += vPad;
	}

	//Shrink to the maximum texture size by starting at a smaller mip
	//The other dimension shrinks by the same amount and is floored at zero
	if (uLog2 > MaxLogTextureSize) {
		const INT uExcess = uLog2 - MaxLogTextureSize;
		firstMip += uExcess;
		vLog2 -= uExcess;
		uLog2 = MaxLogTextureSize;
		if (vLog2 < 0) {
			vRepeatBits = -vLog2;
			vLog2 = 0;
		}
	}
	if (vLog2 > MaxLogTextureSize) {
		const INT vExcess = vLog2 - MaxLogTextureSize;
		firstMip += vExcess;
		uLog2 -= vExcess;
		vLog2 = MaxLogTextureSize;
		if (uLog2 < 0) {
			uRepeatBits = -uLog2;
			uLog2 = 0;
		}
	}

	//Save the resulting geometry
	pBind->UBits = uLog2;
	pBind->VBits = vLog2;
	pBind->BaseMip = firstMip;
	pBind->MaxLevel = Min(uLog2, vLog2) - MinLogTextureSize;

	//Scale multipliers account for any growth applied above
	pBind->UMult = 1.0f / (Info.UScale * (Info.USize << uRepeatBits));
	pBind->VMult = 1.0f / (Info.VScale * (Info.VSize << vRepeatBits));

	//Largest valid source coordinates for the clamping converter
	pBind->UClampVal = Info.UClamp - 1;
	pBind->VClampVal = Info.VClamp - 1;

	//No clamping is required when the clamp sizes equal the texture sizes
	const bool clampNotRequired = (((Info.UClamp ^ Info.USize) | (Info.VClamp ^ Info.VSize)) == 0);


	//Determine texture type
	//PolyFlags PF_Masked cannot change if existing texture is updated as it caches texture type information here
	bool usePalettedFormat = (UsePalette && Info.Palette);
	if (usePalettedFormat && !UseAlphaPalette) {
		//Without alpha palette support, masked textures and palettes with a non opaque entry 0 are expanded instead
		if ((PolyFlags & PF_Masked) || (Info.Palette[0].A != 255)) {
			usePalettedFormat = false;
		}
	}

	if ((Info.Format == TEXF_DXT1) && SupportsTC) {
		//Compressed source; optionally stored as DXT3 unless the texture is masked
		if (TexDXT1ToDXT3 && (!(PolyFlags & PF_Masked))) {
			pBind->texType = TEX_TYPE_COMPRESSED_DXT1_TO_DXT3;
			pBind->texFormat = D3DFMT_DXT3;
		}
		else {
			pBind->texType = TEX_TYPE_COMPRESSED_DXT1;
			pBind->texFormat = D3DFMT_DXT1;
		}
	}
	else if (usePalettedFormat) {
		pBind->texType = TEX_TYPE_PALETTED;
		pBind->texFormat = D3DFMT_P8;
	}
	else if (Info.Palette) {
		//Paletted source expanded to a direct color format
		pBind->texType = TEX_TYPE_HAS_PALETTE;
		pBind->texFormat = D3DFMT_A8R8G8B8;
		//Check if texture should be 16-bit
		if (PolyFlags & PF_Memorized) {
			if (PolyFlags & PF_Masked) {
				pBind->texFormat = D3DFMT_A1R5G5B5;
			}
			else if (Use565Textures) {
				pBind->texFormat = D3DFMT_R5G6B5;
			}
			else {
				pBind->texFormat = D3DFMT_X1R5G5B5;
			}
		}
	}
	else {
		//Direct color source; pick the converter with or without clamping
		pBind->texType = TEX_TYPE_NORMAL;
		pBind->pConvertBGRA7777 = clampNotRequired
			? &UD3D9RenderDevice::ConvertBGRA7777_BGRA8888_NoClamp
			: &UD3D9RenderDevice::ConvertBGRA7777_BGRA8888;
		pBind->texFormat = D3DFMT_A8R8G8B8;
	}
}


void UD3D9RenderDevice::ConvertDXT1_DXT1(const FMipmapBase *Mip, INT Level) {
	const DWORD *pSrc = (DWORD *)Mip->DataPtr;
	DWORD *pTex = (DWORD *)m_texConvertCtx.lockRect.pBits;
	DWORD UBlocks = 1U << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level - 2);
	DWORD VBlocks = 1U << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level - 2);

	for (DWORD v = 0; v < VBlocks; v++) {
		DWORD *pDest = pTex;
		for (DWORD u = 0; u < UBlocks; u++) {
			//Copy one block
			pDest[0] = pSrc[0];
			pDest[1] = pSrc[1];
			pSrc += 2;
			pDest += 2;
		}
		pTex = (DWORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	}

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertDXT1_DXT1 = " << si++ << std::endl;
	}
#endif
}

void UD3D9RenderDevice::ConvertDXT1_DXT3(const FMipmapBase *Mip, INT Level) {
	const DWORD *pSrc = (DWORD *)Mip->DataPtr;
	DWORD *pTex = (DWORD *)m_texConvertCtx.lockRect.pBits;
	DWORD UBlocks = 1U << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level - 2);
	DWORD VBlocks = 1U << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level - 2);

	for (DWORD v = 0; v < VBlocks; v++) {
		DWORD *pDest = pTex;
		for (DWORD u = 0; u < UBlocks; u++) {
			//Copy one block
			pDest[0] = 0xFFFFFFFF;
			pDest[1] = 0xFFFFFFFF;
			pDest[2] = pSrc[0];
			pDest[3] = pSrc[1];
			pSrc += 2;
			pDest += 4;
		}
		pTex = (DWORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	}

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertDXT1_DXT3 = " << si++ << std::endl;
	}
#endif
}

/*void UD3D9RenderDevice::ConvertP8_P8(const FMipmapBase *Mip, INT Level) {
	BYTE* Ptr = (BYTE*)m_texConvertCtx.pCompose;
	INT StepBits = m_texConvertCtx.stepBits;
	DWORD UMask = Mip->USize - 1;
	DWORD VMask = Mip->VSize - 1;
	INT ij_inc = 1 << StepBits;
	INT i_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level + StepBits);
	INT j_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level + StepBits);
	INT i = 0;
	do { //i_stop always >= 1
		BYTE* Base = (BYTE*)Mip->DataPtr + (i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1
			*Ptr++ = Base[j & UMask];
		} while ((j += ij_inc) < j_stop);
	} while ((i += ij_inc) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_P8 = " << si++ << std::endl;
	}
#endif
}*/

/*void UD3D9RenderDevice::ConvertP8_P8_NoStep(const FMipmapBase *Mip, INT Level) {
	BYTE* Ptr = (BYTE*)m_texConvertCtx.pCompose;
	DWORD UMask = Mip->USize - 1;
	DWORD VMask = Mip->VSize - 1;
	INT i_stop = m_texConvertCtx.texHeightPow2;
	INT j_stop = m_texConvertCtx.texWidthPow2;
	INT i = 0;
	do { //i_stop always >= 1
		BYTE* Base = (BYTE*)Mip->DataPtr + (i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1
			*Ptr++ = Base[j & UMask];
		} while ((j += 1) < j_stop);
	} while ((i += 1) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_P8_NoStep = " << si++ << std::endl;
	}
#endif
}*/

void UD3D9RenderDevice::ConvertP8_RGBA8888(const FMipmapBase *Mip, const FColor *Palette, INT Level) {
	DWORD *pTex = (DWORD *)m_texConvertCtx.lockRect.pBits;
	INT StepBits = m_texConvertCtx.stepBits;
	DWORD UMask = Mip->USize - 1;
	DWORD VMask = Mip->VSize - 1;
	INT ij_inc = 1 << StepBits;
	INT i_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level + StepBits);
	INT j_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level + StepBits);
	INT i = 0;
	do { //i_stop always >= 1
		BYTE* Base = (BYTE*)Mip->DataPtr + (i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1
			DWORD dwColor = GET_COLOR_DWORD(Palette[Base[j & UMask]]);
			pTex[j] = (dwColor & 0xFF00FF00) | ((dwColor >> 16) & 0xFF) | ((dwColor << 16) & 0xFF0000);
		} while ((j += ij_inc) < j_stop);
		pTex = (DWORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	} while ((i += ij_inc) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_RGBA8888 = " << si++ << std::endl;
	}
#endif
}

void UD3D9RenderDevice::ConvertP8_RGBA8888_NoStep(const FMipmapBase *Mip, const FColor *Palette, INT Level) {
	DWORD *pTex = (DWORD *)m_texConvertCtx.lockRect.pBits;
	DWORD UMask = Mip->USize - 1;
	DWORD VMask = Mip->VSize - 1;
	INT i_stop = m_texConvertCtx.texHeightPow2;
	INT j_stop = m_texConvertCtx.texWidthPow2;
	INT i = 0;
	do { //i_stop always >= 1
		BYTE* Base = (BYTE*)Mip->DataPtr + (i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1
			DWORD dwColor = GET_COLOR_DWORD(Palette[Base[j & UMask]]);
			pTex[j] = (dwColor & 0xFF00FF00) | ((dwColor >> 16) & 0xFF) | ((dwColor << 16) & 0xFF0000);
		} while ((j += 1) < j_stop);
		pTex = (DWORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	} while ((i += 1) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_RGBA8888_NoStep = " << si++ << std::endl;
	}
#endif
}

//Converts the 8-bit paletted Mip into the locked 16-bit 5:6:5 texture level.
//The source is sampled every (1 << stepBits) texels in both directions, with
//source coordinates wrapped by the mip size masks.
//Mip     - source mip; DataPtr holds one palette index per texel
//Palette - colors looked up by palette index
//Level   - destination level; together with the bind's UBits/VBits it sets the loop bounds
void FASTCALL UD3D9RenderDevice::ConvertP8_RGB565(const FMipmapBase *Mip, const FColor *Palette, INT Level) {
	_WORD *pTex = (_WORD *)m_texConvertCtx.lockRect.pBits;
	INT StepBits = m_texConvertCtx.stepBits;
	DWORD UMask = Mip->USize - 1;
	DWORD VMask = Mip->VSize - 1;
	INT ij_inc = 1 << StepBits;
	INT i_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level + StepBits);
	INT j_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level + StepBits);
	INT i = 0;
	do { //i_stop always >= 1
		BYTE* Base = (BYTE*)Mip->DataPtr + (i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1
			DWORD dwColor = GET_COLOR_DWORD(Palette[Base[j & UMask]]);
			//j is a strided source coordinate; the destination row is contiguous, so scale it back down
			//Indexing the destination with j directly would write past the end of the locked row
			//Top 5 bits of byte 2 go to bits 0-4, top 6 of byte 1 to bits 5-10, top 5 of byte 0 to bits 11-15
			pTex[j >> StepBits] = ((dwColor >> 19) & 0x001F) | ((dwColor >> 5) & 0x07E0) | ((dwColor << 8) & 0xF800);
		} while ((j += ij_inc) < j_stop);
		//One destination row is produced per outer iteration
		pTex = (_WORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	} while ((i += ij_inc) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_RGBA565 = " << si++ << std::endl;
	}
#endif
}

//Converts the 8-bit paletted Mip into the locked 16-bit 5:6:5 texture level without
//any source stepping. The destination size is taken from the convert context and
//source coordinates are wrapped by the mip size masks.
void FASTCALL UD3D9RenderDevice::ConvertP8_RGB565_NoStep(const FMipmapBase *Mip, const FColor *Palette, INT Level) {
	const DWORD UMask = Mip->USize - 1;
	const DWORD VMask = Mip->VSize - 1;
	const INT numRows = m_texConvertCtx.texHeightPow2;
	const INT numCols = m_texConvertCtx.texWidthPow2;

	_WORD *pDestRow = (_WORD *)m_texConvertCtx.lockRect.pBits;
	INT row = 0;
	do { //numRows always >= 1
		const BYTE *pSrcRow = (BYTE*)Mip->DataPtr + (row & VMask) * Mip->USize;
		INT col = 0;
		do { //numCols always >= 1
			DWORD dwColor = GET_COLOR_DWORD(Palette[pSrcRow[col & UMask]]);
			//Top 5 bits of byte 2 go to bits 0-4, top 6 of byte 1 to bits 5-10, top 5 of byte 0 to bits 11-15
			pDestRow[col] = ((dwColor >> 19) & 0x001F) | ((dwColor >> 5) & 0x07E0) | ((dwColor << 8) & 0xF800);
			col += 1;
		} while (col < numCols);
		//Destination rows are lockRect.Pitch bytes apart
		pDestRow = (_WORD *)((BYTE *)pDestRow + m_texConvertCtx.lockRect.Pitch);
		row += 1;
	} while (row < numRows);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_RGBA565_NoStep = " << si++ << std::endl;
	}
#endif
}

//Converts the 8-bit paletted Mip into the locked 16-bit 1:5:5:5 texture level.
//The source is sampled every (1 << stepBits) texels in both directions, with
//source coordinates wrapped by the mip size masks.
//Mip     - source mip; DataPtr holds one palette index per texel
//Palette - colors looked up by palette index
//Level   - destination level; together with the bind's UBits/VBits it sets the loop bounds
void FASTCALL UD3D9RenderDevice::ConvertP8_RGBA5551(const FMipmapBase *Mip, const FColor *Palette, INT Level) {
	_WORD *pTex = (_WORD *)m_texConvertCtx.lockRect.pBits;
	INT StepBits = m_texConvertCtx.stepBits;
	DWORD UMask = Mip->USize - 1;
	DWORD VMask = Mip->VSize - 1;
	INT ij_inc = 1 << StepBits;
	INT i_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level + StepBits);
	INT j_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level + StepBits);
	INT i = 0;
	do { //i_stop always >= 1
		BYTE* Base = (BYTE*)Mip->DataPtr + (i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1
			DWORD dwColor = GET_COLOR_DWORD(Palette[Base[j & UMask]]);
			//j is a strided source coordinate; the destination row is contiguous, so scale it back down
			//Indexing the destination with j directly would write past the end of the locked row
			//Top 5 bits of bytes 2, 1 and 0 go to bits 0-4, 5-9 and 10-14; the top bit of byte 3 goes to bit 15
			pTex[j >> StepBits] = ((dwColor >> 19) & 0x001F) | ((dwColor >> 6) & 0x03E0) | ((dwColor << 7) & 0x7C00) | ((dwColor >> 16) & 0x8000);
		} while ((j += ij_inc) < j_stop);
		//One destination row is produced per outer iteration
		pTex = (_WORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	} while ((i += ij_inc) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_RGBA5551 = " << si++ << std::endl;
	}
#endif
}

//Converts the 8-bit paletted Mip into the locked 16-bit 1:5:5:5 texture level without
//any source stepping. The destination size is taken from the convert context and
//source coordinates are wrapped by the mip size masks.
void FASTCALL UD3D9RenderDevice::ConvertP8_RGBA5551_NoStep(const FMipmapBase *Mip, const FColor *Palette, INT Level) {
	const DWORD UMask = Mip->USize - 1;
	const DWORD VMask = Mip->VSize - 1;
	const INT numRows = m_texConvertCtx.texHeightPow2;
	const INT numCols = m_texConvertCtx.texWidthPow2;

	_WORD *pDestRow = (_WORD *)m_texConvertCtx.lockRect.pBits;
	INT row = 0;
	do { //numRows always >= 1
		const BYTE *pSrcRow = (BYTE*)Mip->DataPtr + (row & VMask) * Mip->USize;
		INT col = 0;
		do { //numCols always >= 1
			DWORD dwColor = GET_COLOR_DWORD(Palette[pSrcRow[col & UMask]]);
			//Top 5 bits of bytes 2, 1 and 0 go to bits 0-4, 5-9 and 10-14; the top bit of byte 3 goes to bit 15
			pDestRow[col] = ((dwColor >> 19) & 0x001F) | ((dwColor >> 6) & 0x03E0) | ((dwColor << 7) & 0x7C00) | ((dwColor >> 16) & 0x8000);
			col += 1;
		} while (col < numCols);
		//Destination rows are lockRect.Pitch bytes apart
		pDestRow = (_WORD *)((BYTE *)pDestRow + m_texConvertCtx.lockRect.Pitch);
		row += 1;
	} while (row < numRows);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertP8_RGBA5551_NoStep = " << si++ << std::endl;
	}
#endif
}

void UD3D9RenderDevice::ConvertBGRA7777_BGRA8888(const FMipmapBase *Mip, INT Level) {
	DWORD *pTex = (DWORD *)m_texConvertCtx.lockRect.pBits;
	INT StepBits = m_texConvertCtx.stepBits;
	DWORD VMask = Mip->VSize - 1;
	DWORD VClampVal = m_texConvertCtx.pBind->VClampVal;
	DWORD UMask = Mip->USize - 1;
	DWORD UClampVal = m_texConvertCtx.pBind->UClampVal;
	INT ij_inc = 1 << StepBits;
	INT i_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level + StepBits);
	INT j_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level + StepBits);
	INT i = 0;
	do { //i_stop always >= 1
		FColor* Base = (FColor*)Mip->DataPtr + Min<DWORD>(i & VMask, VClampVal) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1;
			DWORD dwColor = GET_COLOR_DWORD(Base[Min<DWORD>(j & UMask, UClampVal)]);
			pTex[j] = dwColor * 2; // because of 7777
		} while ((j += ij_inc) < j_stop);
		pTex = (DWORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	} while ((i += ij_inc) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertBGRA7777_BGRA8888 = " << si++ << std::endl;
	}
#endif
}

void UD3D9RenderDevice::ConvertBGRA7777_BGRA8888_NoClamp(const FMipmapBase *Mip, INT Level) {
	DWORD *pTex = (DWORD *)m_texConvertCtx.lockRect.pBits;
	INT StepBits = m_texConvertCtx.stepBits;
	DWORD VMask = Mip->VSize - 1;
	DWORD UMask = Mip->USize - 1;
	INT ij_inc = 1 << StepBits;
	INT i_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->VBits - Level + StepBits);
	INT j_stop = 1 << Max(0, (INT)m_texConvertCtx.pBind->UBits - Level + StepBits);
	INT i = 0;
	do { //i_stop always >= 1
		FColor* Base = (FColor*)Mip->DataPtr + (DWORD)(i & VMask) * Mip->USize;
		INT j = 0;
		do { //j_stop always >= 1;
			DWORD dwColor = GET_COLOR_DWORD(Base[(DWORD)(j & UMask)]);
			pTex[j] = dwColor * 2; // because of 7777
		} while ((j += ij_inc) < j_stop);
		pTex = (DWORD *)((BYTE *)pTex + m_texConvertCtx.lockRect.Pitch);
	} while ((i += ij_inc) < i_stop);

#ifdef UTGLR_DEBUG_SHOW_TEX_CONVERT_COUNTS
	{
		static int si;
		dout << L"utd3d9r: ConvertBGRA7777_BGRA8888_NoClamp = " << si++ << std::endl;
	}
#endif
}

//Applies blendFlags to the D3D device, touching only the render states whose
//controlling flag bits differ from the currently cached blend flags.
//The cached flags (m_curBlendFlags) are replaced with blendFlags.
void UD3D9RenderDevice::SetBlendNoCheck(DWORD blendFlags) {
	guardSlow(UD3D9RenderDevice::SetBlend);

	//Bits that differ between the previous and the requested flags
	const DWORD changedBits = m_curBlendFlags ^ blendFlags;

	//Update main copy of current blend flags early
	m_curBlendFlags = blendFlags;

#ifdef UTGLR_RUNE_BUILD
	const DWORD GL_BLEND_FLAG_BITS = PF_Translucent | PF_Modulated | PF_Highlighted | PF_AlphaBlend;
#else
	const DWORD GL_BLEND_FLAG_BITS = PF_Translucent | PF_Modulated | PF_Highlighted;
#endif
	const DWORD blendSelectBits = GL_BLEND_FLAG_BITS | m_smoothMaskedTexturesBit;

	//Blend function
	if (changedBits & blendSelectBits) {
		//Defaults describe blending turned off
		bool haveBlendState = true;
		DWORD alphaBlendEnable = TRUE;
		DWORD srcBlend = D3DBLEND_ONE;
		DWORD destBlend = D3DBLEND_ZERO;

		//The first matching flag wins
		if (!(blendFlags & blendSelectBits)) {
			alphaBlendEnable = FALSE;
		}
		else if (blendFlags & PF_Translucent) {
			destBlend = D3DBLEND_INVSRCCOLOR;
		}
		else if (blendFlags & PF_Modulated) {
			srcBlend = D3DBLEND_DESTCOLOR;
			destBlend = D3DBLEND_SRCCOLOR;
		}
		else if (blendFlags & PF_Highlighted) {
			destBlend = D3DBLEND_INVSRCALPHA;
		}
#ifdef UTGLR_RUNE_BUILD
		else if (blendFlags & PF_AlphaBlend) {
			srcBlend = D3DBLEND_SRCALPHA;
			destBlend = D3DBLEND_INVSRCALPHA;
		}
#endif
		else if (blendFlags & PF_Masked) {
			srcBlend = D3DBLEND_SRCALPHA;
			destBlend = D3DBLEND_INVSRCALPHA;
		}
		else {
			//No recognized blend flag is set; leave the device state alone
			haveBlendState = false;
		}

		if (haveBlendState) {
			m_d3dDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, alphaBlendEnable);
			m_d3dDevice->SetRenderState(D3DRS_SRCBLEND, srcBlend);
			m_d3dDevice->SetRenderState(D3DRS_DESTBLEND, destBlend);
		}
	}

	//Alpha test follows PF_Masked
	if (changedBits & PF_Masked) {
		DWORD alphaTestEnable = TRUE;
		if ((blendFlags & PF_Masked) == 0) {
			alphaTestEnable = FALSE;
		}
		m_d3dDevice->SetRenderState(D3DRS_ALPHATESTENABLE, alphaTestEnable);
	}

	//PF_Invisible turns off all color channel writes
	if (changedBits & PF_Invisible) {
		DWORD colorWriteMask = 0;
		if ((blendFlags & PF_Invisible) == 0) {
			colorWriteMask = D3DCOLORWRITEENABLE_ALPHA | D3DCOLORWRITEENABLE_BLUE | D3DCOLORWRITEENABLE_GREEN | D3DCOLORWRITEENABLE_RED;
		}
		m_d3dDevice->SetRenderState(D3DRS_COLORWRITEENABLE, colorWriteMask);
	}

	//Depth writes follow PF_Occlude
	if (changedBits & PF_Occlude) {
		DWORD zWriteEnable = TRUE;
		if ((blendFlags & PF_Occlude) == 0) {
			zWriteEnable = FALSE;
		}
		m_d3dDevice->SetRenderState(D3DRS_ZWRITEENABLE, zWriteEnable);
	}

	//Specular follows PF_RenderFog
	if (changedBits & PF_RenderFog) {
		DWORD specularEnable = TRUE;
		if ((blendFlags & PF_RenderFog) == 0) {
			specularEnable = FALSE;
		}
		m_d3dDevice->SetRenderState(D3DRS_SPECULARENABLE, specularEnable);
	}

	unguardSlow;
}

//This function will initialize or invalidate the texture environment state
//The current architecture allows both operations to be done in the same way
void UD3D9RenderDevice::InitOrInvalidateTexEnvState(void) {
	INT TMU;

	//For initialization, flags for all texture units are cleared
	//For initialization, first texture unit is modulated by default rather
	//than disabled, but priority bit encoding of flags will prevent problems
	//from the mismatch and will only result in one extra state update
	//For invalidation, flags for all texture units are also cleared as it is
	//fast enough and has no potential outside interaction side effects
	for (TMU = 0; TMU < MAX_TMUNITS; TMU++) {
		m_curTexEnvFlags[TMU] = 0;
	}

	//Set TexEnv 0 to modulated by default
	SetTexEnv(0, PF_Modulated);

	return;
}

//Applies the LODBias setting as the mipmap LOD bias of sampler stages
//0 through TMUnits - 1
void UD3D9RenderDevice::SetTexLODBiasState(INT TMUnits) {
	//The bias is handed to SetSamplerState as the raw bits of a float
	float lodBias = LODBias;
	const DWORD lodBiasBits = *(DWORD *)&lodBias;

	INT stage;
	for (stage = 0; stage < TMUnits; ++stage) {
		m_d3dDevice->SetSamplerState(stage, D3DSAMP_MIPMAPLODBIAS, lodBiasBits);
	}
}

//Applies the MaxAnisotropy setting to sampler stages 0 through TMUnits - 1
void UD3D9RenderDevice::SetTexMaxAnisotropyState(INT TMUnits) {
	INT stage = 0;
	while (stage < TMUnits) {
		m_d3dDevice->SetSamplerState(stage, D3DSAMP_MAXANISOTROPY, MaxAnisotropy);
		++stage;
	}
}

//Programs the color/alpha combiner of texture stage texUnit from texEnvFlags,
//records the flags in m_curTexEnvFlags and marks the stage in m_texEnableBits
//PF_Modulated is tested first, then PF_Memorized, then PF_Highlighted; when
//none of them is set no texture stage state is sent to the device
void UD3D9RenderDevice::SetTexEnvNoCheck(DWORD texUnit, DWORD texEnvFlags) {
	guardSlow(UD3D9RenderDevice::SetTexEnv);

	//Bookkeeping is done up front as nothing below depends on the old values
	m_curTexEnvFlags[texUnit] = texEnvFlags;
	m_texEnableBits |= 1U << texUnit;

	//Values for the plain modulate case; the branches below override them
	D3DTEXTUREOP colorOp = D3DTOP_MODULATE;
	D3DTEXTUREOP alphaOp = D3DTOP_MODULATE;
	DWORD colorArg2 = D3DTA_CURRENT;
	bool haveMode = true;

	if (texEnvFlags & PF_Modulated) {
		//2X modulation for flat shaded stages, and for every stage after the
		//first one unless OneXBlending is set
		const bool use2X = ((texEnvFlags & PF_FlatShaded) != 0) || ((texUnit != 0) && !OneXBlending);
		if (use2X) {
			colorOp = D3DTOP_MODULATE2X;
		}
	}
	else if (texEnvFlags & PF_Memorized) {
		colorOp = D3DTOP_BLENDCURRENTALPHA;
		alphaOp = D3DTOP_SELECTARG1;
		colorArg2 = D3DTA_DIFFUSE;
	}
	else if (texEnvFlags & PF_Highlighted) {
		colorOp = D3DTOP_MODULATEINVALPHA_ADDCOLOR;
		alphaOp = D3DTOP_SELECTARG2;
	}
	else {
		haveMode = false;
	}

	if (haveMode) {
		//D3DTSS_COLORARG1 is deliberately not written here
		//NOTE(review): presumably it stays at D3DTA_TEXTURE - confirm
		m_d3dDevice->SetTextureStageState(texUnit, D3DTSS_COLOROP, colorOp);
		m_d3dDevice->SetTextureStageState(texUnit, D3DTSS_ALPHAOP, alphaOp);
		m_d3dDevice->SetTextureStageState(texUnit, D3DTSS_COLORARG2, colorArg2);
	}

	unguardSlow;
}


//Sends the sampler states of stage texNum whose bits differ between the cached
//filter byte and texFilterParams, and sets the cache to texFilterParams
void FASTCALL UD3D9RenderDevice::SetTexFilterNoCheck(DWORD texNum, BYTE texFilterParams) {
	guardSlow(UD3D9RenderDevice::SetTexFilter);

	//Bits that differ from the cached filter params
	const BYTE changedBits = m_curTexStageParams[texNum].filter ^ texFilterParams;

	//Refresh the cached copy first
	m_curTexStageParams[texNum].filter = texFilterParams;

	//Minification filter; anything unrecognized falls back to point
	if (changedBits & CT_MIN_FILTER_MASK) {
		D3DTEXTUREFILTERTYPE minFilter = D3DTEXF_POINT;

		if ((texFilterParams & CT_MIN_FILTER_MASK) == CT_MIN_FILTER_LINEAR) {
			minFilter = D3DTEXF_LINEAR;
		}
		else if ((texFilterParams & CT_MIN_FILTER_MASK) == CT_MIN_FILTER_ANISOTROPIC) {
			minFilter = D3DTEXF_ANISOTROPIC;
		}

		m_d3dDevice->SetSamplerState(texNum, D3DSAMP_MINFILTER, minFilter);
	}

	//Mip filter; anything unrecognized falls back to none
	if (changedBits & CT_MIP_FILTER_MASK) {
		D3DTEXTUREFILTERTYPE mipFilter = D3DTEXF_NONE;

		if ((texFilterParams & CT_MIP_FILTER_MASK) == CT_MIP_FILTER_POINT) {
			mipFilter = D3DTEXF_POINT;
		}
		else if ((texFilterParams & CT_MIP_FILTER_MASK) == CT_MIP_FILTER_LINEAR) {
			mipFilter = D3DTEXF_LINEAR;
		}

		m_d3dDevice->SetSamplerState(texNum, D3DSAMP_MIPFILTER, mipFilter);
	}

	//Magnification filter is a single linear/point bit
	if (changedBits & CT_MAG_FILTER_LINEAR_NOT_POINT_BIT) {
		D3DTEXTUREFILTERTYPE magFilter = D3DTEXF_POINT;
		if (texFilterParams & CT_MAG_FILTER_LINEAR_NOT_POINT_BIT) {
			magFilter = D3DTEXF_LINEAR;
		}
		m_d3dDevice->SetSamplerState(texNum, D3DSAMP_MAGFILTER, magFilter);
	}

	//One clamp/wrap bit drives both the U and the V address mode
	if (changedBits & CT_ADDRESS_CLAMP_NOT_WRAP_BIT) {
		D3DTEXTUREADDRESS addressMode = D3DTADDRESS_WRAP;
		if (texFilterParams & CT_ADDRESS_CLAMP_NOT_WRAP_BIT) {
			addressMode = D3DTADDRESS_CLAMP;
		}
		m_d3dDevice->SetSamplerState(texNum, D3DSAMP_ADDRESSU, addressMode);
		m_d3dDevice->SetSamplerState(texNum, D3DSAMP_ADDRESSV, addressMode);
	}

	unguardSlow;
}


//Makes vertexDecl the device's current vertex declaration and records it in
//m_curVertexDecl; a failed device call is reported through appErrorf
void UD3D9RenderDevice::SetVertexDeclNoCheck(IDirect3DVertexDeclaration9 *vertexDecl) {
	const HRESULT setResult = m_d3dDevice->SetVertexDeclaration(vertexDecl);
	if (FAILED(setResult)) {
		appErrorf(TEXT("SetVertexDeclaration failed"));
	}

	//Remember what is now bound
	m_curVertexDecl = vertexDecl;
}

//Makes vertexShader the device's current vertex shader (NULL selects none),
//updates the switch/enable counters and records it in m_curVertexShader
//A failed device call is reported through appErrorf
void UD3D9RenderDevice::SetVertexShaderNoCheck(IDirect3DVertexShader9 *vertexShader) {
	const HRESULT setResult = m_d3dDevice->SetVertexShader(vertexShader);
	if (FAILED(setResult)) {
		appErrorf(TEXT("SetVertexShader failed"));
	}

	//Every call counts as a switch; going from no shader to a shader also
	//counts as an enable
	m_vpSwitchCount++;
	const bool enabling = (m_curVertexShader == NULL) && (vertexShader != NULL);
	if (enabling) {
		m_vpEnableCount++;
	}

	//Remember what is now bound
	m_curVertexShader = vertexShader;
}

//Makes pixelShader the device's current pixel shader (NULL selects none),
//updates the switch/enable counters and records it in m_curPixelShader
//A failed device call is reported through appErrorf
void UD3D9RenderDevice::SetPixelShaderNoCheck(IDirect3DPixelShader9 *pixelShader) {
	const HRESULT setResult = m_d3dDevice->SetPixelShader(pixelShader);
	if (FAILED(setResult)) {
		appErrorf(TEXT("SetPixelShader failed"));
	}

	//Every call counts as a switch; going from no shader to a shader also
	//counts as an enable
	m_fpSwitchCount++;
	const bool enabling = (m_curPixelShader == NULL) && (pixelShader != NULL);
	if (enabling) {
		m_fpEnableCount++;
	}

	//Remember what is now bound
	m_curPixelShader = pixelShader;
}


void UD3D9RenderDevice::SetAAStateNoCheck(bool AAEnable) {
	//Save new AA state
	m_curAAEnable = AAEnable;

	m_AASwitchCount++;

	//Set new AA state
	m_d3dDevice->SetRenderState(D3DRS_MULTISAMPLEANTIALIAS, (AAEnable) ? TRUE : FALSE);

	return;
}


bool UD3D9RenderDevice::InitializeVertexPrograms(void) {
	bool initOk = true;


	//Create vertex programs if not already created
	#define UTGLR_VS_CONDITIONAL_LOAD(_var, _vp, _name) \
		if (!_var) { \
			initOk &= LoadVertexProgram(&_var, _vp, _name); \
		}


	//Default rendering state
	UTGLR_VS_CONDITIONAL_LOAD(m_vpDefaultRenderingState, g_vpDefaultRenderingState,
		TEXT("Default rendering state"));

	//Default rendering state with fog
	UTGLR_VS_CONDITIONAL_LOAD(m_vpDefaultRenderingStateWithFog, g_vpDefaultRenderingStateWithFog,
		TEXT("Default rendering state with fog"));

#ifdef UTGLR_RUNE_BUILD
	//Default rendering state with linear fog
	UTGLR_VS_CONDITIONAL_LOAD(m_vpDefaultRenderingStateWithLinearFog, g_vpDefaultRenderingStateWithLinearFog,
		TEXT("Default rendering state with linear fog"));
#endif


	//Complex surface single texture
	UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurface[0], g_vpComplexSurfaceSingleTexture,
		TEXT("Complex surface single texture"));

	if (TMUnits >= 2) {
		//Complex surface dual texture
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurface[1], g_vpComplexSurfaceDualTexture,
			TEXT("Complex surface dual texture"));
	}

	if (TMUnits >= 3) {
		//Complex surface triple texture
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurface[2], g_vpComplexSurfaceTripleTexture,
			TEXT("Complex surface triple texture"));
	}

	if (TMUnits >= 4) {
		//Complex surface quad texture
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurface[3], g_vpComplexSurfaceQuadTexture,
			TEXT("Complex surface quad texture"));
	}


	if (UseDetailAlpha) {
		//Complex surface detail alpha
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurfaceDetailAlpha, g_vpComplexSurfaceDetailAlpha,
			TEXT("Complex surface detail alpha"));
	}

	if (SinglePassDetail) {
		//Complex surface single texture and detail texture
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurfaceSingleTextureAndDetailTexture, g_vpComplexSurfaceSingleTextureAndDetailTexture,
			TEXT("Complex surface single texture and detail texture"));

		//Complex surface dual texture and detail texture
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurfaceDualTextureAndDetailTexture, g_vpComplexSurfaceDualTextureAndDetailTexture,
			TEXT("Complex surface dual texture and detail texture"));
	}


	if (UseFragmentProgram) {
		//Complex surface single texture with position
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurfaceSingleTextureWithPos, g_vpComplexSurfaceSingleTextureWithPos,
			TEXT("Complex surface single texture with position"));

		//Complex surface dual texture with position
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurfaceDualTextureWithPos, g_vpComplexSurfaceDualTextureWithPos,
			TEXT("Complex surface dual texture with position"));

		//Complex surface triple texture with position
		UTGLR_VS_CONDITIONAL_LOAD(m_vpComplexSurfaceTripleTextureWithPos, g_vpComplexSurfaceTripleTextureWithPos,
			TEXT("Complex surface triple texture with position"));
	}


	#undef UTGLR_VS_CONDITIONAL_LOAD

	return initOk;
}

//Creates a vertex shader from the token stream pFunction and stores it in
//*ppShader
//pName is only used for debug output
//Returns true on success and false if the device call failed
bool UD3D9RenderDevice::LoadVertexProgram(IDirect3DVertexShader9 **ppShader, const DWORD *pFunction, const TCHAR *pName) {
	const bool logBasic = DebugBit(DEBUG_BIT_BASIC) ? true : false;

	if (logBasic) {
		dout << TEXT("utd3d9r: Loading vertex program \"") << pName << TEXT("\"") << std::endl;
	}

	const HRESULT createResult = m_d3dDevice->CreateVertexShader(pFunction, ppShader);
	if (SUCCEEDED(createResult)) {
		return true;
	}

	if (logBasic) {
		dout << TEXT("utd3d9r: Vertex program load error") << std::endl;
	}

	return false;
}

//Attempts to initialize vertex program mode
//Safe to call multiple times as already created vertex programs will not be recreated
//On failure vertex program mode is shut down and its settings are turned off
void UD3D9RenderDevice::TryInitializeVertexProgramMode(void) {
	//Nothing more to do when every vertex program is available
	if (InitializeVertexPrograms()) {
		return;
	}

	//Release whatever was created
	ShutdownVertexProgramMode();

	//Turn vertex program mode off in all copies of the setting
	DCV.UseVertexProgram = 0;
	UseVertexProgram = 0;
	PL_UseVertexProgram = 0;

	if (DebugBit(DEBUG_BIT_BASIC)) {
		dout << TEXT("utd3d9r: Vertex program initialization failed") << std::endl;
	}
}

//Shuts down vertex program mode if it is active
//Freeing the vertex program names takes care of releasing resources
//Safe to call even if vertex program mode is not supported or was never initialized
void UD3D9RenderDevice::ShutdownVertexProgramMode(void) {
	//Make sure that a vertex program is not current
	SetVertexShaderNoCheck(NULL);

	#define UTGLR_VS_RELEASE(_var) \
		if (_var) { \
			_var->Release(); \
			_var = NULL; \
		}

	//Free vertex programs if they were created

	//Default rendering state
	UTGLR_VS_RELEASE(m_vpDefaultRenderingState);

	//Default rendering state with fog
	UTGLR_VS_RELEASE(m_vpDefaultRenderingStateWithFog);

	//Default rendering state with linear fog
#ifdef UTGLR_RUNE_BUILD
	UTGLR_VS_RELEASE(m_vpDefaultRenderingStateWithLinearFog);
#endif


	//Complex surface single texture
	UTGLR_VS_RELEASE(m_vpComplexSurface[0]);

	//Complex surface double texture
	UTGLR_VS_RELEASE(m_vpComplexSurface[1]);

	//Complex surface triple texture
	UTGLR_VS_RELEASE(m_vpComplexSurface[2]);

	//Complex surface quad texture
	UTGLR_VS_RELEASE(m_vpComplexSurface[3]);


	//Complex surface detail alpha
	UTGLR_VS_RELEASE(m_vpComplexSurfaceDetailAlpha);

	//Complex surface single texture and detail texture
	UTGLR_VS_RELEASE(m_vpComplexSurfaceSingleTextureAndDetailTexture);

	//Complex surface dual texture and detail texture
	UTGLR_VS_RELEASE(m_vpComplexSurfaceDualTextureAndDetailTexture);


	//Complex surface single texture with position
	UTGLR_VS_RELEASE(m_vpComplexSurfaceSingleTextureWithPos);

	//Complex surface dual texture with position
	UTGLR_VS_RELEASE(m_vpComplexSurfaceDualTextureWithPos);

	//Complex surface triple texture with position
	UTGLR_VS_RELEASE(m_vpComplexSurfaceTripleTextureWithPos);


	#undef UTGLR_VS_RELEASE

	return;
}


bool UD3D9RenderDevice::InitializeFragmentPrograms(void) {
	bool initOk = true;


	//Create fragment programs if not already created
	#define UTGLR_PS_CONDITIONAL_LOAD(_var, _fp, _name) \
		if (!_var) { \
			initOk &= LoadFragmentProgram(&_var, _fp, _name); \
		}


	//Default rendering state
	UTGLR_PS_CONDITIONAL_LOAD(m_fpDefaultRenderingState, g_fpDefaultRenderingState,
		TEXT("Default rendering state"));

	//Default rendering state with fog
	UTGLR_PS_CONDITIONAL_LOAD(m_fpDefaultRenderingStateWithFog, g_fpDefaultRenderingStateWithFog,
		TEXT("Default rendering state with fog"));

#ifdef UTGLR_RUNE_BUILD
	//Default rendering state with linear fog
	UTGLR_PS_CONDITIONAL_LOAD(m_fpDefaultRenderingStateWithLinearFog, g_fpDefaultRenderingStateWithLinearFog,
		TEXT("Default rendering state with linear fog"));
#endif


	//Complex surface single texture
	UTGLR_PS_CONDITIONAL_LOAD(m_fpComplexSurfaceSingleTexture, g_fpComplexSurfaceSingleTexture,
		TEXT("Complex surface single texture"));

	//Complex surface dual texture modulated
	UTGLR_PS_CONDITIONAL_LOAD(m_fpComplexSurfaceDualTextureModulated, g_fpComplexSurfaceDualTextureModulated,
		TEXT("Complex surface dual texture modulated"));

	//Complex surface dual texture modulated 2X
	UTGLR_PS_CONDITIONAL_LOAD(m_fpComplexSurfaceDualTextureModulated2X, g_fpComplexSurfaceDualTextureModulated2X,
		TEXT("Complex surface dual texture modulated 2X"));


	//Complex surface single texture with fog
	UTGLR_PS_CONDITIONAL_LOAD(m_fpComplexSurfaceSingleTextureWithFog, g_fpComplexSurfaceSingleTextureWithFog,
		TEXT("Complex surface single texture with fog"));

	//Complex surface dual texture modulated with fog
	UTGLR_PS_CONDITIONAL_LOAD(m_fpComplexSurfaceDualTextureModulatedWithFog, g_fpComplexSurfaceDualTextureModulatedWithFog,
		TEXT("Complex surface dual texture modulated with fog"));

	//Complex surface dual texture modulated 2X with fog
	UTGLR_PS_CONDITIONAL_LOAD(m_fpComplexSurfaceDualTextureModulated2XWithFog, g_fpComplexSurfaceDualTextureModulated2XWithFog,
		TEXT("Complex surface dual texture modulated 2X with fog"));


	if (DetailTextures) {
		//Detail texture
		UTGLR_PS_CONDITIONAL_LOAD(m_fpDetailTexture, g_fpDetailTexture,
			TEXT("Detail texture"));

		//Detail texture two layer
		UTGLR_PS_CONDITIONAL_LOAD(m_fpDetailTextureTwoLayer, g_fpDetailTextureTwoLayer,
			TEXT("Detail texture two layer"));

		//Single texture and detail texture
		UTGLR_PS_CONDITIONAL_LOAD(m_fpSingleTextureAndDetailTexture, g_fpSingleTextureAndDetailTexture,
			TEXT("Complex surface single texture and detail texture"));

		//Single texture and detail texture two layer
		UTGLR_PS_CONDITIONAL_LOAD(m_fpSingleTextureAndDetailTextureTwoLayer, g_fpSingleTextureAndDetailTextureTwoLayer,
			TEXT("Complex surface single texture and detail texture two layer"));

		//Dual texture and detail texture
		UTGLR_PS_CONDITIONAL_LOAD(m_fpDualTextureAndDetailTexture, g_fpDualTextureAndDetailTexture,
			TEXT("Complex surface dual texture and detail texture"));

		//Dual texture and detail texture two layer
		UTGLR_PS_CONDITIONAL_LOAD(m_fpDualTextureAndDetailTextureTwoLayer, g_fpDualTextureAndDetailTextureTwoLayer,
			TEXT("Complex surface dual texture and detail texture two layer"));
	}


	#undef UTGLR_PS_CONDITIONAL_LOAD

	return initOk;
}

//Creates a pixel shader from the token stream pFunction and stores it in
//*ppShader
//pName is only used for debug output
//Returns true on success and false if the device call failed
bool UD3D9RenderDevice::LoadFragmentProgram(IDirect3DPixelShader9 **ppShader, const DWORD *pFunction, const TCHAR *pName) {
	const bool logBasic = DebugBit(DEBUG_BIT_BASIC) ? true : false;

	if (logBasic) {
		dout << TEXT("utd3d9r: Loading fragment program \"") << pName << TEXT("\"") << std::endl;
	}

	const HRESULT createResult = m_d3dDevice->CreatePixelShader(pFunction, ppShader);
	if (SUCCEEDED(createResult)) {
		return true;
	}

	if (logBasic) {
		dout << TEXT("utd3d9r: Fragment program load error") << std::endl;
	}

	return false;
}

//Attempts to initialize fragment program mode
//Safe to call multiple times as already created fragment programs will not be recreated
//On failure fragment program mode is shut down and its settings are turned off
void UD3D9RenderDevice::TryInitializeFragmentProgramMode(void) {
	//Nothing more to do when every fragment program is available
	if (InitializeFragmentPrograms()) {
		return;
	}

	//Release whatever was created
	ShutdownFragmentProgramMode();

	//Turn fragment program mode off in all copies of the setting
	DCV.UseFragmentProgram = 0;
	UseFragmentProgram = 0;
	PL_UseFragmentProgram = 0;

	if (DebugBit(DEBUG_BIT_BASIC)) {
		dout << TEXT("utd3d9r: Fragment program initialization failed") << std::endl;
	}
}

//Shuts down fragment program mode if it is active
//Unbinds any current pixel shader and releases every fragment program that was created
//Safe to call even if fragment program mode is not supported or was never initialized
void UD3D9RenderDevice::ShutdownFragmentProgramMode(void) {
	//Make sure that a fragment program is not current
	SetPixelShaderNoCheck(NULL);

	//Releases _shader if it exists and resets it to NULL
	#define UTGLR_PS_DROP(_shader) \
		if ((_shader) != NULL) { \
			(_shader)->Release(); \
			(_shader) = NULL; \
		}

	//Default rendering state, without and with fog
	UTGLR_PS_DROP(m_fpDefaultRenderingState);
	UTGLR_PS_DROP(m_fpDefaultRenderingStateWithFog);
#ifdef UTGLR_RUNE_BUILD
	UTGLR_PS_DROP(m_fpDefaultRenderingStateWithLinearFog);
#endif

	//Complex surface programs without fog
	UTGLR_PS_DROP(m_fpComplexSurfaceSingleTexture);
	UTGLR_PS_DROP(m_fpComplexSurfaceDualTextureModulated);
	UTGLR_PS_DROP(m_fpComplexSurfaceDualTextureModulated2X);

	//Complex surface programs with fog
	UTGLR_PS_DROP(m_fpComplexSurfaceSingleTextureWithFog);
	UTGLR_PS_DROP(m_fpComplexSurfaceDualTextureModulatedWithFog);
	UTGLR_PS_DROP(m_fpComplexSurfaceDualTextureModulated2XWithFog);

	//Detail texture programs
	UTGLR_PS_DROP(m_fpDetailTexture);
	UTGLR_PS_DROP(m_fpDetailTextureTwoLayer);
	UTGLR_PS_DROP(m_fpSingleTextureAndDetailTexture);
	UTGLR_PS_DROP(m_fpSingleTextureAndDetailTextureTwoLayer);
	UTGLR_PS_DROP(m_fpDualTextureAndDetailTexture);
	UTGLR_PS_DROP(m_fpDualTextureAndDetailTextureTwoLayer);

	#undef UTGLR_PS_DROP
}


void UD3D9RenderDevice::SetProjectionStateNoCheck(bool requestNearZRangeHackProjection) {
	float left, right, bottom, top, zNear, zFar;
	float invRightMinusLeft, invTopMinusBottom, invNearMinusFar;
	D3DMATRIX d3dProj;

	//Save new Z range hack projection state
	m_nearZRangeHackProjectionActive = requestNearZRangeHackProjection;

	//Set default zNearVal
	FLOAT zNearVal = 0.5f;

	FLOAT zScaleVal = 1.0f;
	if (requestNearZRangeHackProjection) {
#ifdef UTGLR_DEBUG_Z_RANGE_HACK_WIREFRAME
		m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
#endif

		zScaleVal = 0.125f;
		zNearVal = 0.5;
	}
	else {
#ifdef UTGLR_DEBUG_Z_RANGE_HACK_WIREFRAME
		m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID);
#endif

		if (m_useZRangeHack) {
			zNearVal = 4.0f;
		}
	}

	left = -m_RProjZ * zNearVal;
	right = +m_RProjZ * zNearVal;
	bottom = -m_Aspect*m_RProjZ * zNearVal;
	top = +m_Aspect*m_RProjZ * zNearVal;
	zNear = 1.0f * zNearVal;
	zFar = 32768.0f;
	if (requestNearZRangeHackProjection) {
		zFar = 4096.0f;
	}

	invRightMinusLeft = 1.0f / (right - left);
	invTopMinusBottom = 1.0f / (top - bottom);
	invNearMinusFar = 1.0f / (zNear - zFar);

	d3dProj.m[0][0] = 2.0f * zNear * invRightMinusLeft;
	d3dProj.m[0][1] = 0.0f;
	d3dProj.m[0][2] = 0.0f;
	d3dProj.m[0][3] = 0.0f;

	d3dProj.m[1][0] = 0.0f;
	d3dProj.m[1][1] = 2.0f * zNear * invTopMinusBottom;
	d3dProj.m[1][2] = 0.0f;
	d3dProj.m[1][3] = 0.0f;

	d3dProj.m[2][0] = 0.0f;
	d3dProj.m[2][1] = 0.0f;
	d3dProj.m[2][2] = zScaleVal * (zFar * invNearMinusFar);
	d3dProj.m[2][3] = -1.0f;

	d3dProj.m[3][0] = 0.0f;
	d3dProj.m[3][1] = 0.0f;
	d3dProj.m[3][2] = zScaleVal * zScaleVal * (zNear * zFar * invNearMinusFar);
	d3dProj.m[3][3] = 0.0f;

	m_d3dDevice->SetTransform(D3DTS_PROJECTION, &d3dProj);

	if (UseVertexProgram) {
		FLOAT vsTransMatrix[16];

		//Transpose and scale by -1y and -1z
		vsTransMatrix[0]  = d3dProj.m[0][0];
		vsTransMatrix[1]  = -d3dProj.m[1][0];
		vsTransMatrix[2]  = -d3dProj.m[2][0];
		vsTransMatrix[3]  = d3dProj.m[3][0];
		vsTransMatrix[4]  = d3dProj.m[0][1];
		vsTransMatrix[5]  = -d3dProj.m[1][1];
		vsTransMatrix[6]  = -d3dProj.m[2][1];
		vsTransMatrix[7]  = d3dProj.m[3][1];
		vsTransMatrix[8]  = d3dProj.m[0][2];
		vsTransMatrix[9]  = -d3dProj.m[1][2];
		vsTransMatrix[10] = -d3dProj.m[2][2];
		vsTransMatrix[11] = d3dProj.m[3][2];
		vsTransMatrix[12] = d3dProj.m[0][3];
		vsTransMatrix[13] = -d3dProj.m[1][3];
		vsTransMatrix[14] = -d3dProj.m[2][3];
		vsTransMatrix[15] = d3dProj.m[3][3];

		m_d3dDevice->SetVertexShaderConstantF(0, vsTransMatrix, 4);
	}

	return;
}

void UD3D9RenderDevice::SetOrthoProjection(void) {
	float left, right, bottom, top, zNear, zFar;
	float invRightMinusLeft, invTopMinusBottom, invNearMinusFar;
	D3DMATRIX d3dProj;

	//Save new Z range hack projection state
	m_nearZRangeHackProjectionActive = false;

	left = -m_RProjZ * 0.5f;
	right = +m_RProjZ * 0.5f;
	bottom = -m_Aspect*m_RProjZ * 0.5f;
	top = +m_Aspect*m_RProjZ * 0.5f;
	zNear = 1.0f * 0.5f;
	zFar = 32768.0f;

	invRightMinusLeft = 1.0f / (right - left);
	invTopMinusBottom = 1.0f / (top - bottom);
	invNearMinusFar = 1.0f / (zNear - zFar);

	d3dProj.m[0][0] = 2.0f * invRightMinusLeft;
	d3dProj.m[0][1] = 0.0f;
	d3dProj.m[0][2] = 0.0f;
	d3dProj.m[0][3] = 0.0f;

	d3dProj.m[1][0] = 0.0f;
	d3dProj.m[1][1] = 2.0f * invTopMinusBottom;
	d3dProj.m[1][2] = 0.0f;
	d3dProj.m[1][3] = 0.0f;

	d3dProj.m[2][0] = 0.0f;
	d3dProj.m[2][1] = 0.0f;
	d3dProj.m[2][2] = 1.0f * invNearMinusFar;
	d3dProj.m[2][3] = 0.0f;

	d3dProj.m[3][0] = 0.0f;
	d3dProj.m[3][1] = 0.0f;
	d3dProj.m[3][2] = zNear * invNearMinusFar;
	d3dProj.m[3][3] = 1.0f;

	m_d3dDevice->SetTransform(D3DTS_PROJECTION, &d3dProj);

	if (UseVertexProgram) {
		FLOAT vsTransMatrix[16];

		//Transpose and scale by -1y and -1z
		vsTransMatrix[0]  = d3dProj.m[0][0];
		vsTransMatrix[1]  = -d3dProj.m[1][0];
		vsTransMatrix[2]  = -d3dProj.m[2][0];
		vsTransMatrix[3]  = d3dProj.m[3][0];
		vsTransMatrix[4]  = d3dProj.m[0][1];
		vsTransMatrix[5]  = -d3dProj.m[1][1];
		vsTransMatrix[6]  = -d3dProj.m[2][1];
		vsTransMatrix[7]  = d3dProj.m[3][1];
		vsTransMatrix[8]  = d3dProj.m[0][2];
		vsTransMatrix[9]  = -d3dProj.m[1][2];
		vsTransMatrix[10] = -d3dProj.m[2][2];
		vsTransMatrix[11] = d3dProj.m[3][2];
		vsTransMatrix[12] = d3dProj.m[0][3];
		vsTransMatrix[13] = -d3dProj.m[1][3];
		vsTransMatrix[14] = -d3dProj.m[2][3];
		vsTransMatrix[15] = d3dProj.m[3][3];

		m_d3dDevice->SetVertexShaderConstantF(0, vsTransMatrix, 4);
	}

	return;
}


//Runs the render pass setup proc selected in m_pRenderPassesNoCheckSetupProc
//and draws every buffered polygon as a triangle fan
//Afterwards the vertex buffer position is advanced by m_csPtCount, the pass
//count is reset to 0 and single texture rendering is forced (m_rpTMUnits = 1,
//m_rpForceSingle = true)
void UD3D9RenderDevice::RenderPassesExec(void) {
	guard(UD3D9RenderDevice::RenderPassesExec);

	//Declared at function scope so that both the draw loop and the optional
	//wireframe debug loop can use it, whether or not the compiler applies
	//standard for-loop variable scoping
	INT PolyNum;

	//Some render passes paths may use fragment program

	//A masked surface that was already forced to single pass rendering is
	//drawn with an equal depth test; this is switched on only once
	if (m_rpMasked && m_rpForceSingle && !m_rpSetDepthEqual) {
		m_d3dDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_EQUAL);
		m_rpSetDepthEqual = true;
	}

	//Call the render passes no check setup proc
	(this->*m_pRenderPassesNoCheckSetupProc)();

	m_rpTMUnits = 1;
	m_rpForceSingle = true;


	//One triangle fan per polygon; a fan of N points has N - 2 triangles
	for (PolyNum = 0; PolyNum < m_csPolyCount; PolyNum++) {
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos + MultiDrawFirstArray[PolyNum], MultiDrawCountArray[PolyNum] - 2);
	}

#ifdef UTGLR_DEBUG_WORLD_WIREFRAME
	//Debug aid: draw the same polygons again as modulated wireframe
	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);

	SetBlend(PF_Modulated);

	for (PolyNum = 0; PolyNum < m_csPolyCount; PolyNum++) {
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos + MultiDrawFirstArray[PolyNum], MultiDrawCountArray[PolyNum] - 2);
	}

	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID);
#endif

	//Advance vertex buffer position
	m_curVertexBufferPos += m_csPtCount;

#if 0
{
	dout << L"utd3d9r: PassCount = " << m_rpPassCount << std::endl;
}
#endif
	m_rpPassCount = 0;


	unguard;
}

//Runs the single or dual texture plus detail texture setup proc selected in
//m_pRenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTextureProc with
//DetailTextureInfo and draws every buffered polygon as a triangle fan
//Afterwards the vertex buffer position is advanced by m_csPtCount and the
//pass count is reset to 0
void UD3D9RenderDevice::RenderPassesExec_SingleOrDualTextureAndDetailTexture(FTextureInfo &DetailTextureInfo) {
	guard(UD3D9RenderDevice::RenderPassesExec_SingleOrDualTextureAndDetailTexture);

	//Declared at function scope so that both the draw loop and the optional
	//wireframe debug loop can use it, whether or not the compiler applies
	//standard for-loop variable scoping
	INT PolyNum;

	//Some render passes paths may use fragment program

	//The dual texture and detail texture path can never be executed if single pass rendering were forced earlier
	//The depth function will never need to be changed due to single pass rendering here

	//Call the render passes no check setup dual texture and detail texture proc
	(this->*m_pRenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTextureProc)(DetailTextureInfo);

	//Single texture rendering does not need to be forced here since the detail texture is always the last pass


	//One triangle fan per polygon; a fan of N points has N - 2 triangles
	for (PolyNum = 0; PolyNum < m_csPolyCount; PolyNum++) {
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos + MultiDrawFirstArray[PolyNum], MultiDrawCountArray[PolyNum] - 2);
	}

#ifdef UTGLR_DEBUG_WORLD_WIREFRAME
	//Debug aid: draw the same polygons again as modulated wireframe
	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);

	SetBlend(PF_Modulated);

	for (PolyNum = 0; PolyNum < m_csPolyCount; PolyNum++) {
		m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos + MultiDrawFirstArray[PolyNum], MultiDrawCountArray[PolyNum] - 2);
	}

	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID);
#endif

	//Advance vertex buffer position
	m_curVertexBufferPos += m_csPtCount;

#if 0
{
	dout << L"utd3d9r: PassCount = " << m_rpPassCount << std::endl;
}
#endif
	m_rpPassCount = 0;


	unguard;
}

//Must be called with (m_rpPassCount > 0)
void UD3D9RenderDevice::RenderPassesNoCheckSetup(void) {
	INT i;
	INT t;

	SetBlend(MultiPass.TMU[0].PolyFlags);

	i = 0;
	do {
		if (i != 0) {
			SetTexEnv(i, MultiPass.TMU[i].PolyFlags);
		}

		SetTexture(i, *MultiPass.TMU[i].Info, MultiPass.TMU[i].PolyFlags, MultiPass.TMU[i].PanBias);
	} while (++i < m_rpPassCount);

	//Set stream state based on number of texture units in use
	SetStreamState(m_standardNTextureVertexDecl[m_rpPassCount - 1], NULL, NULL);

	//Check for additional enabled texture units that should be disabled
	DisableSubsequentTextures(m_rpPassCount);

	//Make sure at least m_csPtCount entries are left in the vertex buffers
	if ((m_curVertexBufferPos + m_csPtCount) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock vertexColor and texCoord buffers
	LockVertexColorBuffer();
	t = 0;
	do {
		LockTexCoordBuffer(t);
	} while (++t < m_rpPassCount);

	//Write vertex and color
	const FGLVertex *pSrcVertexArray = m_csVertexArray;
	FGLVertexColor *pVertexColorArray = m_pVertexColorArray;
	DWORD rpColor = m_rpColor;
	i = m_csPtCount;
	do {
		pVertexColorArray->x = pSrcVertexArray->x;
		pVertexColorArray->y = pSrcVertexArray->y;
		pVertexColorArray->z = pSrcVertexArray->z;
		pVertexColorArray->color = rpColor;
		pSrcVertexArray++;
		pVertexColorArray++;
	} while (--i != 0);

	//Write texCoord
	t = 0;
	do {
		FLOAT UPan = TexInfo[t].UPan;
		FLOAT VPan = TexInfo[t].VPan;
		FLOAT UMult = TexInfo[t].UMult;
		FLOAT VMult = TexInfo[t].VMult;
		const FGLMapDot *pMapDot = MapDotArray;
		FGLTexCoord *pTexCoord = m_pTexCoordArray[t];

		INT ptCounter = m_csPtCount;
		do {
			pTexCoord->u = (pMapDot->u - UPan) * UMult;
			pTexCoord->v = (pMapDot->v - VPan) * VMult;

			pMapDot++;
			pTexCoord++;
		} while (--ptCounter != 0);
	} while (++t < m_rpPassCount);

	//Unlock vertexColor and texCoord buffers
	UnlockVertexColorBuffer();
	t = 0;
	do {
		UnlockTexCoordBuffer(t);
	} while (++t < m_rpPassCount);

	return;
}

//Must be called with (m_rpPassCount > 0)
void UD3D9RenderDevice::RenderPassesNoCheckSetup_VP(void) {
	INT i;
	FLOAT vsParams[MAX_TMUNITS * 4];
	IDirect3DPixelShader9 *pixelShader = NULL;

	SetBlend(MultiPass.TMU[0].PolyFlags);

	//Look for a fragment program that can use if they're enabled
	if (UseFragmentProgram) {
		if (m_rpPassCount == 1) {
			pixelShader = m_fpComplexSurfaceSingleTexture;
		}
		else if (m_rpPassCount == 2) {
			if (MultiPass.TMU[1].PolyFlags == PF_Modulated) {
				if (OneXBlending) {
					pixelShader = m_fpComplexSurfaceDualTextureModulated;
				}
				else {
					pixelShader = m_fpComplexSurfaceDualTextureModulated2X;
				}
			}
			else if (MultiPass.TMU[1].PolyFlags == PF_Highlighted) {
				pixelShader = m_fpComplexSurfaceSingleTextureWithFog;
			}
		}
		else if (m_rpPassCount == 3) {
			if (MultiPass.TMU[2].PolyFlags == PF_Highlighted) {
				if (OneXBlending) {
					pixelShader = m_fpComplexSurfaceDualTextureModulatedWithFog;
				}
				else {
					pixelShader = m_fpComplexSurfaceDualTextureModulated2XWithFog;
				}
			}
		}
	}

	i = 0;
	do {
		if (i != 0) {
			//No TexEnv setup for fragment program
			if (!pixelShader) {
				SetTexEnv(i, MultiPass.TMU[i].PolyFlags);
			}
		}

		SetTexture(i, *MultiPass.TMU[i].Info, MultiPass.TMU[i].PolyFlags, MultiPass.TMU[i].PanBias);

		vsParams[(i * 4) + 0] = TexInfo[i].UPan;
		vsParams[(i * 4) + 1] = TexInfo[i].VPan;
		vsParams[(i * 4) + 2] = TexInfo[i].UMult;
		vsParams[(i * 4) + 3] = TexInfo[i].VMult;
	} while (++i < m_rpPassCount);
	m_d3dDevice->SetVertexShaderConstantF(6, vsParams, m_rpPassCount);

	//Set vertex program based on number of texture units in use
	//Set fragment program if found a suitable one
	SetStreamState(m_oneColorVertexDecl, m_vpComplexSurface[m_rpPassCount - 1], pixelShader);

	//Check for additional enabled texture units that should be disabled
	DisableSubsequentTextures(m_rpPassCount);

	//Make sure at least m_csPtCount entries are left in the vertex buffers
	if ((m_curVertexBufferPos + m_csPtCount) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock vertexColor buffer
	LockVertexColorBuffer();

	//Write vertex and color
	const FGLVertex *pSrcVertexArray = m_csVertexArray;
	FGLVertexColor *pVertexColorArray = m_pVertexColorArray;
	DWORD rpColor = m_rpColor;
	i = m_csPtCount;
	do {
		pVertexColorArray->x = pSrcVertexArray->x;
		pVertexColorArray->y = pSrcVertexArray->y;
		pVertexColorArray->z = pSrcVertexArray->z;
		pVertexColorArray->color = rpColor;
		pSrcVertexArray++;
		pVertexColorArray++;
	} while (--i != 0);

	//Unlock vertexColor buffer
	UnlockVertexColorBuffer();

	return;
}

//Fixed function path: binds one or two surface textures together with the detail texture
//and fills the vertex/color and texture coordinate buffers for all buffered points
//Caller must guarantee (m_rpPassCount > 0) on entry
void UD3D9RenderDevice::RenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTexture(FTextureInfo &DetailTextureInfo) {
	//Z range used for the alpha texture lookup, and its reciprocal
	const FLOAT fadeNearZ = 380.0f;
	const FLOAT invFadeNearZ = 1.0f / fadeNearZ;

	//The detail texture needs two additional texture units
	m_rpPassCount += 2;

	SetBlend(MultiPass.TMU[0].PolyFlags);

	//The surface texture has to be 2X blended
	//Its TexEnv stage is forced to PF_Modulated as well
	MultiPass.TMU[0].PolyFlags |= (PF_Modulated | PF_FlatShaded);

	//Texture units 0 and 1 belong to the detail texture
	//The surface textures are shifted up to the units that follow
	INT stage = 2;
	do {
		const INT passIndex = stage - 2;

		SetTexEnv(stage, MultiPass.TMU[passIndex].PolyFlags);
		SetTexture(stage, *MultiPass.TMU[passIndex].Info, MultiPass.TMU[passIndex].PolyFlags, MultiPass.TMU[passIndex].PanBias);

		stage++;
	} while (stage < m_rpPassCount);

	SetAlphaTexture(0);

	SetTexEnv(1, PF_Memorized);
	SetTextureNoPanBias(1, DetailTextureInfo, PF_Modulated);

	//Stream state depends on how many texture units are in use
	SetStreamState(m_standardNTextureVertexDecl[m_rpPassCount - 1], NULL, NULL);

	//Turn off any texture units past the ones used here
	DisableSubsequentTextures(m_rpPassCount);

	//Flush first if fewer than m_csPtCount entries remain in the vertex buffers
	if ((m_curVertexBufferPos + m_csPtCount) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	//Lock the vertexColor buffer and one texCoord buffer per texture unit
	LockVertexColorBuffer();
	INT unit = 0;
	do {
		LockTexCoordBuffer(unit);
		unit++;
	} while (unit < m_rpPassCount);

	const INT ptCount = m_csPtCount;
	INT n;

	//Positions are copied through, every vertex gets the detail color with full alpha
	{
		const FGLVertex *srcVerts = m_csVertexArray;
		FGLVertexColor *dstVerts = m_pVertexColorArray;
		const DWORD opaqueDetailColor = m_detailTextureColor4ub | 0xFF000000;

		n = 0;
		do {
			dstVerts[n].x = srcVerts[n].x;
			dstVerts[n].y = srcVerts[n].y;
			dstVerts[n].z = srcVerts[n].z;
			dstVerts[n].color = opaqueDetailColor;
		} while (++n != ptCount);
	}

	//Texture unit 0: alpha texture for the detail texture, indexed by scaled Z
	{
		const FGLVertex *srcVerts = &m_csVertexArray[0];
		FGLTexCoord *alphaCoords = m_pTexCoordArray[0];

		n = 0;
		do {
			alphaCoords[n].u = srcVerts[n].z * invFadeNearZ;
			alphaCoords[n].v = 0.5f;
		} while (++n != ptCount);
	}

	//Texture unit 1: detail texture
	//Texture units 2 and 3: the one or two surface textures
	unit = 1;
	do {
		const FLOAT panU = TexInfo[unit].UPan;
		const FLOAT panV = TexInfo[unit].VPan;
		const FLOAT scaleU = TexInfo[unit].UMult;
		const FLOAT scaleV = TexInfo[unit].VMult;
		const FGLMapDot *mapDots = &MapDotArray[0];
		FGLTexCoord *unitCoords = m_pTexCoordArray[unit];

		n = 0;
		do {
			unitCoords[n].u = (mapDots[n].u - panU) * scaleU;
			unitCoords[n].v = (mapDots[n].v - panV) * scaleV;
		} while (++n != ptCount);
	} while (++unit < m_rpPassCount);

	//Release the vertexColor buffer and every texCoord buffer locked above
	UnlockVertexColorBuffer();
	unit = 0;
	do {
		UnlockTexCoordBuffer(unit);
		unit++;
	} while (unit < m_rpPassCount);
}

//Vertex program path: binds one or two surface textures together with the detail texture
//Texture coordinates come from vertex shader constants, so only the vertexColor buffer is filled
//Caller must guarantee (m_rpPassCount > 0) on entry
void UD3D9RenderDevice::RenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTexture_VP(FTextureInfo &DetailTextureInfo) {
	FLOAT vsParams[MAX_TMUNITS * 4];

	//The detail texture needs two additional texture units
	m_rpPassCount += 2;

	SetBlend(MultiPass.TMU[0].PolyFlags);

	//The surface texture has to be 2X blended
	//Its TexEnv stage is forced to PF_Modulated as well
	MultiPass.TMU[0].PolyFlags |= (PF_Modulated | PF_FlatShaded);

	//Three units in total means a single surface texture, otherwise there are two
	IDirect3DVertexShader9 *vertexShader = (m_rpPassCount == 3)
		? m_vpComplexSurfaceSingleTextureAndDetailTexture
		: m_vpComplexSurfaceDualTextureAndDetailTexture;

	//Texture units 0 and 1 belong to the detail texture
	//The surface textures are shifted up to the units that follow
	INT stage = 2;
	do {
		const INT passIndex = stage - 2;

		SetTexEnv(stage, MultiPass.TMU[passIndex].PolyFlags);
		SetTexture(stage, *MultiPass.TMU[passIndex].Info, MultiPass.TMU[passIndex].PolyFlags, MultiPass.TMU[passIndex].PanBias);

		//One float4 constant per texture unit: pan in xy, scale in zw
		FLOAT *stageParams = &vsParams[stage * 4];
		stageParams[0] = TexInfo[stage].UPan;
		stageParams[1] = TexInfo[stage].VPan;
		stageParams[2] = TexInfo[stage].UMult;
		stageParams[3] = TexInfo[stage].VMult;

		stage++;
	} while (stage < m_rpPassCount);

	SetAlphaTexture(0);

	SetTextureNoPanBias(1, DetailTextureInfo, PF_Modulated);
	SetTexEnv(1, PF_Memorized);

	//Constant for unit 0 (alpha texture): 0.002631578947 is 1 / 380
	FLOAT *alphaParams = &vsParams[0];
	alphaParams[0] = 0.002631578947f;
	alphaParams[1] = 0.5f;
	alphaParams[2] = 0.0f;
	alphaParams[3] = 0.0f;

	//Constant for unit 1 (detail texture)
	FLOAT *detailParams = &vsParams[4];
	detailParams[0] = TexInfo[1].UPan;
	detailParams[1] = TexInfo[1].VPan;
	detailParams[2] = TexInfo[1].UMult;
	detailParams[3] = TexInfo[1].VMult;

	m_d3dDevice->SetVertexShaderConstantF(6, vsParams, m_rpPassCount);

	//Select the vertex declaration and vertex program, no fragment program
	SetStreamState(m_oneColorVertexDecl, vertexShader, NULL);

	//Turn off any texture units past the ones used here
	DisableSubsequentTextures(m_rpPassCount);

	//Flush first if fewer than m_csPtCount entries remain in the vertex buffers
	if ((m_curVertexBufferPos + m_csPtCount) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	LockVertexColorBuffer();

	//Positions are copied through, every vertex gets the detail color with full alpha
	const FGLVertex *srcVerts = m_csVertexArray;
	FGLVertexColor *dstVerts = m_pVertexColorArray;
	const DWORD opaqueDetailColor = m_detailTextureColor4ub | 0xFF000000;
	const INT ptCount = m_csPtCount;
	INT n = 0;
	do {
		dstVerts[n].x = srcVerts[n].x;
		dstVerts[n].y = srcVerts[n].y;
		dstVerts[n].z = srcVerts[n].z;
		dstVerts[n].color = opaqueDetailColor;
	} while (++n != ptCount);

	UnlockVertexColorBuffer();
}

//Fragment program path: binds one or two surface textures together with the detail texture
//The detail texture sits in the texture unit right after the surface textures
//Caller must guarantee (m_rpPassCount > 0) on entry
void UD3D9RenderDevice::RenderPassesNoCheckSetup_SingleOrDualTextureAndDetailTexture_FP(FTextureInfo &DetailTextureInfo) {
	FLOAT vsParams[3 * 4];

	//The detail texture needs one additional texture unit, which is the last one
	m_rpPassCount += 1;
	const DWORD detailTexUnit = (m_rpPassCount - 1);

	//Two units in total means a single surface texture, otherwise there are two
	const bool singleSurfaceTexture = (m_rpPassCount == 2);
	const bool twoLayerDetail = (DetailMax >= 2);

	IDirect3DVertexShader9 *vertexShader = singleSurfaceTexture
		? m_vpComplexSurfaceDualTextureWithPos
		: m_vpComplexSurfaceTripleTextureWithPos;

	IDirect3DPixelShader9 *pixelShader;
	if (singleSurfaceTexture) {
		pixelShader = twoLayerDetail
			? m_fpSingleTextureAndDetailTextureTwoLayer
			: m_fpSingleTextureAndDetailTexture;
	}
	else {
		pixelShader = twoLayerDetail
			? m_fpDualTextureAndDetailTextureTwoLayer
			: m_fpDualTextureAndDetailTexture;
	}

	SetBlend(MultiPass.TMU[0].PolyFlags);

	//Surface textures occupy the texture units ahead of the detail texture
	//No TexEnv is set up when a fragment program is used (modulated only)
	INT stage = 0;
	do {
		SetTexture(stage, *MultiPass.TMU[stage].Info, MultiPass.TMU[stage].PolyFlags, MultiPass.TMU[stage].PanBias);

		//One float4 constant per texture unit: pan in xy, scale in zw
		FLOAT *stageParams = &vsParams[stage * 4];
		stageParams[0] = TexInfo[stage].UPan;
		stageParams[1] = TexInfo[stage].VPan;
		stageParams[2] = TexInfo[stage].UMult;
		stageParams[3] = TexInfo[stage].VMult;
	} while (++stage < detailTexUnit);

	//Detail texture goes in the second or third texture unit, again without TexEnv
	SetTextureNoPanBias(detailTexUnit, DetailTextureInfo, PF_Modulated);

	FLOAT *detailParams = &vsParams[detailTexUnit * 4];
	detailParams[0] = TexInfo[detailTexUnit].UPan;
	detailParams[1] = TexInfo[detailTexUnit].VPan;
	detailParams[2] = TexInfo[detailTexUnit].UMult;
	detailParams[3] = TexInfo[detailTexUnit].VMult;

	m_d3dDevice->SetVertexShaderConstantF(6, vsParams, m_rpPassCount);

	//Select the vertex declaration, vertex program, and fragment program
	SetStreamState(m_oneColorVertexDecl, vertexShader, pixelShader);

	//Turn off any texture units past the ones used here
	DisableSubsequentTextures(m_rpPassCount);

	//Flush first if fewer than m_csPtCount entries remain in the vertex buffers
	if ((m_curVertexBufferPos + m_csPtCount) >= VERTEX_ARRAY_SIZE) {
		FlushVertexBuffers();
	}

	LockVertexColorBuffer();

	//The alpha byte of the vertex color is only set when OneXBlending is off
	DWORD detailColor = m_detailTextureColor4ub;
	if (!OneXBlending) {
		detailColor |= 0xFF000000;
	}

	//Positions are copied through, every vertex gets the same detail color
	const FGLVertex *srcVerts = m_csVertexArray;
	FGLVertexColor *dstVerts = m_pVertexColorArray;
	const INT ptCount = m_csPtCount;
	INT n = 0;
	do {
		dstVerts[n].x = srcVerts[n].x;
		dstVerts[n].y = srcVerts[n].y;
		dstVerts[n].z = srcVerts[n].z;
		dstVerts[n].color = detailColor;
	} while (++n != ptCount);

	UnlockVertexColorBuffer();
}

//Modified this routine to always set up detail texture state
//It should only be called if at least one polygon will be detail textured
//
//Fixed function detail texture pass over the buffered complex surface polygons.
//Draws each polygon whose entry in DetailTextureIsNearArray is non-zero as a triangle fan,
//repeating the whole pass DetailMax times (always at least once) with NearZ divided by
//4.223 and the detail texture scale multiplied by 4.223 on each subsequent pass.
//
//Parameters:
//	DetailTextureInfo	detail texture to bind
//	clipDetailTexture	when true, partially near polygons are clipped against NearZ and
//						a slope scale depth bias of -1.0 is applied while drawing
//
//Three per polygon paths exist:
//	- detail alpha mode (clipDetailTexture false and UseDetailAlpha set): alpha texture
//	  in unit 0 indexed by z * RNearZ, detail texture in unit 1
//	- unclipped: detail texture in unit 0, fade stored in the vertex alpha
//	- clipped: as unclipped, but extra vertices are inserted where an edge crosses NearZ
void UD3D9RenderDevice::DrawDetailTexture(FTextureInfo &DetailTextureInfo, bool clipDetailTexture) {
	//Setup detail texture state
	SetBlend(PF_Modulated);

	//Set detail alpha mode flag
	bool detailAlphaMode = ((clipDetailTexture == false) && UseDetailAlpha) ? true : false;

	if (detailAlphaMode) {
		SetAlphaTexture(0);
		//TexEnv 0 is PF_Modulated by default

		SetTextureNoPanBias(1, DetailTextureInfo, PF_Modulated);
		SetTexEnv(1, PF_Memorized);

		//Set stream state for two textures
		SetStreamState(m_standardNTextureVertexDecl[1], NULL, NULL);

		//Check for additional enabled texture units that should be disabled
		DisableSubsequentTextures(2);
	}
	else {
		SetTextureNoPanBias(0, DetailTextureInfo, PF_Modulated);
		SetTexEnv(0, PF_Memorized);

		if (clipDetailTexture == true) {
			//The render state takes a DWORD, so the float bit pattern is passed through unchanged
			FLOAT fDepthBias = -1.0f;
			m_d3dDevice->SetRenderState(D3DRS_SLOPESCALEDEPTHBIAS, *(DWORD *)&fDepthBias);
		}

		//Set stream state for one texture
		SetStreamState(m_standardNTextureVertexDecl[0], NULL, NULL);

		//Check for additional enabled texture units that should be disabled
		DisableSubsequentTextures(1);
	}


	//Get detail texture color
	DWORD detailColor = m_detailTextureColor4ub;

	//Per pass state: first pass uses NearZ = 380 and unscaled detail texture coordinates
	INT detailPassNum = 0;
	FLOAT NearZ  = 380.0f;
	FLOAT RNearZ = 1.0f / NearZ;
	FLOAT DetailScale = 1.0f;
	do {
		//Set up new NearZ and rescan points if subsequent pass
		if (detailPassNum > 0) {
			//Adjust NearZ and detail texture scaling
			NearZ /= 4.223f;
			RNearZ *= 4.223f;
			DetailScale *= 4.223f;

			//Rescan points
			//The return value is ignored, so a pass with no near polygons still walks the list below
			(this->*m_pBufferDetailTextureDataProc)(NearZ);
		}

		//Calculate scaled UMult and VMult for detail texture based on mode
		//The detail texture is in unit 1 in detail alpha mode and in unit 0 otherwise
		FLOAT DetailUMult;
		FLOAT DetailVMult;
		if (detailAlphaMode) {
			DetailUMult = TexInfo[1].UMult * DetailScale;
			DetailVMult = TexInfo[1].VMult * DetailScale;
		}
		else {
			DetailUMult = TexInfo[0].UMult * DetailScale;
			DetailVMult = TexInfo[0].VMult * DetailScale;
		}

		//Running index of the current point in m_csVertexArray / MapDotArray
		INT Index = 0;

		INT *pNumPts = &MultiDrawCountArray[0];
		DWORD *pDetailTextureIsNear = DetailTextureIsNearArray;
		for (DWORD PolyNum = 0; PolyNum < m_csPolyCount; PolyNum++, pNumPts++, pDetailTextureIsNear++) {
			DWORD NumPts = *pNumPts;
			DWORD isNearBits = *pDetailTextureIsNear;

			//Skip the polygon if it will not be detail textured
			if (isNearBits == 0) {
				Index += NumPts;
				continue;
			}
			INT StartIndex = Index;

			//Mask with the low NumPts bits set, i.e. the value isNearBits has when every point is near
			//NOTE(review): the shift is only well defined for NumPts < 32 -- confirm the polygon point limit
			DWORD allPtsBits = ~(~0U << NumPts);
			//Detail alpha mode
			if (detailAlphaMode) {
				//Make sure at least NumPts entries are left in the vertex buffers
				if ((m_curVertexBufferPos + NumPts) >= VERTEX_ARRAY_SIZE) {
					FlushVertexBuffers();
				}

				//Lock vertexColor, texCoord0, and texCoord1 buffers
				LockVertexColorBuffer();
				LockTexCoordBuffer(0);
				LockTexCoordBuffer(1);

				FGLTexCoord *pTexCoord0 = m_pTexCoordArray[0];
				FGLTexCoord *pTexCoord1 = m_pTexCoordArray[1];
				FGLVertexColor *pVertexColorArray = m_pVertexColorArray;
				const FGLVertex *pSrcVertexArray = &m_csVertexArray[StartIndex];
				for (INT i = 0; i < NumPts; i++) {
					FLOAT U = MapDotArray[Index].u;
					FLOAT V = MapDotArray[Index].v;

					//Unit 0 looks up the alpha texture by scaled depth, unit 1 gets the detail texture coordinates
					FLOAT PointZ_Times_RNearZ = pSrcVertexArray[i].z * RNearZ;
					pTexCoord0[i].u = PointZ_Times_RNearZ;
					pTexCoord0[i].v = 0.5f;
					pTexCoord1[i].u = (U - TexInfo[1].UPan) * DetailUMult;
					pTexCoord1[i].v = (V - TexInfo[1].VPan) * DetailVMult;

					pVertexColorArray[i].x = pSrcVertexArray[i].x;
					pVertexColorArray[i].y = pSrcVertexArray[i].y;
					pVertexColorArray[i].z = pSrcVertexArray[i].z;
					//Vertex alpha is fully set here, the fade comes from the alpha texture
					pVertexColorArray[i].color = detailColor | 0xFF000000;

					Index++;
				}

				//Unlock vertexColor,texCoord0, and texCoord1 buffers
				UnlockVertexColorBuffer();
				UnlockTexCoordBuffer(0);
				UnlockTexCoordBuffer(1);

				//Draw the triangles
				m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, NumPts - 2);

				//Advance vertex buffer position
				m_curVertexBufferPos += NumPts;
			}
			//Otherwise, no clipping required, or clipping required, but DetailClipping not enabled
			else if ((clipDetailTexture == false) || (isNearBits == allPtsBits)) {
				//Make sure at least NumPts entries are left in the vertex buffers
				if ((m_curVertexBufferPos + NumPts) >= VERTEX_ARRAY_SIZE) {
					FlushVertexBuffers();
				}

				//Lock vertexColor and texCoord0 buffers
				LockVertexColorBuffer();
				LockTexCoordBuffer(0);

				FGLTexCoord *pTexCoord = m_pTexCoordArray[0];
				FGLVertexColor *pVertexColorArray = m_pVertexColorArray;
				const FGLVertex *pSrcVertexArray = &m_csVertexArray[StartIndex];
				for (INT i = 0; i < NumPts; i++) {
					FLOAT U = MapDotArray[Index].u;
					FLOAT V = MapDotArray[Index].v;

					pTexCoord[i].u = (U - TexInfo[0].UPan) * DetailUMult;
					pTexCoord[i].v = (V - TexInfo[0].VPan) * DetailVMult;

					pVertexColorArray[i].x = pSrcVertexArray[i].x;
					pVertexColorArray[i].y = pSrcVertexArray[i].y;
					pVertexColorArray[i].z = pSrcVertexArray[i].z;
					//Vertex alpha is 255 at z = 0 and falls linearly to 0 at z >= NearZ
					DWORD alpha = appRound((1.0f - (Clamp(pSrcVertexArray[i].z, 0.0f, NearZ) * RNearZ)) * 255.0f);
					pVertexColorArray[i].color = detailColor | (alpha << 24);

					Index++;
				}

				//Unlock vertexColor and texCoord0 buffers
				UnlockVertexColorBuffer();
				UnlockTexCoordBuffer(0);

				//Draw the triangles
				m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, NumPts - 2);

				//Advance vertex buffer position
				m_curVertexBufferPos += NumPts;
			}
			//Otherwise, clipping required and DetailClipping enabled
			else {
				//Make sure at least (NumPts * 2) entries are left in the vertex buffers
				if ((m_curVertexBufferPos + (NumPts * 2)) >= VERTEX_ARRAY_SIZE) {
					FlushVertexBuffers();
				}

				//Lock vertexColor and texCoord0 buffers
				LockVertexColorBuffer();
				LockTexCoordBuffer(0);

				//NextIndex counts the vertices emitted for the clipped polygon
				//i walks the points in order and j is the previous point, starting with the last one
				//The bit for point 0 is the highest of the NumPts bits and the bit for the last point is bit 0,
				//which matches the shift left per point done when the bits are built in BufferDetailTextureData
				DWORD NextIndex = 0;
				DWORD isNear_i_bit = 1U << (NumPts - 1);
				DWORD isNear_j_bit = 1U;
				FGLTexCoord *pTexCoord = m_pTexCoordArray[0];
				for (INT i = 0, j = NumPts - 1; i < NumPts; j = i++, isNear_j_bit = isNear_i_bit, isNear_i_bit >>= 1) {
					const FGLVertex &Point = m_csVertexArray[Index];
					FLOAT U = MapDotArray[Index].u;
					FLOAT V = MapDotArray[Index].v;

					//Edge enters the near region (previous point far, current point near)
					//Emit the intersection with z = NearZ first, with zero alpha
					if (((isNear_i_bit & isNearBits) != 0) && ((isNear_j_bit & isNearBits) == 0)) {
						const FGLVertex &PrevPoint = m_csVertexArray[StartIndex + j];
						FLOAT PrevU = MapDotArray[StartIndex + j].u;
						FLOAT PrevV = MapDotArray[StartIndex + j].v;

						//m is the interpolation factor from Point towards PrevPoint
						//It stays at 1.0 when the z difference is too small to divide by
						FLOAT dist = PrevPoint.z - Point.z;
						FLOAT m = 1.0f;
						if (dist > 0.001f) {
							m = (NearZ - Point.z) / dist;
						}
						FGLVertexColor *pVertexColor = &m_pVertexColorArray[NextIndex];
						pVertexColor->x = (m * (PrevPoint.x - Point.x)) + Point.x;
						pVertexColor->y = (m * (PrevPoint.y - Point.y)) + Point.y;
						pVertexColor->z = NearZ;
						DWORD alpha = 0;
						pVertexColor->color = detailColor | (alpha << 24);

						pTexCoord[NextIndex].u = ((m * (PrevU - U)) + U - TexInfo[0].UPan) * DetailUMult;
						pTexCoord[NextIndex].v = ((m * (PrevV - V)) + V - TexInfo[0].VPan) * DetailVMult;

						NextIndex++;
					}

					//Current point is near: emit it unchanged with its depth based alpha
					if ((isNear_i_bit & isNearBits) != 0) {
						pTexCoord[NextIndex].u = (U - TexInfo[0].UPan) * DetailUMult;
						pTexCoord[NextIndex].v = (V - TexInfo[0].VPan) * DetailVMult;

						FGLVertexColor *pVertexColor = &m_pVertexColorArray[NextIndex];
						pVertexColor->x = Point.x;
						pVertexColor->y = Point.y;
						pVertexColor->z = Point.z;
						DWORD alpha = appRound((1.0f - (Clamp(Point.z, 0.0f, NearZ) * RNearZ)) * 255.0f);
						pVertexColor->color = detailColor | (alpha << 24);

						NextIndex++;
					}

					//Edge leaves the near region (previous point near, current point far)
					//Emit the intersection with z = NearZ, with zero alpha
					if (((isNear_i_bit & isNearBits) == 0) && ((isNear_j_bit & isNearBits) != 0)) {
						const FGLVertex &PrevPoint = m_csVertexArray[StartIndex + j];
						FLOAT PrevU = MapDotArray[StartIndex + j].u;
						FLOAT PrevV = MapDotArray[StartIndex + j].v;

						//Here m is the interpolation factor from PrevPoint towards Point
						FLOAT dist = Point.z - PrevPoint.z;
						FLOAT m = 1.0f;
						if (dist > 0.001f) {
							m = (NearZ - PrevPoint.z) / dist;
						}
						FGLVertexColor *pVertexColor = &m_pVertexColorArray[NextIndex];
						pVertexColor->x = (m * (Point.x - PrevPoint.x)) + PrevPoint.x;
						pVertexColor->y = (m * (Point.y - PrevPoint.y)) + PrevPoint.y;
						pVertexColor->z = NearZ;
						DWORD alpha = 0;
						pVertexColor->color = detailColor | (alpha << 24);

						pTexCoord[NextIndex].u = ((m * (U - PrevU)) + PrevU - TexInfo[0].UPan) * DetailUMult;
						pTexCoord[NextIndex].v = ((m * (V - PrevV)) + PrevV - TexInfo[0].VPan) * DetailVMult;

						NextIndex++;
					}

					Index++;
				}

				//Unlock vertexColor and texCoord0 buffers
				UnlockVertexColorBuffer();
				UnlockTexCoordBuffer(0);

				//Draw the triangles
				//The fan uses the NextIndex vertices emitted above rather than NumPts
				m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, NextIndex - 2);

				//Advance vertex buffer position
				m_curVertexBufferPos += NextIndex;
			}
		}
	} while (++detailPassNum < DetailMax);


	//Clear detail texture state
	if (detailAlphaMode) {
		//TexEnv 0 was left in default state of PF_Modulated
	}
	else {
		SetTexEnv(0, PF_Modulated);

		if (clipDetailTexture == true) {
			//Restore the slope scale depth bias to 0.0
			FLOAT fDepthBias = 0.0f;
			m_d3dDevice->SetRenderState(D3DRS_SLOPESCALEDEPTHBIAS, *(DWORD *)&fDepthBias);
		}
	}

	return;
}

//Vertex program detail texture pass over the buffered complex surface polygons
//Detail texture state is always set up here, so this should only be called
//when at least one polygon is going to be detail textured
void UD3D9RenderDevice::DrawDetailTexture_VP(FTextureInfo &DetailTextureInfo) {
	//Detail texture state: alpha texture in unit 0, detail texture in unit 1
	SetBlend(PF_Modulated);

	SetAlphaTexture(0);
	//TexEnv 0 is already PF_Modulated by default

	SetTextureNoPanBias(1, DetailTextureInfo, PF_Modulated);
	SetTexEnv(1, PF_Memorized);

	//Two float4 vertex shader constants starting at register 6
	//First one is for the alpha texture (0.002631578947 is 1 / 380), second one for the detail texture
	FLOAT vsParams[8];
	vsParams[0] = 0.002631578947f;
	vsParams[1] = 0.5f;
	vsParams[2] = 0.0f;
	vsParams[3] = 0.0f;
	vsParams[4] = TexInfo[1].UPan;
	vsParams[5] = TexInfo[1].VPan;
	vsParams[6] = TexInfo[1].UMult;
	vsParams[7] = TexInfo[1].VMult;
	m_d3dDevice->SetVertexShaderConstantF(6, vsParams, 2);

	//Detail alpha vertex program, no fragment program
	SetStreamState(m_oneColorVertexDecl, m_vpComplexSurfaceDetailAlpha, NULL);

	//Turn off any texture units past the two used here
	DisableSubsequentTextures(2);

	const DWORD opaqueDetailColor = m_detailTextureColor4ub | 0xFF000000;
	const DWORD polyCount = m_csPolyCount;

	//Index of the first point of the current polygon in m_csVertexArray
	INT firstPt = 0;
	for (DWORD polyIdx = 0; polyIdx < polyCount; polyIdx++) {
		const INT numPts = MultiDrawCountArray[polyIdx];

		//Only polygons flagged as near get a detail texture pass
		if (DetailTextureIsNearArray[polyIdx] != 0) {
			//Flush first if fewer than numPts entries remain in the vertex buffers
			if ((m_curVertexBufferPos + numPts) >= VERTEX_ARRAY_SIZE) {
				FlushVertexBuffers();
			}

			LockVertexColorBuffer();

			//Copy positions and apply the detail color with full alpha
			const FGLVertex *pSrc = &m_csVertexArray[firstPt];
			const FGLVertex *const pSrcEnd = pSrc + numPts;
			FGLVertexColor *pDst = m_pVertexColorArray;
			for (; pSrc < pSrcEnd; ++pSrc, ++pDst) {
				pDst->x = pSrc->x;
				pDst->y = pSrc->y;
				pDst->z = pSrc->z;
				pDst->color = opaqueDetailColor;
			}

			UnlockVertexColorBuffer();

			//Draw the polygon as a triangle fan and move past the vertices just used
			m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, numPts - 2);
			m_curVertexBufferPos += numPts;
		}

		firstPt += numPts;
	}

	//Nothing to restore: TexEnv 0 was left in its default state of PF_Modulated
}

//Fragment program detail texture pass over the buffered complex surface polygons
//Detail texture state is always set up here, so this should only be called
//when at least one polygon is going to be detail textured
void UD3D9RenderDevice::DrawDetailTexture_FP(FTextureInfo &DetailTextureInfo) {
	//Detail texture state: detail texture in unit 0, no TexEnv in fragment program mode
	SetBlend(PF_Modulated);

	SetTextureNoPanBias(0, DetailTextureInfo, PF_Modulated);

	//One float4 vertex shader constant at register 6: pan in xy, scale in zw
	FLOAT vsParams[4];
	vsParams[0] = TexInfo[0].UPan;
	vsParams[1] = TexInfo[0].VPan;
	vsParams[2] = TexInfo[0].UMult;
	vsParams[3] = TexInfo[0].VMult;
	m_d3dDevice->SetVertexShaderConstantF(6, vsParams, 1);

	//Pick the two layer fragment program when DetailMax asks for two or more layers
	IDirect3DPixelShader9 *pixelShader = (DetailMax >= 2) ? m_fpDetailTextureTwoLayer : m_fpDetailTexture;
	SetStreamState(m_oneColorVertexDecl, m_vpComplexSurfaceSingleTextureWithPos, pixelShader);

	//Turn off any texture units past the one used here
	DisableSubsequentTextures(1);

	const DWORD opaqueDetailColor = m_detailTextureColor4ub | 0xFF000000;
	const DWORD polyCount = m_csPolyCount;

	//Index of the first point of the current polygon in m_csVertexArray
	INT firstPt = 0;
	for (DWORD polyIdx = 0; polyIdx < polyCount; polyIdx++) {
		const INT numPts = MultiDrawCountArray[polyIdx];

		//Only polygons flagged as near get a detail texture pass
		if (DetailTextureIsNearArray[polyIdx] != 0) {
			//Flush first if fewer than numPts entries remain in the vertex buffers
			if ((m_curVertexBufferPos + numPts) >= VERTEX_ARRAY_SIZE) {
				FlushVertexBuffers();
			}

			LockVertexColorBuffer();

			//Copy positions and apply the detail color with full alpha
			const FGLVertex *pSrc = &m_csVertexArray[firstPt];
			const FGLVertex *const pSrcEnd = pSrc + numPts;
			FGLVertexColor *pDst = m_pVertexColorArray;
			for (; pSrc < pSrcEnd; ++pSrc, ++pDst) {
				pDst->x = pSrc->x;
				pDst->y = pSrc->y;
				pDst->z = pSrc->z;
				pDst->color = opaqueDetailColor;
			}

			UnlockVertexColorBuffer();

			//Draw the polygon as a triangle fan and move past the vertices just used
			m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLEFAN, m_curVertexBufferPos, numPts - 2);
			m_curVertexBufferPos += numPts;
		}

		firstPt += numPts;
	}

	//Nothing to restore: TexEnv 0 was left in its default state of PF_Modulated
}

//Copies the points of every polygon in Facet into m_csVertexArray and stores the
//matching texture map dot products (relative to m_csUDot / m_csVDot) in MapDotArray
//Also records the first point index and point count of each polygon and updates m_csPolyCount
//Returns the total number of points buffered, or 0 if they exceed VERTEX_ARRAY_SIZE
INT UD3D9RenderDevice::BufferStaticComplexSurfaceGeometry(const FSurfaceFacet& Facet) {
	INT totalPts = 0;

	// Buffer "static" geometry.
	m_csPolyCount = 0;
	for (FSavedPoly *poly = Facet.Polys; poly != NULL; poly = poly->Next) {
		const INT polyPts = poly->NumPts;

		//Polygons without points are ignored entirely
		if (polyPts > 0) {
			//Record where this polygon starts and how many points it has
			const DWORD polySlot = m_csPolyCount;
			MultiDrawFirstArray[polySlot] = totalPts;
			MultiDrawCountArray[polySlot] = polyPts;
			m_csPolyCount = polySlot + 1;

			//Give up before writing any point that would not fit
			const INT firstPt = totalPts;
			totalPts += polyPts;
			if (totalPts > VERTEX_ARRAY_SIZE) {
				return 0;
			}

			for (INT n = 0; n < polyPts; n++) {
				const FVector &point = poly->Pts[n]->Point;

				FGLMapDot &mapDot = MapDotArray[firstPt + n];
				mapDot.u = (Facet.MapCoords.XAxis | point) - m_csUDot;
				mapDot.v = (Facet.MapCoords.YAxis | point) - m_csVDot;

				FGLVertex &vertex = m_csVertexArray[firstPt + n];
				vertex.x = point.X;
				vertex.y = point.Y;
				vertex.z = point.Z;
			}
		}
	}

	return totalPts;
}

//Copies the points of every polygon in Facet into m_csVertexArray
//Unlike the non vertex program variant, no map dot products are stored
//Also records the first point index and point count of each polygon and updates m_csPolyCount
//Returns the total number of points buffered, or 0 if they exceed VERTEX_ARRAY_SIZE
INT UD3D9RenderDevice::BufferStaticComplexSurfaceGeometry_VP(const FSurfaceFacet& Facet) {
	INT totalPts = 0;

	// Buffer "static" geometry.
	m_csPolyCount = 0;
	for (FSavedPoly *poly = Facet.Polys; poly != NULL; poly = poly->Next) {
		const INT polyPts = poly->NumPts;

		//Polygons without points are ignored entirely
		if (polyPts > 0) {
			//Record where this polygon starts and how many points it has
			const DWORD polySlot = m_csPolyCount;
			MultiDrawFirstArray[polySlot] = totalPts;
			MultiDrawCountArray[polySlot] = polyPts;
			m_csPolyCount = polySlot + 1;

			//Give up before writing any point that would not fit
			const INT firstPt = totalPts;
			totalPts += polyPts;
			if (totalPts > VERTEX_ARRAY_SIZE) {
				return 0;
			}

			for (INT n = 0; n < polyPts; n++) {
				const FVector &point = poly->Pts[n]->Point;

				FGLVertex &vertex = m_csVertexArray[firstPt + n];
				vertex.x = point.X;
				vertex.y = point.Y;
				vertex.z = point.Z;
			}
		}
	}

	return totalPts;
}

//Builds one bit mask per buffered polygon in DetailTextureIsNearArray
//A bit is set when the corresponding point has z < NearZ; the first point of a polygon
//ends up in the highest used bit and the last point in bit 0
//Returns the OR of all masks, so zero means no point of any polygon is near
//Both loops run at least once: there must be at least one polygon and no empty polygons
DWORD UD3D9RenderDevice::BufferDetailTextureData(FLOAT NearZ) {
	const DWORD polyCount = m_csPolyCount;
	DWORD combinedNearBits = 0;

	//Points of all polygons are stored back to back in m_csVertexArray
	INT vertIdx = 0;
	DWORD polyIdx = 0;
	do {
		INT ptsLeft = MultiDrawCountArray[polyIdx];
		DWORD nearBits = 0;

		//Shift in one bit per point, newest point in bit 0
		do {
			const DWORD pointIsNear = (m_csVertexArray[vertIdx].z < NearZ) ? 1U : 0U;
			nearBits = (nearBits << 1) | pointIsNear;
			vertIdx++;
		} while (--ptsLeft != 0);

		DetailTextureIsNearArray[polyIdx] = nearBits;
		combinedNearBits |= nearBits;
	} while (++polyIdx != polyCount);

	return combinedNearBits;
}

#ifdef UTGLR_INCLUDE_SSE_CODE
//Inline assembly variant of BufferDetailTextureData
//For each buffered polygon it builds a bit mask with one bit per point (set when z < NearZ,
//newest point in bit 0), stores it in DetailTextureIsNearArray, and returns the OR of all masks
//Naked function: 'this' is read from ecx and NearZ from the stack, and 'ret 4' pops NearZ on return
//Both loops test their counter after the body, so there must be at least one polygon
//and every polygon must have at least one point
__declspec(naked) DWORD UD3D9RenderDevice::BufferDetailTextureData_SSE2(FLOAT NearZ) {
	__asm {
		//xmm0 = NearZ (read before the pushes below move esp)
		movd xmm0, [esp+4]

		push esi
		push edi

		//esi walks the vertices, edx the per polygon point counts, edi the output masks
		lea esi, [ecx]this.m_csVertexArray
		lea edx, [ecx]this.MultiDrawCountArray
		lea edi, [ecx]this.DetailTextureIsNearArray

		//xmm1 accumulates the OR of all polygon masks
		pxor xmm1, xmm1

		//ecx is reused as the polygon counter, 'this' is not needed past this point
		mov ecx, [ecx]this.m_csPolyCount

		poly_count_loop:
			//eax = number of points in this polygon
			mov eax, [edx]
			add edx, 4

			//xmm2 accumulates the mask for this polygon
			pxor xmm2, xmm2

			num_pts_loop:
				//Load the float at offset 8 of the vertex (z) and step to the next vertex
				movss xmm3, [esi+8]
				add esi, TYPE FGLVertex

				pslld xmm2, 1

				//Compare gives all ones when z < NearZ, the shift reduces that to a single 1 bit
				cmpltss xmm3, xmm0
				psrld xmm3, 31

				por xmm2, xmm3

				dec eax
				jne num_pts_loop

			//Store the finished mask and fold it into the overall result
			movd [edi], xmm2
			add edi, 4

			por xmm1, xmm2

			dec ecx
			jne poly_count_loop

		//Return value goes in eax
		movd eax, xmm1

		pop edi
		pop esi

		ret 4
	}
}
#endif //UTGLR_INCLUDE_SSE_CODE

//Draws the buffered vertices as a triangle list and resets BufferedVerts to zero
//Does not check whether anything is buffered
void UD3D9RenderDevice::EndBufferingNoCheck(void) {
	//Default AA and texture state
	//Projection state is set separately below
	//Stream state was already set when buffering began
	SetDefaultAAState();
	SetDefaultTextureState();

	clock(GouraudCycles);

	//Projection state, honoring the near Z range hack request
	SetProjectionState(m_requestNearZRangeHackProjection);


	//Release the vertexColor buffer, then the secondary color buffer when fog
	//is requested, and finally the texCoord0 buffer
	UnlockVertexColorBuffer();
	const bool fogRequested = ((m_requestedColorFlags & CF_FOG_MODE) != 0);
	if (fogRequested) {
		UnlockSecondaryColorBuffer();
	}
	UnlockTexCoordBuffer(0);

#ifdef UTGLR_DEBUG_ACTOR_WIREFRAME
	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
#endif

	//Three buffered vertices per triangle
	m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLELIST, m_curVertexBufferPos, BufferedVerts / 3);

	//Move past the vertices that were just drawn
	m_curVertexBufferPos += BufferedVerts;

#ifdef UTGLR_DEBUG_ACTOR_WIREFRAME
	m_d3dDevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_SOLID);
#endif

	//Nothing is buffered anymore
	BufferedVerts = 0;

	unclock(GouraudCycles);
}

//Draws the buffered tile vertices as a triangle list and resets BufferedTileVerts to zero
//Does not check whether anything is buffered
void UD3D9RenderDevice::EndTileBufferingNoCheck(void) {
	//AA is switched off for tiles when NoAATiles is set
	if (!NoAATiles) {
		SetDefaultAAState();
	}
	else {
		SetDisabledAAState();
	}

	//Default projection and texture state
	//Stream state was already set when buffering began
	SetDefaultProjectionState();
	SetDefaultTextureState();

	clock(TileCycles);

	//Release the vertexColor and texCoord0 buffers
	UnlockVertexColorBuffer();
	UnlockTexCoordBuffer(0);

	//Tiles are quads stored as triangles, three buffered vertices each
	m_d3dDevice->DrawPrimitive(D3DPT_TRIANGLELIST, m_curVertexBufferPos, BufferedTileVerts / 3);

	//Move past the vertices that were just drawn
	m_curVertexBufferPos += BufferedTileVerts;

	//Nothing is buffered anymore
	BufferedTileVerts = 0;

	unclock(TileCycles);
}


// Static variables.
// Definitions of the static data members of UD3D9RenderDevice.

// Integer counters, both start at zero.
// NOTE(review): presumably the number of device instances and the number of outstanding locks -- confirm against their users.
INT UD3D9RenderDevice::NumDevices = 0;
INT UD3D9RenderDevice::LockCount = 0;

// Module handle and Direct3DCreate9 function pointer, both NULL until assigned elsewhere.
// NOTE(review): presumably filled in when d3d9.dll is loaded at runtime -- confirm against the init code.
HMODULE UD3D9RenderDevice::hModuleD3d9 = NULL;
LPDIRECT3DCREATE9 UD3D9RenderDevice::pDirect3DCreate9 = NULL;

// Gamma related flags, both start out false.
// The matching gamma ramp storage definition below is commented out.
bool UD3D9RenderDevice::g_gammaFirstTime = false;
bool UD3D9RenderDevice::g_haveOriginalGammaRamp = false;
//UD3D9RenderDevice::FGammaRamp UD3D9RenderDevice::g_originalGammaRamp;

/*-----------------------------------------------------------------------------
	The End.
-----------------------------------------------------------------------------*/
