/*
Donut Bump Mapping Demo
This demo shows how to use a bump mapping technique using Glide(tm)
Copyright (C) 1999  3Dfx Interactive, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

#include "basics.h"
#include "xforms.h"
#include "clip.h"

//#define USE_ASM

// capacity of the clipper's scratch storage: it sizes both the pool of
// generated vertices (gClippedVerts) and each of the two vertex pointer
// lists that ClipAndDrawTriangle ping-pongs between
#define MAX_INTERMIDIATE_VERTS  18
/*
#define STRUCT_DWORD_COPY(d, s, n) \
	{ \
		long *__dst__, *__src__; \
		__dst__ = (long *)(d); \
		__src__ = (long *)(s); \
		__asm push		edi \
		__asm push		esi \
		__asm mov			edi, dword ptr [__dst__] \
		__asm mov			esi, dword ptr [__src__] \
		__asm mov			ecx, n \
		__asm rep			movsd \
		__asm pop			esi \
		__asm pop			edi \
	}

#define VERTEX_COPY(d, s)  STRUCT_DWORD_COPY(d, s, 12)
*/

// global variables
// bounds of the clip volume; written by SetClipVolume()
float gMinClipX, gMinClipY, gMinClipZ, gMaxClipX, gMaxClipY, gMaxClipZ;
// 1/gMinClipZ and 1/gMaxClipZ, precomputed by SetClipVolume() and stored as
// the oow of vertices created on the front/back planes
float gMinClipZInv, gMaxClipZInv;

// extra vertices used when intersections create new vertices
// gCurrFreeClippedVert points at the next unused entry; it is reset to
// &gClippedVerts[0] by ClipAndDrawTriangle/ClipAndDrawLine and advanced by
// ClipEdgeAgainstPlane each time an intersection vertex is created
static GrVertex gClippedVerts[MAX_INTERMIDIATE_VERTS], *gCurrFreeClippedVert;

// IEEE integer representation of the clip bound floats (gMinClipX..gMaxClipZ)
// right shifted by 1; written by SetClipVolume()
int giMinClipX, giMinClipY, giMinClipZ, giMaxClipX, giMaxClipY, giMaxClipZ;


// Sets the clip volume and precomputes the values derived from it.
//
// min_x/max_x, min_y/max_y: bounds tested against a vertex's x and y
// min_z/max_z:              front and back planes tested against a vertex's z
//
// Vertices that get clipped to the front or back plane end up with z equal
// to that plane, so their 1/z is simply 1/min_z or 1/max_z. Those reciprocals
// are computed once here (gMinClipZInv, gMaxClipZInv) instead of per vertex.
// NOTE: min_z and max_z are divided into 1.0f as given; no zero check is made.
void SetClipVolume(float min_x, float max_x, float min_y, float max_y, float min_z, float max_z)
{
	// used to read the bit pattern of a float; going through a union instead
	// of casting a float pointer to an int pointer keeps the access within
	// the aliasing rules (assumes int and float have the same size)
	union { float f; int i; } bits;

	gMinClipX = min_x;
	gMinClipY = min_y;
	gMinClipZ = min_z;
	gMaxClipX = max_x;
	gMaxClipY = max_y;
	gMaxClipZ = max_z;

	gMinClipZInv = 1.0f/gMinClipZ;
	gMaxClipZInv = 1.0f/gMaxClipZ;

	// to avoid overflow when doing the floating subtracts
	// in integer (using IEEE integer representation of floats)
	// I need to drop off one bit from the floats
	// NOTE: to maintain the sign, I must use SAR rather than SHR
	//       so I'm assuming >> is SAR (which seems to be the case)
	bits.f = gMinClipX;
	giMinClipX = bits.i>>1;
	bits.f = gMinClipY;
	giMinClipY = bits.i>>1;
	bits.f = gMinClipZ;
	giMinClipZ = bits.i>>1;
	bits.f = gMaxClipX;
	giMaxClipX = bits.i>>1;
	bits.f = gMaxClipY;
	giMaxClipY = bits.i>>1;
	bits.f = gMaxClipZ;
	giMaxClipZ = bits.i>>1;
}

// Cohen-Sutherland clipping algorithm outcodes
// bit 1: left of window
// bit 2: right of window
// bit 3: below window
// bit 4: above window
// bit 5: behind front plane
// bit 6: beyond back plane
// A picture of the lower 4 bits and the 2D viewport
//         |      |
//    1001 | 1000 | 1010
//      9  |   8  |   a
//   ------+------+------
//         |      |
//    0001 | 0000 | 0010
//      1  |   0  |   2
//   ------+------+------
//         |      |
//    0101 | 0100 | 0110
//      5  |   4  |   6
#ifdef USE_ASM
// this table takes a 3-bit sign mask and creates a 6-bit sign mask as follows:
// (every input bit is duplicated so that it covers both outcode bits of its axis;
// the index is built below as z-sign, y-sign, x-sign from high bit to low bit)
// 000 - 000000 - 0x00
// 001 - 000011 - 0x03
// 010 - 001100 - 0x0c
// 011 - 001111 - 0x0f
// 100 - 110000 - 0x30
// 101 - 110011 - 0x33
// 110 - 111100 - 0x3c
// 111 - 111111 - 0x3f
int sign_mask_table[8] = {0x00, 0x03, 0x0c, 0x0f, 0x30, 0x33, 0x3c, 0x3f};
// Assembly version of ComputeOutcode (only built when USE_ASM is defined).
// The function is naked: it reads v from [esp + 4], saves and restores
// ebx/edi/esi itself and leaves the outcode in eax.
// The floats are loaded into integer registers and compared as raw bit
// patterns: each cmp leaves its borrow in the carry flag and the following
// "adc eax, eax" shifts that carry into eax, so after the six compares eax
// holds (high to low) maxz<z, z<minz, maxy<y, y<miny, maxx<x, x<minx.
// "add ebx, ebx" moves the sign bit of a coordinate into the carry flag, and
// the three signs are collected in ecx the same way.
// NOTE(review): the XOR with sign_mask_table presumably corrects the unsigned
// compares for negative coordinates (which look like large unsigned values) -
// confirm, in particular for negative clip bounds.
// The final XOR with (sign of z) & 0xf flips the four x/y bits when z is
// negative, like the "res ^= 0xf" in the C version.
__declspec(naked) unsigned int ComputeOutcode(const GrVertex *v)
{
	__asm // 23 cycles
	{
		mov			edx, [esp + 4] // edx = v
		push		ebx

		push		edi
		xor			eax, eax // eax = outcodes

		xor			ecx, ecx // ecx = sign mask
		push		esi

		mov			ebx, (GrVertex)[edx].z // ebx = z
		mov			esi, [gMaxClipZ]

		mov			edi, [gMinClipZ]
		cmp			esi, ebx // maxz - z

		adc			eax, eax
		cmp			ebx, edi // z - minz

		adc			eax, eax
		add			ebx, ebx // carry if ebx = z < 0

		adc			ecx, ecx
		mov			esi, [gMaxClipY]

		mov			ebx, (GrVertex)[edx].y // ebx = y
		mov			edi, [gMinClipY]

		cmp			esi, ebx // maxy - y
		mov			esi, [gMaxClipX]

		adc			eax, eax
		cmp			ebx, edi // y - miny

		adc			eax, eax
		add			ebx, ebx // carry if ebx = y < 0

		adc			ecx, ecx
		mov			ebx, (GrVertex)[edx].x // ebx = x

		mov			edi, [gMinClipX]
		cmp			esi, ebx // maxx - x

		adc			eax, eax
		cmp			ebx, edi // x - minx

		adc			eax, eax
		add			ebx, ebx // carry if ebx = x < 0

		adc			ecx, ecx
		mov			edx, (GrVertex)[edx].z // edx = z

		pop			esi
		pop			edi

		sar			edx, 31 // sign mask of z
		mov			ecx, [sign_mask_table + 4*ecx] // sign mask of zzyyxx

		xor			eax, ecx // outcodes if z > 0
		and			edx, 0xf

		xor			eax, edx
		pop			ebx

		ret
	}
}
#else // USE_ASM
// Classifies a vertex against the clip volume and returns its outcode:
//   0x01 x < gMinClipX      0x02 x > gMaxClipX
//   0x04 y < gMinClipY      0x08 y > gMaxClipY
//   0x10 z < gMinClipZ      0x20 z > gMaxClipZ
// A result of 0 means the vertex lies inside on every axis.
// When z is negative the four x/y bits are inverted.
unsigned int ComputeOutcode(const GrVertex *v)
{
	unsigned int code;

	// at most one bit per axis: the "max" test only runs when the "min" test fails
	code  = (v->x < gMinClipX) ? 0x1u  : ((gMaxClipX < v->x) ? 0x2u  : 0u);
	code |= (v->y < gMinClipY) ? 0x4u  : ((gMaxClipY < v->y) ? 0x8u  : 0u);
	code |= (v->z < gMinClipZ) ? 0x10u : ((gMaxClipZ < v->z) ? 0x20u : 0u);

	// flip the x, y outcodes if z < 0
	return (v->z < 0) ? (code ^ 0xfu) : code;
}
#endif // USE_ASM

#ifdef USE_ASM
// Assembly version of VertexCopy (only built when USE_ASM is defined).
// Naked function: dst is read from [esp + 4], src from [esp + 8].
// Copies twelve dwords from src to dst one at a time through eax:
// x, y, z, oow, r, g, b, a, s0, t0, s1, t1. Each load is paired with the
// store of the previously loaded value.
// NOTE(review): the GR_VERTEX_*_OFFSET constants are defined outside this
// file; they are scaled by 4 here, so presumably they are dword indices
// into GrVertex - confirm.
__declspec(naked) void VertexCopy(GrVertex *dst, const GrVertex *src)
{
	__asm
	{
		mov			edx, [esp + 4] // edx = dst
		mov			ecx, [esp + 8] // ecx = src

		mov			eax, [ecx + 4*GR_VERTEX_X_OFFSET]

		mov			[edx + 4*GR_VERTEX_X_OFFSET], eax // x
		mov			eax, [ecx + 4*GR_VERTEX_Y_OFFSET]

		mov			[edx + 4*GR_VERTEX_Y_OFFSET], eax // y
		mov			eax, [ecx + 4*GR_VERTEX_Z_OFFSET]

		mov			[edx + 4*GR_VERTEX_Z_OFFSET], eax // z
		mov			eax, [ecx + 4*GR_VERTEX_OOW_OFFSET]

		mov			[edx + 4*GR_VERTEX_OOW_OFFSET], eax // oow
		mov			eax, [ecx + 4*GR_VERTEX_R_OFFSET]

		mov			[edx + 4*GR_VERTEX_R_OFFSET], eax // r
		mov			eax, [ecx + 4*GR_VERTEX_G_OFFSET]

		mov			[edx + 4*GR_VERTEX_G_OFFSET], eax // g
		mov			eax, [ecx + 4*GR_VERTEX_B_OFFSET]

		mov			[edx + 4*GR_VERTEX_B_OFFSET], eax // b
		mov			eax, [ecx + 4*GR_VERTEX_A_OFFSET]

		mov			[edx + 4*GR_VERTEX_A_OFFSET], eax // a
		mov			eax, [ecx + 4*GR_VERTEX_S0_OFFSET]

		mov			[edx + 4*GR_VERTEX_S0_OFFSET], eax // s0
		mov			eax, [ecx + 4*GR_VERTEX_T0_OFFSET]

		mov			[edx + 4*GR_VERTEX_T0_OFFSET], eax // t0
		mov			eax, [ecx + 4*GR_VERTEX_S1_OFFSET]

		mov			[edx + 4*GR_VERTEX_S1_OFFSET], eax // s1
		mov			eax, [ecx + 4*GR_VERTEX_T1_OFFSET]

		mov			[edx + 4*GR_VERTEX_T1_OFFSET], eax // t1

		ret
	}
}
#else // USE_ASM
// Copies the vertex fields the clipper works with from *src to *dst:
// x, y, z, oow, the r/g/b/a color and the sow/tow pair of tmuvtx[0] and
// tmuvtx[1]. Only these twelve fields of *dst are written.
void VertexCopy(GrVertex *dst, const GrVertex *src)
{
	int tmu;

	// position and 1/w
	dst->x = src->x;
	dst->y = src->y;
	dst->z = src->z;
	dst->oow = src->oow;

	// color
	dst->r = src->r;
	dst->g = src->g;
	dst->b = src->b;
	dst->a = src->a;

	// texture coordinates of both tmuvtx entries
	for (tmu = 0; tmu < 2; tmu++)
	{
		dst->tmuvtx[tmu].sow = src->tmuvtx[tmu].sow;
		dst->tmuvtx[tmu].tow = src->tmuvtx[tmu].tow;
	}
}
#endif // USE_ASM

#ifdef USE_ASM
// Assembly version of Interpolate (only built when USE_ASM is defined).
// Naked function, arguments are read straight off the stack:
//   [esp + 4] = new_v, [esp + 8] = t, [esp + 12] = v0, [esp + 16] = v1
// For each of x, y, oow, r, g, b, a, s0, t0, s1, t1 it computes
//   v0->f + t*(v1->f - v0->f)
// on the x87 stack and stores the result into new_v. z is not written.
// The computations of neighbouring fields are interleaved, with fxch used to
// bring the value needed next to the top of the stack.
// The comment lines after each instruction list the FPU stack contents after
// that instruction, top of stack (st(0)) first.
// NOTE(review): the number in parentheses after each entry looks like the
// cycles left before that result is available - confirm against the
// targeted CPU's FPU timings.
static __declspec(naked) void Interpolate(GrVertex *new_v, float t, const GrVertex *v0, const GrVertex *v1)
{
	__asm
	{
		mov			ecx, [esp + 12] // ecx = v0
		mov			edx, [esp + 16] // edx = v1

		mov			eax, [esp + 4] // eax = new_v

		fld			dword ptr [edx + 4*GR_VERTEX_X_OFFSET]
		// v1->x
		fsub		dword ptr [ecx + 4*GR_VERTEX_X_OFFSET]
		// v1->x - v0->x (2)
		fld			dword ptr [edx + 4*GR_VERTEX_Y_OFFSET]
		// v1->y
		// v1->x - v0->x (1)
		fsub		dword ptr [ecx + 4*GR_VERTEX_Y_OFFSET]
		// v1->y - v0->y (2)
		// v1->x - v0->x (0)
		fxch		st(1)
		// v1->x - v0->x (0)
		// v1->y - v0->y (2)
		fmul		dword ptr [esp + 8]
		// t*(v1->x - v0->x) (2)
		// v1->y - v0->y (1)
		fld			dword ptr [edx + 4*GR_VERTEX_OOW_OFFSET]
		// v1->oow
		// t*(v1->x - v0->x) (1)
		// v1->y - v0->y (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_OOW_OFFSET]
		// v1->oow - v0->oow (2)
		// t*(v1->x - v0->x) (0)
		// v1->y - v0->y (0)
		fxch		st(2)
		// v1->y - v0->y (0)
		// t*(v1->x - v0->x) (0)
		// v1->oow - v0->oow (2)
		fmul		dword ptr [esp + 8]
		// t*(v1->y - v0->y) (2)
		// t*(v1->x - v0->x) (0)
		// v1->oow - v0->oow (1)
		fxch		st(1)
		// t*(v1->x - v0->x) (0)
		// t*(v1->y - v0->y) (2)
		// v1->oow - v0->oow (1)
		fadd		dword ptr [ecx + 4*GR_VERTEX_X_OFFSET]
		// v0->x + t*(v1->x - v0->x) (2)
		// t*(v1->y - v0->y) (1)
		// v1->oow - v0->oow (0)
		fxch		st(2)
		// v1->oow - v0->oow (0)
		// t*(v1->y - v0->y) (1)
		// v0->x + t*(v1->x - v0->x) (2)
		fmul		dword ptr [esp + 8]
		// t*(v1->oow - v0->oow) (2)
		// t*(v1->y - v0->y) (0)
		// v0->x + t*(v1->x - v0->x) (1)
		fxch		st(1)
		// t*(v1->y - v0->y) (0)
		// t*(v1->oow - v0->oow) (2)
		// v0->x + t*(v1->x - v0->x) (1)
		fadd		dword ptr [ecx + 4*GR_VERTEX_Y_OFFSET]
		// v0->y + t*(v1->y - v0->y) (2)
		// t*(v1->oow - v0->oow) (1)
		// v0->x + t*(v1->x - v0->x) (0)
		fld			dword ptr [edx + 4*GR_VERTEX_R_OFFSET]
		// v1->r
		// v0->y + t*(v1->y - v0->y) (1)
		// t*(v1->oow - v0->oow) (0)
		// v0->x + t*(v1->x - v0->x) (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_R_OFFSET]
		// v1->r - v0->r (2)
		// v0->y + t*(v1->y - v0->y) (0)
		// t*(v1->oow - v0->oow) (0)
		// v0->x + t*(v1->x - v0->x) (0)
		fxch		st(2)
		// t*(v1->oow - v0->oow) (0)
		// v0->y + t*(v1->y - v0->y) (0)
		// v1->r - v0->r (2)
		// v0->x + t*(v1->x - v0->x) (0)
		fadd		dword ptr [ecx + 4*GR_VERTEX_OOW_OFFSET]
		// v0->oow + t*(v1->oow - v0->oow) (2)
		// v0->y + t*(v1->y - v0->y) (0)
		// v1->r - v0->r (1)
		// v0->x + t*(v1->x - v0->x) (0)
		fxch		st(3)
		// v0->x + t*(v1->x - v0->x) (0)
		// v0->y + t*(v1->y - v0->y) (0)
		// v1->r - v0->r (1)
		// v0->oow + t*(v1->oow - v0->oow) (2)
		fstp		dword ptr [eax + 4*GR_VERTEX_X_OFFSET]
		fstp		dword ptr [eax + 4*GR_VERTEX_Y_OFFSET]
		// v1->r - v0->r (0)
		// v0->oow + t*(v1->oow - v0->oow) (0)
		fmul		dword ptr [esp + 8]
		// t*(v1->r - v0->r) (2)
		// v0->oow + t*(v1->oow - v0->oow) (0)
		fxch		st(1)
		// v0->oow + t*(v1->oow - v0->oow) (0)
		// t*(v1->r - v0->r) (2)
		fstp		dword ptr [eax + 4*GR_VERTEX_OOW_OFFSET]
		// t*(v1->r - v0->r) (0)
		fadd		dword ptr [ecx + 4*GR_VERTEX_R_OFFSET]
		// v0->r + t*(v1->r - v0->r) (2)
		fld			dword ptr [edx + 4*GR_VERTEX_G_OFFSET]
		// v1->g
		// v0->r + t*(v1->r - v0->r) (1)
		fsub		dword ptr [ecx + 4*GR_VERTEX_G_OFFSET]
		// v1->g - v0->g (2)
		// v0->r + t*(v1->r - v0->r) (0)
		fld			dword ptr [edx + 4*GR_VERTEX_B_OFFSET]
		// v1->b
		// v1->g - v0->g (1)
		// v0->r + t*(v1->r - v0->r) (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_B_OFFSET]
		// v1->b - v0->b (2)
		// v1->g - v0->g (0)
		// v0->r + t*(v1->r - v0->r) (0)
		fxch		st(1)
		// v1->g - v0->g (0)
		// v1->b - v0->b (2)
		// v0->r + t*(v1->r - v0->r) (0)
		fmul		dword ptr [esp + 8]
		// t*(v1->g - v0->g) (2)
		// v1->b - v0->b (1)
		// v0->r + t*(v1->r - v0->r) (0)
		fxch		st(2)
		// v0->r + t*(v1->r - v0->r) (0)
		// v1->b - v0->b (1)
		// t*(v1->g - v0->g) (2)
		fstp		dword ptr [eax + 4*GR_VERTEX_R_OFFSET]
		// v1->b - v0->b (0)
		// t*(v1->g - v0->g) (0)
		fmul		dword ptr [esp + 8]
		// t*(v1->b - v0->b) (2)
		// t*(v1->g - v0->g) (0)
		fxch		st(1)
		// t*(v1->g - v0->g) (0)
		// t*(v1->b - v0->b) (2)
		fadd		dword ptr [ecx + 4*GR_VERTEX_G_OFFSET]
		// v0->g + t*(v1->g - v0->g) (2)
		// t*(v1->b - v0->b) (1)
		fld			dword ptr [edx + 4*GR_VERTEX_A_OFFSET]
		// v1->a
		// v0->g + t*(v1->g - v0->g) (1)
		// t*(v1->b - v0->b) (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_A_OFFSET]
		// v1->a - v0->a (2)
		// v0->g + t*(v1->g - v0->g) (0)
		// t*(v1->b - v0->b) (0)
		fxch		st(2)
		// t*(v1->b - v0->b) (0)
		// v0->g + t*(v1->g - v0->g) (0)
		// v1->a - v0->a (2)
		fadd		dword ptr [ecx + 4*GR_VERTEX_B_OFFSET]
		// v0->b + t*(v1->b - v0->b) (2)
		// v0->g + t*(v1->g - v0->g) (0)
		// v1->a - v0->a (1)
		fxch		st(1)
		// v0->g + t*(v1->g - v0->g) (0)
		// v0->b + t*(v1->b - v0->b) (2)
		// v1->a - v0->a (1)
		fstp		dword ptr [eax + 4*GR_VERTEX_G_OFFSET]
		// v0->b + t*(v1->b - v0->b) (0)
		// v1->a - v0->a (0)
		fld			dword ptr [edx + 4*GR_VERTEX_S0_OFFSET]
		// v1->s0
		// v0->b + t*(v1->b - v0->b) (0)
		// v1->a - v0->a (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_S0_OFFSET]
		// v1->s0 - v0->s0 (2)
		// v0->b + t*(v1->b - v0->b) (0)
		// v1->a - v0->a (0)
		fxch		st(1)
		// v0->b + t*(v1->b - v0->b) (0)
		// v1->s0 - v0->s0 (2)
		// v1->a - v0->a (0)
		fstp		dword ptr [eax + 4*GR_VERTEX_B_OFFSET]
		// v1->s0 - v0->s0 (0)
		// v1->a - v0->a (0)
		fmul		dword ptr [esp + 8]
		// t*(v1->s0 - v0->s0) (2)
		// v1->a - v0->a (0)
		fld			dword ptr [edx + 4*GR_VERTEX_T0_OFFSET]
		// v1->t0
		// t*(v1->s0 - v0->s0) (1)
		// v1->a - v0->a (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_T0_OFFSET]
		// v1->t0 - v0->t0 (2)
		// t*(v1->s0 - v0->s0) (0)
		// v1->a - v0->a (0)
		fxch		st(2)
		// v1->a - v0->a (0)
		// t*(v1->s0 - v0->s0) (0)
		// v1->t0 - v0->t0 (2)
		fmul		dword ptr [esp + 8]
		// t*(v1->a - v0->a) (2)
		// t*(v1->s0 - v0->s0) (0)
		// v1->t0 - v0->t0 (1)
		fxch		st(1)
		// t*(v1->s0 - v0->s0) (0)
		// t*(v1->a - v0->a) (2)
		// v1->t0 - v0->t0 (1)
		fadd		dword ptr [ecx + 4*GR_VERTEX_S0_OFFSET]
		// v0->s0 + t*(v1->s0 - v0->s0) (2)
		// t*(v1->a - v0->a) (1)
		// v1->t0 - v0->t0 (0)
		fxch		st(2)
		// v1->t0 - v0->t0 (0)
		// t*(v1->a - v0->a) (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (2)
		fmul		dword ptr [esp + 8]
		// t*(v1->t0 - v0->t0) (2)
		// t*(v1->a - v0->a) (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (1)
		fxch		st(1)
		// t*(v1->a - v0->a) (0)
		// t*(v1->t0 - v0->t0) (2)
		// v0->s0 + t*(v1->s0 - v0->s0) (1)
		fadd		dword ptr [ecx + 4*GR_VERTEX_A_OFFSET]
		// v0->a + t*(v1->a - v0->a) (2)
		// t*(v1->t0 - v0->t0) (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fld			dword ptr [edx + 4*GR_VERTEX_S1_OFFSET]
		// v1->s1
		// v0->a + t*(v1->a - v0->a) (1)
		// t*(v1->t0 - v0->t0) (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_S1_OFFSET]
		// v1->s1 - v0->s1 (2)
		// v0->a + t*(v1->a - v0->a) (0)
		// t*(v1->t0 - v0->t0) (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fxch		st(2)
		// t*(v1->t0 - v0->t0) (0)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->s1 - v0->s1 (2)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fadd		dword ptr [ecx + 4*GR_VERTEX_T0_OFFSET]
		// v0->t0 + t*(v1->t0 - v0->t0) (2)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->s1 - v0->s1 (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fld			dword ptr [edx + 4*GR_VERTEX_T1_OFFSET]
		// v1->t1
		// v0->t0 + t*(v1->t0 - v0->t0) (1)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->s1 - v0->s1 (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fsub		dword ptr [ecx + 4*GR_VERTEX_T1_OFFSET]
		// v1->t1 - v0->t1 (2)
		// v0->t0 + t*(v1->t0 - v0->t0) (0)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->s1 - v0->s1 (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fxch		st(3)
		// v1->s1 - v0->s1 (0)
		// v0->t0 + t*(v1->t0 - v0->t0) (0)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->t1 - v0->t1 (2)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fmul		dword ptr [esp + 8]
		// t*(v1->s1 - v0->s1) (2)
		// v0->t0 + t*(v1->t0 - v0->t0) (0)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->t1 - v0->t1 (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fxch		st(1)
		// v0->t0 + t*(v1->t0 - v0->t0) (0)
		// t*(v1->s1 - v0->s1) (2)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->t1 - v0->t1 (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fstp		dword ptr [eax + 4*GR_VERTEX_T0_OFFSET]
		// t*(v1->s1 - v0->s1) (0)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->t1 - v0->t1 (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fadd		dword ptr [ecx + 4*GR_VERTEX_S1_OFFSET]
		// v0->s1 + t*(v1->s1 - v0->s1) (2)
		// v0->a + t*(v1->a - v0->a) (0)
		// v1->t1 - v0->t1 (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fxch		st(2)
		// v1->t1 - v0->t1 (0)
		// v0->a + t*(v1->a - v0->a) (0)
		// v0->s1 + t*(v1->s1 - v0->s1) (2)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fmul		dword ptr [esp + 8]
		// t*(v1->t1 - v0->t1) (2)
		// v0->a + t*(v1->a - v0->a) (0)
		// v0->s1 + t*(v1->s1 - v0->s1) (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fxch		st(1)
		// v0->a + t*(v1->a - v0->a) (0)
		// t*(v1->t1 - v0->t1) (2)
		// v0->s1 + t*(v1->s1 - v0->s1) (1)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fstp		dword ptr [eax + 4*GR_VERTEX_A_OFFSET]
		// t*(v1->t1 - v0->t1) (0)
		// v0->s1 + t*(v1->s1 - v0->s1) (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fadd		dword ptr [ecx + 4*GR_VERTEX_T1_OFFSET]
		// v0->t1 + t*(v1->t1 - v0->t1) (2)
		// v0->s1 + t*(v1->s1 - v0->s1) (0)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		fxch		st(2)
		// v0->s0 + t*(v1->s0 - v0->s0) (0)
		// v0->s1 + t*(v1->s1 - v0->s1) (0)
		// v0->t1 + t*(v1->t1 - v0->t1) (2)
		fstp		dword ptr [eax + 4*GR_VERTEX_S0_OFFSET]
		fstp		dword ptr [eax + 4*GR_VERTEX_S1_OFFSET]
		fstp		dword ptr [eax + 4*GR_VERTEX_T1_OFFSET]

		ret
	}
}
#else // USE_ASM
// Writes into new_v the point at parameter t on the segment from v0 to v1,
// i.e. v0 + t*(v1 - v0), for each of: x, y, oow, r, g, b, a and the sow/tow
// pair of tmuvtx[0] and tmuvtx[1]. z is not written; the caller sets it
// where it needs it.
static void Interpolate(GrVertex *new_v, float t, const GrVertex *v0, const GrVertex *v1)
{
// linear blend of one GrVertex field between v0 and v1
#define LERP_FIELD(field)  (new_v->field = v0->field + t*(v1->field - v0->field))
	int tmu;

	// position and 1/w
	LERP_FIELD(x);
	LERP_FIELD(y);
	LERP_FIELD(oow);

	// color
	LERP_FIELD(r);
	LERP_FIELD(g);
	LERP_FIELD(b);
	LERP_FIELD(a);

	// texture coordinates of both tmuvtx entries
	for (tmu = 0; tmu < 2; tmu++)
	{
		LERP_FIELD(tmuvtx[tmu].sow);
		LERP_FIELD(tmuvtx[tmu].tow);
	}
#undef LERP_FIELD
}
#endif // USE_ASM

// Clips the directed edge v0 -> v1 against a single plane of the clip volume.
//
// plane selects the boundary:
//   0 left (gMinClipX)     1 right (gMaxClipX)
//   2 bottom (gMinClipY)   3 top (gMaxClipY)
//   4 front (gMinClipZ)    5 back (gMaxClipZ)
//
// Results are written through new_v0/new_v1. v1 itself is never emitted; the
// caller either supplies it as v0 of the following edge or keeps it itself.
//   both ends inside      -> *new_v0 = v0                          returns 1
//   both ends outside     -> nothing written                       returns 0
//   v0 outside, v1 inside -> *new_v0 = intersection                returns 1
//   v0 inside, v1 outside -> *new_v0 = v0, *new_v1 = intersection  returns 2
// An unknown plane index writes nothing and returns 0.
//
// Intersection vertices are taken from gCurrFreeClippedVert, which is
// advanced without a bounds check; the callers in this file reset it to the
// start of gClippedVerts before clipping a primitive.
static int ClipEdgeAgainstPlane(int plane, GrVertex **new_v0, GrVertex **new_v1, const GrVertex *v0, const GrVertex *v1)
{
	int verts_added;
	int d0_outside, d1_outside;
	float t, d0, d1, delta;
	GrVertex vert0, vert1, *new_v;
	const GrVertex *src_v0, *src_v1;
	// only meaningful for the z planes; initialized so they are never read
	// as indeterminate values
	float new_z = 0.0f, new_z_inv = 0.0f;
	// gives access to the sign bit of a float without casting a float
	// pointer to an int pointer (which breaks the aliasing rules)
	union { float f; int i; } sign_bits;

	// d0/d1: signed distance of v0/v1 to the plane, negative means outside
	// delta: d1 - d0
	switch (plane)
	{
		case 0: // left
			d0 = v0->x - gMinClipX;
			d1 = v1->x - gMinClipX;
			delta = v1->x - v0->x;
			break;

		case 1: // right
			d0 = gMaxClipX - v0->x;
			d1 = gMaxClipX - v1->x;
			delta = v0->x - v1->x;
			break;

		case 2: // bottom
			d0 = v0->y - gMinClipY;
			d1 = v1->y - gMinClipY;
			delta = v1->y - v0->y;
			break;

		case 3: // top
			d0 = gMaxClipY - v0->y;
			d1 = gMaxClipY - v1->y;
			delta = v0->y - v1->y;
			break;

		case 4: // front
			d0 = v0->z - gMinClipZ;
			d1 = v1->z - gMinClipZ;
			delta = v1->z - v0->z;
			new_z = gMinClipZ;
			new_z_inv = gMinClipZInv;
			break;

		case 5: // back
			d0 = gMaxClipZ - v0->z;
			d1 = gMaxClipZ - v1->z;
			delta = v0->z - v1->z;
			new_z = gMaxClipZ;
			new_z_inv = gMaxClipZInv;
			break;

		default: // not a clip plane: d0/d1/delta would be uninitialized
			return 0;
	}

	// classify by sign bit rather than with a float compare: this is what
	// the original integer test did, and it gives every value (including
	// NaN and -0.0) exactly one classification
	sign_bits.f = d0;
	d0_outside = (sign_bits.i < 0);
	sign_bits.f = d1;
	d1_outside = (sign_bits.i < 0);

	if (!d0_outside && !d1_outside) // trivial accept
	{
		*new_v0 = (GrVertex *)v0;
		verts_added = 1;
	}
	else if (d0_outside && d1_outside) // trivial reject
	{
		verts_added = 0;
	}
	else // intersect
	{
		t = 1.0f/delta;

		if (plane > 3) // z planes
		{
			// work on copies so the caller's vertices stay untouched
			VertexCopy(&vert0, v0);
			VertexCopy(&vert1, v1);

			src_v0 = &vert0;
			src_v1 = &vert1;

			// undo the 1/w divide (back to homogeneous space)
			vert0.x *= vert0.z;
			vert0.y *= vert0.z;
			vert0.tmuvtx[0].sow *= vert0.z;
			vert0.tmuvtx[0].tow *= vert0.z;
			vert0.tmuvtx[1].sow *= vert0.z;
			vert0.tmuvtx[1].tow *= vert0.z;

			vert1.x *= vert1.z;
			vert1.y *= vert1.z;
			vert1.tmuvtx[0].sow *= vert1.z;
			vert1.tmuvtx[0].tow *= vert1.z;
			vert1.tmuvtx[1].sow *= vert1.z;
			vert1.tmuvtx[1].tow *= vert1.z;
		}
		else
		{
			src_v0 = v0;
			src_v1 = v1;
		}

		if (d0_outside)
		{
			// entering: t = d0/delta is negative here, so -t is the
			// fraction of the way from v0 to v1 at which the plane is hit
			t *= d0;
			*new_v0 = gCurrFreeClippedVert++;
			Interpolate(*new_v0, -t, src_v0, src_v1);
			verts_added = 1;
			new_v = *new_v0;
		}
		else
		{
			// leaving: keep v0 and interpolate back from v1 towards v0
			// by t = d1/delta
			t *= d1;
			*new_v0 = (GrVertex *)v0;
			*new_v1 = gCurrFreeClippedVert++;
			Interpolate(*new_v1, t, src_v1, src_v0);
			verts_added = 2;
			new_v = *new_v1;
		}

		if (plane > 3) // z planes
		{
			// redo the 1/w divide with the new w (back to screen space)
			// the new vertex lies on the plane, so z and 1/z are known
			new_v->z = new_z;
			new_v->oow = new_z_inv;
			new_v->x *= new_v->oow;
			new_v->y *= new_v->oow;
			new_v->tmuvtx[0].sow *= new_v->oow;
			new_v->tmuvtx[0].tow *= new_v->oow;
			new_v->tmuvtx[1].sow *= new_v->oow;
			new_v->tmuvtx[1].tow *= new_v->oow;
		}
	}

	return verts_added;
}

int ClipAndDrawTriangle(const GrVertex *v0, const GrVertex *v1, const GrVertex *v2)
{
	GrVertex *clipped_vert_ptrs[2][MAX_INTERMIDIATE_VERTS];
	GrVertex **src, **dst;
	int i, toggle, plane, verts_added, num_verts, num_clipped_verts;

	gCurrFreeClippedVert = &gClippedVerts[0];

	clipped_vert_ptrs[0][0] = (GrVertex *)v0;
	clipped_vert_ptrs[0][1] = (GrVertex *)v1;
	clipped_vert_ptrs[0][2] = (GrVertex *)v2;
	num_clipped_verts = 3;

	// clip the poly against each of the 6 clip planes
	// clip against the z planes first to get rid of any negative Zs
	for (plane=5, toggle=0; plane>=0; plane--, toggle^=1)
	{
		// toggle back and forth between src and dst
		// filling in dst from src, and then next iteration
		// using the newly filled in dst as src...
		src = &clipped_vert_ptrs[toggle  ][0];
		dst = &clipped_vert_ptrs[toggle^1][0];

		num_verts = 0;
		for (i=num_clipped_verts-1; i>0; i--)
		{
			verts_added = ClipEdgeAgainstPlane(plane, dst, dst+1, *src, *(src+1));
			num_verts += verts_added;
			dst += verts_added;
			src++;
		}

		// we can terminate if no verts were added because the
		// last plane intersection could at most add 2 more verts which doesn't
		// make a polygon
		if (!num_verts)
		{
			return 0;
		}

		// do the last step separately to avoid having to wrap
		num_verts += ClipEdgeAgainstPlane(plane, dst, dst+1, *src, *(src-(num_clipped_verts-1)));

		num_clipped_verts = num_verts;
	}

	// draw clipped poly
#ifdef USE_GLIDE3
	grDrawVertexArray(GR_POLYGON, num_clipped_verts, &clipped_vert_ptrs[0][0]);
#else
	src = &clipped_vert_ptrs[0][0];
	for (i=1; i<num_clipped_verts-1; i++)
	{
		grDrawTriangle(src[0], src[i], src[i+1]);
	}
#endif // USE_GLIDE3

	// to get the number of triangles (num triangles = num verts - 2 for a polygon)
	return num_clipped_verts - 2;
}

int ClipAndDrawLine(const GrVertex *v0, const GrVertex *v1)
{
	GrVertex *clipped_verts[2];
	int plane;

	gCurrFreeClippedVert = &gClippedVerts[0];

	clipped_verts[0] = (GrVertex *)v0;
	clipped_verts[1] = (GrVertex *)v1;

	for (plane=5; plane>=0; plane--)
	{
		if (!ClipEdgeAgainstPlane(plane, &clipped_verts[0], &clipped_verts[1], clipped_verts[0], clipped_verts[1]))
		{
			return 0;
		}
	}

	grDrawLine(clipped_verts[0], clipped_verts[1]);

	return 1;
}
