/*
Donut Bump Mapping Demo
This demo shows how to use a bump mapping technique using Glide(tm)
Copyright (C) 1999  3Dfx Interactive, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/

#include "basics.h"
#include "lighting.h"
#include "mathutil.h"
#include "tlib.h"
#include "texcache.h"
#include "crystal.h"
#include "clip.h"
#include "camera.h"
#include "xforms.h"
#include "util.h"
#include "bspline.h"
#include "linux_utils.h"

//#define USE_ASM


// One animated "rain drop" disturbance used by DropWaves().
typedef struct
{
	float x;                       // drop centre along u, in control-point grid units
	float y;                       // drop centre along v, in control-point grid units
	float magnitude;               // height scale applied to this drop's ripple
	int start_time;                // timer value captured when the drop was (re)started
	int time_it_lasts;             // lifetime of the drop, in the same timer units
	float half_time_it_lasts_inv;  // cached 1/(0.5*time_it_lasts)
} Drop;

// Four running float terms for evaluating a cubic by forward differencing.
// NOTE(review): presumably f0_0 is the current value and f1_0..f3_0 the
// first, second and third forward differences -- confirm against the code
// that uses this struct (not visible in this part of the file).
typedef struct _cubic_forward_diff
{
	float f0_0;
	float f1_0;
	float f2_0;
	float f3_0;
} CubicForwardDiff;

// external variables
// (defined in other translation units; only declared here)
extern float SCREEN_RES_X, SCREEN_RES_Y;
extern int gSubdivs, gDynamicLod, gWireframe, gSpecular, gEnvmap, gEditing;
extern int gDrawNormals, gFillinGaps, gPause, gUseCtrlNorms, gForwardDifferencing;
extern Matrix gProjMat;
extern TextureCache *gTexCache[2];
extern CrystalBall gObjectCrystalBall, gLightCrystalBall;
extern POINT gMousePos1;
extern Vector *gSelectedCtrlPt;
extern Camera gCamera;
extern Vector gWorldScale;
extern float gLightAmbientFactor, gLightDiffuseFactor, gLightSpecularFactor;

// local variables
// gTexture is bound to TMU0 and gEnvmapTexture to TMU1 (when a second TMU exists)
static GlideTexture gTexture, gEnvmapTexture;
// scratch vertex storage sized for a (MAX_SUBDIVS+1) x (MAX_SUBDIVS+1) grid
static VertexData gVerts[(MAX_SUBDIVS+1)*(MAX_SUBDIVS+1)];

// global variables
// gSurfaces is an array of gNumSurfaces surfaces; it is allocated by
// LoadSurfaces()/dope() and released by CleanupSurface()
BSplineSurface *gSurfaces;
int gNumSurfaces;
// compared against SURFACE_TYPE_BSPLINE when deciding how patches are laid out
int gSurfaceType;

// local function prototypes
static int DrawConvexPolygon(int num_verts, VertexData *vert_ptrs[]);
static int DrawBSplinePatch(BSplinePatch *patch, float s0, float t0, float s1, float t1);
static int DrawBSplinePatchForwardDiff(BSplinePatch *patch, float s0, float t0, float s1, float t1);
// NOTE(review): listed with the local prototypes but not declared static
void VertexDataCopy(VertexData *dest, const VertexData *src);


// Recompute surface->ctrl_norms from surface->ctrl_pts.
//
// For every control point four edge vectors are built, one toward each
// grid neighbour (+u, +v, -u, -v).  Where a neighbour does not exist
// (border of the grid) the edge is taken from the opposite neighbour to
// the point instead, so the edge keeps pointing the same way.  The cross
// products of consecutive edges are averaged (the per-face normals are
// not normalized individually) and the result is normalized, with W = 0.
void SetControlNormals(BSplineSurface *surface)
{
	int row, col, k;
	Vector edge[4], face[4];
	Vector *cur, *out;
	Vector *head[4], *tail[4];	// edge[k] = *head[k] - *tail[k]

	cur = &surface->ctrl_pts[0];
	out = &surface->ctrl_norms[0];
	for (row=0; row<surface->nv; row++)
	{
		for (col=0; col<surface->nu; col++, cur++, out++)
		{
			// edge 0: toward the next point in the row
			if (col < surface->nu-1)
			{
				head[0] = cur+1;
				tail[0] = cur;
			}
			else
			{
				head[0] = cur;
				tail[0] = cur-1;
			}

			// edge 1: toward the point in the next row
			if (row < surface->nv-1)
			{
				head[1] = cur+surface->nu;
				tail[1] = cur;
			}
			else
			{
				head[1] = cur;
				tail[1] = cur-surface->nu;
			}

			// edge 2: toward the previous point in the row
			if (col > 0)
			{
				head[2] = cur-1;
				tail[2] = cur;
			}
			else
			{
				head[2] = cur;
				tail[2] = cur+1;
			}

			// edge 3: toward the point in the previous row
			if (row > 0)
			{
				head[3] = cur-surface->nu;
				tail[3] = cur;
			}
			else
			{
				head[3] = cur;
				tail[3] = cur+surface->nu;
			}

			for (k=0; k<4; k++)
			{
				edge[k][X] = (*head[k])[X] - (*tail[k])[X];
				edge[k][Y] = (*head[k])[Y] - (*tail[k])[Y];
				edge[k][Z] = (*head[k])[Z] - (*tail[k])[Z];
			}

			// one normal per pair of consecutive edges around the point
			CrossProduct(face[0], edge[0], edge[1]);
			CrossProduct(face[1], edge[1], edge[2]);
			CrossProduct(face[2], edge[2], edge[3]);
			CrossProduct(face[3], edge[3], edge[0]);

			(*out)[X] = 0.25f*(face[0][X] + face[1][X] + face[2][X] + face[3][X]);
			(*out)[Y] = 0.25f*(face[0][Y] + face[1][Y] + face[2][Y] + face[3][Y]);
			(*out)[Z] = 0.25f*(face[0][Z] + face[1][Z] + face[2][Z] + face[3][Z]);
			(*out)[W] = 0.0f;

			Normalize((*out));
		}
	}
}

void dope()
{
	int i, j;

	gNumSurfaces = 1;
	gSurfaces = new BSplineSurface;
	gSurfaces->nu = 22;
	gSurfaces->nv = 22;
	gSurfaces->ctrl_pts = new Vector[gSurfaces->nu*gSurfaces->nv];
	gSurfaces->ctrl_norms = new Vector[gSurfaces->nu*gSurfaces->nv];
	gSurfaces->patches = new BSplinePatch[(gSurfaces->nu-3)*(gSurfaces->nv-3)];
	for (j=0; j<gSurfaces->nu-3; j++)
	{
		for (i=0; i<gSurfaces->nv-3; i++)
		{
			gSurfaces->patches[i*(gSurfaces->nu-3) + j].ctrl_pts = gSurfaces->ctrl_pts;
			gSurfaces->patches[i*(gSurfaces->nu-3) + j].nu = gSurfaces->nu;
			gSurfaces->patches[i*(gSurfaces->nu-3) + j].u = j+3;
			gSurfaces->patches[i*(gSurfaces->nu-3) + j].v = i+3;
		}
	}

	for (j=0; j<gSurfaces->nv; j++)
	{
		for (i=0; i<gSurfaces->nu; i++)
		{
			gSurfaces->ctrl_pts[j*gSurfaces->nu + i][X] = 0.5f - (gSurfaces->nu>>1) + i;
			gSurfaces->ctrl_pts[j*gSurfaces->nu + i][Y] = 0.5f - (gSurfaces->nv>>1) + j;
			gSurfaces->ctrl_pts[j*gSurfaces->nu + i][Z] = 0.0f;
			gSurfaces->ctrl_pts[j*gSurfaces->nu + i][W] = 1.0f;
		}
	}

	SetControlNormals(gSurfaces);

	SaveSurfaces("data/surf.txt");
	CleanupSurface();
}

void DropWaves()
{
  const int NUM_DROPS = 4;
	int i, j, k, curr_time;
	float ht, dist_sqr, max_dist_sqr, max_dist_sqr_inv, factor, time;
	Vector *vert;
	static Drop drops[NUM_DROPS];

	curr_time = linux_timeGetTime();

	for (k=0; k<NUM_DROPS; k++)
	{
	  if (curr_time - drops[k].start_time > drops[k].time_it_lasts)
		{
		  //drops[k].x = (gSurfaces->nu*0.5f); // rand())*(1.0f/RAND_MAX);
		  //drops[k].y = (gSurfaces->nv*0.5f); // rand())*(1.0f/RAND_MAX);

		  float r1 = (float)rand() / (float)RAND_MAX;
		  float r2 = (float)rand() / (float)RAND_MAX;

		  drops[k].x = ((float)gSurfaces->nu) * r1;
		  drops[k].y = ((float)gSurfaces->nv) * r2;


			drops[k].start_time = curr_time;
			drops[k].time_it_lasts = (int)(5000.0f + (10000.0f*rand())*(1.0f/RAND_MAX));
			drops[k].half_time_it_lasts_inv = 1.0f/(0.5f*drops[k].time_it_lasts);
			drops[k].magnitude = 0.01f*(50.0f + 100.0f*rand()*(1.0f/RAND_MAX));
		}
	}

	time = DEG_TO_RAD(linux_timeGetTime()>>3);
	max_dist_sqr = (float)SQR(gSurfaces->nu>>1) + (float)SQR(gSurfaces->nv>>1);
	max_dist_sqr_inv = 1.0f/max_dist_sqr;
	vert = &gSurfaces->ctrl_pts[0];
	for (j=0; j<gSurfaces->nv; j++)
	{
		for (i=0; i<gSurfaces->nu; i++)
		{
			(*vert)[Z] = 0.0f;
			for (k=0; k<NUM_DROPS; k++)
			{
				dist_sqr = SQR(i - drops[k].x) + SQR(j - drops[k].y);
				if (dist_sqr <= max_dist_sqr)
				{
					factor = fsqrt(dist_sqr*max_dist_sqr_inv);
					ht = 1.0f - ABS(curr_time - (drops[k].start_time + (drops[k].time_it_lasts>>1))) * drops[k].half_time_it_lasts_inv;
					ht *= drops[k].magnitude * SQR(1.0f-factor) * (float)cos(5.0f*(2.0f*PI)*factor - time);
					(*vert)[Z] -= ht;
				}
			}
			vert++;
		}
	}

	if (gUseCtrlNorms)
	{
		SetControlNormals(gSurfaces);
	}
}

// Load the surface texture, the environment-map texture and the surface
// description, then hand the textures to the texture cache(s).
//
// Returns FXTRUE on success.  On failure every texture data buffer that
// had already been loaded is released and FXFALSE is returned.
FxBool InitSurface()
{
	int env_loaded, surfaces_loaded;

	// uncomment to regenerate data/surf.txt as a flat grid:
//	dope();

	gSurfaces = NULL;
	gNumSurfaces = 0;
	gSelectedCtrlPt = NULL;

	if (!LoadGlideTextureTGA("data/tex.tga", &gTexture.tex_info, GR_TEXFMT_RGB_565, 0, 4))
	{
		return FXFALSE;
	}

	// each later step only runs when the one before it succeeded
	env_loaded = LoadGlideTextureTGA("data/env.tga", &gEnvmapTexture.tex_info, GR_TEXFMT_ARGB_4444, 127, 4) ? 1 : 0;
	surfaces_loaded = (env_loaded && LoadSurfaces("data/surf.txt")) ? 1 : 0;

	if (!surfaces_loaded)
	{
		delete [] gTexture.tex_info.data;
		if (env_loaded)
		{
			delete [] gEnvmapTexture.tex_info.data;
		}
		return FXFALSE;
	}

	// (disabled) SpherizeTexture(&gEnvmapTexture.tex_info) used to be
	// applied here; its failure path released both texture buffers and
	// returned FXFALSE

	GetTexCache(GR_TMU0)->CacheTexture(&gTexture);
	if (GetNumTMUs() > 1)
	{
		// the environment map lives on the second TMU when there is one
		GetTexCache(GR_TMU1)->CacheTexture(&gEnvmapTexture);
	}

	gSurfaceType = SURFACE_TYPE_BSPLINE;

	return FXTRUE;
}

// Ask the texture cache(s) to reload the surface texture (TMU0) and,
// when a second TMU is present, the environment map (TMU1).
void ReloadBsplineTextures()
{
	GetTexCache(GR_TMU0)->ReloadTexture(&gTexture);

	if (GetNumTMUs() <= 1)
	{
		return;
	}

	GetTexCache(GR_TMU1)->ReloadTexture(&gEnvmapTexture);
}

FxBool LoadSurfaces(char *filename)
{
	FILE *fp;
	char str[256];
	int i, j;
	BSplineSurface *surface;
	BSplinePatch *patch;

	if (!(fp = fopen(filename, "r")))
	{
		return FXFALSE;
	}

	// read in the header to make sure it's the right type of file
	if (!fgets(str, 256, fp))
	{
		fclose(fp);
		return FXFALSE;
	}
	if (strncmp(str, "B-Spline Surface", 16))
	{
		fclose(fp);
		return FXFALSE;
	}

	// read in number of surfaces
	// skip white spaces and anything else that's not "%d"
	do
	{
		if (!fgets(str, 256, fp))
		{
			fclose(fp);
			return FXFALSE;
		}
	} while (sscanf(str, "%d", &i) != 1);
	gNumSurfaces = i;

	// delete any surfaces before we reallocate more
	if (gSurfaces)
	{
		for (surface = &gSurfaces[0]; surface != &gSurfaces[gNumSurfaces]; surface++)
		{
			delete [] surface->ctrl_pts;
			delete [] surface->ctrl_norms;
		}
		delete [] gSurfaces;
	}
	gSurfaces = new BSplineSurface[gNumSurfaces];
	if (!gSurfaces)
	{
		fclose(fp);
		return FXFALSE;
	}
	for (surface = &gSurfaces[0]; surface != &gSurfaces[gNumSurfaces]; surface++)
	{
		// read in nu and nv for each surface
		// skip white spaces and anything else that's not "%d %d"
		do
		{
			if (!fgets(str, 256, fp))
			{
				delete [] gSurfaces;
				gSurfaces = NULL;
				gNumSurfaces = 0;
				fclose(fp);
				return FXFALSE;
			}
		} while (sscanf(str, "%d %d", &surface->nu, &surface->nv) != 2);

		// read in the cotrol points
		surface->ctrl_pts = new Vector[surface->nu*surface->nv];
		surface->ctrl_norms = new Vector[surface->nu*surface->nv];
		if (!surface->ctrl_pts || !surface->ctrl_norms)
		{
			delete [] gSurfaces;
			gSurfaces = NULL;
			gNumSurfaces = 0;
			fclose(fp);
			return FXFALSE;
		}
		i = 0;
		while (i < surface->nu*surface->nv)
		{
			if (!fgets(str, 256, fp))
			{
				delete [] gSurfaces;
				gSurfaces = NULL;
				gNumSurfaces = 0;
				fclose(fp);
				return FXFALSE;
			}
			if (sscanf(str, "%f %f %f", &surface->ctrl_pts[i][X], &surface->ctrl_pts[i][Y], &surface->ctrl_pts[i][Z]) == 3)
			{
				surface->ctrl_pts[i][W] = 1.0f;
				i++;
			}
		}

		SetControlNormals(surface);

		surface->patches = new BSplinePatch[(surface->nu-3)*(surface->nv-3)];
		patch = surface->patches;
		for (j=3; j<surface->nu; j++)
		{
			for (i=3; i<surface->nv; i++)
			{
				patch->ctrl_pts = surface->ctrl_pts;
				patch->ctrl_norms = surface->ctrl_norms;
				patch->nu = surface->nu;
//				patch->u = j;
//				patch->v = i;
				patch++;
			}
		}
	}

	fclose(fp);

	return FXTRUE;
}

// Write gSurfaces to a text file that LoadSurfaces() can read back:
// a "B-Spline Surface" header, the surface count, and for each surface
// "nu nv" followed by one "x y z" line per control point (two decimals),
// with a blank line after every row of nu points.
//
// Returns FXTRUE when the file was written completely, FXFALSE when it
// could not be opened or a write/close error was reported.
FxBool SaveSurfaces(char *filename)
{
	FILE *fp;
	int i, j, s, index;
	BSplineSurface *surface;
	FxBool ok;

	if (!(fp = fopen(filename, "w")))
	{
		return FXFALSE;
	}

	fprintf(fp, "B-Spline Surface\n\n");
	fprintf(fp, "%d\n\n", gNumSurfaces);
	for (s=0; s<gNumSurfaces; s++)
	{
		surface = &gSurfaces[s];
		fprintf(fp, "%d %d\n\n", surface->nu, surface->nv);
		index = 0;
		for (j=0; j<surface->nv; j++)
		{
			for (i=0; i<surface->nu; i++)
			{
				fprintf(fp, "%.2f %.2f %.2f\n", surface->ctrl_pts[index][X], surface->ctrl_pts[index][Y], surface->ctrl_pts[index][Z]);
				index++;
			}
			fprintf(fp, "\n");
		}
	}

	// a failed write (e.g. disk full) sets the stream's error indicator;
	// buffered data may also only fail when fclose() flushes it
	ok = ferror(fp) ? FXFALSE : FXTRUE;
	if (fclose(fp) != 0)
	{
		ok = FXFALSE;
	}

	return ok;
}

void CleanupSurface()
{
	int i;

	for (i=0; i<gNumSurfaces; i++)
	{
		delete [] gSurfaces[i].ctrl_pts;
		delete [] gSurfaces[i].ctrl_norms;
		delete [] gSurfaces[i].patches;
	}
	delete [] gSurfaces;
	gSurfaces = NULL;
	gNumSurfaces = 0;

	GetTexCache(GR_TMU0)->DestroyGlideTexture(&gTexture);
	if (GetNumTMUs() > 1)
	{
		GetTexCache(GR_TMU1)->DestroyGlideTexture(&gEnvmapTexture);
	}
}

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// capacity of the 2D curve control point array below
#define MAX_POINTS  256
// number of line segments used to draw one cubic curve section
#define CURVE_SUBDIVS  64

// local variables
// number of entries of gPoints in use, and the 2D curve control points
// themselves (indexed with [X] and [Y])
static int gNumPoints;
static float gPoints[MAX_POINTS][2];

void DrawBezierSection(float *p0, float *p1, float *p2, float *p3)
{
	int i;
	float t, t2, t3, one_minus_t, one_minus_t2, one_minus_t3;
	GrVertex verts[CURVE_SUBDIVS+1];

	for (i=0; i<=CURVE_SUBDIVS; i++)
	{
		t = (float)i*(1.0f/(float)CURVE_SUBDIVS);
		t2 = t*t;
		t3 = t*t2;
		one_minus_t = 1.0f-t;
		one_minus_t2 = one_minus_t*one_minus_t;
		one_minus_t3 = one_minus_t*one_minus_t2;

		// compute the Bezier basis functions
		verts[i].x = one_minus_t3*p0[X] + 3.0f*t*one_minus_t2*p1[X] + 3.0f*t2*one_minus_t*p2[X] + t3*p3[X];
		verts[i].y = one_minus_t3*p0[Y] + 3.0f*t*one_minus_t2*p1[Y] + 3.0f*t2*one_minus_t*p2[Y] + t3*p3[Y];
		verts[i].oow = 1.0f;

		if (i > 0)
		{
			grConstantColorValue(0xff00ffff);
			grDrawLine(&verts[i-1], &verts[i]);
		}
		if (i == 0 || i == CURVE_SUBDIVS)
		{
			GrVertex vert0, vert1;

			grConstantColorValue(0xff0000ff);
			vert0.oow = vert1.oow = 1.0f;
			vert0.x = verts[i].x - 4.0f;
			vert0.y = verts[i].y - 4.0f;
			vert1.x = verts[i].x + 4.0f;
			vert1.y = verts[i].y + 4.0f;
			grDrawLine(&vert0, &vert1);
			vert0.x += 8.0f;
			vert1.x -= 8.0f;
			grDrawLine(&vert0, &vert1);
		}
	}
}

void DrawBSplineSection(float *p0, float *p1, float *p2, float *p3)
{
	int i;
	float t, t2, t3, b_3, b_2, b_1, b_0;
	GrVertex verts[CURVE_SUBDIVS+1];

	for (i=0; i<=CURVE_SUBDIVS; i++)
	{
		t = (float)i*(1.0f/(float)CURVE_SUBDIVS);
		t2 = t*t;
		t3 = t*t2;

		// basis functions
		b_3 = (1.0f/6.0f)*(1.0f - 3.0f*t + 3.0f*t2 - t3);
		b_2 = (1.0f/6.0f)*(4.0f - 6.0f*t2 + 3.0f*t3);
		b_1 = (1.0f/6.0f)*(1.0f + 3.0f*t + 3.0f*t2 - 3.0f*t3);
		b_0 = (1.0f/6.0f)*(t3);

		// compute the BSpline basis functions
		verts[i].x = b_3*p0[X] + b_2*p1[X] + b_1*p2[X] + b_0*p3[X];
		verts[i].y = b_3*p0[Y] + b_2*p1[Y] + b_1*p2[Y] + b_0*p3[Y];
		verts[i].oow = 1.0f;

		if (i > 0)
		{
			grConstantColorValue(0xff00ffff);
			grDrawLine(&verts[i-1], &verts[i]);
		}
		if (i == 0 || i == CURVE_SUBDIVS)
		{
			GrVertex vert0, vert1;

			grConstantColorValue(0xff0000ff);
			vert0.oow = vert1.oow = 1.0f;
			vert0.x = verts[i].x - 4.0f;
			vert0.y = verts[i].y - 4.0f;
			vert1.x = verts[i].x + 4.0f;
			vert1.y = verts[i].y + 4.0f;
			grDrawLine(&vert0, &vert1);
			vert0.x += 8.0f;
			vert1.x -= 8.0f;
			grDrawLine(&vert0, &vert1);
		}
	}
}

// Draw gPoints as a sequence of independent Bezier sections, four
// consecutive control points per section.  Points left over after the
// last complete group of four are ignored.
void DrawBezierCurve()
{
	const int usable = gNumPoints & ~3;	// round down to a multiple of 4
	int first = 0;

	while (first < usable)
	{
		DrawBezierSection(gPoints[first], gPoints[first+1], gPoints[first+2], gPoints[first+3]);
		first += 4;
	}
}

// Draw gPoints as a B-spline curve: one section for every window of four
// consecutive control points, the window advancing one point at a time.
void DrawBSplineCurve()
{
	float (*window)[2] = gPoints;	// first point of the current window
	int last;

	for (last=3; last<gNumPoints; last++, window++)
	{
		DrawBSplineSection(window[0], window[1], window[2], window[3]);
	}
}

// Fill gPoints/gNumPoints with the 100 control points that spell out
// "donut" in cursive.  The points were authored at 800x600 and are scaled
// to the current screen resolution here.
static void SetupCurve()
{
	// Alternative control point sets (800x600), kept for experimentation:
	//   no continuity (8 pts):  (156,299) (171,233) (198,176) (296,146)
	//                           (400,138) (462,179) (491,228) (488,298)
	//   c1 continuous (8 pts):  (237,329) (298,213) (453,224) (530,303)
	//                           (530,303) (607,382) (606,445) (504,495)
	//   g0 continuous (8 pts):  (237,329) (298,213) (453,224) (526,302)
	//                           (526,302) (501,385) (433,446) (339,450)
	//   curve section (4 pts):  (237,329) (298,213) (453,224) (493,315)

	// one row per group of four control points
	static const struct { float x, y; } donut_pts[100] =
	{
		{117.0f, 345.0f}, { 53.0f, 349.0f}, { 59.0f, 428.0f}, {107.0f, 426.0f},
		{107.0f, 426.0f}, {150.0f, 427.0f}, {167.0f, 334.0f}, {204.0f, 214.0f},
		{204.0f, 214.0f}, {233.0f,  94.0f}, {185.0f, 112.0f}, {171.0f, 261.0f},
		{171.0f, 261.0f}, {155.0f, 352.0f}, {157.0f, 414.0f}, {189.0f, 426.0f},
		{189.0f, 426.0f}, {226.0f, 434.0f}, {233.0f, 350.0f}, {288.0f, 347.0f},
		{288.0f, 347.0f}, {297.0f, 344.0f}, {264.0f, 347.0f}, {247.0f, 373.0f},
		{247.0f, 373.0f}, {226.0f, 391.0f}, {237.0f, 433.0f}, {278.0f, 432.0f},
		{278.0f, 432.0f}, {328.0f, 426.0f}, {317.0f, 364.0f}, {299.0f, 352.0f},
		{299.0f, 352.0f}, {301.0f, 335.0f}, {315.0f, 355.0f}, {355.0f, 342.0f},
		{355.0f, 342.0f}, {402.0f, 326.0f}, {361.0f, 373.0f}, {344.0f, 408.0f},
		{344.0f, 408.0f}, {319.0f, 452.0f}, {345.0f, 418.0f}, {361.0f, 383.0f},
		{361.0f, 383.0f}, {380.0f, 341.0f}, {389.0f, 338.0f}, {428.0f, 335.0f},
		{428.0f, 335.0f}, {465.0f, 337.0f}, {428.0f, 375.0f}, {412.0f, 410.0f},
		{412.0f, 410.0f}, {409.0f, 449.0f}, {441.0f, 429.0f}, {459.0f, 412.0f},
		{459.0f, 412.0f}, {485.0f, 384.0f}, {495.0f, 364.0f}, {503.0f, 352.0f},
		{503.0f, 352.0f}, {523.0f, 318.0f}, {512.0f, 346.0f}, {479.0f, 399.0f},
		{479.0f, 399.0f}, {462.0f, 445.0f}, {493.0f, 435.0f}, {513.0f, 431.0f},
		{513.0f, 431.0f}, {541.0f, 420.0f}, {551.0f, 399.0f}, {571.0f, 364.0f},
		{571.0f, 364.0f}, {596.0f, 312.0f}, {585.0f, 352.0f}, {559.0f, 393.0f},
		{559.0f, 393.0f}, {534.0f, 443.0f}, {572.0f, 441.0f}, {598.0f, 417.0f},
		{598.0f, 417.0f}, {628.0f, 382.0f}, {647.0f, 355.0f}, {673.0f, 279.0f},
		{673.0f, 279.0f}, {740.0f, 114.0f}, {712.0f, 121.0f}, {686.0f, 212.0f},
		{686.0f, 212.0f}, {657.0f, 288.0f}, {630.0f, 395.0f}, {645.0f, 421.0f},
		{645.0f, 421.0f}, {669.0f, 450.0f}, {705.0f, 434.0f}, {747.0f, 400.0f},
		{616.0f, 203.0f}, {651.0f, 205.0f}, {722.0f, 200.0f}, {772.0f, 190.0f}
	};
	int idx;

	gNumPoints = 100;

	// copy the table, converting from 800x600 to the current resolution
	for (idx=0; idx<gNumPoints; idx++)
	{
		gPoints[idx][X] = donut_pts[idx].x * (SCREEN_RES_X/800.0f);
		gPoints[idx][Y] = donut_pts[idx].y * (SCREEN_RES_Y/600.0f);
	}
}
/*
int RenderBSplineSurface2()
{
	static int first = 1;
	if (first)
	{
		first = 0;
		SetupCurve();
	}
	static int bspline = 0;

	GrVertex vert0, vert1;
	int i;

	if ((GetAsyncKeyState(VK_SPACE) & 0x8001) == 0x8001)
	{
		bspline ^= 1;
	}

	grAlphaBlendFunction(GR_BLEND_SRC_ALPHA, GR_BLEND_ONE_MINUS_SRC_ALPHA, GR_BLEND_ZERO, GR_BLEND_ZERO);
	grColorCombine(GR_COMBINE_FUNCTION_LOCAL, GR_COMBINE_FACTOR_NONE,
								 GR_COMBINE_LOCAL_CONSTANT, GR_COMBINE_OTHER_NONE, FXFALSE);
	grAlphaCombine(GR_COMBINE_FUNCTION_LOCAL, GR_COMBINE_FACTOR_NONE,
								 GR_COMBINE_LOCAL_CONSTANT, GR_COMBINE_OTHER_NONE, FXFALSE);

	grConstantColorValue(0xffffff00);
	for (i=0; i<gNumPoints; i++)
	{
		vert0.oow = vert1.oow = 1.0f;
		vert0.x = gPoints[i][X];
		vert0.y = gPoints[i][Y] - 4.0f;
		vert1.x = gPoints[i][X];
		vert1.y = gPoints[i][Y] + 3.0f;
		grDrawLine(&vert0, &vert1);
		vert0.x = gPoints[i][X] - 4.0f;
		vert0.y = gPoints[i][Y];
		vert1.x = gPoints[i][X] + 3.0f;
		vert1.y = gPoints[i][Y];
		grDrawLine(&vert0, &vert1);
	}

	grConstantColorValue(0xff00ffff);
	if (bspline)
	{
		DrawBSplineCurve();
	}
	else
	{
		DrawBezierCurve();
	}

	return 0;
}
*/
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Render every loaded surface for the current frame and return the
// number of polygons reported by DrawBSplineSurface().  Unless paused,
// the wave animation is advanced first.
int RenderBSplineSurface()
{
	int idx, total;

	if (gPause == 0)
	{
		DropWaves();
	}

	grAlphaBlendFunction(GR_BLEND_ONE, GR_BLEND_ZERO, GR_BLEND_ZERO, GR_BLEND_ZERO);

	// select the surface texture, plus the environment map when a second
	// TMU is available
	GetTexCache(GR_TMU0)->TexSource(&gTexture);
	if (GetNumTMUs() > 1)
	{
		GetTexCache(GR_TMU1)->TexSource(&gEnvmapTexture);
	}

	// every surface is drawn over the texture coordinate range 0..1024
	total = 0;
	idx = 0;
	while (idx < gNumSurfaces)
	{
		total += DrawBSplineSurface(&gSurfaces[idx], 0.0f, 0.0f, 1024.0f, 1024.0f);
		idx++;
	}

	return total;
}

// draw a strip of triangles from a polygon which could be convex
// this is used to fill in the gaps of adjacent bspline patches that
// don't have the same subdivs
static int DrawConvexPolygon(int num_verts, VertexData *vert_ptrs[])
{
	int i, num_polys;
	VertexData *v0, *v1, *v2;

	num_polys = 0;

	if (!(num_verts & 1))
	{
		v0 = vert_ptrs[num_verts>>1];
		v1 = vert_ptrs[(num_verts>>1)-1];
		v2 = vert_ptrs[(num_verts>>1)+1];
		if (!(v0->outcode | v1->outcode | v2->outcode))
		{
			grDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
			num_polys++;
		}
		else if (!(v0->outcode & v1->outcode & v2->outcode))
		{
			num_polys += ClipAndDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
		}
	}

	v0 = vert_ptrs[0];
	v1 = vert_ptrs[1];
	v2 = vert_ptrs[num_verts-1];
	if (!(v0->outcode | v1->outcode | v2->outcode))
	{
		grDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
		num_polys++;
	}
	else if (!(v0->outcode & v1->outcode & v2->outcode))
	{
		num_polys += ClipAndDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
	}

	// do a strip mesh
	for (i=2; i<(num_verts+1)>>1; i++)
	{
		v0 = v1;
		v1 = v2;
		v2 = vert_ptrs[i];
		if (!(v0->outcode | v1->outcode | v2->outcode))
		{
			grDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
			num_polys++;
		}
		else if (!(v0->outcode & v1->outcode & v2->outcode))
		{
			num_polys += ClipAndDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
		}

		v0 = v1;
		v1 = v2;
		v2 = vert_ptrs[num_verts-i];
		if (!(v0->outcode | v1->outcode | v2->outcode))
		{
			grDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
			num_polys++;
		}
		else if (!(v0->outcode & v1->outcode & v2->outcode))
		{
			num_polys += ClipAndDrawTriangle(&v0->vertex, &v1->vertex, &v2->vertex);
		}
	}

	return num_polys;
}

int DrawBSplineSurface(BSplineSurface *surface, float s0, float t0, float s1, float t1)
{
	int i, j, k, num_polys, num_verts, u, v;
	float ds, s, dt, t, dist_inv, frustum_half_angle;
	BSplinePatch *patch;
	VertexData *vert_ptrs[MAX_SUBDIVS+MAX_SUBDIVS+2];
	Vector *pt[4], center_pt, frustum_inner_normals[4], dir[4], cam_pos;

	if (gEnvmap)
	{
		grTexCombine(GR_TMU1, GR_COMBINE_FUNCTION_LOCAL,
								 GR_COMBINE_FACTOR_NONE,
								 GR_COMBINE_FUNCTION_LOCAL,
								 GR_COMBINE_FACTOR_NONE, FXFALSE, FXFALSE);
		grTexCombine(GR_TMU0, GR_COMBINE_FUNCTION_BLEND,
								 GR_COMBINE_FACTOR_OTHER_ALPHA,
								 GR_COMBINE_FUNCTION_LOCAL,
								 GR_COMBINE_FACTOR_ONE, FXFALSE, FXFALSE);
		// enable 2nd S and Ts
#ifdef USE_GLIDE3
		grVertexLayout(GR_PARAM_ST1, offsetof(GrVertex, tmuvtx[1]), GR_PARAM_ENABLE);
#else
		grHints(GR_HINT_STWHINT, GR_STWHINT_ST_DIFF_TMU1);
#endif // USE_GLIDE3
	}
	else
	{
		grTexCombine(GR_TMU0, GR_COMBINE_FUNCTION_LOCAL,
								 GR_COMBINE_FACTOR_NONE,
								 GR_COMBINE_FUNCTION_LOCAL,
								 GR_COMBINE_FACTOR_NONE, FXFALSE, FXFALSE);
		// disable 2nd S and Ts
#ifdef USE_GLIDE3
		grVertexLayout(GR_PARAM_ST1, 0, GR_PARAM_DISABLE);
#else
		grHints(GR_HINT_STWHINT, 0);
#endif // USE_GLIDE3
	}

	if (gSpecular)
	{
		grColorCombine(GR_COMBINE_FUNCTION_SCALE_OTHER_ADD_LOCAL_ALPHA,
									 GR_COMBINE_FACTOR_LOCAL,
									 GR_COMBINE_LOCAL_ITERATED,
									 GR_COMBINE_OTHER_TEXTURE, FXFALSE);
	}
	else
	{
		grColorCombine(GR_COMBINE_FUNCTION_SCALE_OTHER,
									 GR_COMBINE_FACTOR_LOCAL,
									 GR_COMBINE_LOCAL_ITERATED,
									 GR_COMBINE_OTHER_TEXTURE, FXFALSE);
	}

	grAlphaCombine(GR_COMBINE_FUNCTION_LOCAL,
								 GR_COMBINE_FACTOR_NONE,
								 GR_COMBINE_LOCAL_ITERATED,
								 GR_COMBINE_OTHER_NONE, FXFALSE);

	grAlphaBlendFunction(GR_BLEND_ONE, GR_BLEND_ZERO, GR_BLEND_ZERO, GR_BLEND_ZERO);

	frustum_half_angle = DEG_TO_RAD(50.0f);
	// bottom frustum pyramid plane
	RotateByAxis(frustum_inner_normals[0], gCamera.v,     -frustum_half_angle, gCamera.u);
	// top frustum pyramid plane
	RotateByAxis(frustum_inner_normals[1], gCamera.v, frustum_half_angle - PI, gCamera.u);

	frustum_half_angle = DEG_TO_RAD(40.0f);
	// right frustum pyramid plane
	RotateByAxis(frustum_inner_normals[2], gCamera.u, PI - frustum_half_angle, gCamera.v);
	// left frustum pyramid plane
	RotateByAxis(frustum_inner_normals[3], gCamera.u,      frustum_half_angle, gCamera.v);

	// scale the camera position due to the scale matrix
	cam_pos[X] = gCamera.pos[X]/gWorldScale[X];
	cam_pos[Y] = gCamera.pos[Y]/gWorldScale[Y];
	cam_pos[Z] = gCamera.pos[Z]/gWorldScale[Z];

	if (gSurfaceType == SURFACE_TYPE_BSPLINE)
	{
		u = surface->nu-3;
		v = surface->nv-3;
	}
	else
	{
		u = (surface->nu-1)/3;
		v = (surface->nv-1)/3;
	}

	ds = (s1 - s0)/(float)u;
	dt = (t1 - t0)/(float)v;

	num_polys = 0;
	patch = surface->patches;

	for (j=0, t=t0; j<v; j++, t+=dt)
	{
		for (i=0, s=s0; i<u; i++, s+=ds, patch++)
		{
			if (gSurfaceType == SURFACE_TYPE_BSPLINE)
			{
				patch->u = i + 3;
				patch->v = j + 3;
			}
			else
			{
				patch->u = i*3 + 3;
				patch->v = j*3 + 3;
			}

			// 0 subdivs, unless it's drawn
			patch->subdivs = 0;

			pt[0] = &patch->ctrl_pts[(patch->v-3)*patch->nu + (patch->u-3)];
			pt[1] = pt[0] + 3;
			pt[2] = pt[0] + 3*patch->nu;
			pt[3] = pt[2] + 3;

			// find out if the patch is in the view frustum
			dir[0][X] = (*pt[0])[X] - cam_pos[X];
			dir[0][Y] = (*pt[0])[Y] - cam_pos[Y];
			dir[0][Z] = (*pt[0])[Z] - cam_pos[Z];
			dir[1][X] = (*pt[1])[X] - cam_pos[X];
			dir[1][Y] = (*pt[1])[Y] - cam_pos[Y];
			dir[1][Z] = (*pt[1])[Z] - cam_pos[Z];
			dir[2][X] = (*pt[2])[X] - cam_pos[X];
			dir[2][Y] = (*pt[2])[Y] - cam_pos[Y];
			dir[2][Z] = (*pt[2])[Z] - cam_pos[Z];
			dir[3][X] = (*pt[3])[X] - cam_pos[X];
			dir[3][Y] = (*pt[3])[Y] - cam_pos[Y];
			dir[3][Z] = (*pt[3])[Z] - cam_pos[Z];

			// skip whole patch if all 4 corners of the patch are outside of frustum
			if (DotProduct(dir[0], frustum_inner_normals[0]) < 0 &&
					DotProduct(dir[1], frustum_inner_normals[0]) < 0 &&
					DotProduct(dir[2], frustum_inner_normals[0]) < 0 &&
					DotProduct(dir[3], frustum_inner_normals[0]) < 0)
			{
				continue;
			}
			if (DotProduct(dir[0], frustum_inner_normals[1]) < 0 &&
					DotProduct(dir[1], frustum_inner_normals[1]) < 0 &&
					DotProduct(dir[2], frustum_inner_normals[1]) < 0 &&
					DotProduct(dir[3], frustum_inner_normals[1]) < 0)
			{
				continue;
			}
			if (DotProduct(dir[0], frustum_inner_normals[2]) < 0 &&
					DotProduct(dir[1], frustum_inner_normals[2]) < 0 &&
					DotProduct(dir[2], frustum_inner_normals[2]) < 0 &&
					DotProduct(dir[3], frustum_inner_normals[2]) < 0)
			{
				continue;
			}
			if (DotProduct(dir[0], frustum_inner_normals[3]) < 0 &&
					DotProduct(dir[1], frustum_inner_normals[3]) < 0 &&
					DotProduct(dir[2], frustum_inner_normals[3]) < 0 &&
					DotProduct(dir[3], frustum_inner_normals[3]) < 0)
			{
				continue;
			}

			if (gDynamicLod)
			{
				const float LOD_FACTOR = 20.0f;

				// find the center of the patch by averaging the border control points
				center_pt[X] = 0.25f*((*pt[0])[X] + (*pt[1])[X] + (*pt[2])[X] + (*pt[3])[X]);
				center_pt[Y] = 0.25f*((*pt[0])[Y] + (*pt[1])[Y] + (*pt[2])[Y] + (*pt[3])[Y]);
				center_pt[Z] = 0.25f*((*pt[0])[Z] + (*pt[1])[Z] + (*pt[2])[Z] + (*pt[3])[Z]);
				// find the distance from the camera to the center of the patch ....
				dist_inv = fsqrt_inv(SQR(cam_pos[X] - center_pt[X]) + SQR(cam_pos[Y] - center_pt[Y]) + SQR(cam_pos[Z] - center_pt[Z]));
				// .... and use it for lod/subdivs
				if (gSurfaceType == SURFACE_TYPE_BSPLINE)
				{
					patch->subdivs = CLAMP((int)(LOD_FACTOR*dist_inv), 1, MAX_SUBDIVS);
				}
				else
				{
					patch->subdivs = CLAMP((int)(3.0f*LOD_FACTOR*dist_inv), 1, MAX_SUBDIVS);
				}
			}
			else
			{
				if (gSurfaceType == SURFACE_TYPE_BSPLINE)
				{
					patch->subdivs = gSubdivs;
				}
				else
				{
					patch->subdivs = MIN(3*gSubdivs, MAX_SUBDIVS);
				}
			}
			if (gForwardDifferencing)
			{
				num_polys += DrawBSplinePatchForwardDiff(patch, s, t, s+ds, t+dt);
			}
			else
			{
				num_polys += DrawBSplinePatch(patch, s, t, s+ds, t+dt);
			}
		}
	}

	if (gDynamicLod && gFillinGaps)
	{
		// need to disable backface culling
		grCullMode(GR_CULL_DISABLE);
		if (gFillinGaps == 2)
		{
			grColorCombine(GR_COMBINE_FUNCTION_LOCAL, GR_COMBINE_FACTOR_NONE, GR_COMBINE_LOCAL_CONSTANT, GR_COMBINE_OTHER_NONE, FXFALSE);
			grConstantColorValue(0xff00ff00);
		}

		patch = surface->patches;
		for (j=0; j<v; j++)
		{
			for (i=0; i<u; i++, patch++)
			{
				if (!patch->subdivs)
				{
					continue;
				}

				// check left edge of patch
				if (i > 0 && (patch-1)->subdivs && patch->subdivs != (patch-1)->subdivs)
				{
					// add the verts
					num_verts = 0;
					for (k=0; k<=(patch-1)->subdivs; k++)
					{
						vert_ptrs[num_verts] = &(patch-1)->right_verts[k];
						num_verts++;
					}
					for (k=patch->subdivs-1; k>=1; k--)
					{
						vert_ptrs[num_verts] = &patch->left_verts[k];
						num_verts++;
					}

					// strip mesh the gap
					if (num_verts >= 3)
					{
						num_polys += DrawConvexPolygon(num_verts, vert_ptrs);
					}
				}

				// check right edge of patch
				if (i < u-1 && (patch+1)->subdivs && patch->subdivs != (patch+1)->subdivs)
				{
					// add the verts
					num_verts = 0;
					for (k=0; k<=patch->subdivs; k++)
					{
						vert_ptrs[num_verts] = &patch->right_verts[k];
						num_verts++;
					}
					for (k=(patch+1)->subdivs-1; k>=1; k--)
					{
						vert_ptrs[num_verts] = &(patch+1)->left_verts[k];
						num_verts++;
					}

					// strip mesh the gap
					if (num_verts >= 3)
					{
						num_polys += DrawConvexPolygon(num_verts, vert_ptrs);
					}
				}

				// check bottom edge of patch
				if (j > 0 && (patch-u)->subdivs && patch->subdivs != (patch-u)->subdivs)
				{
					// add the verts
					num_verts = 0;
					for (k=0; k<=(patch-u)->subdivs; k++)
					{
						vert_ptrs[num_verts] = &(patch-u)->top_verts[k];
						num_verts++;
					}
					for (k=patch->subdivs-1; k>=1; k--)
					{
						vert_ptrs[num_verts] = &patch->bottom_verts[k];
						num_verts++;
					}

					// strip mesh the gap
					if (num_verts >= 3)
					{
						num_polys += DrawConvexPolygon(num_verts, vert_ptrs);
					}
				}

				// check top edge of patch
				if (j < v-1 && (patch+u)->subdivs && patch->subdivs != (patch+u)->subdivs)
				{
					// add the verts
					num_verts = 0;
					for (k=0; k<=patch->subdivs; k++)
					{
						vert_ptrs[num_verts] = &patch->top_verts[k];
						num_verts++;
					}
					for (k=(patch+u)->subdivs-1; k>=1; k--)
					{
						vert_ptrs[num_verts] = &(patch+u)->bottom_verts[k];
						num_verts++;
					}

					// strip mesh the gap
					if (num_verts >= 3)
					{
						num_polys += DrawConvexPolygon(num_verts, vert_ptrs);
					}
				}
			}
		}

		// re-enable backface culling
		grCullMode(GR_CULL_NEGATIVE);
	}

	if (gEditing)
	{
		VertexData v0, v1;
		float scale;

		grColorCombine(GR_COMBINE_FUNCTION_LOCAL,
									 GR_COMBINE_FACTOR_NONE,
									 GR_COMBINE_LOCAL_CONSTANT,
									 GR_COMBINE_OTHER_NONE, FXFALSE);

		// draw the control points
		BeginXforms();
		for (i=0; i<surface->nu*surface->nv; i++)
		{
			if (gSelectedCtrlPt == &surface->ctrl_pts[i] && ((linux_timeGetTime()>>2) & 1))
			{
				grConstantColorValue(0xff00ffff);
			}
			else
			{
				grConstantColorValue(0xff0000ff);
			}

			XformVertex(&v0, &surface->ctrl_pts[i]);

			v1.vertex.x = v0.vertex.x;
			v1.vertex.y = v0.vertex.y;
			v1.vertex.z = v0.vertex.z;
			v1.vertex.oow = v0.vertex.oow;
			v1.outcode = v0.outcode;

			scale = 500.0f*v0.vertex.oow;
			v0.vertex.x += scale;
			v0.vertex.y += scale;
			v1.vertex.x -= scale;
			v1.vertex.y -= scale;
			ClipAndDrawLine(&v0.vertex, &v1.vertex);
			v0.vertex.x -= scale + scale;
			v1.vertex.x += scale + scale;
			ClipAndDrawLine(&v0.vertex, &v1.vertex);
		}
		EndXforms();
	}

	return num_polys;
}

Vector *SelectControlPoint(int x, int y)
{
	int i;
	Vector *res, pt;
	float w_inv, dist_sqr, min_dist_sqr;
	BSplineSurface *surface;

	res = NULL;
	min_dist_sqr = 10.0f*10.0f;
	for (surface = &gSurfaces[0]; surface != &gSurfaces[gNumSurfaces]; surface++)
	{
		for (i=0; i<surface->nu*surface->nv; i++)
		{
			MatMultVec4x4_3(pt, gProjMat, surface->ctrl_pts[i]);
			w_inv = 1.0f/pt[W];
			pt[X] *= w_inv;
			pt[Y] *= w_inv;
			dist_sqr = SQR(gMousePos1.x - pt[X]) + SQR(gMousePos1.y - pt[Y]);
			if (dist_sqr < min_dist_sqr)
			{
				res = &surface->ctrl_pts[i];
				min_dist_sqr = dist_sqr;
			}
		}
	}

	return res;
}

// Evaluates the Z component ("height") of the surface at (x, y) using the
// uniform cubic B-spline basis.
//
// The 4x4 window of control points is chosen by searching the first row of
// control points for the X interval containing x and the first column for the
// Y interval containing y.  Indices that are not found, or that are below 3,
// are clamped to 3 so that the window always starts inside the grid.
//
// surface   surface to sample (nu control points per row, nv rows)
// x, y      position, in the same space as the control points' X and Y
//
// Returns the interpolated Z value.
float HeightAt(BSplineSurface *surface, float x, float y)
{
	int i, u, v;
	float s, s2, s3, t, t2, t3;
	float b_3, b_2, b_1, b_0;
	Vector *ctrl_pt, bv[4];

	u = v = -1;

	// find the column: walk along the first row comparing X
	for (i=1; i<surface->nu; i++)
	{
		if (x > surface->ctrl_pts[i-1][X] && x <= surface->ctrl_pts[i][X])
		{
			u = i;
			break;
		}
	}

	// find the row: walk down the first column comparing Y.  Each step skips
	// one whole row (nu entries), so the number of steps is bounded by the
	// row count nv, not by the row length nu.
	for (i=1; i<surface->nv; i++)
	{
		if (y > surface->ctrl_pts[(i-1)*surface->nu][Y] && y <= surface->ctrl_pts[i*surface->nu][Y])
		{
			v = i;
			break;
		}
	}

	// the window spans [u-3, u] x [v-3, v], so both must be at least 3
	if (u < 3)
	{
		u = 3;
	}
	if (v < 3)
	{
		v = 3;
	}

	// parameters within the window, measured across its full X and Y extent
	s = 1.0f - (surface->ctrl_pts[v*surface->nu + u][X] - x) /
						 (surface->ctrl_pts[v*surface->nu + u][X] - surface->ctrl_pts[v*surface->nu + u-3][X]);
	t = 1.0f - (surface->ctrl_pts[v*surface->nu + u][Y] - y) /
						 (surface->ctrl_pts[v*surface->nu + u][Y] - surface->ctrl_pts[(v-3)*surface->nu + u][Y]);

	s2 = s*s;
	s3 = s*s2;
	// basis functions
	b_3 = (1.0f/6.0f)*(1.0f - 3.0f*s + 3.0f*s2 - s3);
	b_2 = (1.0f/6.0f)*(4.0f - 6.0f*s2 + 3.0f*s3);
	b_1 = (1.0f/6.0f)*(1.0f + 3.0f*s + 3.0f*s2 - 3.0f*s3);
	b_0 = (1.0f/6.0f)*(s3);

	// calculate the basis curves: bv[i] is the height along row v-3+i at s.
	// The row is indexed directly so no pointer is ever stepped to before the
	// first row of the window.
	for (i=3; i>=0; i--)
	{
		ctrl_pt = &surface->ctrl_pts[(v-3+i)*surface->nu + u];

		bv[i][Z] = b_3*(*(ctrl_pt-3))[Z] + b_2*(*(ctrl_pt-2))[Z] + b_1*(*(ctrl_pt-1))[Z] + b_0*(*(ctrl_pt))[Z];
	}

	t2 = t*t;
	t3 = t*t2;
	// basis functions
	b_3 = (1.0f/6.0f)*(1.0f - 3.0f*t + 3.0f*t2 - t3);
	b_2 = (1.0f/6.0f)*(4.0f - 6.0f*t2 + 3.0f*t3);
	b_1 = (1.0f/6.0f)*(1.0f + 3.0f*t + 3.0f*t2 - 3.0f*t3);
	b_0 = (1.0f/6.0f)*(t3);

	// blend the four row heights across t
	return b_3*bv[0][Z] + b_2*bv[1][Z] + b_1*bv[2][Z] + b_0*bv[3][Z];
}

// draws the patch after the gVerts array has been set with all
// the transformed and lit verts
int DrawPatch(BSplinePatch *patch)
{
	int i, j, num_polys;
	VertexData *vert_ptrs[4];

	num_polys = 0;

	// now that we have all the vertices, start drawing
	vert_ptrs[0] = &gVerts[0];
	vert_ptrs[1] = &gVerts[1];
	vert_ptrs[2] = &gVerts[patch->subdivs+1];
	vert_ptrs[3] = &gVerts[patch->subdivs+2];

	for (j=patch->subdivs; j>0; j--, vert_ptrs[0]++, vert_ptrs[1]++, vert_ptrs[2]++, vert_ptrs[3]++)
	{
		for (i=patch->subdivs; i>0; i--, vert_ptrs[0]++, vert_ptrs[1]++, vert_ptrs[2]++, vert_ptrs[3]++)
		{
//			if (vert_ptrs[0]->vertex.oow < 0 || vert_ptrs[1]->vertex.oow < 0 || vert_ptrs[2]->vertex.oow < 0 || vert_ptrs[3]->vertex.oow < 0)
//			{
//				continue;
//			}

			// trivial accept (we can add to the strip)
			if ((vert_ptrs[0]->outcode | vert_ptrs[1]->outcode | vert_ptrs[2]->outcode | vert_ptrs[3]->outcode) == 0)
			{
				if (gWireframe)
				{
					grDrawLine(&vert_ptrs[0]->vertex, &vert_ptrs[1]->vertex);
					grDrawLine(&vert_ptrs[1]->vertex, &vert_ptrs[3]->vertex);
					grDrawLine(&vert_ptrs[3]->vertex, &vert_ptrs[0]->vertex);
					grDrawLine(&vert_ptrs[3]->vertex, &vert_ptrs[2]->vertex);
					grDrawLine(&vert_ptrs[2]->vertex, &vert_ptrs[0]->vertex);
				}
				else
				{
#ifdef USE_GLIDE3
					grDrawVertexArray(GR_TRIANGLE_STRIP, 4, vert_ptrs);
#else
					grDrawTriangle(&vert_ptrs[0]->vertex, &vert_ptrs[1]->vertex, &vert_ptrs[3]->vertex);
					grDrawTriangle(&vert_ptrs[0]->vertex, &vert_ptrs[3]->vertex, &vert_ptrs[2]->vertex);
#endif // USE_GLIDE3
				}
				num_polys += 2;
			}
			else
			{
				if ((vert_ptrs[0]->outcode & vert_ptrs[1]->outcode & vert_ptrs[3]->outcode) == 0)
				{
					// trivially accept
					if ((vert_ptrs[0]->outcode | vert_ptrs[1]->outcode | vert_ptrs[3]->outcode) == 0)
					{
						if (gWireframe)
						{
							grDrawLine(&vert_ptrs[0]->vertex, &vert_ptrs[1]->vertex);
							grDrawLine(&vert_ptrs[1]->vertex, &vert_ptrs[3]->vertex);
							grDrawLine(&vert_ptrs[3]->vertex, &vert_ptrs[0]->vertex);
						}
						else
						{
							grDrawTriangle(&vert_ptrs[0]->vertex, &vert_ptrs[1]->vertex, &vert_ptrs[3]->vertex);
						}
						num_polys++;
					}
					// need to clip
					else
					{
						if (gWireframe)
						{
							ClipAndDrawLine(&vert_ptrs[0]->vertex, &vert_ptrs[1]->vertex);
							ClipAndDrawLine(&vert_ptrs[1]->vertex, &vert_ptrs[3]->vertex);
							ClipAndDrawLine(&vert_ptrs[3]->vertex, &vert_ptrs[0]->vertex);
							num_polys++;
						}
						else
						{
							num_polys += ClipAndDrawTriangle(&vert_ptrs[0]->vertex, &vert_ptrs[1]->vertex, &vert_ptrs[3]->vertex);
						}
					}
				}

				// trivial reject
				if ((vert_ptrs[0]->outcode & vert_ptrs[3]->outcode & vert_ptrs[2]->outcode) == 0)
				{
					// trivially accept
					if ((vert_ptrs[0]->outcode | vert_ptrs[3]->outcode | vert_ptrs[2]->outcode) == 0)
					{
						if (gWireframe)
						{
							grDrawLine(&vert_ptrs[0]->vertex, &vert_ptrs[3]->vertex);
							grDrawLine(&vert_ptrs[3]->vertex, &vert_ptrs[2]->vertex);
							grDrawLine(&vert_ptrs[2]->vertex, &vert_ptrs[0]->vertex);
						}
						else
						{
							grDrawTriangle(&vert_ptrs[0]->vertex, &vert_ptrs[3]->vertex, &vert_ptrs[2]->vertex);
						}
						num_polys++;
					}
					// need to clip
					else
					{
						if (gWireframe)
						{
							ClipAndDrawLine(&vert_ptrs[0]->vertex, &vert_ptrs[3]->vertex);
							ClipAndDrawLine(&vert_ptrs[3]->vertex, &vert_ptrs[2]->vertex);
							ClipAndDrawLine(&vert_ptrs[2]->vertex, &vert_ptrs[0]->vertex);
							num_polys++;
						}
						else
						{
							num_polys += ClipAndDrawTriangle(&vert_ptrs[0]->vertex, &vert_ptrs[3]->vertex, &vert_ptrs[2]->vertex);
						}
					}
				}
			}
		}
	}

	return num_polys;
}

// Tessellates one patch by evaluating the cubic basis functions directly,
// lights and transforms every sample into gVerts and draws it with DrawPatch.
//
// patch->ctrl_pts and patch->ctrl_norms are the control point / control
// normal grids and patch->nu is the number of entries per grid row.
// patch->u and patch->v are the grid coordinates of the last column and row
// of the 4 by 4 window used by this patch, so both must be at least 3.
// patch->subdivs is the number of steps per side; the basis tables below hold
// MAX_SUBDIVS+1 entries.
// s0,t0 - s1,t1 is the texture coordinate range spread across the patch.
//
// The basis is the uniform cubic B-spline basis when gSurfaceType is
// SURFACE_TYPE_BSPLINE and the cubic Bernstein (Bezier) basis otherwise.
// Normals come from blended control normals when gUseCtrlNorms is set and
// from the cross product of the two surface tangents otherwise.
//
// Returns the polygon count reported by DrawPatch.
static int DrawBSplinePatch(BSplinePatch *patch, float s0, float t0, float s1, float t1)
{
	static Vector basis[MAX_SUBDIVS+1], deriv_basis[MAX_SUBDIVS+1];
	VertexData *out;
	int si, ti, r, k, n, first_index, is_bspline;
	float step, ds, s, dt, t, t2, t3;
	float omt, omt2, omt3;
	float diffuse, specular;
	Vector point, norm, tangent_u, tangent_v, *ctrl_pt, *ctrl_norm;
	Matrix m0, m1, m2, n0, n1, n2;
	Matrix bv, dv_bv, nv;

	n = patch->subdivs;
	step = 1.0f/(float)n;
	is_bspline = (gSurfaceType == SURFACE_TYPE_BSPLINE);

	// tabulate the basis functions (and, when tangents are needed, their
	// derivatives) at every parameter step; the same table serves both
	// parametric directions
	for (k=0, t=0.0f; k<=n; k++, t+=step)
	{
		t2 = t*t;
		t3 = t*t2;

		if (is_bspline)
		{
			basis[k][0] = (1.0f/6.0f)*(     -t3 + 3.0f*t2 - 3.0f*t + 1.0f);
			basis[k][1] = (1.0f/6.0f)*( 3.0f*t3 - 6.0f*t2          + 4.0f);
			basis[k][2] = (1.0f/6.0f)*(-3.0f*t3 + 3.0f*t2 + 3.0f*t + 1.0f);
			basis[k][3] = (1.0f/6.0f)*(      t3);

			if (!gUseCtrlNorms)
			{
				deriv_basis[k][0] = (1.0f/6.0f)*(-3.0f*t2 +  6.0f*t - 3.0f);
				deriv_basis[k][1] = (1.0f/6.0f)*( 9.0f*t2 - 12.0f*t);
				deriv_basis[k][2] = (1.0f/6.0f)*(-9.0f*t2 +  6.0f*t + 3.0f);
				deriv_basis[k][3] = (1.0f/6.0f)*( 3.0f*t2);
			}
		}
		else
		{
			omt = 1.0f-t;
			omt2 = omt*omt;
			omt3 = omt*omt2;

			basis[k][0] = omt3;
			basis[k][1] = 3.0f*t*omt2;
			basis[k][2] = 3.0f*t2*omt;
			basis[k][3] = t3;

			if (!gUseCtrlNorms)
			{
				deriv_basis[k][0] = -3.0f*omt2;
				deriv_basis[k][1] = 3.0f - 12.0f*t + 9.0f*t2;
				deriv_basis[k][2] = 6.0f*t - 9.0f*t2;
				deriv_basis[k][3] = 3.0f*t2;
			}
		}
	}

	// texture coordinate increment per step
	ds = (s1 - s0)*step;
	dt = (t1 - t0)*step;

	// gather the 4x4 window into one matrix per coordinate (X, Y, Z)
	first_index = (patch->v-3)*patch->nu + (patch->u-3);
	for (r=0; r<4; r++)
	{
		ctrl_pt = &patch->ctrl_pts[first_index + r*patch->nu];
		for (k=0; k<4; k++)
		{
			m0[r][k] = ctrl_pt[k][X];
			m1[r][k] = ctrl_pt[k][Y];
			m2[r][k] = ctrl_pt[k][Z];
		}

		if (gUseCtrlNorms)
		{
			ctrl_norm = &patch->ctrl_norms[first_index + r*patch->nu];
			for (k=0; k<4; k++)
			{
				n0[r][k] = ctrl_norm[k][X];
				n1[r][k] = ctrl_norm[k][Y];
				n2[r][k] = ctrl_norm[k][Z];
			}
		}
	}

	// set the current transformation matrix
	SetCurrMatrix(gProjMat);
	BeginXforms();

	out = &gVerts[0];
	for (si=0, s=s0; si<=n; si++, s+=ds)
	{
		// collapse each row of the window to a single value at this step,
		// leaving four "curve" control values per coordinate
		MatMultVec4x4_4(bv[0], m0, basis[si]);
		MatMultVec4x4_4(bv[1], m1, basis[si]);
		MatMultVec4x4_4(bv[2], m2, basis[si]);
		if (gUseCtrlNorms)
		{
			MatMultVec4x4_4(nv[0], n0, basis[si]);
			MatMultVec4x4_4(nv[1], n1, basis[si]);
			MatMultVec4x4_4(nv[2], n2, basis[si]);
		}
		else
		{
			MatMultVec4x4_4(dv_bv[0], m0, deriv_basis[si]);
			MatMultVec4x4_4(dv_bv[1], m1, deriv_basis[si]);
			MatMultVec4x4_4(dv_bv[2], m2, deriv_basis[si]);
		}

		for (ti=0, t=t0; ti<=n; ti++, t+=dt)
		{
			// surface point from the basis curves
			MatMultVec3x4_4(point, bv, basis[ti]);

			if (gUseCtrlNorms)
			{
				// blend the control normals exactly like the position
				MatMultVec3x4_4(norm, nv, basis[ti]);
			}
			else
			{
				// normal from the two tangent vectors
				MatMultVec3x4_4(tangent_u, bv, deriv_basis[ti]);
				MatMultVec3x4_4(tangent_v, dv_bv, basis[ti]);
				CrossProduct(norm, tangent_v, tangent_u);
			}

			FastApproxNormalize(norm);

			// lighting
			if (gSpecular)
			{
				DirLightDiffuseAndSpecular(norm, &diffuse, &specular);
				specular *= gLightSpecularFactor;
			}
			else
			{
				diffuse = DirLightDiffuse(norm);
				specular = 0.0f;
			}
			diffuse = gLightAmbientFactor + gLightDiffuseFactor*diffuse;

			// transform, then fill in the texture coordinates and colors
			XformVertex(out, &point);

			if (gEnvmap)
			{
				// second texture coordinate set is derived from the normal
				out->vertex.tmuvtx[1].sow = 256.0f*(0.5f + 0.5f*norm[X])*out->vertex.oow;
				out->vertex.tmuvtx[1].tow = 256.0f*(0.5f + 0.5f*norm[Y])*out->vertex.oow;
			}
			out->vertex.tmuvtx[0].sow = s*out->vertex.oow;
			out->vertex.tmuvtx[0].tow = t*out->vertex.oow;
			out->vertex.r = diffuse;
			out->vertex.g = diffuse;
			out->vertex.b = diffuse;
			// specular rides in the alpha channel
			out->vertex.a = specular;

			if (gDrawNormals)
			{
				VertexData base, tip;

				// line start: the vertex itself, drawn white
				base.vertex.x = out->vertex.x;
				base.vertex.y = out->vertex.y;
				base.vertex.z = out->vertex.z;
				base.vertex.oow = out->vertex.oow;
				base.vertex.r = 255.0f;
				base.vertex.g = 255.0f;
				base.vertex.b = 255.0f;
				base.vertex.a = 255.0f;
				base.vertex.tmuvtx[0].sow = 0.0f;
				base.vertex.tmuvtx[0].tow = 0.0f;
				base.outcode = out->outcode;

				// line end: a quarter unit along the normal
				point[X] += 0.25f*norm[X];
				point[Y] += 0.25f*norm[Y];
				point[Z] += 0.25f*norm[Z];
				point[W] = 1.0f;

				XformVertex(&tip, &point);
				ClipAndDrawLine(&base.vertex, &tip.vertex);
			}

			out++;
		}
	}

	EndXforms();

	if (gFillinGaps)
	{
		// keep a copy of the four border rows/columns of the vertex grid so
		// the caller can stitch this patch to its neighbours later
		const int stride = n+1;

		for (k=0; k<=n; k++)
		{
			VertexDataCopy(&patch->left_verts[k], &gVerts[k]);
			VertexDataCopy(&patch->right_verts[k], &gVerts[n*stride + k]);
			VertexDataCopy(&patch->bottom_verts[k], &gVerts[k*stride]);
			VertexDataCopy(&patch->top_verts[k], &gVerts[n + k*stride]);
		}
	}

	return DrawPatch(patch);
}

// Initializes forward-difference state for one uniform cubic B-spline segment.
//
// For each of the X, Y and Z components the segment defined by the control
// points pt0..pt3 is written as a*t^3 + b*t^2 + c*t + d, and fd[component]
// receives the value at t = 0 plus the first, second and third forward
// differences for a step of delta:
//   f0 = d
//   f1 = a*delta^3 + b*delta^2 + c*delta
//   f2 = 6*a*delta^3 + 2*b*delta^2
//   f3 = 6*a*delta^3
//
// fd              array indexed by X, Y and Z that receives the state
// pt0..pt3        the four control points of the segment
// delta           parameter step
// delta2, delta3  delta squared and delta cubed, supplied by the caller
//
// NOTE(review): plain "inline" without "static" does not emit an external
// definition in C99 and later - if this file is built as C, confirm that
// non-optimized builds still link.
inline void SetupForwardDiff(CubicForwardDiff *fd, Vector *pt0, Vector *pt1, Vector *pt2, Vector *pt3, float delta, float delta2, float delta3)
{
	const int axes[3] = { X, Y, Z };
	float p0, p1, p2, p3;
	float p0_x3, p1_x3, p2_x3;
	float a, b, c, d;
	float a_d3, b_d2, six_a_d3;
	int k, axis;

	for (k=0; k<3; k++)
	{
		axis = axes[k];

		p0 = (*pt0)[axis];
		p1 = (*pt1)[axis];
		p2 = (*pt2)[axis];
		p3 = (*pt3)[axis];

		// shared products, so each control value is scaled only once
		p0_x3 = 3.0f*p0;
		p1_x3 = 3.0f*p1;
		p2_x3 = 3.0f*p2;

		// polynomial coefficients of the segment
		a = (1.0f/6.0f)*(-p0 + p1_x3 - p2_x3 + p3);
		b = (1.0f/6.0f)*(p0_x3 - (p1_x3 + p1_x3) + p2_x3);
		c = (1.0f/6.0f)*(-p0_x3 + p2_x3);
		d = (1.0f/6.0f)*(p0 + (p1 + p1_x3) + p2);

		// initial value and forward differences
		a_d3 = a*delta3;
		b_d2 = b*delta2;
		six_a_d3 = 6.0f*a_d3;
		fd[axis].f0_0 = d;
		fd[axis].f1_0 = a_d3 + b_d2 + c*delta;
		fd[axis].f2_0 = six_a_d3 + b_d2 + b_d2;
		fd[axis].f3_0 = six_a_d3;
	}
}

// Reads the current value of a 3-component forward-difference state into pt
// and advances the state by one step.
//
// pt  array-like destination; elements 0..2 are written
// fd  array-like of 3 forward-difference records; for each one f0_0 is copied
//     to pt and then f0_0 += f1_0, f1_0 += f2_0, f2_0 += f3_0
//
// Both arguments are evaluated several times, so they must be free of side
// effects.  The body is wrapped in do { } while (0) so that the macro followed
// by a semicolon is a single statement and is safe in an unbraced if/else.
#define SetAndUpdateForwardDiffVector(pt, fd) \
	do { \
		(pt)[0] = (fd)[0].f0_0; \
		(fd)[0].f0_0 += (fd)[0].f1_0; \
		(fd)[0].f1_0 += (fd)[0].f2_0; \
		(fd)[0].f2_0 += (fd)[0].f3_0; \
		(pt)[1] = (fd)[1].f0_0; \
		(fd)[1].f0_0 += (fd)[1].f1_0; \
		(fd)[1].f1_0 += (fd)[1].f2_0; \
		(fd)[1].f2_0 += (fd)[1].f3_0; \
		(pt)[2] = (fd)[2].f0_0; \
		(fd)[2].f0_0 += (fd)[2].f1_0; \
		(fd)[2].f1_0 += (fd)[2].f2_0; \
		(fd)[2].f2_0 += (fd)[2].f3_0; \
	} while (0)

// Tessellates one patch with forward differencing instead of evaluating the
// basis functions at every sample, lights and transforms every sample into
// gVerts and draws it with DrawPatch.
//
// Each of the 4 control rows of the patch's window gets its own
// forward-difference state; stepping those yields 4 intermediate control
// values per outer step, from which a second forward-difference state is set
// up and stepped across the patch.  Positions and normals are handled the
// same way; normals always come from patch->ctrl_norms (gUseCtrlNorms and
// gSurfaceType are not consulted here).
//
// patch           patch to draw; u, v, nu, subdivs, ctrl_pts and ctrl_norms
//                 are read, and the *_verts edge arrays are written when
//                 gFillinGaps is set
// s0,t0 - s1,t1   texture coordinate range spread across the patch
//
// Returns the polygon count reported by DrawPatch.
static int DrawBSplinePatchForwardDiff(BSplinePatch *patch, float s0, float t0, float s1, float t1)
{
	// 4 rows x 3 components each for the row states, 3 components for the
	// per-step states
	static CubicForwardDiff row_pt_fd[12], row_norm_fd[12], pt_fd[3], norm_fd[3];
	VertexData *out;
	int si, ti, r, k, n, first_index;
	float ds, s, dt, t;
	float diffuse, specular;
	Vector point, norm, *ctrl_pt, *ctrl_norm;
	float d, d2, d3;
	Vector curve_pt[4], curve_norm[4];

	n = patch->subdivs;
	first_index = (patch->v-3)*patch->nu + (patch->u-3);

	// parameter step and its powers
	d = 1.0f/(float)n;
	d2 = d*d;
	d3 = d*d2;

	// one forward-difference state per control row, for points and normals
	for (r=0; r<4; r++)
	{
		ctrl_pt = &patch->ctrl_pts[first_index + r*patch->nu];
		SetupForwardDiff(&row_pt_fd[3*r], ctrl_pt, ctrl_pt+1, ctrl_pt+2, ctrl_pt+3, d, d2, d3);

		ctrl_norm = &patch->ctrl_norms[first_index + r*patch->nu];
		SetupForwardDiff(&row_norm_fd[3*r], ctrl_norm, ctrl_norm+1, ctrl_norm+2, ctrl_norm+3, d, d2, d3);
	}

	// texture coordinate increment per step
	ds = (s1 - s0)*d;
	dt = (t1 - t0)*d;

	// set the current transformation matrix
	SetCurrMatrix(gProjMat);
	BeginXforms();

	out = &gVerts[0];
	for (si=0, s=s0; si<=n; si++, s+=ds)
	{
		// step every row state once; the results act as the 4 control values
		// of the curve that crosses the patch at this step
		for (r=0; r<4; r++)
		{
			SetAndUpdateForwardDiffVector(curve_pt[r], &row_pt_fd[3*r]);
			SetAndUpdateForwardDiffVector(curve_norm[r], &row_norm_fd[3*r]);
		}
		SetupForwardDiff(&pt_fd[0], curve_pt, curve_pt+1, curve_pt+2, curve_pt+3, d, d2, d3);
		SetupForwardDiff(&norm_fd[0], curve_norm, curve_norm+1, curve_norm+2, curve_norm+3, d, d2, d3);

		for (ti=0, t=t0; ti<=n; ti++, t+=dt)
		{
			// next position and normal along the crossing curve
			SetAndUpdateForwardDiffVector(point, pt_fd);
			SetAndUpdateForwardDiffVector(norm, norm_fd);

			FastApproxNormalize(norm);

			// lighting
			if (gSpecular)
			{
				DirLightDiffuseAndSpecular(norm, &diffuse, &specular);
				specular *= gLightSpecularFactor;
			}
			else
			{
				diffuse = DirLightDiffuse(norm);
				specular = 0.0f;
			}
			diffuse = gLightAmbientFactor + gLightDiffuseFactor*diffuse;

			// transform, then fill in the texture coordinates and colors
			XformVertex(out, &point);

			if (gEnvmap)
			{
				// second texture coordinate set is derived from the normal
				out->vertex.tmuvtx[1].sow = 256.0f*(0.5f + 0.5f*norm[X])*out->vertex.oow;
				out->vertex.tmuvtx[1].tow = 256.0f*(0.5f + 0.5f*norm[Y])*out->vertex.oow;
			}
			out->vertex.tmuvtx[0].sow = s*out->vertex.oow;
			out->vertex.tmuvtx[0].tow = t*out->vertex.oow;
			out->vertex.r = diffuse;
			out->vertex.g = diffuse;
			out->vertex.b = diffuse;
			// specular rides in the alpha channel
			out->vertex.a = specular;

			if (gDrawNormals)
			{
				VertexData base, tip;

				// line start: the vertex itself, drawn white
				base.vertex.x = out->vertex.x;
				base.vertex.y = out->vertex.y;
				base.vertex.z = out->vertex.z;
				base.vertex.oow = out->vertex.oow;
				base.vertex.r = 255.0f;
				base.vertex.g = 255.0f;
				base.vertex.b = 255.0f;
				base.vertex.a = 255.0f;
				base.vertex.tmuvtx[0].sow = 0.0f;
				base.vertex.tmuvtx[0].tow = 0.0f;
				base.outcode = out->outcode;

				// line end: a quarter unit along the normal
				point[X] += 0.25f*norm[X];
				point[Y] += 0.25f*norm[Y];
				point[Z] += 0.25f*norm[Z];
				point[W] = 1.0f;

				XformVertex(&tip, &point);
				ClipAndDrawLine(&base.vertex, &tip.vertex);
			}

			out++;
		}
	}

	EndXforms();

	if (gFillinGaps)
	{
		// keep a copy of the four border rows/columns of the vertex grid so
		// the caller can stitch this patch to its neighbours later
		const int stride = n+1;

		for (k=0; k<=n; k++)
		{
			VertexDataCopy(&patch->left_verts[k], &gVerts[k]);
			VertexDataCopy(&patch->right_verts[k], &gVerts[n*stride + k]);
			VertexDataCopy(&patch->bottom_verts[k], &gVerts[k*stride]);
			VertexDataCopy(&patch->top_verts[k], &gVerts[n + k*stride]);
		}
	}

	return DrawPatch(patch);
}

#ifdef USE_ASM
// Inline-assembly variant of VertexDataCopy, only compiled when USE_ASM is
// defined.  Copies x, y, oow, r, g, b, a, s0, t0, s1, t1 and the outcode from
// src to dest, one 32-bit move at a time through eax; z is not copied.
// The function is declared naked, so it has no compiler-generated prologue or
// epilogue: the arguments are read directly relative to esp and the function
// returns with an explicit ret.
// NOTE(review): [esp + 4] / [esp + 8] being dest / src assumes the arguments
// are passed on the stack in declaration order - confirm the calling
// convention used for this build.
__declspec(naked) void VertexDataCopy(VertexData *dest, const VertexData *src)
{
	__asm
	{
		// edx = dest, ecx = src
		mov			edx, [esp + 4]
		mov			ecx, [esp + 8]

		// each load of the next field is issued right after the store of the
		// previous one
		mov			eax, [ecx + 4*GR_VERTEX_X_OFFSET]

		mov			[edx + 4*GR_VERTEX_X_OFFSET], eax // x
		mov			eax, [ecx + 4*GR_VERTEX_Y_OFFSET]

		mov			[edx + 4*GR_VERTEX_Y_OFFSET], eax // y
		mov			eax, [ecx + 4*GR_VERTEX_OOW_OFFSET]

		mov			[edx + 4*GR_VERTEX_OOW_OFFSET], eax // oow
		mov			eax, [ecx + 4*GR_VERTEX_R_OFFSET]

		mov			[edx + 4*GR_VERTEX_R_OFFSET], eax // r
		mov			eax, [ecx + 4*GR_VERTEX_G_OFFSET]

		mov			[edx + 4*GR_VERTEX_G_OFFSET], eax // g
		mov			eax, [ecx + 4*GR_VERTEX_B_OFFSET]

		mov			[edx + 4*GR_VERTEX_B_OFFSET], eax // b
		mov			eax, [ecx + 4*GR_VERTEX_A_OFFSET]

		mov			[edx + 4*GR_VERTEX_A_OFFSET], eax // a
		mov			eax, [ecx + 4*GR_VERTEX_S0_OFFSET]

		mov			[edx + 4*GR_VERTEX_S0_OFFSET], eax // s0
		mov			eax, [ecx + 4*GR_VERTEX_T0_OFFSET]

		mov			[edx + 4*GR_VERTEX_T0_OFFSET], eax // t0
		mov			eax, [ecx + 4*GR_VERTEX_S1_OFFSET]

		mov			[edx + 4*GR_VERTEX_S1_OFFSET], eax // s1
		mov			eax, [ecx + 4*GR_VERTEX_T1_OFFSET]

		mov			[edx + 4*GR_VERTEX_T1_OFFSET], eax // t1
		mov			eax, (VertexData)[ecx].outcode

		mov			(VertexData)[edx].outcode, eax // outcode

		ret
	}
}
#else // USE_ASM
// Copies the fields of a vertex that are needed to draw it again later:
// screen x and y, oow, the iterated color and alpha, the s/t texture
// coordinates of both texture units, and the clip outcode.
// Every other field of *dest (vertex.z among them) is left untouched.
void VertexDataCopy(VertexData *dest, const VertexData *src)
{
	int tmu;

	// position
	dest->vertex.x = src->vertex.x;
	dest->vertex.y = src->vertex.y;
	dest->vertex.oow = src->vertex.oow;

	// color and alpha
	dest->vertex.r = src->vertex.r;
	dest->vertex.g = src->vertex.g;
	dest->vertex.b = src->vertex.b;
	dest->vertex.a = src->vertex.a;

	// texture coordinates of both texture units
	for (tmu=0; tmu<2; tmu++)
	{
		dest->vertex.tmuvtx[tmu].sow = src->vertex.tmuvtx[tmu].sow;
		dest->vertex.tmuvtx[tmu].tow = src->vertex.tmuvtx[tmu].tow;
	}

	dest->outcode = src->outcode;
}
#endif // USE_ASM
