/* display.c, X11 interface                                                 */

/* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose.  In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders.  Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
 *
 */

#ifdef DISPLAY

 /* the Xlib interface is closely modeled after
  * mpeg_play 2.0 by the Berkeley Plateau Research Group
  */

#include <stdio.h>
#include <stdlib.h>

#include <X11/Xlib.h>
#include <X11/Xutil.h>

#include "config.h"
#include "global.h"

/* private prototypes */
static void display_image _ANSI_ARGS_((XImage *ximage, unsigned char *dithered_image));
static void ditherframe _ANSI_ARGS_((unsigned char *src[]));
static void dithertop _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbot _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void dithertop420 _ANSI_ARGS_((unsigned char *src[],
                                      unsigned char *dst));
static void ditherbot420 _ANSI_ARGS_((unsigned char *src[],
                                      unsigned char *dst));

/* local data */

/* output buffers for the dithered picture; the second one is only
 * set up by init_display() when the sequence is not progressive
 */
static unsigned char *dithered_image, *dithered_image2;

/* dither look-up tables, filled by init_dither():
 * they are indexed by sample value plus dither offset (0..15), hence
 * the 16 extra entries; the chroma tables are indexed by the chroma
 * sample shifted right by one bit
 */
static unsigned char ytab[256+16];
static unsigned char utab[128+16];
static unsigned char vtab[128+16];

/* X11 related variables */
static Display *display;
static Window window;
static GC gc;
static XImage *ximage, *ximage2;
/* maps the internal 8 bit color number (yyyyuuvv) to the X pixel value;
 * init_display() fills entries 16..239 only
 */
static unsigned char pixel[256];

#ifdef SH_MEM

#include <sys/ipc.h>
#include <sys/shm.h>
#include <X11/extensions/XShm.h>

/* prototypes of the X error handling helpers used during shared
 * memory setup
 */
static int HandleXError _ANSI_ARGS_((Display *dpy, XErrorEvent *event));
static void InstallXErrorHandler _ANSI_ARGS_((void));
static void DeInstallXErrorHandler _ANSI_ARGS_((void));

/* non-zero while shared memory images are in use */
static int shmem_flag;
/* shared memory segment descriptions for ximage and ximage2 */
static XShmSegmentInfo shminfo1, shminfo2;
/* set to 1 by HandleXError() whenever an X error is reported */
static int gXErrorFlag;
/* event type of the XShm completion event, set by init_display()
 * when the extension is available (-1 otherwise)
 */
static int CompletionType = -1;

/* X error handler installed while the shared memory images are set up:
 * it only records that an error was reported by setting gXErrorFlag,
 * neither argument is examined; always returns 0
 */
static int HandleXError(dpy, event)
Display *dpy;
XErrorEvent *event;
{
  gXErrorFlag = 1;

  return 0;
}

/* make HandleXError() the X error handler and flush the output
 * buffer of the display connection
 */
static void InstallXErrorHandler()
{
  XSetErrorHandler(HandleXError);
  XFlush(display);
}

/* remove HandleXError() again (a NULL handler selects the Xlib default
 * error handler, per the Xlib documentation) and flush the output buffer
 */
static void DeInstallXErrorHandler()
{
  XSetErrorHandler(NULL);
  XFlush(display);
}

#endif

/* connect to server, create and map window,
 * allocate colors and (shared) memory
 *
 * name is passed unchanged to XOpenDisplay().
 *
 * On return the window is mapped, pixel[16..239] holds the X pixel
 * value for every internal color number, and ximage / dithered_image
 * (plus ximage2 / dithered_image2 for non-progressive sequences) are
 * ready for display_image().  With SH_MEM defined, MIT-SHM images are
 * tried first; any failure there falls back to plain XImages backed by
 * malloc'ed buffers.  Unrecoverable problems are reported via error().
 */
void init_display(name)
char *name;
{
  int crv, cbu, cgu, cgv;
  int y, u, v, r, g, b;
  int i;
  char dummy;
  int screen;
  Colormap cmap;
  int private;
  XColor xcolor;
  unsigned int fg, bg;
  char *hello = "MPEG-2 Display";
  XSizeHints hint;
  XVisualInfo vinfo;
  XEvent xev;
  unsigned long tmp_pixel;
  XWindowAttributes xwa;

  display = XOpenDisplay(name);

  if (display == NULL)
    error("Can not open display\n");

  screen = DefaultScreen(display);

  hint.x = 200;
  hint.y = 200;
  hint.width = horizontal_size;
  hint.height = vertical_size;
  hint.flags = PPosition | PSize;

  /* Get some colors */

  bg = WhitePixel (display, screen);
  fg = BlackPixel (display, screen);

  /* Make the window */

  /* the result in vinfo is not used any further, the calls only verify
   * that the screen offers an 8 bit PseudoColor or GrayScale visual
   */
  if (!XMatchVisualInfo(display, screen, 8, PseudoColor, &vinfo))
  {
    if (!XMatchVisualInfo(display, screen, 8, GrayScale, &vinfo))
      error("requires 8 bit display\n");
  }

  window = XCreateSimpleWindow (display, DefaultRootWindow (display),
             hint.x, hint.y, hint.width, hint.height, 4, fg, bg);

  XSelectInput(display, window, StructureNotifyMask);

  /* Tell other applications about this window */

  XSetStandardProperties (display, window, hello, hello, None, NULL, 0, &hint);

  /* Map window. */

  XMapWindow(display, window);

  /* Wait for map. */
  do
  {
    XNextEvent(display, &xev);
  }
  while (xev.type != MapNotify || xev.xmap.event != window);

  /* no further window events are wanted; display_image() relies on
   * reading events until the XShm completion event shows up
   */
  XSelectInput(display, window, NoEventMask);

  /* matrix coefficients */
  crv = convmat[matrix_coefficients][0];
  cbu = convmat[matrix_coefficients][1];
  cgu = convmat[matrix_coefficients][2];
  cgv = convmat[matrix_coefficients][3];

  /* allocate colors */

  gc = DefaultGC(display, screen);
  cmap = DefaultColormap(display, screen);
  private = 0;

  /* color allocation:
   * i is the (internal) 8 bit color number, it consists of separate
   * bit fields for Y, U and V: i = (yyyyuuvv), we don't use yyyy=0000
   * and yyyy=1111, this leaves 32 colors for other applications
   *
   * the allocated colors correspond to the following Y, U and V values:
   * Y:   24, 40, 56, 72, 88, 104, 120, 136, 152, 168, 184, 200, 216, 232
   * U,V: -48, -16, 16, 48
   *
   * U and V values span only about half the color space; this gives
   * usually much better quality, although highly saturated colors can
   * not be displayed properly
   *
   * translation to R,G,B is implicitly done by the color look-up table
   */
  for (i=16; i<240; i++)
  {
    /* color space conversion */
    y = 16*((i>>4)&15) + 8;
    u = 32*((i>>2)&3)  - 48;
    v = 32*(i&3)       - 48;

    y = 76309 * (y - 16); /* (255/219)*65536 */

    /* 32768 is 0.5 in 16.16 fixed point (rounding); the blue channel
     * formerly used 32786 by mistake
     */
    r = clp[(y + crv*v + 32768)>>16];
    g = clp[(y - cgu*u -cgv*v + 32768)>>16];
    b = clp[(y + cbu*u + 32768)>>16];

    /* X11 colors are 16 bit */
    xcolor.red   = r << 8;
    xcolor.green = g << 8;
    xcolor.blue  = b << 8;

    if (XAllocColor(display, cmap, &xcolor) != 0)
      pixel[i] = xcolor.pixel;
    else
    {
      /* allocation failed, have to use a private colormap */

      if (private)
        error("Couldn't allocate private colormap");

      private = 1;

      if (!quiet)
        fprintf(stderr, "Using private colormap (%d colors were available).\n",
          i-16);

      /* Free colors. */
      while (--i >= 16)
      {
        tmp_pixel = pixel[i]; /* because XFreeColors expects unsigned long */
        XFreeColors(display, cmap, &tmp_pixel, 1, 0);
      }

      /* i is now 15, this restarts the outer loop */

      /* create private colormap */

      XGetWindowAttributes(display, window, &xwa);
      cmap = XCreateColormap(display, window, xwa.visual, AllocNone);
      XSetWindowColormap(display, window, cmap);
    }
  }

#ifdef SH_MEM
  if (XShmQueryExtension(display))
    shmem_flag = 1;
  else
  {
    shmem_flag = 0;
    if (!quiet)
      fprintf(stderr, "Shared memory not supported\nReverting to normal Xlib\n");
  }

  if (shmem_flag)
    CompletionType = XShmGetEventBase(display) + ShmCompletion;

  /* errors raised by the server during the attach below are caught
   * through gXErrorFlag instead of terminating the program
   */
  InstallXErrorHandler();

  if (shmem_flag)
  {

    ximage = XShmCreateImage(display, None, 8, ZPixmap, NULL,
                             &shminfo1,
                             coded_picture_width, coded_picture_height);

    if (!prog_seq)
      ximage2 = XShmCreateImage(display, None, 8, ZPixmap, NULL,
                                &shminfo2,
                                coded_picture_width, coded_picture_height);

    /* If no go, then revert to normal Xlib calls. */

    if (ximage==NULL || (!prog_seq && ximage2==NULL))
    {
      if (ximage!=NULL)
        XDestroyImage(ximage);
      if (!prog_seq && ximage2!=NULL)
        XDestroyImage(ximage2);
      if (!quiet)
        fprintf(stderr, "Shared memory error, disabling (Ximage error)\n");
      goto shmemerror;
    }

    /* Success here, continue. */

    shminfo1.shmid = shmget(IPC_PRIVATE,
                            ximage->bytes_per_line * ximage->height,
                            IPC_CREAT | 0777);
    if (!prog_seq)
      shminfo2.shmid = shmget(IPC_PRIVATE,
                              ximage2->bytes_per_line * ximage2->height,
                              IPC_CREAT | 0777);

    if (shminfo1.shmid<0 || (!prog_seq && shminfo2.shmid<0))
    {
      XDestroyImage(ximage);
      /* a segment that was created must be removed again, it would
       * otherwise outlive the process
       */
      if (shminfo1.shmid>=0)
        shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq)
      {
        XDestroyImage(ximage2);
        if (shminfo2.shmid>=0)
          shmctl(shminfo2.shmid, IPC_RMID, 0);
      }
      if (!quiet)
        fprintf(stderr, "Shared memory error, disabling (seg id error)\n");
      goto shmemerror;
    }

    shminfo1.shmaddr = (char *) shmat(shminfo1.shmid, 0, 0);
    /* the second segment exists for non-progressive sequences only */
    if (!prog_seq)
      shminfo2.shmaddr = (char *) shmat(shminfo2.shmid, 0, 0);

    if (shminfo1.shmaddr==((char *) -1) ||
        (!prog_seq && shminfo2.shmaddr==((char *) -1)))
    {
      XDestroyImage(ximage);
      if (shminfo1.shmaddr!=((char *) -1))
        shmdt(shminfo1.shmaddr);
      shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq)
      {
        XDestroyImage(ximage2);
        if (shminfo2.shmaddr!=((char *) -1))
          shmdt(shminfo2.shmaddr);
        shmctl(shminfo2.shmid, IPC_RMID, 0);
      }
      if (!quiet)
      {
        fprintf(stderr, "Shared memory error, disabling (address error)\n");
      }
      goto shmemerror;
    }

    ximage->data = shminfo1.shmaddr;
    dithered_image = (unsigned char *)ximage->data;
    shminfo1.readOnly = False;
    XShmAttach(display, &shminfo1);
    if (!prog_seq)
    {
      ximage2->data = shminfo2.shmaddr;
      dithered_image2 = (unsigned char *)ximage2->data;
      shminfo2.readOnly = False;
      XShmAttach(display, &shminfo2);
    }

    /* force the attach requests to be processed so that a failure
     * has reached HandleXError() before gXErrorFlag is tested
     */
    XSync(display, False);

    if (gXErrorFlag)
    {
      /* Ultimate failure here. */
      XDestroyImage(ximage);
      shmdt(shminfo1.shmaddr);
      shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq)
      {
        XDestroyImage(ximage2);
        shmdt(shminfo2.shmaddr);
        shmctl(shminfo2.shmid, IPC_RMID, 0);
      }
      if (!quiet)
        fprintf(stderr, "Shared memory error, disabling.\n");
      gXErrorFlag = 0;
      goto shmemerror;
    }
    else
    {
      /* mark the segments for removal right away: they disappear
       * automatically once the last attachment is gone
       */
      shmctl(shminfo1.shmid, IPC_RMID, 0);
      if (!prog_seq)
        shmctl(shminfo2.shmid, IPC_RMID, 0);
    }

    if (!quiet)
    {
      fprintf(stderr, "Sharing memory.\n");
    }
  }
  else
  {
shmemerror:
    shmem_flag = 0;
#endif

    /* plain Xlib images: &dummy is only a placeholder, display_image()
     * points ximage->data at the dithered picture before every XPutImage
     */
    ximage = XCreateImage(display,None,8,ZPixmap,0,&dummy,
                          coded_picture_width,coded_picture_height,8,0);

    if (!(dithered_image = (unsigned char *)malloc(coded_picture_width*
                                                   coded_picture_height)))
      error("malloc failed");

    if (!prog_seq)
    {
      ximage2 = XCreateImage(display,None,8,ZPixmap,0,&dummy,
                             coded_picture_width,coded_picture_height,8,0);

      if (!(dithered_image2 = (unsigned char *)malloc(coded_picture_width*
                                                      coded_picture_height)))
        error("malloc failed");
    }

#ifdef SH_MEM
  }

  DeInstallXErrorHandler();
#endif
}

/* release the shared memory images set up by init_display();
 * does nothing when shared memory is not in use (or SH_MEM is
 * not defined)
 */
void exit_display()
{
#ifdef SH_MEM
  if (!shmem_flag)
    return;

  /* first image: detach the server, drop the XImage, detach ourselves */
  XShmDetach(display, &shminfo1);
  XDestroyImage(ximage);
  shmdt(shminfo1.shmaddr);

  /* the second image exists for non-progressive sequences only */
  if (prog_seq)
    return;

  XShmDetach(display, &shminfo2);
  XDestroyImage(ximage2);
  shmdt(shminfo2.shmaddr);
#endif
}

/* put one dithered picture into the window
 *
 * ximage:         image to display (these parameters hide the file
 *                 level variables of the same name)
 * dithered_image: pixel data, only used by the plain Xlib path; a
 *                 shared memory image already refers to its data
 */
static void display_image(ximage,dithered_image)
XImage *ximage;
unsigned char *dithered_image;
{
#ifdef SH_MEM
  XEvent done;

  if (shmem_flag)
  {
    /* the final True asks the server for a completion event */
    XShmPutImage(display, window, gc, ximage,
                 0, 0, 0, 0, ximage->width, ximage->height, True);
    XFlush(display);

    /* read events until the completion event has arrived */
    do
    {
      XNextEvent(display, &done);
    }
    while (done.type != CompletionType);

    return;
  }
#endif

  /* plain Xlib: attach the pixel buffer and send the image */
  ximage->data = (char *) dithered_image;
  XPutImage(display, window, gc, ximage, 0, 0, 0, 0, ximage->width, ximage->height);
}

/* display the second image (ximage2 / dithered_image2); dither() fills
 * that buffer for non-progressive sequences
 */
void display_second_field()
{
  display_image(ximage2,dithered_image2);
}

/* 4x4 ordered dither
 *
 * threshold pattern:
 *   0  8  2 10
 *  12  4 14  6
 *   3 11  1  9
 *  15  7 13  5
 */

/* fill the dither look-up tables
 *
 * ytab[s+8] holds the luminance bit field (yyyy0000) for the value s,
 * restricted to the levels 1..14; utab / vtab hold the chrominance
 * bit fields (uu00 and vv) for the levels 0..3
 */
void init_dither()
{
  int idx, level;

  /* luminance: entry idx belongs to the value idx-8 */
  for (idx=0; idx<256+16; idx++)
  {
    level = (idx-8)>>4;
    level = (level<1) ? 1 : ((level>14) ? 14 : level);
    ytab[idx] = level<<4;
  }

  /* chrominance: both tables use the same quantization */
  for (idx=0; idx<128+16; idx++)
  {
    level = (idx-40)>>4;
    level = (level<0) ? 0 : ((level>3) ? 3 : level);
    utab[idx] = level<<2;
    vtab[idx] = level;
  }
}

/* dither a decoded picture and display it
 *
 * src: the three component planes of the picture
 *
 * Progressive sequences are dithered as one frame.  Otherwise both
 * fields are dithered into separate images: the field shown first goes
 * to dithered_image and is displayed here, the other one goes to
 * dithered_image2 (see display_second_field()).
 */
void dither(src)
unsigned char *src[];
{
  int top_shown_first, use_420_dither;

  if (prog_seq)
  {
    ditherframe(src);
    display_image(ximage,dithered_image);
    return;
  }

  top_shown_first = (pict_struct==FRAME_PICTURE && topfirst)
                    || pict_struct==BOTTOM_FIELD;
  use_420_dither = (chroma_format==CHROMA420 && hiQdither);

  if (use_420_dither)
  {
    if (top_shown_first)
    {
      dithertop420(src,dithered_image);
      ditherbot420(src,dithered_image2);
    }
    else
    {
      ditherbot420(src,dithered_image);
      dithertop420(src,dithered_image2);
    }
  }
  else
  {
    if (top_shown_first)
    {
      dithertop(src,dithered_image);
      ditherbot(src,dithered_image2);
    }
    else
    {
      ditherbot(src,dithered_image);
      dithertop(src,dithered_image2);
    }
  }

  display_image(ximage,dithered_image);
}

/* dither a complete frame into dithered_image
 *
 * src: component planes, src[0] is read through py (indexes ytab),
 *      src[1] / src[2] through pu / pv (index utab / vtab)
 *
 * Each pass of the outer loop handles four picture lines; every line
 * uses its own row of the 4x4 threshold pattern, the threshold being
 * the offset added to the table index.  A chroma sample is fetched for
 * every pixel with CHROMA444 and for every second pixel otherwise.
 */
static void ditherframe(src)
unsigned char *src[];
{
  int i,j;
  int y,u,v;
  unsigned char *py,*pu,*pv,*dst;

  py = src[0];
  pu = src[1];
  pv = src[2];
  dst = dithered_image;

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y]|utab[u]|vtab[v]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+8]|utab[u+8]|vtab[v+8]];
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+2]|utab[u+2]|vtab[v+2]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+10]|utab[u+10]|vtab[v+10]];
    }

    /* 4:2:0: step back so that line j + 1 reads the same chroma line */
    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }

    /* line j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+12]|utab[u+12]|vtab[v+12]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+4]|utab[u+4]|vtab[v+4]];
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+14]|utab[u+14]|vtab[v+14]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+6]|utab[u+6]|vtab[v+6]];
    }

    /* line j + 2 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+3]|utab[u+3]|vtab[v+3]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+11]|utab[u+11]|vtab[v+11]];
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+1]|utab[u+1]|vtab[v+1]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+9]|utab[u+9]|vtab[v+9]];
    }

    /* 4:2:0: step back so that line j + 3 reads the same chroma line */
    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }

    /* line j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+15]|utab[u+15]|vtab[v+15]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+7]|utab[u+7]|vtab[v+7]];
      y = *py++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++ = pixel[ytab[y+13]|utab[u+13]|vtab[v+13]];
      y = *py++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++ = pixel[ytab[y+5]|utab[u+5]|vtab[v+5]];
    }
  }

}

/* dither the top field (picture lines 0, 2, 4, ...) to a full height
 * image
 *
 * src: component planes of the picture
 * dst: output image, coded_picture_width bytes per line
 *
 * dst receives the even output lines, taken directly from the field
 * line at py; dst2 receives the odd output lines, whose luminance is
 * the average of the field line at py and the next field line at py2.
 * Chrominance is not interpolated: both output lines of a pair use the
 * same chroma samples.
 */
static void dithertop(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  int y,y2,u,v;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  py = src[0];
  py2 = src[0] + (coded_picture_width<<1);
  pu = src[1];
  pv = src[2];
  dst2 = dst + coded_picture_width;

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[y]|utab[u]|vtab[v]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+12]|utab[u+12]|vtab[v+12]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[y+8]|utab[u+8]|vtab[v+8]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+4]|utab[u+4]|vtab[v+4]];

      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[y+2]|utab[u+2]|vtab[v+2]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+14]|utab[u+14]|vtab[v+14]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[y+10]|utab[u+10]|vtab[v+10]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+6]|utab[u+6]|vtab[v+6]];
    }

    /* skip the line of the other field */
    py += coded_picture_width;

    /* for the last line pair there is no following field line: py2 is
     * moved back onto the line py points to instead
     */
    if (j!=(coded_picture_height-4))
      py2 += coded_picture_width;
    else
      py2 -= coded_picture_width;

    dst += coded_picture_width;
    dst2 += coded_picture_width;

    /* 4:2:0: reuse the chroma line just read for the next line pair,
     * otherwise skip the chroma line of the other field
     */
    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }
    else
    {
      pu += chrom_width;
      pv += chrom_width;
    }

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[y+3]|utab[u+3]|vtab[v+3]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+15]|utab[u+15]|vtab[v+15]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[y+11]|utab[u+11]|vtab[v+11]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+7]|utab[u+7]|vtab[v+7]];

      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[y+1]|utab[u+1]|vtab[v+1]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+13]|utab[u+13]|vtab[v+13]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[y+9]|utab[u+9]|vtab[v+9]];
      *dst2++ = pixel[ytab[((y+y2)>>1)+5]|utab[u+5]|vtab[v+5]];
    }

    /* skip the lines of the other field (source) and the lines written
     * through the other output pointer (destination)
     */
    py += coded_picture_width;
    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;
    pu += chrom_width;
    pv += chrom_width;
  }
}

/* dither the bottom field (picture lines 1, 3, 5, ...) to a full height
 * image
 *
 * src: component planes of the picture
 * dst: output image, coded_picture_width bytes per line
 *
 * dst2 receives the odd output lines, taken directly from the field
 * line at py2; dst receives the even output lines, whose luminance is
 * the average of the previous field line at py and the field line at
 * py2.  Chrominance is not interpolated: both output lines of a pair
 * use the same chroma samples.
 */
static void ditherbot(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  int y,y2,u,v;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  /* output line 0 has no field line above it: py and py2 both start on
   * the first line of the field
   */
  py = src[0] + coded_picture_width;
  py2 = py;
  pu = src[1] + chrom_width;
  pv = src[2] + chrom_width;
  dst2 = dst + coded_picture_width;

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[((y+y2)>>1)]|utab[u]|vtab[v]];
      *dst2++ = pixel[ytab[y2+12]|utab[u+12]|vtab[v+12]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[((y+y2)>>1)+8]|utab[u+8]|vtab[v+8]];
      *dst2++ = pixel[ytab[y2+4]|utab[u+4]|vtab[v+4]];

      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[((y+y2)>>1)+2]|utab[u+2]|vtab[v+2]];
      *dst2++ = pixel[ytab[y2+14]|utab[u+14]|vtab[v+14]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[((y+y2)>>1)+10]|utab[u+10]|vtab[v+10]];
      *dst2++ = pixel[ytab[y2+6]|utab[u+6]|vtab[v+6]];
    }

    /* after the first line pair py goes back to the first field line,
     * which makes it trail py2 by one field line from here on
     */
    if (j==0)
      py -= coded_picture_width;
    else
      py += coded_picture_width;

    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;

    /* 4:2:0: reuse the chroma line just read for the next line pair,
     * otherwise skip the chroma line of the other field
     */
    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }
    else
    {
      pu += chrom_width;
      pv += chrom_width;
    }

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[((y+y2)>>1)+3]|utab[u+3]|vtab[v+3]];
      *dst2++ = pixel[ytab[y2+15]|utab[u+15]|vtab[v+15]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[((y+y2)>>1)+11]|utab[u+11]|vtab[v+11]];
      *dst2++ = pixel[ytab[y2+7]|utab[u+7]|vtab[v+7]];

      y = *py++;
      y2 = *py2++;
      u = *pu++ >> 1;
      v = *pv++ >> 1;
      *dst++  = pixel[ytab[((y+y2)>>1)+1]|utab[u+1]|vtab[v+1]];
      *dst2++ = pixel[ytab[y2+13]|utab[u+13]|vtab[v+13]];

      y = *py++;
      y2 = *py2++;
      if (chroma_format==CHROMA444)
      {
        u = *pu++ >> 1;
        v = *pv++ >> 1;
      }
      *dst++  = pixel[ytab[((y+y2)>>1)+9]|utab[u+9]|vtab[v+9]];
      *dst2++ = pixel[ytab[y2+5]|utab[u+5]|vtab[v+5]];
    }

    /* skip the lines of the other field (source) and the lines written
     * through the other output pointer (destination)
     */
    py += coded_picture_width;
    py2 += coded_picture_width;
    dst += coded_picture_width;
    dst2 += coded_picture_width;
    pu += chrom_width;
    pv += chrom_width;
  }
}

/* dither the top field to a full height image, with vertical
 * interpolation of luminance and chrominance (dither() selects this
 * variant for CHROMA420 when hiQdither is set)
 *
 * src: component planes of the picture
 * dst: output image, coded_picture_width bytes per line
 *
 * py1/pu1/pv1 point to the current field line, py2/pu2/pv2 to the
 * following line of the same field; the output lines are weighted sums
 * of the two (weights 3:1, 1:3 and, for some chroma lines, 1:1).
 * dst receives the even and dst2 the odd output lines.  One chroma
 * sample is read for every second pixel.
 */
static void dithertop420(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  int y1,u1,v1,y2,u2,v2;
  unsigned char *py1,*pu1,*pv1,*py2,*pu2,*pv2,*dst2;

  py1 = src[0];
  pu1 = src[1];
  pv1 = src[2];

  py2 = py1 + (coded_picture_width<<1);
  pu2 = pu1 + (chrom_width<<1);
  pv2 = pv1 + (chrom_width<<1);

  dst2 = dst + coded_picture_width;

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)]|utab[u1]|vtab[v1]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+12]|utab[((3*u1+u2)>>2)+12]
                                             |vtab[((3*v1+v2)>>2)+12]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+8]|utab[u1+8]|vtab[v1+8]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+4]|utab[((3*u1+u2)>>2)+4]
                                            |vtab[((3*v1+v2)>>2)+4]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+2]|utab[u1+2]|vtab[v1+2]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+14]|utab[((3*u1+u2)>>2)+14]
                                             |vtab[((3*v1+v2)>>2)+14]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+10]|utab[u1+10]|vtab[v1+10]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+6]|utab[((3*u1+u2)>>2)+6]
                                            |vtab[((3*v1+v2)>>2)+6]];
    }

    /* skip the line of the other field */
    py1 += coded_picture_width;

    /* for the last line pair there is no following field line: py2 is
     * moved back onto the line py1 points to instead
     */
    if (j!=(coded_picture_height-4))
      py2 += coded_picture_width;
    else
      py2 -= coded_picture_width;

    /* the next line pair reads the same two chroma lines again */
    pu1 -= chrom_width;
    pv1 -= chrom_width;
    pu2 -= chrom_width;
    pv2 -= chrom_width;

    dst  += coded_picture_width;
    dst2 += coded_picture_width;

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+3]|utab[((u1+u2)>>1)+3]
                                            |vtab[((v1+v2)>>1)+3]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+15]|utab[((u1+3*u2)>>2)+15]
                                             |vtab[((v1+3*v2)>>2)+15]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+11]|utab[((u1+u2)>>1)+11]
                                             |vtab[((v1+v2)>>1)+11]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+7]|utab[((u1+3*u2)>>2)+7]
                                            |vtab[((v1+3*v2)>>2)+7]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+1]|utab[((u1+u2)>>1)+1]
                                            |vtab[((v1+v2)>>1)+1]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+13]|utab[((u1+3*u2)>>2)+13]
                                             |vtab[((v1+3*v2)>>2)+13]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+9]|utab[((u1+u2)>>1)+9]
                                            |vtab[((v1+v2)>>1)+9]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+5]|utab[((u1+3*u2)>>2)+5]
                                            |vtab[((v1+3*v2)>>2)+5]];
    }

    py1 += coded_picture_width;
    py2 += coded_picture_width;
    pu1 += chrom_width;
    pv1 += chrom_width;
    /* before the final pass of the outer loop pu2/pv2 are set to the
     * same chroma line as pu1/pv1, as there is no further line of this
     * field below it
     */
    if (j!=(coded_picture_height-8))
    {
      pu2 += chrom_width;
      pv2 += chrom_width;
    }
    else
    {
      pu2 -= chrom_width;
      pv2 -= chrom_width;
    }
    dst += coded_picture_width;
    dst2+= coded_picture_width;
  }
}

/* dither the bottom field to a full height image, with vertical
 * interpolation of luminance and chrominance (dither() selects this
 * variant for CHROMA420 when hiQdither is set)
 *
 * src: component planes of the picture
 * dst: output image, coded_picture_width bytes per line
 *
 * py1/pu1/pv1 point to the previous line of the field, py2/pu2/pv2 to
 * the current one; the output lines are weighted sums of the two.
 * Inside the main loop dst2 writes the even output lines and dst the
 * odd output line above each of them, so dst trails dst2 by one line;
 * the last output line is produced by the separate loop at the end.
 * One chroma sample is read for every second pixel.
 */
static void ditherbot420(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  int y1,u1,v1,y2,u2,v2;
  unsigned char *py1,*pu1,*pv1,*py2,*pu2,*pv2,*dst2;

  /* there is no previous field line at the top of the picture: both
   * pointer sets start on the first line of the field
   */
  py2 = py1 = src[0] + coded_picture_width;
  pu2 = pu1 = src[1] + chrom_width;
  pv2 = pv1 = src[2] + chrom_width;

  /* dst and dst2 start on the same line: in the first pass (j == 0)
   * the value stored through dst is overwritten by the store through
   * dst2 right after it
   */
  dst2 = dst;

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+15]|utab[((3*u1+u2)>>2)+15]
                                             |vtab[((3*v1+v2)>>2)+15]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)]|utab[((u1+u2)>>1)]
                                          |vtab[((v1+v2)>>1)]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+7]|utab[((3*u1+u2)>>2)+7]
                                            |vtab[((3*v1+v2)>>2)+7]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+8]|utab[((u1+u2)>>1)+8]
                                            |vtab[((v1+v2)>>1)+8]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+13]|utab[((3*u1+u2)>>2)+13]
                                             |vtab[((3*v1+v2)>>2)+13]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+2]|utab[((u1+u2)>>1)+2]
                                            |vtab[((v1+v2)>>1)+2]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+5]|utab[((3*u1+u2)>>2)+5]
                                            |vtab[((3*v1+v2)>>2)+5]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+10]|utab[((u1+u2)>>1)+10]
                                             |vtab[((v1+v2)>>1)+10]];
    }

    /* in the first pass py1 returns to the first field line, from then
     * on it trails py2 by one field line
     */
    if (j!=0)
      py1 += coded_picture_width;
    else
      py1 -= coded_picture_width;

    py2 += coded_picture_width;

    /* the next line pair reads the same two chroma lines again */
    pu1 -= chrom_width;
    pv1 -= chrom_width;
    pu2 -= chrom_width;
    pv2 -= chrom_width;

    /* in the first pass dst is not advanced an extra line, which puts
     * it one line above dst2
     */
    if (j!=0)
      dst  += coded_picture_width;

    dst2 += coded_picture_width;

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+12]|utab[((u1+3*u2)>>2)+12]
                                             |vtab[((v1+3*v2)>>2)+12]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+3]|utab[u2+3]
                                            |vtab[v2+3]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+4]|utab[((u1+3*u2)>>2)+4]
                                            |vtab[((v1+3*v2)>>2)+4]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+11]|utab[u2+11]
                                             |vtab[v2+11]];

      y1 = *py1++;
      y2 = *py2++;
      u1 = *pu1++ >> 1;
      v1 = *pv1++ >> 1;
      u2 = *pu2++ >> 1;
      v2 = *pv2++ >> 1;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+14]|utab[((u1+3*u2)>>2)+14]
                                             |vtab[((v1+3*v2)>>2)+14]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+1]|utab[u2+1]
                                            |vtab[v2+1]];

      y1 = *py1++;
      y2 = *py2++;
      *dst++  = pixel[ytab[((3*y1+y2)>>2)+6]|utab[((u1+3*u2)>>2)+6]
                                            |vtab[((v1+3*v2)>>2)+6]];
      *dst2++ = pixel[ytab[((y1+3*y2)>>2)+9]|utab[u2+9]
                                            |vtab[v2+9]];
    }

    py1 += coded_picture_width;
    py2 += coded_picture_width;

    /* in the first pass pu1/pv1 return to their starting line, from
     * then on they trail pu2/pv2 by one chroma line of the field
     */
    if (j!=0)
    {
      pu1 += chrom_width;
      pv1 += chrom_width;
    }
    else
    {
      pu1 -= chrom_width;
      pv1 -= chrom_width;
    }

    pu2 += chrom_width;
    pv2 += chrom_width;

    dst += coded_picture_width;
    dst2+= coded_picture_width;
  }

  /* the ...2 pointers have moved past the last field line: bring them
   * back onto the lines the ...1 pointers refer to
   */
  py2 -= (coded_picture_width<<1);
  pu2 -= (chrom_width<<1);
  pv2 -= (chrom_width<<1);

  /* dither last line */
  for (i=0; i<coded_picture_width; i+=4)
  {
    y1 = *py1++;
    y2 = *py2++;
    u1 = *pu1++ >> 1;
    v1 = *pv1++ >> 1;
    u2 = *pu2++ >> 1;
    v2 = *pv2++ >> 1;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+15]|utab[((3*u1+u2)>>2)+15]
                                           |vtab[((3*v1+v2)>>2)+15]];

    y1 = *py1++;
    y2 = *py2++;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+7]|utab[((3*u1+u2)>>2)+7]
                                          |vtab[((3*v1+v2)>>2)+7]];

    y1 = *py1++;
    y2 = *py2++;
    u1 = *pu1++ >> 1;
    v1 = *pv1++ >> 1;
    u2 = *pu2++ >> 1;
    v2 = *pv2++ >> 1;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+13]|utab[((3*u1+u2)>>2)+13]
                                           |vtab[((3*v1+v2)>>2)+13]];

    y1 = *py1++;
    y2 = *py2++;
    *dst++  = pixel[ytab[((3*y1+y2)>>2)+5]|utab[((3*u1+u2)>>2)+5]
                                          |vtab[((3*v1+v2)>>2)+5]];
    }

}
#endif
