[Xquartz-changes] mesa: Branch 'PR-575' - 21 commits

Thu May 10 18:58:51 PDT 2012

configs/default                                        |    2 
 src/egl/main/eglimage.c                                |    2 
 src/gallium/auxiliary/util/u_double_list.h             |   15 +-
 src/gallium/auxiliary/util/u_linkage.h                 |    9 -
 src/gallium/drivers/nvfx/nvfx_fragprog.c               |    3 
 src/gallium/drivers/r300/compiler/radeon_program_alu.c |   73 ++++++----
 src/gallium/drivers/r300/r300_emit.c                   |   24 +--
 src/gallium/drivers/r300/r300_state.c                  |    5 
 src/gallium/state_trackers/vega/text.c                 |    4 
 src/gallium/targets/egl-static/egl_st.c                |    3 
 src/glx/apple/Makefile                                 |    1 
 src/glx/apple/apple_glx.c                              |   25 ---
 src/glx/apple/apple_glx.h                              |    3 
 src/glx/apple/apple_glx_context.c                      |    2 
 src/glx/apple/apple_glx_drawable.c                     |   29 ++--
 src/glx/apple/apple_glx_log.c                          |  118 +++++++++++++++++
 src/glx/apple/apple_glx_log.h                          |   57 ++++++++
 src/glx/apple/apple_glx_surface.c                      |    8 -
 src/mesa/drivers/dri/i965/brw_eu.h                     |    1 
 src/mesa/drivers/dri/i965/brw_eu_emit.c                |   53 ++++++-
 src/mesa/drivers/dri/i965/brw_fs.h                     |   22 +++
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp              |   55 +++++++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp           |    5 
 src/mesa/drivers/dri/i965/brw_tex_layout.c             |    5 
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c       |   38 +++++
 src/mesa/drivers/dri/intel/intel_fbo.c                 |   56 --------
 src/mesa/drivers/dri/intel/intel_fbo.h                 |    3 
 src/mesa/drivers/dri/intel/intel_tex_obj.h             |    1 
 src/mesa/drivers/dri/intel/intel_tex_validate.c        |   10 -
 src/mesa/drivers/windows/gdi/wmesa.c                   |   97 ++-----------
 30 files changed, 482 insertions(+), 247 deletions(-)

New commits:
commit 4e5badfbf6b82223d8cdfb9a96cdae3f6792bb5e
Author: Jeremy Huddleston <jeremyhu at apple.com>
Date:   Thu May 10 18:56:50 2012 -0700

    darwin: Unlock our mutex before destroying it
    
    http://xquartz.macosforge.org/trac/ticket/575
    
    Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>

diff --git a/src/glx/apple/apple_glx_drawable.c b/src/glx/apple/apple_glx_drawable.c
index 3f84d56..b261a55 100644
--- a/src/glx/apple/apple_glx_drawable.c
+++ b/src/glx/apple/apple_glx_drawable.c
@@ -174,6 +174,9 @@ destroy_drawable(struct apple_glx_drawable *d)
 
    apple_glx_diagnostic("%s: freeing %p\n", __func__, (void *) d);
 
+   /* Stupid recursive locks */
+   while (pthread_mutex_unlock(&d->mutex) == 0);
+
    err = pthread_mutex_destroy(&d->mutex);
    if (err) {
       fprintf(stderr, "pthread_mutex_destroy error: %s\n", strerror(err));
commit 17b600182e6f0a178a809281f64fe8ba4cceec71
Author: Jeremy Huddleston <jeremyhu at apple.com>
Date:   Fri Apr 27 18:36:33 2012 -0700

    darwin: Eliminate a possible race condition while destroying a surface
    
    Introduced by: c60ffd2840036af1ea6f2b6c6e1e9014bb8e2c34
    Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>

diff --git a/src/glx/apple/apple_glx_surface.c b/src/glx/apple/apple_glx_surface.c
index d42fa3b..9155202 100644
--- a/src/glx/apple/apple_glx_surface.c
+++ b/src/glx/apple/apple_glx_surface.c
@@ -207,9 +207,6 @@ apple_glx_surface_destroy(unsigned int uid)
       d->types.surface.pending_destroy = true;
       d->release(d);
 
-      /* apple_glx_drawable_find_by_uid returns a locked drawable */
-      d->unlock(d);
-
       /* 
        * We release 2 references to the surface.  One was acquired by
        * the find, and the other was leftover from a context, or 
@@ -220,6 +217,9 @@ apple_glx_surface_destroy(unsigned int uid)
        * to actually destroy it when the pending_destroy is processed
        * by a glViewport callback (see apple_glx_context_update()).
        */
-      d->destroy(d);
+      if (!d->destroy(d)) {
+          /* apple_glx_drawable_find_by_uid returns a locked drawable */
+          d->unlock(d);
+      }
    }
 }
commit fa68a8bae3961808288cfd84d5a7843f6fc0f317
Author: Yuanhan Liu <yuanhan.liu at linux.intel.com>
Date:   Wed May 2 17:29:11 2012 +0800

    i965: fix wrong cube/3D texture layout
    
    Fix wrong cube/3D texture layout for the tailing levels whose width or
    height is smaller than the align unit.
    
    From 965 B-spec http://intellinuxgraphics.org/VOL_1_graphics_core.pdf at
    page 135:
       All of the LOD=0 q-planes are stacked vertically, then below that,
       the LOD=1 qplanes are stacked two-wide, then the LOD=2 qplanes are
       stacked four-wide below that, and so on.
    
    Thus we should always inrease pack_x_nr, which results to the pitch of LODn
    may greater than the pitch of LOD0. So we should refactor mt->total_width
    when needed.
    
    This would fix the following webgl test case on all gen4 platforms:
      conformance/textures/texture-size-cube-maps.html
    
    NOTE: This is a candidate for stable release branches.
    
    Signed-off-by: Yuanhan Liu <yuanhan.liu at linux.intel.com>
    (cherry picked from commit f939776cb2372a3427784f88d34bf14c18a5a212)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 7a1b91f..8bf1d3d 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -115,6 +115,8 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
 	       intel_miptree_set_image_offset(mt, level, q, x, y);
 	       x += pack_x_pitch;
 	    }
+            if (x > mt->total_width)
+               mt->total_width = x;
 
 	    x = 0;
 	    y += pack_y_pitch;
@@ -135,10 +137,9 @@ brw_miptree_layout(struct intel_context *intel, struct intel_mipmap_tree *mt)
 	       pack_x_nr <<= 1;
 	    }
 	 } else {
+            pack_x_nr <<= 1;
 	    if (pack_x_pitch > 4) {
 	       pack_x_pitch >>= 1;
-	       pack_x_nr <<= 1;
-	       assert(pack_x_pitch * pack_x_nr <= mt->total_width);
 	    }
 
 	    if (pack_y_pitch > 2) {
commit 064c324d8d1e254e76e9a6d2681432063d80eff9
Author: Brian Paul <brianp at vmware.com>
Date:   Mon May 7 07:29:34 2012 -0600

    mesa: bump version to 8.0.2 in configs/default

diff --git a/configs/default b/configs/default
index 2ca6fe4..a4069cb 100644
--- a/configs/default
+++ b/configs/default
@@ -10,7 +10,7 @@ CONFIG_NAME = default
 # Version info
 MESA_MAJOR=8
 MESA_MINOR=0
-MESA_TINY=0
+MESA_TINY=2
 MESA_VERSION = $(MESA_MAJOR).$(MESA_MINOR).$(MESA_TINY)
 
 # external projects.  This should be useless now that we use libdrm.
commit 8700db8c876c1208a302560c0737c2cbde8b9c77
Author: Brian Paul <brianp at vmware.com>
Date:   Sun Mar 11 18:31:32 2012 -0600

    mesa/gdi: remove clear_color() function
    
    Setup the clearing color in the clear() function.
    
    Reviewed-by: Dave Airlie <airlied at redhat.com>
    (cherry picked from commit 9c53fc593e9d8c735e492ace688c76fcbb0fc687)

diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
index ba11998..93da05f 100644
--- a/src/mesa/drivers/windows/gdi/wmesa.c
+++ b/src/mesa/drivers/windows/gdi/wmesa.c
@@ -243,39 +243,9 @@ static void wmesa_flush(struct gl_context *ctx)
 /*****                   CLEAR Functions                          *****/
 /**********************************************************************/
 
-/* If we do not implement these, Mesa clears the buffers via the pixel
- * span writing interface, which is very slow for a clear operation.
- */
-
-/*
- * Set the color used to clear the color buffer.
- */
-static void clear_color(struct gl_context *ctx,
-                        const union gl_color_union color)
-{
-    WMesaContext pwc = wmesa_context(ctx);
-    GLubyte col[3];
-
-    UNCLAMPED_FLOAT_TO_UBYTE(col[0], color.f[0]);
-    UNCLAMPED_FLOAT_TO_UBYTE(col[1], color.f[1]);
-    UNCLAMPED_FLOAT_TO_UBYTE(col[2], color.f[2]);
-    pwc->clearColorRef = RGB(col[0], col[1], col[2]);
-    DeleteObject(pwc->clearPen);
-    DeleteObject(pwc->clearBrush);
-    pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef); 
-    pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef); 
-}
-
-
 /* 
- * Clear the specified region of the color buffer using the clear color 
- * or index as specified by one of the two functions above. 
- * 
- * This procedure clears either the front and/or the back COLOR buffers. 
- * Only the "left" buffer is cleared since we are not stereo. 
- * Clearing of the other non-color buffers is left to the swrast. 
+ * Clear the color/depth/stencil buffers.
  */ 
-
 static void clear(struct gl_context *ctx, GLbitfield mask)
 {
 #define FLIP(Y)  (ctx->DrawBuffer->Height - (Y) - 1)
@@ -298,6 +268,20 @@ static void clear(struct gl_context *ctx, GLbitfield mask)
 	return;
     }
 
+    if (mask & BUFFER_BITS_COLOR) {
+       /* setup the clearing color */
+       const union gl_color_union color = ctx->Color.ClearColor;
+       GLubyte col[3];
+       UNCLAMPED_FLOAT_TO_UBYTE(col[0], color.f[0]);
+       UNCLAMPED_FLOAT_TO_UBYTE(col[1], color.f[1]);
+       UNCLAMPED_FLOAT_TO_UBYTE(col[2], color.f[2]);
+       pwc->clearColorRef = RGB(col[0], col[1], col[2]);
+       DeleteObject(pwc->clearPen);
+       DeleteObject(pwc->clearBrush);
+       pwc->clearPen = CreatePen(PS_SOLID, 1, pwc->clearColorRef); 
+       pwc->clearBrush = CreateSolidBrush(pwc->clearColorRef); 
+    }
+
     /* Back buffer */
     if (mask & BUFFER_BIT_BACK_LEFT) { 
 	
@@ -1095,7 +1079,6 @@ WMesaContext WMesaCreateContext(HDC hDC,
     functions.GetBufferSize = wmesa_get_buffer_size;
     functions.Flush = wmesa_flush;
     functions.Clear = clear;
-    functions.ClearColor = clear_color;
     functions.ResizeBuffers = wmesa_resize_buffers;
     functions.Viewport = wmesa_viewport;
 
commit 53f88f86066d31eb4bbef7107c23b2b3efbf68a3
Author: Brian Paul <brianp at vmware.com>
Date:   Tue Jan 24 15:13:19 2012 -0700

    mesa/gdi: remove wmesa_set_renderbuffer_funcs() function
    
    The code is no longer relevant.
    Note: this driver is probably broken now.  There's no implementation
    of ctx->Driver.Map/UnmapRenderbuffer().
    (cherry picked from commit 4a1c66059957f8650afb73f9c0f982e4ae7b8200)

diff --git a/src/mesa/drivers/windows/gdi/wmesa.c b/src/mesa/drivers/windows/gdi/wmesa.c
index 40aa56e..ba11998 100644
--- a/src/mesa/drivers/windows/gdi/wmesa.c
+++ b/src/mesa/drivers/windows/gdi/wmesa.c
@@ -940,54 +940,6 @@ wmesa_renderbuffer_storage(struct gl_context *ctx,
 
 
 /**
- * Plug in the Get/PutRow/Values functions for a renderbuffer depending
- * on if we're drawing to the front or back color buffer.
- */
-static void
-wmesa_set_renderbuffer_funcs(struct gl_renderbuffer *rb, int pixelformat,
-                             int cColorBits, int double_buffer)
-{
-    if (double_buffer) {
-        /* back buffer */
-	/* Picking the correct span functions is important because
-	 * the DIB was allocated with the indicated depth. */
-	switch(pixelformat) {
-	case PF_5R6G5B:
-	    rb->PutRow = write_rgba_span_16;
-	    rb->PutValues = write_rgba_pixels_16;
-	    rb->GetRow = read_rgba_span_16;
-	    rb->GetValues = read_rgba_pixels_16;
-	    break;
-	case PF_8R8G8B:
-		if (cColorBits == 24)
-		{
-		    rb->PutRow = write_rgba_span_24;
-		    rb->PutValues = write_rgba_pixels_24;
-		    rb->GetRow = read_rgba_span_24;
-		    rb->GetValues = read_rgba_pixels_24;
-		}
-		else
-		{
-                    rb->PutRow = write_rgba_span_32;
-                    rb->PutValues = write_rgba_pixels_32;
-                    rb->GetRow = read_rgba_span_32;
-                    rb->GetValues = read_rgba_pixels_32;
-		}
-	    break;
-	default:
-	    break;
-	}
-    }
-    else {
-        /* front buffer (actual Windows window) */
-	rb->PutRow = write_rgba_span_front;
-	rb->PutValues = write_rgba_pixels_front;
-	rb->GetRow = read_rgba_span_front;
-	rb->GetValues = read_rgba_pixels_front;
-    }
-}
-
-/**
  * Called by ctx->Driver.ResizeBuffers()
  * Resize the front/back colorbuffers to match the latest window size.
  */
@@ -1275,11 +1227,9 @@ void WMesaMakeCurrent(WMesaContext c, HDC hdc)
         if (visual->doubleBufferMode == 1) {
             rb = wmesa_new_renderbuffer();
             _mesa_add_renderbuffer(&pwfb->Base, BUFFER_BACK_LEFT, rb);
-            wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat, pwfb->cColorBits, 1);
 	}
         rb = wmesa_new_renderbuffer();
         _mesa_add_renderbuffer(&pwfb->Base, BUFFER_FRONT_LEFT, rb);
-        wmesa_set_renderbuffer_funcs(rb, pwfb->pixelformat, pwfb->cColorBits, 0);
 
 	/* Let Mesa own the Depth, Stencil, and Accum buffers */
         _swrast_add_soft_renderbuffers(&pwfb->Base,
commit 0558ac1fd72dd38b9945a570201c059330b1f657
Author: Tom Stellard <thomas.stellard at amd.com>
Date:   Sat Apr 14 10:02:19 2012 -0400

    r300/compiler: Copy all instruction attributes during local transfoms
    
    Instruction attributes like WriteALUResult and ALUResultCompare
    were being discarded during the some of the local transformations.
    
    This fixes the following piglit tests:
    
    glsl1-inequality (vec2, pass)
    loopfunc
    fs-any-bvec2-using-if
    fs-op-ne-bvec2-bvec2-using-if
    fs-op-ne-ivec2-ivec2-using-if
    fs-op-ne-mat2-mat2-using-if
    fs-op-ne-vec2-vec2-using-if
    fs-op-ne-mat2x3-mat2x3-using-if
    fs-op-ne-mat2x4-mat2x4-using-if
    
    https://bugs.freedesktop.org/show_bug.cgi?id=45921
    
    (cherry-picked from commit 73249239cf71e3595ee19f3c1a02b8b0f58994cd)

diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index c48f936..b3da311 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++ b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -41,13 +41,16 @@
 
 static struct rc_instruction *emit1(
 	struct radeon_compiler * c, struct rc_instruction * after,
-	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
-	struct rc_src_register SrcReg)
+	rc_opcode Opcode, struct rc_sub_instruction * base,
+	struct rc_dst_register DstReg, struct rc_src_register SrcReg)
 {
 	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
 
+	if (base) {
+		memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+	}
+
 	fpi->U.I.Opcode = Opcode;
-	fpi->U.I.SaturateMode = Saturate;
 	fpi->U.I.DstReg = DstReg;
 	fpi->U.I.SrcReg[0] = SrcReg;
 	return fpi;
@@ -55,13 +58,17 @@ static struct rc_instruction *emit1(
 
 static struct rc_instruction *emit2(
 	struct radeon_compiler * c, struct rc_instruction * after,
-	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	rc_opcode Opcode, struct rc_sub_instruction * base,
+	struct rc_dst_register DstReg,
 	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
 {
 	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
 
+	if (base) {
+		memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+	}
+
 	fpi->U.I.Opcode = Opcode;
-	fpi->U.I.SaturateMode = Saturate;
 	fpi->U.I.DstReg = DstReg;
 	fpi->U.I.SrcReg[0] = SrcReg0;
 	fpi->U.I.SrcReg[1] = SrcReg1;
@@ -70,14 +77,18 @@ static struct rc_instruction *emit2(
 
 static struct rc_instruction *emit3(
 	struct radeon_compiler * c, struct rc_instruction * after,
-	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+	rc_opcode Opcode, struct rc_sub_instruction * base,
+	struct rc_dst_register DstReg,
 	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
 	struct rc_src_register SrcReg2)
 {
 	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
 
+	if (base) {
+		memcpy(&fpi->U.I, base, sizeof(struct rc_sub_instruction));
+	}
+
 	fpi->U.I.Opcode = Opcode;
-	fpi->U.I.SaturateMode = Saturate;
 	fpi->U.I.DstReg = DstReg;
 	fpi->U.I.SrcReg[0] = SrcReg0;
 	fpi->U.I.SrcReg[1] = SrcReg1;
@@ -221,7 +232,7 @@ static void transform_ABS(struct radeon_compiler* c,
 	struct rc_src_register src = inst->U.I.SrcReg[0];
 	src.Abs = 1;
 	src.Negate = RC_MASK_NONE;
-	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+	emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, src);
 	rc_remove_instruction(inst);
 }
 
@@ -240,7 +251,7 @@ static void transform_CEIL(struct radeon_compiler* c,
 
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
-	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
 		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
 	rc_remove_instruction(inst);
 }
@@ -256,7 +267,7 @@ static void transform_CLAMP(struct radeon_compiler *c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 	emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
 		inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
-	emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit2(c, inst->Prev, RC_OPCODE_MAX, &inst->U.I, inst->U.I.DstReg,
 		srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
 	rc_remove_instruction(inst);
 }
@@ -272,7 +283,7 @@ static void transform_DP2(struct radeon_compiler* c,
 	src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
 	src1.Swizzle &= ~(63 << (3 * 2));
 	src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
-	emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	emit2(c, inst->Prev, RC_OPCODE_DP3, &inst->U.I, inst->U.I.DstReg, src0, src1);
 	rc_remove_instruction(inst);
 }
 
@@ -283,7 +294,7 @@ static void transform_DPH(struct radeon_compiler* c,
 	src0.Negate &= ~RC_MASK_W;
 	src0.Swizzle &= ~(7 << (3 * 3));
 	src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
-	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
 	rc_remove_instruction(inst);
 }
 
@@ -294,7 +305,7 @@ static void transform_DPH(struct radeon_compiler* c,
 static void transform_DST(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
-	emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit2(c, inst->Prev, RC_OPCODE_MUL, &inst->U.I, inst->U.I.DstReg,
 		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
 		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
 	rc_remove_instruction(inst);
@@ -305,7 +316,7 @@ static void transform_FLR(struct radeon_compiler* c,
 {
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
-	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit2(c, inst->Prev, RC_OPCODE_ADD, &inst->U.I, inst->U.I.DstReg,
 		inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
 	rc_remove_instruction(inst);
 }
@@ -379,14 +390,14 @@ static void transform_LIT(struct radeon_compiler* c,
 		swizzle_wwww(srctemp));
 
 	/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I,
 		dstregtmpmask(temp, RC_MASK_Z),
 		negate(swizzle_xxxx(srctemp)),
 		swizzle_wwww(srctemp),
 		builtin_zero);
 
 	/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
-	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+	emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I,
 		dstregtmpmask(temp, RC_MASK_XYW),
 		swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
 
@@ -401,7 +412,7 @@ static void transform_LRP(struct radeon_compiler* c,
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
 		dst,
 		inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
-	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+	emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I,
 		inst->U.I.DstReg,
 		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
 
@@ -418,7 +429,7 @@ static void transform_POW(struct radeon_compiler* c,
 
 	emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
 	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
-	emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+	emit1(c, inst->Prev, RC_OPCODE_EX2, &inst->U.I, inst->U.I.DstReg, tempsrc);
 
 	rc_remove_instruction(inst);
 }
@@ -472,7 +483,7 @@ static void transform_SEQ(struct radeon_compiler* c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
 		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
 
 	rc_remove_instruction(inst);
@@ -481,7 +492,7 @@ static void transform_SEQ(struct radeon_compiler* c,
 static void transform_SFL(struct radeon_compiler* c,
 	struct rc_instruction* inst)
 {
-	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+	emit1(c, inst->Prev, RC_OPCODE_MOV, &inst->U.I, inst->U.I.DstReg, builtin_zero);
 	rc_remove_instruction(inst);
 }
 
@@ -491,7 +502,7 @@ static void transform_SGE(struct radeon_compiler* c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
 		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
 
 	rc_remove_instruction(inst);
@@ -503,7 +514,7 @@ static void transform_SGT(struct radeon_compiler* c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
 		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
 
 	rc_remove_instruction(inst);
@@ -515,7 +526,7 @@ static void transform_SLE(struct radeon_compiler* c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
 		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
 
 	rc_remove_instruction(inst);
@@ -527,7 +538,7 @@ static void transform_SLT(struct radeon_compiler* c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
 		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
 
 	rc_remove_instruction(inst);
@@ -539,7 +550,7 @@ static void transform_SNE(struct radeon_compiler* c,
 	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
 
 	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
-	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_CMP, &inst->U.I, inst->U.I.DstReg,
 		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
 
 	rc_remove_instruction(inst);
@@ -604,7 +615,7 @@ static void transform_XPD(struct radeon_compiler* c,
 	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
 		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
 		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
-	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+	emit3(c, inst->Prev, RC_OPCODE_MAD, &inst->U.I, inst->U.I.DstReg,
 		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
 		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
 		negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
@@ -719,7 +730,7 @@ static void transform_r300_vertex_DP3(struct radeon_compiler* c,
 	src1.Negate &= ~RC_MASK_W;
 	src1.Swizzle &= ~(7 << (3 * 3));
 	src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
-	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+	emit2(c, inst->Prev, RC_OPCODE_DP4, &inst->U.I, inst->U.I.DstReg, src0, src1);
 	rc_remove_instruction(inst);
 }
 
@@ -1043,22 +1054,22 @@ static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
 	unsigned srctmp)
 {
 	if (inst->U.I.Opcode == RC_OPCODE_COS) {
-		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+		emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, inst->U.I.DstReg,
 			srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
 	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
-		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+		emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I,
 			inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
 	} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
 		struct rc_dst_register moddst = inst->U.I.DstReg;
 
 		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
 			moddst.WriteMask = RC_MASK_X;
-			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+			emit1(c, inst->Prev, RC_OPCODE_COS, &inst->U.I, moddst,
 				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
 		}
 		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
 			moddst.WriteMask = RC_MASK_Y;
-			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+			emit1(c, inst->Prev, RC_OPCODE_SIN, &inst->U.I, moddst,
 				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
 		}
 	}
commit 649a8952dfd9bc3d81330a9350eb96ce587f3c3e
Author: Tom Stellard <thomas.stellard at amd.com>
Date:   Thu Apr 12 22:07:40 2012 -0400

    r300/compiler: Clear loop registers in vertex shaders w/o loops
    
    The loop registers weren't being cleared, so any shader that was
    executed after a shader containing loops was at risk of having a loop
    randomly inserted into it.
    
    This fixes over one hundred piglit tests, although these test
    only failed during full piglit runs and would pass if
    run individually.  The exact number of piglit tests that this patch
    fixes will vary depending on the version of piglit and the order the
    tests are run.
    
    (cherry-picked from commit 4a269a8dc0170c75ff22af3910786228727ea41e)

diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index 3897e99..e4afe78 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -1030,20 +1030,18 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
             R300_PVS_VF_MAX_VTX_NUM(12) |
             (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
 
-    /* Emit flow control instructions. */
-    if (code->num_fc_ops) {
-
-        OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
-        if (r300screen->caps.is_r500) {
-            OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2);
-            OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2);
-        } else {
-            OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops);
-            OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops);
-        }
-        OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops);
-        OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops);
+    /* Emit flow control instructions.  Even if there are no fc instructions,
+     * we still need to write the registers to make sure they are cleared. */
+    OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops);
+    if (r300screen->caps.is_r500) {
+        OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2);
+        OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2);
+    } else {
+        OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS);
+        OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS);
     }
+    OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS);
+    OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS);
 
     END_CS;
 }
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index c43352a..8a656e6 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -1802,9 +1802,8 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
     if (r300->screen->caps.has_tcl) {
         unsigned fc_op_dwords = r300->screen->caps.is_r500 ? 3 : 2;
         r300_mark_atom_dirty(r300, &r300->vs_state);
-        r300->vs_state.size =
-                vs->code.length + 9 +
-        (vs->code.num_fc_ops ? vs->code.num_fc_ops * fc_op_dwords + 4 : 0);
+        r300->vs_state.size = vs->code.length + 9 +
+			(R300_VS_MAX_FC_OPS * fc_op_dwords + 4);
 
         r300_mark_atom_dirty(r300, &r300->vs_constants);
         r300->vs_constants.size =
commit f36e638c761c0b11a053cc2b21c9fda2402119ca
Author: Jeremy Huddleston <jeremyhu at apple.com>
Date:   Sat Apr 28 23:19:42 2012 -0700

    darwin: Use ASL for logging
    
    Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>
    (cherry picked from commit 51691f0767f6a75a1f549cd979a878a0ad12a228)

diff --git a/src/glx/apple/Makefile b/src/glx/apple/Makefile
index dc64295..68fe6ad 100644
--- a/src/glx/apple/Makefile
+++ b/src/glx/apple/Makefile
@@ -26,6 +26,7 @@ SOURCES = \
 	apple_glx.c \
 	apple_glx_context.c \
 	apple_glx_drawable.c \
+	apple_glx_log.c \
 	apple_glx_pbuffer.c \
 	apple_glx_pixmap.c \
 	apple_glx_surface.c \
diff --git a/src/glx/apple/apple_glx.c b/src/glx/apple/apple_glx.c
index d94c1e0..56cff64 100644
--- a/src/glx/apple/apple_glx.c
+++ b/src/glx/apple/apple_glx.c
@@ -33,6 +33,8 @@
 #include <assert.h>
 #include <stdarg.h>
 #include <dlfcn.h>
+#include <pthread.h>
+#include <inttypes.h>
 #include "appledri.h"
 #include "apple_glx.h"
 #include "apple_glx_context.h"
@@ -43,22 +45,6 @@ static int dri_event_base = 0;
 
 const GLuint __glXDefaultPixelStore[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 1 };
 
-static bool diagnostic = false;
-
-void
-apple_glx_diagnostic(const char *fmt, ...)
-{
-   va_list vl;
-
-   if (diagnostic) {
-      fprintf(stderr, "DIAG: ");
-
-      va_start(vl, fmt);
-      vfprintf(stderr, fmt, vl);
-      va_end(vl);
-   }
-}
-
 int
 apple_get_dri_event_base(void)
 {
@@ -125,10 +111,9 @@ apple_init_glx(Display * dpy)
    if (initialized)
       return false;
 
-   if (getenv("LIBGL_DIAGNOSTIC")) {
-      printf("initializing libGL in %s\n", __func__);
-      diagnostic = true;
-   }
+   apple_glx_log_init();
+
+   apple_glx_log(ASL_LEVEL_INFO, "Initializing libGL.");
 
    apple_cgl_init();
    (void) apple_glx_get_client_id();
diff --git a/src/glx/apple/apple_glx.h b/src/glx/apple/apple_glx.h
index ce8c488..0967f18 100644
--- a/src/glx/apple/apple_glx.h
+++ b/src/glx/apple/apple_glx.h
@@ -38,7 +38,8 @@
 #define XP_NO_X_HEADERS
 #include <Xplugin.h>
 
-void apple_glx_diagnostic(const char *fmt, ...);
+#include "apple_glx_log.h"
+
 xp_client_id apple_glx_get_client_id(void);
 bool apple_init_glx(Display * dpy);
 void apple_glx_swap_buffers(void *ptr);
diff --git a/src/glx/apple/apple_glx_log.c b/src/glx/apple/apple_glx_log.c
new file mode 100644
index 0000000..9ebf666
--- /dev/null
+++ b/src/glx/apple/apple_glx_log.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2012 Apple Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT
+ * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name(s) of the above
+ * copyright holders shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization.
+ */
+
+#include <sys/cdefs.h>
+#include <asl.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include "apple_glx_log.h"
+
+static bool diagnostic = false;
+static aslclient aslc;
+
+void apple_glx_log_init(void) {
+    if (getenv("LIBGL_DIAGNOSTIC")) {
+        diagnostic = true;
+    }
+
+    aslc = asl_open(NULL, NULL, 0);
+}
+
+void _apple_glx_log(int level, const char *file, const char *function,
+                    int line, const char *fmt, ...) {
+    va_list v;
+    va_start(v, fmt);
+    _apple_glx_vlog(level, file, function, line, fmt, v);
+    va_end(v);
+}
+
+static const char *
+_asl_level_string(int level)
+{
+        if (level == ASL_LEVEL_EMERG) return ASL_STRING_EMERG;
+        if (level == ASL_LEVEL_ALERT) return ASL_STRING_ALERT;
+        if (level == ASL_LEVEL_CRIT) return ASL_STRING_CRIT;
+        if (level == ASL_LEVEL_ERR) return ASL_STRING_ERR;
+        if (level == ASL_LEVEL_WARNING) return ASL_STRING_WARNING;
+        if (level == ASL_LEVEL_NOTICE) return ASL_STRING_NOTICE;
+        if (level == ASL_LEVEL_INFO) return ASL_STRING_INFO;
+        if (level == ASL_LEVEL_DEBUG) return ASL_STRING_DEBUG;
+        return "unknown";
+}
+
+void _apple_glx_vlog(int level, const char *file, const char *function,
+                     int line, const char *fmt, va_list args) {
+    aslmsg msg;
+    uint64_t thread = 0;
+
+    if (pthread_is_threaded_np()) {
+        pthread_threadid_np(NULL, &thread);
+    }
+
+    if (diagnostic) {
+        va_list args2;
+        va_copy(args2, args);
+
+        fprintf(stderr, "%-9s %24s:%-4d %s(%"PRIu64"): ",
+                _asl_level_string(level), file, line, function, thread);
+        vfprintf(stderr, fmt, args2);
+    }
+
+    msg = asl_new(ASL_TYPE_MSG);
+    if (msg) {
+        if (file)
+            asl_set(msg, "File", file);
+        if (function)
+            asl_set(msg, "Function", function);
+        if (line) {
+            char *_line;
+            asprintf(&_line, "%d", line);
+            if (_line) {
+                asl_set(msg, "Line", _line);
+                free(_line);
+            }
+        }
+        if (pthread_is_threaded_np()) {
+            char *_thread;
+            asprintf(&_thread, "%"PRIu64, thread);
+            if (_thread) {
+                asl_set(msg, "Thread", _thread);
+                free(_thread);
+            }
+        }
+    }
+
+    asl_vlog(aslc, msg, level, fmt, args);
+    if (msg)
+        asl_free(msg);
+}
diff --git a/src/glx/apple/apple_glx_log.h b/src/glx/apple/apple_glx_log.h
new file mode 100644
index 0000000..4b1c531
--- /dev/null
+++ b/src/glx/apple/apple_glx_log.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2012 Apple Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation files
+ * (the "Software"), to deal in the Software without restriction,
+ * including without limitation the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE ABOVE LISTED COPYRIGHT
+ * HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name(s) of the above
+ * copyright holders shall not be used in advertising or otherwise to
+ * promote the sale, use or other dealings in this Software without
+ * prior written authorization.
+ */
+
+#ifndef APPLE_GLX_LOG_H
+#define APPLE_GLX_LOG_H
+
+#include <sys/cdefs.h>
+#include <asl.h>
+
+void apple_glx_log_init(void);
+
+__printflike(5, 6)
+void _apple_glx_log(int level, const char *file, const char *function,
+                    int line, const char *fmt, ...);
+#define apple_glx_log(l, f, args ...) \
+    _apple_glx_log(l, __FILE__, __FUNCTION__, __LINE__, f, ## args)
+
+
+__printflike(5, 0)
+void _apple_glx_vlog(int level, const char *file, const char *function,
+                     int line, const char *fmt, va_list v);
+#define apple_glx_vlog(l, f, v) \
+    _apple_glx_vlog(l, __FILE__, __FUNCTION__, __LINE__, f, v)
+
+/* This is just here to help the transition.
+ * TODO: Replace calls to apple_glx_diagnostic
+ */
+#define apple_glx_diagnostic(f, args ...) \
+    apple_glx_log(ASL_LEVEL_DEBUG, f, ## args)
+
+#endif
commit f818673acbff14cbf6dc57bd420ef6a0db164df3
Author: Jeremy Huddleston <jeremyhu at apple.com>
Date:   Sat Apr 28 16:50:00 2012 -0700

    darwin: Make reported errors more user-friendly
    
    Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>
    (cherry picked from commit cf5db0a418b63d71385b43897a7ea9be7bb785d1)

diff --git a/src/glx/apple/apple_glx_drawable.c b/src/glx/apple/apple_glx_drawable.c
index db28302..3f84d56 100644
--- a/src/glx/apple/apple_glx_drawable.c
+++ b/src/glx/apple/apple_glx_drawable.c
@@ -32,6 +32,7 @@
 #include <stdlib.h>
 #include <assert.h>
 #include <pthread.h>
+#include <string.h>
 #include "apple_glx.h"
 #include "apple_glx_context.h"
 #include "apple_glx_drawable.h"
@@ -48,8 +49,8 @@ lock_drawables_list(void)
    err = pthread_mutex_lock(&drawables_lock);
 
    if (err) {
-      fprintf(stderr, "pthread_mutex_lock failure in %s: %d\n",
-              __func__, err);
+      fprintf(stderr, "pthread_mutex_lock failure in %s: %s\n",
+              __func__, strerror(err));
       abort();
    }
 }
@@ -62,8 +63,8 @@ unlock_drawables_list(void)
    err = pthread_mutex_unlock(&drawables_lock);
 
    if (err) {
-      fprintf(stderr, "pthread_mutex_unlock failure in %s: %d\n",
-              __func__, err);
+      fprintf(stderr, "pthread_mutex_unlock failure in %s: %s\n",
+              __func__, strerror(err));
       abort();
    }
 }
@@ -95,7 +96,7 @@ drawable_lock(struct apple_glx_drawable *agd)
    err = pthread_mutex_lock(&agd->mutex);
 
    if (err) {
-      fprintf(stderr, "pthread_mutex_lock error: %d\n", err);
+      fprintf(stderr, "pthread_mutex_lock error: %s\n", strerror(err));
       abort();
    }
 }
@@ -108,7 +109,7 @@ drawable_unlock(struct apple_glx_drawable *d)
    err = pthread_mutex_unlock(&d->mutex);
 
    if (err) {
-      fprintf(stderr, "pthread_mutex_unlock error: %d\n", err);
+      fprintf(stderr, "pthread_mutex_unlock error: %s\n", strerror(err));
       abort();
    }
 }
@@ -245,7 +246,7 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d)
    err = pthread_mutexattr_init(&attr);
 
    if (err) {
-      fprintf(stderr, "pthread_mutexattr_init error: %d\n", err);
+      fprintf(stderr, "pthread_mutexattr_init error: %s\n", strerror(err));
       abort();
    }
 
@@ -257,14 +258,14 @@ common_init(Display * dpy, GLXDrawable drawable, struct apple_glx_drawable *d)
    err = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
 
    if (err) {
-      fprintf(stderr, "error: setting pthread mutex type: %d\n", err);
+      fprintf(stderr, "error: setting pthread mutex type: %s\n", strerror(err));
       abort();
    }
 
    err = pthread_mutex_init(&d->mutex, &attr);
 
    if (err) {
-      fprintf(stderr, "pthread_mutex_init error: %d\n", err);
+      fprintf(stderr, "pthread_mutex_init error: %s\n", strerror(err));
       abort();
    }
 
commit 8010ff17ae931e17dd9d5eead91b144dce146147
Author: Jeremy Huddleston <jeremyhu at apple.com>
Date:   Sun Apr 29 00:27:03 2012 -0700

    darwin: Fix an error message
    
    Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>
    (cherry picked from commit 244dc0521439379a0a44f81ba432aa04ca6c1a91)

diff --git a/src/glx/apple/apple_glx_context.c b/src/glx/apple/apple_glx_context.c
index c58d05a..0bb25b4 100644
--- a/src/glx/apple/apple_glx_context.c
+++ b/src/glx/apple/apple_glx_context.c
@@ -421,7 +421,7 @@ apple_glx_make_current_context(Display * dpy, void *oldptr, void *ptr,
     */
 
    if (same_drawable && ac->is_current) {
-      apple_glx_diagnostic("%s: same_drawable and ac->is_current\n");
+      apple_glx_diagnostic("same_drawable and ac->is_current\n");
       return false;
    }
 
commit 93e94cbb48a679b7bf67594adb6f858526b37935
Author: Eric Anholt <eric at anholt.net>
Date:   Fri Feb 10 12:54:25 2012 -0800

    intel: Fix rendering from textures after RenderTexture().
    
    There's a serious trap for drivers: RenderTexture() does not indicate
    that the texture is currently bound to the draw buffer, despite
    FinishRenderTexture() signaling that the texture is just now being
    unbound from the draw buffer.
    
    We were acting as if RenderTexture() *was* the start of rendering and
    that we could make texturing incoherent with the current contents of
    the renderbuffer.  This caused intel oglconform sRGB
    Mipmap.1D_textures to fail, because we got a call to TexImage() and
    thus RenderTexture() on a texture bound to a framebuffer that wasn't
    the draw buffer, so we skipped validating the new image into the
    texture object used for rendering.
    
    We can't (easily) make RenderTexture() indicate the start of drawing,
    because both our driver and gallium are using it as the moment to set
    up the renderbuffer wrapper used for things like MapRenderbuffer().
    Instead, postpone the setup of the workaround render target miptree
    until update_renderbuffer time, so that we no longer need to skip
    validation of miptrees used as render targets.  As a bonus, this
    should make GL_NV_texture_barrier possible.
    
    (This also fixes a regression in the gen4 small-mipmap rendering since
    3b38b33c1648b07e75dc4d8340758171e109c598, which switched
    set_draw_offset from image->mt to irb->mt but didn't move the irb->mt
    replacement up before set_draw_offset).
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=44961
    NOTE: This is a candidate for the 8.0 branch.

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 51d3a46..97ae489 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -916,12 +916,48 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
    struct gl_context *ctx = &intel->ctx;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    struct intel_mipmap_tree *mt = irb->mt;
-   struct intel_region *region = irb->mt->region;
+   struct intel_region *region;
    uint32_t *surf;
    uint32_t tile_x, tile_y;
    uint32_t format = 0;
    gl_format rb_format = intel_rb_format(irb);
 
+   if (irb->tex_image && !brw->has_surface_tile_offset) {
+      intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
+
+      if (tile_x != 0 || tile_y != 0) {
+	 /* Original gen4 hardware couldn't draw to a non-tile-aligned
+	  * destination in a miptree unless you actually setup your renderbuffer
+	  * as a miptree and used the fragile lod/array_index/etc. controls to
+	  * select the image.  So, instead, we just make a new single-level
+	  * miptree and render into that.
+	  */
+	 struct intel_context *intel = intel_context(ctx);
+	 struct intel_texture_image *intel_image =
+	    intel_texture_image(irb->tex_image);
+	 struct intel_mipmap_tree *new_mt;
+	 int width, height, depth;
+
+	 intel_miptree_get_dimensions_for_image(irb->tex_image, &width, &height, &depth);
+
+	 new_mt = intel_miptree_create(intel, irb->tex_image->TexObject->Target,
+				       intel_image->base.Base.TexFormat,
+				       intel_image->base.Base.Level,
+				       intel_image->base.Base.Level,
+				       width, height, depth,
+				       true);
+
+	 intel_miptree_copy_teximage(intel, intel_image, new_mt);
+	 intel_miptree_reference(&irb->mt, intel_image->mt);
+	 intel_renderbuffer_set_draw_offset(irb);
+	 intel_miptree_release(&new_mt);
+
+	 mt = irb->mt;
+      }
+   }
+
+   region = irb->mt->region;
+
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 			  6 * 4, 32, &brw->bind.surf_offset[unit]);
 
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 185602a..c5097c3 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -553,22 +553,6 @@ intel_renderbuffer_tile_offsets(struct intel_renderbuffer *irb,
    }
 }
 
-#ifndef I915
-static bool
-need_tile_offset_workaround(struct brw_context *brw,
-			    struct intel_renderbuffer *irb)
-{
-   uint32_t tile_x, tile_y;
-
-   if (brw->has_surface_tile_offset)
-      return false;
-
-   intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y);
-
-   return tile_x != 0 || tile_y != 0;
-}
-#endif
-
 /**
  * Called by glFramebufferTexture[123]DEXT() (and other places) to
  * prepare for rendering into texture memory.  This might be called
@@ -626,42 +610,13 @@ intel_render_texture(struct gl_context * ctx,
        return;
    }
 
+   irb->tex_image = image;
+
    DBG("Begin render %s texture tex=%u w=%d h=%d refcount=%d\n",
        _mesa_get_format_name(image->TexFormat),
        att->Texture->Name, image->Width, image->Height,
        irb->Base.Base.RefCount);
 
-   intel_image->used_as_render_target = true;
-
-#ifndef I915
-   if (need_tile_offset_workaround(brw_context(ctx), irb)) {
-      /* Original gen4 hardware couldn't draw to a non-tile-aligned
-       * destination in a miptree unless you actually setup your
-       * renderbuffer as a miptree and used the fragile
-       * lod/array_index/etc. controls to select the image.  So,
-       * instead, we just make a new single-level miptree and render
-       * into that.
-       */
-      struct intel_context *intel = intel_context(ctx);
-      struct intel_mipmap_tree *new_mt;
-      int width, height, depth;
-
-      intel_miptree_get_dimensions_for_image(image, &width, &height, &depth);
-
-      new_mt = intel_miptree_create(intel, image->TexObject->Target,
-				    intel_image->base.Base.TexFormat,
-				    intel_image->base.Base.Level,
-				    intel_image->base.Base.Level,
-                                    width, height, depth,
-				    true);
-
-      intel_miptree_copy_teximage(intel, intel_image, new_mt);
-      intel_renderbuffer_set_draw_offset(irb);
-
-      intel_miptree_reference(&irb->mt, intel_image->mt);
-      intel_miptree_release(&new_mt);
-   }
-#endif
    /* update drawing region, etc */
    intel_draw_buffer(ctx);
 }
@@ -678,14 +633,13 @@ intel_finish_render_texture(struct gl_context * ctx,
    struct gl_texture_object *tex_obj = att->Texture;
    struct gl_texture_image *image =
       tex_obj->Image[att->CubeMapFace][att->TextureLevel];
-   struct intel_texture_image *intel_image = intel_texture_image(image);
+   struct intel_renderbuffer *irb = intel_renderbuffer(att->Renderbuffer);
 
    DBG("Finish render %s texture tex=%u\n",
        _mesa_get_format_name(image->TexFormat), att->Texture->Name);
 
-   /* Flag that this image may now be validated into the object's miptree. */
-   if (intel_image)
-      intel_image->used_as_render_target = false;
+   if (irb)
+      irb->tex_image = NULL;
 
    /* Since we've (probably) rendered to the texture and will (likely) use
     * it in the texture domain later on in this batchbuffer, flush the
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.h b/src/mesa/drivers/dri/intel/intel_fbo.h
index a2c1b1a..724f141 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.h
+++ b/src/mesa/drivers/dri/intel/intel_fbo.h
@@ -47,6 +47,9 @@ struct intel_renderbuffer
    struct intel_mipmap_tree *mt; /**< The renderbuffer storage. */
    drm_intel_bo *map_bo;
 
+   /* Current texture image this renderbuffer is attached to. */
+   struct gl_texture_image *tex_image;
+
    /**
     * \name Miptree view
     * \{
diff --git a/src/mesa/drivers/dri/intel/intel_tex_obj.h b/src/mesa/drivers/dri/intel/intel_tex_obj.h
index 8b278ba..d1a5f05 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_obj.h
+++ b/src/mesa/drivers/dri/intel/intel_tex_obj.h
@@ -65,7 +65,6 @@ struct intel_texture_image
     * Else there is no image data.
     */
    struct intel_mipmap_tree *mt;
-   bool used_as_render_target;
 };
 
 static INLINE struct intel_texture_object *
diff --git a/src/mesa/drivers/dri/intel/intel_tex_validate.c b/src/mesa/drivers/dri/intel/intel_tex_validate.c
index b96f2a4..a63068b 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_validate.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_validate.c
@@ -97,14 +97,8 @@ intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit)
 	 /* skip too small size mipmap */
  	 if (intelImage == NULL)
 		 break;
-	 /* Need to import images in main memory or held in other trees.
-	  * If it's a render target, then its data isn't needed to be in
-	  * the object tree (otherwise we'd be FBO incomplete), and we need
-	  * to keep track of the image's MT as needing to be pulled in still,
-	  * or we'll lose the rendering that's done to it.
-          */
-         if (intelObj->mt != intelImage->mt &&
-	     !intelImage->used_as_render_target) {
+
+         if (intelObj->mt != intelImage->mt) {
             intel_miptree_copy_teximage(intel, intelImage, intelObj->mt);
          }
       }
commit 4e172532d1e5d24e879e8b7c4dffa9d50348a77c
Author: Eric Anholt <eric at anholt.net>
Date:   Mon Dec 19 10:53:10 2011 -0800

    i965/fs: Jump from discard statements to the end of the program when done.
    
    From the GLSL 1.30 spec:
    
         The discard keyword is only allowed within fragment shaders. It
         can be used within a fragment shader to abandon the operation on
         the current fragment. This keyword causes the fragment to be
         discarded and no updates to any buffers will occur. Control flow
         exits the shader, and subsequent implicit or explicit derivatives
         are undefined when this control flow is non-uniform (meaning
         different fragments within the primitive take different control
         paths).
    
    v2: Don't emit the final HALT if no other HALTs were emitted.
    
    Reviewed-by: Kenneth Graunke <kenneth at whitecape.org> (v1)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index f660222..5064c18 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -1048,6 +1048,7 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p);
 struct brw_instruction *brw_BREAK(struct brw_compile *p);
 struct brw_instruction *brw_CONT(struct brw_compile *p);
 struct brw_instruction *gen6_CONT(struct brw_compile *p);
+struct brw_instruction *gen6_HALT(struct brw_compile *p);
 /* Forward jumps:
  */
 void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx);
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b2581da..21d3c5a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1339,6 +1339,20 @@ struct brw_instruction *brw_CONT(struct brw_compile *p)
    return insn;
 }
 
+struct brw_instruction *gen6_HALT(struct brw_compile *p)
+{
+   struct brw_instruction *insn;
+
+   insn = next_insn(p, BRW_OPCODE_HALT);
+   brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+   brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
+
+   insn->header.compression_control = BRW_COMPRESSION_NONE;
+   insn->header.execution_size = BRW_EXECUTE_8;
+   return insn;
+}
+
 /* DO/WHILE loop:
  *
  * The DO/WHILE is just an unterminated loop -- break or continue are
@@ -2395,8 +2409,8 @@ brw_find_next_block_end(struct brw_compile *p, int start)
 	 return ip;
       }
    }
-   assert(!"not reached");
-   return start + 1;
+
+   return 0;
 }
 
 /* There is no DO instruction on gen6, so to find the end of the loop
@@ -2425,7 +2439,7 @@ brw_find_loop_end(struct brw_compile *p, int start)
 }
 
 /* After program generation, go back and update the UIP and JIP of
- * BREAK and CONT instructions to their correct locations.
+ * BREAK, CONT, and HALT instructions to their correct locations.
  */
 void
 brw_set_uip_jip(struct brw_compile *p)
@@ -2439,21 +2453,50 @@ brw_set_uip_jip(struct brw_compile *p)
 
    for (ip = 0; ip < p->nr_insn; ip++) {
       struct brw_instruction *insn = &p->store[ip];
+      int block_end_ip = 0;
+
+      if (insn->header.opcode == BRW_OPCODE_BREAK ||
+	  insn->header.opcode == BRW_OPCODE_CONTINUE ||
+	  insn->header.opcode == BRW_OPCODE_HALT) {
+	 block_end_ip = brw_find_next_block_end(p, ip);
+      }
 
       switch (insn->header.opcode) {
       case BRW_OPCODE_BREAK:
-	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+	 assert(block_end_ip != 0);
+	 insn->bits3.break_cont.jip = br * (block_end_ip - ip);
 	 /* Gen7 UIP points to WHILE; Gen6 points just after it */
 	 insn->bits3.break_cont.uip =
 	    br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
 	 break;
       case BRW_OPCODE_CONTINUE:
-	 insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+	 assert(block_end_ip != 0);
+	 insn->bits3.break_cont.jip = br * (block_end_ip - ip);
 	 insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
 
 	 assert(insn->bits3.break_cont.uip != 0);
 	 assert(insn->bits3.break_cont.jip != 0);
 	 break;
+      case BRW_OPCODE_HALT:
+	 /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
+	  *
+	  *    "In case of the halt instruction not inside any conditional code
+	  *     block, the value of <JIP> and <UIP> should be the same. In case
+	  *     of the halt instruction inside conditional code block, the <UIP>
+	  *     should be the end of the program, and the <JIP> should be end of
+	  *     the most inner conditional code block."
+	  *
+	  * The uip will have already been set by whoever set up the
+	  * instruction.
+	  */
+	 if (block_end_ip == 0) {
+	    insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
+	 } else {
+	    insn->bits3.break_cont.jip = br * (block_end_ip - ip);
+	 }
+	 assert(insn->bits3.break_cont.uip != 0);
+	 assert(insn->bits3.break_cont.jip != 0);
+	 break;
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9a2cc08..b9cd42f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -171,6 +171,26 @@ static const fs_reg reg_undef;
 static const fs_reg reg_null_f(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_F);
 static const fs_reg reg_null_d(ARF, BRW_ARF_NULL, BRW_REGISTER_TYPE_D);
 
+class ip_record : public exec_node {
+public:
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = rzalloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   ip_record(int ip)
+   {
+      this->ip = ip;
+   }
+
+   int ip;
+};
+
 class fs_inst : public exec_node {
 public:
    /* Callers of this ralloc-based new need not call delete. It's
@@ -489,6 +509,7 @@ public:
    bool remove_duplicate_mrf_writes();
    bool virtual_grf_interferes(int a, int b);
    void schedule_instructions();
+   void patch_discard_jumps_to_fb_writes();
    void fail(const char *msg, ...);
 
    void push_force_uncompressed();
@@ -571,6 +592,7 @@ public:
    struct gl_shader_program *prog;
    void *mem_ctx;
    exec_list instructions;
+   exec_list discard_halt_patches;
 
    /* Delayed setup of c->prog_data.params[] due to realloc of
     * ParamValues[] during compile.
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index b68d8cb..cc70904 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -37,11 +37,55 @@ extern "C" {
 #include "glsl/ir_print_visitor.h"
 
 void
+fs_visitor::patch_discard_jumps_to_fb_writes()
+{
+   if (intel->gen < 6 || this->discard_halt_patches.is_empty())
+      return;
+
+   /* There is a somewhat strange undocumented requirement of using
+    * HALT, according to the simulator.  If some channel has HALTed to
+    * a particular UIP, then by the end of the program, every channel
+    * must have HALTed to that UIP.  Furthermore, the tracking is a
+    * stack, so you can't do the final halt of a UIP after starting
+    * halting to a new UIP.
+    *
+    * Symptoms of not emitting this instruction on actual hardware
+    * included GPU hangs and sparkly rendering on the piglit discard
+    * tests.
+    */
+   struct brw_instruction *last_halt = gen6_HALT(p);
+   last_halt->bits3.break_cont.uip = 2;
+   last_halt->bits3.break_cont.jip = 2;
+
+   int ip = p->nr_insn;
+
+   foreach_list(node, &this->discard_halt_patches) {
+      ip_record *patch_ip = (ip_record *)node;
+      struct brw_instruction *patch = &p->store[patch_ip->ip];
+      int br = (intel->gen >= 5) ? 2 : 1;
+
+      /* HALT takes a distance from the pre-incremented IP, so '1'
+       * would be the next instruction after jmpi.
+       */
+      assert(patch->header.opcode == BRW_OPCODE_HALT);
+      patch->bits3.break_cont.uip = (ip - patch_ip->ip) * br;
+   }
+
+   this->discard_halt_patches.make_empty();
+}
+
+void
 fs_visitor::generate_fb_write(fs_inst *inst)
 {
    bool eot = inst->eot;
    struct brw_reg implied_header;
 
+   /* Note that the jumps emitted to this point mean that the g0 ->
+    * base_mrf setup must be inside of this function, so that we jump
+    * to a point containing it.
+    */
+   patch_discard_jumps_to_fb_writes();
+
    /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
     * move, here's g1.
     */
@@ -482,6 +526,17 @@ fs_visitor::generate_discard(fs_inst *inst)
       brw_set_mask_control(p, BRW_MASK_DISABLE);
       brw_AND(p, g1, f0, g1);
       brw_pop_insn_state(p);
+
+      /* GLSL 1.30+ say that discarded channels should stop executing
+       * (so, for example, an infinite loop that would otherwise in
+       * just that channel does not occur.
+       *
+       * This HALT will be patched up at FB write time to point UIP at
+       * the end of the program, and at brw_uip_jip() JIP will be set
+       * to the end of the current block (or the program).
+       */
+      this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn));
+      gen6_HALT(p);
    } else {
       struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
 
commit e995b41a16a164426a8dfb78945802f1ea1ed997
Author: Dylan Noblesmith <nobled at dreamwidth.org>
Date:   Sun Apr 1 19:57:57 2012 +0000

    egl-static: fix printf warning
    
    Noticed by clang:
    
    egl_st.c:57:50: warning: field precision should have type 'int',
    but argument has type 'size_t' (aka 'unsigned long') [-Wformat]
          ret = util_snprintf(path, sizeof(path), "%.*s/%s" UTIL_DL_EXT,
                                                   ~~^~
    
    NOTE: This is a candidate for the 8.0 branch.
    
    Reviewed-by: Brian Paul <brianp at vmware.com>
    (cherry picked from commit 2bb91274e2cc2290ce8e790335f1e57b81d9d783)

diff --git a/src/gallium/targets/egl-static/egl_st.c b/src/gallium/targets/egl-static/egl_st.c
index 81d7bb4..67e3c29 100644
--- a/src/gallium/targets/egl-static/egl_st.c
+++ b/src/gallium/targets/egl-static/egl_st.c
@@ -54,8 +54,9 @@ dlopen_gl_lib_cb(const char *dir, size_t len, void *callback_data)
    int ret;
 
    if (len) {
+      assert(len <= INT_MAX && "path is insanely long!");
       ret = util_snprintf(path, sizeof(path), "%.*s/%s" UTIL_DL_EXT,
-            len, dir, name);
+            (int)len, dir, name);
    }
    else {
       ret = util_snprintf(path, sizeof(path), "%s" UTIL_DL_EXT, name);
commit fca35d2e5e84fad50b0a99463d072bbb221ac947
Author: Dylan Noblesmith <nobled at dreamwidth.org>
Date:   Sun Apr 1 19:48:21 2012 +0000

    st/vega: fix uninitialized values
    
    C still treats array arguments exactly like pointer arguments.
    By sheer coincidence, this still worked fine on 64-bit
    machines where 2 * sizeof(float) == sizeof(void*), but not
    on 32-bit.
    
    Noticed by clang:
    
    text.c:76:51: warning: sizeof on array function parameter will
    return size of 'const VGfloat *' (aka 'const float *') instead of
    'const VGfloat [2]' [-Wsizeof-array-argument]
       memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyphOrigin));
    
    NOTE: This is a candidate for the 8.0 branch.
    
    Reviewed-by: Brian Paul <brianp at vmware.com>
    (cherry picked from commit 6a491b5728fcfb928612182fa87212eeb2253917)

diff --git a/src/gallium/state_trackers/vega/text.c b/src/gallium/state_trackers/vega/text.c
index a183933..27d461c 100644
--- a/src/gallium/state_trackers/vega/text.c
+++ b/src/gallium/state_trackers/vega/text.c
@@ -73,8 +73,8 @@ static void add_glyph(struct vg_font *font,
    glyph = CALLOC_STRUCT(vg_glyph);
    glyph->object = obj;
    glyph->is_hinted = isHinted;
-   memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyphOrigin));
-   memcpy(glyph->escapement, escapement, sizeof(escapement));
+   memcpy(glyph->glyph_origin, glyphOrigin, sizeof(glyph->glyph_origin));
+   memcpy(glyph->escapement, escapement, sizeof(glyph->glyph_origin));
 
    cso_hash_insert(font->glyphs, (unsigned) glyphIndex, glyph);
 }
commit 4dd228a0a9b6c2e2c242eb29f82c7660f94e80e2
Author: Dylan Noblesmith <nobled at dreamwidth.org>
Date:   Sun Apr 1 18:48:13 2012 +0000

    egl: fix uninitialized values
    
    Noticed by clang:
    
    eglimage.c:48:28: warning: argument to 'sizeof' in 'memset' call is
    the same expression as the destination; did you mean to dereference
    it? [-Wsizeof-pointer-memaccess]
       memset(attrs, 0, sizeof(attrs));
              ~~~~~            ^~~~~
    
    NOTE: This is a candidate for the 8.0 branch.
    
    Reviewed-by: Brian Paul <brianp at vmware.com>
    (cherry picked from commit 071501a68129768c6223beb24f7363d87c6684ea)

diff --git a/src/egl/main/eglimage.c b/src/egl/main/eglimage.c
index d5deae7..1174d0a 100644
--- a/src/egl/main/eglimage.c
+++ b/src/egl/main/eglimage.c
@@ -45,7 +45,7 @@ _eglParseImageAttribList(_EGLImageAttribs *attrs, _EGLDisplay *dpy,
 
    (void) dpy;
 
-   memset(attrs, 0, sizeof(attrs));
+   memset(attrs, 0, sizeof(*attrs));
    attrs->ImagePreserved = EGL_FALSE;
    attrs->GLTextureLevel = 0;
    attrs->GLTextureZOffset = 0;
commit 108d544cae9cb860dbe53e4e0655c80e950808a1
Author: Dylan Noblesmith <nobled at dreamwidth.org>
Date:   Sun Apr 1 18:35:29 2012 +0000

    util: fix uninitialized table
    
    Most of the 256 values in the 'generic_to_slot' table were supposed to
    be initialized with the default value 0xff, but were left at zero
    (from CALLOC_STRUCT()) instead.
    
    Noticed by clang:
    
    u_linkage.h:60:31: warning: argument to 'sizeof' in 'memset' call is the same expression as the destination;
          did you mean to provide an explicit length? [-Wsizeof-pointer-memaccess]
       memset(table, 0xff, sizeof(table));
              ~~~~~               ^~~~~
    
    Also fix a signed/unsigned comparison and a comment typo here.
    
    NOTE: This is a candidate for the 8.0 branch.
    
    Reviewed-by: Brian Paul <brianp at vmware.com>
    (cherry picked from commit 520521e3809a36201582fa48ee173eba12e97812)

diff --git a/src/gallium/auxiliary/util/u_linkage.h b/src/gallium/auxiliary/util/u_linkage.h
index 43ec917..7b23123 100644
--- a/src/gallium/auxiliary/util/u_linkage.h
+++ b/src/gallium/auxiliary/util/u_linkage.h
@@ -49,15 +49,16 @@ unsigned util_semantic_set_from_program_file(struct util_semantic_set *set, cons
  *
  * num_slots is the size of the layout array and hardware limit instead.
  *
- * efficient_slots == 0 or efficient_solts == num_slots are typical settings.
+ * efficient_slots == 0 or efficient_slots == num_slots are typical settings.
  */
 void util_semantic_layout_from_set(unsigned char *layout, const struct util_semantic_set *set, unsigned efficient_slots, unsigned num_slots);
 
 static INLINE void
-util_semantic_table_from_layout(unsigned char *table, unsigned char *layout, unsigned char first_slot_value, unsigned char num_slots)
+util_semantic_table_from_layout(unsigned char *table, size_t table_size, unsigned char *layout,
+                                unsigned char first_slot_value, unsigned char num_slots)
 {
-   int i;
-   memset(table, 0xff, sizeof(table));
+   unsigned char i;
+   memset(table, 0xff, table_size);
 
    for(i = 0; i < num_slots; ++i)
       table[layout[i]] = first_slot_value + i;
diff --git a/src/gallium/drivers/nvfx/nvfx_fragprog.c b/src/gallium/drivers/nvfx/nvfx_fragprog.c
index dbd7c77..0babcbb 100644
--- a/src/gallium/drivers/nvfx/nvfx_fragprog.c
+++ b/src/gallium/drivers/nvfx/nvfx_fragprog.c
@@ -977,7 +977,8 @@ nvfx_fragprog_prepare(struct nvfx_context* nvfx, struct nvfx_fpc *fpc)
 	if(fpc->fp->num_slots > num_texcoords)
 		return FALSE;
 	util_semantic_layout_from_set(fpc->fp->slot_to_generic, &set, 0, num_texcoords);
-	util_semantic_table_from_layout(fpc->generic_to_slot, fpc->fp->slot_to_generic, 0, num_texcoords);
+	util_semantic_table_from_layout(fpc->generic_to_slot, sizeof fpc->generic_to_slot,
+                                        fpc->fp->slot_to_generic, 0, num_texcoords);
 
 	memset(fpc->fp->slot_to_fp_input, 0xff, sizeof(fpc->fp->slot_to_fp_input));
 
commit 546abd25038dc4ad9af17a0f99bae818b980436e
Author: Dylan Noblesmith <nobled at dreamwidth.org>
Date:   Sun Apr 1 18:21:47 2012 +0000

    util: fix undefined behavior
    
    container_of() can legally return anything, even invalid addresses
    that cause segfaults, when 'sample' is an uninitialized pointer.
    
    Bug exposed by clang.
    
    NOTE: This is a candidate for the 8.0 branch.
    (cherry picked from commit ccff74971203b533bf16b46b49a9e61753f75e6c)

diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h
index 2384c36..9d1129b 100644
--- a/src/gallium/auxiliary/util/u_double_list.h
+++ b/src/gallium/auxiliary/util/u_double_list.h
@@ -105,6 +105,11 @@ static INLINE void list_delinit(struct list_head *item)
 #define LIST_IS_EMPTY(__list)                   \
     ((__list)->next == (__list))
 
+/**
+ * Cast from a pointer to a member of a struct back to the containing struct.
+ *
+ * 'sample' MUST be initialized, or else the result is undefined!
+ */
 #ifndef container_of
 #define container_of(ptr, sample, member)				\
     (void *)((char *)(ptr)						\
@@ -112,29 +117,29 @@ static INLINE void list_delinit(struct list_head *item)
 #endif
 
 #define LIST_FOR_EACH_ENTRY(pos, head, member)				\
-   for (pos = container_of((head)->next, pos, member);			\
+   for (pos = NULL, pos = container_of((head)->next, pos, member);	\
 	&pos->member != (head);						\
 	pos = container_of(pos->member.next, pos, member))
 
 #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member)	\
-   for (pos = container_of((head)->next, pos, member),			\
+   for (pos = NULL, pos = container_of((head)->next, pos, member),	\
 	storage = container_of(pos->member.next, pos, member);	\
 	&pos->member != (head);						\
 	pos = storage, storage = container_of(storage->member.next, storage, member))
 
 #define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member)	\
-   for (pos = container_of((head)->prev, pos, member),			\
+   for (pos = NULL, pos = container_of((head)->prev, pos, member),	\
 	storage = container_of(pos->member.prev, pos, member);		\
 	&pos->member != (head);						\
 	pos = storage, storage = container_of(storage->member.prev, storage, member))
 
 #define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member)		\
-   for (pos = container_of((start), pos, member);			\
+   for (pos = NULL, pos = container_of((start), pos, member);		\
 	&pos->member != (head);						\
 	pos = container_of(pos->member.next, pos, member))
 
 #define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member)		\
-   for (pos = container_of((start), pos, member);			\
+   for (pos = NULL, pos = container_of((start), pos, member);		\
 	&pos->member != (head);						\
 	pos = container_of(pos->member.prev, pos, member))
 
commit bcc5caf642a9beec324657becac501944ce4dc23
Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Tue Apr 24 14:09:13 2012 -0700

    i965/fs: Fix FB writes that tried to use the non-existent m16 register.
    
    A little analysis shows that the worst-case value for "nr" is 17:
    - base_mrf = 2                       ... 2
    - header present (say gen == 5)      ... 4
    - aa_dest_stencil_reg (stencil test) ... 5
    - SIMD16 mode: += 4 * reg_width      ... 13
    - source_depth_to_render_target      ... 15
    - dest_depth_reg                     ... 17
    
    This resulted in us setting base_mrf to 2 and mlen to 15.  In other
    words, we'd try to use m2..m16.  But m16 doesn't exist pre-Gen6.  Also,
    the instruction scheduler data structures use arrays of size 16, so this
    would cause us to access them out of bounds.
    
    While the debugger system routine may need m0 and m1, we don't use it
    today, so the simplest solution is just to move base_mrf back to 1.
    That way, our worst case message fits in m1..m15, which is legal.
    
    An alternative would be to fail on SIMD16 in this case, but that seems
    a bit unfortunate if there's no real need to reserve m0 and m1.
    
    Fixes new piglit test shaders/depth-test-and-write on Ironlake,
    as well as gzdoom.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=48218
    Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
    Reviewed-by: Eric Anholt <eric at anholt.net>
    (cherry picked from commit aa429ea73c0931d5cfa2c263fe005ead8dc32ddf)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 0632052..cec1e95 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1921,7 +1921,10 @@ fs_visitor::emit_fb_writes()
 {
    this->current_annotation = "FB write header";
    bool header_present = true;
-   int base_mrf = 2;
+   /* We can potentially have a message length of up to 15, so we have to set
+    * base_mrf to either 0 or 1 in order to fit in m0..m15.
+    */
+   int base_mrf = 1;
    int nr = base_mrf;
    int reg_width = c->dispatch_width / 8;
 
commit 69d8a25d429bccf960e98e5c126e1ef2ae4ffe9d
Author: Jeremy Huddleston <jeremyhu at apple.com>
Date:   Mon Apr 23 16:43:22 2012 -0700

    darwin: Eliminate a pthread mutex leak
    
    Signed-off-by: Jeremy Huddleston <jeremyhu at apple.com>
    Tested-by: Charles Davis <cdavis at mines.edu>
    (cherry picked from commit 1a33c1b2b895566299ec76643659adacc239a3dc)

diff --git a/src/glx/apple/apple_glx_drawable.c b/src/glx/apple/apple_glx_drawable.c
index 5530224..db28302 100644
--- a/src/glx/apple/apple_glx_drawable.c
+++ b/src/glx/apple/apple_glx_drawable.c
@@ -135,6 +135,7 @@ release_drawable(struct apple_glx_drawable *d)
 static bool
 destroy_drawable(struct apple_glx_drawable *d)
 {
+   int err;
 
    d->lock(d);
 
@@ -172,6 +173,12 @@ destroy_drawable(struct apple_glx_drawable *d)
 
    apple_glx_diagnostic("%s: freeing %p\n", __func__, (void *) d);
 
+   err = pthread_mutex_destroy(&d->mutex);
+   if (err) {
+      fprintf(stderr, "pthread_mutex_destroy error: %s\n", strerror(err));
+      abort();
+   }
+   
    free(d);
 
    /* So that the locks are balanced and the caller correctly unlocks. */
commit 6095a17534c2694760300701fee59a320950f271
Author: Jonas Maebe <jonas.maebe at elis.ugent.be>
Date:   Mon Apr 23 16:02:16 2012 -0700

    apple: Fix a use after free
    
    Reviewed-by: Jeremy Huddleston <jeremyhu at apple.com>
    (cherry picked from commit c60ffd2840036af1ea6f2b6c6e1e9014bb8e2c34)

diff --git a/src/glx/apple/apple_glx_surface.c b/src/glx/apple/apple_glx_surface.c
index 39f5130..d42fa3b 100644
--- a/src/glx/apple/apple_glx_surface.c
+++ b/src/glx/apple/apple_glx_surface.c
@@ -206,6 +206,10 @@ apple_glx_surface_destroy(unsigned int uid)
    if (d) {
       d->types.surface.pending_destroy = true;
       d->release(d);
+
+      /* apple_glx_drawable_find_by_uid returns a locked drawable */
+      d->unlock(d);
+
       /* 
        * We release 2 references to the surface.  One was acquired by
        * the find, and the other was leftover from a context, or 
@@ -217,7 +221,5 @@ apple_glx_surface_destroy(unsigned int uid)
        * by a glViewport callback (see apple_glx_context_update()).
        */
       d->destroy(d);
-
-      d->unlock(d);
    }
 }