From 527b4ec9895c457fed3387c3342433a4502fea94 Mon Sep 17 00:00:00 2001
From: ryang <decatf@gmail.com>
Date: Thu, 9 Aug 2018 18:46:09 -0400
Subject: [PATCH 5/5] grate-driver

# Conflicts:
#	configure.ac
---
 configure.ac                                       |  15 +-
 meson.build                                        |   5 +
 src/gallium/Makefile.am                            |   5 +
 .../auxiliary/pipe-loader/pipe_loader_drm.c        |   5 +
 src/gallium/auxiliary/target-helpers/drm_helper.h  |  22 +
 .../auxiliary/target-helpers/drm_helper_public.h   |   3 +
 src/gallium/drivers/grate/Automake.inc             |  11 +
 src/gallium/drivers/grate/Makefile.am              |  43 ++
 src/gallium/drivers/grate/class_ids.h              |  37 ++
 src/gallium/drivers/grate/grate_common.h           |  18 +
 src/gallium/drivers/grate/grate_compiler.h         |  38 ++
 src/gallium/drivers/grate/grate_compiler_fp.c      | 241 ++++++++
 src/gallium/drivers/grate/grate_compiler_vpe.c     | 336 +++++++++++
 src/gallium/drivers/grate/grate_context.c          | 314 ++++++++++
 src/gallium/drivers/grate/grate_context.h          |  63 ++
 src/gallium/drivers/grate/grate_draw.c             | 125 ++++
 src/gallium/drivers/grate/grate_draw.h             |   7 +
 src/gallium/drivers/grate/grate_fp_ir.c            | 196 +++++++
 src/gallium/drivers/grate/grate_fp_ir.h            | 168 ++++++
 src/gallium/drivers/grate/grate_fp_vliw.h          | 174 ++++++
 src/gallium/drivers/grate/grate_program.c          | 219 +++++++
 src/gallium/drivers/grate/grate_program.h          |  29 +
 src/gallium/drivers/grate/grate_resource.c         | 511 +++++++++++++++++
 src/gallium/drivers/grate/grate_resource.h         |  30 +
 src/gallium/drivers/grate/grate_screen.c           | 609 ++++++++++++++++++++
 src/gallium/drivers/grate/grate_screen.h           |  31 +
 src/gallium/drivers/grate/grate_state.c            | 634 +++++++++++++++++++++
 src/gallium/drivers/grate/grate_state.h            |  59 ++
 src/gallium/drivers/grate/grate_stream.c           | 380 ++++++++++++
 src/gallium/drivers/grate/grate_stream.h           | 102 ++++
 src/gallium/drivers/grate/grate_surface.c          |  45 ++
 src/gallium/drivers/grate/grate_surface.h          |  14 +
 src/gallium/drivers/grate/grate_vpe_ir.c           | 211 +++++++
 src/gallium/drivers/grate/grate_vpe_ir.h           | 115 ++++
 src/gallium/drivers/grate/host1x01_hardware.h      | 136 +++++
 src/gallium/drivers/grate/hw_host1x01_uclass.h     | 159 ++++++
 src/gallium/drivers/grate/meson.build              |  68 +++
 src/gallium/drivers/grate/tgr_3d.xml.h             | 451 +++++++++++++++
 src/gallium/meson.build                            |   6 +
 src/gallium/targets/dri/Makefile.am                |   2 +
 src/gallium/targets/dri/dri.sym                    |   1 +
 src/gallium/targets/dri/meson.build                |   6 +-
 src/gallium/targets/dri/target.c                   |   4 +
 src/gallium/winsys/tegra/drm/Makefile.am           |  33 ++
 src/gallium/winsys/tegra/drm/meson.build           |  30 +
 src/gallium/winsys/tegra/drm/tegra_drm_public.h    |   8 +
 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c    |  17 +
 47 files changed, 5731 insertions(+), 5 deletions(-)
 create mode 100644 src/gallium/drivers/grate/Automake.inc
 create mode 100644 src/gallium/drivers/grate/Makefile.am
 create mode 100644 src/gallium/drivers/grate/class_ids.h
 create mode 100644 src/gallium/drivers/grate/grate_common.h
 create mode 100644 src/gallium/drivers/grate/grate_compiler.h
 create mode 100644 src/gallium/drivers/grate/grate_compiler_fp.c
 create mode 100644 src/gallium/drivers/grate/grate_compiler_vpe.c
 create mode 100644 src/gallium/drivers/grate/grate_context.c
 create mode 100644 src/gallium/drivers/grate/grate_context.h
 create mode 100644 src/gallium/drivers/grate/grate_draw.c
 create mode 100644 src/gallium/drivers/grate/grate_draw.h
 create mode 100644 src/gallium/drivers/grate/grate_fp_ir.c
 create mode 100644 src/gallium/drivers/grate/grate_fp_ir.h
 create mode 100644 src/gallium/drivers/grate/grate_fp_vliw.h
 create mode 100644 src/gallium/drivers/grate/grate_program.c
 create mode 100644 src/gallium/drivers/grate/grate_program.h
 create mode 100644 src/gallium/drivers/grate/grate_resource.c
 create mode 100644 src/gallium/drivers/grate/grate_resource.h
 create mode 100755 src/gallium/drivers/grate/grate_screen.c
 create mode 100644 src/gallium/drivers/grate/grate_screen.h
 create mode 100644 src/gallium/drivers/grate/grate_state.c
 create mode 100644 src/gallium/drivers/grate/grate_state.h
 create mode 100644 src/gallium/drivers/grate/grate_stream.c
 create mode 100644 src/gallium/drivers/grate/grate_stream.h
 create mode 100644 src/gallium/drivers/grate/grate_surface.c
 create mode 100644 src/gallium/drivers/grate/grate_surface.h
 create mode 100644 src/gallium/drivers/grate/grate_vpe_ir.c
 create mode 100644 src/gallium/drivers/grate/grate_vpe_ir.h
 create mode 100644 src/gallium/drivers/grate/host1x01_hardware.h
 create mode 100644 src/gallium/drivers/grate/hw_host1x01_uclass.h
 create mode 100644 src/gallium/drivers/grate/meson.build
 create mode 100644 src/gallium/drivers/grate/tgr_3d.xml.h
 create mode 100644 src/gallium/winsys/tegra/drm/Makefile.am
 create mode 100644 src/gallium/winsys/tegra/drm/meson.build
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_public.h
 create mode 100644 src/gallium/winsys/tegra/drm/tegra_drm_winsys.c

diff --git a/configure.ac b/configure.ac
index 3141348..be3b547 100644
--- a/configure.ac
+++ b/configure.ac
@@ -80,6 +80,7 @@ LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
 LIBDRM_FREEDRENO_REQUIRED=2.4.91
 LIBDRM_ETNAVIV_REQUIRED=2.4.89
+LIBDRM_TEGRA_REQUIRED=2.4.81
 
 dnl Versions for external dependencies
 DRI2PROTO_REQUIRED=2.8
@@ -1359,7 +1360,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
     [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
         [comma delimited Gallium drivers list, e.g.
-        "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,vc5,virgl,etnaviv,imx"
+        "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,vc5,virgl,etnaviv,imx,grate"
         @<:@default=r300,r600,svga,swrast@:>@])],
     [with_gallium_drivers="$withval"],
     [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -2735,6 +2736,11 @@ if test -n "$with_gallium_drivers"; then
                 require_basic_egl "virgl"
             fi
             ;;
+        xgrate)
+            HAVE_GALLIUM_GRATE=yes
+            PKG_CHECK_MODULES([GRATE], [libdrm_tegra >= $LIBDRM_TEGRA_REQUIRED])
+            require_libdrm "tegra"
+            ;;
         *)
             AC_MSG_ERROR([Unknown Gallium driver: $driver])
             ;;
@@ -2874,6 +2880,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_SWRAST, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes -o \
 AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_VC5, test "x$HAVE_GALLIUM_VC5" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_GRATE, test "x$HAVE_GALLIUM_GRATE" = xyes)
 
 AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)
 
@@ -3003,6 +3010,7 @@ AC_CONFIG_FILES([Makefile
                  src/gallium/auxiliary/Makefile
                  src/gallium/auxiliary/pipe-loader/Makefile
                  src/gallium/drivers/freedreno/Makefile
+                 src/gallium/drivers/grate/Makefile
                  src/gallium/drivers/i915/Makefile
                  src/gallium/drivers/llvmpipe/Makefile
                  src/gallium/drivers/nouveau/Makefile
@@ -3048,14 +3056,15 @@ AC_CONFIG_FILES([Makefile
                  src/gallium/targets/xvmc/Makefile
                  src/gallium/tests/trivial/Makefile
                  src/gallium/tests/unit/Makefile
+                 src/gallium/winsys/amdgpu/drm/Makefile
                  src/gallium/winsys/etnaviv/drm/Makefile
-                 src/gallium/winsys/imx/drm/Makefile
                  src/gallium/winsys/freedreno/drm/Makefile
+                 src/gallium/winsys/tegra/drm/Makefile
                  src/gallium/winsys/i915/drm/Makefile
+                 src/gallium/winsys/imx/drm/Makefile
                  src/gallium/winsys/nouveau/drm/Makefile
                  src/gallium/winsys/pl111/drm/Makefile
                  src/gallium/winsys/radeon/drm/Makefile
-                 src/gallium/winsys/amdgpu/drm/Makefile
                  src/gallium/winsys/svga/drm/Makefile
                  src/gallium/winsys/sw/dri/Makefile
                  src/gallium/winsys/sw/kms-dri/Makefile
diff --git a/meson.build b/meson.build
index 2288875..8b796ba 100644
--- a/meson.build
+++ b/meson.build
@@ -137,6 +137,7 @@ with_gallium_r600 = false
 with_gallium_nouveau = false
 with_gallium_freedreno = false
 with_gallium_softpipe = false
+with_gallium_grate = false
 with_gallium_vc4 = false
 with_gallium_vc5 = false
 with_gallium_etnaviv = false
@@ -171,6 +172,7 @@ if _drivers != ''
   with_gallium_nouveau = _split.contains('nouveau')
   with_gallium_freedreno = _split.contains('freedreno')
   with_gallium_softpipe = _split.contains('swrast')
+  with_gallium_grate = _split.contains('grate')
   with_gallium_vc4 = _split.contains('vc4')
   with_gallium_vc5 = _split.contains('vc5')
   with_gallium_etnaviv = _split.contains('etnaviv')
@@ -1028,6 +1030,7 @@ dep_libdrm_nouveau = null_dep
 dep_libdrm_etnaviv = null_dep
 dep_libdrm_freedreno = null_dep
 dep_libdrm_intel = null_dep
+dep_libdrm_tegra = null_dep
 
 _drm_amdgpu_ver = '2.4.91'
 _drm_radeon_ver = '2.4.71'
@@ -1035,6 +1038,7 @@ _drm_nouveau_ver = '2.4.66'
 _drm_etnaviv_ver = '2.4.89'
 _drm_freedreno_ver = '2.4.91'
 _drm_intel_ver = '2.4.75'
+_drm_tegra_ver = '2.4.81'
 _drm_ver = '2.4.75'
 
 _libdrm_checks = [
@@ -1045,6 +1049,7 @@ _libdrm_checks = [
   ['nouveau', (with_gallium_nouveau or with_dri_nouveau)],
   ['etnaviv', with_gallium_etnaviv],
   ['freedreno', with_gallium_freedreno],
+  ['tegra', with_gallium_grate],
 ]
 
 # Loop over the enables versions and get the highest libdrm requirement for all
diff --git a/src/gallium/Makefile.am b/src/gallium/Makefile.am
index cf2fe42..dac78ac 100644
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -78,6 +78,11 @@ if HAVE_GALLIUM_SWR
 SUBDIRS += drivers/swr
 endif
 
+## tegra
+if HAVE_GALLIUM_GRATE
+SUBDIRS += drivers/grate winsys/tegra/drm
+endif
+
 ## vc4/rpi
 if HAVE_GALLIUM_VC4
 SUBDIRS += drivers/vc4 winsys/vc4/drm
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index c6c6b13..57dec10 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -111,6 +111,11 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
         .configuration = pipe_default_configuration_query,
     },
     {
+        .driver_name = "tegra",
+        .create_screen = pipe_grate_create_screen,
+        .configuration = pipe_default_configuration_query,
+    },
+    {
         .driver_name = "virtio_gpu",
         .create_screen = pipe_virgl_create_screen,
         .configuration = pipe_default_configuration_query,
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h b/src/gallium/auxiliary/target-helpers/drm_helper.h
index 7aea83b..ca20ac7 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -263,6 +263,28 @@ pipe_freedreno_create_screen(int fd, const struct pipe_screen_config *config)
 
 #endif
 
+#ifdef GALLIUM_GRATE
+#include "tegra/drm/tegra_drm_public.h"
+
+struct pipe_screen *
+pipe_grate_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   struct pipe_screen *screen;
+   screen = tegra_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_grate_create_screen(int fd, const struct pipe_screen_config *config)
+{
+   fprintf(stderr, "grate: driver missing\n");
+   return NULL;
+}
+
+#endif
+
 #ifdef GALLIUM_VIRGL
 #include "virgl/drm/virgl_drm_public.h"
 #include "virgl/virgl_public.h"
diff --git a/src/gallium/auxiliary/target-helpers/drm_helper_public.h b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
index e21ea32..0eb6c17 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper_public.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
@@ -37,6 +37,9 @@ struct pipe_screen *
 pipe_virgl_create_screen(int fd, const struct pipe_screen_config *config);
 
 struct pipe_screen *
+pipe_grate_create_screen(int fd, const struct pipe_screen_config *config);
+
+struct pipe_screen *
 pipe_vc4_create_screen(int fd, const struct pipe_screen_config *config);
 
 struct pipe_screen *
diff --git a/src/gallium/drivers/grate/Automake.inc b/src/gallium/drivers/grate/Automake.inc
new file mode 100644
index 0000000..c6070a2
--- /dev/null
+++ b/src/gallium/drivers/grate/Automake.inc
@@ -0,0 +1,11 @@
+if HAVE_GALLIUM_GRATE
+
+TARGET_DRIVERS += tegra
+TARGET_CPPFLAGS += -DGALLIUM_GRATE
+TARGET_LIB_DEPS += \
+	$(top_builddir)/src/gallium/winsys/tegra/drm/libtegradrm.la \
+	$(top_builddir)/src/gallium/drivers/grate/libgrate.la \
+	$(GRATE_LIBS) \
+	$(LIBDRM_LIBS)
+
+endif
diff --git a/src/gallium/drivers/grate/Makefile.am b/src/gallium/drivers/grate/Makefile.am
new file mode 100644
index 0000000..08ac201
--- /dev/null
+++ b/src/gallium/drivers/grate/Makefile.am
@@ -0,0 +1,42 @@
+include $(top_srcdir)/src/gallium/Automake.inc
+
+noinst_LTLIBRARIES = libgrate.la
+
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers/grate/include \
+	-I$(top_srcdir)/src/gallium/drivers \
+	-I$(top_srcdir)/include
+
+AM_CFLAGS = \
+	$(GALLIUM_CFLAGS) \
+	$(LIBDRM_CFLAGS)
+
+libgrate_la_SOURCES = \
+	class_ids.h \
+	host1x01_hardware.h \
+	hw_host1x01_uclass.h \
+	grate_common.h \
+	grate_compiler_fp.c \
+	grate_compiler_vpe.c \
+	grate_compiler.h \
+	grate_context.c \
+	grate_context.h \
+	grate_draw.c \
+	grate_draw.h \
+	grate_fp_ir.c \
+	grate_fp_ir.h \
+	grate_fp_vliw.h \
+	grate_program.c \
+	grate_program.h \
+	grate_resource.c \
+	grate_resource.h \
+	grate_screen.c \
+	grate_screen.h \
+	grate_state.c \
+	grate_state.h \
+	grate_stream.c \
+	grate_stream.h \
+	grate_surface.c \
+	grate_surface.h \
+	grate_vpe_ir.c \
+	grate_vpe_ir.h
diff --git a/src/gallium/drivers/grate/class_ids.h b/src/gallium/drivers/grate/class_ids.h
new file mode 100644
index 0000000..4efda30
--- /dev/null
+++ b/src/gallium/drivers/grate/class_ids.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Dmitry Osipenko <digetx@gmail.com>
+ * Copyright (C) 2012-2013 NVIDIA Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Arto Merilainen <amerilainen@nvidia.com>
+ */
+
+#ifndef CLASS_IDS_H_
+#define CLASS_IDS_H_
+
+enum host1x_class {
+        HOST1X_CLASS_GR2D    = 0x51,
+        HOST1X_CLASS_GR2D_SB = 0x52,
+        HOST1X_CLASS_GR3D    = 0x60,
+};
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_common.h b/src/gallium/drivers/grate/grate_common.h
new file mode 100644
index 0000000..d4b2e07
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_common.h
@@ -0,0 +1,23 @@
+#ifndef GRATE_COMMON_H
+#define GRATE_COMMON_H
+
+#include <stdio.h>
+
+#include "grate_screen.h"
+
+/* Print "TODO: <function>()" when GRATE_DEBUG_UNIMPLEMENTED is set in grate_debug. */
+#define unimplemented() do { \
+   if (grate_debug & GRATE_DEBUG_UNIMPLEMENTED) \
+      printf("TODO: %s()\n", __func__); \
+} while (0)
+
+/* Shift value by TGR3D_<reg_name>_<field_name>__SHIFT and apply the __MASK. */
+#define TGR3D_VAL(reg_name, field_name, value) \
+   (((value) << TGR3D_ ## reg_name ## _ ## field_name ## __SHIFT) & \
+           TGR3D_ ## reg_name ## _ ## field_name ## __MASK)
+
+/* Evaluate to TGR3D_<reg_name>_<field_name> when boolean is true, else 0. */
+#define TGR3D_BOOL(reg_name, field_name, boolean) \
+   ((boolean) ? TGR3D_ ## reg_name ## _ ## field_name : 0)
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_compiler.h b/src/gallium/drivers/grate/grate_compiler.h
new file mode 100644
index 0000000..068147d
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_compiler.h
@@ -0,0 +1,38 @@
+#ifndef GRATE_COMPILER_H
+#define GRATE_COMPILER_H
+
+#include "util/list.h"
+
+#include <stdint.h>
+
+struct tgsi_parse_context;
+
+struct grate_vpe_shader {
+   struct list_head instructions;
+   uint16_t output_mask;
+};
+
+struct grate_fp_info {
+   struct {
+      uint32_t src;
+      uint32_t dst;
+   } inputs[16];
+   int num_inputs;
+   int color_input;
+   int max_tram_row;
+};
+
+struct grate_fp_shader {
+   struct list_head fp_instructions;
+   struct list_head alu_instructions;
+   struct list_head mfu_instructions;
+   struct grate_fp_info info;
+};
+
+void
+grate_tgsi_to_vpe(struct grate_vpe_shader *vpe, struct tgsi_parse_context *tgsi);
+
+void
+grate_tgsi_to_fp(struct grate_fp_shader *fp, struct tgsi_parse_context *tgsi);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_compiler_fp.c b/src/gallium/drivers/grate/grate_compiler_fp.c
new file mode 100644
index 0000000..9b0da15
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_compiler_fp.c
@@ -0,0 +1,241 @@
+#include "grate_compiler.h"
+#include "grate_fp_ir.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "util/u_memory.h"
+
+/* Build an ALU source operand with .index = index (asserted to be 0..15). */
+static struct fp_alu_src_operand
+fp_alu_src_row(int index)
+{
+   struct fp_alu_src_operand row = { 0 };
+   assert(index >= 0 && index < 16);
+   row.index = index;
+   return row;
+}
+
+/* Build an ALU source operand with .index = 16 + index (index asserted 0..7). */
+static struct fp_alu_src_operand
+fp_alu_src_reg(int index)
+{
+   struct fp_alu_src_operand reg = { 0 };
+   assert(index >= 0 && index < 8);
+   reg.index = 16 + index;
+   return reg;
+}
+
+/* Index-31 FIXED10 operand with sub_reg_select_high = 0 (presumably 0.0). */
+static struct fp_alu_src_operand
+fp_alu_src_zero(void)
+{
+   struct fp_alu_src_operand src = {
+      .index = 31, .datatype = FP_DATATYPE_FIXED10,
+      .sub_reg_select_high = 0
+   };
+   return src;
+}
+
+/* Index-31 FIXED10 operand with sub_reg_select_high = 1 (presumably 1.0). */
+static struct fp_alu_src_operand
+fp_alu_src_one(void)
+{
+   struct fp_alu_src_operand src = {
+      .index = 31, .datatype = FP_DATATYPE_FIXED10,
+      .sub_reg_select_high = 1
+   };
+   return src;
+}
+
+/* Scalar move expressed as a MAD with sources { src, one, zero, one }. */
+static struct fp_alu_instr
+fp_alu_sMOV(struct fp_alu_dst_operand dst, struct fp_alu_src_operand src)
+{
+   return (struct fp_alu_instr) {
+      .op = FP_ALU_OP_MAD,
+      .dst = dst,
+      .src = {
+         src,
+         fp_alu_src_one(),
+         fp_alu_src_zero(),
+         fp_alu_src_one()
+      }
+   };
+}
+
+/* Map a TGSI destination register and component to an ALU destination. */
+static struct fp_alu_dst_operand
+fp_alu_dst(const struct tgsi_dst_register *dst, int subreg, bool saturate)
+{
+   struct fp_alu_dst_operand out = { 0 };
+
+   out.saturate = saturate;
+   if (dst->File != TGSI_FILE_OUTPUT) {
+      out.index = dst->Index;
+      out.index += subreg;
+      return out;
+   }
+
+   /* fixed10, swizzle RGBA -> BGRA: components 0..2 map to slots 2..0,
+    * component 3 stays in slot 3 */
+   int slot = subreg < 3 ? (2 - subreg) : 3;
+   /* HACK: r2+r3 to match hard-coded store shader for now */
+   out.index = 2 + slot / 2;
+   out.write_low_sub_reg = (slot % 2) == 0;
+   out.write_high_sub_reg = (slot % 2) != 0;
+   return out;
+}
+
+/* Emit a MOV for each dst component enabled in the write mask; appends an
+ * fp_instr, an ALU packet and (for INPUT sources) an MFU instruction. */
+static void
+emit_vMOV(struct grate_fp_shader *fp, const struct tgsi_dst_register *dst,
+          bool saturate, const struct tgsi_src_register *src)
+{
+   const bool from_input = src->File == TGSI_FILE_INPUT;
+   const int swz[4] = {
+      src->SwizzleX, src->SwizzleY, src->SwizzleZ, src->SwizzleW
+   };
+
+   struct fp_instr *inst = CALLOC_STRUCT(fp_instr);
+   list_inithead(&inst->link);
+
+   struct fp_mfu_instr *mfu = NULL;
+   if (from_input) {
+      mfu = CALLOC_STRUCT(fp_mfu_instr);
+      list_inithead(&mfu->link);
+   }
+
+   struct fp_alu_instr_packet *alu = CALLOC_STRUCT(fp_alu_instr_packet);
+   list_inithead(&alu->link);
+
+   int num_slots = 0;
+   for (int c = 0; c < 4; ++c) {
+      struct fp_alu_src_operand operand;
+      if (!(dst->WriteMask & (1 << c)))
+         continue;
+      if (from_input) {
+         mfu->var[c].op = FP_VAR_OP_FP20;
+         mfu->var[c].tram_row = src->Index;
+         fp->info.max_tram_row = MAX2(fp->info.max_tram_row, src->Index);
+         operand = fp_alu_src_row(swz[c]);
+      } else {
+         operand = fp_alu_src_reg(src->Index + swz[c]);
+      }
+      alu->slots[num_slots++] = fp_alu_sMOV(fp_alu_dst(dst, c, saturate), operand);
+   }
+
+   /* Schedule addresses are the number of fp_instrs emitted before this one. */
+   const unsigned address = list_length(&fp->fp_instructions);
+   inst->alu_sched.num_instructions = 1;
+   inst->alu_sched.address = address;
+   if (mfu != NULL) {
+      inst->mfu_sched.num_instructions = 1;
+      inst->mfu_sched.address = address;
+      list_addtail(&mfu->link, &fp->mfu_instructions);
+   }
+
+   if (dst->File == TGSI_FILE_OUTPUT) {
+      inst->dw.enable = 1;
+      inst->dw.index = 1 + dst->Index;
+      inst->dw.stencil_write = 0;
+      inst->dw.src_regs = FP_DW_REGS_R2_R3; // hard-coded for now
+   }
+
+   list_addtail(&alu->link, &fp->alu_instructions);
+   list_addtail(&inst->link, &fp->fp_instructions);
+}
+
+/* Lower one TGSI instruction to FP IR; MOV is the only opcode handled. */
+static void
+emit_tgsi_instr(struct grate_fp_shader *fp, const struct tgsi_full_instruction *inst)
+{
+   const unsigned opcode = inst->Instruction.Opcode;
+   const bool saturate = inst->Instruction.Saturate != 0;
+
+   if (opcode == TGSI_OPCODE_MOV) {
+      emit_vMOV(fp, &inst->Dst[0].Register, saturate,
+                    &inst->Src[0].Register);
+      return;
+   }
+
+   unreachable("unsupported TGSI-opcode!");
+}
+
+#define LINK_SRC(index) ((index) << 3)
+#define LINK_DST(index, comp, type) (((comp) | (type) << 2) << ((index) * 4))
+#define LINK_DST_NONE      0
+#define LINK_DST_FX10_LOW  1
+#define LINK_DST_FX10_HIGH 2
+#define LINK_DST_FP20      3
+
+/* Record link words for one declared input and note the COLOR input. */
+static void
+emit_tgsi_input(struct grate_fp_shader *fp, const struct tgsi_full_declaration *decl)
+{
+   assert(decl->Range.First == decl->Range.Last);
+   assert((size_t)fp->info.num_inputs < sizeof(fp->info.inputs) / sizeof(fp->info.inputs[0]));
+
+   uint32_t dst = 0;
+   for (int i = 0; i < 4; ++i)
+      dst |= LINK_DST(i, i, LINK_DST_FP20);
+   fp->info.inputs[fp->info.num_inputs].src = LINK_SRC(1);
+   fp->info.inputs[fp->info.num_inputs].dst = dst;
+   /* Declaration.Semantic is only the "has semantic" flag; the semantic
+    * itself is Semantic.Name. */
+   if (decl->Declaration.Semantic && decl->Semantic.Name == TGSI_SEMANTIC_COLOR)
+      fp->info.color_input = decl->Range.First;
+   fp->info.num_inputs++;
+}
+
+/* Handle a TGSI declaration; only INPUT declarations are processed. */
+static void
+emit_tgsi_declaration(struct grate_fp_shader *fp, const struct tgsi_full_declaration *decl)
+{
+   if (decl->Declaration.File != TGSI_FILE_INPUT)
+      return;
+
+   emit_tgsi_input(fp, decl);
+}
+
+/* Lower the TGSI token stream in tgsi to FP IR, (re)initialising fp first. */
+void
+grate_tgsi_to_fp(struct grate_fp_shader *fp, struct tgsi_parse_context *tgsi)
+{
+   list_inithead(&fp->fp_instructions);
+   list_inithead(&fp->alu_instructions);
+   list_inithead(&fp->mfu_instructions);
+   fp->info.num_inputs = 0;
+   fp->info.color_input = -1;
+   fp->info.max_tram_row = 1;
+
+   while (!tgsi_parse_end_of_tokens(tgsi)) {
+      tgsi_parse_token(tgsi);
+      switch (tgsi->FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         emit_tgsi_declaration(fp, &tgsi->FullToken.FullDeclaration);
+         break;
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (tgsi->FullToken.FullInstruction.Instruction.Opcode != TGSI_OPCODE_END)
+            emit_tgsi_instr(fp, &tgsi->FullToken.FullInstruction);
+         break;
+      }
+   }
+
+   /* HACK: insert barycentric interpolation setup into the first MFU
+    * instruction. This will overwrite instructions in some cases, need
+    * proper scheduler to fix properly. With no MFU instruction the list's
+    * "first entry" would alias the list head, so skip the patch-up. */
+   if (list_empty(&fp->mfu_instructions))
+      return;
+
+   struct fp_mfu_instr *first = list_first_entry(&fp->mfu_instructions, struct fp_mfu_instr, link);
+   first->sfu.op = FP_SFU_OP_RCP;
+   first->sfu.reg = 4;
+   first->mul[0].dst = FP_MFU_MUL_DST_BARYCENTRIC_WEIGHT;
+   first->mul[0].src[0] = FP_MFU_MUL_SRC_SFU_RESULT;
+   first->mul[0].src[1] = FP_MFU_MUL_SRC_BARYCENTRIC_COEF_0;
+   first->mul[1].dst = FP_MFU_MUL_DST_BARYCENTRIC_WEIGHT;
+   first->mul[1].src[0] = FP_MFU_MUL_SRC_SFU_RESULT;
+   first->mul[1].src[1] = FP_MFU_MUL_SRC_BARYCENTRIC_COEF_1;
+}
diff --git a/src/gallium/drivers/grate/grate_compiler_vpe.c b/src/gallium/drivers/grate/grate_compiler_vpe.c
new file mode 100644
index 0000000..e7d61ef
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_compiler_vpe.c
@@ -0,0 +1,336 @@
+#include "grate_compiler.h"
+#include "grate_vpe_ir.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "util/u_memory.h"
+
+/* Placeholder source operand: UNDEF file, index 0, XYZW swizzle. */
+static struct vpe_src_operand
+src_undef(void)
+{
+   struct vpe_src_operand ret = {
+      .file = VPE_SRC_FILE_UNDEF, .index = 0,
+      .swizzle = { VPE_SWZ_X, VPE_SWZ_Y, VPE_SWZ_Z, VPE_SWZ_W }
+   };
+   return ret;
+}
+
+/* Source operand in the ATTRIB file with the given swizzle and modifiers. */
+static struct vpe_src_operand
+attrib(int index, const enum vpe_swz swizzle[4], bool negate, bool absolute)
+{
+   struct vpe_src_operand operand = { 0 };
+   operand.file = VPE_SRC_FILE_ATTRIB;
+   operand.index = index;
+   operand.negate = negate;
+   operand.absolute = absolute;
+   memcpy(operand.swizzle, swizzle, sizeof(operand.swizzle));
+   return operand;
+}
+
+/* Source operand in the UNIFORM file with the given swizzle and modifiers. */
+static struct vpe_src_operand
+uniform(int index, const enum vpe_swz swizzle[4], bool negate, bool absolute)
+{
+   struct vpe_src_operand operand = { 0 };
+   operand.file = VPE_SRC_FILE_UNIFORM;
+   operand.index = index;
+   operand.negate = negate;
+   operand.absolute = absolute;
+   memcpy(operand.swizzle, swizzle, sizeof(operand.swizzle));
+   return operand;
+}
+
+/* Source operand in the TEMP file with the given swizzle and modifiers. */
+static struct vpe_src_operand
+src_temp(int index, const enum vpe_swz swizzle[4], bool negate, bool absolute)
+{
+   struct vpe_src_operand operand = { 0 };
+   operand.file = VPE_SRC_FILE_TEMP;
+   operand.index = index;
+   operand.negate = negate;
+   operand.absolute = absolute;
+   memcpy(operand.swizzle, swizzle, sizeof(operand.swizzle));
+   return operand;
+}
+
+/* Placeholder destination: UNDEF file, index 0, empty write mask. */
+static struct vpe_dst_operand
+dst_undef(void)
+{
+   struct vpe_dst_operand ret = {
+      .file = VPE_DST_FILE_UNDEF, .index = 0,
+      .write_mask = 0,
+      .saturate = 0
+   };
+   return ret;
+}
+
+/* Destination in the OUTPUT file; also sets bit `index` in vpe->output_mask. */
+static struct vpe_dst_operand
+emit_output(struct grate_vpe_shader *vpe, int index,
+            unsigned int write_mask, bool saturate)
+{
+   struct vpe_dst_operand out = { 0 };
+   vpe->output_mask |= 1 << index;
+   out.file = VPE_DST_FILE_OUTPUT;
+   out.index = index;
+   out.write_mask = write_mask;
+   out.saturate = saturate;
+   return out;
+}
+
+/* Destination in the TEMP file with the given write mask and saturate flag. */
+static struct vpe_dst_operand
+dst_temp(int index, unsigned int write_mask, bool saturate)
+{
+   struct vpe_dst_operand tmp = { 0 };
+   tmp.file = VPE_DST_FILE_TEMP;
+   tmp.index = index;
+   tmp.write_mask = write_mask;
+   tmp.saturate = saturate;
+   return tmp;
+}
+
+/* Vector instruction with one real source; the other two sources are UNDEF. */
+static struct vpe_vec_instr
+emit_vec_unop(enum vpe_vec_op op, struct vpe_dst_operand dst,
+              struct vpe_src_operand src)
+{
+   return (struct vpe_vec_instr) {
+      .op = op,
+      .dst = dst,
+      .src = { src, src_undef(), src_undef() }
+   };
+}
+
+/* Vector instruction with two real sources; the third source is UNDEF. */
+static struct vpe_vec_instr
+emit_vec_binop(enum vpe_vec_op op, struct vpe_dst_operand dst,
+              struct vpe_src_operand src0, struct vpe_src_operand src1)
+{
+   return (struct vpe_vec_instr) {
+      .op = op,
+      .dst = dst,
+      .src = { src0, src1, src_undef() }
+   };
+}
+
+/* Vector NOP: UNDEF destination and three UNDEF sources. */
+static struct vpe_vec_instr
+emit_vNOP(void)
+{
+   struct vpe_vec_instr ret = {
+      .op = VPE_VEC_OP_NOP, .dst = dst_undef(),
+      .src = { src_undef(), src_undef(), src_undef() }
+   };
+   return ret;
+}
+
+static struct vpe_vec_instr
+emit_vMOV(struct vpe_dst_operand dst, struct vpe_src_operand src)
+{
+   return emit_vec_unop(VPE_VEC_OP_MOV, dst, src);
+}
+
+/* Vector ADD; it is "strange" in that it takes src0 and src2 (src1 is UNDEF). */
+static struct vpe_vec_instr
+emit_vADD(struct vpe_dst_operand dst, struct vpe_src_operand src0,
+          struct vpe_src_operand src2)
+{
+   return (struct vpe_vec_instr) {
+      .op = VPE_VEC_OP_ADD,
+      .dst = dst,
+      .src = { src0, src_undef(), src2 }
+   };
+}
+
+#define GEN_V_BINOP(OP) \
+static struct vpe_vec_instr \
+emit_v ## OP (struct vpe_dst_operand dst, struct vpe_src_operand src0, \
+          struct vpe_src_operand src1) \
+{ \
+   return emit_vec_binop(VPE_VEC_OP_ ## OP, dst, src0, src1); \
+}
+
+GEN_V_BINOP(MUL)
+GEN_V_BINOP(DP3)
+GEN_V_BINOP(DP4)
+GEN_V_BINOP(SLT)
+GEN_V_BINOP(MAX)
+
+/* Vector MAD using all three source operands. */
+static struct vpe_vec_instr
+emit_vMAD(struct vpe_dst_operand dst, struct vpe_src_operand src0,
+          struct vpe_src_operand src1, struct vpe_src_operand src2)
+{
+   return (struct vpe_vec_instr) {
+      .op = VPE_VEC_OP_MAD,
+      .dst = dst,
+      .src = { src0, src1, src2 }
+   };
+}
+
+/* Scalar NOP: UNDEF destination and UNDEF source. */
+static struct vpe_scalar_instr
+emit_sNOP(void)
+{
+   struct vpe_scalar_instr ret = {
+      .op = VPE_SCALAR_OP_NOP, .dst = dst_undef(),
+      .src = src_undef()
+   };
+   return ret;
+}
+
+#define GEN_S_UNOP(OP) \
+static struct vpe_scalar_instr \
+emit_s ## OP (struct vpe_dst_operand dst, struct vpe_src_operand src) \
+{ \
+   struct vpe_scalar_instr ret = { \
+      .op = VPE_SCALAR_OP_ ## OP, \
+      .dst = dst, \
+      .src = src \
+   }; \
+   return ret; \
+}
+
+GEN_S_UNOP(RSQ)
+
+static struct vpe_instr *
+emit_packed(struct vpe_vec_instr vec, struct vpe_scalar_instr scalar)
+{
+   struct vpe_instr *ret = CALLOC_STRUCT(vpe_instr);
+   list_inithead(&ret->link);
+   ret->vec = vec;
+   ret->scalar = scalar;
+   return ret;
+}
+
+/* Translate a TGSI destination register to a VPE destination operand.
+ * Only the OUTPUT and TEMPORARY files are handled. */
+static struct vpe_dst_operand
+tgsi_dst_to_vpe(struct grate_vpe_shader *vpe, const struct tgsi_dst_register *dst, bool saturate)
+{
+   if (dst->File == TGSI_FILE_OUTPUT)
+      return emit_output(vpe, dst->Index, dst->WriteMask, saturate);
+
+   if (dst->File == TGSI_FILE_TEMPORARY)
+      return dst_temp(dst->Index, dst->WriteMask, saturate);
+
+   /* any other register file is unsupported */
+   unreachable("unsupported output");
+}
+
+/* Translate a TGSI source register to a VPE source operand, carrying over
+ * its swizzle and negate/absolute modifiers. */
+static struct vpe_src_operand
+tgsi_src_to_vpe(struct grate_vpe_shader *vpe, const struct tgsi_src_register *src)
+{
+   const bool neg = src->Negate != 0;
+   const bool abs_mod = src->Absolute != 0;
+   const int idx = src->Index;
+
+   enum vpe_swz swz[4] = {
+      src->SwizzleX, src->SwizzleY, src->SwizzleZ, src->SwizzleW
+   };
+
+   if (src->File == TGSI_FILE_INPUT)
+      return attrib(idx, swz, neg, abs_mod);
+
+   if (src->File == TGSI_FILE_CONSTANT)
+      return uniform(idx, swz, neg, abs_mod);
+
+   if (src->File == TGSI_FILE_TEMPORARY)
+      return src_temp(idx, swz, neg, abs_mod);
+
+   if (src->File == TGSI_FILE_IMMEDIATE) {
+      /* HACK: allocate uniforms from the top for immediates; need to actually record these */
+      return uniform(1023 - idx, swz, neg, abs_mod);
+   }
+
+   /* any other register file is unsupported */
+   unreachable("unsupported input!");
+}
+
+static struct vpe_instr *
+tgsi_to_vpe(struct grate_vpe_shader *vpe, const struct tgsi_full_instruction *inst)
+{
+   bool saturate = inst->Instruction.Saturate != 0;
+   switch (inst->Instruction.Opcode) {
+   case TGSI_OPCODE_MOV:
+      return emit_packed(emit_vMOV(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_ADD:
+      return emit_packed(emit_vADD(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_MUL:
+      return emit_packed(emit_vMUL(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_DP3:
+      return emit_packed(emit_vDP3(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_DP4:
+      return emit_packed(emit_vDP4(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_SLT:
+      return emit_packed(emit_vSLT(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_MAX:
+      return emit_packed(emit_vMAX(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_MAD:
+      return emit_packed(emit_vMAD(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[1].Register),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[2].Register)),
+                         emit_sNOP());
+
+   case TGSI_OPCODE_RSQ:
+      return emit_packed(emit_vNOP(),
+                         emit_sRSQ(tgsi_dst_to_vpe(vpe, &inst->Dst[0].Register, saturate),
+                                   tgsi_src_to_vpe(vpe, &inst->Src[0].Register)));
+
+   default:
+      unreachable("unsupported TGSI-opcode!");
+   }
+}
+
+/* Translate every TGSI instruction except END into vpe->instructions. */
+void
+grate_tgsi_to_vpe(struct grate_vpe_shader *vpe, struct tgsi_parse_context *tgsi)
+{
+   vpe->output_mask = 0;
+   list_inithead(&vpe->instructions);
+
+   while (!tgsi_parse_end_of_tokens(tgsi)) {
+      tgsi_parse_token(tgsi);
+      if (tgsi->FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+         continue;
+      if (tgsi->FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_END)
+         continue;
+
+      struct vpe_instr *instr = tgsi_to_vpe(vpe, &tgsi->FullToken.FullInstruction);
+      list_addtail(&instr->link, &vpe->instructions);
+   }
+}
diff --git a/src/gallium/drivers/grate/grate_context.c b/src/gallium/drivers/grate/grate_context.c
new file mode 100644
index 0000000..420724a
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_context.c
@@ -0,0 +1,314 @@
+#include <errno.h>
+#include <stdio.h>
+#include <math.h>
+
+#include "util/u_bitcast.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+
+#include "indices/u_primconvert.h"
+
+#include "grate_common.h"
+#include "grate_context.h"
+#include "grate_draw.h"
+#include "grate_program.h"
+#include "grate_resource.h"
+#include "grate_screen.h"
+#include "grate_state.h"
+#include "grate_surface.h"
+
+#include "host1x01_hardware.h"
+#include "tgr_3d.xml.h"
+
+static int
+init(struct grate_stream *stream)
+{
+   int err = grate_stream_begin(stream);
+   if (err < 0) {
+      fprintf(stderr, "grate_stream_begin() failed: %d\n", err);
+      return err;
+   }
+   /* Default GR3D register state: a fixed list of writes, flushed before returning 0. */
+   grate_stream_push_setclass(stream, HOST1X_CLASS_GR3D);
+
+   /* Tegra30 specific stuff */
+   grate_stream_push(stream, host1x_opcode_incr(0x750, 16)); /* followed by 16 zero words */
+   for (int i = 0; i < 16; i++)
+      grate_stream_push(stream, 0x00000000);
+
+   grate_stream_push(stream, host1x_opcode_imm(0x907, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x908, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x909, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x90a, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x90b, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb00, 0x3));
+   grate_stream_push(stream, host1x_opcode_imm(0xb01, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb04, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb06, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb07, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb08, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb09, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb0a, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb0b, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb0c, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb0d, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb0e, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb0f, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb10, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb11, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb12, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xb14, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe40, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe41, 0));
+
+   /* Common stuff */
+   grate_stream_push(stream, host1x_opcode_imm(0x00d, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x00e, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x00f, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x010, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x011, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x012, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x013, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x014, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x015, 0));
+
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_VP_ATTRIB_IN_OUT_SELECT, 0));
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_DRAW_PARAMS, 0));
+
+   grate_stream_push(stream, host1x_opcode_imm(0x124, 0x7));
+   grate_stream_push(stream, host1x_opcode_imm(0x125, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x126, 0));
+
+   grate_stream_push(stream, host1x_opcode_incr(0x200, 5)); /* followed by 5 data words */
+   grate_stream_push(stream, 0x00000011);
+   grate_stream_push(stream, 0x0000ffff);
+   grate_stream_push(stream, 0x00ff0000);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+
+   grate_stream_push(stream, host1x_opcode_imm(0x209, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x20a, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x20b, 0x3));
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_LINKER_INSTRUCTION(0), 0));
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_LINKER_INSTRUCTION(1), 0));
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_CULL_FACE_LINKER_SETUP, 25)); /* 25 data words, one per comment below */
+   grate_stream_push(stream, 0xb8e00000); /* TGR3D_CULL_FACE_LINKER_SETUP */
+   grate_stream_push(stream, 0x00000000); /* TGR3D_POLYGON_OFFSET_UNITS */
+   grate_stream_push(stream, 0x00000000); /* TGR3D_POLYGON_OFFSET_FACTOR */
+   grate_stream_push(stream, 0x00000105); /* TGR3D_POINT_PARAMS */
+   grate_stream_push(stream, u_bitcast_f2u(0.5f)); /* TGR3D_POINT_SIZE */
+   grate_stream_push(stream, u_bitcast_f2u(1.0f)); /* TGR3D_POINT_COORD_RANGE_MAX_S */
+   grate_stream_push(stream, u_bitcast_f2u(1.0f)); /* TGR3D_POINT_COORD_RANGE_MAX_T */
+   grate_stream_push(stream, u_bitcast_f2u(0.0f)); /* TGR3D_POINT_COORD_RANGE_MIN_S */
+   grate_stream_push(stream, u_bitcast_f2u(0.0f)); /* TGR3D_POINT_COORD_RANGE_MIN_T */
+   grate_stream_push(stream, 0x00000000); /* TGR3D_LINE_PARAMS */
+   grate_stream_push(stream, u_bitcast_f2u(0.5f)); /* TGR3D_HALF_LINE_WIDTH */
+   grate_stream_push(stream, u_bitcast_f2u(1.0f)); /* 0x34e - unknown */
+   grate_stream_push(stream, 0x00000000); /* 0x34f - unknown */
+   grate_stream_push(stream, 0x00000000); /* TGR3D_SCISSOR_HORIZ */
+   grate_stream_push(stream, 0x00000000); /* TGR3D_SCISSOR_VERT */
+   grate_stream_push(stream, u_bitcast_f2u(0.0f)); /* TGR3D_VIEWPORT_X_BIAS */
+   grate_stream_push(stream, u_bitcast_f2u(0.0f)); /* TGR3D_VIEWPORT_Y_BIAS */
+   grate_stream_push(stream, u_bitcast_f2u(0.5f - powf(2.0, -21))); /* TGR3D_VIEWPORT_Z_BIAS */
+   grate_stream_push(stream, u_bitcast_f2u(0.0f)); /* TGR3D_VIEWPORT_X_SCALE */
+   grate_stream_push(stream, u_bitcast_f2u(0.0f)); /* TGR3D_VIEWPORT_Y_SCALE */
+   grate_stream_push(stream, u_bitcast_f2u(0.5f - powf(2.0, -21))); /* TGR3D_VIEWPORT_Z_SCALE */
+   grate_stream_push(stream, u_bitcast_f2u(1.0f)); /* TGR3D_GUARDBAND_WIDTH */
+   grate_stream_push(stream, u_bitcast_f2u(1.0f)); /* TGR3D_GUARDBAND_HEIGHT */
+   grate_stream_push(stream, u_bitcast_f2u(1.0f)); /* TGR3D_GUARDBAND_DEPTH */
+   grate_stream_push(stream, 0x00000205); /* 0x35b - unknown */
+
+   grate_stream_push(stream, host1x_opcode_imm(0x363, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x364, 0));
+
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_STENCIL_FRONT1, 0x07ff));
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_STENCIL_BACK1, 0x07ff));
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_STENCIL_PARAMS, 18)); /* 18 data words follow */
+   grate_stream_push(stream, 0x00000040); /* TGR3D_STENCIL_PARAMS */
+   grate_stream_push(stream, 0x00000310); /* TGR3D_DEPTH_TEST_PARAMS */
+   grate_stream_push(stream, 0x00000000); /* TGR3D_DEPTH_RANGE_NEAR */
+   grate_stream_push(stream, 0x000fffff); /* TGR3D_DEPTH_RANGE_FAR */
+   grate_stream_push(stream, 0x00000001); /* 0x406 - unknown */
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x1fff1fff);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000006);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000008);
+   grate_stream_push(stream, 0x00000048);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_FP_PSEQ_UPLOAD_INST_BUFFER_FLUSH, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x501, 0x7));
+   grate_stream_push(stream, host1x_opcode_imm(0x502, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x503, 0));
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_FP_PSEQ_ENGINE_INST, 32)); /* followed by 32 zero words */
+   for (int i = 0; i < 32; i++)
+      grate_stream_push(stream, 0);
+
+   grate_stream_push(stream, host1x_opcode_imm(0x540, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x542, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x543, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x544, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x545, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x546, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x60e, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x702, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x740, 0x1));
+   grate_stream_push(stream, host1x_opcode_imm(0x741, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x742, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x902, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0x903, 0));
+
+   grate_stream_push(stream, host1x_opcode_incr(0xa00, 13)); /* 13 data words follow */
+   grate_stream_push(stream, 0x00000e00); /* TGR3D_FDC_CONTROL */
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x000001ff);
+   grate_stream_push(stream, 0x000001ff);
+   grate_stream_push(stream, 0x000001ff);
+   grate_stream_push(stream, 0x00000030);
+   grate_stream_push(stream, 0x00000020);
+   grate_stream_push(stream, 0x000001ff);
+   grate_stream_push(stream, 0x00000100);
+   grate_stream_push(stream, 0x0f0f0f0f);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+   grate_stream_push(stream, 0x00000000);
+
+   grate_stream_push(stream, host1x_opcode_imm(0xe20, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe21, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe22, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe25, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe26, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe27, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe28, 0));
+   grate_stream_push(stream, host1x_opcode_imm(0xe29, 0));
+
+   grate_stream_end(stream);
+   grate_stream_flush(stream); /* NOTE(review): results of end/flush are not checked here */
+
+   return 0;
+}
+
+/* Open a DRM channel of `class` and wrap it, with a command stream, in a new
+ * grate_channel (*channelp).  Returns 0 or a negative error; nothing leaks. */
+static int
+grate_channel_create(struct grate_context *context,
+                     enum drm_tegra_class class,
+                     struct grate_channel **channelp)
+{
+   struct grate_screen *screen = grate_screen(context->base.screen);
+   struct drm_tegra_channel *drm_channel;
+   struct grate_channel *channel;
+   int err = drm_tegra_channel_open(&drm_channel, screen->drm, class);
+   if (err < 0)
+      return err;
+
+   channel = CALLOC_STRUCT(grate_channel);
+   if (!channel) {
+      drm_tegra_channel_close(drm_channel); /* was leaked on this path */
+      return -ENOMEM;
+   }
+   channel->context = context;
+
+   err = grate_stream_create(screen->drm, drm_channel, &channel->stream, 32768);
+   if (err < 0) {
+      FREE(channel);
+      drm_tegra_channel_close(drm_channel);
+      return err;
+   }
+
+   *channelp = channel; /* only written on success */
+   return 0;
+}
+
+static void
+grate_channel_delete(struct grate_channel *channel)
+{
+   grate_stream_destroy(&channel->stream); /* stream first, then the DRM channel it was created on */
+   drm_tegra_channel_close(channel->stream.channel); /* NOTE(review): read after destroy - assumes destroy leaves this field intact; confirm */
+   FREE(channel); /* allocated with CALLOC_STRUCT in grate_channel_create() */
+}
+
+/* pipe_context::destroy hook: free what grate_screen_context_create() acquired. */
+static void
+grate_context_destroy(struct pipe_context *pcontext)
+{
+   struct grate_context *context = grate_context(pcontext);
+   if (context->base.stream_uploader) /* const_uploader is the same object */
+      u_upload_destroy(context->base.stream_uploader);
+   if (context->primconvert)
+      util_primconvert_destroy(context->primconvert);
+   slab_destroy_child(&context->transfer_pool);
+   grate_channel_delete(context->gr3d);
+   grate_channel_delete(context->gr2d);
+   FREE(context);
+}
+
+static void
+grate_context_flush(struct pipe_context *pcontext,
+                    struct pipe_fence_handle **pfence, /* never written by this stub */
+                    enum pipe_flush_flags flags)
+{
+   unimplemented(); /* stub installed as pipe_context::flush; no argument is used */
+}
+
+/* Create a grate context: open the GR2D/GR3D channels, submit the initial GR3D
+ * state and install the entry points.  On any failure everything is freed and NULL returned. */
+struct pipe_context *
+grate_screen_context_create(struct pipe_screen *pscreen,
+                            void *priv, unsigned flags)
+{
+   struct grate_context *context = CALLOC_STRUCT(grate_context);
+   if (!context)
+      return NULL;
+
+   context->base.screen = pscreen;
+   context->base.priv = priv;
+   context->primconvert = util_primconvert_create(&context->base,
+                                                  (1 << PIPE_PRIM_QUADS) - 1);
+   int err = grate_channel_create(context, DRM_TEGRA_GR2D, &context->gr2d);
+   if (err >= 0)
+      err = grate_channel_create(context, DRM_TEGRA_GR3D, &context->gr3d);
+   if (err < 0) {
+      fprintf(stderr, "grate_channel_create() failed: %d\n", err);
+      goto fail;
+   }
+   if (init(&context->gr3d->stream) < 0) /* init() prints its own diagnostic */
+      goto fail;
+
+   slab_create_child(&context->transfer_pool, &grate_screen(pscreen)->transfer_pool);
+   context->base.destroy = grate_context_destroy;
+   context->base.flush = grate_context_flush;
+   context->base.stream_uploader = u_upload_create_default(&context->base);
+   context->base.const_uploader = context->base.stream_uploader;
+   grate_context_resource_init(&context->base);
+   grate_context_surface_init(&context->base);
+   grate_context_state_init(&context->base);
+   grate_context_blend_init(&context->base);
+   grate_context_sampler_init(&context->base);
+   grate_context_rasterizer_init(&context->base);
+   grate_context_zsa_init(&context->base);
+   grate_context_program_init(&context->base);
+   grate_context_vbo_init(&context->base);
+   grate_context_draw_init(&context->base);
+   return &context->base;
+
+fail: /* context was zero-allocated, so a NULL pointer means "never acquired" */
+   if (context->gr3d)
+      grate_channel_delete(context->gr3d);
+   if (context->gr2d)
+      grate_channel_delete(context->gr2d);
+   if (context->primconvert)
+      util_primconvert_destroy(context->primconvert);
+   FREE(context);
+   return NULL;
+}
diff --git a/src/gallium/drivers/grate/grate_context.h b/src/gallium/drivers/grate/grate_context.h
new file mode 100644
index 0000000..b1bd69b
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_context.h
@@ -0,0 +1,63 @@
+#ifndef GRATE_CONTEXT_H
+#define GRATE_CONTEXT_H
+
+#include "util/slab.h"
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "grate_state.h"
+#include "grate_stream.h"
+
+struct primconvert_context;
+
+struct grate_framebuffer_state {
+   struct pipe_framebuffer_state base;
+   int num_rts; /* presumably the number of valid bos[]/rt_params[] entries - confirm against grate_state.c */
+   struct drm_tegra_bo *bos[16];
+   uint32_t rt_params[16];
+   uint32_t mask;
+};
+
+struct grate_channel {
+   struct grate_context *context; /* context that created this channel (set in grate_channel_create) */
+   struct grate_stream stream;    /* command stream set up by grate_stream_create() on the DRM channel */
+};
+
+struct grate_context {
+   struct pipe_context base; /* must stay first: grate_context() casts pipe_context* to this struct */
+   struct primconvert_context *primconvert; /* grate_draw_vbo() routes modes >= PIPE_PRIM_QUADS through it */
+
+   struct grate_channel *gr2d; /* opened with DRM_TEGRA_GR2D */
+   struct grate_channel *gr3d; /* opened with DRM_TEGRA_GR3D; draws are emitted on its stream */
+
+   struct grate_framebuffer_state framebuffer;
+
+   struct slab_child_pool transfer_pool; /* child of the screen's transfer_pool */
+
+   struct grate_vertex_state *vs; /* vs->mask forms the upper 16 bits of TGR3D_VP_ATTRIB_IN_OUT_SELECT */
+   struct grate_vertexbuf_state vbs;
+   struct pipe_constant_buffer constant_buffer[PIPE_SHADER_TYPES];
+
+   struct grate_zsa_state *zsa;
+   struct grate_rasterizer_state *rast; /* rast->draw_params is OR'ed into TGR3D_DRAW_PARAMS */
+
+   struct grate_vertex_shader_state *vshader; /* output_mask forms the lower 16 bits of VP_ATTRIB_IN_OUT_SELECT */
+   struct grate_fragment_shader_state *fshader;
+
+   uint32_t no_scissor[3];
+   uint32_t viewport[10];
+   uint32_t guardband[4];
+};
+
+static inline struct grate_context *
+grate_context(struct pipe_context *context)
+{
+   return (struct grate_context *)context; /* valid because `base` is the first member of struct grate_context */
+}
+
+struct pipe_context *
+grate_screen_context_create(struct pipe_screen *pscreen,
+                            void *priv, unsigned flags);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_draw.c b/src/gallium/drivers/grate/grate_draw.c
new file mode 100644
index 0000000..c2c4ab4
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_draw.c
@@ -0,0 +1,125 @@
+#include <stdio.h>
+
+#include "pipe/p_state.h"
+#include "util/u_helpers.h"
+#include "util/u_prim.h"
+#include "indices/u_primconvert.h"
+
+#include "grate_common.h"
+#include "grate_context.h"
+#include "grate_draw.h"
+#include "grate_program.h"
+#include "grate_resource.h"
+#include "grate_state.h"
+
+#include "tgr_3d.xml.h"
+#include "host1x01_hardware.h"
+
+/* Translate a gallium primitive type into the TGR3D_PRIMITIVE_TYPE_* value
+ * that grate_draw_vbo() stores in the PRIMITIVE_TYPE field of
+ * TGR3D_DRAW_PARAMS.
+ * Only point, line and triangle modes are mapped; any other mode is a
+ * caller bug and ends in unreachable(). */
+static int
+grate_primitive_type(enum pipe_prim_type mode)
+{
+   if (mode == PIPE_PRIM_POINTS)
+      return TGR3D_PRIMITIVE_TYPE_POINTS;
+
+   /* lines */
+   if (mode == PIPE_PRIM_LINES)
+      return TGR3D_PRIMITIVE_TYPE_LINES;
+   if (mode == PIPE_PRIM_LINE_LOOP)
+      return TGR3D_PRIMITIVE_TYPE_LINE_LOOP;
+   if (mode == PIPE_PRIM_LINE_STRIP)
+      return TGR3D_PRIMITIVE_TYPE_LINE_STRIP;
+
+   /* triangles */
+   if (mode == PIPE_PRIM_TRIANGLES)
+      return TGR3D_PRIMITIVE_TYPE_TRIANGLES;
+   if (mode == PIPE_PRIM_TRIANGLE_STRIP)
+      return TGR3D_PRIMITIVE_TYPE_TRIANGLE_STRIP;
+   if (mode == PIPE_PRIM_TRIANGLE_FAN)
+      return TGR3D_PRIMITIVE_TYPE_TRIANGLE_FAN;
+
+   unreachable("unexpected enum pipe_prim_type");
+}
+
+/* pipe_context::draw_vbo hook: emit current state plus one draw on the GR3D
+ * stream and flush it.  Modes >= PIPE_PRIM_QUADS are delegated to primconvert;
+ * index_size must be 0, 1 or 2 and count must be 1..2047 (both only asserted). */
+static void
+grate_draw_vbo(struct pipe_context *pcontext,
+               const struct pipe_draw_info *info)
+{
+   struct grate_context *context = grate_context(pcontext);
+   struct grate_stream *stream = &context->gr3d->stream;
+   uint16_t out_mask = context->vshader->output_mask;
+   struct pipe_resource *index_buffer = NULL;
+   unsigned index_offset = 0, offset = 0;
+
+   if (info->mode >= PIPE_PRIM_QUADS) {
+      /* the HW can handle non-trimmed sizes, but primconvert can't; trim a
+       * private copy instead of writing through the caller's const info */
+      struct pipe_draw_info trimmed = *info;
+      if (!u_trim_pipe_prim(trimmed.mode, &trimmed.count))
+         return;
+      util_primconvert_save_rasterizer_state(context->primconvert, &context->rast->base);
+      util_primconvert_draw_vbo(context->primconvert, &trimmed);
+      return;
+   }
+
+   /* Resolve the index buffer before opening the stream, so an upload
+    * failure cannot return with the stream begun but never ended. */
+   if (info->index_size > 0) {
+      if (!info->has_user_indices)
+         index_buffer = info->index.resource;
+      else if (!util_upload_index_buffer(pcontext, info, &index_buffer, &index_offset)) {
+         fprintf(stderr, "util_upload_index_buffer() failed\n");
+         return;
+      }
+      index_offset += info->start * info->index_size;
+   } else
+      offset = info->start;
+
+   int err = grate_stream_begin(stream);
+   if (err < 0) {
+      fprintf(stderr, "grate_stream_begin() failed: %d\n", err);
+      return;
+   }
+
+   grate_stream_push_setclass(stream, HOST1X_CLASS_GR3D);
+   grate_emit_state(context);
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_VP_ATTRIB_IN_OUT_SELECT, 1));
+   grate_stream_push(stream, ((uint32_t)context->vs->mask << 16) | out_mask);
+   if (info->index_size > 0) {
+      grate_stream_push(stream, host1x_opcode_incr(TGR3D_INDEX_PTR, 1));
+      grate_stream_push_reloc(stream, grate_resource(index_buffer)->bo, index_offset);
+   }
+
+   /* draw params */
+   assert(info->index_size >= 0 && info->index_size <= 2);
+   uint32_t value = TGR3D_VAL(DRAW_PARAMS, INDEX_MODE, info->index_size);
+   value |= context->rast->draw_params;
+   value |= TGR3D_VAL(DRAW_PARAMS, PRIMITIVE_TYPE, grate_primitive_type(info->mode));
+   value |= TGR3D_VAL(DRAW_PARAMS, FIRST, info->start);
+   value |= 0xC0000000; /* flush input caches? */
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_DRAW_PARAMS, 1));
+   grate_stream_push(stream, value);
+
+   assert(info->count > 0 && info->count < (1 << 11));
+   value  = TGR3D_VAL(DRAW_PRIMITIVES, INDEX_COUNT, info->count - 1);
+   value |= TGR3D_VAL(DRAW_PRIMITIVES, OFFSET, offset);
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_DRAW_PRIMITIVES, 1));
+   grate_stream_push(stream, value);
+   grate_stream_end(stream);
+   grate_stream_flush(stream);
+}
+
+void
+grate_context_draw_init(struct pipe_context *pcontext)
+{
+   pcontext->draw_vbo = grate_draw_vbo; /* the only pipe_context hook installed by this file */
+}
diff --git a/src/gallium/drivers/grate/grate_draw.h b/src/gallium/drivers/grate/grate_draw.h
new file mode 100644
index 0000000..f83f4cb
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_draw.h
@@ -0,0 +1,7 @@
+#ifndef GRATE_DRAW_H
+#define GRATE_DRAW_H
+
+void
+grate_context_draw_init(struct pipe_context *pcontext);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_fp_ir.c b/src/gallium/drivers/grate/grate_fp_ir.c
new file mode 100644
index 0000000..4fd7861
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_fp_ir.c
@@ -0,0 +1,196 @@
+#include "grate_fp_ir.h"
+
+void
+grate_fp_pack_alu(uint32_t *dst, struct fp_alu_instr *instr)
+{
+   union { /* scratch: one 64-bit ALU instruction, viewed as bit-fields or as two 32-bit words */
+      struct __attribute__((packed)) {
+         unsigned rD_fixed10:1;
+         unsigned rD_absolute_value:1;
+         unsigned rD_enable:1;
+         unsigned rD_minus_one:1;
+         unsigned rD_sub_reg_select_high:1;
+         unsigned rD_reg_select:1;
+         /* source operand C */
+         unsigned rC_scale_by_two:1;
+         unsigned rC_negate:1;
+         unsigned rC_absolute_value:1;
+         unsigned rC_fixed10:1;
+         unsigned rC_minus_one:1;
+         unsigned rC_sub_reg_select_high:1;
+         unsigned rC_reg_select:7;
+         /* source operand B */
+         unsigned rB_scale_by_two:1;
+         unsigned rB_negate:1;
+         unsigned rB_absolute_value:1;
+         unsigned rB_fixed10:1;
+         unsigned rB_minus_one:1;
+         unsigned rB_sub_reg_select_high:1;
+         unsigned rB_reg_select:7;
+         /* source operand A */
+         unsigned rA_scale_by_two:1;
+         unsigned rA_negate:1;
+         unsigned rA_absolute_value:1;
+         unsigned rA_fixed10:1;
+         unsigned rA_minus_one:1;
+         unsigned rA_sub_reg_select_high:1;
+         unsigned rA_reg_select:7;
+         /* destination and result modifiers */
+         unsigned write_low_sub_reg:1;
+         unsigned write_high_sub_reg:1;
+         unsigned dst_reg:7;
+         unsigned condition_code:2;
+         unsigned saturate_result:1;
+         unsigned scale_result:2;
+         /* operation control */
+         unsigned addition_disable:1;
+         unsigned accumulate_result_this:1;
+         unsigned accumulate_result_other:1;
+         unsigned opcode:2;
+      };
+
+      uint32_t words[2];
+   } tmp = {
+      .opcode = instr->op,
+      .dst_reg = instr->dst.index,
+      .saturate_result = instr->dst.saturate,
+
+      .write_low_sub_reg = instr->dst.write_low_sub_reg,
+      .write_high_sub_reg = instr->dst.write_high_sub_reg,
+
+      .rA_reg_select = instr->src[0].index,
+      .rA_fixed10 = instr->src[0].datatype != FP_DATATYPE_FP20,
+      .rA_sub_reg_select_high = instr->src[0].sub_reg_select_high,
+
+      .rB_reg_select = instr->src[1].index,
+      .rB_fixed10 = instr->src[1].datatype != FP_DATATYPE_FP20,
+      .rB_sub_reg_select_high = instr->src[1].sub_reg_select_high,
+
+      .rC_reg_select = instr->src[2].index,
+      .rC_fixed10 = instr->src[2].datatype != FP_DATATYPE_FP20,
+      .rC_sub_reg_select_high = instr->src[2].sub_reg_select_high,
+      /* rD_reg_select is one bit: set when src[3] names the same register index as src[2] */
+      .rD_reg_select = instr->src[3].index == instr->src[2].index,
+      .rD_fixed10 = instr->src[3].datatype != FP_DATATYPE_FP20,
+      .rD_sub_reg_select_high = instr->src[3].sub_reg_select_high,
+   };
+   /* NOTE(review): instr->condition, instr->scale and the per-source scale_by_two/negate/absolute_value/minus_one flags are not encoded; those bits stay 0 */
+   /* copy packed instruction into destination, swapping the words: dst[0] = words[1], dst[1] = words[0] */
+   for (int i = 0; i < 2; ++i)
+      dst[i] = tmp.words[1 - i];
+}
+
+uint32_t
+grate_fp_pack_dw(struct fp_dw_instr *instr)
+{
+   union { /* scratch: one 32-bit DW instruction word, viewed as bit-fields or as a whole word */
+      struct __attribute__((packed)) {
+         unsigned enable:1;
+         unsigned unk_1:1;
+         unsigned render_target_index:4;
+         unsigned unk_6_9:4;
+         unsigned stencil_write:1;
+         unsigned unk_11_14:4;
+         unsigned src_regs_select:1;
+         unsigned unk_16_31:16;
+      };
+
+      uint32_t word;
+   } tmp = {
+      .enable = instr->enable,
+      .unk_16_31 = instr->enable ? 2 : 0, // no idea what this is; written as 2 whenever enable is set
+      .render_target_index = instr->index, /* truncated to 4 bits by the field width */
+      .stencil_write = instr->stencil_write,
+      .src_regs_select = instr->src_regs,
+   };
+
+   return tmp.word; /* the remaining unk_* fields are left at 0 */
+}
+
+void
+grate_fp_pack_mfu(uint32_t *dst, struct fp_mfu_instr *instr)
+{
+   union { /* scratch: one 64-bit MFU instruction, viewed as bit-fields or as two 32-bit words */
+      struct __attribute__((packed)) {
+         unsigned var0_saturate:1;
+         unsigned var0_opcode:2;
+         unsigned var0_source:4;
+
+         unsigned var1_saturate:1;
+         unsigned var1_opcode:2;
+         unsigned var1_source:4;
+
+         unsigned var2_saturate:1;
+         unsigned var2_opcode:2;
+         unsigned var2_source:4;
+
+         unsigned var3_saturate:1;
+         unsigned var3_opcode:2;
+         unsigned var3_source:4;
+
+         unsigned __pad:4; /* never initialised below, so it stays 0 */
+
+         unsigned mul0_src0:4;
+         unsigned mul0_src1:4;
+         unsigned mul0_dst:3;
+
+         unsigned mul1_src0:4;
+         unsigned mul1_src1:4;
+         unsigned mul1_dst:3;
+
+         unsigned opcode:4;
+         unsigned reg:6;
+      };
+
+      uint32_t words[2];
+   } tmp = {
+      .opcode = instr->sfu.op,
+      .reg = instr->sfu.reg,
+
+      .mul0_src0 = instr->mul[0].src[0],
+      .mul0_src1 = instr->mul[0].src[1],
+      .mul0_dst = instr->mul[0].dst,
+
+      .mul1_src0 = instr->mul[1].src[0],
+      .mul1_src1 = instr->mul[1].src[1],
+      .mul1_dst = instr->mul[1].dst,
+
+      .var0_saturate = instr->var[0].saturate,
+      .var0_opcode = instr->var[0].op,
+      .var0_source = instr->var[0].tram_row, /* varN_source carries the IR's tram_row */
+
+      .var1_saturate = instr->var[1].saturate,
+      .var1_opcode = instr->var[1].op,
+      .var1_source = instr->var[1].tram_row,
+
+      .var2_saturate = instr->var[2].saturate,
+      .var2_opcode = instr->var[2].op,
+      .var2_source = instr->var[2].tram_row,
+
+      .var3_saturate = instr->var[3].saturate,
+      .var3_opcode = instr->var[3].op,
+      .var3_source = instr->var[3].tram_row,
+   };
+
+   /* copy packed instruction into destination, swapping the words: dst[0] = words[1], dst[1] = words[0] */
+   for (int i = 0; i < 2; ++i)
+      dst[i] = tmp.words[1 - i];
+}
+
+/* Pack a schedule entry (instruction count 0..3, address 0..63) into one word.
+ * The bit-fields now span all 32 bits so that no byte of `word` is left
+ * unspecified when the union is initialised through the struct member. */
+uint32_t
+grate_fp_pack_sched(struct fp_sched *sched)
+{
+   assert(sched->num_instructions >= 0 && sched->num_instructions < 4);
+   assert(sched->address >= 0 && sched->address < 64);
+   union {
+      struct __attribute__((packed)) {
+         unsigned num_instructions : 2, address : 6, unused : 24;
+      };
+      uint32_t word;
+   } tmp = { .num_instructions = sched->num_instructions,
+             .address = sched->address };
+   return tmp.word;
+}
diff --git a/src/gallium/drivers/grate/grate_fp_ir.h b/src/gallium/drivers/grate/grate_fp_ir.h
new file mode 100644
index 0000000..f8d1624
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_fp_ir.h
@@ -0,0 +1,168 @@
+#ifndef FP_IR_H
+#define FP_IR_H
+
+#include "util/list.h"
+
+#include "stdbool.h"
+#include "stdint.h"
+
+enum fp_alu_op { /* stored in the 2-bit `opcode` field by grate_fp_pack_alu() */
+   FP_ALU_OP_MAD = 0,
+   FP_ALU_OP_MIN = 1,
+   FP_ALU_OP_MAX = 2,
+   FP_ALU_OP_CSEL = 3
+};
+
+enum fp_scale { /* fits the 2-bit scale_result field; NOTE(review): grate_fp_pack_alu() does not write that field */
+   FP_SCALE_NONE = 0,
+   FP_SCALE_MUL2 = 1,
+   FP_SCALE_MUL4 = 2,
+   FP_SCALE_DIV2 = 3
+};
+
+enum fp_condition { /* fits the 2-bit condition_code field; NOTE(review): grate_fp_pack_alu() does not write that field */
+   FP_CONDITION_ALWAYS = 0,
+   FP_CONDITION_EQUAL = 1,
+   FP_CONDITION_GEQUAL = 2,
+   FP_CONDITION_GREATER = 3
+};
+
+struct fp_alu_dst_operand {
+   bool write_low_sub_reg;  /* -> write_low_sub_reg bit */
+   bool write_high_sub_reg; /* -> write_high_sub_reg bit */
+   unsigned index;          /* -> 7-bit dst_reg field */
+   bool saturate;           /* -> saturate_result bit */
+};
+
+enum fp_datatype { /* grate_fp_pack_alu() sets an operand's fixed10 bit for anything other than FP20 */
+   FP_DATATYPE_FP20 = 0,
+   FP_DATATYPE_FIXED10 = 1
+};
+
+struct fp_alu_src_operand {
+   bool scale_by_two;   /* NOTE(review): not encoded by grate_fp_pack_alu() */
+   bool negate;         /* NOTE(review): not encoded by grate_fp_pack_alu() */
+   bool absolute_value; /* NOTE(review): not encoded by grate_fp_pack_alu() */
+   enum fp_datatype datatype; /* -> rX_fixed10 bit */
+   bool minus_one;      /* NOTE(review): not encoded by grate_fp_pack_alu() */
+   bool sub_reg_select_high; /* -> rX_sub_reg_select_high bit */
+   unsigned index;      /* -> 7-bit rX_reg_select for src[0..2]; src[3] is only compared with src[2] */
+};
+
+struct fp_alu_instr {
+   enum fp_condition condition; /* NOTE(review): not encoded by grate_fp_pack_alu() */
+
+   enum fp_alu_op op;
+   enum fp_scale scale; /* NOTE(review): not encoded by grate_fp_pack_alu() */
+
+   struct fp_alu_dst_operand dst;
+   struct fp_alu_src_operand src[4]; /* packed as operands rA, rB, rC, rD in that order */
+};
+
+enum fp_dw_src_regs { /* stored in the 1-bit src_regs_select field by grate_fp_pack_dw() */
+   FP_DW_REGS_R0_R1 = 0,
+   FP_DW_REGS_R2_R3 = 1
+};
+
+struct fp_dw_instr {
+   bool enable;        /* -> enable bit; also makes grate_fp_pack_dw() write 2 into bits 16..31 */
+   int index;          /* -> 4-bit render_target_index */
+   bool stencil_write; /* -> stencil_write bit */
+   enum fp_dw_src_regs src_regs; /* -> src_regs_select bit */
+};
+
+enum fp_sfu_op { /* stored in the 4-bit `opcode` field by grate_fp_pack_mfu() */
+   FP_SFU_OP_NOP = 0,
+   FP_SFU_OP_RCP = 1,
+   FP_SFU_OP_RSQ = 2,
+   FP_SFU_OP_LG2 = 3,
+   FP_SFU_OP_EX2 = 4,
+   FP_SFU_OP_SQRT = 5,
+   FP_SFU_OP_SIN = 6,
+   FP_SFU_OP_COS = 7,
+   FP_SFU_OP_FRC = 8,
+   FP_SFU_OP_PREEX2 = 9,
+   FP_SFU_OP_PRESIN = 10,
+   FP_SFU_OP_PRECOS = 11
+};
+
+enum fp_mfu_mul_dst { /* stored in the 3-bit mulN_dst fields by grate_fp_pack_mfu() */
+   FP_MFU_MUL_DST_BARYCENTRIC_WEIGHT = 1,
+   FP_MFU_MUL_DST_ROW_REG_0 = 4,
+   FP_MFU_MUL_DST_ROW_REG_1 = 5,
+   FP_MFU_MUL_DST_ROW_REG_2 = 6,
+   FP_MFU_MUL_DST_ROW_REG_3 = 7
+};
+
+enum fp_mfu_mul_src { /* stored in the 4-bit mulN_src0/mulN_src1 fields by grate_fp_pack_mfu() */
+   FP_MFU_MUL_SRC_ROW_REG_0 = 0,
+   FP_MFU_MUL_SRC_ROW_REG_1 = 1,
+   FP_MFU_MUL_SRC_ROW_REG_2 = 2,
+   FP_MFU_MUL_SRC_ROW_REG_3 = 3,
+   FP_MFU_MUL_SRC_SFU_RESULT = 10,
+   FP_MFU_MUL_SRC_BARYCENTRIC_COEF_0 = 11,
+   FP_MFU_MUL_SRC_BARYCENTRIC_COEF_1 = 12,
+   FP_MFU_MUL_SRC_CONST_1 = 13,
+};
+
+struct fp_mfu_mul {
+   enum fp_mfu_mul_dst dst;    /* -> mulN_dst */
+   enum fp_mfu_mul_src src[2]; /* -> mulN_src0, mulN_src1 */
+};
+
+enum fp_var_op { /* stored in the 2-bit varN_opcode fields by grate_fp_pack_mfu() */
+   FP_VAR_OP_NOP = 0,
+   FP_VAR_OP_FP20 = 1,
+   FP_VAR_OP_FX10 = 2,
+};
+
+struct fp_var_instr {
+   bool saturate;     /* -> varN_saturate bit */
+   enum fp_var_op op; /* -> varN_opcode */
+   unsigned tram_row; /* -> 4-bit varN_source */
+};
+
+struct fp_sfu_instr {
+   enum fp_sfu_op op; /* -> 4-bit opcode */
+   unsigned reg;      /* -> 6-bit reg field */
+};
+
+struct fp_mfu_instr { /* input of grate_fp_pack_mfu(): one SFU op, two multiplies, four varying fetches */
+   struct list_head link; /* list linkage; not read by grate_fp_pack_mfu() */
+   struct fp_sfu_instr sfu;
+   struct fp_mfu_mul mul[2];
+   struct fp_var_instr var[4];
+};
+
+struct fp_alu_instr_packet {
+   struct list_head link;
+   struct fp_alu_instr slots[4]; /* each slot is the input type of grate_fp_pack_alu() */
+};
+
+struct fp_sched {
+   int num_instructions; /* grate_fp_pack_sched() asserts 0..3 */
+   int address;          /* grate_fp_pack_sched() asserts 0..63 */
+};
+
+struct fp_instr {
+   struct list_head link;
+   // TODO: PSEQ
+   struct fp_sched mfu_sched; /* packed with grate_fp_pack_sched() - presumably selects MFU instructions; confirm */
+   // TODO: TEX
+   struct fp_sched alu_sched; /* packed with grate_fp_pack_sched() - presumably selects ALU instructions; confirm */
+   struct fp_dw_instr dw;     /* packed with grate_fp_pack_dw() */
+};
+
+void
+grate_fp_pack_alu(uint32_t *dst, struct fp_alu_instr *instr);
+
+uint32_t
+grate_fp_pack_dw(struct fp_dw_instr *instr);
+
+void
+grate_fp_pack_mfu(uint32_t *dst, struct fp_mfu_instr *instr);
+
+uint32_t
+grate_fp_pack_sched(struct fp_sched *sched);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_fp_vliw.h b/src/gallium/drivers/grate/grate_fp_vliw.h
new file mode 100644
index 0000000..15f5506
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_fp_vliw.h
@@ -0,0 +1,174 @@
+#ifndef FP_VLIW_H
+#define FP_VLIW_H
+
+#include <stdint.h>
+
+union fragment_mfu_instruction { /* same bit-field list as the scratch union in grate_fp_pack_mfu() (grate_fp_ir.c) */
+   struct __attribute__((packed)) {
+      unsigned var0_saturate:1;
+      unsigned var0_opcode:2;
+      unsigned var0_source:4;
+
+      unsigned var1_saturate:1;
+      unsigned var1_opcode:2;
+      unsigned var1_source:4;
+
+      unsigned var2_saturate:1;
+      unsigned var2_opcode:2;
+      unsigned var2_source:4;
+
+      unsigned var3_saturate:1;
+      unsigned var3_opcode:2;
+      unsigned var3_source:4;
+
+      unsigned __pad:4;
+
+      unsigned mul0_src0:4;
+      unsigned mul0_src1:4;
+      unsigned mul0_dst:3;
+
+      unsigned mul1_src0:4;
+      unsigned mul1_src1:4;
+      unsigned mul1_dst:3;
+
+      unsigned opcode:4;
+      unsigned reg:6;
+   };
+
+   struct __attribute__((packed)) { /* the same 64 bits as two raw words */
+      uint32_t part0;
+      uint32_t part1;
+   };
+};
+
+union fragment_alu_instruction { /* same bit-field list as the scratch union in grate_fp_pack_alu() (grate_fp_ir.c) */
+   struct __attribute__((packed)) {
+      unsigned rD_fixed10:1;
+      unsigned rD_absolute_value:1;
+      unsigned rD_enable:1;
+      unsigned rD_minus_one:1;
+      unsigned rD_sub_reg_select_high:1;
+      unsigned rD_reg_select:1;
+
+      unsigned rC_scale_by_two:1;
+      unsigned rC_negate:1;
+      unsigned rC_absolute_value:1;
+      unsigned rC_fixed10:1;
+      unsigned rC_minus_one:1;
+      unsigned rC_sub_reg_select_high:1;
+      unsigned rC_reg_select:7;
+
+      unsigned rB_scale_by_two:1;
+      unsigned rB_negate:1;
+      unsigned rB_absolute_value:1;
+      unsigned rB_fixed10:1;
+      unsigned rB_minus_one:1;
+      unsigned rB_sub_reg_select_high:1;
+      unsigned rB_reg_select:7;
+
+      unsigned rA_scale_by_two:1;
+      unsigned rA_negate:1;
+      unsigned rA_absolute_value:1;
+      unsigned rA_fixed10:1;
+      unsigned rA_minus_one:1;
+      unsigned rA_sub_reg_select_high:1;
+      unsigned rA_reg_select:7;
+
+      unsigned write_low_sub_reg:1;
+      unsigned write_high_sub_reg:1;
+      unsigned dst_reg:7;
+      unsigned condition_code:2;
+      unsigned saturate_result:1;
+      unsigned scale_result:2;
+
+      unsigned addition_disable:1;
+      unsigned accumulate_result_this:1;
+      unsigned accumulate_result_other:1;
+      unsigned opcode:2;
+   };
+
+   struct __attribute__((packed)) { /* the same 64 bits as two raw words */
+      uint32_t part0;
+      uint32_t part1;
+   };
+};
+
+union fragment_alu_instruction_packet { /* four ALU instructions, with overlaid immediate and raw-word views */
+   struct __attribute__((packed)) {
+      union fragment_alu_instruction a[4];
+   };
+
+   union { /* immediate 0: 20 bits after 3 x 64 + 4 padding bits, as one fp20 or two fx10 halves */
+      struct __attribute__((packed)) {
+         uint64_t __pad1;
+         uint64_t __pad2;
+         uint64_t __pad3;
+         unsigned __pad4:4;
+         unsigned fx10_low:10;
+         unsigned fx10_high:10;
+      };
+
+      struct __attribute__((packed)) {
+         uint64_t __pad5;
+         uint64_t __pad6;
+         uint64_t __pad7;
+         unsigned __pad8:4;
+         unsigned fp20:20;
+      };
+   } imm0;
+
+   union { /* immediate 1: 20 bits after 3 x 64 + 24 padding bits */
+      struct __attribute__((packed)) {
+         uint64_t __pad1;
+         uint64_t __pad2;
+         uint64_t __pad3;
+         unsigned __pad4:24;
+         unsigned fx10_low:10;
+         unsigned fx10_high:10;
+      };
+
+      struct __attribute__((packed)) {
+         uint64_t __pad5;
+         uint64_t __pad6;
+         uint64_t __pad7;
+         unsigned __pad8:24;
+         unsigned fp20:20;
+      };
+   } imm1;
+
+   union { /* immediate 2: 20 bits after 3 x 64 + 32 + 12 padding bits */
+      struct __attribute__((packed)) {
+         uint64_t __pad1;
+         uint64_t __pad2;
+         uint64_t __pad3;
+         uint32_t __pad4;
+         unsigned __pad5:12;
+         unsigned fx10_low:10;
+         unsigned fx10_high:10;
+      };
+
+      struct __attribute__((packed)) {
+         uint64_t __pad6;
+         uint64_t __pad7;
+         uint64_t __pad8;
+         uint32_t __pad9;
+         unsigned __pad10:12;
+         unsigned fp20:20;
+      };
+   } imm2;
+
+   struct __attribute__((packed)) { /* raw view: eight words covering a[0..3] plus one extra word */
+      uint32_t part0;
+      uint32_t part1;
+      uint32_t part2;
+      uint32_t part3;
+      uint32_t part4;
+      uint32_t part5;
+      uint32_t part6;
+      uint32_t part7;
+
+      uint32_t complement;
+   };
+};
+
+#endif // FP_VLIW_H
diff --git a/src/gallium/drivers/grate/grate_program.c b/src/gallium/drivers/grate/grate_program.c
new file mode 100644
index 0000000..e9c8124
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_program.c
@@ -0,0 +1,219 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "util/u_dynarray.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "host1x01_hardware.h"
+#include "grate_common.h"
+#include "grate_context.h"
+#include "grate_screen.h"
+#include "grate_program.h"
+#include "grate_compiler.h"
+#include "grate_fp_ir.h"
+#include "grate_vpe_ir.h"
+#include "tgr_3d.xml.h"
+/* Compile a TGSI vertex shader into VPE upload commands; NULL on failure. */
+static void *
+grate_create_vs_state(struct pipe_context *pcontext,
+                      const struct pipe_shader_state *template)
+{
+   struct grate_vertex_shader_state *so =
+      CALLOC_STRUCT(grate_vertex_shader_state);
+   if (!so)
+      return NULL;
+   so->base = *template;
+
+   if (grate_debug & GRATE_DEBUG_TGSI) {
+      fprintf(stderr, "DEBUG: TGSI:\n");
+      tgsi_dump(template->tokens, 0);
+      fprintf(stderr, "\n");
+   }
+
+   /* Checked at run time: an assert() alone disappears in NDEBUG builds. */
+   struct tgsi_parse_context parser;
+   if (tgsi_parse_init(&parser, template->tokens) != TGSI_PARSE_OK) {
+      FREE(so);
+      return NULL;
+   }
+
+   struct grate_vpe_shader vpe;
+   grate_tgsi_to_vpe(&vpe, &parser);
+
+   int num_instructions = list_length(&vpe.instructions);
+   assert(num_instructions < 256);
+   int num_commands = 2 + num_instructions * 4;
+   uint32_t *commands = MALLOC(num_commands * sizeof(uint32_t));
+   if (!commands) {
+      FREE(so);
+      return NULL;
+   }
+
+   commands[0] = host1x_opcode_imm(TGR3D_VP_UPLOAD_INST_ID, 0);
+   commands[1] = host1x_opcode_nonincr(TGR3D_VP_UPLOAD_INST,
+                                       num_instructions * 4);
+
+   /* Four words per instruction; the last one is packed as end-of-program. */
+   struct vpe_instr *last = list_last_entry(&vpe.instructions, struct vpe_instr, link);
+   int offset = 2;
+   list_for_each_entry(struct vpe_instr, instr, &vpe.instructions, link) {
+      grate_vpe_pack(commands + offset, instr, instr == last);
+      offset += 4;
+   }
+
+   so->blob.commands = commands;
+   so->blob.num_commands = num_commands;
+   so->output_mask = vpe.output_mask;
+   return so;
+}
+/* Store `so` as the context's current vertex shader; nothing else happens here. */
+static void
+grate_bind_vs_state(struct pipe_context *pcontext, void *so)
+{
+   grate_context(pcontext)->vshader = so;
+}
+static void
+grate_delete_vs_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(((struct grate_vertex_shader_state *)so)->blob.commands); /* owned by the CSO */
+   FREE(so);
+}
+/* Compile a TGSI fragment shader into a host1x command blob; NULL on failure. */
+static void *
+grate_create_fs_state(struct pipe_context *pcontext,
+                      const struct pipe_shader_state *template)
+{
+   struct grate_fragment_shader_state *so =
+      CALLOC_STRUCT(grate_fragment_shader_state);
+   if (!so)
+      return NULL;
+   so->base = *template;
+
+   if (grate_debug & GRATE_DEBUG_TGSI) {
+      fprintf(stderr, "DEBUG: TGSI:\n");
+      tgsi_dump(template->tokens, 0);
+      fprintf(stderr, "\n");
+   }
+
+   /* Checked at run time: an assert() alone disappears in NDEBUG builds. */
+   struct tgsi_parse_context parser;
+   if (tgsi_parse_init(&parser, template->tokens) != TGSI_PARSE_OK) {
+      FREE(so);
+      return NULL;
+   }
+
+   struct grate_fp_shader fp;
+   grate_tgsi_to_fp(&fp, &parser);
+
+   struct util_dynarray buf;
+   util_dynarray_init(&buf, NULL);
+
+#define PUSH(x) util_dynarray_append(&buf, uint32_t, (x))
+   PUSH(host1x_opcode_incr(TGR3D_ALU_BUFFER_SIZE, 1));
+   PUSH(0x58000000);
+
+   PUSH(host1x_opcode_imm(TGR3D_FP_PSEQ_QUAD_ID, 0));
+   PUSH(host1x_opcode_imm(TGR3D_FP_UPLOAD_INST_ID_COMMON, 0));
+   PUSH(host1x_opcode_imm(TGR3D_FP_UPLOAD_MFU_INST_ID, 0));
+   PUSH(host1x_opcode_imm(TGR3D_FP_UPLOAD_ALU_INST_ID, 0));
+
+   int num_fp_instrs = list_length(&fp.fp_instructions);
+   assert(num_fp_instrs < 64);
+
+   PUSH(host1x_opcode_incr(TGR3D_FP_PSEQ_ENGINE_INST, 1));
+   PUSH(0x20006000 | num_fp_instrs);
+
+   PUSH(host1x_opcode_incr(TGR3D_FP_PSEQ_DW_CFG, 1));
+   PUSH(0x00000040);
+
+   PUSH(host1x_opcode_imm(TGR3D_FP_PSEQ_UPLOAD_INST_BUFFER_FLUSH, 0));
+
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_PSEQ_UPLOAD_INST, num_fp_instrs));
+   list_for_each_entry(struct fp_instr, instr, &fp.fp_instructions, link)
+      PUSH(0x00000000);
+
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_MFU_SCHED, num_fp_instrs));
+   list_for_each_entry(struct fp_instr, instr, &fp.fp_instructions, link)
+      PUSH(grate_fp_pack_sched(&instr->mfu_sched));
+
+   int num_mfu_instrs = list_length(&fp.mfu_instructions);
+   assert(num_mfu_instrs < 64); // TODO: not sure if this is really correct
+
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_MFU_INST, num_mfu_instrs * 2));
+   list_for_each_entry(struct fp_mfu_instr, instr, &fp.mfu_instructions, link) {
+      uint32_t words[2];
+      grate_fp_pack_mfu(words, instr);
+      PUSH(words[0]);
+      PUSH(words[1]);
+   }
+
+   // TODO: emit actual instructions here
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_TEX_INST, num_fp_instrs));
+   for (int i = 0; i < num_fp_instrs; ++i)
+      PUSH(0x00000000);
+
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_ALU_SCHED, num_fp_instrs));
+   list_for_each_entry(struct fp_instr, instr, &fp.fp_instructions, link)
+      PUSH(grate_fp_pack_sched(&instr->alu_sched));
+
+   int num_alu_instrs = list_length(&fp.alu_instructions);
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_ALU_INST,
+        num_alu_instrs * 4 * 2));
+   list_for_each_entry(struct fp_alu_instr_packet, instr, &fp.alu_instructions, link) {
+      for (int i = 0; i < 4; ++i) {
+         uint32_t words[2];
+         grate_fp_pack_alu(words, instr->slots + i);
+         PUSH(words[0]);
+         PUSH(words[1]);
+      }
+   }
+
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_ALU_INST_COMPLEMENT, num_fp_instrs));
+   list_for_each_entry(struct fp_instr, instr, &fp.fp_instructions, link)
+      PUSH(0x00000000);
+
+   PUSH(host1x_opcode_nonincr(TGR3D_FP_UPLOAD_DW_INST, num_fp_instrs));
+   list_for_each_entry(struct fp_instr, instr, &fp.fp_instructions, link)
+      PUSH(grate_fp_pack_dw(&instr->dw));
+
+   /* Leave DIV64 at zero when max_tram_row is 0 rather than dividing by zero. */
+   uint32_t tram_setup = TGR3D_VAL(TRAM_SETUP, USED_TRAM_ROWS_NB, fp.info.max_tram_row);
+   if (fp.info.max_tram_row)
+      tram_setup |= TGR3D_VAL(TRAM_SETUP, DIV64, 64 / fp.info.max_tram_row);
+   PUSH(host1x_opcode_incr(TGR3D_TRAM_SETUP, 1));
+   PUSH(tram_setup);
+#undef PUSH
+
+   util_dynarray_trim(&buf);
+   so->blob.num_commands = buf.size / sizeof(uint32_t);
+   so->blob.commands = buf.data;
+   so->info = fp.info;
+   return so;
+}
+/* Store `so` as the context's current fragment shader; nothing else happens here. */
+static void
+grate_bind_fs_state(struct pipe_context *pcontext, void *so)
+{
+   grate_context(pcontext)->fshader = so;
+}
+static void
+grate_delete_fs_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(((struct grate_fragment_shader_state *)so)->blob.commands); /* owned by the CSO */
+   FREE(so);
+}
+/* Install the vertex and fragment shader CSO hooks on the context. */
+void
+grate_context_program_init(struct pipe_context *pcontext)
+{
+   /* grouped by hook kind: create, bind, delete */
+   pcontext->create_vs_state = grate_create_vs_state;
+   pcontext->create_fs_state = grate_create_fs_state;
+   pcontext->bind_vs_state = grate_bind_vs_state;
+   pcontext->bind_fs_state = grate_bind_fs_state;
+   pcontext->delete_vs_state = grate_delete_vs_state;
+   pcontext->delete_fs_state = grate_delete_fs_state;
+}
diff --git a/src/gallium/drivers/grate/grate_program.h b/src/gallium/drivers/grate/grate_program.h
new file mode 100644
index 0000000..716443f
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_program.h
@@ -0,0 +1,29 @@
+#ifndef GRATE_PROGRAM_H
+#define GRATE_PROGRAM_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+#include "grate_compiler.h"
+/* Command words built for one shader, plus the number of 32-bit words stored. */
+struct grate_shader_blob {
+   uint32_t *commands;
+   int num_commands;
+};
+/* Vertex shader CSO: copy of the template, command blob, compiler output mask. */
+struct grate_vertex_shader_state {
+   struct pipe_shader_state base;
+   struct grate_shader_blob blob;
+   uint16_t output_mask;
+};
+/* Fragment shader CSO: copy of the template, command blob, compiler-reported info. */
+struct grate_fragment_shader_state {
+   struct pipe_shader_state base;
+   struct grate_shader_blob blob;
+   struct grate_fp_info info;
+};
+
+void
+grate_context_program_init(struct pipe_context *pcontext);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_resource.c b/src/gallium/drivers/grate/grate_resource.c
new file mode 100644
index 0000000..4f9703b
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_resource.c
@@ -0,0 +1,511 @@
+#include <stdio.h>
+
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "util/u_transfer.h"
+#include "util/u_inlines.h"
+
+#include "grate_common.h"
+#include "grate_context.h"
+#include "grate_resource.h"
+#include "grate_screen.h"
+
+#include "host1x01_hardware.h"
+#include "tgr_3d.xml.h"
+
+#include <libdrm/tegra_drm.h>
+#include <libdrm/tegra.h>
+
+/*
+ * XXX Required to access winsys_handle internals. Should go away in favour
+ * of some abstraction to handle handles in a Tegra-specific winsys
+ * implementation.
+ */
+#include "state_tracker/drm_driver.h"
+
+/* Fill `handle` with the BO's name (SHARED) or handle (KMS) and the pitch. */
+static boolean
+grate_resource_get_handle(struct pipe_screen *pscreen,
+                          struct pipe_resource *presource,
+                          struct winsys_handle *handle)
+{
+   struct grate_resource *resource = grate_resource(presource);
+   int err;
+
+   if (handle->type == DRM_API_HANDLE_TYPE_SHARED) {
+      err = drm_tegra_bo_get_name(resource->bo, &handle->handle);
+      if (err < 0) {
+         fprintf(stderr, "drm_tegra_bo_get_name() failed: %d\n", err);
+         return FALSE;
+      }
+   } else if (handle->type == DRM_API_HANDLE_TYPE_KMS) {
+      err = drm_tegra_bo_get_handle(resource->bo, &handle->handle);
+      if (err < 0) {
+         fprintf(stderr, "drm_tegra_bo_get_handle() failed: %d\n", err);
+         return FALSE;
+      }
+   } else {
+      /* diagnostics go to stderr like every other failure path in this file */
+      fprintf(stderr, "unsupported handle type: %d\n", handle->type);
+      return FALSE;
+   }
+   handle->stride = resource->pitch;
+   return TRUE;
+}
+/* Drop the resource's BO reference and free the resource structure. */
+static void
+grate_resource_destroy(struct pipe_screen *pscreen,
+                       struct pipe_resource *presource)
+{
+   struct grate_resource *res = grate_resource(presource);
+
+   drm_tegra_bo_unref(res->bo);
+   FREE(res);
+}
+/* Map the resource's BO and return a pointer to the box origin; NULL on failure. */
+static void *
+grate_resource_transfer_map(struct pipe_context *pcontext,
+                            struct pipe_resource *presource,
+                            unsigned level, unsigned usage,
+                            const struct pipe_box *box,
+                            struct pipe_transfer **transfer)
+{
+   struct grate_context *context = grate_context(pcontext);
+   struct grate_resource *resource = grate_resource(presource);
+   void *ret = NULL;
+   struct pipe_transfer *ptrans;
+
+   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+      return NULL;
+
+   ptrans = slab_alloc(&context->transfer_pool);
+   if (!ptrans)
+      return NULL;
+   /* Hand the transfer back to the pool if the BO cannot be mapped. */
+   if (drm_tegra_bo_map(resource->bo, &ret)) {
+      slab_free(&context->transfer_pool, ptrans);
+      return NULL;
+   }
+
+   memset(ptrans, 0, sizeof(*ptrans));
+   pipe_resource_reference(&ptrans->resource, presource);
+   ptrans->level = level;
+   ptrans->usage = usage;
+   ptrans->box = *box;
+   ptrans->stride = resource->pitch;
+   ptrans->layer_stride = ptrans->stride;
+   *transfer = ptrans;
+
+   return (uint8_t *)ret +
+          box->y * resource->pitch +
+          box->x * util_format_get_blocksize(presource->format);
+}
+/* Not implemented: the body only invokes unimplemented(). */
+static void
+grate_resource_transfer_flush_region(struct pipe_context *pcontext,
+                                     struct pipe_transfer *transfer,
+                                     const struct pipe_box *box)
+{
+   unimplemented();
+}
+/* Unmap the transfer's BO, drop the resource reference, recycle the transfer. */
+static void
+grate_resource_transfer_unmap(struct pipe_context *pcontext,
+                              struct pipe_transfer *transfer)
+{
+   struct grate_resource *res = grate_resource(transfer->resource);
+   struct grate_context *ctx = grate_context(pcontext);
+
+   drm_tegra_bo_unmap(res->bo);
+   pipe_resource_reference(&transfer->resource, NULL);
+   slab_free(&ctx->transfer_pool, transfer);
+}
+/* u_resource dispatch table; every resource created in this file points at it. */
+static const struct u_resource_vtbl grate_resource_vtbl = {
+   .resource_get_handle = grate_resource_get_handle,
+   .resource_destroy = grate_resource_destroy,
+   .transfer_map = grate_resource_transfer_map,
+   .transfer_flush_region = grate_resource_transfer_flush_region,
+   .transfer_unmap = grate_resource_transfer_unmap,
+};
+/* Map a pipe_format to its TGR3D_PIXEL_FORMAT_* value; -1 if there is no mapping. */
+int
+grate_pixel_format(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_A8_UNORM:
+      return TGR3D_PIXEL_FORMAT_A8;
+   case PIPE_FORMAT_L8_UNORM:
+      return TGR3D_PIXEL_FORMAT_L8;
+   case PIPE_FORMAT_L8A8_UNORM:
+      return TGR3D_PIXEL_FORMAT_LA88;
+   case PIPE_FORMAT_B5G6R5_UNORM:
+      return TGR3D_PIXEL_FORMAT_RGB565;
+   case PIPE_FORMAT_B5G5R5A1_UNORM:
+      return TGR3D_PIXEL_FORMAT_RGBA5551;
+   case PIPE_FORMAT_B4G4R4A4_UNORM:
+      return TGR3D_PIXEL_FORMAT_RGBA4444;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:
+   case PIPE_FORMAT_B8G8R8X8_UNORM: /* X8 shares the RGBA8888 value with A8 */
+      return TGR3D_PIXEL_FORMAT_RGBA8888;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:
+      return TGR3D_PIXEL_FORMAT_RGBA_FP32;
+   case PIPE_FORMAT_S8_UINT:
+      return TGR3D_PIXEL_FORMAT_S8;
+   case PIPE_FORMAT_Z16_UNORM:
+      return TGR3D_PIXEL_FORMAT_D16_LINEAR;
+   default:
+      return -1;
+   }
+}
+/* Allocate a resource and a backing BO of pitch * height bytes; NULL on failure. */
+static struct pipe_resource *
+grate_screen_resource_create(struct pipe_screen *pscreen,
+                             const struct pipe_resource *template)
+{
+   struct grate_screen *screen = grate_screen(pscreen);
+   struct grate_resource *resource;
+   uint32_t flags = 0, height, size;
+   int err;
+
+   resource = CALLOC_STRUCT(grate_resource);
+   if (!resource)
+      return NULL;
+
+   resource->base.b = *template;
+
+   pipe_reference_init(&resource->base.b.reference, 1);
+   resource->base.vtbl = &grate_resource_vtbl;
+   resource->base.b.screen = pscreen;
+
+   resource->pitch = template->width0 * util_format_get_blocksize(template->format);
+   height = template->height0;
+
+   resource->tiled = 0;
+   if (template->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW |
+                         PIPE_BIND_SCANOUT | PIPE_BIND_DEPTH_STENCIL)) {
+      if (template->bind & PIPE_BIND_DEPTH_STENCIL)
+         resource->pitch = align(resource->pitch, 256);
+      else
+         resource->pitch = align(resource->pitch, 32);
+
+      flags = DRM_TEGRA_GEM_CREATE_BOTTOM_UP;
+   }
+
+   if (template->target != PIPE_BUFFER) {
+      /* pick pixel-format */
+      int format = grate_pixel_format(template->format);
+      assert(format >= 0);
+      resource->format = format;
+   }
+
+   size = resource->pitch * height;
+   err = drm_tegra_bo_new(&resource->bo, screen->drm, flags, size);
+   if (err < 0) {
+      fprintf(stderr, "drm_tegra_bo_new() failed: %d\n", err);
+      FREE(resource); /* no BO was attached, so only the struct needs freeing */
+      return NULL;
+   }
+
+   return &resource->base.b;
+}
+/* Wrap the BO named by handle->handle in a new resource; NULL on failure. */
+static struct pipe_resource *
+grate_screen_resource_from_handle(struct pipe_screen *pscreen,
+                                  const struct pipe_resource *template,
+                                  struct winsys_handle *handle,
+                                  unsigned usage)
+{
+   struct grate_screen *screen = grate_screen(pscreen);
+   struct grate_resource *res = CALLOC_STRUCT(grate_resource);
+   int ret, pixel_format;
+
+   if (!res)
+      return NULL;
+
+   res->base.b = *template;
+   res->base.b.screen = pscreen;
+   res->base.vtbl = &grate_resource_vtbl;
+   pipe_reference_init(&res->base.b.reference, 1);
+
+   /* NOTE(review): handle->type and usage are not consulted; the handle is
+    * always imported as a name -- confirm callers only pass names. */
+   ret = drm_tegra_bo_from_name(&res->bo, screen->drm,
+                                handle->handle, 0);
+   if (ret < 0) {
+      fprintf(stderr, "drm_tegra_bo_from_name() failed: %d\n", ret);
+      FREE(res);
+      return NULL;
+   }
+
+   pixel_format = grate_pixel_format(template->format);
+   assert(pixel_format >= 0);
+
+   res->format = pixel_format;
+   res->pitch = handle->stride;
+
+   return &res->base.b;
+}
+/* Install resource hooks; get_handle/destroy go through the u_resource vtbl helpers. */
+void
+grate_screen_resource_init(struct pipe_screen *pscreen)
+{
+   pscreen->resource_create = grate_screen_resource_create;
+   pscreen->resource_from_handle = grate_screen_resource_from_handle;
+   pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+   pscreen->resource_destroy = u_resource_destroy_vtbl;
+}
+/* Not implemented: the body only invokes unimplemented(); nothing is copied. */
+static void
+grate_resource_copy_region(struct pipe_context *pcontext,
+                           struct pipe_resource *dst,
+                           unsigned int dst_level,
+                           unsigned int dstx, unsigned dsty,
+                           unsigned int dstz,
+                           struct pipe_resource *src,
+                           unsigned int src_level,
+                           const struct pipe_box *box)
+{
+   unimplemented();
+}
+/* Submit one GR2D command stream built from info's src/dst resources and boxes. */
+static void
+grate_blit(struct pipe_context *pcontext, const struct pipe_blit_info *info)
+{
+   int err, value;
+   struct grate_context *context = grate_context(pcontext);
+   struct grate_channel *gr2d = context->gr2d;
+   struct grate_resource *dst, *src;
+
+   dst = grate_resource(info->dst.resource);
+   src = grate_resource(info->src.resource);
+   /* NOTE(review): only dst.box size and the two box origins are used; src.box size is ignored. */
+   err = grate_stream_begin(&gr2d->stream);
+   if (err < 0) {
+      fprintf(stderr, "grate_stream_begin() failed: %d\n", err);
+      return;
+   }
+
+   grate_stream_push_setclass(&gr2d->stream, HOST1X_CLASS_GR2D);
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x009, 0x9));
+   grate_stream_push(&gr2d->stream, 0x0000003a);            /* 0x009 - trigger */
+   grate_stream_push(&gr2d->stream, 0x00000000);            /* 0x00c - cmdsel */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x01e, 0x7));
+   grate_stream_push(&gr2d->stream, 0x00000000);            /* 0x01e - controlsecond */
+   /*
+    * [20:20] source color depth (0: mono, 1: same)
+    * [17:16] destination color depth (0: 8 bpp, 1: 16 bpp, 2: 32 bpp)
+    */
+
+   value = 1 << 20;
+   switch (util_format_get_blocksize(dst->base.b.format)) {
+   case 1:
+      value |= 0 << 16;
+      break;
+   case 2:
+      value |= 1 << 16;
+      break;
+   case 4:
+      value |= 2 << 16;
+      break;
+   default:
+      assert(0);
+   }
+
+   grate_stream_push(&gr2d->stream, value);                 /* 0x01f - controlmain */
+   grate_stream_push(&gr2d->stream, 0x000000cc);            /* 0x020 - ropfade */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_nonincr(0x046, 1));
+
+   /*
+    * [20:20] destination write tile mode (0: linear, 1: tiled)
+    * [ 0: 0] tile mode Y/RGB (0: linear, 1: tiled)
+    */
+   value = (dst->tiled << 20) | src->tiled;
+   grate_stream_push(&gr2d->stream, value);                 /* 0x046 - tilemode */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x02b, 0xe149));
+   grate_stream_push_reloc(&gr2d->stream, dst->bo, 0);      /* 0x02b - dstba */
+
+   grate_stream_push(&gr2d->stream, dst->pitch);            /* 0x02e - dstst */
+
+   grate_stream_push_reloc(&gr2d->stream, src->bo, 0);      /* 0x031 - srcba */
+
+   grate_stream_push(&gr2d->stream, src->pitch);            /* 0x033 - srcst */
+
+   value = info->dst.box.height << 16 | info->dst.box.width;
+   grate_stream_push(&gr2d->stream, value);                 /* 0x038 - dstsize */
+
+   value = info->src.box.y << 16 | info->src.box.x;
+   grate_stream_push(&gr2d->stream, value);                 /* 0x039 - srcps */
+
+   value = info->dst.box.y << 16 | info->dst.box.x;
+   grate_stream_push(&gr2d->stream, value);                 /* 0x03a - dstps */
+
+   grate_stream_end(&gr2d->stream);
+
+   grate_stream_flush(&gr2d->stream);
+}
+/* Pack an RGBA float colour for `format` and return the first 32-bit word. */
+static uint32_t
+pack_color(enum pipe_format format, const float *rgba)
+{
+   union util_color packed;
+   util_pack_color(rgba, format, &packed);
+   return packed.ui[0];
+}
+/* GR2D fill of a width x height rectangle at (dstx, dsty); 0, or -1 if begin fails. */
+static int
+fill(struct grate_channel *gr2d,
+           struct grate_resource *dst,
+           uint32_t fill_value, int blocksize,
+           unsigned dstx, unsigned dsty,
+           unsigned width, unsigned height)
+{
+   uint32_t value;
+   int err;
+
+   err = grate_stream_begin(&gr2d->stream);
+   if (err < 0) {
+      fprintf(stderr, "grate_stream_begin() failed: %d\n", err);
+      return -1;
+   }
+
+   grate_stream_push_setclass(&gr2d->stream, HOST1X_CLASS_GR2D);
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x09, 0x09));
+   grate_stream_push(&gr2d->stream, 0x0000003a);           /* 0x009 - trigger */
+   grate_stream_push(&gr2d->stream, 0x00000000);           /* 0x00C - cmdsel */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x1e, 0x07));
+   grate_stream_push(&gr2d->stream, 0x00000000);           /* 0x01e - controlsecond */
+
+   value  = 1 << 6; /* fill mode */
+   value |= 1 << 2; /* turbofill */
+   switch (blocksize) {
+   case 1:
+      value |= 0 << 16;
+      break;
+   case 2:
+      value |= 1 << 16;
+      break;
+   case 4:
+      value |= 2 << 16;
+      break;
+   default:
+      unreachable("invalid blocksize");
+   }
+   grate_stream_push(&gr2d->stream, value);           /* 0x01f - controlmain */
+
+   grate_stream_push(&gr2d->stream, 0x000000cc);      /* 0x020 - ropfade */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x2b, 0x09));
+   grate_stream_push_reloc(&gr2d->stream, dst->bo, 0);/* 0x02b - dstba */
+   grate_stream_push(&gr2d->stream, dst->pitch);      /* 0x02e - dstst */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_nonincr(0x35, 1));
+
+   grate_stream_push(&gr2d->stream, fill_value);           /* 0x035 - srcfgc */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_nonincr(0x46, 1));
+   grate_stream_push(&gr2d->stream, dst->tiled << 20);     /* 0x046 - tilemode */
+
+   grate_stream_push(&gr2d->stream, host1x_opcode_mask(0x38, 0x05));
+   grate_stream_push(&gr2d->stream, height << 16 | width); /* 0x038 - dstsize */
+   grate_stream_push(&gr2d->stream, dsty << 16 | dstx);    /* 0x03a - dstps */
+   grate_stream_end(&gr2d->stream);
+
+   grate_stream_flush(&gr2d->stream);
+
+   return 0;
+}
+/* Clear the bound colour and/or depth-stencil buffers with full-surface fills. */
+static void
+grate_clear(struct pipe_context *pcontext, unsigned int buffers,
+            const union pipe_color_union *color, double depth,
+            unsigned int stencil)
+{
+   struct grate_context *context = grate_context(pcontext);
+   struct pipe_framebuffer_state *fb = &context->framebuffer.base;
+
+   if (buffers & PIPE_CLEAR_COLOR) {
+      for (int i = 0; i < fb->nr_cbufs; ++i) {
+         struct pipe_surface *dst = fb->cbufs[i];
+         /* cbufs[] entries may be NULL (unbound slot); skip those. */
+         if (!dst)
+            continue;
+         if (fill(context->gr2d, grate_resource(dst->texture),
+                  pack_color(dst->format, color->f),
+                  util_format_get_blocksize(dst->format),
+                  0, 0, dst->width, dst->height) < 0)
+            return;
+      }
+   }
+
+   /* TODO: handle the case where both are not set! */
+   if ((buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) && fb->zsbuf) {
+      if (fill(context->gr2d, grate_resource(fb->zsbuf->texture),
+               util_pack_z_stencil(fb->zsbuf->format, depth, stencil),
+               util_format_get_blocksize(fb->zsbuf->format),
+               0, 0, fb->zsbuf->width, fb->zsbuf->height) < 0)
+         return;
+   }
+}
+/* Fill a rectangle of a colour surface with the packed clear colour. */
+static void
+grate_clear_render_target(struct pipe_context *pipe,
+                          struct pipe_surface *dst,
+                          const union pipe_color_union *color,
+                          unsigned dstx, unsigned dsty,
+                          unsigned width, unsigned height,
+                          bool render_condition_enabled)
+{
+   assert(!render_condition_enabled);
+   uint32_t packed = pack_color(dst->format, color->f);
+   fill(grate_context(pipe)->gr2d, grate_resource(dst->texture), packed,
+        util_format_get_blocksize(dst->format), dstx, dsty, width, height);
+}
+/* Fill a rectangle of a depth-stencil surface with the packed depth/stencil value. */
+static void
+grate_clear_depth_stencil(struct pipe_context *pipe,
+                          struct pipe_surface *dst,
+                          unsigned clear_flags,
+                          double depth,
+                          unsigned stencil,
+                          unsigned dstx, unsigned dsty,
+                          unsigned width, unsigned height,
+                          bool render_condition_enabled)
+{
+   assert(!render_condition_enabled);
+   /* NOTE(review): clear_flags is not consulted; depth and stencil are packed together. */
+   uint32_t packed = util_pack_z_stencil(dst->format, depth, stencil);
+   fill(grate_context(pipe)->gr2d, grate_resource(dst->texture), packed,
+        util_format_get_blocksize(dst->format), dstx, dsty, width, height);
+}
+/* Not implemented: the body only invokes unimplemented(). */
+static void
+grate_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+{
+   unimplemented();
+}
+/* Install transfer, subdata, copy/blit, clear and flush hooks on the context. */
+void
+grate_context_resource_init(struct pipe_context *pcontext)
+{
+   pcontext->transfer_map = u_transfer_map_vtbl;
+   pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+   pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+   pcontext->buffer_subdata = u_default_buffer_subdata;
+   pcontext->texture_subdata = u_default_texture_subdata;
+
+   pcontext->resource_copy_region = grate_resource_copy_region;
+   pcontext->blit = grate_blit;
+   pcontext->clear = grate_clear;
+   pcontext->flush_resource = grate_flush_resource;
+   pcontext->clear_render_target = grate_clear_render_target;
+   pcontext->clear_depth_stencil = grate_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/grate/grate_resource.h b/src/gallium/drivers/grate/grate_resource.h
new file mode 100644
index 0000000..50a73c3
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_resource.h
@@ -0,0 +1,30 @@
+#ifndef GRATE_RESOURCE_H
+#define GRATE_RESOURCE_H
+
+#include "pipe/p_screen.h"
+#include "util/u_transfer.h"
+/* Driver resource: u_resource base plus the backing BO and its layout fields. */
+struct grate_resource {
+   struct u_resource base;
+   struct drm_tegra_bo *bo;
+   unsigned int pitch; /* row pitch; also reported as the winsys handle stride */
+   unsigned int tiled : 1;
+   unsigned int format : 5; /* value obtained from grate_pixel_format() */
+};
+/* Cast a pipe_resource pointer to the driver's grate_resource (no checking). */
+static inline struct grate_resource *
+grate_resource(struct pipe_resource *resource)
+{
+   return (struct grate_resource *)resource;
+}
+
+int
+grate_pixel_format(enum pipe_format format);
+
+void
+grate_context_resource_init(struct pipe_context *pcontext);
+
+void
+grate_screen_resource_init(struct pipe_screen *pscreen);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_screen.c b/src/gallium/drivers/grate/grate_screen.c
new file mode 100755
index 0000000..a4fda79
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_screen.c
@@ -0,0 +1,609 @@
+#include <stdio.h>
+
+#include "util/u_memory.h"
+
+#include "grate_common.h"
+#include "grate_context.h"
+#include "grate_resource.h"
+#include "grate_screen.h"
+/* Flag names and descriptions for the GRATE_DEBUG option; NULL-terminated. */
+static const struct debug_named_value debug_options[] = {
+   { "unimplemented", GRATE_DEBUG_UNIMPLEMENTED,
+     "Print unimplemented functions" },
+   { "tgsi", GRATE_DEBUG_TGSI,
+     "Dump TGSI during program compile" },
+   { NULL }
+};
+/* GRATE_DEBUG flag option (default 0) and the global mask tested as grate_debug & GRATE_DEBUG_*. */
+DEBUG_GET_ONCE_FLAGS_OPTION(grate_debug, "GRATE_DEBUG", debug_options, 0)
+uint32_t grate_debug;
+/* Tear down the screen: transfer pool parent, DRM handle, then the struct. */
+static void
+grate_screen_destroy(struct pipe_screen *pscreen)
+{
+   struct grate_screen *scr = grate_screen(pscreen);
+
+   slab_destroy_parent(&scr->transfer_pool);
+   drm_tegra_close(scr->drm);
+
+   FREE(scr);
+}
+/* Return the fixed name string "Tegra"; pscreen is not inspected. */
+static const char *
+grate_screen_get_name(struct pipe_screen *pscreen)
+{
+   return "Tegra";
+}
+/* Return the fixed vendor string "Grate"; pscreen is not inspected. */
+static const char *
+grate_screen_get_vendor(struct pipe_screen *pscreen)
+{
+   return "Grate";
+}
+/* Return the fixed device vendor string "NVIDIA"; pscreen is not inspected. */
+static const char *
+grate_screen_get_device_vendor(struct pipe_screen *pscreen)
+{
+   return "NVIDIA";
+}
+
+static int
+grate_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+   switch (param) {
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1; /* not really, but mesa requires it for now! */
+
+   case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+      return 0; /* ??? */
+
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 0;
+
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      return 8; /* ??? */
+
+   case PIPE_CAP_OCCLUSION_QUERY:
+      return 0; /* ??? */
+
+   case PIPE_CAP_QUERY_TIME_ELAPSED:
+      return 0; /* ??? - can we use syncpts for this? */
+
+   case PIPE_CAP_TEXTURE_SWIZZLE:
+      return 0;
+
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return 16;
+
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return 0;
+
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return 16; /* ??? */
+
+   case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+      return 0;
+
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+      return 1;
+
+   case PIPE_CAP_SM3:
+      return 1; /* well, not quite. but perhaps close enough? */
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+      return 0;
+
+   case PIPE_CAP_PRIMITIVE_RESTART:
+      return 0; /* probably possible to do by splitting draws, but not sure */
+
+   case PIPE_CAP_INDEP_BLEND_ENABLE:
+      return 0; /* ??? */
+
+   case PIPE_CAP_INDEP_BLEND_FUNC:
+      return 0; /* ??? */
+
+   case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+      return 0;
+
+   case PIPE_CAP_DEPTH_CLIP_DISABLE:
+      return 0; /* ??? */
+
+   case PIPE_CAP_SHADER_STENCIL_EXPORT:
+      return 0; /* ??? */
+
+   case PIPE_CAP_TGSI_INSTANCEID:
+   case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+      return 0;
+
+   case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+      return 0; /* probably not */
+
+   case PIPE_CAP_SEAMLESS_CUBE_MAP:
+   case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+      return 0; /* probably not */
+
+   case PIPE_CAP_MIN_TEXEL_OFFSET:
+   case PIPE_CAP_MAX_TEXEL_OFFSET:
+      return 0;
+
+   case PIPE_CAP_CONDITIONAL_RENDER:
+      return 0; /* probably not */
+
+   case PIPE_CAP_TEXTURE_BARRIER:
+      return 0; /* no clue */
+
+   case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+   case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+   case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+      return 0;
+
+   case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+      return 0; /* probably */
+
+   case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+      return 1; /* probably irrelevant for GLES2 */
+
+   case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+      return 0; /* probably irrelevant for GLES2 */
+
+   case PIPE_CAP_GLSL_FEATURE_LEVEL:
+      return 120; /* no clue */
+
+   case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+      return 0; /* no idea, need to test */
+
+   case PIPE_CAP_USER_VERTEX_BUFFERS:
+      return 0; /* probably possible, but nasty for kernel */
+
+   case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+   case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+      return 0;
+
+   case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+      return 4; /* DWORD aligned, can do pure data GATHER */
+
+   case PIPE_CAP_START_INSTANCE:
+      return 0;
+
+   case PIPE_CAP_QUERY_TIMESTAMP:
+      return 0; /* dunno */
+
+   case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
+      return 0;
+
+   case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+      return 0;
+
+   case PIPE_CAP_CUBE_MAP_ARRAY:
+      return 0;
+
+   case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+   case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+   case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+      return 0;
+
+   case PIPE_CAP_TGSI_TEXCOORD:
+      return 0;
+
+   case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+      return 1;
+
+   case PIPE_CAP_COMPUTE:
+   case PIPE_CAP_TEXTURE_MULTISAMPLE:
+   case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+   case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+      return 0;
+
+   case PIPE_CAP_MAX_VIEWPORTS:
+      return 1;
+
+   case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:
+      return 1;
+
+   case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:
+   case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
+   case PIPE_CAP_MAX_VERTEX_STREAMS:
+      return 0;
+
+   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
+   case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
+   case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
+   case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT: /* dunno */
+   case PIPE_CAP_TEXTURE_QUERY_LOD:
+   case PIPE_CAP_SAMPLE_SHADING:
+      return 0;
+
+   case PIPE_CAP_DRAW_INDIRECT:
+   case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
+      return 0;
+
+   case PIPE_CAP_VENDOR_ID:
+      return 0x10de;
+
+   case PIPE_CAP_DEVICE_ID:
+      return 0xFFFFFFFF;
+
+   case PIPE_CAP_ACCELERATED:
+      return 1;
+
+   case PIPE_CAP_VIDEO_MEMORY:
+      return 0;
+
+   case PIPE_CAP_UMA:
+      return 1;
+
+   case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
+      return 0; /* no support, shouldn't matter */
+
+   case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
+      return (1 << 24) - 1;
+
+   case PIPE_CAP_SAMPLER_VIEW_TARGET:
+   case PIPE_CAP_CLIP_HALFZ:
+   case PIPE_CAP_POLYGON_OFFSET_CLAMP:
+   case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
+   case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
+      return 0;
+
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+      return 0;
+
+   case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
+   case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
+   case PIPE_CAP_DEPTH_BOUNDS_TEST:
+   case PIPE_CAP_TGSI_TXQS:
+      return 0;
+
+   case PIPE_CAP_SHAREABLE_SHADERS:
+      return 1;
+
+   case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:
+   case PIPE_CAP_CLEAR_TEXTURE: /* might be possible */
+   case PIPE_CAP_DRAW_PARAMETERS:
+   case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT:
+   case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:
+      return 0;
+
+   case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:
+   case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
+      return 0; /* not really sure about these */
+
+   case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
+   case PIPE_CAP_INVALIDATE_BUFFER: /* not sure */
+   case PIPE_CAP_STRING_MARKER:
+   case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS: /* can probably be supported */
+   case PIPE_CAP_QUERY_BUFFER_OBJECT:
+   case PIPE_CAP_QUERY_MEMORY_INFO:
+   case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: /* not sure */
+   case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR: /* probably not */
+   case PIPE_CAP_CULL_DISTANCE: /* don't know */
+   case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
+   case PIPE_CAP_TGSI_VOTE:
+   case PIPE_CAP_MAX_WINDOW_RECTANGLES:
+      return 0;
+
+   case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
+      return 4; /* minimum for GLES 2.0, might be more */
+
+   case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:
+      return 1; /* probably true ? */
+
+   case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+      return 1; /* can't see why not... */
+
+   case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:
+   case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:
+   case PIPE_CAP_TGSI_FS_FBFETCH: /* supported, but let's enable later */
+   case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
+   case PIPE_CAP_DOUBLES:
+   case PIPE_CAP_INT64:
+   case PIPE_CAP_INT64_DIVMOD:
+   case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
+   case PIPE_CAP_TGSI_TEX_TXF_LZ:
+   case PIPE_CAP_TGSI_CLOCK:
+   case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
+   case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:
+   case PIPE_CAP_TGSI_BALLOT:
+   case PIPE_CAP_NATIVE_FENCE_FD:
+   case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
+   case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
+   case PIPE_CAP_POST_DEPTH_COVERAGE:
+   case PIPE_CAP_BINDLESS_TEXTURE:
+   case PIPE_CAP_QUERY_SO_OVERFLOW:
+   case PIPE_CAP_MEMOBJ:
+   case PIPE_CAP_LOAD_CONSTBUF:
+   case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
+   case PIPE_CAP_TILE_RASTER_ORDER:
+   case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
+   case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+      return 0;
+
+   default:
+      fprintf(stdout, "%s: unsupported parameter: %d\n", __func__, param);
+      return 0;
+   }
+}
+
+/* Float caps; unknown queries are reported on stderr and answered with 0. */
+static float
+grate_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+   switch (param) {
+   case PIPE_CAPF_MAX_LINE_WIDTH:
+   case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+   case PIPE_CAPF_MAX_POINT_WIDTH:
+   case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+      return 8192.0f; /* no clue */
+
+   case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+      return 0.0f;
+
+   case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+      return 16.0f;
+
+   default:
+      fprintf(stderr, "%s: unsupported parameter: %d\n", __func__, param);
+      return 0.0f;
+   }
+}
+
+/* Per-stage shader limits; only the vertex and fragment stages report any. */
+static int
+grate_screen_get_shader_param(struct pipe_screen *pscreen,
+                              unsigned int shader,
+                              enum pipe_shader_cap param)
+{
+   switch (shader) {
+   case PIPE_SHADER_VERTEX:
+      switch (param) {
+      case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+         return 1024;
+
+      /* no vertex-texturing */
+      case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+      case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+      case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+      case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+      case PIPE_SHADER_CAP_SUBROUTINES:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_INPUTS:
+      case PIPE_SHADER_CAP_MAX_OUTPUTS:
+         return 16;
+
+      case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+         return 1024;
+      case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+         return 1; /* grate_set_constant_buffer() only accepts index 0 */
+
+      case PIPE_SHADER_CAP_MAX_TEMPS:
+         return 64 * 4; /* 64 vec4s */
+
+      /* cannot index attributes, varyings nor GPRs */
+      case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+         return 0;
+
+      case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+         return 1; /* can index constant registers */
+
+      case PIPE_SHADER_CAP_INTEGERS:
+      case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+         return 0;
+
+      case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+         return 1;
+
+      case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+      case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+         return 0;
+
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return INT_MAX;
+
+      case PIPE_SHADER_CAP_SUPPORTED_IRS:
+         return 1 << PIPE_SHADER_IR_TGSI; /* a bitmask of pipe_shader_ir values */
+
+      case PIPE_SHADER_CAP_PREFERRED_IR:
+         return PIPE_SHADER_IR_TGSI;
+
+      case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+         return INT_MAX;
+
+      case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+         return 0;
+
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+      case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+         return 0;
+
+      default:
+         fprintf(stderr, "%s: unsupported vertex-shader parameter: %d\n", __func__, param);
+         return 0;
+      }
+
+   case PIPE_SHADER_FRAGMENT:
+      switch (param) {
+      case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+         return 4 * 128;
+
+      case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+         return 4 * 128;
+
+      case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+         return 128;
+
+      case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+         return 128;
+
+      /* no control flow */
+      case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+      case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+      case PIPE_SHADER_CAP_SUBROUTINES:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_INPUTS:
+      case PIPE_SHADER_CAP_MAX_OUTPUTS:
+         return 16;
+
+      case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
+         return 32;
+      case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+         return 1; /* grate_set_constant_buffer() only accepts index 0 */
+
+      case PIPE_SHADER_CAP_MAX_TEMPS:
+         return 16; /* scalars */
+
+      /* no indirection */
+      case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+      case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+         return 0;
+
+      case PIPE_SHADER_CAP_INTEGERS:
+      case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+      case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
+         return 16;
+
+      case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+         return 1;
+
+      case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
+      case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
+         return 0;
+
+      case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
+         return 0; /* might really be true, but need more testing to be sure */
+
+      case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
+         return INT_MAX;
+
+      case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
+         return 0;
+
+      case PIPE_SHADER_CAP_SUPPORTED_IRS:
+         return 1 << PIPE_SHADER_IR_TGSI; /* a bitmask of pipe_shader_ir values */
+
+      case PIPE_SHADER_CAP_PREFERRED_IR:
+         return PIPE_SHADER_IR_TGSI;
+
+      case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:
+         return INT_MAX;
+
+      case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:
+         return 0;
+
+      case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:
+         return 0;
+
+      case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
+      case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+         return 0;
+
+      default:
+         fprintf(stderr, "%s: unsupported fragment-shader parameter: %d\n", __func__, param);
+         return 0;
+      }
+
+   case PIPE_SHADER_GEOMETRY:
+   case PIPE_SHADER_TESS_CTRL:
+   case PIPE_SHADER_TESS_EVAL:
+   case PIPE_SHADER_COMPUTE:
+      return 0;
+
+   default:
+      fprintf(stderr, "%s: unknown shader type: %u\n", __func__, shader);
+      return 0;
+   }
+}
+
+/* Format query; MSAA is refused because PIPE_CAP_TEXTURE_MULTISAMPLE is 0. */
+static boolean
+grate_screen_is_format_supported(struct pipe_screen *pscreen,
+                                 enum pipe_format format,
+                                 enum pipe_texture_target target,
+                                 unsigned int sample_count, unsigned int usage)
+{
+   if (sample_count > 1)
+      return false;
+   if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL)) &&
+       grate_pixel_format(format) < 0)
+      return false;
+   return true;
+}
+
+static void
+grate_screen_fence_reference(struct pipe_screen *pscreen,
+                             struct pipe_fence_handle **ptr,
+                             struct pipe_fence_handle *fence)
+{
+   unimplemented(); /* stub: neither *ptr nor fence is touched */
+}
+
+static boolean
+grate_screen_fence_finish(struct pipe_screen *screen,
+                          struct pipe_context *ctx,
+                          struct pipe_fence_handle *fence,
+                          uint64_t timeout)
+{
+   unimplemented(); /* stub: no waiting is performed, timeout is ignored */
+   return FALSE; /* every call reports FALSE */
+}
+
+/* Allocate a grate screen for `drm` and install its pipe_screen callbacks. */
+struct pipe_screen *
+grate_screen_create(struct drm_tegra *drm)
+{
+   struct grate_screen *gscreen = CALLOC_STRUCT(grate_screen);
+   struct pipe_screen *base;
+
+   if (gscreen == NULL)
+      return NULL;
+
+   base = &gscreen->base;
+   gscreen->drm = drm;
+   grate_debug = debug_get_option_grate_debug();
+
+   /* pipe_screen entry points implemented in this file */
+   base->get_name = grate_screen_get_name;
+   base->get_vendor = grate_screen_get_vendor;
+   base->get_device_vendor = grate_screen_get_device_vendor;
+   base->get_param = grate_screen_get_param;
+   base->get_paramf = grate_screen_get_paramf;
+   base->get_shader_param = grate_screen_get_shader_param;
+   base->is_format_supported = grate_screen_is_format_supported;
+   base->context_create = grate_screen_context_create;
+   base->fence_reference = grate_screen_fence_reference;
+   base->fence_finish = grate_screen_fence_finish;
+   base->destroy = grate_screen_destroy;
+
+   grate_screen_resource_init(base);
+   slab_create_parent(&gscreen->transfer_pool, sizeof(struct pipe_transfer), 16);
+   return base;
+}
diff --git a/src/gallium/drivers/grate/grate_screen.h b/src/gallium/drivers/grate/grate_screen.h
new file mode 100644
index 0000000..d3b56d1
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_screen.h
@@ -0,0 +1,31 @@
+#ifndef GRATE_SCREEN_H
+#define GRATE_SCREEN_H
+
+#include "pipe/p_screen.h"
+#include "util/slab.h"
+
+#include <libdrm/tegra.h>
+
+extern uint32_t grate_debug;
+
+#define GRATE_DEBUG_UNIMPLEMENTED 0x1
+#define GRATE_DEBUG_TGSI 0x2
+
+struct grate_screen {
+   struct pipe_screen base; /* must stay first: grate_screen() casts a pipe_screen * to this struct */
+
+   struct slab_parent_pool transfer_pool; /* parent slab pool with pipe_transfer-sized items */
+
+   struct drm_tegra *drm; /* handle that was passed to grate_screen_create() */
+};
+
+static inline struct grate_screen *
+grate_screen(struct pipe_screen *screen)
+{
+   return (struct grate_screen *)screen; /* downcast; relies on base being the first member */
+}
+
+struct pipe_screen *
+grate_screen_create(struct drm_tegra *drm);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_state.c b/src/gallium/drivers/grate/grate_state.c
new file mode 100644
index 0000000..0660ab9
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_state.c
@@ -0,0 +1,634 @@
+#include <stdio.h>
+#include <math.h>
+
+#include "util/u_bitcast.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_format.h"
+
+#include "grate_common.h"
+#include "grate_context.h"
+#include "grate_program.h"
+#include "grate_resource.h"
+#include "grate_state.h"
+
+#include "tgr_3d.xml.h"
+#include "host1x01_hardware.h"
+
+static void
+grate_set_sample_mask(struct pipe_context *pcontext,
+                      unsigned int sample_mask)
+{
+   unimplemented(); /* stub: sample_mask is not stored anywhere */
+}
+
+/* Store the constant buffer of `shader`; only user-memory slot 0 is handled. */
+static void
+grate_set_constant_buffer(struct pipe_context *pcontext,
+                          unsigned int shader, unsigned int index,
+                          const struct pipe_constant_buffer *buffer)
+{
+   struct pipe_constant_buffer *slot;
+   assert(index == 0);
+   assert(buffer == NULL || buffer->user_buffer != NULL);
+   slot = &grate_context(pcontext)->constant_buffer[shader];
+   util_copy_constant_buffer(slot, buffer);
+}
+
+/* Latch framebuffer state: RT slot 0 is depth/stencil, colour buffer i is slot 1 + i. */
+static void
+grate_set_framebuffer_state(struct pipe_context *pcontext,
+                            const struct pipe_framebuffer_state *framebuffer)
+{
+   struct grate_context *context = grate_context(pcontext);
+   struct grate_framebuffer_state *fb = &context->framebuffer;
+   unsigned int i;
+   uint32_t mask = 0;
+
+   if (framebuffer->zsbuf) {
+      struct grate_resource *res = grate_resource(framebuffer->zsbuf->texture);
+
+      fb->rt_params[0]  = TGR3D_VAL(RT_PARAMS, FORMAT, res->format);
+      fb->rt_params[0] |= TGR3D_VAL(RT_PARAMS, PITCH, res->pitch);
+      fb->rt_params[0] |= TGR3D_BOOL(RT_PARAMS, TILED, res->tiled);
+      fb->bos[0] = res->bo;
+      mask |= 1;
+   } else {
+      fb->rt_params[0] = 0;
+      fb->bos[0] = NULL;
+   }
+
+   pipe_surface_reference(&fb->base.zsbuf, framebuffer->zsbuf);
+
+   for (i = 0; i < framebuffer->nr_cbufs; i++) {
+      struct pipe_surface *ref = framebuffer->cbufs[i];
+
+      /* cbufs[] may contain NULL holes; such slots are left disabled */
+      fb->rt_params[1 + i] = 0;
+      fb->bos[1 + i] = NULL;
+      if (ref) {
+         struct grate_resource *res = grate_resource(ref->texture);
+
+         fb->rt_params[1 + i]  = TGR3D_VAL(RT_PARAMS, FORMAT, res->format);
+         fb->rt_params[1 + i] |= TGR3D_VAL(RT_PARAMS, PITCH, res->pitch);
+         fb->rt_params[1 + i] |= TGR3D_BOOL(RT_PARAMS, TILED, res->tiled);
+         fb->bos[1 + i] = res->bo;
+         mask |= 1u << (1 + i);
+      }
+      pipe_surface_reference(&fb->base.cbufs[i], ref);
+   }
+
+   /* count only the new attachments; the loop below moves i past stale ones */
+   fb->num_rts = 1 + framebuffer->nr_cbufs;
+   fb->mask = mask;
+
+   for (; i < fb->base.nr_cbufs; i++)
+      pipe_surface_reference(&fb->base.cbufs[i], NULL);
+
+   fb->base.width = framebuffer->width;
+   fb->base.height = framebuffer->height;
+   fb->base.nr_cbufs = framebuffer->nr_cbufs;
+
+   /* prepare the scissor-registers for the non-scissor case */
+   context->no_scissor[0]  = host1x_opcode_incr(TGR3D_SCISSOR_HORIZ, 2);
+   context->no_scissor[1]  = TGR3D_VAL(SCISSOR_HORIZ, MIN, 0);
+   context->no_scissor[1] |= TGR3D_VAL(SCISSOR_HORIZ, MAX, framebuffer->width);
+   context->no_scissor[2]  = TGR3D_VAL(SCISSOR_VERT, MIN, 0);
+   context->no_scissor[2] |= TGR3D_VAL(SCISSOR_VERT, MAX, framebuffer->height);
+}
+
+static void
+grate_set_polygon_stipple(struct pipe_context *pcontext,
+                          const struct pipe_poly_stipple *stipple)
+{
+   unimplemented(); /* stub: the stipple pattern is ignored */
+}
+
+static void
+grate_set_scissor_states(struct pipe_context *pcontext,
+                         unsigned start_slot,
+                         unsigned num_scissors,
+                         const struct pipe_scissor_state * scissors)
+{
+   assert(num_scissors == 1); /* a single scissor rectangle is expected */
+   unimplemented(); /* stub: scissors are ignored; emit_scissor() always sends no_scissor */
+}
+
+/* Encode viewport 0 into the viewport, depth-range and guardband command words. */
+static void
+grate_set_viewport_states(struct pipe_context *pcontext,
+                          unsigned start_slot, unsigned num_viewports,
+                          const struct pipe_viewport_state *viewports)
+{
+   struct grate_context *context = grate_context(pcontext);
+   static const float zeps = 1.0f / (1 << 21); /* 2^-21, written as a constant expression */
+
+   assert(num_viewports == 1);
+   assert(start_slot == 0);
+
+   context->viewport[0] = host1x_opcode_incr(TGR3D_VIEWPORT_X_BIAS, 6);
+   context->viewport[1] = u_bitcast_f2u(viewports[0].translate[0] * 16.0f);
+   context->viewport[2] = u_bitcast_f2u(viewports[0].translate[1] * 16.0f);
+   context->viewport[3] = u_bitcast_f2u(viewports[0].translate[2] - zeps);
+   context->viewport[4] = u_bitcast_f2u(viewports[0].scale[0] * 16.0f);
+   context->viewport[5] = u_bitcast_f2u(viewports[0].scale[1] * 16.0f);
+   context->viewport[6] = u_bitcast_f2u(viewports[0].scale[2] - zeps);
+
+   uint32_t depth_near = (viewports[0].translate[2] - viewports[0].scale[2]) * ((1 << 20) - 1);
+   uint32_t depth_far = (viewports[0].translate[2] + viewports[0].scale[2]) * ((1 << 20) - 1);
+   context->viewport[7] = host1x_opcode_incr(TGR3D_DEPTH_RANGE_NEAR, 2);
+   context->viewport[8] = depth_near;
+   context->viewport[9] = depth_far;
+
+   assert(viewports[0].scale[0] >= 0.0f);
+   float max_x = fabsf(viewports[0].translate[0]);
+   float max_y = fabsf(viewports[0].translate[1]);
+   float scale_x = viewports[0].scale[0];
+   float scale_y = fabsf(viewports[0].scale[1]);
+   context->guardband[0] = host1x_opcode_incr(TGR3D_GUARDBAND_WIDTH, 3);
+   context->guardband[1] = u_bitcast_f2u((3967 - max_x) / scale_x);
+   context->guardband[2] = u_bitcast_f2u((3967 - max_y) / scale_y);
+   context->guardband[3] = u_bitcast_f2u(6.99f);
+}
+
+/* Update the bound vertex buffers and recompute how many slots are in use. */
+static void
+grate_set_vertex_buffers(struct pipe_context *pcontext,
+                         unsigned int start, unsigned int count,
+                         const struct pipe_vertex_buffer *buffer)
+{
+   struct grate_vertexbuf_state *state = &grate_context(pcontext)->vbs;
+
+   util_set_vertex_buffers_mask(state->vb, &state->enabled, buffer, start, count);
+   state->count = util_last_bit(state->enabled);
+}
+
+
+static void
+grate_set_sampler_views(struct pipe_context *pcontext,
+                        unsigned shader,
+                        unsigned start_slot, unsigned num_views,
+                        struct pipe_sampler_view **views)
+{
+   unimplemented(); /* stub: the views are neither stored nor referenced */
+}
+
+void
+grate_context_state_init(struct pipe_context *pcontext)
+{ /* Install the set_* state hooks defined in this file on pcontext. */
+   pcontext->set_sample_mask = grate_set_sample_mask; /* stub */
+   pcontext->set_constant_buffer = grate_set_constant_buffer;
+   pcontext->set_framebuffer_state = grate_set_framebuffer_state;
+   pcontext->set_polygon_stipple = grate_set_polygon_stipple; /* stub */
+   pcontext->set_scissor_states = grate_set_scissor_states; /* stub */
+   pcontext->set_viewport_states = grate_set_viewport_states;
+   pcontext->set_sampler_views = grate_set_sampler_views; /* stub */
+   pcontext->set_vertex_buffers = grate_set_vertex_buffers;
+}
+
+/* Create a blend CSO: a heap copy of `template`, or NULL when out of memory. */
+static void *
+grate_create_blend_state(struct pipe_context *pcontext,
+                         const struct pipe_blend_state *template)
+{
+   struct pipe_blend_state *copy = CALLOC_STRUCT(pipe_blend_state);
+
+   if (copy != NULL)
+      *copy = *template;
+
+   return copy;
+}
+
+static void
+grate_bind_blend_state(struct pipe_context *pcontext, void *so)
+{
+   unimplemented(); /* stub: the blend CSO is not recorded in the context */
+}
+
+static void
+grate_delete_blend_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(so); /* releases the copy made by grate_create_blend_state() */
+}
+
+void
+grate_context_blend_init(struct pipe_context *pcontext)
+{ /* Install the blend-state CSO hooks on pcontext. */
+   pcontext->create_blend_state = grate_create_blend_state;
+   pcontext->bind_blend_state = grate_bind_blend_state; /* stub */
+   pcontext->delete_blend_state = grate_delete_blend_state;
+}
+
+/* Create a sampler CSO: a heap copy of `template`, or NULL when out of memory. */
+static void *
+grate_create_sampler_state(struct pipe_context *pcontext,
+                           const struct pipe_sampler_state *template)
+{
+   struct pipe_sampler_state *copy = CALLOC_STRUCT(pipe_sampler_state);
+
+   if (copy != NULL)
+      *copy = *template;
+
+   return copy;
+}
+
+static void
+grate_bind_sampler_states(struct pipe_context *pcontext,
+                          unsigned shader, unsigned start_slot,
+                          unsigned num_samplers, void **samplers)
+{
+   unimplemented(); /* stub: the sampler CSOs are not recorded in the context */
+}
+
+static void
+grate_delete_sampler_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(so); /* releases the copy made by grate_create_sampler_state() */
+}
+
+void
+grate_context_sampler_init(struct pipe_context *pcontext)
+{ /* Install the sampler-state CSO hooks on pcontext. */
+   pcontext->create_sampler_state = grate_create_sampler_state;
+   pcontext->bind_sampler_states = grate_bind_sampler_states; /* stub */
+   pcontext->delete_sampler_state = grate_delete_sampler_state;
+}
+
+/* Translate PIPE_FACE_* into TGR3D_CULL_FACE_*; `front_ccw` selects the winding. */
+static int
+grate_cull_face(int cull_face, bool front_ccw)
+{
+   const int front = front_ccw ? TGR3D_CULL_FACE_CCW : TGR3D_CULL_FACE_CW;
+   const int back = front_ccw ? TGR3D_CULL_FACE_CW : TGR3D_CULL_FACE_CCW;
+
+   switch (cull_face) {
+   case PIPE_FACE_NONE:
+      return TGR3D_CULL_FACE_NONE;
+   case PIPE_FACE_FRONT:
+      return front;
+   case PIPE_FACE_BACK:
+      return back;
+   case PIPE_FACE_FRONT_AND_BACK:
+      return TGR3D_CULL_FACE_BOTH;
+   default:
+      unreachable("unknown cull_face");
+   }
+}
+
+/* Rasterizer CSO with pre-encoded DRAW_PARAMS and CULL_FACE_LINKER_SETUP bits. */
+static void *
+grate_create_rasterizer_state(struct pipe_context *pcontext,
+                              const struct pipe_rasterizer_state *template)
+{
+   struct grate_rasterizer_state *rast = CALLOC_STRUCT(grate_rasterizer_state);
+   const bool front_cw = !template->front_ccw;
+
+   if (rast == NULL)
+      return NULL;
+   rast->base = *template;
+   rast->draw_params = TGR3D_VAL(DRAW_PARAMS, PROVOKING_VERTEX, !template->flatshade_first);
+   /* NOTE(review): grate_cull_face() calls this argument front_ccw but gets front_cw - confirm */
+   rast->cull_face = TGR3D_BOOL(CULL_FACE_LINKER_SETUP, FRONT_CW, front_cw);
+   rast->cull_face |= TGR3D_VAL(CULL_FACE_LINKER_SETUP, CULL_FACE, grate_cull_face(template->cull_face, front_cw));
+   return rast;
+}
+
+static void
+grate_bind_rasterizer_state(struct pipe_context *pcontext, void *so)
+{
+   grate_context(pcontext)->rast = so; /* read later by emit_program() */
+}
+
+static void
+grate_delete_rasterizer_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(so); /* does not clear context->rast if this CSO is still bound */
+}
+
+void
+grate_context_rasterizer_init(struct pipe_context *pcontext)
+{ /* Install the rasterizer-state CSO hooks on pcontext. */
+   pcontext->create_rasterizer_state = grate_create_rasterizer_state;
+   pcontext->bind_rasterizer_state = grate_bind_rasterizer_state;
+   pcontext->delete_rasterizer_state = grate_delete_rasterizer_state;
+}
+
+static int grate_compare_func(enum pipe_compare_func func)
+{
+   switch (func) { /* PIPE_FUNC_* -> TGR3D_COMPARE_FUNC_* hardware encoding */
+   case PIPE_FUNC_NEVER: return TGR3D_COMPARE_FUNC_NEVER;
+   case PIPE_FUNC_LESS: return TGR3D_COMPARE_FUNC_LESS;
+   case PIPE_FUNC_EQUAL: return TGR3D_COMPARE_FUNC_EQUAL;
+   case PIPE_FUNC_LEQUAL: return TGR3D_COMPARE_FUNC_LEQUAL;
+   case PIPE_FUNC_GREATER: return TGR3D_COMPARE_FUNC_GREATER;
+   case PIPE_FUNC_NOTEQUAL: return TGR3D_COMPARE_FUNC_NOTEQUAL;
+   case PIPE_FUNC_GEQUAL: return TGR3D_COMPARE_FUNC_GEQUAL;
+   case PIPE_FUNC_ALWAYS: return TGR3D_COMPARE_FUNC_ALWAYS;
+   default: unreachable("unknown pipe_compare_func");
+   }
+}
+
+/* Build the depth/stencil/alpha CSO and pre-bake its DEPTH_TEST_PARAMS write. */
+static void *
+grate_create_zsa_state(struct pipe_context *pcontext,
+                       const struct pipe_depth_stencil_alpha_state *template)
+{
+   struct grate_zsa_state *zsa = CALLOC_STRUCT(grate_zsa_state);
+   uint32_t params;
+
+   if (zsa == NULL)
+      return NULL;
+
+   zsa->base = *template;
+
+   params  = 0x200; /* set unconditionally; its meaning is not documented here */
+   params |= TGR3D_VAL(DEPTH_TEST_PARAMS, FUNC,
+                       grate_compare_func(template->depth.func));
+   params |= TGR3D_BOOL(DEPTH_TEST_PARAMS, DEPTH_TEST, template->depth.enabled);
+   params |= TGR3D_BOOL(DEPTH_TEST_PARAMS, DEPTH_WRITE, template->depth.writemask);
+
+   zsa->commands[0] = host1x_opcode_incr(TGR3D_DEPTH_TEST_PARAMS, 1);
+   zsa->commands[1] = params;
+
+   return zsa;
+}
+
+static void
+grate_bind_zsa_state(struct pipe_context *pcontext, void *so)
+{
+   grate_context(pcontext)->zsa = so; /* read later by emit_zsa_state() */
+}
+
+static void
+grate_delete_zsa_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(so); /* does not clear context->zsa if this CSO is still bound */
+}
+
+void
+grate_context_zsa_init(struct pipe_context *pcontext)
+{ /* Install the depth/stencil/alpha CSO hooks on pcontext. */
+   pcontext->create_depth_stencil_alpha_state = grate_create_zsa_state;
+   pcontext->bind_depth_stencil_alpha_state = grate_bind_zsa_state;
+   pcontext->delete_depth_stencil_alpha_state = grate_delete_zsa_state;
+}
+
+/* Compute the TYPE and SIZE fields of an ATTRIB_MODE word for one vertex element.
+ * Note: this does not include the stride, which needs to be mixed in later
+ */
+static uint32_t
+attrib_mode(const struct pipe_vertex_element *e)
+{
+   const struct util_format_description *desc = util_format_description(e->src_format);
+   const int c = util_format_get_first_non_void_channel(e->src_format);
+   uint32_t type, format;
+
+   assert(!desc->is_mixed);
+   assert(c >= 0);
+
+   switch (desc->channel[c].type) { /* channel c stands in for the whole format */
+   case UTIL_FORMAT_TYPE_UNSIGNED:
+   case UTIL_FORMAT_TYPE_SIGNED:
+      switch (desc->channel[c].size) {
+      case 8:
+         type = TGR3D_ATTRIB_TYPE_UBYTE;
+         break;
+
+      case 16:
+         type = TGR3D_ATTRIB_TYPE_USHORT;
+         break;
+
+      case 32:
+         type = TGR3D_ATTRIB_TYPE_UINT;
+         break;
+
+      default:
+         unreachable("invalid channel-size");
+      }
+
+      if (desc->channel[c].type == UTIL_FORMAT_TYPE_SIGNED)
+         type += 2; /* presumably the signed type sits 2 above the unsigned one - confirm in tgr_3d.xml.h */
+
+      if (desc->channel[c].normalized)
+         type += 1; /* presumably the normalized variant is the next enum value - confirm */
+
+      break;
+
+   case UTIL_FORMAT_TYPE_FIXED:
+      assert(desc->channel[c].size == 32);
+      type = TGR3D_ATTRIB_TYPE_FIXED16;
+      break;
+
+   case UTIL_FORMAT_TYPE_FLOAT:
+      assert(desc->channel[c].size == 32); /* TODO: float16 ? */
+      type = TGR3D_ATTRIB_TYPE_FLOAT32;
+      break;
+
+   default:
+      unreachable("invalid channel-type");
+   }
+
+   format  = TGR3D_VAL(ATTRIB_MODE, TYPE, type);
+   format |= TGR3D_VAL(ATTRIB_MODE, SIZE, desc->nr_channels); /* SIZE is the channel count */
+   return format;
+}
+
+/* Translate `count` gallium vertex elements into a grate vertex-state CSO. */
+static void *
+grate_create_vertex_state(struct pipe_context *pcontext, unsigned int count,
+                          const struct pipe_vertex_element *elements)
+{
+   struct grate_vertex_state *state = CALLOC_STRUCT(grate_vertex_state);
+   uint16_t enabled = 0;
+   unsigned int n;
+
+   if (state == NULL)
+      return NULL;
+
+   for (n = 0; n < count; n++) {
+      struct grate_vertex_element *out = &state->elements[n];
+      out->attrib = attrib_mode(&elements[n]);
+      out->buffer_index = elements[n].vertex_buffer_index;
+      out->offset = elements[n].src_offset;
+      enabled |= 1 << n;
+   }
+
+   state->num_elements = count;
+   state->mask = enabled;
+   return state;
+}
+
+static void
+grate_bind_vertex_state(struct pipe_context *pcontext, void *so)
+{
+   grate_context(pcontext)->vs = so; /* read later by emit_attribs() */
+}
+
+static void
+grate_delete_vertex_state(struct pipe_context *pcontext, void *so)
+{
+   FREE(so); /* does not clear context->vs if this CSO is still bound */
+}
+
+static void
+emit_attribs(struct grate_context *context)
+{ /* Push one ATTRIB_PTR/ATTRIB_MODE pair per element of the bound vertex state. */
+   unsigned int i;
+   struct grate_stream *stream = &context->gr3d->stream;
+
+   assert(context->vs);
+
+   for (i = 0; i < context->vs->num_elements; ++i) {
+      const struct pipe_vertex_buffer *vb;
+      const struct grate_vertex_element *e = context->vs->elements + i;
+      const struct grate_resource *r;
+
+      assert(e->buffer_index < context->vbs.count);
+      vb = context->vbs.vb + e->buffer_index;
+      assert(!vb->is_user_buffer); /* only resource-backed vertex buffers are handled */
+      r = grate_resource(vb->buffer.resource);
+
+      uint32_t attrib = e->attrib; /* TYPE and SIZE fields computed by attrib_mode() */
+      assert(vb->stride < 1 << 24);
+      attrib |= TGR3D_VAL(ATTRIB_MODE, STRIDE, vb->stride); /* the stride is mixed in here */
+
+      grate_stream_push(stream, host1x_opcode_incr(TGR3D_ATTRIB_PTR(i), 2)); /* 2 words follow */
+      grate_stream_push_reloc(stream, r->bo, vb->buffer_offset + e->offset);
+      grate_stream_push(stream, attrib);
+   }
+}
+
+static void
+emit_render_targets(struct grate_context *context)
+{ /* Push RT_PARAMS, RT_PTR and RT_ENABLE for the num_rts framebuffer slots. */
+   unsigned int i;
+   struct grate_stream *stream = &context->gr3d->stream;
+   const struct grate_framebuffer_state *fb = &context->framebuffer;
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_RT_PARAMS(0), fb->num_rts));
+   for (i = 0; i < fb->num_rts; ++i) {
+      uint32_t rt_params = fb->rt_params[i];
+      /* TODO: setup dither */
+      /* rt_params |= TGR3D_BOOL(RT_PARAMS, DITHER_ENABLE, enable_dither); */
+      grate_stream_push(stream, rt_params);
+   }
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_RT_PTR(0), fb->num_rts));
+   for (i = 0; i < fb->num_rts; ++i)
+      grate_stream_push_reloc(stream, fb->bos[i], 0); /* bos[0] is NULL when no depth/stencil buffer is bound */
+
+   grate_stream_push(stream, host1x_opcode_incr(TGR3D_RT_ENABLE, 1));
+   grate_stream_push(stream, fb->mask); /* bit 0: depth/stencil, bit 1 + i: colour buffer i */
+}
+
+static void
+emit_scissor(struct grate_context *context)
+{
+   struct grate_stream *stream = &context->gr3d->stream;
+   grate_stream_push_words(stream, context->no_scissor, 3, 0); /* opcode + horiz + vert, built in grate_set_framebuffer_state() */
+}
+
+static void
+emit_viewport(struct grate_context *context)
+{
+   struct grate_stream *stream = &context->gr3d->stream;
+   grate_stream_push_words(stream, context->viewport, 10, 0); /* words 0..9, built in grate_set_viewport_states() */
+}
+
+static void
+emit_guardband(struct grate_context *context)
+{
+   struct grate_stream *stream = &context->gr3d->stream;
+   grate_stream_push_words(stream, context->guardband, 4, 0); /* words 0..3, built in grate_set_viewport_states() */
+}
+
+static void
+emit_zsa_state(struct grate_context *context)
+{
+   struct grate_stream *stream = &context->gr3d->stream;
+   grate_stream_push_words(stream, context->zsa->commands, 2, 0); /* dereferences the bound zsa CSO without a NULL check */
+}
+
+/* Upload the vertex-shader constants from the bound user buffer, if any. */
+static void
+emit_vs_uniforms(struct grate_context *context)
+{
+   struct grate_stream *stream = &context->gr3d->stream;
+   struct pipe_constant_buffer *cb = &context->constant_buffer[PIPE_SHADER_VERTEX];
+   int num_words;
+
+   if (cb->user_buffer == NULL)
+      return;
+
+   assert(cb->buffer_size % sizeof(uint32_t) == 0);
+   num_words = cb->buffer_size / 4;
+   assert(num_words < 256 * 4);
+   grate_stream_push(stream, host1x_opcode_imm(TGR3D_VP_UPLOAD_CONST_ID, 0));
+   grate_stream_push(stream, host1x_opcode_nonincr(TGR3D_VP_UPLOAD_CONST, num_words));
+   grate_stream_push_words(stream, cb->user_buffer, num_words, 0);
+}
+
+static void
+emit_shader(struct grate_stream *stream, struct grate_shader_blob *blob)
+{
+   grate_stream_push_words(stream, blob->commands, blob->num_commands, 0); /* copies the blob's command words into the stream */
+}
+
+static void
+emit_program(struct grate_context *context)
+{ /* Push both shader blobs, then the cull-face/linker setup and linker instructions. */
+   struct grate_stream *stream = &context->gr3d->stream;
+
+   emit_shader(stream, &context->vshader->blob);
+   emit_shader(stream, &context->fshader->blob);
+
+   uint32_t cull_face_linker_setup = TGR3D_VAL(CULL_FACE_LINKER_SETUP,
+                                               UNK_18_31, 0x2e38);
+
+   /* depends on cull-face */
+   cull_face_linker_setup |= context->rast->cull_face;
+
+   /* depends on linking */
+   struct grate_fp_info *info = &context->fshader->info;
+   assert(info->num_inputs > 0);
+   cull_face_linker_setup |= TGR3D_VAL(CULL_FACE_LINKER_SETUP,
+                                       LINKER_INST_COUNT,
+                                       info->num_inputs - 1);
+
+   uint32_t linker_insts[3 + info->num_inputs * 2]; /* 3 header words, then 2 words per fragment input */
+   linker_insts[0] = host1x_opcode_incr(TGR3D_CULL_FACE_LINKER_SETUP, 1);
+   linker_insts[1] = cull_face_linker_setup;
+   linker_insts[2] = host1x_opcode_incr(TGR3D_LINKER_INSTRUCTION(0), info->num_inputs * 2);
+
+   for (int i = 0; i < info->num_inputs; ++i) {
+      linker_insts[3 + i * 2] = info->inputs[i].src;
+      linker_insts[3 + i * 2 + 1] = info->inputs[i].dst;
+   }
+
+   if (context->rast->base.flatshade && info->color_input >= 0)
+      linker_insts[3 + info->color_input * 2 + 1] |= 0xf << 16; /* touches the colour input's dst word only when flat shading */
+
+   grate_stream_push_words(stream, linker_insts, ARRAY_SIZE(linker_insts), 0);
+}
+
+void
+grate_emit_state(struct grate_context *context)
+{ /* Push all tracked state into the gr3d stream, in the order listed below. */
+   emit_render_targets(context);
+   emit_viewport(context);
+   emit_guardband(context);
+   emit_scissor(context);
+   emit_zsa_state(context); /* dereferences context->zsa */
+   emit_attribs(context); /* asserts that context->vs is set */
+   emit_vs_uniforms(context);
+   emit_program(context); /* dereferences vshader, fshader and rast */
+}
+
+void
+grate_context_vbo_init(struct pipe_context *pcontext)
+{ /* Install the vertex-elements CSO hooks on pcontext. */
+   pcontext->create_vertex_elements_state = grate_create_vertex_state;
+   pcontext->bind_vertex_elements_state = grate_bind_vertex_state;
+   pcontext->delete_vertex_elements_state = grate_delete_vertex_state;
+}
diff --git a/src/gallium/drivers/grate/grate_state.h b/src/gallium/drivers/grate/grate_state.h
new file mode 100644
index 0000000..0a96461
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_state.h
@@ -0,0 +1,59 @@
+#ifndef GRATE_STATE_H
+#define GRATE_STATE_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+struct grate_context;
+
+struct grate_rasterizer_state {
+   struct pipe_rasterizer_state base;
+   uint32_t draw_params;
+   uint32_t cull_face;
+};
+
+struct grate_zsa_state {
+   struct pipe_depth_stencil_alpha_state base;
+   uint32_t commands[2];
+};
+
+struct grate_vertexbuf_state {
+   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+   unsigned int count;
+   uint32_t enabled;
+};
+
+struct grate_vertex_element {
+   uint32_t attrib;
+   unsigned int buffer_index;
+   unsigned int offset;
+};
+
+struct grate_vertex_state {
+   struct grate_vertex_element elements[PIPE_MAX_ATTRIBS];
+   unsigned int num_elements;
+   uint16_t mask;
+};
+
+void
+grate_emit_state(struct grate_context *context);
+
+void
+grate_context_state_init(struct pipe_context *pcontext);
+
+void
+grate_context_blend_init(struct pipe_context *pcontext);
+
+void
+grate_context_sampler_init(struct pipe_context *pcontext);
+
+void
+grate_context_rasterizer_init(struct pipe_context *pcontext);
+
+void
+grate_context_zsa_init(struct pipe_context *pcontext);
+
+void
+grate_context_vbo_init(struct pipe_context *pcontext);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_stream.c b/src/gallium/drivers/grate/grate_stream.c
new file mode 100644
index 0000000..21c21e1
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_stream.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (c) 2016-2017 Dmitry Osipenko <digetx@gmail.com>
+ * Copyright (C) 2012-2013 NVIDIA Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Arto Merilainen <amerilainen@nvidia.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "host1x01_hardware.h"
+#include "hw_host1x01_uclass.h"
+#include "grate_stream.h"
+
+#define ErrorMsg(fmt, args...) \
+    fprintf(stderr, "%s:%d/%s(): " fmt, \
+            __FILE__, __LINE__, __func__, ##args)
+
+/*
+ * grate_stream_create(drm, channel, stream, words_num)
+ *
+ * Initialize a caller-provided stream for the given channel. Nothing is
+ * allocated here: the job and pushbuf are created by grate_stream_begin(),
+ * which prepares room for words_num words. The job pointer is cleared so
+ * grate_stream_destroy() never sees an uninitialized one. Always returns 0.
+ */
+
+int
+grate_stream_create(struct drm_tegra *drm,
+                    struct drm_tegra_channel *channel,
+                    struct grate_stream *stream,
+                    uint32_t words_num)
+{
+   stream->status    = GRATE_STREAM_FREE;
+   stream->channel   = channel;
+   stream->num_words = words_num;
+   stream->job       = NULL;
+   return 0;
+}
+
+/*
+ * grate_stream_destroy(stream)
+ *
+ * Free the stream's job. The struct is caller-owned; NULL is ignored.
+ */
+
+void
+grate_stream_destroy(struct grate_stream *stream)
+{
+   if (!stream)
+      return;
+   drm_tegra_job_free(stream->job);
+   stream->job = NULL; /* a repeated destroy must not free the job again */
+}
+
+/*
+ * grate_stream_flush(stream)
+ *
+ * Submit the job of a READY stream (one closed with grate_stream_end()) and
+ * wait for its fence, passing 1000 as the timeout. Flushing a FREE stream
+ * is a no-op returning 0. On all other non-NULL streams, and after any
+ * submission attempt, the job is freed and the stream returns to FREE.
+ */
+
+int
+grate_stream_flush(struct grate_stream *stream)
+{
+   struct drm_tegra_fence *fence;
+   int result = 0;
+
+   if (!stream)
+      return -1;
+
+   /* Reflushing is fine */
+   if (stream->status == GRATE_STREAM_FREE)
+      return 0;
+
+   /* Return error if stream is constructed badly */
+   if (stream->status != GRATE_STREAM_READY) {
+      result = -1;
+      goto cleanup;
+   }
+
+   result = drm_tegra_job_submit(stream->job, &fence);
+   if (result != 0) {
+      ErrorMsg("drm_tegra_job_submit() failed %d\n", result);
+      result = -1;
+      goto cleanup;
+   }
+
+   result = drm_tegra_fence_wait_timeout(fence, 1000);
+   if (result != 0) {
+      ErrorMsg("drm_tegra_fence_wait_timeout() failed %d\n", result);
+      result = -1;
+   }
+
+   drm_tegra_fence_free(fence);
+
+cleanup:
+   drm_tegra_job_free(stream->job);
+
+   stream->job = NULL;
+   stream->status = GRATE_STREAM_FREE;
+
+   return result;
+}
+
+/*
+ * grate_stream_begin(stream)
+ *
+ * Start constructing a stream. The stream must be in the FREE state.
+ *  - a new job is created on the stream's channel;
+ *  - a pushbuf is created for that job;
+ *  - room for stream->num_words words (the value given to
+ *    grate_stream_create()) is prepared in the pushbuf.
+ *
+ * On success the class id is reset to 0, so a class has to be selected
+ * with grate_stream_push_setclass() before grate_stream_push_words() is
+ * used, and the stream moves to the CONSTRUCT state.
+ *
+ * Returns 0 on success and -1 on failure. On failure the stream stays in
+ * the FREE state; a job that was already created is freed and the job
+ * pointer is cleared, so a later grate_stream_destroy() does not free it
+ * a second time.
+ */
+
+int
+grate_stream_begin(struct grate_stream *stream)
+{
+   int ret;
+
+   if (!(stream && stream->status == GRATE_STREAM_FREE)) {
+      ErrorMsg("Stream status isn't FREE\n");
+      return -1;
+   }
+
+   ret = drm_tegra_job_new(&stream->job, stream->channel);
+   if (ret != 0) {
+      ErrorMsg("drm_tegra_job_new() failed %d\n", ret);
+      return -1;
+   }
+
+   ret = drm_tegra_pushbuf_new(&stream->buffer.pushbuf, stream->job);
+   if (ret != 0) {
+      ErrorMsg("drm_tegra_pushbuf_new() failed %d\n", ret);
+      goto err_free_job;
+   }
+
+   ret = drm_tegra_pushbuf_prepare(stream->buffer.pushbuf, stream->num_words);
+   if (ret != 0) {
+      ErrorMsg("drm_tegra_pushbuf_prepare() failed %d\n", ret);
+      goto err_free_job;
+   }
+   stream->class_id = 0;
+   stream->status = GRATE_STREAM_CONSTRUCT;
+   return 0;
+
+err_free_job:
+   drm_tegra_job_free(stream->job);
+   stream->job = NULL; /* status is still FREE: nothing may free it again */
+   return -1;
+}
+
+/*
+ * grate_stream_push_reloc(stream, bo, offset)
+ *
+ * Push a memory reference (bo + offset) to a stream in the CONSTRUCT state.
+ */
+
+int
+grate_stream_push_reloc(struct grate_stream *stream,
+                        struct drm_tegra_bo *bo,
+                        unsigned offset)
+{
+   int ret;
+
+   if (!(stream && stream->status == GRATE_STREAM_CONSTRUCT)) {
+      ErrorMsg("Stream status isn't CONSTRUCT\n");
+      return -1;
+   }
+
+   ret = drm_tegra_pushbuf_relocate(stream->buffer.pushbuf,
+                                    bo, offset, 0);
+   if (ret != 0) {
+      stream->status = GRATE_STREAM_CONSTRUCTION_FAILED;
+      ErrorMsg("drm_tegra_pushbuf_relocate() failed %d\n", ret);
+      return -1;
+   }
+
+   return 0;
+}
+
+/*
+ * grate_stream_push(stream, word)
+ *
+ * Push one word to a stream in the CONSTRUCT state; returns 0, or -1 on error.
+ */
+
+int
+grate_stream_push(struct grate_stream *stream, uint32_t word)
+{
+   int ret;
+
+   if (!(stream && stream->status == GRATE_STREAM_CONSTRUCT)) {
+      ErrorMsg("Stream status isn't CONSTRUCT\n");
+      return -1;
+   }
+
+   ret = drm_tegra_pushbuf_prepare(stream->buffer.pushbuf, 1);
+   if (ret != 0) {
+      stream->status = GRATE_STREAM_CONSTRUCTION_FAILED;
+      ErrorMsg("drm_tegra_pushbuf_prepare() failed %d\n", ret);
+      return -1;
+   }
+
+   *stream->buffer.pushbuf->ptr++ = word;
+
+   return 0;
+}
+
+/*
+ * grate_stream_push_setclass(stream, class_id)
+ *
+ * Push "set class" opcode to the stream. Do nothing if the class is already
+ * active. Returns 0 on success, -1 on a NULL stream or a failed push.
+ */
+
+int
+grate_stream_push_setclass(struct grate_stream *stream, unsigned class_id)
+{
+   /* grate_stream_push() also rejects NULL, but only after the dereference
+    * below would already have happened, so check up front */
+   if (!stream)
+      return -1;
+   if (stream->class_id == class_id)
+      return 0;
+   if (grate_stream_push(stream, host1x_opcode_setclass(class_id, 0, 0)) != 0)
+      return -1;
+
+   stream->class_id = class_id;
+   return 0;
+}
+
+/*
+ * grate_stream_end(stream)
+ *
+ * Mark end of stream: add the final syncpoint increment (OP_DONE condition)
+ * and move the stream from the CONSTRUCT state to READY so it can be flushed.
+ */
+
+int
+grate_stream_end(struct grate_stream *stream)
+{
+   int ret;
+
+   if (!(stream && stream->status == GRATE_STREAM_CONSTRUCT)) {
+      ErrorMsg("Stream status isn't CONSTRUCT\n");
+      return -1;
+   }
+
+   ret = drm_tegra_pushbuf_sync(stream->buffer.pushbuf,
+                                DRM_TEGRA_SYNCPT_COND_OP_DONE);
+   if (ret != 0) {
+      stream->status = GRATE_STREAM_CONSTRUCTION_FAILED;
+      ErrorMsg("drm_tegra_pushbuf_sync() failed %d\n", ret);
+      return -1;
+   }
+
+   stream->status = GRATE_STREAM_READY;
+
+   return 0;
+}
+
+/*
+ * grate_reloc(var_ptr, bo, offset, var_offset)
+ *
+ * Bundle the arguments into a struct grate_reloc, returned by value, for use
+ * in the variadic tail of grate_stream_push_words().
+ */
+
+struct grate_reloc
+grate_reloc(const void *var_ptr, struct drm_tegra_bo *bo,
+            uint32_t offset, uint32_t var_offset)
+{
+   return (struct grate_reloc) {
+      .addr = var_ptr, .bo = bo, .offset = offset, .var_offset = var_offset };
+}
+
+/*
+ * grate_stream_push_words(stream, addr, words, num_relocs, ...)
+ *
+ * Copy words 32-bit words from addr into the stream. The variadic tail holds
+ * num_relocs struct grate_reloc values (see grate_reloc()); var_offset is the
+ * byte offset, inside the copied block, of the word to relocate. Returns 0/-1.
+ */
+
+int grate_stream_push_words(struct grate_stream *stream, const void *addr,
+                            unsigned words, int num_relocs, ...)
+{
+   va_list ap;
+   int ret;
+
+   if (!(stream && stream->status == GRATE_STREAM_CONSTRUCT)) {
+      ErrorMsg("Stream status isn't CONSTRUCT\n");
+      return -1;
+   }
+
+   ret = drm_tegra_pushbuf_prepare(stream->buffer.pushbuf, words);
+   if (ret != 0) {
+      stream->status = GRATE_STREAM_CONSTRUCTION_FAILED;
+      ErrorMsg("drm_tegra_pushbuf_prepare() failed %d\n", ret);
+      return -1;
+   }
+
+   /* Class id should be set explicitly, for simplicity. */
+   if (stream->class_id == 0) {
+      stream->status = GRATE_STREAM_CONSTRUCTION_FAILED;
+      ErrorMsg("HOST1X class not specified\n");
+      return -1;
+   }
+
+   /* Copy the contents */
+   uint32_t *pushbuf_ptr = stream->buffer.pushbuf->ptr;
+   memcpy(pushbuf_ptr, addr, words * sizeof(uint32_t));
+
+   /* Copy relocs; each one has to patch a word inside the copied block */
+   va_start(ap, num_relocs);
+   for (; num_relocs > 0; num_relocs--) {
+      struct grate_reloc reloc_arg = va_arg(ap, struct grate_reloc);
+      if (reloc_arg.var_offset / sizeof(uint32_t) >= words) {
+         ErrorMsg("reloc offset %u out of range\n", reloc_arg.var_offset);
+         ret = -1;
+         break;
+      }
+      stream->buffer.pushbuf->ptr  = pushbuf_ptr;
+      stream->buffer.pushbuf->ptr += reloc_arg.var_offset / sizeof(uint32_t);
+      ret = drm_tegra_pushbuf_relocate(stream->buffer.pushbuf, reloc_arg.bo,
+                                       reloc_arg.offset, 0);
+      if (ret != 0) {
+         ErrorMsg("drm_tegra_pushbuf_relocate() failed %d\n", ret);
+         break;
+      }
+   }
+   va_end(ap);
+   if (ret != 0)
+      stream->status = GRATE_STREAM_CONSTRUCTION_FAILED;
+   stream->buffer.pushbuf->ptr = pushbuf_ptr + words;
+   return ret ? -1 : 0;
+}
diff --git a/src/gallium/drivers/grate/grate_stream.h b/src/gallium/drivers/grate/grate_stream.h
new file mode 100644
index 0000000..e3f57d9
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_stream.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2016 Dmitry Osipenko <digetx@gmail.com>
+ * Copyright (C) 2012-2013 NVIDIA Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Arto Merilainen <amerilainen@nvidia.com>
+ */
+
+#ifndef GRATE_STREAM_H_
+#define GRATE_STREAM_H_
+
+#include <stdint.h>
+#include <libdrm/tegra.h>
+
+#include "class_ids.h"
+
+enum grate_stream_status {
+   GRATE_STREAM_FREE,                /* set by create and by flush */
+   GRATE_STREAM_CONSTRUCT,           /* set by a successful begin */
+   GRATE_STREAM_CONSTRUCTION_FAILED, /* set when a push or end call fails */
+   GRATE_STREAM_READY,               /* set by a successful end */
+};
+
+struct grate_command_buffer {
+   struct drm_tegra_pushbuf *pushbuf;
+};
+
+struct grate_stream {
+   enum grate_stream_status status;
+
+   struct drm_tegra_job *job;
+   struct drm_tegra_channel *channel;
+
+   struct grate_command_buffer buffer;
+   int num_words;
+   uint32_t class_id;
+};
+
+struct grate_reloc {
+   const void *addr;
+   struct drm_tegra_bo *bo;
+   uint32_t offset;
+   unsigned var_offset;
+};
+
+/* Stream operations */
+int
+grate_stream_create(struct drm_tegra *drm,
+                    struct drm_tegra_channel *channel,
+                    struct grate_stream *stream,
+                    uint32_t words_num);
+void
+grate_stream_destroy(struct grate_stream *stream);
+
+int
+grate_stream_begin(struct grate_stream *stream);
+
+int
+grate_stream_end(struct grate_stream *stream);
+
+int
+grate_stream_flush(struct grate_stream *stream);
+
+int
+grate_stream_push(struct grate_stream *stream, uint32_t word);
+
+int
+grate_stream_push_setclass(struct grate_stream *stream,
+                           enum host1x_class class_id);
+
+int
+grate_stream_push_reloc(struct grate_stream *stream,
+                        struct drm_tegra_bo *bo, unsigned offset);
+
+struct grate_reloc
+grate_reloc(const void *var_ptr, struct drm_tegra_bo *bo,
+            uint32_t offset, uint32_t var_offset);
+
+int
+grate_stream_push_words(struct grate_stream *stream, const void *addr,
+                        unsigned words, int num_relocs, ...);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_surface.c b/src/gallium/drivers/grate/grate_surface.c
new file mode 100644
index 0000000..e7f90a2
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_surface.c
@@ -0,0 +1,45 @@
+#include <stdio.h>
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+
+#include "grate_surface.h"
+
+static struct pipe_surface *
+grate_create_surface(struct pipe_context *context,
+                     struct pipe_resource *resource,
+                     const struct pipe_surface *template)
+{
+   struct grate_surface *surface = CALLOC_STRUCT(grate_surface);
+   if (surface == NULL)
+      return NULL;
+
+   /* fill in the base surface for the mip level named by the template */
+   struct pipe_surface *base = &surface->base;
+   const unsigned int mip = template->u.tex.level;
+   pipe_reference_init(&base->reference, 1);
+   pipe_resource_reference(&base->texture, resource);
+   base->context = context;
+   base->format = template->format;
+   base->width = u_minify(resource->width0, mip);
+   base->height = u_minify(resource->height0, mip);
+   base->u.tex.level = mip;
+   base->u.tex.first_layer = template->u.tex.first_layer;
+   base->u.tex.last_layer = template->u.tex.last_layer;
+   return base;
+}
+
+static void
+grate_surface_destroy(struct pipe_context *context,
+                      struct pipe_surface *surface)
+{
+   pipe_resource_reference(&surface->texture, NULL);
+   FREE(surface);
+}
+
+void
+grate_context_surface_init(struct pipe_context *context)
+{
+   context->create_surface = grate_create_surface;
+   context->surface_destroy = grate_surface_destroy;
+}
diff --git a/src/gallium/drivers/grate/grate_surface.h b/src/gallium/drivers/grate/grate_surface.h
new file mode 100644
index 0000000..00c967d
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_surface.h
@@ -0,0 +1,14 @@
+#ifndef GRATE_SURFACE_H
+#define GRATE_SURFACE_H
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+
+struct grate_surface {
+   struct pipe_surface base;
+};
+
+void
+grate_context_surface_init(struct pipe_context *context);
+
+#endif
diff --git a/src/gallium/drivers/grate/grate_vpe_ir.c b/src/gallium/drivers/grate/grate_vpe_ir.c
new file mode 100644
index 0000000..4afec11
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_vpe_ir.c
@@ -0,0 +1,211 @@
+#include <assert.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+
+#include "grate_vpe_ir.h"
+
+static unsigned
+vpe_write_mask(unsigned input)
+{
+   unsigned mirrored = 0; /* low four bits of input, in reversed order */
+   for (unsigned bit = 0; bit < 4; ++bit)
+      mirrored |= ((input >> bit) & 1) << (3 - bit);
+   return mirrored;
+}
+
+static unsigned
+vpe_swizzle(enum vpe_swz swizzle[4])
+{
+   unsigned packed = 0; /* two bits per component, swizzle[0] uppermost */
+   for (int comp = 0; comp < 4; ++comp)
+      packed |= swizzle[comp] << (6 - 2 * comp);
+   return packed;
+}
+
+static unsigned
+vpe_src_reg(struct vpe_src_operand op)
+{
+   /* encode a source operand the way the 17-bit rA/rB/rC fields expect it */
+   union {
+      struct __attribute__((packed)) {
+         unsigned type : 2;    //  0 .. 1
+         unsigned index : 6;   //  2 .. 7
+         unsigned swizzle : 8; //  8 .. 15
+         unsigned negate : 1;  //  16
+      };
+      unsigned value;
+   } u;
+   u.value = 0; /* the bit-fields do not cover all of value: clear the rest */
+   u.type = op.file;
+   u.index = op.index;
+   u.swizzle = vpe_swizzle(op.swizzle);
+   u.negate = op.negate;
+   return u.value;
+}
+
+/* Pack instr into dst[0..3]. Zeroes attrib/uniform source indices in *instr. */
+void
+grate_vpe_pack(uint32_t *dst, struct vpe_instr *instr, bool end_of_program)
+{
+   /* we can only handle one output register per instruction */
+   assert(instr->vec.dst.file != VPE_DST_FILE_OUTPUT ||
+          instr->scalar.dst.file != VPE_DST_FILE_OUTPUT);
+
+   union {
+      struct __attribute__((packed)) {
+         unsigned end_of_program : 1;                       //   0
+         unsigned constant_relative_addressing_enable : 1;  //   1
+         unsigned export_write_index : 5;                   //   2 .. 6
+         unsigned scalar_rD_index : 6;                      //   7 .. 12
+         unsigned vector_op_write_mask : 4;                 //  13 .. 16
+         unsigned scalar_op_write_mask : 4;                 //  17 .. 20
+         unsigned rC : 17;                                  //  21 .. 37
+         unsigned rB : 17;                                  //  38 .. 54
+         unsigned rA : 17;                                  //  55 .. 71
+         unsigned attribute_fetch_index : 4;                //  72 .. 75
+         unsigned uniform_fetch_index : 10;                 //  76 .. 85
+         unsigned vector_opcode : 5;                        //  86 .. 90
+         unsigned scalar_opcode : 5;                        //  91 .. 95
+         unsigned address_register_select : 2;              //  96 .. 97
+         unsigned predicate_swizzle : 8;                    //  98 .. 105
+         unsigned predicate_lt : 1;                         // 106
+         unsigned predicate_eq : 1;                         // 107
+         unsigned predicate_gt : 1;                         // 108
+         unsigned condition_check : 1;                      // 109
+         unsigned condition_set : 1;                        // 110
+         unsigned vector_rD_index : 6;                      // 111 .. 116
+         unsigned rA_absolute : 1;                          // 117
+         unsigned rB_absolute : 1;                          // 118
+         unsigned rC_absolute : 1;                          // 119
+         unsigned bit120 : 1;                               // 120
+         unsigned condition_register_index : 1;             // 121
+         unsigned saturate_result : 1;                      // 122
+         unsigned attribute_relative_addressing_enable : 1; // 123
+         unsigned export_relative_addressing_enable : 1;    // 124
+         unsigned condition_flags_write_enable : 1;         // 125
+         unsigned export_vector_write_enable : 1;           // 126
+         unsigned bit127 : 1;                               // 127
+      };
+
+      uint32_t words[4];
+   } tmp = {
+      .predicate_lt = 1,
+      .predicate_eq = 1,
+      .predicate_gt = 1,
+
+      .predicate_swizzle = (0 << 6) | (1 << 4) | (2 << 2) | 3,
+   };
+
+   /* find the attribute/uniform fetch-values, and zero out the index
+    * for these registers.
+    */
+   int attr_fetch = -1, uniform_fetch = -1;
+   for (int i = 0; i < 3; ++i) {
+      switch (instr->vec.src[i].file) {
+      case VPE_SRC_FILE_ATTRIB:
+         assert(attr_fetch < 0 ||
+                attr_fetch == instr->vec.src[i].index);
+         attr_fetch = instr->vec.src[i].index;
+         instr->vec.src[i].index = 0;
+         break;
+
+      case VPE_SRC_FILE_UNIFORM:
+         assert(uniform_fetch < 0 ||
+                uniform_fetch == instr->vec.src[i].index);
+         uniform_fetch = instr->vec.src[i].index;
+         instr->vec.src[i].index = 0;
+         break;
+
+      default: /* nothing */
+         break;
+      }
+   }
+
+   switch (instr->scalar.src.file) {
+   case VPE_SRC_FILE_ATTRIB:
+      assert(attr_fetch < 0 ||
+             attr_fetch == instr->scalar.src.index);
+      attr_fetch = instr->scalar.src.index;
+      instr->scalar.src.index = 0;
+      break;
+
+      case VPE_SRC_FILE_UNIFORM:
+         assert(uniform_fetch < 0 ||
+                uniform_fetch == instr->scalar.src.index);
+         uniform_fetch = instr->scalar.src.index;
+         instr->scalar.src.index = 0;
+         break;
+
+      default: /* nothing */
+         break;
+
+   }
+
+   tmp.attribute_fetch_index = attr_fetch >= 0 ? attr_fetch : 0;
+   tmp.uniform_fetch_index = uniform_fetch >= 0 ? uniform_fetch : 0;
+
+   tmp.vector_opcode = instr->vec.op;
+   switch (instr->vec.dst.file) {
+   case VPE_DST_FILE_TEMP:
+      tmp.vector_rD_index = instr->vec.dst.index;
+      tmp.export_write_index = 31;
+      break;
+
+   case VPE_DST_FILE_OUTPUT:
+      tmp.vector_rD_index = 63; // disable register-write
+      tmp.export_vector_write_enable = 1;
+      tmp.export_write_index = instr->vec.dst.index;
+      break;
+
+   case VPE_DST_FILE_UNDEF:
+      // assert(0);  // TODO: consult NOP
+      break;
+
+   default:
+      unreachable("illegal enum vpe_dst_file value");
+   }
+   tmp.vector_op_write_mask = vpe_write_mask(instr->vec.dst.write_mask);
+   /* NOTE(review): scalar TEMP below overwrites a vector OUTPUT export index */
+   tmp.scalar_opcode = instr->scalar.op;
+   switch (instr->scalar.dst.file) {
+   case VPE_DST_FILE_TEMP:
+      tmp.scalar_rD_index = instr->scalar.dst.index;
+      tmp.export_write_index = 31;
+      break;
+
+   case VPE_DST_FILE_OUTPUT:
+      tmp.scalar_rD_index = 63; // disable register-write
+      tmp.export_vector_write_enable = 0;
+      tmp.export_write_index = instr->scalar.dst.index;
+      break;
+
+   case VPE_DST_FILE_UNDEF:
+      // assert(0);  // TODO: consult NOP
+      break;
+
+   default:
+      unreachable("illegal enum vpe_dst_file value");
+   }
+   tmp.scalar_op_write_mask = vpe_write_mask(instr->scalar.dst.write_mask);
+   /* NOTE(review): dst.saturate is never copied into saturate_result here */
+   tmp.rA = vpe_src_reg(instr->vec.src[0]);
+   tmp.rA_absolute = instr->vec.src[0].absolute;
+
+   tmp.rB = vpe_src_reg(instr->vec.src[1]);
+   tmp.rB_absolute = instr->vec.src[1].absolute;
+
+   if (instr->vec.src[2].file != VPE_SRC_FILE_UNDEF) {
+      tmp.rC = vpe_src_reg(instr->vec.src[2]);
+      tmp.rC_absolute = instr->vec.src[2].absolute;
+   } else if (instr->scalar.src.file != VPE_SRC_FILE_UNDEF) {
+      tmp.rC = vpe_src_reg(instr->scalar.src);
+      tmp.rC_absolute = instr->scalar.src.absolute;
+   }
+
+   tmp.end_of_program = end_of_program;
+
+   /* copy packed instruction into destination, last word first */
+   for (int i = 0; i < 4; ++i)
+      dst[i] = tmp.words[3 - i];
+}
diff --git a/src/gallium/drivers/grate/grate_vpe_ir.h b/src/gallium/drivers/grate/grate_vpe_ir.h
new file mode 100644
index 0000000..7a16dc3
--- /dev/null
+++ b/src/gallium/drivers/grate/grate_vpe_ir.h
@@ -0,0 +1,115 @@
+#ifndef GRATE_VPE_IR_H
+#define GRATE_VPE_IR_H
+
+#include "util/list.h"
+
+#include "stdbool.h"
+#include "stdint.h"
+
+enum vpe_src_file {
+   VPE_SRC_FILE_UNDEF = 0,
+   VPE_SRC_FILE_TEMP = 1,
+   VPE_SRC_FILE_ATTRIB = 2,
+   VPE_SRC_FILE_UNIFORM = 3,
+};
+
+enum vpe_dst_file {
+   VPE_DST_FILE_TEMP,
+   VPE_DST_FILE_OUTPUT,
+   VPE_DST_FILE_UNDEF
+};
+
+enum vpe_swz {
+   VPE_SWZ_X = 0,
+   VPE_SWZ_Y = 1,
+   VPE_SWZ_Z = 2,
+   VPE_SWZ_W = 3
+};
+
+enum vpe_vec_op {
+   VPE_VEC_OP_NOP = 0,
+   VPE_VEC_OP_MOV = 1,
+   VPE_VEC_OP_MUL = 2,
+   VPE_VEC_OP_ADD = 3,
+   VPE_VEC_OP_MAD = 4,
+   VPE_VEC_OP_DP3 = 5,
+   VPE_VEC_OP_DPH = 6,
+   VPE_VEC_OP_DP4 = 7,
+   VPE_VEC_OP_DST = 8,
+   VPE_VEC_OP_MIN = 9,
+   VPE_VEC_OP_MAX = 10,
+   VPE_VEC_OP_SLT = 11,
+   VPE_VEC_OP_SGE = 12,
+   VPE_VEC_OP_ARL = 13,
+   VPE_VEC_OP_FRC = 14,
+   VPE_VEC_OP_FLR = 15,
+   VPE_VEC_OP_SEQ = 16,
+   VPE_VEC_OP_SFL = 17,
+   VPE_VEC_OP_SGT = 18,
+   VPE_VEC_OP_SLE = 19,
+   VPE_VEC_OP_SNE = 20,
+   VPE_VEC_OP_STR = 21,
+   VPE_VEC_OP_SSG = 22,
+   VPE_VEC_OP_ARR = 23,
+   VPE_VEC_OP_ARA = 24,
+   VPE_VEC_OP_TXL = 25,
+   VPE_VEC_OP_PUSHA = 26,
+   VPE_VEC_OP_POPA = 27
+};
+
+enum vpe_scalar_op {
+   VPE_SCALAR_OP_NOP = 0,
+   VPE_SCALAR_OP_MOV = 1,
+   VPE_SCALAR_OP_RCP = 2,
+   VPE_SCALAR_OP_RCC = 3,
+   VPE_SCALAR_OP_RSQ = 4,
+   VPE_SCALAR_OP_EXP = 5,
+   VPE_SCALAR_OP_LOG = 6,
+   VPE_SCALAR_OP_LIT = 7,
+   VPE_SCALAR_OP_BRA = 9,
+   VPE_SCALAR_OP_CAL = 11,
+   VPE_SCALAR_OP_RET = 12,
+   VPE_SCALAR_OP_LG2 = 13,
+   VPE_SCALAR_OP_EX2 = 14,
+   VPE_SCALAR_OP_SIN = 15,
+   VPE_SCALAR_OP_COS = 16,
+   VPE_SCALAR_OP_PUSHA = 19,
+   VPE_SCALAR_OP_POPA = 20
+};
+
+struct vpe_dst_operand {
+   enum vpe_dst_file file;
+   int index;
+   unsigned int write_mask;
+   bool saturate;
+};
+
+struct vpe_src_operand {
+   enum vpe_src_file file;
+   int index;
+   enum vpe_swz swizzle[4];
+   bool negate, absolute;
+};
+
+struct vpe_vec_instr {
+   enum vpe_vec_op op;
+   struct vpe_dst_operand dst;
+   struct vpe_src_operand src[3];
+};
+
+struct vpe_scalar_instr {
+   enum vpe_scalar_op op;
+   struct vpe_dst_operand dst;
+   struct vpe_src_operand src;
+};
+
+struct vpe_instr {
+   struct list_head link;
+   struct vpe_vec_instr vec;
+   struct vpe_scalar_instr scalar;
+};
+
+void
+grate_vpe_pack(uint32_t *dst, struct vpe_instr *instr, bool end_of_program);
+
+#endif
diff --git a/src/gallium/drivers/grate/host1x01_hardware.h b/src/gallium/drivers/grate/host1x01_hardware.h
new file mode 100644
index 0000000..a6f6519
--- /dev/null
+++ b/src/gallium/drivers/grate/host1x01_hardware.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2012-2013 NVIDIA Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Arto Merilainen <amerilainen@nvidia.com>
+ */
+
+#ifndef HOST1X01_HARDWARE_H_
+#define HOST1X01_HARDWARE_H_
+
+#include <linux/types.h>
+#include "hw_host1x01_uclass.h"
+
+/* channel registers */
+#define HOST1X_CHANNEL_MAP_SIZE_BYTES 16384
+#define HOST1X_SYNC_MLOCK_NUM 16
+
+/* sync registers */
+#define HOST1X_CHANNEL_SYNC_REG_BASE   0x3000
+#define HOST1X_NB_MLOCKS 16
+
+#define BIT(nr)	(1UL << (nr))
+
+static inline uint32_t host1x_class_host_wait_syncpt(unsigned indx,
+						      unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx) |
+		host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline uint32_t host1x_class_host_load_syncpt_base(unsigned indx,
+							  unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx) |
+		host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline uint32_t host1x_class_host_wait_syncpt_base(unsigned indx,
+							  unsigned base_indx,
+							  unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx) |
+		host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) |
+		host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline uint32_t host1x_class_host_incr_syncpt_base(unsigned base_indx,
+							  unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) |
+		host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline uint32_t host1x_class_host_incr_syncpt(unsigned cond,
+						     unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond) |
+		host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline uint32_t host1x_class_host_indoff_reg_write(unsigned mod_id,
+							  unsigned offset,
+							  int auto_inc)
+{
+	uint32_t v = host1x_uclass_indoff_indbe_f(0xf) |
+		host1x_uclass_indoff_indmodid_f(mod_id) |
+		host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline uint32_t host1x_class_host_indoff_reg_read(unsigned mod_id,
+							 unsigned offset,
+							 int auto_inc)
+{
+	uint32_t v = host1x_uclass_indoff_indmodid_f(mod_id) |
+		host1x_uclass_indoff_indroffset_f(offset) |
+		host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+
+/* cdma opcodes */
+static inline uint32_t host1x_opcode_setclass(unsigned class_id,
+					      unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline uint32_t host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline uint32_t host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline uint32_t host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline uint32_t host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline uint32_t host1x_mask2(unsigned x, unsigned y)
+{
+	return 1 | (1 << (y - x));
+}
+#endif
diff --git a/src/gallium/drivers/grate/hw_host1x01_uclass.h b/src/gallium/drivers/grate/hw_host1x01_uclass.h
new file mode 100644
index 0000000..334af05
--- /dev/null
+++ b/src/gallium/drivers/grate/hw_host1x01_uclass.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2012-2013 NVIDIA Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *	Arto Merilainen <amerilainen@nvidia.com>
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *	 <x>_r(void) : Returns the offset for register <x>.
+  *
+  *	 <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *	 <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *	 <x>_<y>_f(uint32_t v) : Returns a value based on 'v' which has been shifted
+  *		 and masked to place it at field <y> of register <x>.  This value
+  *		 can be |'d with others to produce a full register value for
+  *		 register <x>.
+  *
+  *	 <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *		 value can be ~'d and then &'d to clear the value of field <y> for
+  *		 register <x>.
+  *
+  *	 <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *		 to place it at field <y> of register <x>.  This value can be |'d
+  *		 with others to produce a full register value for <x>.
+  *
+  *	 <x>_<y>_v(uint32_t r) : Returns the value of field <y> from a full register
+  *		 <x> value 'r' after being shifted to place its LSB at bit 0.
+  *		 This value is suitable for direct comparison with other unshifted
+  *		 values appropriate for use in field <y> of register <x>.
+  *
+  *	 <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *		 field <y> of register <x>.  This value is suitable for direct
+  *		 comparison with unshifted values appropriate for use in field <y>
+  *		 of register <x>.
+  */
+
+#ifndef HW_HOST1X_UCLASS_HOST1X_H_
+#define HW_HOST1X_UCLASS_HOST1X_H_
+
+static inline uint32_t host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;	/* register offset */
+}
+static inline uint32_t host1x_uclass_incr_syncpt_cond_f(uint32_t v)
+{
+	return (v & 0xff) << 8;	/* field at bits 15:8 */
+}
+static inline uint32_t host1x_uclass_incr_syncpt_cond_op_done_v(void)
+{
+	return 1;	/* unshifted field constant */
+}
+static inline uint32_t host1x_uclass_incr_syncpt_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 0;	/* field at bits 7:0 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;	/* register offset */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 24;	/* field at bits 31:24 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_thresh_f(uint32_t v)
+{
+	return (v & 0xffffff) << 0;	/* field at bits 23:0 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x09;	/* register offset */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_base_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 24;	/* field at bits 31:24 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_base_base_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 16;	/* field at bits 23:16 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_base_offset_f(uint32_t v)
+{
+	return (v & 0xffff) << 0;	/* field at bits 15:0 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_incr_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 24;	/* field at bits 31:24 */
+}
+static inline uint32_t host1x_uclass_wait_syncpt_incr_r(void)
+{
+	return 0x0a;	/* register offset */
+}
+static inline uint32_t host1x_uclass_load_syncpt_base_base_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 24;	/* field at bits 31:24 */
+}
+static inline uint32_t host1x_uclass_load_syncpt_base_value_f(uint32_t v)
+{
+	return (v & 0xffffff) << 0;	/* field at bits 23:0 */
+}
+static inline uint32_t host1x_uclass_incr_syncpt_base_base_indx_f(uint32_t v)
+{
+	return (v & 0xff) << 24;	/* field at bits 31:24 */
+}
+static inline uint32_t host1x_uclass_incr_syncpt_base_offset_f(uint32_t v)
+{
+	return (v & 0xffffff) << 0;	/* field at bits 23:0 */
+}
+static inline uint32_t host1x_uclass_delay_usec_r(void)
+{
+	return 0x10;	/* register offset */
+}
+static inline uint32_t host1x_uclass_indoff_r(void)
+{
+	return 0x2d;	/* register offset */
+}
+static inline uint32_t host1x_uclass_indoff_indbe_f(uint32_t v)
+{
+	return (v & 0xf) << 28;	/* field at bits 31:28 */
+}
+static inline uint32_t host1x_uclass_indoff_autoinc_f(uint32_t v)
+{
+	return (v & 0x1) << 27;	/* single-bit field at bit 27 */
+}
+static inline uint32_t host1x_uclass_indoff_indmodid_f(uint32_t v)
+{
+	return (v & 0xff) << 18;	/* field at bits 25:18 */
+}
+static inline uint32_t host1x_uclass_indoff_indroffset_f(uint32_t v)
+{
+	return (v & 0xffff) << 2;	/* field at bits 17:2 */
+}
+static inline uint32_t host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;	/* unshifted field constant */
+}
+#endif
diff --git a/src/gallium/drivers/grate/meson.build b/src/gallium/drivers/grate/meson.build
new file mode 100644
index 0000000..c05475a
--- /dev/null
+++ b/src/gallium/drivers/grate/meson.build
@@ -0,0 +1,68 @@
+# Copyright © 2017 Erik Faye-Lund
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+files_libgrate = files(  # sources and headers that make up the grate static library
+  'class_ids.h',
+  'host1x01_hardware.h',
+  'hw_host1x01_uclass.h',
+  'grate_common.h',
+  'grate_compiler_fp.c',
+  'grate_compiler_vpe.c',
+  'grate_compiler.h',
+  'grate_context.c',
+  'grate_context.h',
+  'grate_draw.c',
+  'grate_draw.h',
+  'grate_fp_ir.c',
+  'grate_fp_ir.h',
+  'grate_fp_vliw.h',
+  'grate_program.c',
+  'grate_program.h',
+  'grate_resource.c',
+  'grate_resource.h',
+  'grate_screen.c',
+  'grate_screen.h',
+  'grate_state.c',
+  'grate_state.h',
+  'grate_stream.c',
+  'grate_stream.h',
+  'grate_surface.c',
+  'grate_surface.h',
+  'grate_vpe_ir.c',
+  'grate_vpe_ir.h'
+)
+
+libgrate = static_library(  # the driver itself, linked into targets via driver_grate below
+  'grate',
+  [files_libgrate],
+  include_directories : [
+    inc_src, inc_include, inc_gallium, inc_gallium_aux,
+    inc_gallium_drivers, inc_drm_uapi,
+  ],
+  c_args : [c_vis_args],
+  cpp_args : [cpp_vis_args],
+  dependencies : [dep_libdrm, dep_libdrm_tegra],
+  build_by_default : false,  # only built when some other target depends on it
+)
+
+driver_grate = declare_dependency(  # what gallium targets list to pull in this driver
+  compile_args : '-DGALLIUM_GRATE',  # macro checked by the gallium targets (e.g. targets/dri/target.c)
+  link_with : [libgrate, libtegrawinsys],  # libtegrawinsys comes from winsys/tegra/drm/meson.build
+)
diff --git a/src/gallium/drivers/grate/tgr_3d.xml.h b/src/gallium/drivers/grate/tgr_3d.xml.h
new file mode 100644
index 0000000..16192db
--- /dev/null
+++ b/src/gallium/drivers/grate/tgr_3d.xml.h
@@ -0,0 +1,451 @@
+#ifndef TGR_3D_XML
+#define TGR_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://github.com/envytools/envytools/
+git clone https://github.com/envytools/envytools.git
+
+The rules-ng-ng source files this header was generated from are:
+- /home/kusma/src/envytools/rnndb/tgr_3d.xml          (  14496 bytes, from 2017-09-06 20:15:12)
+- /home/kusma/src/envytools/rnndb/grate_copyright.xml (   1556 bytes, from 2017-09-06 18:11:45)
+
+Copyright (C) 2012-2017 by the following authors:
+- Erik Faye-Lund <kusmabite@gmail.com> (kusma)
+- Dmitry Osipenko <digetx@gmail.com> (digetx)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define TGR3D_INDEX_MODE_NONE					0x00000000
+#define TGR3D_INDEX_MODE_UINT8					0x00000001
+#define TGR3D_INDEX_MODE_UINT16					0x00000002
+#define TGR3D_PRIMITIVE_TYPE_POINTS				0x00000000
+#define TGR3D_PRIMITIVE_TYPE_LINES				0x00000001
+#define TGR3D_PRIMITIVE_TYPE_LINE_LOOP				0x00000002
+#define TGR3D_PRIMITIVE_TYPE_LINE_STRIP				0x00000003
+#define TGR3D_PRIMITIVE_TYPE_TRIANGLES				0x00000004
+#define TGR3D_PRIMITIVE_TYPE_TRIANGLE_STRIP			0x00000005
+#define TGR3D_PRIMITIVE_TYPE_TRIANGLE_FAN			0x00000006
+#define TGR3D_PROVOKING_VERTEX_FIRST				0x00000000
+#define TGR3D_PROVOKING_VERTEX_LAST				0x00000001
+#define TGR3D_CULL_FACE_NONE					0x00000000
+#define TGR3D_CULL_FACE_CCW					0x00000001
+#define TGR3D_CULL_FACE_CW					0x00000002
+#define TGR3D_CULL_FACE_BOTH					0x00000003
+#define TGR3D_ATTRIB_TYPE_UBYTE					0x00000000
+#define TGR3D_ATTRIB_TYPE_UBYTE_NORM				0x00000001
+#define TGR3D_ATTRIB_TYPE_SBYTE					0x00000002
+#define TGR3D_ATTRIB_TYPE_SBYTE_NORM				0x00000003
+#define TGR3D_ATTRIB_TYPE_USHORT				0x00000004
+#define TGR3D_ATTRIB_TYPE_USHORT_NORM				0x00000005
+#define TGR3D_ATTRIB_TYPE_SSHORT				0x00000006
+#define TGR3D_ATTRIB_TYPE_SSHORT_NORM				0x00000007
+#define TGR3D_ATTRIB_TYPE_UINT					0x00000008
+#define TGR3D_ATTRIB_TYPE_UINT_NORM				0x00000009
+#define TGR3D_ATTRIB_TYPE_SINT					0x0000000a
+#define TGR3D_ATTRIB_TYPE_SINT_NORM				0x0000000b
+#define TGR3D_ATTRIB_TYPE_FIXED16				0x0000000c
+#define TGR3D_ATTRIB_TYPE_FLOAT32				0x0000000d
+#define TGR3D_ATTRIB_TYPE_FLOAT16				0x0000000e
+#define TGR3D_PIXEL_FORMAT_A8					0x00000001
+#define TGR3D_PIXEL_FORMAT_L8					0x00000002
+#define TGR3D_PIXEL_FORMAT_S8					0x00000003
+#define TGR3D_PIXEL_FORMAT_LA88					0x00000004
+#define TGR3D_PIXEL_FORMAT_RGB565				0x00000006
+#define TGR3D_PIXEL_FORMAT_RGBA5551				0x00000009
+#define TGR3D_PIXEL_FORMAT_RGBA4444				0x0000000a
+#define TGR3D_PIXEL_FORMAT_D16_LINEAR				0x0000000b
+#define TGR3D_PIXEL_FORMAT_D16_NONLINEAR			0x0000000c
+#define TGR3D_PIXEL_FORMAT_RGBA8888				0x0000000d
+#define TGR3D_PIXEL_FORMAT_RGBA_FP32				0x00000012
+#define TGR3D_COMPARE_FUNC_NEVER				0x00000000
+#define TGR3D_COMPARE_FUNC_LESS					0x00000001
+#define TGR3D_COMPARE_FUNC_EQUAL				0x00000002
+#define TGR3D_COMPARE_FUNC_LEQUAL				0x00000003
+#define TGR3D_COMPARE_FUNC_GREATER				0x00000004
+#define TGR3D_COMPARE_FUNC_NOTEQUAL				0x00000005
+#define TGR3D_COMPARE_FUNC_GEQUAL				0x00000006
+#define TGR3D_COMPARE_FUNC_ALWAYS				0x00000007
+#define TGR3D_SYNCPT_CONDITIONAL_IMMEDIATE			0x00000000
+#define TGR3D_SYNCPT_CONDITIONAL_OP_DONE			0x00000001
+#define TGR3D_SYNCPT_CONDITIONAL_RD_DONE			0x00000002
+#define TGR3D_SYNCPT_CONDITIONAL_WR_SAFE			0x00000003
+#define TGR3D_STENCIL_OP_ZERO					0x00000000
+#define TGR3D_STENCIL_OP_KEEP					0x00000001
+#define TGR3D_STENCIL_OP_REPLACE				0x00000002
+#define TGR3D_STENCIL_OP_INCR					0x00000003
+#define TGR3D_STENCIL_OP_DECR					0x00000004
+#define TGR3D_STENCIL_OP_INVERT					0x00000005
+#define TGR3D_STENCIL_OP_INCR_WRAP				0x00000006
+#define TGR3D_STENCIL_OP_DECR_WRAP				0x00000007
+#define TGR3D_INCR_SYNCPT					0x00000000
+#define TGR3D_INCR_SYNCPT_SYNCPT_INDEX__MASK			0x000000ff
+#define TGR3D_INCR_SYNCPT_SYNCPT_INDEX__SHIFT			0
+#define TGR3D_INCR_SYNCPT_CONDITION__MASK			0x00000700
+#define TGR3D_INCR_SYNCPT_CONDITION__SHIFT			8
+
+#define TGR3D_WAIT_SYNCPT					0x00000008
+
+#define TGR3D_WAIT_SYNCPT_BASE					0x00000009
+#define TGR3D_WAIT_SYNCPT_BASE_OFFSET__MASK			0x0000ffff
+#define TGR3D_WAIT_SYNCPT_BASE_OFFSET__SHIFT			0
+#define TGR3D_WAIT_SYNCPT_BASE_BASE_IDX__MASK			0x00ff0000
+#define TGR3D_WAIT_SYNCPT_BASE_BASE_IDX__SHIFT			16
+#define TGR3D_WAIT_SYNCPT_BASE_IDX__MASK			0xff000000
+#define TGR3D_WAIT_SYNCPT_BASE_IDX__SHIFT			24
+
+#define TGR3D_LOAD_SYNCPT_BASE					0x0000000b
+
+#define TGR3D_INCR_SYNCPT_BASE					0x0000000c
+#define TGR3D_INCR_SYNCPT_BASE_OFFSET__MASK			0x00ffffff
+#define TGR3D_INCR_SYNCPT_BASE_OFFSET__SHIFT			0
+#define TGR3D_INCR_SYNCPT_BASE_BASE_IDX__MASK			0xff000000
+#define TGR3D_INCR_SYNCPT_BASE_BASE_IDX__SHIFT			24
+
+#define TGR3D_INDOFF2						0x0000002c
+
+#define TGR3D_INDOFF						0x0000002d
+
+#define TGR3D_ATTRIB_PTR(i0)				       (0x00000100 + 0x2*(i0))
+#define TGR3D_ATTRIB_PTR__ESIZE					0x00000002
+#define TGR3D_ATTRIB_PTR__LEN					0x00000010
+
+#define TGR3D_ATTRIB_MODE(i0)				       (0x00000101 + 0x2*(i0))
+#define TGR3D_ATTRIB_MODE__ESIZE				0x00000002
+#define TGR3D_ATTRIB_MODE__LEN					0x00000010
+#define TGR3D_ATTRIB_MODE_TYPE__MASK				0x0000000f
+#define TGR3D_ATTRIB_MODE_TYPE__SHIFT				0
+#define TGR3D_ATTRIB_MODE_SIZE__MASK				0x000000f0
+#define TGR3D_ATTRIB_MODE_SIZE__SHIFT				4
+#define TGR3D_ATTRIB_MODE_STRIDE__MASK				0xffffff00
+#define TGR3D_ATTRIB_MODE_STRIDE__SHIFT				8
+
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT				0x00000120
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT0			0x00000001
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT1			0x00000002
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT2			0x00000004
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT3			0x00000008
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT4			0x00000010
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT5			0x00000020
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT6			0x00000040
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT7			0x00000080
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT8			0x00000100
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT9			0x00000200
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT10			0x00000400
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT11			0x00000800
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT12			0x00001000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT13			0x00002000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT14			0x00004000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_OUT15			0x00008000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN0			0x00010000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN1			0x00020000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN2			0x00040000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN3			0x00080000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN4			0x00100000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN5			0x00200000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN6			0x00400000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN7			0x00800000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN8			0x01000000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN9			0x02000000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN10			0x04000000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN11			0x08000000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN12			0x10000000	/* bit 28, between IN11 and IN13 */
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN13			0x20000000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN14			0x40000000
+#define TGR3D_VP_ATTRIB_IN_OUT_SELECT_IN15			0x80000000
+
+#define TGR3D_INDEX_PTR						0x00000121
+
+#define TGR3D_DRAW_PARAMS					0x00000122
+#define TGR3D_DRAW_PARAMS_INDEX_MODE__MASK			0x30000000
+#define TGR3D_DRAW_PARAMS_INDEX_MODE__SHIFT			28
+#define TGR3D_DRAW_PARAMS_PROVOKING_VERTEX__MASK		0x08000000
+#define TGR3D_DRAW_PARAMS_PROVOKING_VERTEX__SHIFT		27
+#define TGR3D_DRAW_PARAMS_PRIMITIVE_TYPE__MASK			0x07000000
+#define TGR3D_DRAW_PARAMS_PRIMITIVE_TYPE__SHIFT			24
+#define TGR3D_DRAW_PARAMS_FIRST__MASK				0x00ffffff
+#define TGR3D_DRAW_PARAMS_FIRST__SHIFT				0
+
+#define TGR3D_DRAW_PRIMITIVES					0x00000123
+#define TGR3D_DRAW_PRIMITIVES_INDEX_COUNT__MASK			0xfff00000
+#define TGR3D_DRAW_PRIMITIVES_INDEX_COUNT__SHIFT		20
+#define TGR3D_DRAW_PRIMITIVES_OFFSET__MASK			0x000fffff
+#define TGR3D_DRAW_PRIMITIVES_OFFSET__SHIFT			0
+
+#define TGR3D_VP_UPLOAD_INST_ID					0x00000205
+
+#define TGR3D_VP_UPLOAD_INST					0x00000206
+
+#define TGR3D_VP_UPLOAD_CONST_ID				0x00000207
+
+#define TGR3D_VP_UPLOAD_CONST					0x00000208
+
+#define TGR3D_LINKER_INSTRUCTION(i0)			       (0x00000300 + 0x2*(i0))
+#define TGR3D_LINKER_INSTRUCTION__ESIZE				0x00000002
+#define TGR3D_LINKER_INSTRUCTION__LEN				0x00000020
+
+#define TGR3D_CULL_FACE_LINKER_SETUP				0x00000343
+#define TGR3D_CULL_FACE_LINKER_SETUP_LINKER_INST_COUNT__MASK	0x000003e0
+#define TGR3D_CULL_FACE_LINKER_SETUP_LINKER_INST_COUNT__SHIFT	5
+#define TGR3D_CULL_FACE_LINKER_SETUP_FRONT_CW			0x00008000
+#define TGR3D_CULL_FACE_LINKER_SETUP_CULL_FACE__MASK		0x00030000
+#define TGR3D_CULL_FACE_LINKER_SETUP_CULL_FACE__SHIFT		16
+#define TGR3D_CULL_FACE_LINKER_SETUP_UNK_18_31__MASK		0xfffc0000
+#define TGR3D_CULL_FACE_LINKER_SETUP_UNK_18_31__SHIFT		18
+
+#define TGR3D_POLYGON_OFFSET_UNITS				0x00000344
+
+#define TGR3D_POLYFON_OFFSET_FACTOR				0x00000345	/* NOTE(review): "POLYFON" looks like a typo of "POLYGON"; name kept as-is, confirm users before renaming */
+
+#define TGR3D_POINT_PARAMS					0x00000346
+
+#define TGR3D_POINT_SIZE					0x00000347
+
+#define TGR3D_POINT_COORD_RANGE_MAX_S				0x00000348
+
+#define TGR3D_POINT_COORD_RANGE_MAX_T				0x00000349
+
+#define TGR3D_POINT_COORD_RANGE_MIN_S				0x0000034a
+
+#define TGR3D_POINT_COORD_RANGE_MIN_T				0x0000034b
+
+#define TGR3D_LINE_PARAMS					0x0000034c
+
+#define TGR3D_HALF_LINE_WIDTH					0x0000034d
+
+#define TGR3D_SCISSOR_HORIZ					0x00000350
+#define TGR3D_SCISSOR_HORIZ_MIN__MASK				0xffff0000
+#define TGR3D_SCISSOR_HORIZ_MIN__SHIFT				16
+#define TGR3D_SCISSOR_HORIZ_MAX__MASK				0x0000ffff
+#define TGR3D_SCISSOR_HORIZ_MAX__SHIFT				0
+
+#define TGR3D_SCISSOR_VERT					0x00000351
+#define TGR3D_SCISSOR_VERT_MIN__MASK				0xffff0000
+#define TGR3D_SCISSOR_VERT_MIN__SHIFT				16
+#define TGR3D_SCISSOR_VERT_MAX__MASK				0x0000ffff
+#define TGR3D_SCISSOR_VERT_MAX__SHIFT				0
+
+#define TGR3D_VIEWPORT_X_BIAS					0x00000352
+
+#define TGR3D_VIEWPORT_Y_BIAS					0x00000353
+
+#define TGR3D_VIEWPORT_Z_BIAS					0x00000354
+
+#define TGR3D_VIEWPORT_X_SCALE					0x00000355
+
+#define TGR3D_VIEWPORT_Y_SCALE					0x00000356
+
+#define TGR3D_VIEWPORT_Z_SCALE					0x00000357
+
+#define TGR3D_GUARDBAND_WIDTH					0x00000358
+
+#define TGR3D_GUARDBAND_HEIGHT					0x00000359
+
+#define TGR3D_GUARDBAND_DEPTH					0x0000035a
+
+#define TGR3D_STENCIL_FRONT1					0x00000400
+#define TGR3D_STENCIL_FRONT1_MASK__MASK				0x000000ff
+#define TGR3D_STENCIL_FRONT1_MASK__SHIFT			0
+#define TGR3D_STENCIL_FRONT1_FUNC__MASK				0x00000700
+#define TGR3D_STENCIL_FRONT1_FUNC__SHIFT			8
+
+#define TGR3D_STENCIL_BACK1					0x00000401
+#define TGR3D_STENCIL_BACK1_MASK__MASK				0x000000ff
+#define TGR3D_STENCIL_BACK1_MASK__SHIFT				0
+#define TGR3D_STENCIL_BACK1_FUNC__MASK				0x00000700
+#define TGR3D_STENCIL_BACK1_FUNC__SHIFT				8
+
+#define TGR3D_STENCIL_PARAMS					0x00000402
+#define TGR3D_STENCIL_PARAMS_STENCIL_WRITE_EARLY		0x00000040
+#define TGR3D_STENCIL_PARAMS_STENCIL_TEST			0x00000020
+
+#define TGR3D_DEPTH_TEST_PARAMS					0x00000403
+#define TGR3D_DEPTH_TEST_PARAMS_DEPTH_WRITE			0x00000100
+#define TGR3D_DEPTH_TEST_PARAMS_FUNC__MASK			0x000000f0
+#define TGR3D_DEPTH_TEST_PARAMS_FUNC__SHIFT			4
+#define TGR3D_DEPTH_TEST_PARAMS_DEPTH_TEST			0x00000008
+
+#define TGR3D_DEPTH_RANGE_NEAR					0x00000404
+
+#define TGR3D_DEPTH_RANGE_FAR					0x00000405
+
+#define TGR3D_FP_PSEQ_UPLOAD_INST_BUFFER_FLUSH			0x00000500
+
+#define TGR3D_FP_PSEQ_ENGINE_INST				0x00000520
+
+#define TGR3D_FP_PSEQ_UPLOAD_INST_ID				0x00000540
+
+#define TGR3D_FP_PSEQ_UPLOAD_INST				0x00000541
+
+#define TGR3D_FP_PSEQ_QUAD_ID					0x00000545
+
+#define TGR3D_FP_PSEQ_DW_CFG					0x00000546
+#define TGR3D_FP_PSEQ_DW_CFG_PSEQ_TO_DW_EXEC_NB__MASK		0xffffffc0
+#define TGR3D_FP_PSEQ_DW_CFG_PSEQ_TO_DW_EXEC_NB__SHIFT		6
+
+#define TGR3D_FP_UPLOAD_MFU_SCHED_ID				0x00000600
+
+#define TGR3D_FP_UPLOAD_MFU_SCHED				0x00000601
+#define TGR3D_FP_UPLOAD_MFU_SCHED_OFFSET__MASK			0x000000fc
+#define TGR3D_FP_UPLOAD_MFU_SCHED_OFFSET__SHIFT			2
+#define TGR3D_FP_UPLOAD_MFU_SCHED_COUNT__MASK			0x00000003
+#define TGR3D_FP_UPLOAD_MFU_SCHED_COUNT__SHIFT			0
+
+#define TGR3D_FP_UPLOAD_MFU_INST_ID				0x00000603
+
+#define TGR3D_FP_UPLOAD_MFU_INST				0x00000604
+
+#define TGR3D_FP_UPLOAD_TEX_INST_ID				0x00000700
+
+#define TGR3D_FP_UPLOAD_TEX_INST				0x00000701
+
+#define TGR3D_TEXTURE_POINTER(i0)			       (0x00000710 + 0x1*(i0))
+#define TGR3D_TEXTURE_POINTER__ESIZE				0x00000001
+#define TGR3D_TEXTURE_POINTER__LEN				0x00000010
+
+#define TGR3D_TEXTURE_DESC1(i0)				       (0x00000720 + 0x2*(i0))
+#define TGR3D_TEXTURE_DESC1__ESIZE				0x00000002
+#define TGR3D_TEXTURE_DESC1__LEN				0x00000010
+#define TGR3D_TEXTURE_DESC1_MAGFILTER_LINEAR			0x20000000
+#define TGR3D_TEXTURE_DESC1_MINFILTER_LINEAR_WITHIN		0x10000000
+#define TGR3D_TEXTURE_DESC1_MINFILTER_LINEAR_BETWEEN		0x08000000
+#define TGR3D_TEXTURE_DESC1_FORMAT__MASK			0x00001f00
+#define TGR3D_TEXTURE_DESC1_FORMAT__SHIFT			8
+#define TGR3D_TEXTURE_DESC1_WRAP_S_MIRRORED_REPEAT		0x00000008
+#define TGR3D_TEXTURE_DESC1_WRAP_T_MIRRORED_REPEAT		0x00000004
+#define TGR3D_TEXTURE_DESC1_WRAP_S_CLAMP_TO_EDGE		0x00000002
+#define TGR3D_TEXTURE_DESC1_WRAP_T_CLAMP_TO_EDGE		0x00000001
+
+#define TGR3D_TEXTURE_DESC2(i0)				       (0x00000721 + 0x2*(i0))
+#define TGR3D_TEXTURE_DESC2__ESIZE				0x00000002
+#define TGR3D_TEXTURE_DESC2__LEN				0x00000010
+#define TGR3D_TEXTURE_DESC2_WIDTH__MASK				0xfff00000
+#define TGR3D_TEXTURE_DESC2_WIDTH__SHIFT			20
+#define TGR3D_TEXTURE_DESC2_HEIGHT__MASK			0x000fff00
+#define TGR3D_TEXTURE_DESC2_HEIGHT__SHIFT			8
+#define TGR3D_TEXTURE_DESC2_WIDTH_LOG2__MASK			0xf0000000
+#define TGR3D_TEXTURE_DESC2_WIDTH_LOG2__SHIFT			28
+#define TGR3D_TEXTURE_DESC2_HEIGHT_LOG2__MASK			0x0f000000
+#define TGR3D_TEXTURE_DESC2_HEIGHT_LOG2__SHIFT			24
+#define TGR3D_TEXTURE_DESC2_MAX_LOD__MASK			0x0000f000
+#define TGR3D_TEXTURE_DESC2_MAX_LOD__SHIFT			12
+#define TGR3D_TEXTURE_DESC2_MIPMAP_DISABLE			0x00000080
+#define TGR3D_TEXTURE_DESC2_NOT_POW2_DIMENSIONS			0x00000040
+
+#define TGR3D_FP_UPLOAD_ALU_SCHED_ID				0x00000800
+
+#define TGR3D_FP_UPLOAD_ALU_SCHED				0x00000801
+#define TGR3D_FP_UPLOAD_ALU_SCHED_OFFSET__MASK			0x000000fc
+#define TGR3D_FP_UPLOAD_ALU_SCHED_OFFSET__SHIFT			2
+#define TGR3D_FP_UPLOAD_ALU_SCHED_COUNT__MASK			0x00000003
+#define TGR3D_FP_UPLOAD_ALU_SCHED_COUNT__SHIFT			0
+
+#define TGR3D_FP_UPLOAD_ALU_INST_ID				0x00000803
+
+#define TGR3D_FP_UPLOAD_ALU_INST				0x00000804
+
+#define TGR3D_FP_UPLOAD_ALU_INST_COMPLEMENT			0x00000806
+
+#define TGR3D_FP_CONST(i0)				       (0x00000820 + 0x1*(i0))
+#define TGR3D_FP_CONST__ESIZE					0x00000001
+#define TGR3D_FP_CONST__LEN					0x00000020
+
+#define TGR3D_FP_UPLOAD_DW_INST_ID				0x00000900
+
+#define TGR3D_FP_UPLOAD_DW_INST					0x00000901
+
+#define TGR3D_RT_ENABLE						0x00000903
+#define TGR3D_RT_ENABLE_0					0x00000001
+#define TGR3D_RT_ENABLE_1					0x00000002
+#define TGR3D_RT_ENABLE_2					0x00000004
+#define TGR3D_RT_ENABLE_3					0x00000008
+#define TGR3D_RT_ENABLE_4					0x00000010
+#define TGR3D_RT_ENABLE_5					0x00000020
+#define TGR3D_RT_ENABLE_6					0x00000040
+#define TGR3D_RT_ENABLE_7					0x00000080
+#define TGR3D_RT_ENABLE_8					0x00000100
+#define TGR3D_RT_ENABLE_9					0x00000200
+#define TGR3D_RT_ENABLE_10					0x00000400
+#define TGR3D_RT_ENABLE_11					0x00000800
+#define TGR3D_RT_ENABLE_12					0x00001000
+#define TGR3D_RT_ENABLE_13					0x00002000
+#define TGR3D_RT_ENABLE_14					0x00004000
+#define TGR3D_RT_ENABLE_15					0x00008000
+#define TGR3D_RT_ENABLE_DEPTH_BUFFER				0x00010000
+
+#define TGR3D_FDC_CONTROL					0x00000a00
+#define TGR3D_FDC_CONTROL_INVALIDATE				0x00000001
+
+#define TGR3D_RT_PTR(i0)				       (0x00000e00 + 0x1*(i0))
+#define TGR3D_RT_PTR__ESIZE					0x00000001
+#define TGR3D_RT_PTR__LEN					0x00000010
+
+#define TGR3D_RT_PARAMS(i0)				       (0x00000e10 + 0x1*(i0))
+#define TGR3D_RT_PARAMS__ESIZE					0x00000001
+#define TGR3D_RT_PARAMS__LEN					0x00000010
+#define TGR3D_RT_PARAMS_DITHER_ENABLE				0x00000001
+#define TGR3D_RT_PARAMS_FORMAT__MASK				0x0000007c
+#define TGR3D_RT_PARAMS_FORMAT__SHIFT				2
+#define TGR3D_RT_PARAMS_PITCH__MASK				0x01ffff00
+#define TGR3D_RT_PARAMS_PITCH__SHIFT				8
+#define TGR3D_RT_PARAMS_TILED					0x04000000
+
+#define TGR3D_ALU_BUFFER_SIZE					0x00000e20
+#define TGR3D_ALU_BUFFER_SIZE_SIZE__MASK			0x00000003
+#define TGR3D_ALU_BUFFER_SIZE_SIZE__SHIFT			0
+#define TGR3D_ALU_BUFFER_SIZE_SIZEx4__MASK			0xff000000
+#define TGR3D_ALU_BUFFER_SIZE_SIZEx4__SHIFT			24
+
+#define TGR3D_TRAM_SETUP					0x00000e21
+#define TGR3D_TRAM_SETUP_DIV64__MASK				0x0000007f
+#define TGR3D_TRAM_SETUP_DIV64__SHIFT				0
+#define TGR3D_TRAM_SETUP_USED_TRAM_ROWS_NB__MASK		0x00007f00
+#define TGR3D_TRAM_SETUP_USED_TRAM_ROWS_NB__SHIFT		8
+
+#define TGR3D_FP_UPLOAD_INST_ID_COMMON				0x00000e22
+
+#define TGR3D_DITHER						0x00000e26
+
+#define TGR3D_STENCIL_FRONT2					0x00000e28
+#define TGR3D_STENCIL_FRONT2_REF__MASK				0x03fe0000
+#define TGR3D_STENCIL_FRONT2_REF__SHIFT				17
+#define TGR3D_STENCIL_FRONT2_OP_FAIL__MASK			0x00000007
+#define TGR3D_STENCIL_FRONT2_OP_FAIL__SHIFT			0
+#define TGR3D_STENCIL_FRONT2_OP_ZFAIL__MASK			0x00000038
+#define TGR3D_STENCIL_FRONT2_OP_ZFAIL__SHIFT			3
+#define TGR3D_STENCIL_FRONT2_OP_ZPASS__MASK			0x000001c0
+#define TGR3D_STENCIL_FRONT2_OP_ZPASS__SHIFT			6
+
+#define TGR3D_STENCIL_BACK2					0x00000e29
+#define TGR3D_STENCIL_BACK2_REF__MASK				0x03fe0000
+#define TGR3D_STENCIL_BACK2_REF__SHIFT				17
+#define TGR3D_STENCIL_BACK2_OP_FAIL__MASK			0x00000007
+#define TGR3D_STENCIL_BACK2_OP_FAIL__SHIFT			0
+#define TGR3D_STENCIL_BACK2_OP_ZFAIL__MASK			0x00000038
+#define TGR3D_STENCIL_BACK2_OP_ZFAIL__SHIFT			3
+#define TGR3D_STENCIL_BACK2_OP_ZPASS__MASK			0x000001c0
+#define TGR3D_STENCIL_BACK2_OP_ZPASS__SHIFT			6
+
+
+#endif /* TGR_3D_XML */
diff --git a/src/gallium/meson.build b/src/gallium/meson.build
index 88e664e..93e1c5e 100644
--- a/src/gallium/meson.build
+++ b/src/gallium/meson.build
@@ -86,6 +86,12 @@ if with_gallium_freedreno
 else
   driver_freedreno = declare_dependency()
 endif
+if with_gallium_grate
+  subdir('winsys/tegra/drm')
+  subdir('drivers/grate')
+else
+  driver_grate = declare_dependency()
+endif
 if with_gallium_vc4
   subdir('winsys/vc4/drm')
   subdir('drivers/vc4')
diff --git a/src/gallium/targets/dri/Makefile.am b/src/gallium/targets/dri/Makefile.am
index 9597235..aa07c419 100644
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -74,6 +74,8 @@ include $(top_srcdir)/src/gallium/drivers/svga/Automake.inc
 
 include $(top_srcdir)/src/gallium/drivers/freedreno/Automake.inc
 
+include $(top_srcdir)/src/gallium/drivers/grate/Automake.inc
+
 include $(top_srcdir)/src/gallium/drivers/vc4/Automake.inc
 include $(top_srcdir)/src/gallium/drivers/vc5/Automake.inc
 include $(top_srcdir)/src/gallium/drivers/pl111/Automake.inc
diff --git a/src/gallium/targets/dri/dri.sym b/src/gallium/targets/dri/dri.sym
index 1fdf18b..3e0abf9 100644
--- a/src/gallium/targets/dri/dri.sym
+++ b/src/gallium/targets/dri/dri.sym
@@ -6,6 +6,7 @@
 		radeon_drm_winsys_create;
 		amdgpu_winsys_create;
 		fd_drm_screen_create;
+		grate_drm_screen_create;
 	local:
 		*;
 };
diff --git a/src/gallium/targets/dri/meson.build b/src/gallium/targets/dri/meson.build
index 0081bb6..6186a47 100644
--- a/src/gallium/targets/dri/meson.build
+++ b/src/gallium/targets/dri/meson.build
@@ -56,8 +56,9 @@ libgallium_dri = shared_library(
   dependencies : [
     dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread,
     driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
-    driver_pl111, driver_vc4, driver_vc5, driver_freedreno, driver_etnaviv,
-    driver_imx, driver_i915, driver_svga, driver_virgl, driver_swr,
+    driver_pl111, driver_vc4, driver_vc5, driver_grate, driver_freedreno,
+    driver_etnaviv, driver_imx, driver_i915, driver_svga, driver_virgl,
+    driver_swr,
   ],
 )
 
@@ -67,6 +68,7 @@ foreach d : [[with_gallium_pl111, 'pl111_dri.so'],
              [with_gallium_freedreno, ['msm_dri.so', 'kgsl_dri.so']],
              [with_gallium_softpipe or with_gallium_swr, 'swrast_dri.so'],
              [with_gallium_softpipe and with_gallium_drisw_kms, 'kms_swrast_dri.so'],
+             [with_gallium_grate, 'tegra_dri.so'],
              [with_gallium_vc4, 'vc4_dri.so'],
              [with_gallium_vc5, 'vc5_dri.so'],
              [with_gallium_etnaviv, 'etnaviv_dri.so'],
diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
index 5ee1761..4982431 100644
--- a/src/gallium/targets/dri/target.c
+++ b/src/gallium/targets/dri/target.c
@@ -86,3 +86,7 @@ DEFINE_LOADER_DRM_ENTRYPOINT(vc5)
 DEFINE_LOADER_DRM_ENTRYPOINT(imx_drm)
 DEFINE_LOADER_DRM_ENTRYPOINT(etnaviv)
 #endif
+
+#if defined(GALLIUM_GRATE)
+DEFINE_LOADER_DRM_ENTRYPOINT(tegra)
+#endif
diff --git a/src/gallium/winsys/tegra/drm/Makefile.am b/src/gallium/winsys/tegra/drm/Makefile.am
new file mode 100644
index 0000000..ff34092
--- /dev/null
+++ b/src/gallium/winsys/tegra/drm/Makefile.am
@@ -0,0 +1,33 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers \
+	$(GALLIUM_CFLAGS) \
+	$(GRATE_CFLAGS)
+
+noinst_LTLIBRARIES = libtegradrm.la
+
+libtegradrm_la_SOURCES = \
+	tegra_drm_winsys.c
diff --git a/src/gallium/winsys/tegra/drm/meson.build b/src/gallium/winsys/tegra/drm/meson.build
new file mode 100644
index 0000000..8f9f7d3
--- /dev/null
+++ b/src/gallium/winsys/tegra/drm/meson.build
@@ -0,0 +1,30 @@
+# Copyright © 2017 Erik Faye-Lund
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Static library holding the Tegra DRM winsys entry point (tegra_drm_winsys.c).
+libtegrawinsys = static_library(
+  'tegrawinsys',
+  files('tegra_drm_winsys.c'),
+  c_args : [c_vis_args],
+  dependencies : [dep_libdrm, dep_libdrm_tegra],
+  include_directories : [
+    inc_src, inc_include, inc_gallium, inc_gallium_aux, inc_gallium_drivers,
+  ],
+)
diff --git a/src/gallium/winsys/tegra/drm/tegra_drm_public.h b/src/gallium/winsys/tegra/drm/tegra_drm_public.h
new file mode 100644
index 0000000..eceb7c8
--- /dev/null
+++ b/src/gallium/winsys/tegra/drm/tegra_drm_public.h
@@ -0,0 +1,8 @@
+#ifndef TEGRA_DRM_PUBLIC_H
+#define TEGRA_DRM_PUBLIC_H
+
+struct pipe_screen;
+/* Creates a screen from DRM fd; NULL if the drm_tegra handle can't be made. */
+struct pipe_screen *tegra_drm_screen_create(int fd);
+
+#endif /* TEGRA_DRM_PUBLIC_H */
diff --git a/src/gallium/winsys/tegra/drm/tegra_drm_winsys.c b/src/gallium/winsys/tegra/drm/tegra_drm_winsys.c
new file mode 100644
index 0000000..0f41960
--- /dev/null
+++ b/src/gallium/winsys/tegra/drm/tegra_drm_winsys.c
@@ -0,0 +1,17 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libdrm/tegra.h>
+
+#include "grate/grate_screen.h"
+#include "tegra_drm_public.h"
+
+/* Create the grate screen on a drm_tegra handle obtained from fd. */
+struct pipe_screen *tegra_drm_screen_create(int fd)
+{
+	struct drm_tegra *tegra;
+
+	if (drm_tegra_new(&tegra, fd) < 0)
+		return NULL; /* no handle, so no screen */
+	return grate_screen_create(tegra);
+}
-- 
2.7.4