diff --git a/include/c3d/maths.h b/include/c3d/maths.h
index 9d3533a..e1f242d 100644
--- a/include/c3d/maths.h
+++ b/include/c3d/maths.h
@@ -1,77 +1,635 @@
 #pragma once
 #include "types.h"
-#include <string.h>
 #include <math.h>
-#include <float.h>
+#include <string.h>
 
 // See http://tauday.com/tau-manifesto
 //#define M_TAU 6.28318530717958647693
+/// The one true circumference-to-radius ratio
 #define M_TAU (2*M_PI)
 
+/**
+ * @brief Convert an angle from revolutions to radians
+ * @param[in] _angle Angle as a fraction of a full turn (1.0 = one revolution)
+ * @return Angle in radians
+ */
 #define C3D_Angle(_angle) ((_angle)*M_TAU)
-#define C3D_AspectRatioTop (400.0f / 240.0f)
-#define C3D_AspectRatioBot (320.0f / 240.0f)
 
-static inline float FVec_DP4(const C3D_FVec* a, const C3D_FVec* b)
+#define C3D_AspectRatioTop (400.0f / 240.0f) ///< Aspect ratio for 3DS top screen
+#define C3D_AspectRatioBot (320.0f / 240.0f) ///< Aspect ratio for 3DS bottom screen
+
+///@name Vector Math
+///@{
+/**
+ * @brief Create a new FVec4
+ * @param[in] x X-component
+ * @param[in] y Y-component
+ * @param[in] z Z-component
+ * @param[in] w W-component
+ * @return New FVec4
+ */
+static inline C3D_FVec FVec4_New(float x, float y, float z, float w)
 {
-	return a->x*b->x + a->y*b->y + a->z*b->z + a->w*b->w;
+	return (C3D_FVec){{ w, z, y, x }}; // reversed: C3D_FVec stores its fields as w, z, y, x
 }
 
-static inline float FVec_Mod4(const C3D_FVec* a)
+/**
+ * @brief Add two FVec4s
+ * @param[in] lhs Augend
+ * @param[in] rhs Addend
+ * @return lhs+rhs (sum)
+ */
+static inline C3D_FVec FVec4_Add(C3D_FVec lhs, C3D_FVec rhs)
 {
-	return sqrtf(FVec_DP4(a,a));
+	// component-wise addition
+	return FVec4_New(lhs.x+rhs.x, lhs.y+rhs.y, lhs.z+rhs.z, lhs.w+rhs.w);
 }
 
-static inline void FVec_Norm4(C3D_FVec* vec)
+/**
+ * @brief Subtract two FVec4s
+ * @param[in] lhs Minuend
+ * @param[in] rhs Subtrahend
+ * @return lhs-rhs (difference)
+ */
+static inline C3D_FVec FVec4_Subtract(C3D_FVec lhs, C3D_FVec rhs)
 {
-	float m = FVec_Mod4(vec);
-	vec->x /= m;
-	vec->y /= m;
-	vec->z /= m;
-	vec->w /= m;
+	// component-wise subtraction
+	return FVec4_New(lhs.x-rhs.x, lhs.y-rhs.y, lhs.z-rhs.z, lhs.w-rhs.w);
 }
 
-static inline float FVec_DP3(const C3D_FVec* a, const C3D_FVec* b)
+/**
+ * @brief Negate a FVec4
+ * @note This is the same as scaling by -1
+ * @param[in] v Vector to negate
+ * @return -v
+ */
+static inline C3D_FVec FVec4_Negate(C3D_FVec v)
 {
-	return a->x*b->x + a->y*b->y + a->z*b->z;
+	// component-wise negation
+	return FVec4_New(-v.x, -v.y, -v.z, -v.w);
 }
 
-static inline float FVec_Mod3(const C3D_FVec* a)
+/**
+ * @brief Scale a FVec4
+ * @param[in] v Vector to scale
+ * @param[in] s Scale factor
+ * @return v*s
+ */
+static inline C3D_FVec FVec4_Scale(C3D_FVec v, float s)
 {
-	return sqrtf(FVec_DP3(a,a));
+	// component-wise scaling
+	return FVec4_New(v.x*s, v.y*s, v.z*s, v.w*s);
 }
 
-static inline void FVec_Norm3(C3D_FVec* vec)
+/**
+ * @brief Dot product of two FVec4s
+ * @param[in] lhs Left-side FVec4
+ * @param[in] rhs Right-side FVec4
+ * @return lhs∙rhs
+ */
+static inline float FVec4_Dot(C3D_FVec lhs, C3D_FVec rhs)
 {
-	float m = FVec_Mod3(vec);
-	vec->x /= m;
-	vec->y /= m;
-	vec->z /= m;
-	vec->w  = 0.0f;
+	// A∙B = sum of component-wise products
+	return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z + lhs.w*rhs.w;
 }
 
+/**
+ * @brief Magnitude of a FVec4
+ * @param[in] v Vector
+ * @return ‖v‖
+ */
+static inline float FVec4_Magnitude(C3D_FVec v)
+{
+	// ‖v‖ = √(v∙v)
+	return sqrtf(FVec4_Dot(v,v));
+}
+
+/**
+ * @brief Normalize a FVec4
+ * @param[in] v FVec4 to normalize (no zero-magnitude check is performed)
+ * @return v/‖v‖
+ */
+static inline C3D_FVec FVec4_Normalize(C3D_FVec v)
+{
+	// get vector magnitude
+	float m = FVec4_Magnitude(v);
+
+	// divide every component by the magnitude to get a unit vector
+	return FVec4_New(v.x/m, v.y/m, v.z/m, v.w/m);
+}
+
+/**
+ * @brief Create a new FVec3
+ * @param[in] x X-component
+ * @param[in] y Y-component
+ * @param[in] z Z-component
+ * @return New FVec3 (stored in a C3D_FVec whose w component is set to 0)
+ */
+static inline C3D_FVec FVec3_New(float x, float y, float z)
+{
+	return FVec4_New(x, y, z, 0.0f);
+}
+
+/**
+ * @brief Dot product of two FVec3s
+ * @param[in] lhs Left-side FVec3
+ * @param[in] rhs Right-side FVec3
+ * @return lhs∙rhs
+ */
+static inline float FVec3_Dot(C3D_FVec lhs, C3D_FVec rhs)
+{
+	// A∙B = sum of component-wise products
+	return lhs.x*rhs.x + lhs.y*rhs.y + lhs.z*rhs.z;
+}
+
+/**
+ * @brief Magnitude of a FVec3
+ * @param[in] v Vector
+ * @return ‖v‖
+ */
+static inline float FVec3_Magnitude(C3D_FVec v)
+{
+	// ‖v‖ = √(v∙v)
+	return sqrtf(FVec3_Dot(v,v));
+}
+
+/**
+ * @brief Normalize a FVec3
+ * @param[in] v FVec3 to normalize (w is ignored; no zero-magnitude check is performed)
+ * @return v/‖v‖
+ */
+static inline C3D_FVec FVec3_Normalize(C3D_FVec v)
+{
+	// get vector magnitude (x, y and z only)
+	float m = FVec3_Magnitude(v);
+
+	// divide x, y and z by the magnitude to get a unit vector; w comes back as 0
+	return FVec3_New(v.x/m, v.y/m, v.z/m);
+}
+
+/**
+ * @brief Add two FVec3s
+ * @param[in] lhs Augend
+ * @param[in] rhs Addend
+ * @return lhs+rhs (sum)
+ */
+static inline C3D_FVec FVec3_Add(C3D_FVec lhs, C3D_FVec rhs)
+{
+	// component-wise addition
+	return FVec3_New(lhs.x+rhs.x, lhs.y+rhs.y, lhs.z+rhs.z);
+}
+
+/**
+ * @brief Subtract two FVec3s
+ * @param[in] lhs Minuend
+ * @param[in] rhs Subtrahend
+ * @return lhs-rhs (difference)
+ */
+static inline C3D_FVec FVec3_Subtract(C3D_FVec lhs, C3D_FVec rhs)
+{
+	// component-wise subtraction
+	return FVec3_New(lhs.x-rhs.x, lhs.y-rhs.y, lhs.z-rhs.z);
+}
+
+/**
+ * @brief Distance between two 3D points
+ * @param[in] lhs Relative origin
+ * @param[in] rhs Relative point of interest
+ * @return ‖lhs-rhs‖
+ */
+static inline float FVec3_Distance(C3D_FVec lhs, C3D_FVec rhs)
+{
+	// distance = ‖lhs-rhs‖
+	return FVec3_Magnitude(FVec3_Subtract(lhs, rhs));
+}
+
+/**
+ * @brief Scale a FVec3
+ * @param[in] v Vector to scale
+ * @param[in] s Scale factor
+ * @return v*s
+ */
+static inline C3D_FVec FVec3_Scale(C3D_FVec v, float s)
+{
+	// component-wise scaling of x, y and z
+	return FVec3_New(v.x*s, v.y*s, v.z*s);
+}
+
+/**
+ * @brief Negate a FVec3
+ * @note This is the same as scaling by -1
+ * @param[in] v Vector to negate
+ * @return -v
+ */
+static inline C3D_FVec FVec3_Negate(C3D_FVec v)
+{
+	// component-wise negation of x, y and z
+	return FVec3_New(-v.x, -v.y, -v.z);
+}
+
+/**
+ * @brief Cross product of two FVec3s
+ * @note This returns a pseudo-vector which is perpendicular to the plane
+ *       spanned by the two input vectors.
+ * @param[in] lhs Left-side FVec3
+ * @param[in] rhs Right-side FVec3
+ * @return lhs×rhs
+ */
+static inline C3D_FVec FVec3_Cross(C3D_FVec lhs, C3D_FVec rhs)
+{
+	// A×B = (AyBz - AzBy, AzBx - AxBz, AxBy - AyBx)
+	return FVec3_New(lhs.y*rhs.z - lhs.z*rhs.y, lhs.z*rhs.x - lhs.x*rhs.z, lhs.x*rhs.y - lhs.y*rhs.x);
+}
+///@}
+
+///@name Matrix Math
+///@note All matrices are 4x4 unless otherwise noted
+///@{
+
+/**
+ * @brief Zero matrix
+ * @param[out] out Matrix to zero
+ */
 static inline void Mtx_Zeros(C3D_Mtx* out)
 {
 	memset(out, 0, sizeof(*out));
 }
 
+/**
+ * @brief Copy a matrix
+ * @param[out] out Output matrix
+ * @param[in]  in  Input matrix
+ */
 static inline void Mtx_Copy(C3D_Mtx* out, const C3D_Mtx* in)
 {
-	memcpy(out, in, sizeof(*out));
+	*out = *in;
 }
 
+/**
+ * @brief Identity matrix
+ * @param[out] out Matrix to fill
+ */
 void Mtx_Identity(C3D_Mtx* out);
+
+/**
+ * @brief Multiply two matrices
+ * @param[out] out Output matrix
+ * @param[in]  a   Multiplicand
+ * @param[in]  b   Multiplier
+ */
 void Mtx_Multiply(C3D_Mtx* out, const C3D_Mtx* a, const C3D_Mtx* b);
 
+/**
+ * @brief Invert a matrix
+ * @param[in,out] out Matrix to invert
+ * @retval true Matrix was inverted
+ * @retval false Matrix is degenerate
+ */
 bool Mtx_Inverse(C3D_Mtx* out);
 
-void Mtx_Translate(C3D_Mtx* mtx, float x, float y, float z);
-void Mtx_Scale(C3D_Mtx* mtx, float x, float y, float z);
-void Mtx_RotateX(C3D_Mtx* mtx, float angle, bool bRightSide);
-void Mtx_RotateY(C3D_Mtx* mtx, float angle, bool bRightSide);
-void Mtx_RotateZ(C3D_Mtx* mtx, float angle, bool bRightSide);
+/**
+ * @brief Multiply 3x3 matrix by a FVec3
+ * @param[in] mtx Matrix
+ * @param[in] v   Vector
+ * @return Product of mtx and v
+ */
+C3D_FVec Mtx_MultiplyFVec3(const C3D_Mtx* mtx, C3D_FVec v);
 
-// Special versions of the projection matrices that take the 3DS' screen orientation into account
+/**
+ * @brief Multiply 4x4 matrix by a FVec4
+ * @param[in] mtx Matrix
+ * @param[in] v   Vector
+ * @return Product of mtx and v
+ */
+C3D_FVec Mtx_MultiplyFVec4(const C3D_Mtx* mtx, C3D_FVec v);
+
+/**
+ * @brief Multiply 4x3 matrix by a FVec3
+ * @param[in] mtx Matrix
+ * @param[in] v   Vector (its w component is replaced with 1.0 before multiplying)
+ * @return Result of Mtx_MultiplyFVec4 on mtx and v with v.w set to 1
+ */
+static inline C3D_FVec Mtx_MultiplyFVecH(const C3D_Mtx* mtx, C3D_FVec v)
+{
+	v.w = 1.0f; // force w to 1 so each row's w entry is added into the product
+
+	return Mtx_MultiplyFVec4(mtx, v);
+}
+///@}
+
+/**
+ * @name 3D Transformation Matrix Math
+ * @note bRightSide determines on which side the transformation is applied.
+ *       With an input matrix A and a transformation matrix B, bRightSide being
+ *       true yields AB, while being false yields BA.
+ */
+///@{
+
+/**
+ * @brief 3D translation
+ * @param[in,out] mtx        Matrix to translate
+ * @param[in]     x,y        X and Y components to translate
+ * @param[in]     z          Z component to translate
+ * @param[in]     bRightSide Whether to transform from the right side
+ */
+void Mtx_Translate(C3D_Mtx* mtx, float x, float y, float z, bool bRightSide);
+
+/**
+ * @brief 3D Scale
+ * @param[in,out] mtx Matrix to scale
+ * @param[in]     x   X component to scale
+ * @param[in]     y   Y component to scale
+ * @param[in]     z   Z component to scale
+ */
+void Mtx_Scale(C3D_Mtx* mtx, float x, float y, float z);
+
+/**
+ * @brief 3D Rotation
+ * @param[in,out] mtx        Matrix to rotate
+ * @param[in]     axis       Axis about which to rotate
+ * @param[in]     angle      Radians to rotate
+ * @param[in]     bRightSide Whether to transform from the right side
+ */
+void Mtx_Rotate(C3D_Mtx* mtx, C3D_FVec axis, float angle, bool bRightSide);
+
+/**
+ * @brief 3D Rotation about the X axis
+ * @param[in,out] mtx        Matrix to rotate
+ * @param[in]     angle      Radians to rotate
+ * @param[in]     bRightSide Whether to transform from the right side
+ */
+void Mtx_RotateX(C3D_Mtx* mtx, float angle, bool bRightSide);
+
+/**
+ * @brief 3D Rotation about the Y axis
+ * @param[in,out] mtx        Matrix to rotate
+ * @param[in]     angle      Radians to rotate
+ * @param[in]     bRightSide Whether to transform from the right side
+ */
+void Mtx_RotateY(C3D_Mtx* mtx, float angle, bool bRightSide);
+
+/**
+ * @brief 3D Rotation about the Z axis
+ * @param[in,out] mtx        Matrix to rotate
+ * @param[in]     angle      Radians to rotate
+ * @param[in]     bRightSide Whether to transform from the right side
+ */
+void Mtx_RotateZ(C3D_Mtx* mtx, float angle, bool bRightSide);
+///@}
+
+///@name 3D Projection Matrix Math
+///@{
+
+/**
+ * @brief Orthogonal projection
+ * @param[out] mtx Output matrix
+ * @param[in]  left    Left clip plane (X=left)
+ * @param[in]  right   Right clip plane (X=right)
+ * @param[in]  bottom  Bottom clip plane (Y=bottom)
+ * @param[in]  top     Top clip plane (Y=top)
+ * @param[in]  near    Near clip plane (Z=near)
+ * @param[in]  far     Far clip plane (Z=far)
+ */
+void Mtx_Ortho(C3D_Mtx* mtx, float left, float right, float bottom, float top, float near, float far);
+
+/**
+ * @brief Perspective projection
+ * @param[out] mtx    Output matrix
+ * @param[in]  fovy   Vertical field of view in radians
+ * @param[in]  aspect Aspect ratio of projection plane (width/height)
+ * @param[in]  near   Near clip plane (Z=near)
+ * @param[in]  far    Far clip plane (Z=far)
+ */
+void Mtx_Persp(C3D_Mtx* mtx, float fovy, float aspect, float near, float far);
+
+/**
+ * @brief Stereo perspective projection
+ * @note Typically you will use iod to mean the distance between the eyes. Plug
+ *       in -iod for the left eye and iod for the right eye.
+ * @note The focal length is defined by screen. If objects are farther than this,
+ *       they will appear to be inside the screen. If objects are closer than this,
+ *       they will appear to pop out of the screen. Objects at this distance appear
+ *       to be at the screen.
+ * @param[out] mtx    Output matrix
+ * @param[in]  fovy   Vertical field of view in radians
+ * @param[in]  aspect Aspect ratio of projection plane (width/height)
+ * @param[in]  near   Near clip plane (Z=near)
+ * @param[in]  far    Far clip plane (Z=far)
+ * @param[in]  iod    Interocular distance
+ * @param[in]  screen Focal length
+ */
+void Mtx_PerspStereo(C3D_Mtx* mtx, float fovy, float aspect, float near, float far, float iod, float screen);
+
+/**
+ * @brief Orthogonal projection, tilted to account for the 3DS screen rotation
+ * @param[out] mtx     Output matrix
+ * @param[in]  left    Left clip plane (X=left)
+ * @param[in]  right   Right clip plane (X=right)
+ * @param[in]  bottom  Bottom clip plane (Y=bottom)
+ * @param[in]  top     Top clip plane (Y=top)
+ * @param[in]  near,far Near and far clip planes (Z=near, Z=far)
+ */
 void Mtx_OrthoTilt(C3D_Mtx* mtx, float left, float right, float bottom, float top, float near, float far);
+
+/**
+ * @brief Perspective projection, tilted to account for the 3DS screen rotation
+ * @param[out] mtx    Output matrix
+ * @param[in]  fovy   Vertical field of view in radians
+ * @param[in]  aspect Aspect ratio of projection plane (width/height)
+ * @param[in]  near   Near clip plane (Z=near)
+ * @param[in]  far    Far clip plane (Z=far)
+ */
 void Mtx_PerspTilt(C3D_Mtx* mtx, float fovy, float aspect, float near, float far);
+
+/**
+ * @brief Stereo perspective projection, tilted to account for the 3DS screen rotation
+ * @note See the notes for Mtx_PerspStereo
+ * @param[out] mtx    Output matrix
+ * @param[in]  fovy   Vertical field of view in radians
+ * @param[in]  aspect Aspect ratio of projection plane (width/height)
+ * @param[in]  near   Near clip plane (Z=near)
+ * @param[in]  far    Far clip plane (Z=far)
+ * @param[in]  iod    Interocular distance
+ * @param[in]  screen Focal length
+ */
 void Mtx_PerspStereoTilt(C3D_Mtx* mtx, float fovy, float aspect, float near, float far, float iod, float screen);
+///@}
+
+///@name Quaternion Math
+///@{
+//
+/**
+ * @brief Create a new Quaternion
+ * @param[in] i I-component
+ * @param[in] j J-component
+ * @param[in] k K-component
+ * @param[in] r R-component
+ * @return New Quaternion
+ */
+#define Quat_New(i,j,k,r) FVec4_New(i,j,k,r)
+
+/**
+ * @brief Negate a Quaternion
+ * @note This is the same as scaling by -1
+ * @param[in] q Quaternion to negate
+ * @return -q
+ */
+#define Quat_Negate(q) FVec4_Negate(q)
+
+/**
+ * @brief Add two Quaternions
+ * @param[in] lhs Augend
+ * @param[in] rhs Addend
+ * @return lhs+rhs (sum)
+ */
+#define Quat_Add(lhs,rhs) FVec4_Add(lhs,rhs)
+
+/**
+ * @brief Subtract two Quaternions
+ * @param[in] lhs Minuend
+ * @param[in] rhs Subtrahend
+ * @return lhs-rhs (difference)
+ */
+#define Quat_Subtract(lhs,rhs) FVec4_Subtract(lhs,rhs)
+
+/**
+ * @brief Scale a Quaternion
+ * @param[in] q Quaternion to scale
+ * @param[in] s Scale factor
+ * @return q*s
+ */
+#define Quat_Scale(q,s) FVec4_Scale(q,s)
+
+/**
+ * @brief Normalize a Quaternion
+ * @param[in] q Quaternion to normalize
+ * @return q/‖q‖
+ */
+#define Quat_Normalize(q) FVec4_Normalize(q)
+
+/**
+ * @brief Dot product of two Quaternions
+ * @param[in] lhs Left-side Quaternion
+ * @param[in] rhs Right-side Quaternion
+ * @return lhs∙rhs
+ */
+#define Quat_Dot(lhs,rhs) FVec4_Dot(lhs,rhs)
+
+/**
+ * @brief Multiply two Quaternions
+ * @param[in] lhs Multiplicand
+ * @param[in] rhs Multiplier
+ * @return lhs*rhs
+ */
+C3D_FQuat Quat_Multiply(C3D_FQuat lhs, C3D_FQuat rhs);
+
+/**
+ * @brief Raise Quaternion to a power
+ * @note If p is 0, this returns the identity Quaternion.
+ *       If p is 1, this returns q.
+ * @param[in] q Base Quaternion
+ * @param[in] p Power
+ * @return q^p
+ */
+C3D_FQuat Quat_Pow(C3D_FQuat q, float p);
+
+/**
+ * @brief Cross product of Quaternion and FVec3
+ * @param[in] q Left-side Quaternion
+ * @param[in] v Right-side FVec3
+ * @return q×v
+ */
+C3D_FVec Quat_CrossFVec3(C3D_FQuat q, C3D_FVec v);
+
+/**
+ * @brief 3D Rotation
+ * @param[in] q          Quaternion to rotate
+ * @param[in] axis       Axis about which to rotate
+ * @param[in] r          Radians to rotate
+ * @param[in] bRightSide Whether to transform from the right side
+ * @return Rotated Quaternion
+ */
+C3D_FQuat Quat_Rotate(C3D_FQuat q, C3D_FVec axis, float r, bool bRightSide);
+
+/**
+ * @brief 3D Rotation about the X axis
+ * @param[in] q          Quaternion to rotate
+ * @param[in] r          Radians to rotate
+ * @param[in] bRightSide Whether to transform from the right side
+ * @return Rotated Quaternion
+ */
+C3D_FQuat Quat_RotateX(C3D_FQuat q, float r, bool bRightSide);
+
+/**
+ * @brief 3D Rotation about the Y axis
+ * @param[in] q          Quaternion to rotate
+ * @param[in] r          Radians to rotate
+ * @param[in] bRightSide Whether to transform from the right side
+ * @return Rotated Quaternion
+ */
+C3D_FQuat Quat_RotateY(C3D_FQuat q, float r, bool bRightSide);
+
+/**
+ * @brief 3D Rotation about the Z axis
+ * @param[in] q          Quaternion to rotate
+ * @param[in] r          Radians to rotate
+ * @param[in] bRightSide Whether to transform from the right side
+ * @return Rotated Quaternion
+ */
+C3D_FQuat Quat_RotateZ(C3D_FQuat q, float r, bool bRightSide);
+
+/**
+ * @brief Get 4x4 matrix equivalent to Quaternion
+ * @param[out] m Output matrix
+ * @param[in]  q Input Quaternion
+ */
+void Mtx_FromQuat(C3D_Mtx* m, C3D_FQuat q);
+
+/**
+ * @brief Identity Quaternion
+ * @return Identity Quaternion
+ */
+static inline C3D_FQuat Quat_Identity(void)
+{
+	// r=1, i=j=k=0
+	return Quat_New(0.0f, 0.0f, 0.0f, 1.0f);
+}
+
+/**
+ * @brief Quaternion conjugate
+ * @param[in] q Quaternion of which to get conjugate
+ * @return Conjugate of q
+ */
+static inline C3D_FQuat Quat_Conjugate(C3D_FQuat q)
+{
+	// q* = q.r - q.i - q.j - q.k
+	return Quat_New(-q.i, -q.j, -q.k, q.r);
+}
+
+/**
+ * @brief Quaternion inverse
+ * @note This is the same as raising to the power of -1
+ * @param[in] q Quaternion of which to get inverse (no check for a zero quaternion, whose q∙q divisor is 0)
+ * @return Inverse of q
+ */
+static inline C3D_FQuat Quat_Inverse(C3D_FQuat q)
+{
+	// q^-1 = (q.r - q.i - q.j - q.k) / (q.r^2 + q.i^2 + q.j^2 + q.k^2)
+	//      = q* / (q∙q)
+	C3D_FQuat c = Quat_Conjugate(q);
+	float     d = Quat_Dot(q, q);
+	return Quat_New(c.i/d, c.j/d, c.k/d, c.r/d);
+}
+
+/**
+ * @brief Cross product of FVec3 and Quaternion
+ * @param[in] v Left-side FVec3
+ * @param[in] q Right-side Quaternion
+ * @return v×q
+ */
+static inline C3D_FVec FVec3_CrossQuat(C3D_FVec v, C3D_FQuat q)
+{
+	// v×q = q^-1×v
+	return Quat_CrossFVec3(Quat_Inverse(q), v);
+}
+///@}
diff --git a/include/c3d/mtxstack.h b/include/c3d/mtxstack.h
index afd4ccf..3c86536 100644
--- a/include/c3d/mtxstack.h
+++ b/include/c3d/mtxstack.h
@@ -18,6 +18,6 @@ static inline C3D_Mtx* MtxStack_Cur(C3D_MtxStack* stk)
 
 void MtxStack_Init(C3D_MtxStack* stk);
 void MtxStack_Bind(C3D_MtxStack* stk, GPU_SHADER_TYPE unifType, int unifPos, int unifLen);
-void MtxStack_Push(C3D_MtxStack* stk);
-void MtxStack_Pop(C3D_MtxStack* stk);
+C3D_Mtx* MtxStack_Push(C3D_MtxStack* stk);
+C3D_Mtx* MtxStack_Pop(C3D_MtxStack* stk);
 void MtxStack_Update(C3D_MtxStack* stk);
diff --git a/include/c3d/types.h b/include/c3d/types.h
index c6d8b83..243b37c 100644
--- a/include/c3d/types.h
+++ b/include/c3d/types.h
@@ -1,14 +1,24 @@
 #pragma once
+#ifdef _3DS
 #include <3ds.h>
+#else
+#include <stdbool.h>
+#include <stdint.h>
+typedef uint8_t u8;
+typedef uint32_t u32;
+#endif
 
 typedef u32 C3D_IVec;
 
 typedef union
 {
 	struct { float w, z, y, x; };
+	struct { float r, k, j, i; };
 	float c[4];
 } C3D_FVec;
 
+typedef C3D_FVec C3D_FQuat;
+
 // Row-major 4x4 matrix
 typedef union
 {
diff --git a/source/light.c b/source/light.c
index 33b7f9f..e1d921a 100644
--- a/source/light.c
+++ b/source/light.c
@@ -122,8 +122,8 @@ static inline u16 floattofix2_11(float x)
 void C3D_LightSpotDir(C3D_Light* light, float x, float y, float z)
 {
 	C3Di_EnableCommon(light, true, GPU_LC1_SPOTBIT(light->id));
-	C3D_FVec vec = { { 0.0, -z, -y, -x } };
-	FVec_Norm4(&vec);
+	C3D_FVec vec = FVec3_New(-x, -y, -z);
+	vec = FVec3_Normalize(vec);
 	light->conf.spotDir[0] = floattofix2_11(vec.x);
 	light->conf.spotDir[1] = floattofix2_11(vec.y);
 	light->conf.spotDir[2] = floattofix2_11(vec.z);
diff --git a/source/maths/.gitignore b/source/maths/.gitignore
new file mode 100644
index 0000000..31dc307
--- /dev/null
+++ b/source/maths/.gitignore
@@ -0,0 +1,2 @@
+*.d
+*.o
diff --git a/source/maths/mtx_fromquat.c b/source/maths/mtx_fromquat.c
new file mode 100644
index 0000000..0568a9c
--- /dev/null
+++ b/source/maths/mtx_fromquat.c
@@ -0,0 +1,34 @@
+#include <c3d/maths.h>
+
+void Mtx_FromQuat(C3D_Mtx* m, C3D_FQuat q)
+{
+	float ii = q.i*q.i;
+	float ij = q.i*q.j;
+	float ik = q.i*q.k;
+	float jj = q.j*q.j;
+	float jk = q.j*q.k;
+	float kk = q.k*q.k;
+	float ri = q.r*q.i;
+	float rj = q.r*q.j;
+	float rk = q.r*q.k;
+
+	m->r[0].x = 1.0f - (2.0f * (jj + kk));
+	m->r[1].x = 2.0f * (ij + rk);
+	m->r[2].x = 2.0f * (ik - rj);
+	m->r[3].x = 0.0f;
+
+	m->r[0].y = 2.0f * (ij - rk);
+	m->r[1].y = 1.0f - (2.0f * (ii + kk));
+	m->r[2].y = 2.0f * (jk + ri);
+	m->r[3].y = 0.0f;
+
+	m->r[0].z = 2.0f * (ik + rj);
+	m->r[1].z = 2.0f * (jk - ri);
+	m->r[2].z = 1.0f - (2.0f * (ii + jj));
+	m->r[3].z = 0.0f;
+
+	m->r[0].w = 0.0f;
+	m->r[1].w = 0.0f;
+	m->r[2].w = 0.0f;
+	m->r[3].w = 1.0f;
+}
diff --git a/source/maths/mtx_identity.c b/source/maths/mtx_identity.c
index 08aef16..2ce7461 100644
--- a/source/maths/mtx_identity.c
+++ b/source/maths/mtx_identity.c
@@ -2,6 +2,9 @@
 
 void Mtx_Identity(C3D_Mtx* out)
 {
-	Mtx_Zeros(out);
+	// http://www.wolframalpha.com/input/?i={{1,0,0,0},{0,1,0,0},{0,0,1,0},{0,0,0,1}}
+	int i;
+	for (i = 0; i < 16; ++i)
+		out->m[i] = 0.0f;
 	out->r[0].x = out->r[1].y = out->r[2].z = out->r[3].w = 1.0f;
 }
diff --git a/source/maths/mtx_inverse.c b/source/maths/mtx_inverse.c
index e093362..fe74a5a 100644
--- a/source/maths/mtx_inverse.c
+++ b/source/maths/mtx_inverse.c
@@ -1,3 +1,4 @@
+#include <float.h>
 #include <c3d/maths.h>
 
 bool Mtx_Inverse(C3D_Mtx* out)
diff --git a/source/maths/mtx_multiply.c b/source/maths/mtx_multiply.c
index 2804bb1..c7e6377 100644
--- a/source/maths/mtx_multiply.c
+++ b/source/maths/mtx_multiply.c
@@ -2,8 +2,9 @@
 
 void Mtx_Multiply(C3D_Mtx* out, const C3D_Mtx* a, const C3D_Mtx* b)
 {
+	// http://www.wolframalpha.com/input/?i={{a,b,c,d},{e,f,g,h},{i,j,k,l},{m,n,o,p}}{{α,β,γ,δ},{ε,θ,ι,κ},{λ,μ,ν,ξ},{ο,π,ρ,σ}}
 	int i, j;
-	for (i = 0; i < 4; i ++)
-		for (j = 0; j < 4; j ++)
+	for (j = 0; j < 4; ++j)
+		for (i = 0; i < 4; ++i)
 			out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i];
 }
diff --git a/source/maths/mtx_multiplyfvec3.c b/source/maths/mtx_multiplyfvec3.c
new file mode 100644
index 0000000..cbb984c
--- /dev/null
+++ b/source/maths/mtx_multiplyfvec3.c
@@ -0,0 +1,11 @@
+#include <c3d/maths.h>
+
+C3D_FVec Mtx_MultiplyFVec3(const C3D_Mtx* mtx, C3D_FVec v)
+{
+	// http://www.wolframalpha.com/input/?i={{a,b,c},{d,e,f},{g,h,i}}{x,y,z}
+	float x = FVec3_Dot(mtx->r[0], v);
+	float y = FVec3_Dot(mtx->r[1], v);
+	float z = FVec3_Dot(mtx->r[2], v);
+
+	return FVec3_New(x, y, z);
+}
diff --git a/source/maths/mtx_multiplyfvec4.c b/source/maths/mtx_multiplyfvec4.c
new file mode 100644
index 0000000..1520d38
--- /dev/null
+++ b/source/maths/mtx_multiplyfvec4.c
@@ -0,0 +1,12 @@
+#include <c3d/maths.h>
+
+C3D_FVec Mtx_MultiplyFVec4(const C3D_Mtx* mtx, C3D_FVec v)
+{
+	// http://www.wolframalpha.com/input/?i={{a,b,c,d},{e,f,g,h},{i,j,k,l},{m,n,o,p}}{x,y,z,w}
+	float x = FVec4_Dot(mtx->r[0], v);
+	float y = FVec4_Dot(mtx->r[1], v);
+	float z = FVec4_Dot(mtx->r[2], v);
+	float w = FVec4_Dot(mtx->r[3], v);
+
+	return FVec4_New(x, y, z, w);
+}
diff --git a/source/maths/mtx_ortho.c b/source/maths/mtx_ortho.c
new file mode 100644
index 0000000..5ceb8e1
--- /dev/null
+++ b/source/maths/mtx_ortho.c
@@ -0,0 +1,17 @@
+#include <c3d/maths.h>
+
+void Mtx_Ortho(C3D_Mtx* mtx, float left, float right, float bottom, float top, float near, float far)
+{
+	Mtx_Zeros(mtx);
+
+	// Standard orthogonal projection matrix, with a fixed depth range of [-1,0] (required by PICA)
+	// http://www.wolframalpha.com/input/?i={{1,0,0,0},{0,1,0,0},{0,0,0.5,-0.5},{0,0,0,1}}{{2/(r-l),0,0,(l%2Br)/(l-r)},{0,2/(t-b),0,(b%2Bt)/(b-t)},{0,0,2/(n-f),(n%2Bf)/(n-f)},{0,0,0,1}}
+
+	mtx->r[0].x = 2.0f / (right - left);
+	mtx->r[0].w = (left + right) / (left - right);
+	mtx->r[1].y = 2.0f / (top - bottom);
+	mtx->r[1].w = (bottom + top) / (bottom - top);
+	mtx->r[2].z = 1.0f / (near - far);
+	mtx->r[2].w = 0.5f*(near + far) / (near - far) - 0.5f;
+	mtx->r[3].w = 1.0f;
+}
diff --git a/source/maths/mtx_orthotilt.c b/source/maths/mtx_orthotilt.c
index f1c807d..eeae7ea 100644
--- a/source/maths/mtx_orthotilt.c
+++ b/source/maths/mtx_orthotilt.c
@@ -2,30 +2,17 @@
 
 void Mtx_OrthoTilt(C3D_Mtx* mtx, float left, float right, float bottom, float top, float near, float far)
 {
-	C3D_Mtx mp;
-	Mtx_Zeros(&mp);
+	Mtx_Zeros(mtx);
 
-	// Build standard orthogonal projection matrix
-	mp.r[0].x = 2.0f / (right - left);
-	mp.r[0].w = (left + right) / (left - right);
-	mp.r[1].y = 2.0f / (top - bottom);
-	mp.r[1].w = (bottom + top) / (bottom - top);
-	mp.r[2].z = 2.0f / (far - near);
-	mp.r[2].w = (near + far) / (near - far);
-	mp.r[3].w = 1.0f;
+	// Standard orthogonal projection matrix, with a fixed depth range of [-1,0] (required by PICA) and rotated τ/4 radians counterclockwise around the Z axis (due to 3DS screen orientation)
+	// http://www.wolframalpha.com/input/?i={{0,1,0,0},{-1,0,0,0},{0,0,1,0},{0,0,0,1}}{{1,0,0,0},{0,1,0,0},{0,0,0.5,-0.5},{0,0,0,1}}
+	// http://www.wolframalpha.com/input/?i={{0,1,0,0},{-1,0,0,0},{0,0,0.5,-0.5},{0,0,0,1}}{{2/(r-l),0,0,(l%2Br)/(l-r)},{0,2/(t-b),0,(b%2Bt)/(b-t)},{0,0,2/(n-f),(n%2Bf)/(n-f)},{0,0,0,1}}
 
-	// Fix depth range to [-1, 0]
-	C3D_Mtx mp2, mp3;
-	Mtx_Identity(&mp2);
-	mp2.r[2].z = 0.5;
-	mp2.r[2].w = -0.5;
-	Mtx_Multiply(&mp3, &mp2, &mp);
-
-	// Fix the 3DS screens' orientation by swapping the X and Y axis
-	Mtx_Identity(&mp2);
-	mp2.r[0].x = 0.0;
-	mp2.r[0].y = 1.0;
-	mp2.r[1].x = -1.0; // flipped
-	mp2.r[1].y = 0.0;
-	Mtx_Multiply(mtx, &mp2, &mp3);
+	mtx->r[0].y = 2.0f / (top - bottom);
+	mtx->r[0].w = (bottom + top) / (bottom - top);
+	mtx->r[1].x = 2.0f / (left - right);
+	mtx->r[1].w = (left + right) / (right - left);
+	mtx->r[2].z = 1.0f / (near - far);
+	mtx->r[2].w = 0.5f*(far + near) / (near - far) - 0.5f;
+	mtx->r[3].w = 1.0f;
 }
diff --git a/source/maths/mtx_persp.c b/source/maths/mtx_persp.c
new file mode 100644
index 0000000..d7f7b30
--- /dev/null
+++ b/source/maths/mtx_persp.c
@@ -0,0 +1,17 @@
+#include <c3d/maths.h>
+
+void Mtx_Persp(C3D_Mtx* mtx, float fovy, float aspect, float near, float far)
+{
+	float fovy_tan = tanf(fovy/2.0f);
+
+	Mtx_Zeros(mtx);
+
+	// Standard perspective projection matrix, with a fixed depth range of [-1,0] (required by PICA)
+	// http://www.wolframalpha.com/input/?i={{1,0,0,0},{0,1,0,0},{0,0,0.5,-0.5},{0,0,0,1}}{{1/(a*tan(v)),0,0,0},{0,1/tan(v),0,0},{0,0,(n%2Bf)/(n-f),(fn)/(n-f)},{0,0,0,-1}}
+
+	mtx->r[0].x = 1.0f / (aspect * fovy_tan);
+	mtx->r[1].y = 1.0f / fovy_tan;
+	mtx->r[2].z = 0.5f*(far + near) / (near - far) + 0.5f;
+	mtx->r[2].w = far*near / (near - far);
+	mtx->r[3].z = -1.0f;
+}
diff --git a/source/maths/mtx_perspstereo.c b/source/maths/mtx_perspstereo.c
index 23f0c93..6e3778d 100644
--- a/source/maths/mtx_perspstereo.c
+++ b/source/maths/mtx_perspstereo.c
@@ -1,37 +1,18 @@
 #include <c3d/maths.h>
 
-void Mtx_PerspStereoTilt(C3D_Mtx* mtx, float fovx, float invaspect, float near, float far, float iod, float screen)
+void Mtx_PerspStereo(C3D_Mtx* mtx, float fovy, float aspect, float near, float far, float iod, float screen)
 {
-	// Notes:
-	// Once again, we are passed "fovy" and the "aspect ratio"; however the 3DS screens are sideways,
-	// and the formula had to be tweaked. With stereo, left/right separation becomes top/bottom separation.
-	// The detailed mathematical explanation is in mtx_persptilt.c.
+	float fovy_tan = tanf(fovy/2.0f);
+	float fovy_tan_aspect = fovy_tan*aspect;
+	float shift = iod / (2.0f*screen); // 'near' not in the numerator because it cancels out in mtx->r[0].z
 
-	float fovx_tan = tanf(fovx/2);
-	float fovx_tan_invaspect = fovx_tan*invaspect;
-	float shift = iod / (2*screen); // 'near' not in the numerator because it cancels out in mp.r[1].z
+	Mtx_Zeros(mtx);
 
-	C3D_Mtx mp;
-	Mtx_Zeros(&mp);
-
-	// Build asymmetric perspective projection matrix
-	mp.r[0].x = 1.0f / fovx_tan;
-	mp.r[1].y = 1.0f / fovx_tan_invaspect;
-	mp.r[1].z = shift / fovx_tan_invaspect;
-	mp.r[2].z = (near + far) / (near - far);
-	mp.r[2].w = (2 * near * far) / (near - far);
-	mp.r[3].z = -1.0f;
-
-	// Fix depth range to [-1, 0]
-	C3D_Mtx mp2;
-	Mtx_Identity(&mp2);
-	mp2.r[2].z = 0.5;
-	mp2.r[2].w = -0.5;
-	Mtx_Multiply(mtx, &mp2, &mp);
-
-	// Translate to screen plane
-	Mtx_Translate(mtx, 0, iod/2, 0);
-
-	// Rotate the matrix one quarter of a turn clockwise in order to fix the 3DS screens' orientation
-	Mtx_RotateZ(mtx, -M_TAU/4, true);
+	mtx->r[0].x = 1.0f / fovy_tan_aspect;
+	mtx->r[0].z = -shift / fovy_tan_aspect;
+	mtx->r[0].w = -iod / 2.0f;
+	mtx->r[1].y = 1.0f / fovy_tan;
+	mtx->r[2].z = 0.5f*(near + far) / (near - far) + 0.5f;
+	mtx->r[2].w = near * far / (near - far);
+	mtx->r[3].z = -1.0f;
 }
diff --git a/source/maths/mtx_perspstereotilt.c b/source/maths/mtx_perspstereotilt.c
new file mode 100644
index 0000000..72c08c4
--- /dev/null
+++ b/source/maths/mtx_perspstereotilt.c
@@ -0,0 +1,23 @@
+#include <c3d/maths.h>
+
+void Mtx_PerspStereoTilt(C3D_Mtx* mtx, float fovx, float invaspect, float near, float far, float iod, float screen)
+{
+	// Notes:
+	// Once again, we are passed "fovy" and the "aspect ratio"; however the 3DS screens are sideways,
+	// and the formula had to be tweaked. With stereo, left/right separation becomes top/bottom separation.
+	// The detailed mathematical explanation is in mtx_persptilt.c.
+
+	float fovx_tan = tanf(fovx/2.0f);
+	float fovx_tan_invaspect = fovx_tan*invaspect;
+	float shift = iod / (2.0f*screen); // 'near' not in the numerator because it cancels out in mtx->r[1].z
+
+	Mtx_Zeros(mtx);
+
+	mtx->r[0].y = 1.0f / fovx_tan;
+	mtx->r[1].x = -1.0f / fovx_tan_invaspect;
+	mtx->r[1].z = shift / fovx_tan_invaspect;
+	mtx->r[1].w = iod / 2.0f;
+	mtx->r[2].z = 0.5f*(near + far) / (near - far) + 0.5f;
+	mtx->r[2].w = near * far / (near - far);
+	mtx->r[3].z = -1.0f;
+}
diff --git a/source/maths/mtx_persptilt.c b/source/maths/mtx_persptilt.c
index 2ab5d44..e6b260b 100644
--- a/source/maths/mtx_persptilt.c
+++ b/source/maths/mtx_persptilt.c
@@ -8,6 +8,10 @@ void Mtx_PerspTilt(C3D_Mtx* mtx, float fovx, float invaspect, float near, float
 	// of the aspect ratio. Therefore the formula for the perspective projection matrix
 	// had to be modified to be expressed in these terms instead.
 
+	// Notes:
+	// Includes adjusting depth range from [-1,1] to [-1,0]
+	// Includes rotation of the matrix one quarter of a turn clockwise in order to fix the 3DS screens' orientation
+
 	// Notes:
 	// fovx = 2 atan(tan(fovy/2)*w/h)
 	// fovy = 2 atan(tan(fovx/2)*h/w)
@@ -21,24 +25,13 @@ void Mtx_PerspTilt(C3D_Mtx* mtx, float fovx, float invaspect, float near, float
 
 	// a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2))
 
-	float fovx_tan = tanf(fovx/2);
-	C3D_Mtx mp;
-	Mtx_Zeros(&mp);
+	float fovx_tan = tanf(fovx/2.0f);
 
-	// Build standard perspective projection matrix
-	mp.r[0].x = 1.0f / fovx_tan;
-	mp.r[1].y = 1.0f / (fovx_tan*invaspect);
-	mp.r[2].z = (near + far) / (near - far);
-	mp.r[2].w = (2 * near * far) / (near - far);
-	mp.r[3].z = -1.0f;
+	Mtx_Zeros(mtx);
 
-	// Fix depth range to [-1, 0]
-	C3D_Mtx mp2;
-	Mtx_Identity(&mp2);
-	mp2.r[2].z = 0.5;
-	mp2.r[2].w = -0.5;
-	Mtx_Multiply(mtx, &mp2, &mp);
-
-	// Rotate the matrix one quarter of a turn clockwise in order to fix the 3DS screens' orientation
-	Mtx_RotateZ(mtx, -M_TAU/4, true);
+	mtx->r[0].y = 1.0f / fovx_tan;
+	mtx->r[1].x = -1.0f / (fovx_tan*invaspect);
+	mtx->r[2].z = 0.5f*(far + near) / (near - far) + 0.5f;
+	mtx->r[2].w = far*near / (near - far);
+	mtx->r[3].z = -1.0f;
 }
diff --git a/source/maths/mtx_rotate.c b/source/maths/mtx_rotate.c
new file mode 100644
index 0000000..95aea2f
--- /dev/null
+++ b/source/maths/mtx_rotate.c
@@ -0,0 +1,72 @@
+#include <c3d/maths.h>
+
+void Mtx_Rotate(C3D_Mtx* mtx, C3D_FVec axis, float angle, bool bRightSide)
+{
+	size_t  i;
+	C3D_Mtx om;
+	// Rotates mtx in place by angle (handed to sinf/cosf) about axis; om holds the rotation matrix
+	float s = sinf(angle);
+	float c = cosf(angle);
+	float t = 1.0f - c;
+	// the entries below are built from the normalized axis
+	axis = FVec3_Normalize(axis);
+
+	float x = axis.x;
+	float y = axis.y;
+	float z = axis.z;
+	float w; // scratch, only used by the left-side product
+	// upper-left 3x3 of om, filled one column (x, y, z) at a time
+	om.r[0].x = t*x*x + c;
+	om.r[1].x = t*x*y + s*z;
+	om.r[2].x = t*x*z - s*y;
+	//om.r[3].x = 0.0f; //optimized out
+
+	om.r[0].y = t*y*x - s*z;
+	om.r[1].y = t*y*y + c;
+	om.r[2].y = t*y*z + s*x;
+	//om.r[3].y = 0.0f; //optimized out
+
+	om.r[0].z = t*z*x + s*y;
+	om.r[1].z = t*z*y - s*x;
+	om.r[2].z = t*z*z + c;
+	//om.r[3].z = 0.0f; //optimized out
+
+	/* optimized out: the products below never read these entries
+	om.r[0].w = 0.0f;
+	om.r[1].w = 0.0f;
+	om.r[2].w = 0.0f;
+	om.r[3].w = 1.0f;
+	*/
+
+	if (bRightSide) // mtx = mtx * om
+	{
+		for (i = 0; i < 4; ++i) // every row of mtx; x, y, z now serve as scratch
+		{
+			x = mtx->r[i].x*om.r[0].x + mtx->r[i].y*om.r[1].x + mtx->r[i].z*om.r[2].x;
+			y = mtx->r[i].x*om.r[0].y + mtx->r[i].y*om.r[1].y + mtx->r[i].z*om.r[2].y;
+			z = mtx->r[i].x*om.r[0].z + mtx->r[i].y*om.r[1].z + mtx->r[i].z*om.r[2].z;
+
+			mtx->r[i].x = x;
+			mtx->r[i].y = y;
+			mtx->r[i].z = z;
+		}
+	}
+	else // mtx = om * mtx
+	{
+		for (i = 0; i < 3; ++i) // row 3 of mtx is left as it is
+		{
+			x = mtx->r[0].x*om.r[i].x + mtx->r[1].x*om.r[i].y + mtx->r[2].x*om.r[i].z;
+			y = mtx->r[0].y*om.r[i].x + mtx->r[1].y*om.r[i].y + mtx->r[2].y*om.r[i].z;
+			z = mtx->r[0].z*om.r[i].x + mtx->r[1].z*om.r[i].y + mtx->r[2].z*om.r[i].z;
+			w = mtx->r[0].w*om.r[i].x + mtx->r[1].w*om.r[i].y + mtx->r[2].w*om.r[i].z;
+			// om.r[i] is overwritten only after all four sums that read it
+			om.r[i].x = x;
+			om.r[i].y = y;
+			om.r[i].z = z;
+			om.r[i].w = w;
+		}
+
+		for (i = 0; i < 3; ++i) // copy the three new rows back into mtx
+			mtx->r[i] = om.r[i];
+	}
+}
diff --git a/source/maths/mtx_rotatex.c b/source/maths/mtx_rotatex.c
index b5baf12..712aaec 100644
--- a/source/maths/mtx_rotatex.c
+++ b/source/maths/mtx_rotatex.c
@@ -2,20 +2,29 @@
 
 void Mtx_RotateX(C3D_Mtx* mtx, float angle, bool bRightSide)
 {
-	C3D_Mtx rm, om;
+	size_t      i;
+	const float cosAngle = cosf(angle);
+	const float sinAngle = sinf(angle);
+	float       rotA, rotB;
 
-	float cosAngle = cosf(angle);
-	float sinAngle = sinf(angle);
-
-	Mtx_Zeros(&rm);
-	rm.r[0].x = 1.0f;
-	rm.r[1].y = cosAngle;
-	rm.r[1].z = -sinAngle;
-	rm.r[2].y = sinAngle;
-	rm.r[2].z = cosAngle;
-	rm.r[3].w = 1.0f;
-
-	if (bRightSide) Mtx_Multiply(&om, mtx, &rm);
-	else            Mtx_Multiply(&om, &rm, mtx);
-	Mtx_Copy(mtx, &om);
+	if (bRightSide)
+	{
+		// mtx = mtx * Rx: every row keeps x and w, only its y and z components mix
+		for (i = 0; i < 4; ++i)
+		{
+			rotA = mtx->r[i].y*cosAngle + mtx->r[i].z*sinAngle;
+			rotB = mtx->r[i].z*cosAngle - mtx->r[i].y*sinAngle;
+			mtx->r[i].y = rotA;
+			mtx->r[i].z = rotB;
+		}
+		return;
+	}
+	// mtx = Rx * mtx: rows 0 and 3 stay, rows 1 and 2 mix one component at a time
+	for (i = 0; i < 4; ++i)
+	{
+		rotA = mtx->r[1].c[i]*cosAngle - mtx->r[2].c[i]*sinAngle;
+		rotB = mtx->r[2].c[i]*cosAngle + mtx->r[1].c[i]*sinAngle;
+		mtx->r[1].c[i] = rotA;
+		mtx->r[2].c[i] = rotB;
+	}
 }
diff --git a/source/maths/mtx_rotatey.c b/source/maths/mtx_rotatey.c
index 2f7d07b..7ee9598 100644
--- a/source/maths/mtx_rotatey.c
+++ b/source/maths/mtx_rotatey.c
@@ -2,20 +2,29 @@
 
 void Mtx_RotateY(C3D_Mtx* mtx, float angle, bool bRightSide)
 {
-	C3D_Mtx rm, om;
+	size_t      i;
+	const float cosAngle = cosf(angle);
+	const float sinAngle = sinf(angle);
+	float       rotA, rotB;
 
-	float cosAngle = cosf(angle);
-	float sinAngle = sinf(angle);
-
-	Mtx_Zeros(&rm);
-	rm.r[0].x = cosAngle;
-	rm.r[0].z = sinAngle;
-	rm.r[1].y = 1.0f;
-	rm.r[2].x = -sinAngle;
-	rm.r[2].z = cosAngle;
-	rm.r[3].w = 1.0f;
-
-	if (bRightSide) Mtx_Multiply(&om, mtx, &rm);
-	else            Mtx_Multiply(&om, &rm, mtx);
-	Mtx_Copy(mtx, &om);
+	if (bRightSide)
+	{
+		// mtx = mtx * Ry: every row keeps y and w, only its x and z components mix
+		for (i = 0; i < 4; ++i)
+		{
+			rotA = mtx->r[i].x*cosAngle - mtx->r[i].z*sinAngle;
+			rotB = mtx->r[i].z*cosAngle + mtx->r[i].x*sinAngle;
+			mtx->r[i].x = rotA;
+			mtx->r[i].z = rotB;
+		}
+		return;
+	}
+	// mtx = Ry * mtx: rows 1 and 3 stay, rows 0 and 2 mix one component at a time
+	for (i = 0; i < 4; ++i)
+	{
+		rotA = mtx->r[0].c[i]*cosAngle + mtx->r[2].c[i]*sinAngle;
+		rotB = mtx->r[2].c[i]*cosAngle - mtx->r[0].c[i]*sinAngle;
+		mtx->r[0].c[i] = rotA;
+		mtx->r[2].c[i] = rotB;
+	}
 }
diff --git a/source/maths/mtx_rotatez.c b/source/maths/mtx_rotatez.c
index b951ef8..42963a8 100644
--- a/source/maths/mtx_rotatez.c
+++ b/source/maths/mtx_rotatez.c
@@ -2,20 +2,29 @@
 
 void Mtx_RotateZ(C3D_Mtx* mtx, float angle, bool bRightSide)
 {
-	C3D_Mtx rm, om;
+	size_t      i;
+	const float cosAngle = cosf(angle);
+	const float sinAngle = sinf(angle);
+	float       rotA, rotB;
 
-	float cosAngle = cosf(angle);
-	float sinAngle = sinf(angle);
-
-	Mtx_Zeros(&rm);
-	rm.r[0].x = cosAngle;
-	rm.r[0].y = -sinAngle;
-	rm.r[1].x = sinAngle;
-	rm.r[1].y = cosAngle;
-	rm.r[2].z = 1.0f;
-	rm.r[3].w = 1.0f;
-
-	if (bRightSide) Mtx_Multiply(&om, mtx, &rm);
-	else            Mtx_Multiply(&om, &rm, mtx);
-	Mtx_Copy(mtx, &om);
+	if (bRightSide)
+	{
+		// mtx = mtx * Rz: every row keeps z and w, only its x and y components mix
+		for (i = 0; i < 4; ++i)
+		{
+			rotA = mtx->r[i].x*cosAngle + mtx->r[i].y*sinAngle;
+			rotB = mtx->r[i].y*cosAngle - mtx->r[i].x*sinAngle;
+			mtx->r[i].x = rotA;
+			mtx->r[i].y = rotB;
+		}
+		return;
+	}
+	// mtx = Rz * mtx: rows 2 and 3 stay, rows 0 and 1 mix one component at a time
+	for (i = 0; i < 4; ++i)
+	{
+		rotA = mtx->r[0].c[i]*cosAngle - mtx->r[1].c[i]*sinAngle;
+		rotB = mtx->r[1].c[i]*cosAngle + mtx->r[0].c[i]*sinAngle;
+		mtx->r[0].c[i] = rotA;
+		mtx->r[1].c[i] = rotB;
+	}
 }
diff --git a/source/maths/mtx_scale.c b/source/maths/mtx_scale.c
index 33d8872..5d7ce60 100644
--- a/source/maths/mtx_scale.c
+++ b/source/maths/mtx_scale.c
@@ -3,7 +3,7 @@
 void Mtx_Scale(C3D_Mtx* mtx, float x, float y, float z)
 {
 	int i;
-	for (i = 0; i < 4; i ++)
+	for (i = 0; i < 4; ++i)
 	{
 		mtx->r[i].x *= x;
 		mtx->r[i].y *= y;
diff --git a/source/maths/mtx_translate.c b/source/maths/mtx_translate.c
index 0ed26d4..30098a3 100644
--- a/source/maths/mtx_translate.c
+++ b/source/maths/mtx_translate.c
@@ -1,14 +1,21 @@
 #include <c3d/maths.h>
 
-void Mtx_Translate(C3D_Mtx* mtx, float x, float y, float z)
+void Mtx_Translate(C3D_Mtx* mtx, float x, float y, float z, bool bRightSide)
 {
-	C3D_Mtx tm, om;
 
-	Mtx_Identity(&tm);
-	tm.r[0].w = x;
-	tm.r[1].w = y;
-	tm.r[2].w = z;
+	const C3D_FVec t = FVec4_New(x, y, z, 1.0f);
+	int row, comp;
+
+	if (!bRightSide)
+	{
+		// mtx = T * mtx: rows 0..2 each gain their translation component times row 3 (c[3-row] picks x, y, z)
+		for (row = 0; row < 3; ++row)
+			for (comp = 0; comp < 4; ++comp)
+				mtx->r[row].c[comp] += mtx->r[3].c[comp] * t.c[3-row];
+		return;
+	}
+
+	for (row = 0; row < 4; ++row) // mtx = mtx * T: only the w column of each row changes
+		mtx->r[row].w = FVec4_Dot(mtx->r[row], t);
 
-	Mtx_Multiply(&om, mtx, &tm);
-	Mtx_Copy(mtx, &om);
 }
diff --git a/source/maths/quat_crossfvec3.c b/source/maths/quat_crossfvec3.c
new file mode 100644
index 0000000..2265f39
--- /dev/null
+++ b/source/maths/quat_crossfvec3.c
@@ -0,0 +1,13 @@
+#include <c3d/maths.h>
+
+C3D_FVec Quat_CrossFVec3(C3D_FQuat q, C3D_FVec  v)
+{
+	// Returns v + 2*q.r*(qv x v) + 2*(qv x (qv x v)), where qv = (q.i, q.j, q.k)
+	C3D_FVec imag   = FVec3_New(q.i, q.j, q.k);
+	C3D_FVec cross1 = FVec3_Cross(imag, v);
+	C3D_FVec cross2 = FVec3_Scale(FVec3_Cross(imag, cross1), 2.0f);
+
+	cross1 = FVec3_Scale(cross1, 2.0f * q.r); // scaled only after cross2 has consumed the raw product
+
+	return FVec3_Add(v, FVec3_Add(cross1, cross2));
+}
diff --git a/source/maths/quat_multiply.c b/source/maths/quat_multiply.c
new file mode 100644
index 0000000..532b26f
--- /dev/null
+++ b/source/maths/quat_multiply.c
@@ -0,0 +1,11 @@
+#include <c3d/maths.h>
+
+C3D_FQuat Quat_Multiply(C3D_FQuat lhs, C3D_FQuat rhs)
+{
+	// Quaternion (Hamilton) product lhs*rhs, one component per line; operand order matters
+	const float real = lhs.r*rhs.r - lhs.i*rhs.i - lhs.j*rhs.j - lhs.k*rhs.k;
+	const float imI  = lhs.r*rhs.i + lhs.i*rhs.r + lhs.j*rhs.k - lhs.k*rhs.j;
+	const float imJ  = lhs.r*rhs.j + lhs.j*rhs.r + lhs.k*rhs.i - lhs.i*rhs.k;
+	const float imK  = lhs.r*rhs.k + lhs.k*rhs.r + lhs.i*rhs.j - lhs.j*rhs.i;
+	return Quat_New(imI, imJ, imK, real);
+}
diff --git a/source/maths/quat_pow.c b/source/maths/quat_pow.c
new file mode 100644
index 0000000..0e1f264
--- /dev/null
+++ b/source/maths/quat_pow.c
@@ -0,0 +1,23 @@
+#include <float.h>
+#include <c3d/maths.h>
+
+C3D_FQuat Quat_Pow(C3D_FQuat q, float p)
+{
+	// a power of (almost) zero gives the identity quaternion; leave before any division happens
+	if (fabsf(p) < FLT_EPSILON)
+		return Quat_Identity();
+
+	const float norm  = FVec4_Magnitude(q);
+	const float ratio = q.r / norm;
+	// |q.r / norm| ~ 1: only the real component is raised to the power
+	// (acosf(1) == 0 and sinf(0) == 0, so the general formula below would divide by zero)
+	if (fabsf(ratio) > 1.0f - FLT_EPSILON && fabsf(ratio) < 1.0f + FLT_EPSILON)
+		return Quat_New(0.0f, 0.0f, 0.0f, powf(q.r, p));
+
+	const float theta    = acosf(ratio);
+	const float thetaPow = theta * p;
+	const float vecScale = sinf(thetaPow) / sinf(theta);
+	const float normPow  = powf(norm, p - 1.0f);
+
+	return Quat_New(q.i*vecScale*normPow, q.j*vecScale*normPow, q.k*vecScale*normPow, cosf(thetaPow)*norm*normPow);
+}
diff --git a/source/maths/quat_rotate.c b/source/maths/quat_rotate.c
new file mode 100644
index 0000000..ba70880
--- /dev/null
+++ b/source/maths/quat_rotate.c
@@ -0,0 +1,16 @@
+#include <c3d/maths.h>
+
+C3D_FQuat Quat_Rotate(C3D_FQuat q, C3D_FVec axis, float r, bool bRightSide)
+{
+	// Rotation quaternion for the angle r about axis: (unit axis * sin(r/2), cos(r/2))
+	const float     half    = r/2.0f;
+	const float     sinHalf = sinf(half);
+	const C3D_FVec  unit    = FVec3_Normalize(axis);
+	const C3D_FQuat rot     = Quat_New(unit.x*sinHalf, unit.y*sinHalf, unit.z*sinHalf, cosf(half));
+
+	// bRightSide selects rot*q, otherwise the product is q*rot
+	if (bRightSide)
+		return Quat_Multiply(rot, q);
+
+	return Quat_Multiply(q, rot);
+}
diff --git a/source/maths/quat_rotatex.c b/source/maths/quat_rotatex.c
new file mode 100644
index 0000000..4b0feed
--- /dev/null
+++ b/source/maths/quat_rotatex.c
@@ -0,0 +1,12 @@
+#include <c3d/maths.h>
+
+C3D_FQuat Quat_RotateX(C3D_FQuat q, float r, bool bRightSide)
+{
+	const float cosHalf = cosf(r/2.0f);
+	const float sinHalf = sinf(r/2.0f);
+	// Hand-expanded product of q with the X rotation quaternion (i = sinHalf, r = cosHalf)
+	if (!bRightSide) // q * rotation
+		return Quat_New(q.r*sinHalf + q.i*cosHalf, q.j*cosHalf + q.k*sinHalf, q.k*cosHalf - q.j*sinHalf, q.r*cosHalf - q.i*sinHalf);
+	// bRightSide: rotation * q
+	return Quat_New(q.r*sinHalf + q.i*cosHalf, q.j*cosHalf - q.k*sinHalf, q.k*cosHalf + q.j*sinHalf, q.r*cosHalf - q.i*sinHalf);
+}
diff --git a/source/maths/quat_rotatey.c b/source/maths/quat_rotatey.c
new file mode 100644
index 0000000..c3a7993
--- /dev/null
+++ b/source/maths/quat_rotatey.c
@@ -0,0 +1,12 @@
+#include <c3d/maths.h>
+
+C3D_FQuat Quat_RotateY(C3D_FQuat q, float r, bool bRightSide)
+{
+	const float cosHalf = cosf(r/2.0f);
+	const float sinHalf = sinf(r/2.0f);
+	// Hand-expanded product of q with the Y rotation quaternion (j = sinHalf, r = cosHalf)
+	if (!bRightSide) // q * rotation
+		return Quat_New(q.i*cosHalf - q.k*sinHalf, q.r*sinHalf + q.j*cosHalf, q.k*cosHalf + q.i*sinHalf, q.r*cosHalf - q.j*sinHalf);
+	// bRightSide: rotation * q
+	return Quat_New(q.i*cosHalf + q.k*sinHalf, q.r*sinHalf + q.j*cosHalf, q.k*cosHalf - q.i*sinHalf, q.r*cosHalf - q.j*sinHalf);
+}
diff --git a/source/maths/quat_rotatez.c b/source/maths/quat_rotatez.c
new file mode 100644
index 0000000..09c92ec
--- /dev/null
+++ b/source/maths/quat_rotatez.c
@@ -0,0 +1,12 @@
+#include <c3d/maths.h>
+
+C3D_FQuat Quat_RotateZ(C3D_FQuat q, float r, bool bRightSide)
+{
+	const float cosHalf = cosf(r/2.0f);
+	const float sinHalf = sinf(r/2.0f);
+	// Hand-expanded product of q with the Z rotation quaternion (k = sinHalf, r = cosHalf)
+	if (!bRightSide) // q * rotation
+		return Quat_New(q.i*cosHalf + q.j*sinHalf, q.j*cosHalf - q.i*sinHalf, q.r*sinHalf + q.k*cosHalf, q.r*cosHalf - q.k*sinHalf);
+	// bRightSide: rotation * q
+	return Quat_New(q.i*cosHalf - q.j*sinHalf, q.j*cosHalf + q.i*sinHalf, q.r*sinHalf + q.k*cosHalf, q.r*cosHalf - q.k*sinHalf);
+}
diff --git a/source/mtxstack.c b/source/mtxstack.c
index ff3fe06..87b9a06 100644
--- a/source/mtxstack.c
+++ b/source/mtxstack.c
@@ -17,18 +17,19 @@ void MtxStack_Bind(C3D_MtxStack* stk, GPU_SHADER_TYPE unifType, int unifPos, int
 	stk->isDirty = true;
 }
 
-void MtxStack_Push(C3D_MtxStack* stk)
+C3D_Mtx* MtxStack_Push(C3D_MtxStack* stk) // returns MtxStack_Cur(stk) after the push, or NULL when the stack is full
 {
-	if (stk->pos == (C3D_MTXSTACK_SIZE-1)) return;
+	if (stk->pos == (C3D_MTXSTACK_SIZE-1)) return NULL; // last slot already in use: nothing is pushed
 	stk->pos ++;
 	Mtx_Copy(&stk->m[stk->pos], &stk->m[stk->pos-1]);
+	return MtxStack_Cur(stk); // evaluated after pos was advanced and the Mtx_Copy above ran
 }
 
-void MtxStack_Pop(C3D_MtxStack* stk)
+C3D_Mtx* MtxStack_Pop(C3D_MtxStack* stk) // returns MtxStack_Cur(stk) after the pop, or NULL when already at the bottom
 {
-	if (stk->pos == 0) return;
+	if (stk->pos == 0) return NULL; // slot 0 is never popped
 	stk->pos --;
-	stk->isDirty = true;
+	return MtxStack_Cur(stk); // NOTE(review): the explicit isDirty = true is gone; presumably MtxStack_Cur sets it - confirm
 }
 
 void MtxStack_Update(C3D_MtxStack* stk)
diff --git a/test/3ds/Makefile b/test/3ds/Makefile
new file mode 100644
index 0000000..dc61171
--- /dev/null
+++ b/test/3ds/Makefile
@@ -0,0 +1,249 @@
+#---------------------------------------------------------------------------------
+.SUFFIXES:
+#---------------------------------------------------------------------------------
+# Stop right away when the devkitARM location is not known
+ifeq ($(strip $(DEVKITARM)),)
+$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
+endif
+# 3DSTEX names the texture converter invoked by the $(ROMFS)/%.bin pattern rules
+3DSTEX := 3dstex
+TOPDIR ?= $(CURDIR)
+include $(DEVKITARM)/3ds_rules
+
+#---------------------------------------------------------------------------------
+# TARGET is the name of the output
+# BUILD is the directory where object files & intermediate files will be placed
+# SOURCES is a list of directories containing source code
+# DATA is a list of directories containing data files
+# INCLUDES is a list of directories containing header files
+# GRAPHICS is a list of directories containing .png images that are converted with 3dstex
+# NO_SMDH: if set to anything, no SMDH file is generated.
+# ROMFS is the directory which contains the RomFS, relative to the Makefile (Optional)
+# APP_TITLE is the name of the app stored in the SMDH file (Optional)
+# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
+# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
+# ICON is the filename of the icon (.png), relative to the project folder.
+#   If not set, it attempts to use one of the following (in this order):
+#     - <Project name>.png
+#     - icon.png
+#     - <libctru folder>/default_icon.png
+#---------------------------------------------------------------------------------
+TARGET   := citro3d_test
+BUILD    := build
+SOURCES  := source
+GRAPHICS := gfx
+DATA     := data
+INCLUDES := include
+ROMFS    := romfs
+# SMDH metadata; ICON is empty, so the icon lookup order from the header above applies
+APP_TITLE       := citro3d test
+APP_DESCRIPTION := v1.0
+APP_AUTHOR      := mtheall
+ICON            :=
+
+#---------------------------------------------------------------------------------
+# options for code generation
+#---------------------------------------------------------------------------------
+ARCH     := -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft
+
+CFLAGS   := -g -Wall -O3 -mword-relocations \
+            -fomit-frame-pointer -ffunction-sections \
+            $(ARCH)
+# INCLUDE is exported further down, before make re-runs this file inside $(BUILD)
+CFLAGS   +=  $(INCLUDE) -DARM11 -D_3DS
+
+CXXFLAGS := $(CFLAGS) -fno-rtti -std=gnu++11
+# LDFLAGS uses '=' (recursive), so $(TARGET) is expanded when the flags are used
+ASFLAGS  := -g $(ARCH)
+LDFLAGS   = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(TARGET).map
+
+LIBS     := -lcitro3d -lctru
+
+#---------------------------------------------------------------------------------
+# list of directories containing libraries, this must be the top level containing
+# include and lib
+#---------------------------------------------------------------------------------
+LIBDIRS  := $(CTRULIB) #$(CURDIR)/../..
+
+
+#---------------------------------------------------------------------------------
+# no real need to edit anything past this point unless you need to add additional
+# rules for different file extensions
+#---------------------------------------------------------------------------------
+ifneq ($(BUILD),$(notdir $(CURDIR)))
+#---------------------------------------------------------------------------------
+
+export OUTPUT  :=  $(CURDIR)/$(TARGET)
+export TOPDIR  :=  $(CURDIR)
+# VPATH lets the make run inside $(BUILD) find sources, data and graphics by bare file name
+export VPATH   :=  $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
+                   $(foreach dir,$(DATA),$(CURDIR)/$(dir)) \
+                   $(foreach dir,$(GRAPHICS),$(CURDIR)/$(dir))
+
+export DEPSDIR :=  $(CURDIR)/$(BUILD)
+# File lists hold names without directories (notdir), one list per file type
+CFILES    := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
+CXXFILES  := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
+SFILES    := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.v.pica)))
+BINFILES  := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
+PNGFILES  := $(foreach dir,$(GRAPHICS),$(notdir $(wildcard $(dir)/*.png)))
+
+#---------------------------------------------------------------------------------
+# use CXX for linking C++ projects, CC for standard C
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(CXXFILES)),)
+  export LD := $(CC)
+else
+  export LD := $(CXX)
+endif
+#---------------------------------------------------------------------------------
+
+export OFILES   := $(addsuffix .o,$(BINFILES)) \
+                   $(PICAFILES:.v.pica=.shbin.o) \
+                   $(CXXFILES:.cpp=.o) \
+                   $(CFILES:.c=.o) \
+                   $(SFILES:.s=.o)
+
+export INCLUDE  := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
+                   $(foreach dir,$(LIBDIRS),-I$(dir)/include) \
+                   -I$(CURDIR)/$(BUILD)
+
+export LIBPATHS :=  $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
+# Converted textures are named under $(ROMFS) so they match the $(ROMFS)/%.bin pattern rules
+PNGROMFS := $(addprefix $(ROMFS)/,$(PNGFILES:.png=.bin))
+
+ifeq ($(strip $(ICON)),)
+	icons := $(wildcard *.png)
+	ifneq (,$(findstring $(TARGET).png,$(icons)))
+		export APP_ICON := $(TOPDIR)/$(TARGET).png
+	else
+		ifneq (,$(findstring icon.png,$(icons)))
+			export APP_ICON := $(TOPDIR)/icon.png
+		endif
+	endif
+else
+	export APP_ICON := $(TOPDIR)/$(ICON)
+endif
+# _3DSXFLAGS collects the optional --smdh / --romfs arguments
+ifeq ($(strip $(NO_SMDH)),)
+	export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
+endif
+
+ifneq ($(ROMFS),)
+	export _3DSXFLAGS += --romfs=$(CURDIR)/$(ROMFS)
+endif
+
+.PHONY: $(BUILD) clean all
+
+#---------------------------------------------------------------------------------
+all: $(BUILD)
+# The top-level run converts the textures first, then re-runs this Makefile inside $(BUILD)
+$(BUILD): $(PNGROMFS)
+	@[ -d $@ ] || mkdir -p $@
+	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
+
+#---------------------------------------------------------------------------------
+clean:
+	@echo clean ...
+	@rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf $(PNGROMFS)
+# Texture conversion: the suffix in front of .png selects the 3dstex format flag
+$(ROMFS)/%.rgba.bin: %.rgba.png
+	@$(3DSTEX) -o $@ --rgba $<
+
+$(ROMFS)/%.rgb.bin: %.rgb.png
+	@$(3DSTEX) -o $@ --rgb $<
+
+$(ROMFS)/%.rgba5551.bin: %.rgba5551.png
+	@$(3DSTEX) -o $@ --rgba5551 $<
+
+$(ROMFS)/%.rgb565.bin: %.rgb565.png
+	@$(3DSTEX) -o $@ --rgb565 $<
+
+$(ROMFS)/%.rgba4.bin: %.rgba4.png
+	@$(3DSTEX) -o $@ --rgba4 $<
+
+$(ROMFS)/%.la.bin: %.la.png
+	@$(3DSTEX) -o $@ --la $<
+
+$(ROMFS)/%.hilo.bin: %.hilo.png
+	@$(3DSTEX) -o $@ --hilo $<
+
+$(ROMFS)/%.l.bin: %.l.png
+	@$(3DSTEX) -o $@ --l $<
+
+$(ROMFS)/%.a.bin: %.a.png
+	@$(3DSTEX) -o $@ --a $<
+
+$(ROMFS)/%.la4.bin: %.la4.png
+	@$(3DSTEX) -o $@ --la4 $<
+
+$(ROMFS)/%.l4.bin: %.l4.png
+	@$(3DSTEX) -o $@ --l4 $<
+
+$(ROMFS)/%.a4.bin: %.a4.png
+	@$(3DSTEX) -o $@ --a4 $<
+
+$(ROMFS)/%.etc1.bin: %.etc1.png
+	@$(3DSTEX) -o $@ --etc1 $<
+
+$(ROMFS)/%.etc1a4.bin: %.etc1a4.png
+	@$(3DSTEX) -o $@ --etc1a4 $<
+# Fallback for .png files without a format suffix: 3dstex runs without a format flag
+$(ROMFS)/%.bin: %.png
+	@$(3DSTEX) -o $@ $<
+
+#---------------------------------------------------------------------------------
+else
+
+DEPENDS := $(OFILES:.o=.d)
+# The .d files listed in DEPENDS are pulled in by the -include at the end of this branch
+#---------------------------------------------------------------------------------
+# main targets
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(NO_SMDH)),)
+.PHONY: all
+all	:	$(OUTPUT).3dsx $(OUTPUT).smdh
+$(OUTPUT).smdh : $(TOPDIR)/Makefile
+$(OUTPUT).3dsx: $(OUTPUT).smdh
+endif
+$(OUTPUT).3dsx: $(OUTPUT).elf
+$(OUTPUT).elf:  $(OFILES)
+
+#---------------------------------------------------------------------------------
+# you need a rule like this for each extension you use as binary data
+#---------------------------------------------------------------------------------
+%.bin.o: %.bin
+#---------------------------------------------------------------------------------
+	@echo $(notdir $<)
+	@$(bin2o)
+
+#---------------------------------------------------------------------------------
+# rules for assembling GPU shaders
+#---------------------------------------------------------------------------------
+define shader-as
+	$(eval CURBIN := $(patsubst %.shbin.o,%.shbin,$(notdir $@)))
+	picasso -o $(CURBIN) $1
+	bin2s $(CURBIN) | $(AS) -o $@
+	echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
+	echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
+	echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
+endef
+# shader-as: picasso builds the .shbin, bin2s|as wraps it in an object, the echo lines write its header
+%.shbin.o : %.v.pica %.g.pica
+	@echo $(notdir $^)
+	@$(call shader-as,$^)
+
+%.shbin.o : %.v.pica
+	@echo $(notdir $<)
+	@$(call shader-as,$<)
+
+%.shbin.o : %.shlist
+	@echo $(notdir $<)
+	@$(call shader-as,$(foreach file,$(shell cat $<),$(dir $<)/$(file)))
+
+-include $(DEPENDS)
+
+#---------------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------------
diff --git a/test/3ds/logo48.png b/test/3ds/logo48.png
new file mode 100644
index 0000000..e754bc3
Binary files /dev/null and b/test/3ds/logo48.png differ
diff --git a/test/3ds/logo64.png b/test/3ds/logo64.png
new file mode 100644
index 0000000..c16191c
Binary files /dev/null and b/test/3ds/logo64.png differ
diff --git a/test/3ds/romfs/logo.bin b/test/3ds/romfs/logo.bin
new file mode 100644
index 0000000..621baff
Binary files /dev/null and b/test/3ds/romfs/logo.bin differ
diff --git a/test/3ds/source/main.cpp b/test/3ds/source/main.cpp
new file mode 100644
index 0000000..ec8f7f5
--- /dev/null
+++ b/test/3ds/source/main.cpp
@@ -0,0 +1,927 @@
+#include <algorithm>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <3ds.h>
+#include <citro3d.h>
+
+#include "vshader_shbin.h"
+
+#define CLEAR_COLOR 0x777777FF
+
+#define DISPLAY_TRANSFER_FLAGS \
+  (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+   GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+   GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+
+namespace
+{
+
+template<typename T>
+inline T clamp(T val, T min, T max)
+{
+  return std::max<T>(min, (val < max) ? val : max); // cap at max first, then raise to min
+}
+
+typedef struct // one vertex as copied into the VBO; sceneInit passes sizeof(attribute_t) as the stride
+{
+  float position[3]; // declared as v0 (3 floats) in sceneInit
+  float texcoord[2]; // declared as v1 (2 floats) in sceneInit
+  float normal[3];   // declared as v2 (3 floats) in sceneInit
+} attribute_t;
+
+const attribute_t attribute_list[] = // 6 faces x 2 triangles x 3 vertices, all positions at +-0.5
+{
+  { { -0.5f, -0.5f,  0.5f }, { 0.0f, 0.0f }, { 0.0f, 0.0f,  1.0f } }, // face with normal (0, 0, +1)
+  { {  0.5f, -0.5f,  0.5f }, { 1.0f, 0.0f }, { 0.0f, 0.0f,  1.0f } },
+  { {  0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, { 0.0f, 0.0f,  1.0f } },
+  { {  0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, { 0.0f, 0.0f,  1.0f } },
+  { { -0.5f,  0.5f,  0.5f }, { 0.0f, 1.0f }, { 0.0f, 0.0f,  1.0f } },
+  { { -0.5f, -0.5f,  0.5f }, { 0.0f, 0.0f }, { 0.0f, 0.0f,  1.0f } },
+
+  { { -0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, { 0.0f, 0.0f, -1.0f } }, // face with normal (0, 0, -1)
+  { { -0.5f,  0.5f, -0.5f }, { 1.0f, 0.0f }, { 0.0f, 0.0f, -1.0f } },
+  { {  0.5f,  0.5f, -0.5f }, { 1.0f, 1.0f }, { 0.0f, 0.0f, -1.0f } },
+  { {  0.5f,  0.5f, -0.5f }, { 1.0f, 1.0f }, { 0.0f, 0.0f, -1.0f } },
+  { {  0.5f, -0.5f, -0.5f }, { 0.0f, 1.0f }, { 0.0f, 0.0f, -1.0f } },
+  { { -0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, { 0.0f, 0.0f, -1.0f } },
+
+  { {  0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, {  1.0f, 0.0f, 0.0f } }, // face with normal (+1, 0, 0)
+  { {  0.5f,  0.5f, -0.5f }, { 1.0f, 0.0f }, {  1.0f, 0.0f, 0.0f } },
+  { {  0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, {  1.0f, 0.0f, 0.0f } },
+  { {  0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, {  1.0f, 0.0f, 0.0f } },
+  { {  0.5f, -0.5f,  0.5f }, { 0.0f, 1.0f }, {  1.0f, 0.0f, 0.0f } },
+  { {  0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, {  1.0f, 0.0f, 0.0f } },
+
+  { { -0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, { -1.0f, 0.0f, 0.0f } }, // face with normal (-1, 0, 0)
+  { { -0.5f, -0.5f,  0.5f }, { 1.0f, 0.0f }, { -1.0f, 0.0f, 0.0f } },
+  { { -0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, { -1.0f, 0.0f, 0.0f } },
+  { { -0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, { -1.0f, 0.0f, 0.0f } },
+  { { -0.5f,  0.5f, -0.5f }, { 0.0f, 1.0f }, { -1.0f, 0.0f, 0.0f } },
+  { { -0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, { -1.0f, 0.0f, 0.0f } },
+
+  { { -0.5f,  0.5f, -0.5f }, { 0.0f, 0.0f }, { 0.0f,  1.0f, 0.0f } }, // face with normal (0, +1, 0)
+  { { -0.5f,  0.5f,  0.5f }, { 1.0f, 0.0f }, { 0.0f,  1.0f, 0.0f } },
+  { {  0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, { 0.0f,  1.0f, 0.0f } },
+  { {  0.5f,  0.5f,  0.5f }, { 1.0f, 1.0f }, { 0.0f,  1.0f, 0.0f } },
+  { {  0.5f,  0.5f, -0.5f }, { 0.0f, 1.0f }, { 0.0f,  1.0f, 0.0f } },
+  { { -0.5f,  0.5f, -0.5f }, { 0.0f, 0.0f }, { 0.0f,  1.0f, 0.0f } },
+
+  { { -0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, { 0.0f, -1.0f, 0.0f } }, // face with normal (0, -1, 0)
+  { {  0.5f, -0.5f, -0.5f }, { 1.0f, 0.0f }, { 0.0f, -1.0f, 0.0f } },
+  { {  0.5f, -0.5f,  0.5f }, { 1.0f, 1.0f }, { 0.0f, -1.0f, 0.0f } },
+  { {  0.5f, -0.5f,  0.5f }, { 1.0f, 1.0f }, { 0.0f, -1.0f, 0.0f } },
+  { { -0.5f, -0.5f,  0.5f }, { 0.0f, 1.0f }, { 0.0f, -1.0f, 0.0f } },
+  { { -0.5f, -0.5f, -0.5f }, { 0.0f, 0.0f }, { 0.0f, -1.0f, 0.0f } },
+};
+// Number of vertices in attribute_list; used as the vertex count of the draw calls
+#define attribute_list_count (sizeof(attribute_list)/sizeof(attribute_list[0]))
+
+int uLoc_projection, uLoc_modelView, uLoc_texView;
+int uLoc_lightVec, uLoc_lightHalfVec, uLoc_lightClr, uLoc_material;
+C3D_Mtx material = // uploaded to the "material" uniform by the tests
+{
+  { // NOTE(review): FVec4_New initializes a C3D_FVec as {w, z, y, x}, so each row presumably lists w first - confirm
+    { { 0.0f, 0.2f, 0.2f, 0.2f } }, // Ambient
+    { { 0.0f, 0.4f, 0.4f, 0.4f } }, // Diffuse
+    { { 0.0f, 0.8f, 0.8f, 0.8f } }, // Specular
+    { { 1.0f, 0.0f, 0.0f, 0.0f } }, // Emission
+  }
+};
+
+struct // one entry per texture file loaded by sceneInit
+{
+  C3D_Tex      tex;           // filled in by C3D_TexInit / C3D_TexUpload in sceneInit
+  const char   *path;         // file opened with ::open in sceneInit
+  size_t       width, height; // passed to C3D_TexInit
+  GPU_TEXCOLOR format;        // passed to C3D_TexInit
+} texture[] =
+{
+  { {}, "romfs:/logo.bin",  64, 64, GPU_RGBA8, },
+};
+// Number of entries in texture[]
+#define num_textures (sizeof(texture)/sizeof(texture[0]))
+  
+void *vbo_data;
+
+void sceneInit(shaderProgram_s *program) // one-time setup: uniform locations, vertex layout, VBO, textures, texenv
+{
+  uLoc_projection   = shaderInstanceGetUniformLocation(program->vertexShader, "projection");
+  uLoc_modelView    = shaderInstanceGetUniformLocation(program->vertexShader, "modelView");
+  uLoc_texView      = shaderInstanceGetUniformLocation(program->vertexShader, "texView");
+  uLoc_lightVec     = shaderInstanceGetUniformLocation(program->vertexShader, "lightVec");
+  uLoc_lightHalfVec = shaderInstanceGetUniformLocation(program->vertexShader, "lightHalfVec");
+  uLoc_lightClr     = shaderInstanceGetUniformLocation(program->vertexShader, "lightClr");
+  uLoc_material     = shaderInstanceGetUniformLocation(program->vertexShader, "material");
+
+  // Configure attributes for use with the vertex shader
+  C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
+  AttrInfo_Init(attrInfo);
+  AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
+  AttrInfo_AddLoader(attrInfo, 1, GPU_FLOAT, 2); // v1=texcoord
+  AttrInfo_AddLoader(attrInfo, 2, GPU_FLOAT, 3); // v2=normal
+
+  // Create the VBO (vertex buffer object): a linearAlloc'd copy of attribute_list
+  vbo_data = linearAlloc(sizeof(attribute_list));
+  std::memcpy(vbo_data, attribute_list, sizeof(attribute_list));
+
+  // Configure buffers: one buffer, stride sizeof(attribute_t), 3 attributes
+  C3D_BufInfo* bufInfo = C3D_GetBufInfo();
+  BufInfo_Init(bufInfo);
+  BufInfo_Add(bufInfo, vbo_data, sizeof(attribute_t), 3, 0x210);
+  
+  // Load every texture file; texture 0 is bound to the first texture unit afterwards
+  for(size_t i = 0; i < num_textures; ++i)
+  {
+    struct stat st;
+    int         fd      = ::open(texture[i].path, O_RDONLY);
+    size_t      size    = (fd >= 0 && ::fstat(fd, &st) == 0) ? static_cast<size_t>(st.st_size) : 0;
+    void        *buffer = (size > 0) ? std::malloc(size) : nullptr;
+    void        *p      = buffer;
+    bool        loaded  = (buffer != nullptr); // cleared by any open/stat/alloc/read failure
+    while(loaded && size > 0)
+    {
+      ssize_t rc = ::read(fd, p, size);
+      loaded     = (rc > 0 && static_cast<size_t>(rc) <= size);
+      if(!loaded)
+        break; // read error, or the file ended before the size reported by fstat
+      size -= rc;
+      p    =  reinterpret_cast<char*>(p) + rc;
+    }
+    if(fd >= 0)
+      ::close(fd);
+
+    // The texture is always initialized; pixel data is only uploaded when the whole file was read
+    C3D_TexInit(&texture[i].tex, texture[i].width, texture[i].height, texture[i].format);
+    if(loaded)
+      C3D_TexUpload(&texture[i].tex, buffer);
+    else
+      std::printf("failed to load %s\n", texture[i].path);
+    C3D_TexSetFilter(&texture[i].tex, GPU_LINEAR, GPU_NEAREST);
+    std::free(buffer);
+  }
+
+  C3D_TexBind(0, &texture[0].tex);
+
+  // Configure the first fragment shading substage to blend the texture color with
+  // the vertex color (calculated by the vertex shader using a lighting algorithm)
+  // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+  C3D_TexEnv* env = C3D_GetTexEnv(0);
+  C3D_TexEnvSrc(env, C3D_Both, GPU_TEXTURE0, GPU_PRIMARY_COLOR, 0);
+  C3D_TexEnvOp(env, C3D_Both, 0, 0, 0);
+  C3D_TexEnvFunc(env, C3D_Both, GPU_MODULATE);
+}
+
+void sceneExit()
+{
+  // Undo sceneInit(): delete every texture first, then free the vertex buffer
+  for(auto &entry : texture)
+    C3D_TexDelete(&entry.tex);
+  linearFree(vbo_data);
+}
+
+void persp_tilt_test() // interactive test: spinning cube drawn through Mtx_PerspTilt until START/SELECT
+{
+  C3D_RenderTarget *top;
+  C3D_Mtx          projection;
+  C3D_Mtx          modelView;
+  C3D_Mtx          texView;
+  float            x = 0.0f, y = 0.0f, z = -2.0f,
+                   old_x = x, old_y = y, old_z = z;
+  float            angle = 0.0f;
+  // 240x400 render target, cleared to CLEAR_COLOR, shown on GFX_TOP / GFX_LEFT
+  top = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(top, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetOutput(top, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+  // 60 degrees converted to radians, 400/240 ratio, near plane 1, far plane 10
+  Mtx_PerspTilt(&projection, 60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f);
+
+  Mtx_Identity(&modelView);
+  Mtx_Translate(&modelView, x, y, z, true);
+
+  Mtx_Identity(&texView);
+  // Upload the matrices and the fixed light / material parameters
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView,  &modelView);
+  C3D_FVUnifMtx2x4(GPU_VERTEX_SHADER, uLoc_texView,    &texView);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_material,   &material);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightVec,     0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightHalfVec, 0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightClr,     1.0f, 1.0f,  1.0f, 1.0f);
+
+  C3D_TexBind(0, &texture[0].tex);
+  // Clear the console (ESC[2J) and print the key bindings with the start position
+  std::printf("\x1b[2J");
+  std::printf("(LEFT/RIGHT) x %.1f\n", x);
+  std::printf("(UP/DOWN)    y %.1f\n", y);
+  std::printf("(L/R)        z %.1f\n", z);
+
+  while(aptMainLoop())
+  {
+    gspWaitForVBlank();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+    u32 held = hidKeysHeld();
+    if(down & (KEY_START|KEY_SELECT))
+      break;
+    // Remember the previous position so the console is only rewritten on change
+    old_x = x;
+    old_y = y;
+    old_z = z;
+    // Each pressed or held key moves by 0.1, limited to [-10, 10] per axis
+    if((down | held) & KEY_LEFT)
+      x = clamp(x - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_RIGHT)
+      x = clamp(x + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_UP)
+      y = clamp(y + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_DOWN)
+      y = clamp(y - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_L)
+      z = clamp(z + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_R)
+      z = clamp(z - 0.1f, -10.0f, 10.0f);
+
+    if((x != old_x) || (y != old_y) || (z != old_z))
+    {
+      std::printf("\x1b[0;0H");
+      std::printf("(LEFT/RIGHT) x %.1f\n", x);
+      std::printf("(UP/DOWN)    y %.1f\n", y);
+      std::printf("(L/R)        z %.1f\n", z);
+    }
+    // Rebuild the model-view matrix from scratch: translation, then the Y spin (both right-multiplied)
+    Mtx_Identity(&modelView);
+    Mtx_Translate(&modelView, x, y, z, true);
+    Mtx_RotateY(&modelView, angle*M_TAU/360.0f, true);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+    // One degree per frame, restarting at 0 once 360 is reached
+    angle += 1.0f;
+    if(angle >= 360.0f)
+      angle = 0.0f;
+
+    C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+    C3D_FrameDrawOn(top);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+    C3D_FrameEnd(0);
+  }
+
+  C3D_RenderTargetDelete(top);
+}
+
+void ortho_tilt_test() // interactive test: spinning cube drawn through Mtx_OrthoTilt until START/SELECT
+{
+  C3D_RenderTarget *top;
+  C3D_Mtx          projection;
+  C3D_Mtx          modelView;
+  C3D_Mtx          texView;
+  float            x = 0.0f, y = 0.0f, z = 0.0f,
+                   old_x = x, old_y = y, old_z = z;
+  float            angle = 0.0f;
+  // 240x400 render target, cleared to CLEAR_COLOR, shown on GFX_TOP / GFX_LEFT
+  top = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(top, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetOutput(top, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+
+  Mtx_OrthoTilt(&projection, 0.0f, 400.0f, 0.0f, 240.0f, 100.0f, -100.0f);
+
+  Mtx_Identity(&modelView); // defined contents for the first upload; rebuilt every frame in the loop
+  Mtx_Identity(&texView);
+  // Upload the matrices and the fixed light / material parameters
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView,  &modelView);
+  C3D_FVUnifMtx2x4(GPU_VERTEX_SHADER, uLoc_texView,    &texView);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_material,   &material);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightVec,     0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightHalfVec, 0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightClr,     1.0f, 1.0f,  1.0f, 1.0f);
+
+  C3D_TexBind(0, &texture[0].tex);
+  // Clear the console (ESC[2J) and print the key bindings with the start position
+  std::printf("\x1b[2J");
+  std::printf("(LEFT/RIGHT) x %.1f\n", x);
+  std::printf("(UP/DOWN)    y %.1f\n", y);
+  std::printf("(L/R)        z %.1f\n", z);
+
+  while(aptMainLoop())
+  {
+    gspWaitForVBlank();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+    u32 held = hidKeysHeld();
+    if(down & (KEY_START|KEY_SELECT))
+      break;
+    // Remember the previous position so the console is only rewritten on change
+    old_x = x;
+    old_y = y;
+    old_z = z;
+    // Each pressed or held key moves by 1; x in [0, 400], y in [0, 240], z in [-100, 100]
+    if((down | held) & KEY_LEFT)
+      x = clamp(x - 1.0f, 0.0f, 400.0f);
+    if((down | held) & KEY_RIGHT)
+      x = clamp(x + 1.0f, 0.0f, 400.0f);
+    if((down | held) & KEY_UP)
+      y = clamp(y + 1.0f, 0.0f, 240.0f);
+    if((down | held) & KEY_DOWN)
+      y = clamp(y - 1.0f, 0.0f, 240.0f);
+    if((down | held) & KEY_L)
+      z = clamp(z + 1.0f, -100.0f, 100.0f);
+    if((down | held) & KEY_R)
+      z = clamp(z - 1.0f, -100.0f, 100.0f);
+
+    if((x != old_x) || (y != old_y) || (z != old_z))
+    {
+      std::printf("\x1b[0;0H");
+      std::printf("(LEFT/RIGHT) x %.1f\n", x);
+      std::printf("(UP/DOWN)    y %.1f\n", y);
+      std::printf("(L/R)        z %.1f\n", z);
+    }
+    // Rebuild the model-view matrix from scratch: translation, 64x scale, then the Y spin
+    Mtx_Identity(&modelView);
+    Mtx_Translate(&modelView, x, y, z, true);
+    Mtx_Scale(&modelView, 64.0f, 64.0f, 64.0f);
+    Mtx_RotateY(&modelView, angle*M_TAU/360.0f, true);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+    // One degree per frame, restarting at 0 once 360 is reached
+    angle += 1.0f;
+    if(angle >= 360.0f)
+      angle = 0.0f;
+
+    C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+    C3D_FrameDrawOn(top);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+    C3D_FrameEnd(0);
+  }
+
+  C3D_RenderTargetDelete(top);
+}
+
+void stereo_tilt_test() // Interactive Mtx_PerspStereoTilt demo with 3D enabled; returns on START/SELECT or when aptMainLoop() ends
+{
+  C3D_RenderTarget *topLeft, *topRight;
+  C3D_Mtx          projLeft, projRight;
+  C3D_Mtx          modelView;
+  C3D_Mtx          texView;
+  float            x = 0.0f, y = 0.0f, z = -2.0f,
+                   old_x = x, old_y = y, old_z = z;
+  float            iod = osGet3DSliderState(), old_iod = iod; // eye offset taken straight from the 3D slider
+  float            focLen = 2.0f, old_focLen = focLen;
+  float            angle = 0.0f; // spin about Y in degrees (scaled by M_TAU/360 when used)
+
+  gfxSet3D(true);
+
+  topLeft  = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  topRight = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(topLeft,  C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetClear(topRight, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetOutput(topLeft,  GFX_TOP, GFX_LEFT,  DISPLAY_TRANSFER_FLAGS);
+  C3D_RenderTargetSetOutput(topRight, GFX_TOP, GFX_RIGHT, DISPLAY_TRANSFER_FLAGS);
+
+  Mtx_PerspStereoTilt(&projLeft,  60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f, -iod, focLen);
+  Mtx_PerspStereoTilt(&projRight, 60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f,  iod, focLen);
+  Mtx_Identity(&modelView); // must hold defined data before the upload below; the loop rebuilds it every frame
+  Mtx_Identity(&texView);
+
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView,  &modelView);
+  C3D_FVUnifMtx2x4(GPU_VERTEX_SHADER, uLoc_texView,    &texView);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_material,   &material);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightVec,     0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightHalfVec, 0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightClr,     1.0f, 1.0f,  1.0f, 1.0f);
+
+  C3D_TexBind(0, &texture[0].tex);
+
+  std::printf("\x1b[2J"); // clear the console, then print the initial state
+  std::printf("(LEFT/RIGHT) x      %.1f\n", x);
+  std::printf("(UP/DOWN)    y      %.1f\n", y);
+  std::printf("(L/R)        z      %.1f\n", z);
+  std::printf("(Y/A)        focLen %.1f\n", focLen);
+  std::printf("(3D Slider)  iod    %.1f\n", iod);
+
+  while(aptMainLoop())
+  {
+    gspWaitForVBlank();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+    u32 held = hidKeysHeld();
+    if(down & (KEY_START|KEY_SELECT))
+      break;
+
+    old_x      = x; // snapshot last frame's state to detect changes below
+    old_y      = y;
+    old_z      = z;
+    old_focLen = focLen;
+    old_iod    = iod;
+
+    if((down | held) & KEY_LEFT) // keys act on the initial press and while held
+      x = clamp(x - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_RIGHT)
+      x = clamp(x + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_UP)
+      y = clamp(y + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_DOWN)
+      y = clamp(y - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_L)
+      z = clamp(z + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_R)
+      z = clamp(z - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_Y)
+      focLen = clamp(focLen - 0.1f, 0.1f, 10.0f);
+    if((down | held) & KEY_A)
+      focLen = clamp(focLen + 0.1f, 0.1f, 10.0f);
+
+    iod = osGet3DSliderState();
+
+    if((x != old_x) || (y != old_y) || (z != old_z)
+    || (focLen != old_focLen) || (iod != old_iod))
+    {
+      std::printf("\x1b[0;0H"); // cursor back to the top-left, overwrite the five lines
+      std::printf("(LEFT/RIGHT) x      %.1f\n", x);
+      std::printf("(UP/DOWN)    y      %.1f\n", y);
+      std::printf("(L/R)        z      %.1f\n", z);
+      std::printf("(Y/A)        focLen %.1f\n", focLen);
+      std::printf("(3D Slider)  iod    %.1f\n", iod);
+
+      Mtx_PerspStereoTilt(&projLeft,  60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f, -iod, focLen);
+      Mtx_PerspStereoTilt(&projRight, 60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f,  iod, focLen);
+    }
+
+    Mtx_Identity(&modelView); // rebuilt from scratch every frame
+    Mtx_Translate(&modelView, x, y, z, true);
+    Mtx_RotateY(&modelView, angle*M_TAU/360.0f, true);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    angle += 1.0f; // one degree per frame, wrapping after a full turn
+    if(angle >= 360.0f)
+      angle = 0.0f;
+
+    C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+
+    C3D_FrameDrawOn(topLeft); // left eye is always drawn
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projLeft);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+
+    if(iod > 0.0f) // right eye only when the slider reports a non-zero value
+    {
+      C3D_FrameDrawOn(topRight);
+      C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projRight);
+      C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+    }
+
+    C3D_FrameEnd(0);
+  }
+
+  C3D_RenderTargetDelete(topLeft);
+  C3D_RenderTargetDelete(topRight);
+
+  gfxSet3D(false);
+}
+
+void persp_test() // Mtx_Persp demo: draws the scene into an off-screen target, then shows that target on the top screen
+{
+  C3D_RenderTarget *top, *tex;
+  C3D_Mtx          projTop, projTex;
+  C3D_Mtx          modelView;
+  C3D_Mtx          texView;
+  float            x = 0.0f, y = 0.0f, z = -2.0f,
+                   old_x = x, old_y = y, old_z = z;
+  float            angle = 0.0f; // spin about Y in degrees (scaled by M_TAU/360 when used)
+
+  top = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(top, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetOutput(top, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+
+  tex = C3D_RenderTargetCreate(512, 256, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); // never given an output; only sampled as a texture below
+  C3D_RenderTargetSetClear(tex, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_TexSetFilter(&tex->renderBuf.colorBuf, GPU_LINEAR, GPU_NEAREST);
+
+  Mtx_Persp(&projTex, 60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f); // projection under test, used for the off-screen pass
+
+  Mtx_Identity(&modelView);
+  Mtx_Translate(&modelView, x, y, z, true);
+
+  Mtx_OrthoTilt(&projTop, -0.5f, 0.5f, -0.5f, 0.5f, 100.0f, -100.0f); // projection for the on-screen pass
+  Mtx_Identity(&texView);
+
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView,  &modelView);
+  C3D_FVUnifMtx2x4(GPU_VERTEX_SHADER, uLoc_texView,    &texView);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_material,   &material);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightVec,     0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightHalfVec, 0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightClr,     1.0f, 1.0f,  1.0f, 1.0f);
+
+  std::printf("\x1b[2J"); // clear the console, then print the initial position
+  std::printf("(LEFT/RIGHT) x %.1f\n", x);
+  std::printf("(UP/DOWN)    y %.1f\n", y);
+  std::printf("(L/R)        z %.1f\n", z);
+
+  while(aptMainLoop())
+  {
+    gspWaitForVBlank();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+    u32 held = hidKeysHeld();
+    if(down & (KEY_START|KEY_SELECT))
+      break;
+
+    old_x      = x; // snapshot last frame's position to detect changes below
+    old_y      = y;
+    old_z      = z;
+
+    if((down | held) & KEY_LEFT) // keys act on the initial press and while held
+      x = clamp(x - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_RIGHT)
+      x = clamp(x + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_UP)
+      y = clamp(y + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_DOWN)
+      y = clamp(y - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_L)
+      z = clamp(z + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_R)
+      z = clamp(z - 0.1f, -10.0f, 10.0f);
+
+    if((x != old_x) || (y != old_y) || (z != old_z))
+    {
+      std::printf("\x1b[0;0H"); // cursor back to the top-left, overwrite the three lines
+      std::printf("(LEFT/RIGHT) x %.1f\n", x);
+      std::printf("(UP/DOWN)    y %.1f\n", y);
+      std::printf("(L/R)        z %.1f\n", z);
+
+      Mtx_Persp(&projTex,  60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f); // same constant arguments as the setup call, so the matrix is unchanged
+    }
+
+    Mtx_Identity(&modelView); // rebuilt from scratch every frame
+    Mtx_Translate(&modelView, x, y, z, true);
+    Mtx_RotateY(&modelView, angle*M_TAU/360.0f, true);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    angle += 1.0f; // one degree per frame, wrapping after a full turn
+    if(angle >= 360.0f)
+      angle = 0.0f;
+
+    C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+
+    C3D_TexBind(0, &texture[0].tex); // rebind each frame: the second pass below replaces this binding
+
+    C3D_FrameDrawOn(tex); // pass 1: full vertex list into the off-screen target with projTex
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projTex);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projTop);
+    Mtx_Identity(&modelView);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    C3D_FrameDrawOn(top); // pass 2: first 6 vertices only, textured with pass 1's colour buffer
+    C3D_TexBind(0, &tex->renderBuf.colorBuf);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, 6);
+    C3D_FrameEnd(0);
+  }
+
+  C3D_RenderTargetDelete(top);
+  C3D_RenderTargetDelete(tex);
+}
+
+void stereo_test() // Mtx_PerspStereo demo: renders each eye into an off-screen target, then shows those targets on the top screen
+{
+  C3D_RenderTarget     *topLeft, *topRight, *texLeft, *texRight;
+  C3D_Mtx              projLeft, projRight, proj;
+  C3D_Mtx              modelView;
+  C3D_Mtx              texView;
+  float                x = 0.0f, y = 0.0f, z = -2.0f,
+                       old_x = x, old_y = y, old_z = z;
+  float                iod = osGet3DSliderState(), old_iod = iod; // eye offset taken straight from the 3D slider
+  float                focLen = 2.0f, old_focLen = focLen;
+  float                angle = 0.0f; // spin about Y in degrees (scaled by M_TAU/360 when used)
+
+  gfxSet3D(true);
+
+  topLeft  = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  topRight = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(topLeft,  C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetClear(topRight, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetOutput(topLeft,  GFX_TOP, GFX_LEFT,  DISPLAY_TRANSFER_FLAGS);
+  C3D_RenderTargetSetOutput(topRight, GFX_TOP, GFX_RIGHT, DISPLAY_TRANSFER_FLAGS);
+
+  texLeft  = C3D_RenderTargetCreate(512, 256, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); // never given an output; only sampled as textures below
+  texRight = C3D_RenderTargetCreate(512, 256, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(texLeft,  C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetClear(texRight, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_TexSetFilter(&texLeft->renderBuf.colorBuf, GPU_LINEAR, GPU_NEAREST);
+  C3D_TexSetFilter(&texRight->renderBuf.colorBuf, GPU_LINEAR, GPU_NEAREST);
+
+  Mtx_PerspStereo(&projLeft,  60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f, -iod, focLen); // the two eyes differ only in the sign of iod
+  Mtx_PerspStereo(&projRight, 60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f,  iod, focLen);
+
+  Mtx_Identity(&modelView);
+  Mtx_Translate(&modelView, x, y, z, true);
+
+  Mtx_OrthoTilt(&proj, -0.5f, 0.5f, -0.5f, 0.5f, 100.0f, -100.0f); // projection for the on-screen passes
+  Mtx_Identity(&texView);
+
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView,  &modelView);
+  C3D_FVUnifMtx2x4(GPU_VERTEX_SHADER, uLoc_texView,    &texView);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_material,   &material);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightVec,     0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightHalfVec, 0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightClr,     1.0f, 1.0f,  1.0f, 1.0f);
+
+  std::printf("\x1b[2J"); // clear the console, then print the initial state
+  std::printf("(LEFT/RIGHT) x      %.1f\n", x);
+  std::printf("(UP/DOWN)    y      %.1f\n", y);
+  std::printf("(L/R)        z      %.1f\n", z);
+  std::printf("(Y/A)        focLen %.1f\n", focLen);
+  std::printf("(3D Slider)  iod    %.1f\n", iod);
+
+  while(aptMainLoop())
+  {
+    gspWaitForVBlank();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+    u32 held = hidKeysHeld();
+    if(down & (KEY_START|KEY_SELECT))
+      break;
+
+    old_x      = x; // snapshot last frame's state to detect changes below
+    old_y      = y;
+    old_z      = z;
+    old_focLen = focLen;
+    old_iod    = iod;
+
+    if((down | held) & KEY_LEFT) // keys act on the initial press and while held
+      x = clamp(x - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_RIGHT)
+      x = clamp(x + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_UP)
+      y = clamp(y + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_DOWN)
+      y = clamp(y - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_L)
+      z = clamp(z + 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_R)
+      z = clamp(z - 0.1f, -10.0f, 10.0f);
+    if((down | held) & KEY_Y)
+      focLen = clamp(focLen - 0.1f, 0.1f, 10.0f);
+    if((down | held) & KEY_A)
+      focLen = clamp(focLen + 0.1f, 0.1f, 10.0f);
+
+    iod = osGet3DSliderState();
+
+    if((x != old_x) || (y != old_y) || (z != old_z)
+    || (focLen != old_focLen) || (iod != old_iod))
+    {
+      std::printf("\x1b[0;0H"); // cursor back to the top-left, overwrite the five lines
+      std::printf("(LEFT/RIGHT) x      %.1f\n", x);
+      std::printf("(UP/DOWN)    y      %.1f\n", y);
+      std::printf("(L/R)        z      %.1f\n", z);
+      std::printf("(Y/A)        focLen %.1f\n", focLen);
+      std::printf("(3D Slider)  iod    %.1f\n", iod);
+
+      Mtx_PerspStereo(&projLeft,  60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f, -iod, focLen);
+      Mtx_PerspStereo(&projRight, 60.0f*M_TAU/360.0f, 400.0f/240.0f, 1.0f, 10.0f,  iod, focLen);
+    }
+
+    Mtx_Identity(&modelView); // rebuilt from scratch every frame
+    Mtx_Translate(&modelView, x, y, z, true);
+    Mtx_RotateY(&modelView, angle*M_TAU/360.0f, true);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    angle += 1.0f; // one degree per frame, wrapping after a full turn
+    if(angle >= 360.0f)
+      angle = 0.0f;
+
+    C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+
+    C3D_TexBind(0, &texture[0].tex); // rebind each frame: the on-screen passes below replace this binding
+
+    C3D_FrameDrawOn(texLeft); // pass 1: left eye into its off-screen target
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projLeft);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+
+    if(iod > 0.0f) // right eye only when the slider reports a non-zero value
+    {
+      C3D_FrameDrawOn(texRight);
+      C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projRight);
+      C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+    }
+
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &proj);
+    Mtx_Identity(&modelView);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    C3D_FrameDrawOn(topLeft); // pass 2: show each eye's off-screen colour buffer on the matching screen target
+    C3D_TexBind(0, &texLeft->renderBuf.colorBuf);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count); // NOTE(review): persp_test/ortho_test draw only 6 vertices at this step - confirm the full list is intended
+    if(iod > 0.0f)
+    {
+      C3D_FrameDrawOn(topRight);
+      C3D_TexBind(0, &texRight->renderBuf.colorBuf);
+      C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+    }
+    C3D_FrameEnd(0);
+  }
+
+  C3D_RenderTargetDelete(topLeft);
+  C3D_RenderTargetDelete(topRight);
+  C3D_RenderTargetDelete(texLeft);
+  C3D_RenderTargetDelete(texRight);
+
+  gfxSet3D(false);
+}
+
+void ortho_test() // Mtx_Ortho demo: draws the scene into an off-screen target, then shows that target on the top screen
+{
+  C3D_RenderTarget *top, *tex;
+  C3D_Mtx          projTop, projTex;
+  C3D_Mtx          modelView;
+  C3D_Mtx          texView;
+  float            x = 0.0f, y = 0.0f, z = -2.0f,
+                   old_x = x, old_y = y, old_z = z;
+  float            angle = 0.0f; // spin about Y in degrees (scaled by M_TAU/360 when used)
+
+  top = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
+  C3D_RenderTargetSetClear(top, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_RenderTargetSetOutput(top, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
+
+  tex = C3D_RenderTargetCreate(512, 256, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8); // never given an output; only sampled as a texture below
+  C3D_RenderTargetSetClear(tex, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
+  C3D_TexSetFilter(&tex->renderBuf.colorBuf, GPU_LINEAR, GPU_NEAREST);
+
+  Mtx_Ortho(&projTex, 0.0f, 400.0f, 0.0f, 240.0f, 100.0f, -100.0f); // projection under test, used for the off-screen pass
+
+  Mtx_Identity(&modelView);
+  Mtx_Translate(&modelView, x, y, z, true);
+
+  Mtx_OrthoTilt(&projTop, -0.5f, 0.5f, -0.5f, 0.5f, 100.0f, -100.0f); // projection for the on-screen pass
+  Mtx_Identity(&texView);
+
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView,  &modelView);
+  C3D_FVUnifMtx2x4(GPU_VERTEX_SHADER, uLoc_texView,    &texView);
+  C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_material,   &material);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightVec,     0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightHalfVec, 0.0f, 0.0f, -1.0f, 0.0f);
+  C3D_FVUnifSet(GPU_VERTEX_SHADER, uLoc_lightClr,     1.0f, 1.0f,  1.0f, 1.0f);
+
+  std::printf("\x1b[2J"); // clear the console, then print the initial position
+  std::printf("(LEFT/RIGHT) x %.1f\n", x);
+  std::printf("(UP/DOWN)    y %.1f\n", y);
+  std::printf("(L/R)        z %.1f\n", z);
+
+  while(aptMainLoop())
+  {
+    gspWaitForVBlank();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+    u32 held = hidKeysHeld();
+    if(down & (KEY_START|KEY_SELECT))
+      break;
+
+    old_x      = x; // snapshot last frame's position to detect changes below
+    old_y      = y;
+    old_z      = z;
+
+    if((down | held) & KEY_LEFT) // keys act on the initial press and while held
+      x = clamp(x - 1.0f, 0.0f, 400.0f);
+    if((down | held) & KEY_RIGHT)
+      x = clamp(x + 1.0f, 0.0f, 400.0f);
+    if((down | held) & KEY_UP)
+      y = clamp(y + 1.0f, 0.0f, 240.0f);
+    if((down | held) & KEY_DOWN)
+      y = clamp(y - 1.0f, 0.0f, 240.0f);
+    if((down | held) & KEY_L)
+      z = clamp(z + 1.0f, -100.0f, 100.0f);
+    if((down | held) & KEY_R)
+      z = clamp(z - 1.0f, -100.0f, 100.0f);
+
+    if((x != old_x) || (y != old_y) || (z != old_z))
+    {
+      std::printf("\x1b[0;0H"); // cursor back to the top-left, overwrite the three lines
+      std::printf("(LEFT/RIGHT) x %.1f\n", x);
+      std::printf("(UP/DOWN)    y %.1f\n", y);
+      std::printf("(L/R)        z %.1f\n", z);
+    }
+
+    Mtx_Identity(&modelView); // rebuilt from scratch every frame
+    Mtx_Translate(&modelView, x, y, z, true);
+    Mtx_Scale(&modelView, 64.0f, 64.0f, 64.0f);
+    Mtx_RotateY(&modelView, angle*M_TAU/360.0f, true);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    angle += 1.0f; // one degree per frame, wrapping after a full turn
+    if(angle >= 360.0f)
+      angle = 0.0f;
+
+    C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
+
+    C3D_TexBind(0, &texture[0].tex); // rebind each frame: the second pass below replaces this binding
+
+    C3D_FrameDrawOn(tex); // pass 1: full vertex list into the off-screen target with projTex
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projTex);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, attribute_list_count);
+
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projTop);
+    Mtx_Identity(&modelView);
+    C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+
+    C3D_FrameDrawOn(top); // pass 2: first 6 vertices only, textured with pass 1's colour buffer
+    C3D_TexBind(0, &tex->renderBuf.colorBuf);
+    C3D_DrawArrays(GPU_TRIANGLES, 0, 6);
+    C3D_FrameEnd(0);
+  }
+
+  C3D_RenderTargetDelete(top);
+  C3D_RenderTargetDelete(tex);
+}
+
+typedef struct // one selectable entry of the test menu
+{
+  const char *name; // label printed by print_choices()
+  void (*test)();   // entry point invoked when the entry is chosen
+} test_t;
+
+test_t tests[] = // menu table; print_choices() lists the entries in this order, one per row
+{
+  { "Mtx_PerspTilt",       persp_tilt_test,  },
+  { "Mtx_OrthoTilt",       ortho_tilt_test,  },
+  { "Mtx_PerspStereoTilt", stereo_tilt_test, },
+  { "Mtx_Persp",           persp_test,       },
+  { "Mtx_PerspStereo",     stereo_test,      },
+  { "Mtx_Ortho",           ortho_test,       },
+};
+
+const size_t num_tests = sizeof tests / sizeof *tests; // entry count, derived from the table so it cannot drift
+
+void print_choices(size_t choice)
+{ // clear the console, then list every test on its own row with '*' in front of `choice`
+  std::printf("\x1b[2J");
+  for(size_t row = 0; row != num_tests; ++row)
+    std::printf("\x1b[%zu;0H%c%s", row, (row == choice) ? '*' : ' ', tests[row].name);
+}
+
+}
+
+int main(int argc, char *argv[]) // menu front-end: pick a test with UP/DOWN, run it with A, quit with B (argc/argv are unused)
+{
+  size_t          choice = 0; // index into tests[] of the highlighted entry
+  shaderProgram_s program;
+  DVLB_s          *vsh_dvlb;
+
+  romfsInit();
+  gfxInitDefault();
+  gfxSet3D(false);
+  consoleInit(GFX_BOTTOM, nullptr); // text output goes to the bottom screen
+  C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
+
+  shaderProgramInit(&program);
+  vsh_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size); // vertex shader binary embedded in the executable
+  shaderProgramSetVsh(&program, &vsh_dvlb->DVLE[0]);
+  C3D_BindProgram(&program);
+
+  sceneInit(&program);
+
+  print_choices(choice);
+  while(aptMainLoop())
+  { // menu loop: reacts to newly pressed keys only
+    gfxFlushBuffers();
+    gspWaitForVBlank();
+    gfxSwapBuffers();
+
+    hidScanInput();
+    u32 down = hidKeysDown();
+
+    if(down & KEY_UP)
+    {
+      choice = (choice + num_tests - 1) % num_tests; // adding num_tests first keeps the unsigned value from wrapping below zero
+      print_choices(choice);
+    }
+    else if(down & KEY_DOWN)
+    {
+      choice = (choice + 1) % num_tests;
+      print_choices(choice);
+    }
+    else if(down & KEY_A)
+    {
+      tests[choice].test(); // blocks until the test returns
+      print_choices(choice); // the test printed over the console, so redraw the menu
+    }
+    else if(down & KEY_B)
+      break;
+  }
+
+  sceneExit();
+
+  shaderProgramFree(&program);
+  DVLB_Free(vsh_dvlb);
+  C3D_Fini();
+  gfxExit();
+  romfsExit();
+
+  return 0;
+}
diff --git a/test/3ds/source/vshader.v.pica b/test/3ds/source/vshader.v.pica
new file mode 100644
index 0000000..1068e28
--- /dev/null
+++ b/test/3ds/source/vshader.v.pica
@@ -0,0 +1,89 @@
+; PICA200 vertex shader: transforms positions and computes per-vertex lighting
+
+; Uniforms
+.fvec projection[4], modelView[4], texView[2]
+.fvec lightVec, lightHalfVec, lightClr, material[4]
+.alias mat_amb material[0]
+.alias mat_dif material[1]
+.alias mat_spe material[2]
+.alias mat_emi material[3]
+
+; Constants
+.constf myconst(0.0, 1.0, -1.0, -0.5)
+.alias  zeros myconst.xxxx ; Vector full of zeros
+.alias  ones  myconst.yyyy ; Vector full of ones
+
+; Outputs
+.out outpos position
+.out outtc0 texcoord0
+.out outclr color
+
+; Inputs (defined as aliases for convenience)
+.alias inpos v0
+.alias intex v1
+.alias innrm v2
+
+.proc main
+	; Force the w component of inpos to be 1.0
+	mov r0.xyz, inpos
+	mov r0.w,   ones
+
+	; r1 = modelView * (inpos.xyz, 1.0)
+	dp4 r1.x, modelView[0], r0
+	dp4 r1.y, modelView[1], r0
+	dp4 r1.z, modelView[2], r0
+	dp4 r1.w, modelView[3], r0
+
+	; outpos = projection * r1
+	dp4 outpos.x, projection[0], r1
+	dp4 outpos.y, projection[1], r1
+	dp4 outpos.z, projection[2], r1
+	dp4 outpos.w, projection[3], r1
+
+	; outtc0.xy = texView * intex (two rows only); outtc0.zw = (0.0, 1.0)
+	dp4 outtc0.x, texView[0], intex
+	dp4 outtc0.y, texView[1], intex
+	mov outtc0.zw, myconst.xy
+
+	; Transform the normal vector with the modelView matrix
+	; r1 = normalize(modelView * (innrm.xyz, 0.0))
+	mov r0.xyz, innrm
+	mov r0.w,   zeros
+	dp4 r1.x,   modelView[0], r0
+	dp4 r1.y,   modelView[1], r0
+	dp4 r1.z,   modelView[2], r0
+	mov r1.w,   zeros
+	dp3 r2,     r1, r1 ; r2 = x^2+y^2+z^2 for each component
+	rsq r2,     r2     ; r2 = 1/sqrt(r2)  ''
+	mul r1,     r2, r1 ; r1 = r1*r2
+
+	; Calculate the diffuse level (r0.x) and the shininess level (r0.y)
+	; r0.x = max(0, -(lightVec * r1))
+	; r0.y = max(0, (-lightHalfVec) * r1) ^ 2
+	dp3 r0.x, lightVec,      r1
+	add r0.x, zeros,         -r0
+	dp3 r0.y, -lightHalfVec, r1
+	max r0,   zeros,         r0
+	mul r0.y, r0,            r0
+
+	; Accumulate the vertex color in r1, initializing it to the emission color
+	mov r1, mat_emi
+
+	; r1 += specularColor * lightClr * shininessLevel
+	mul r2, lightClr, r0.yyyy
+	mad r1, r2, mat_spe, r1
+
+	; r1 += diffuseColor * lightClr * diffuseLevel
+	mul r2, lightClr, r0.xxxx
+	mad r1, r2, mat_dif, r1
+
+	; r1 += ambientColor * lightClr
+	mov r2, lightClr
+	mad r1, r2, mat_amb, r1
+
+	; outclr = min(1, r1): only the upper bound is applied here
+	min outclr, ones, r1
+
+	; We're finished
+	end
+.end
diff --git a/test/pc/.gitignore b/test/pc/.gitignore
new file mode 100644
index 0000000..b408773
--- /dev/null
+++ b/test/pc/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.o
+test
diff --git a/test/pc/Makefile b/test/pc/Makefile
new file mode 100644
index 0000000..593678c
--- /dev/null
+++ b/test/pc/Makefile
@@ -0,0 +1,31 @@
+TARGET   := test
+# Sources: every local .c/.cpp plus the library's maths implementation
+CFILES   := $(wildcard *.c) $(wildcard ../../source/maths/*.c)
+CXXFILES := $(wildcard *.cpp)
+OFILES   := $(CXXFILES:.cpp=.o) $(CFILES:.c=.o)
+DFILES   := $(wildcard *.d) $(wildcard ../../source/maths/*.d)
+# Flags: library headers come from ../../include; C++ is built as gnu++11
+CFLAGS   := -Wall -g -pipe -I../../include
+CXXFLAGS := $(CFLAGS) -std=gnu++11 -DGLM_FORCE_RADIANS
+LDFLAGS  := $(ARCH) -pipe -lm
+
+.PHONY: all clean
+
+all: $(TARGET)
+# Link with the C++ driver because the objects mix C and C++
+$(TARGET): $(OFILES)
+	@echo "Linking $@"
+	$(CXX) -o $@ $^ $(LDFLAGS)
+# Compile rules also write a .d dependency file beside each object (-MMD -MP -MF)
+%.o : %.cpp $(wildcard *.h)
+	@echo "Compiling $@"
+	@$(CXX) -o $@ -c $< $(CXXFLAGS) -MMD -MP -MF $*.d
+
+%.o : %.c $(wildcard *.h)
+	@echo "Compiling $@"
+	@$(CC) -o $@ -c $< $(CFLAGS) -MMD -MP -MF $*.d
+
+clean:
+	$(RM) $(OFILES) $(DFILES) $(TARGET)
+# Pull in the generated dependency files; the leading '-' ignores missing ones
+-include $(DFILES)
diff --git a/test/pc/main.cpp b/test/pc/main.cpp
new file mode 100644
index 0000000..0a50c5a
--- /dev/null
+++ b/test/pc/main.cpp
@@ -0,0 +1,846 @@
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <random>
+
+#include <glm/glm.hpp>
+#include <glm/gtc/matrix_transform.hpp>
+#include <glm/gtc/quaternion.hpp>
+#include <glm/gtx/quaternion.hpp>
+
+extern "C" {
+#include <c3d/maths.h>
+}
+
+typedef std::default_random_engine            generator_t;    // random source shared by all random* helpers
+typedef std::uniform_real_distribution<float> distribution_t; // float distribution the helpers draw from
+
+static inline void
+randomMatrix(C3D_Mtx &m, generator_t &g, distribution_t &d)
+{ // overwrite the first 16 floats of m, in storage order, with successive draws from d
+  float *cell = m.m;
+  for(float *const end = cell + 16; cell != end; ++cell) *cell = d(g);
+}
+
+static inline glm::vec3
+randomVector3(generator_t &g, distribution_t &d)
+{ // three independent draws; C++ leaves the evaluation order of the arguments unspecified
+  return glm::vec3(d(g), d(g), d(g));
+}
+
+static inline glm::vec4
+randomVector4(generator_t &g, distribution_t &d)
+{ // four independent draws; C++ leaves the evaluation order of the arguments unspecified
+  return glm::vec4(d(g), d(g), d(g), d(g));
+}
+
+static inline float
+randomAngle(generator_t &g, distribution_t &d)
+{ // an "angle" is a single raw draw; its range is whatever d was constructed with
+  const float angle = d(g); return angle;
+}
+
+static inline C3D_FQuat
+randomQuat(generator_t &g, distribution_t &d)
+{ // four independent draws, not normalized here; argument evaluation order is unspecified
+  return Quat_New(d(g), d(g), d(g), d(g));
+}
+
+static inline glm::mat4
+loadMatrix(const C3D_Mtx &m)
+{
+  glm::mat4 out; // every cell is assigned below: out[col][row] comes from m.m[row*4 + 3 - col]
+  for(size_t col = 0; col < 4; ++col)
+    for(size_t row = 0; row < 4; ++row) out[col][row] = m.m[row*4 + 3 - col];
+  return out;
+}
+
+static inline glm::quat
+loadQuat(const C3D_FQuat &q)
+{ // pass the components on in the order r, i, j, k
+  const glm::quat converted(q.r, q.i, q.j, q.k); return converted;
+}
+
+static inline bool
+operator==(const glm::vec3 &lhs, const C3D_FVec &rhs)
+{ // approximate equality: x, y and z must each differ by less than 0.001 (w is ignored)
+  if(!(std::abs(lhs.x - rhs.x) < 0.001f)) return false;
+  if(!(std::abs(lhs.y - rhs.y) < 0.001f)) return false;
+  return std::abs(lhs.z - rhs.z) < 0.001f;
+}
+
+static inline bool
+operator==(const C3D_FVec &lhs, const glm::vec3 &rhs)
+{ // mirror overload: same comparison with the operands swapped
+  return operator==(rhs, lhs);
+}
+
+static inline bool
+operator==(const glm::vec4 &lhs, const C3D_FVec &rhs)
+{ // approximate equality: all four components must each differ by less than 0.001
+  if(!(std::abs(lhs.x - rhs.x) < 0.001f)) return false;
+  if(!(std::abs(lhs.y - rhs.y) < 0.001f)) return false;
+  if(!(std::abs(lhs.z - rhs.z) < 0.001f)) return false;
+  return std::abs(lhs.w - rhs.w) < 0.001f;
+}
+
+static inline bool
+operator==(const C3D_FVec &lhs, const glm::vec4 &rhs)
+{ // mirror overload: same comparison with the operands swapped
+  return operator==(rhs, lhs);
+}
+
+static inline bool
+operator==(const glm::mat4 &lhs, const C3D_Mtx &rhs)
+{
+  // visit all 16 cells; lhs[col][row] is compared against rhs.m[row*4 + 3 - col]
+  for(size_t cell = 0; cell < 16; ++cell)
+  {
+    const size_t col = cell / 4;
+    const size_t row = cell % 4;
+    const float  delta = std::abs(lhs[col][row] - rhs.m[row*4 + 3 - col]);
+    if(delta > 0.001f) // a NaN delta is never "greater", so it does not count as a mismatch
+      return false;
+  }
+  return true;
+}
+
+static inline bool
+operator==(const C3D_Mtx &lhs, const glm::mat4 &rhs)
+{ // mirror overload: same comparison with the operands swapped
+  return operator==(rhs, lhs);
+}
+
+static inline bool
+operator==(const glm::quat &lhs, const C3D_FQuat &rhs)
+{
+  const float a[4] = { lhs.w, lhs.x, lhs.y, lhs.z };
+  const float b[4] = { rhs.r, rhs.i, rhs.j, rhs.k };
+
+  // any component that is NaN on both sides makes the whole pair compare equal
+  for(size_t n = 0; n < 4; ++n)
+    if(std::isnan(a[n]) && std::isnan(b[n])) return true;
+  // otherwise every component must agree to within 0.01
+  for(size_t n = 0; n < 4; ++n)
+    if(!(std::abs(a[n] - b[n]) < 0.01f)) return false;
+  return true;
+}
+
+static inline bool
+operator==(const C3D_FQuat &lhs, const glm::quat &rhs)
+{ // mirror overload: same comparison with the operands swapped
+  return operator==(rhs, lhs);
+}
+
+static inline bool
+operator==(const C3D_FQuat &lhs, const C3D_FQuat &rhs)
+{
+  const float a[4] = { lhs.r, lhs.i, lhs.j, lhs.k };
+  const float b[4] = { rhs.r, rhs.i, rhs.j, rhs.k };
+
+  // any component that is NaN on both sides makes the whole pair compare equal
+  for(size_t n = 0; n < 4; ++n)
+    if(std::isnan(a[n]) && std::isnan(b[n])) return true;
+  // otherwise every component must agree to within 0.01
+  for(size_t n = 0; n < 4; ++n)
+    if(!(std::abs(a[n] - b[n]) < 0.01f)) return false;
+  return true;
+}
+
+static inline void
+print(const C3D_FVec &v)
+{ // debug dump: a header naming this overload, then the components in the order w x y z
+  std::printf("%s:\n", __PRETTY_FUNCTION__);
+  std::printf("% 6.4f % 6.4f % 6.4f % 6.4f\n", v.w, v.x, v.y, v.z);
+}
+
+static inline void
+print(const glm::vec3 &v)
+{ // debug dump: a header naming this overload, then the components in the order x y z
+  std::printf("%s:\n", __PRETTY_FUNCTION__);
+  std::printf("% 6.4f % 6.4f % 6.4f\n", v.x, v.y, v.z);
+}
+
+static inline void
+print(const glm::vec4 &v)
+{ // debug dump: a header naming this overload, then the components in the order w x y z
+  std::printf("%s:\n", __PRETTY_FUNCTION__);
+  std::printf("% 6.4f % 6.4f % 6.4f % 6.4f\n", v.w, v.x, v.y, v.z); // "% " on every field, like the other dumps, so signs line up
+}
+
+static inline void
+print(const C3D_Mtx &m)
+{
+  std::printf("%s:\n", __PRETTY_FUNCTION__);
+  // one text line per group of four floats, emitted as elements 3, 2, 1, 0
+  for(size_t row = 0; row < 4; ++row)
+  {
+    const float *const cells = &m.m[row*4];
+    std::printf("% 6.4f % 6.4f % 6.4f % 6.4f\n",
+                cells[3], cells[2],
+                cells[1], cells[0]);
+  }
+}
+
+static inline void
+print(const glm::mat4 &m)
+{
+  std::printf("%s:\n", __PRETTY_FUNCTION__);
+  // one text line per second index: m[0][row] .. m[3][row]
+  for(size_t row = 0; row < 4; ++row)
+  {
+    const float c0 = m[0][row], c1 = m[1][row],
+                c2 = m[2][row], c3 = m[3][row];
+    std::printf("% 6.4f % 6.4f % 6.4f % 6.4f\n",
+                c0, c1, c2, c3);
+  }
+}
+
+static inline void
+print(const glm::quat &q)
+{ // debug dump: a header naming this overload, then the components in the order w x y z
+  std::printf("%s:\n", __PRETTY_FUNCTION__);
+  std::printf("% 6.4f % 6.4f % 6.4f % 6.4f\n", q.w, q.x, q.y, q.z);
+}
+
+static const glm::vec3 x_axis(1.0f, 0.0f, 0.0f); // unit vectors along each coordinate axis
+static const glm::vec3 y_axis(0.0f, 1.0f, 0.0f);
+static const glm::vec3 z_axis(0.0f, 0.0f, 1.0f);
+
+static void
+check_matrix(generator_t &gen, distribution_t &dist)
+{
+
+  glm::mat4 fix_depth(1.0f, 0.0f,  0.0f, 0.0f,
+                      0.0f, 1.0f,  0.0f, 0.0f,
+                      0.0f, 0.0f,  0.5f, 0.0f,
+                      0.0f, 0.0f, -0.5f, 1.0f);
+  glm::mat4 tilt = glm::rotate(glm::mat4(), -static_cast<float>(M_TAU)/4.0f, z_axis);
+
+  // check identity
+  {
+    C3D_Mtx m;
+    Mtx_Identity(&m);
+    assert(m == glm::mat4());
+  }
+
+  for(size_t x = 0; x < 10000; ++x)
+  {
+    // check inverse
+    {
+      C3D_Mtx m, inv, id;
+
+      randomMatrix(m, gen, dist);
+
+      // cast to int to try to avoid assertion failure due to rounding error
+      for(size_t i = 0; i < 16; ++i)
+        m.m[i] = static_cast<int>(m.m[i]);
+
+      Mtx_Copy(&inv, &m);
+      if(Mtx_Inverse(&inv))
+      {
+        Mtx_Multiply(&id, &m, &inv);
+        assert(id == glm::mat4()); // could still fail due to rounding errors
+        Mtx_Multiply(&id, &inv, &m);
+        assert(id == glm::mat4()); // could still fail due to rounding errors
+      }
+    }
+
+    // check perspective
+    {
+      C3D_Mtx m;
+      float   fovy   = dist(gen),
+              aspect = dist(gen),
+              near   = dist(gen),
+              far    = dist(gen),
+              fovx;
+
+      while(aspect < 0.25f || aspect > 4.0f)
+        aspect = dist(gen);
+
+      while(fovy < M_TAU / 36.0f
+         || fovy >= M_TAU / 2.0f
+         || (fovx = 2.0f * atanf(tanf(fovy/2.0f) * aspect)) < M_TAU / 36.0f
+         || fovx >= M_TAU / 2.0f)
+      {
+        fovy = dist(gen);
+      }
+
+      while(std::abs(far - near) < 0.1f)
+        far = dist(gen);
+
+      Mtx_Persp(&m, fovy, aspect, near, far);
+
+      glm::mat4 g = glm::perspective(fovy, aspect, near, far);
+
+      assert(m == fix_depth*g);
+    }
+
+    // check perspective tilt
+    {
+      C3D_Mtx m;
+      float   fovy   = dist(gen),
+              aspect = dist(gen),
+              near   = dist(gen),
+              far    = dist(gen),
+              fovx;
+
+      while(aspect < 0.25f || aspect > 4.0f)
+        aspect = dist(gen);
+
+      while(fovy < M_TAU / 36.0f
+         || fovy >= M_TAU / 2.0f
+         || (fovx = 2.0f * atanf(tanf(fovy/2.0f) * aspect)) < M_TAU / 36.0f
+         || fovx >= M_TAU / 2.0f)
+      {
+        fovy = dist(gen);
+      }
+
+      while(std::abs(far - near) < 0.1f)
+        far = dist(gen);
+
+      Mtx_PerspTilt(&m, fovy, aspect, near, far);
+
+      glm::mat4 g = glm::perspective(fovx, 1.0f / aspect, near, far);
+
+      assert(m == fix_depth*g*tilt);
+    }
+
+    // check perspective stereo
+    {
+      C3D_Mtx left, right;
+      float   fovy   = dist(gen),
+              aspect = dist(gen),
+              near   = dist(gen),
+              far    = dist(gen),
+              iod    = dist(gen),
+              focLen = dist(gen),
+              fovy_tan,
+              fovx;
+
+      while(aspect < 0.25f || aspect > 4.0f)
+        aspect = dist(gen);
+
+      while(fovy < M_TAU / 36.0f
+         || fovy >= M_TAU / 2.0f
+         || (fovx = 2.0f * atanf(tanf(fovy/2.0f) * aspect)) < M_TAU / 36.0f
+         || fovx >= M_TAU / 2.0f)
+      {
+        fovy = dist(gen);
+      }
+
+      while(std::abs(far - near) < 0.1f)
+        far = dist(gen);
+
+      while(focLen < 0.25f)
+        focLen = dist(gen);
+
+      Mtx_PerspStereo(&left, fovy, aspect, near, far, -iod, focLen);
+      Mtx_PerspStereo(&right, fovy, aspect, near, far, iod, focLen);
+
+      glm::mat4 g = glm::perspective(fovy, aspect, near, far);
+
+      fovy_tan = tanf(fovy/2.0f);
+      glm::mat4 left_eye (1.0f, 0.0f, 0.0f, 0.0f,
+                          0.0f, 1.0f, 0.0f, 0.0f,
+                          iod/(focLen*2.0f), 0.0f, 1.0f, 0.0f,
+                          iod*fovy_tan*aspect/2.0f, 0.0f, 0.0f, 1.0f);
+      glm::mat4 right_eye(1.0f, 0.0f, 0.0f, 0.0f,
+                          0.0f, 1.0f, 0.0f, 0.0f,
+                          -iod/(focLen*2.0f), 0.0f, 1.0f, 0.0f,
+                          -iod*fovy_tan*aspect/2.0f, 0.0f, 0.0f, 1.0f);
+
+      assert(left == fix_depth*g*left_eye);
+      assert(right == fix_depth*g*right_eye);
+    }
+
+    // check perspective stereo tilt
+    {
+      C3D_Mtx left, right;
+      float   fovy   = dist(gen),
+              aspect = dist(gen),
+              near   = dist(gen),
+              far    = dist(gen),
+              iod    = dist(gen),
+              focLen = dist(gen),
+              fovx,
+              fovx_tan;
+
+      while(aspect < 0.25f || aspect > 4.0f)
+        aspect = dist(gen);
+
+      while(fovy < M_TAU / 36.0f
+         || fovy >= M_TAU / 2.0f
+         || (fovx = 2.0f * atanf(tanf(fovy/2.0f) * aspect)) < M_TAU / 36.0f
+         || fovx >= M_TAU / 2.0f)
+      {
+        fovy = dist(gen);
+      }
+
+      while(std::abs(far - near) < 0.1f)
+        far = dist(gen);
+
+      while(focLen < 0.25f)
+        focLen = dist(gen);
+
+      Mtx_PerspStereoTilt(&left, fovy, aspect, near, far, -iod, focLen);
+      Mtx_PerspStereoTilt(&right, fovy, aspect, near, far, iod, focLen);
+
+      glm::mat4 g = glm::perspective(fovx, 1.0f / aspect, near, far);
+
+      fovx_tan = tanf(fovx/2.0f);
+      glm::mat4 left_eye (1.0f, 0.0f, 0.0f, 0.0f,
+                          0.0f, 1.0f, 0.0f, 0.0f,
+                          0.0f, -iod/(focLen*2.0f), 1.0f, 0.0f,
+                          0.0f, -iod*fovx_tan/2.0f, 0.0f, 1.0f);
+      glm::mat4 right_eye(1.0f, 0.0f, 0.0f, 0.0f,
+                          0.0f, 1.0f, 0.0f, 0.0f,
+                          0.0f, iod/(focLen*2.0f), 1.0f, 0.0f,
+                          0.0f, iod*fovx_tan/2.0f, 0.0f, 1.0f);
+
+      assert(left == fix_depth*g*left_eye*tilt);
+      assert(right == fix_depth*g*right_eye*tilt);
+    }
+
+    // check ortho
+    {
+      C3D_Mtx m;
+      float   l = dist(gen),
+              r = dist(gen),
+              b = dist(gen),
+              t = dist(gen),
+              n = dist(gen),
+              f = dist(gen);
+
+      while(std::abs(r-l) < 0.1f)
+        r = dist(gen);
+
+      while(std::abs(t-b) < 0.1f)
+        t = dist(gen);
+
+      while(std::abs(f-n) < 0.1f)
+        f = dist(gen);
+
+      Mtx_Ortho(&m, l, r, b, t, n, f);
+
+      glm::mat4 g = glm::ortho(l, r, b, t, n, f);
+
+      assert(m == fix_depth*g);
+    }
+
+    // check ortho tilt
+    {
+      C3D_Mtx m;
+      float   l = dist(gen),
+              r = dist(gen),
+              b = dist(gen),
+              t = dist(gen),
+              n = dist(gen),
+              f = dist(gen);
+
+      while(std::abs(r-l) < 0.1f)
+        r = dist(gen);
+
+      while(std::abs(t-b) < 0.1f)
+        t = dist(gen);
+
+      while(std::abs(f-n) < 0.1f)
+        f = dist(gen);
+
+      Mtx_OrthoTilt(&m, l, r, b, t, n, f);
+
+      glm::mat4 g = glm::ortho(l, r, b, t, n, f);
+
+      assert(m == tilt*fix_depth*g);
+    }
+
+    // check multiply
+    {
+      C3D_Mtx m1, m2;
+      randomMatrix(m1, gen, dist);
+      randomMatrix(m2, gen, dist);
+
+      glm::mat4 g1 = loadMatrix(m1);
+      glm::mat4 g2 = loadMatrix(m2);
+
+      C3D_Mtx result;
+      Mtx_Multiply(&result, &m1, &m2);
+      assert(result == g1*g2);
+    }
+
+    // check translate
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec3 v = randomVector3(gen, dist);
+
+      Mtx_Translate(&m, v.x, v.y, v.z, true);
+      assert(m == glm::translate(g, v));
+    }
+
+    // check translate (reversed)
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec3 v = randomVector3(gen, dist);
+
+      Mtx_Translate(&m, v.x, v.y, v.z, false);
+      assert(m == glm::translate(glm::mat4(), v)*g);
+    }
+
+    // check scale
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec3 v = randomVector3(gen, dist);
+
+      Mtx_Scale(&m, v.x, v.y, v.z);
+      assert(m == glm::scale(g, v));
+    }
+
+    // check rotate
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec3 v = randomVector3(gen, dist);
+
+      Mtx_Rotate(&m, FVec3_New(v.x, v.y, v.z), r, true);
+      assert(m == glm::rotate(g, r, v));
+    }
+
+    // check rotate (reversed)
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec3 v = randomVector3(gen, dist);
+
+      Mtx_Rotate(&m, FVec3_New(v.x, v.y, v.z), r, false);
+      assert(m == glm::rotate(glm::mat4(), r, v)*g);
+    }
+
+    // check rotate X
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+
+      Mtx_RotateX(&m, r, true);
+      assert(m == glm::rotate(g, r, x_axis));
+    }
+
+    // check rotate X (reversed)
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+
+      Mtx_RotateX(&m, r, false);
+      assert(m == glm::rotate(glm::mat4(), r, x_axis)*g);
+    }
+
+    // check rotate Y
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+
+      Mtx_RotateY(&m, r, true);
+      assert(m == glm::rotate(g, r, y_axis));
+    }
+
+    // check rotate Y (reversed)
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+
+      Mtx_RotateY(&m, r, false);
+      assert(m == glm::rotate(glm::mat4(), r, y_axis)*g);
+    }
+
+    // check rotate Z
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+
+      Mtx_RotateZ(&m, r, true);
+      assert(m == glm::rotate(g, r, z_axis));
+    }
+
+    // check rotate Z (reversed)
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      float r = randomAngle(gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+
+      Mtx_RotateZ(&m, r, false);
+      assert(m == glm::rotate(glm::mat4(), r, z_axis)*g);
+    }
+
+    // check vec3 multiply
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec3 v = randomVector3(gen, dist);
+
+      assert(Mtx_MultiplyFVec3(&m, FVec3_New(v.x, v.y, v.z)) == glm::mat3x3(g)*v);
+    }
+
+    // check vec4 multiply
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec4 v = randomVector4(gen, dist);
+
+      assert(Mtx_MultiplyFVec4(&m, FVec4_New(v.x, v.y, v.z, v.w)) == g*v);
+    }
+
+    // check vecH multiply
+    {
+      C3D_Mtx m;
+      randomMatrix(m, gen, dist);
+
+      glm::mat4 g = loadMatrix(m);
+      glm::vec4 v = randomVector4(gen, dist);
+      v.w = 1.0f;
+
+      assert(Mtx_MultiplyFVecH(&m, FVec3_New(v.x, v.y, v.z)) == glm::mat4x3(g)*v);
+    }
+  }
+}
+
+static void
+check_quaternion(generator_t &gen, distribution_t &dist)
+{ // Cross-check the Quat_* routines against glm::quat equivalents; any mismatch trips an assert.
+  // check identity (done once, outside the loop; it takes no random input)
+  {
+    C3D_FQuat q = Quat_Identity();
+    glm::quat g; // left default-constructed; presumably identity in the glm version in use -- TODO confirm
+
+    assert(q == g); // mixed C3D/glm operator== is defined outside this chunk; presumably tolerance-based -- verify
+  }
+
+  for(size_t x = 0; x < 10000; ++x) // x is only a round counter; every check below draws its own inputs
+  {
+    // check negation against glm's unary minus
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      assert(Quat_Negate(q) == -g);
+    }
+
+    // check addition of two independently drawn quaternions
+    {
+      C3D_FQuat q1 = randomQuat(gen, dist);
+      C3D_FQuat q2 = randomQuat(gen, dist);
+
+      glm::quat g1 = loadQuat(q1);
+      glm::quat g2 = loadQuat(q2);
+
+      assert(Quat_Add(q1, q2) == g1+g2);
+    }
+
+    // check subtraction (the glm reference is built as g1 + (-g2))
+    {
+      C3D_FQuat q1 = randomQuat(gen, dist);
+      C3D_FQuat q2 = randomQuat(gen, dist);
+
+      glm::quat g1 = loadQuat(q1);
+      glm::quat g2 = loadQuat(q2);
+
+      assert(Quat_Subtract(q1, q2) == g1 + (-g2));
+    }
+
+    // check scale by a scalar drawn from dist
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      float f = dist(gen);
+
+      assert(Quat_Scale(q, f) == g*f);
+    }
+
+    // check normalize against glm::normalize
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      assert(Quat_Normalize(q) == glm::normalize(g));
+    }
+
+    // check dot (scalar result, compared with an explicit 0.0001 absolute tolerance)
+    {
+      C3D_FQuat q1 = randomQuat(gen, dist);
+      C3D_FQuat q2 = randomQuat(gen, dist);
+      glm::quat g1 = loadQuat(q1);
+      glm::quat g2 = loadQuat(q2);
+
+      assert(std::abs(Quat_Dot(q1, q2) - glm::dot(g1, g2)) < 0.0001f);
+    }
+
+    // check conjugate against glm::conjugate
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      assert(Quat_Conjugate(q) == glm::conjugate(g));
+    }
+
+    // check inverse against glm::inverse
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      assert(Quat_Inverse(q) == glm::inverse(g));
+    }
+
+    // check quaternion multiplication (operands kept in the same order on both sides)
+    {
+      C3D_FQuat q1 = randomQuat(gen, dist);
+      C3D_FQuat q2 = randomQuat(gen, dist);
+      glm::quat g1 = loadQuat(q1);
+      glm::quat g2 = loadQuat(q2);
+
+      assert(Quat_Multiply(q1, q2) == g1*g2);
+    }
+
+    // check quat pow() through self-consistency identities only
+    // Note: older versions of glm have broken pow() for quats, so the direct glm comparison below is left commented out
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      //glm::quat g = loadQuat(q);
+      float     r = dist(gen);
+
+      //assert(Quat_Pow(q, r) == glm::pow(g, r));
+
+      q = Quat_Normalize(q); // the identities below are asserted on the normalized quaternion
+
+      // check trivial cases: q^1 == q, q^0 == identity, identity^r == identity
+      assert(Quat_Pow(q, 1.0f) == q);
+      assert(Quat_Pow(q, 0.0f) == Quat_Identity());
+      assert(Quat_Pow(Quat_Identity(), r) == Quat_Identity());
+
+      // validate semantics: q^r == q^(r/2) * q^(r/2)
+      assert(Quat_Pow(q, r) == Quat_Multiply(Quat_Pow(q, r/2), Quat_Pow(q, r/2)));
+    }
+
+    // check vector multiplication (cross), in both operand orders
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      glm::vec3 v = randomVector3(gen, dist);
+
+      assert(Quat_CrossFVec3(q, FVec3_New(v.x, v.y, v.z)) == glm::cross(g, v));
+      assert(FVec3_CrossQuat(FVec3_New(v.x, v.y, v.z), q) == glm::cross(v, g));
+    }
+
+    // check rotation: flag=false pairs with glm::rotate(g, r, v), flag=true with glm::rotate(glm::quat(), r, v)*g (opposite pairing to the Mtx_Rotate checks)
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      glm::vec3 v = randomVector3(gen, dist);
+      float     r = randomAngle(gen, dist);
+
+      assert(Quat_Rotate(q, FVec3_New(v.x, v.y, v.z), r, false) == glm::rotate(g, r, v));
+      assert(Quat_Rotate(q, FVec3_New(v.x, v.y, v.z), r, true) == glm::rotate(glm::quat(), r, v)*g);
+    }
+
+    // check rotate X (same flag pairing as the general rotation check, axis fixed to x_axis)
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      float     r = randomAngle(gen, dist);
+
+      assert(Quat_RotateX(q, r, false) == glm::rotate(g, r, x_axis));
+      assert(Quat_RotateX(q, r, true) == glm::rotate(glm::quat(), r, x_axis)*g);
+    }
+
+    // check rotate Y (same flag pairing, axis fixed to y_axis)
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      float     r = randomAngle(gen, dist);
+
+      assert(Quat_RotateY(q, r, false) == glm::rotate(g, r, y_axis));
+      assert(Quat_RotateY(q, r, true) == glm::rotate(glm::quat(), r, y_axis)*g);
+    }
+
+    // check rotate Z (same flag pairing, axis fixed to z_axis)
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      float     r = randomAngle(gen, dist);
+
+      assert(Quat_RotateZ(q, r, false) == glm::rotate(g, r, z_axis));
+      assert(Quat_RotateZ(q, r, true) == glm::rotate(glm::quat(), r, z_axis)*g);
+    }
+
+    // check conversion to matrix against glm::mat4_cast
+    {
+      C3D_FQuat q = randomQuat(gen, dist);
+      glm::quat g = loadQuat(q);
+
+      C3D_Mtx m;
+      Mtx_FromQuat(&m, q);
+
+      assert(m == glm::mat4_cast(g));
+    }
+  }
+}
+
+int main(int argc, char *argv[])
+{
+  // Seed from std::random_device, so a failing input set is typically not reproducible across runs.
+  std::random_device entropy;
+  generator_t        rng(entropy());
+  distribution_t     values(-10.0f, 10.0f);
+
+  check_matrix(rng, values);     // matrix checks consume the shared generator first
+  check_quaternion(rng, values); // quaternion checks continue from the same generator state
+  return EXIT_SUCCESS;
+}