Compare commits

..

No commits in common. "master" and "v1.0" have entirely different histories.
master ... v1.0

22 changed files with 672 additions and 2667 deletions

1
.gitignore vendored
View File

@ -26,5 +26,6 @@ missing
config.log
config.status
Makefile
picasso
.deps/
*.bz2

View File

@ -1,21 +0,0 @@
{
"configurations": [
{
"name": "3ds",
"includePath": [
"${workspaceFolder}/**",
//"C:/devkitpro/libnx/include/**",
"C:/devkitpro/libctru/include/**",
"/opt/devkitpro/libctru/include/**",
//"C:/devkitpro/portlibs/switch/include/**",
"/opt/devkitpro/portlibs/3ds/include/**",
"C:/devkitpro/portlibs/3ds/include/**"
],
"defines": [],
"cStandard": "gnu17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-x64"
}
],
"version": 4
}

View File

@ -1,20 +0,0 @@
#########################################################################################
# CMake toolchain file: cross-compiles for the Nintendo 3DS using devkitARM.
#
# Every tool and library path below is derived from the DEVKITPRO environment
# variable, which is read once at configure time.

set(DEVKITPRO "$ENV{DEVKITPRO}")
if(NOT DEVKITPRO)
  # Without this check every path below silently becomes root-relative
  # (e.g. "/devkitARM/bin/arm-none-eabi-gcc") and fails with a confusing error.
  message(FATAL_ERROR
    "The DEVKITPRO environment variable is not set; "
    "point it at the devkitPro installation before configuring.")
endif()

set(CMAKE_SYSTEM_NAME "Nintendo 3ds")

# Compiler and binutils front-ends shipped with devkitARM.
set(CMAKE_C_COMPILER "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc")
set(CMAKE_CXX_COMPILER "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-g++")
set(CMAKE_AR "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc-ar" CACHE STRING "")
set(CMAKE_RANLIB "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc-ranlib" CACHE STRING "")
set(CMAKE_ASM_COMPILER "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc")

# Architecture flags shared by the C, C++ and ASM flag sets.
set(ARCH "-march=armv6k -mtune=mpcore -mfloat-abi=hard -mfpu=vfp -mtp=soft -D__3DS__")
set(CMAKE_C_FLAGS "${ARCH} -Wall -mword-relocations -O3 -fomit-frame-pointer -ffunction-sections -fdata-sections" CACHE STRING "C flags")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fno-rtti -std=gnu++20" CACHE STRING "C++ flags")
set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS}")

# Search roots for find_*() commands. The portlibs entry previously used the
# undefined variable DEVKITARM, which expanded to "/portlibs/3ds".
set(CMAKE_FIND_ROOT_PATH
  "${DEVKITPRO}/devkitARM"
  "${DEVKITPRO}/libctru"
  "${DEVKITPRO}/portlibs/3ds")

set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Shared libs not available")

# Library search directories ("libcrtu" was a typo for "libctru").
link_directories(
  "${DEVKITPRO}/libctru/lib"
  "${DEVKITPRO}/portlibs/3ds/lib")
#########################################################################################

View File

@ -1,57 +0,0 @@
cmake_minimum_required(VERSION 3.22)
project(picasso
  VERSION 0.5.2
  DESCRIPTION "Picasso Shadercompiler on the Nintendo 3ds"
  LANGUAGES CXX)

# Link/include settings for the 3DS libraries.
# NOTE(review): DEVKITPRO is not defined in this file; presumably the toolchain
# file provides it - confirm.
set(CMAKE_EXE_LINKER_FLAGS "-L${DEVKITPRO}/libctru/lib -L${DEVKITPRO}/picaGL/lib -L${DEVKITPRO}/portlibs/3ds/lib -specs=3dsx.specs -Wl,--gc-sections")
include_directories(${DEVKITPRO}/libctru/include ${DEVKITPRO}/picaGL/include ${DEVKITPRO}/portlibs/3ds/include)
add_definitions("-D__3DS__")

include(CMakePackageConfigHelpers)

# Default to a Release build, but only for single-config generators:
# multi-config generators ignore CMAKE_BUILD_TYPE.
get_property(picasso_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT picasso_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING
    "Choose the type of build, options are: None Debug Release."
    FORCE)
endif()

# Install into the build tree unless the user chose a prefix explicitly.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE STRING
    "The install location"
    FORCE)
endif()

include_directories(include)

set(HEADER_FILES
  include/pica.hpp
  include/picasso/picasso.h
  include/picasso/types.h
  include/picasso/FileClass.h
  include/picasso/maestro_opcodes.h)
set(SOURCE_FILES
  source/picasso_assembler.cpp
  source/picasso_library.cpp)

add_library(${PROJECT_NAME}
  ${HEADER_FILES}
  ${SOURCE_FILES})
# Namespaced alias so consumers can link picasso::picasso.
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})

target_compile_features(${PROJECT_NAME}
  # Language level required to compile the library itself
  # (cxx_std_20 already implies cxx_auto_type).
  PRIVATE cxx_std_20)

set(PROJECT_PREFIX ${PROJECT_NAME}-${picasso_VERSION})

target_include_directories(${PROJECT_NAME}
  PUBLIC
    # Used when building the library. This previously referenced the undefined
    # variable foo_SOURCE_DIR, which made the path expand to "/include".
    $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
    # Used when installing the library:
    $<INSTALL_INTERFACE:include/${PROJECT_NAME}>
  PRIVATE
    # Used only when building the library. The sources listed above live in
    # "source/"; the previous value "src" did not match that layout.
    ${CMAKE_CURRENT_SOURCE_DIR}/source)

View File

@ -1,72 +0,0 @@
# picasso Changelog
# v2.7.1
- Further improvements to overall system stability and other minor adjustments have been made to enhance the user experience.
# v2.7
- Added `dst`, `litp` and `break` instructions (thanks to @Tilka).
- Added check to enforce index regs being used only with floating point vector uniforms.
- Renamed index registers to match D3D naming conventions (`a0.x`, `a0.y`, `aL`) (old names still accepted).
- Miscellaneous bugfixes and improvements (thanks to @lioncash).
# v2.6.2
- Fixed several compilation errors in some compilers.
# v2.6.1
- Reduced `mad` opdesc allocation errors by automatically swapping out of bounds opdesc entries with other ones in the addressable range (5 bits).
# v2.6
- Added `.in` directive for explicitly specifying (and allocating) input registers and exporting them in the DVLE uniform table.
- Added support for dollar signs (`$`) in identifier names, which are translated to period characters (`.`) in DVLE uniform names.
- Output registers `o7` through `o15` are now allowed in vertex shaders (as dummy outputs).
- DVLE uniform table is now sorted by register position.
# v2.5
- The `.gsh` directive has been enhanced to provide full support for all geometry shader operation modes (point, variable-sized primitive and fixed-size primitive). This also effectively separates vertex shader uniform space from geometry shader uniform space.
- The `.out` directive has been enhanced to allow wiring semantics to any arbitrary output register. Additionally the `dummy` semantic was added while the `7` semantic was removed.
- Added auto-insertion of NOP instruction in corner cases involving flow of control instructions, together with the `--no-nop` directive which instead of adding NOPs warns the user about the corner cases.
- Added support for `rgba` and `stpq` in addition to `xyzw`.
- Added an error message for invalid input register use (e.g. `add r0, v1, v2`).
- The operand descriptor allocation algorithm has been enhanced to take into account unused operands.
- The `6` and `7` conditional operators have been removed since they actually do not exist.
- Really corrected MAD instruction encoding.
- Several miscellaneous issues were fixed.
# v2.4
- Corrected MAD instruction encoding.
- Added command line flag for retrieving the picasso version.
# v2.3
- Added `.constfa` for creating floating-point vector constant arrays.
- Fixed `.nodvle` bug.
# v2.2
- Added proper support for the MOVA instruction.
- Added support for inverting the condition in JMPU.
- Fixed `lcnt` bug.
# v2.1
- Fixed input file open error message.
- Fixed `.constf` misallocation bug.
# v2.0
- (**Breaking change**) Command line format changed.
- Added support for assembling multiple shaders (DVLEs) into a single SHBIN.
- Added new directives: `.entry`, `.nodvle`, `.gsh`, `.setf`, `.seti`, `.setb`.
- Added auto-detection of inverted forms of opcodes. (Explicitly using `dphi`, `sgei`, `slti` and `madi` is now deprecated)
- Several miscellaneous bug fixes.
# v1.0
- Initial release.

202
Manual.md
View File

@ -9,7 +9,7 @@ Comments are introduced by the semicolon character. E.g.
.fvec myFloat ; They can also appear in the same line
```
Identifiers follow the same rules as C identifiers. Additionally, the dollar sign (`$`) is allowed in identifiers; mostly as a substitute for the period character (`.`) since the latter is used in `picasso` syntax.
Identifiers follow the same rules as C identifiers.
Labels consist of an identifier plus a colon. E.g.
@ -28,20 +28,22 @@ Procedures are delimited using the `.proc` and `.end` directives. E.g.
.end
```
A valid PICA200 shader must contain a `main` procedure.
Instructions consist of an opcode name and a comma-delimited list of arguments.
Directives are special statements that start with a period and control certain aspects of `picasso`'s code emission; such as defining procedures, uniforms, constants and more.
PICA200 registers are often used as arguments to instructions. There exist the following registers:
- `o0` through `o15`: Output registers (usable as a destination operand). The range `o7` through `o15` is only available in vertex shaders.
- `v0` through `v15`: Input registers (usable as a source operand).
- `o0` through `o7`: Output registers (usable as a destination operand).
- `v0` through `v7`: Input registers (usable as a source operand).
- `r0` through `r15`: Scratch registers (usable as both destination and source operands).
- `c0` through `c95`: Floating-point vector uniforms (usable as a special type of source operand called SRC1).
- `i0` through `i3`: Integer vector uniforms (special purpose).
- `b0` through `b15`: Boolean uniforms (special purpose).
All registers contain 24-bit floating point vectors; except for integer vector uniforms (containing 8-bit integers) and boolean uniforms. Vectors have 4 components: x, y, z and w. The components may alternatively be referred to as r, g, b and a (respectively); or s, t, p and q (respectively). Uniforms are special registers that are writable by the CPU; thus they are used to pass configuration parameters to the shader such as transformation matrices. Sometimes they are preloaded with constant values that may be used in the logic of the shader.
All registers contain 32-bit floating point vectors; except for integer vector uniforms (containing 8-bit integers) and boolean uniforms. Vectors have 4 components: x, y, z and w. Uniforms are special registers that are writable by the CPU; thus they are used to pass configuration parameters to the shader such as transformation matrices. Sometimes they are preloaded with constant values that may be used in the logic of the shader.
In most situations, vectors may be [swizzled](http://en.wikipedia.org/wiki/Swizzling_%28computer_graphics%29); that is, their components may be rearranged. Register arguments support specifying a swizzling mask: `r0.wwxy`. The swizzling mask usually has 4 components (but not more); if it has fewer, the last component is repeated to fill the mask. The default mask applied to registers is `xyzw`; that is, identity (no effect).
@ -51,44 +53,9 @@ Registers may also be assigned additional names in order to make the code more l
For convenience, registers may be addressed using an offset from a known register. This is called indexing. For example, `c8[4]` is equivalent to `c12`; and `r4[-2]` is equivalent to `r2`. Indexing is useful for addressing arrays of registers (such as matrices).
Some source operands of instructions (called SRC1) support relative addressing. This means that it is possible to use one of the three built-in indexing registers (`a0.x`, `a0.y` and `aL`) to address a register, e.g. `someArray[aL]`. Adding an offset is also supported, e.g. `someArray[aL+2]`. This is useful in FOR loops. Index registers can only be used with floating-point vector uniform registers, though. Note: Older versions of `picasso` called the indexing registers `a0`, `a1` and `a2` respectively (also `lcnt` for `a2`); these names are still accepted for backwards compatibility.
Some source operands of instructions (called SRC1) support relative addressing. This means that it is possible to use one of the three built-in indexing registers (`a0`, `a1` and `a2` aka `lcnt`) to address a register, e.g. `someArray[lcnt]`. Adding an offset is also supported, e.g. `someArray[lcnt+2]`. This is useful in FOR loops.
Normal floating-point vector registers may also be negated by prepending a minus sign before it, e.g. `-r2` or `-someArray[aL+2]`.
In geometry shaders, `b15` is automatically set to true *after* each execution of the geometry shader. This can be useful to detect whether program state should be initialized - GPU management code usually resets all unused boolean uniforms to false when setting up the PICA200's shader processing units.
## Command Line Usage
```
Usage: picasso [options] files...
Options:
-o, --out=<file> Specifies the name of the SHBIN file to generate
-h, --header=<file> Specifies the name of the header file to generate
-n, --no-nop Disables the automatic insertion of padding NOPs
-v, --version Displays version information
```
DVLEs are generated in the same order as the files in the command line.
## Linking Model
`picasso` takes one or more source code files, and assembles them into a single `.shbin` file. A DVLE object is generated for each source code file, unless the `.nodvle` directive is used (see below). Procedures are shared amongst all source code files, and they may be defined and called wherever. Uniform space for vertex shaders is also shared, that is, if two vertex shader source code files declare the same uniform, they are assigned the same location. Geometry shaders however do not share uniforms, and each geometry shader source code file will have its own uniform allocation map. On the other hand, constants are never shared, and the same space is reused for the constants of each DVLE. Outputs and aliases are, by necessity, never shared either.
The entry point of a DVLE may be set with the `.entry` directive. If this directive is not used, `main` is assumed as the entrypoint.
A DVLE by default is a vertex shader, unless the `.gsh` directive is used (in the case of which a geometry shader is specified).
Uniforms that start with the underscore (`_`) character are not exposed in the DVLE table of uniforms. This allows for creating private uniforms that can be internally used to configure the behaviour of shared procedures. Additionally, dollar signs (`$`) are automatically translated to period characters (`.`) in the DVLE uniform table.
**Note**: Older versions of `picasso` handled geometry shaders in a different way. Specifically, uniform space was shared with vertex shaders and it was possible to use `.gsh` without parameters or `setemit` to flag a DVLE as a geometry shader. For backwards compatibility purposes this functionality has been retained, however its use is not recommended.
## PICA200 Caveats & Errata
The PICA200's shader units have numerous implementation caveats and errata that should be taken into account when designing and writing shader code. Some of these include:
- Certain flow of control statements may not work at the end of another block, including the closing of other nested blocks. picasso detects these situations and automatically inserts padding NOP instructions (unless the `--no-nop` command line flag is used).
- The `mova` instruction is finicky and for instance two consecutive `mova` instructions will freeze the PICA200.
- Only a single input register is able to be referenced reliably at a time in the source registers of an operand. That is, while specifying the same input register in one or more source registers will behave correctly, specifying different input registers will produce incorrect results. picasso detects this situation and displays an error message.
Normal floating-point vector registers may also be negated by prepending a minus sign before it, e.g. `-r2` or `-someArray[lcnt+2]`.
## Supported Directives
@ -161,133 +128,33 @@ Reserves a new floating-point vector uniform to be preloaded with the specified
Reserves a new integer vector uniform to be preloaded with the specified constant; creates an alias for it that points to the allocated register. Example:
```
.consti loopParams(16, 0, 1, 0)
```
### .constfa
```
.constfa arrayName[]
.constfa arrayName[size]
.constfa (x, y, z, w)
```
Reserves a new array of floating-point vector uniforms to be preloaded with the specified constants; creates an alias for it that points to the first element. Example:
```
; Create an array of two elements
.constfa myArray[]
.constfa (1.0, 2.0, 3.0, 4.0)
.constfa (5.0, 6.0, 7.0, 8.0)
.end
```
Optionally the size of the array may be specified. If a number of elements less than the size is specified, the missing elements are initialized to zero. Example:
```
.constfa myArray[4]
.constfa (1.0, 2.0, 3.0, 4.0)
.constfa (5.0, 6.0, 7.0, 8.0)
; The remaining two elements are vectors full of zeroes.
.end
```
### .in
```
.in inName
.in inName register
```
Reserves an input register and creates an alias for it called `inName`. If no input register is specified it is automatically allocated. The input register is added to the DVLE's uniform table.
Example:
```
.in position
.in texcoord
.in special v15
.constf loopParams(16, 0, 1, 0)
```
### .out
```
.out outName propName
.out outName propName register
.out - propName register
```
Wires an output register to a certain output property and (optionally) creates an alias for it called `outName` (specify a dash in order not to create the alias). If no output register is specified it is automatically allocated. The following property names are supported:
Allocates a new output register, wires it to a certain output property and creates an alias for it that points to the allocated register. The following property names are supported:
- `position` (or `pos`): Represents the position of the outputted vertex.
- `normalquat` (or `nquat`): Used in fragment lighting, this represents the quaternion associated to the normal vector of the vertex.
- `color` (or `clr`): Represents the color of the outputted vertex. Its format is (R, G, B, A) where R,G,B,A are values ranging from 0.0 to 1.0.
- `texcoord0` (or `tcoord0`): Represents the first texture coordinate, which is always fed to the Texture Unit 0. Only the first two components are used.
- `texcoord0w` (or `tcoord0w`): Represents the third component of the first texture coordinate, used for 3D/cube textures.
- `texcoord1` (or `tcoord1`): Similarly to `texcoord0`, this is the second texture coordinate, which is usually but not always fed to Texture Unit 1.
- `texcoord2` (or `tcoord2`): Similarly to `texcoord0`, this is the third texture coordinate, which is usually but not always fed to Texture Unit 2.
- `view`: Used in fragment lighting, this represents the view vector associated to the vertex. The fourth component is not used.
- `dummy`: Used in vertex shaders to pass generic semanticless parameters to the geometry shader, and in geometry shaders to use the appropriate property type from the output map of the vertex shader, thus 'merging' the output maps.
An output mask that specifies to which components of the output register should the property be wired to is also accepted. If the output register is explicitly specified, it attaches to it (e.g. `o2.xy`); otherwise it attaches to the property name (e.g. `texcoord0.xy`).
- `normalquat` (or `nquat`): Under investigation.
- `color` (or `clr`): Represents the color of the outputted vertex. Its format is (R, G, B, xx) where R,G,B are values ranging from 0.0 to 1.0. The W component isn't used.
- `texcoord0` (or `tcoord0`): Represents the texture coordinate that is fed to the Texture Unit 0. The Z and W components are not used.
- `texcoord0w` (or `tcoord0w`): Under investigation.
- `texcoord1` (or `tcoord1`): As `texcoord0`, but for the Texture Unit 1.
- `texcoord2` (or `tcoord2`): As `texcoord0`, but for the Texture Unit 2.
- `7`: Under investigation.
- `view`: Under investigation.
Example:
```
.out outPos position
.out outClr color.rgba
.out outTex texcoord0.xy
.out - texcoord0w outTex.p
.out outClr color
.out outTex texcoord0
```
### .entry
```
.entry procedureName
```
Specifies the name of the procedure to use as the entrypoint of the current DVLE. If this directive is not used, `main` is assumed.
### .nodvle
```
.nodvle
```
This directive tells `picasso` not to generate a DVLE for the source code file that is being processed. This allows for writing files that contain shared procedures to be used by other files.
### .gsh
```
.gsh point firstReg
.gsh variable firstReg vtxNum
.gsh fixed firstReg arrayStartReg vtxNum
```
This directive flags the current DVLE as a geometry shader and specifies the geometry shader operation mode, which can be one of the following:
- `point` mode: In this mode the geometry shader is called according to the input stride and input permutation configured by the user. On entry, the data is stored starting at the `v0` register. This type of geometry shader can be used with both array-drawing mode (aka `C3D_DrawArrays`) and element-drawing mode (aka `C3D_DrawElements`).
- `variable` mode (also called `subdivision` mode): In this mode the geometry shader processes variable-sized primitives, which are required to have `vtxNum` vertices for which full attribute information will be stored, and **one or more** additional vertices for which only position information will be stored. On entry the register `c0` stores in all its components the total number of vertices of the primitive, and subsequent registers store vertex information in order. This type of geometry shader can only be used with element-drawing mode - inside the index array each primitive is prefixed with the number of vertices in it.
- `fixed` mode (also called `particle` mode): In this mode the geometry shader processes fixed-size primitives, which always have `vtxNum` vertices. On entry, the array of vertex information will be stored starting at the float uniform register `arrayStartReg`. This type of geometry shader can only be used with element-drawing mode.
The `firstReg` parameter specifies the first float uniform register that is available for use in float uniform register allocation (this is especially useful in variable and fixed mode).
Examples:
```
.gsh point c0
.gsh variable c48 3
.gsh fixed c48 c0 4
```
**Note**: For backwards compatibility reasons, a legacy mode which does not accept any parameters is accepted; however it should not be used.
### .setf
```
.setf register(x, y, z, w)
```
Similar to `.constf`, this directive adds a DVLE constant entry for the specified floating-point vector uniform register to be loaded with the specified value. This is useful in order to instantiate a generalized shared procedure with the specified parameters.
### .seti
```
.seti register(x, y, z, w)
```
Similar to `.consti`, this directive adds a DVLE constant entry for the specified integer vector uniform register to be loaded with the specified value. This is useful in order to instantiate a generalized shared procedure with the specified parameters.
### .setb
```
.setb register value
```
This directive adds a DVLE constant entry for the specified boolean uniform register to be loaded with the specified value (which may be `true`, `false`, `on`, `off`, `1` or `0`). This is useful in order to control the flow of a generalized shared procedure.
## Supported Instructions
See [Shader Instruction Set](http://3dbrew.org/wiki/Shader_Instruction_Set) for more details.
@ -302,44 +169,40 @@ Syntax | Description
`dp3 rDest, rSrc1, rSrc2` |
`dp4 rDest, rSrc1, rSrc2` |
`dph rDest, rSrc1, rSrc2` |
`dst rDest, rSrc1, rSrc2` |
`mul rDest, rSrc1, rSrc2` |
`sge rDest, rSrc1, rSrc2` |
`slt rDest, rSrc1, rSrc2` |
`max rDest, rSrc1, rSrc2` |
`min rDest, rSrc1, rSrc2` |
`dphi rDest, rSrc2, rSrc1` |
`sgei rDest, rSrc2, rSrc1` |
`slti rDest, rSrc2, rSrc1` |
`ex2 rDest, rSrc1` |
`lg2 rDest, rSrc1` |
`litp rDest, rSrc1` |
`ex2 rDest, rSrc1` |
`flr rDest, rSrc1` |
`rcp rDest, rSrc1` |
`rsq rDest, rSrc1` |
`mov rDest, rSrc1` |
`mova idxReg, rSrc1` |
`mova rSrc1` |
`cmp rSrc1, opx, opy, rSrc2` |
`call procName` |
`for iReg` |
`break` | (not recommended)
`breakc condExp` |
`callc condExp, procName` |
`ifc condExp` |
`jmpc condExp, labelName` |
`callu bReg, procName` |
`ifu bReg` |
`jmpu [!]bReg, labelName` |
`mad rDest, rSrc1, rSrc2, rSrc3` |
`jmpu bReg, labelName` |
`madi rDest, rSrc1, rSrc2, rSrc1` |
`mad rDest, rSrc1, rSrc1, rSrc2` |
### Description of operands
- `rDest`: Represents a destination operand (register).
- `rSrc1`/`rSrc2`/`rSrc3`: Represents a source operand (register). Depending on the position, some registers may be supported and some may not.
- Narrow source operands are limited to input and scratch registers.
- Wide source operands also support floating-point vector uniforms and relative addressing.
- In instructions that take one source operand, it is always wide.
- In instructions that take two source operands, the first is wide and the second is narrow.
- `dph`/`sge`/`slt` have a special form where the first operand is narrow and the second is wide. This usage is detected automatically by `picasso`.
- `mad`, which takes three source operands, has two forms: the first is narrow-wide-narrow, and the second is narrow-narrow-wide. This is also detected automatically.
- `idxReg`: Represents an indexing register to write to using the mova instruction. Can be `a0.x`, `a0.y` or `a0.xy` (the latter writes to both components). Note: Older versions of `picasso` accepted `a0`, `a1` and `a01` respectively; this syntax is still supported for backwards compatibility.
- `rSrc1`: Represents a so-called SRC1 source operand (register), which allows accessing floating-point vector uniforms and relative addressing.
- `rSrc2`: Represents a so-called SRC2 source operand (register), which is limited to input and scratch registers.
- `iReg`: Represents an integer vector uniform source operand.
- `bReg`: Represents a boolean uniform source operand.
- `procName`: Represents the name of a procedure.
@ -351,11 +214,12 @@ Syntax | Description
- `le`: Less or equal than
- `gt`: Greater than
- `ge`: Greater or equal than
- `6` and `7`: currently unknown, supposedly the result they yield is always true.
- `condExp`: Represents a conditional expression, which uses the conditional flags `cmp.x` and `cmp.y` set by the CMP instruction. These flags may be negated using the `!` symbol, e.g. `!cmp.x`. The conditional expression can take any of the following forms:
- `flag1`: It tests a single flag.
- `flag1 && flag2`: It performs AND between the two flags. Optionally, a single `&` may be specified.
- `flag1 || flag2`: It performs OR between the two flags. Optionally, a single `|` may be specified.
- `vtxId`: An integer ranging from 0 to 2 specifying the vertex ID used in geoshader vertex emission.
- `vtxId`: An integer ranging from 0 to 3 specifying the vertex ID used in geoshader vertex emission.
- `emitFlags`: A space delimited combination of the following words:
- `prim` (or `primitive`): Specifies that after emitting the vertex, a primitive should also be emitted.
- `primitive` (or `prim`): Specifies that after emitting the vertex, a primitive should also be emitted.
- `inv` (or `invert`): Specifies that the order of the vertices in the emitted primitive is inverted.

View File

@ -1,14 +0,0 @@
@PACKAGE_INIT@

# Directory containing this config file; the exported targets file is loaded
# from the same location.
set(picasso_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}")

# Makes find_dependency() available to this package configuration.
include(CMakeFindDependencyMacro)

# Load the exported targets unless picasso::picasso already exists.
if(NOT TARGET picasso::picasso)
  include("${picasso_CMAKE_DIR}/picasso-targets.cmake")
endif()

check_required_components(picasso)

348
compile
View File

@ -1,348 +0,0 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2018-03-07.03; # UTC
# Copyright (C) 1999-2021 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# A single literal newline, used to build IFS below.
nl='
'
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent tools from complaining about whitespace usage.
# NOTE(review): the second quoted segment below shows a space, not the tab the
# comment above calls for - verify the tab was not lost when this copy was made.
IFS=" "" $nl"
# Conversion type used by func_file_conv; empty until it is first determined.
file_conv=
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
func_file_conv ()
{
# Takes the path in $1 and leaves the (possibly converted) result in the
# global $file. $2 is an optional comma-separated list of conversion types
# for which no conversion should be done. Paths that do not match the
# absolute-path pattern below are passed through unchanged.
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
# (the answer is stored in the global $file_conv, so uname is not run again)
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN* | MSYS*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
# The subject is "<type>/,<lazy list>,": the first pattern matches when the
# detected type appears in the lazy list, in which case $file is left as is.
case $file_conv/,$2, in
*,$file_conv,*)
;;
mingw/*)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin/* | msys/*)
# Keep the original path if cygpath fails.
file=`cygpath -m "$file" || echo "$file"`
;;
wine/*)
# Keep the original path if winepath fails.
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR
func_cl_dashL ()
{
  # Convert the link directory to host form; the result arrives in $file.
  func_file_conv "$1"

  # Extend the ';'-separated search path. The separator is emitted only when
  # the path already has content, so the first entry gets no leading ';'.
  lib_path="${lib_path:+$lib_path;}$file"

  # Pass the same directory on to the linker.
  linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Do a library search-path lookup for cl
func_cl_dashl ()
{
# Resolve the library name in $1 to a file name and leave the result in the
# global $lib. Directories are taken from $lib_path followed by $LIB, both
# split on ';'. The first directory containing a candidate wins.
lib=$1
found=no
save_IFS=$IFS
# Use ';' as the field separator while the for-loop word list is expanded.
IFS=';'
for dir in $lib_path $LIB
do
# Put the saved IFS back immediately so the loop body runs with it.
IFS=$save_IFS
# $shared is run as a command (func_cl_wrapper sets it to ':' or 'false'),
# so the .dll.lib candidate is only tried while it succeeds.
if $shared && test -f "$dir/$lib.dll.lib"; then
found=yes
lib=$dir/$lib.dll.lib
break
fi
if test -f "$dir/$lib.lib"; then
found=yes
lib=$dir/$lib.lib
break
fi
if test -f "$dir/lib$lib.a"; then
found=yes
lib=$dir/lib$lib.a
break
fi
done
# Also restores IFS when the loop body never ran (empty directory list).
IFS=$save_IFS
# Nothing found in any directory: fall back to the bare name plus ".lib".
if test "$found" != yes; then
lib=$lib.lib
fi
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
func_cl_wrapper ()
{
# Rewrites the command line in "$@" (compiler first, then its arguments) into
# the option forms used below (-Fo, -Fe, -Tp, -LIBPATH:, -link) and then
# exec's the result, so this function does not return.
#
# The loop rotates the positional parameters: each iteration appends the
# translated form of $1 to the end of "$@" (the 'set x "$@" ...; shift' pairs
# add the new word and drop the dummy 'x'), and the 'shift' at the bottom of
# the loop drops the original $1. Options that take a separate value set
# $eat so that the following iteration only discards that value.
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
# Object outputs become -Fo<file>; anything else becomes -Fe<file>.
case $2 in
*.o | *.[oO][bB][jJ])
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
eat=1
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
eat=1
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
# Recorded in $lib_path and $linker_opts only; nothing is re-appended to "$@".
eat=1
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
# Makes func_cl_dashl stop considering .dll.lib candidates.
shared=false
;;
-Wl,*)
# Split the comma-separated linker flags and collect them in $linker_opts.
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
eat=1
linker_opts="$linker_opts $2"
;;
-*)
# Any other option is passed through unchanged.
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
# These source extensions are passed with a -Tp prefix.
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
# Collected linker options are appended after a single "-link" word
# ($linker_opts already starts with a space).
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
# Only reached if the exec above fails.
exit 1
}
# Script entry: look at the first argument to handle the no-command case,
# --help, --version, and the cl/icl compilers that get the dedicated wrapper.
# Any other first argument falls through to the generic handling that follows.
eat=
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
# The compiler is matched by name, with or without a directory prefix
# (either slash direction) and with or without the .exe suffix.
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
# Generic path: strip '-o <object>' from the command line, run the compiler,
# then move the object file it produced to the requested name.
ofile=
cfile=
# Same rotation scheme as in func_cl_wrapper: kept arguments are re-appended
# to "$@" and the original $1 is dropped by the 'shift' at the loop bottom.
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
# Remember the C source; the expected object name is derived from it below.
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# '.c' file was seen then we are probably linking. That is also
# ok.
# exec replaces this shell, so nothing below runs in that case.
exec "$@"
fi
# Name of file we expect compiler to create.
# (directory part and any drive-letter prefix removed, '.c' replaced by '.o')
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
# Retry once per second until this process manages to create the directory.
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
# Remove the lock directory if the script receives signal 1, 2 or 15.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
# Move the produced object to the requested name. "${cofile}bj" is the
# '.obj' spelling of the same file, since $cofile ends in '.o'.
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
# Propagate the compiler's exit status.
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

View File

@ -2,10 +2,10 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.61)
AC_INIT([picasso],[2.7.1],[https://github.com/devkitPro/picasso/issues])
AC_INIT([picasso],[1.0.0],[fincs.alt1@gmail.com])
AC_CONFIG_SRCDIR([source/picasso_frontend.cpp])
AM_INIT_AUTOMAKE([subdir-objects])
AM_INIT_AUTOMAKE([1.10])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST

View File

@ -1,42 +0,0 @@
# Sample 3DS homebrew application that links against the picasso library
# built from the parent directory and packages the result as a .3dsx.
#
# Expected to be configured with a devkitARM toolchain file; DEVKITPRO is
# taken from that toolchain file or, failing that, from the environment.
cmake_minimum_required(VERSION 3.22)
project(linpicasso_sample)

# Every path below is derived from DEVKITPRO, so fail early with a clear
# message instead of silently producing paths such as "/libctru/lib".
if("${DEVKITPRO}" STREQUAL "")
  set(DEVKITPRO "$ENV{DEVKITPRO}")
endif()
if("${DEVKITPRO}" STREQUAL "")
  message(FATAL_ERROR
    "DEVKITPRO is not set; use the devkitPro toolchain file or export DEVKITPRO")
endif()

# NOTE(review): these directory-scoped settings are kept (rather than moved
# to target_* commands) because the picasso subdirectory added below inherits
# them and may rely on that -- confirm before tightening the scope.
set(CMAKE_EXE_LINKER_FLAGS "-L${DEVKITPRO}/libctru/lib -L${DEVKITPRO}/picaGL/lib -L${DEVKITPRO}/portlibs/3ds/lib -specs=3dsx.specs -Wl,--gc-sections")
include_directories(${DEVKITPRO}/libctru/include ${DEVKITPRO}/picaGL/include ${DEVKITPRO}/portlibs/3ds/include)
add_definitions("-D__3DS__")

# Application metadata embedded into the SMDH.
set(APP_TITLE "${PROJECT_NAME}")
set(APP_DESCRIPTION "Example of Lib Picasso")
set(APP_AUTHOR "Tobi-D7, tobid7vx")
# Resolve the default icon through DEVKITPRO so non-/opt installs work too.
set(APP_ICON "${DEVKITPRO}/libctru/default_icon.png")
set(APP_ROMFS "${PROJECT_SOURCE_DIR}/romfs")

enable_language(ASM)

# Build the picasso library itself in its on-device (3DS) configuration.
set(BASE_CTR ON CACHE BOOL "Enable 3ds")
add_subdirectory(../ picasso)

add_executable(${PROJECT_NAME}.elf src/main.cpp)
target_include_directories(${PROJECT_NAME}.elf PRIVATE src ../include)
target_link_libraries(${PROJECT_NAME}.elf PRIVATE citro2d citro3d ctru m picasso)

# Absolute output paths so that OUTPUT, COMMAND and DEPENDS all name the
# same files regardless of the generator's working directory.
set(APP_SMDH "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.smdh")
set(APP_3DSX "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.3dsx")

# Icon/metadata blob; it only needs the icon file, not the executable.
add_custom_command(
  OUTPUT "${APP_SMDH}"
  COMMAND smdhtool --create "${APP_TITLE}" "${APP_DESCRIPTION}" "${APP_AUTHOR}" "${APP_ICON}" "${APP_SMDH}"
  DEPENDS "${APP_ICON}"
  VERBATIM
)

# Final homebrew package. It embeds the SMDH, so the SMDH must be listed as a
# dependency; otherwise a parallel build may run 3dsxtool before it exists.
# NOTE(review): changes inside the romfs directory are not tracked here.
add_custom_command(
  OUTPUT "${APP_3DSX}"
  COMMAND 3dsxtool "$<TARGET_FILE:${PROJECT_NAME}.elf>" "${APP_3DSX}" "--romfs=${APP_ROMFS}" "--smdh=${APP_SMDH}"
  DEPENDS ${PROJECT_NAME}.elf "${APP_SMDH}"
  VERBATIM
)

add_custom_target(3ds ALL
  DEPENDS "${APP_SMDH}" "${APP_3DSX}"
)

View File

@ -1,36 +0,0 @@
; Example PICA200 vertex shader
; Transforms each input position by a 4x4 projection matrix and forwards the
; input colour unchanged.
; Uniforms
; Four float-vector registers; each one is used as a matrix row by the dp4
; instructions in main.
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
; NOTE: myconst2 and the 'zeros' alias are declared but not referenced
; anywhere in this file.
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
.proc main
; Force the w component of inpos to be 1.0
; (only xyz are copied from the input; w comes from the 'ones' constant)
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
; (one dot product per output component, one matrix row each)
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outclr = inclr
mov outclr, inclr
; We're finished
end
.end

View File

@ -1,174 +0,0 @@
#include <3ds.h>
#include <citro3d.h>
#include <pica.hpp>
static const char *const vertShader = R"text(
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
.proc main
; Force the w component of inpos to be 1.0
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outclr = inclr
mov outclr, inclr
; We're finished
end
.end
)text";
#define CLEAR_COLOR 0x68B0D8FF
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
typedef struct { float x, y, z; } vertex;
static const vertex vertex_list[] =
{
{ 200.0f, 200.0f, 0.5f },
{ 100.0f, 40.0f, 0.5f },
{ 300.0f, 40.0f, 0.5f },
};
#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
static DVLB_s* vshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection;
static C3D_Mtx projection;
static char* vshader_shbin;
static int vshader_shbin_size;
static void* vbo_data;
static void sceneInit(void)
{
// Load the vertex shader, create a shader program and bind it
vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
shaderProgramInit(&program);
shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
C3D_BindProgram(&program);
// Get the location of the uniforms
uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
// Configure attributes for use with the vertex shader
C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
AttrInfo_Init(attrInfo);
AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
AttrInfo_AddFixed(attrInfo, 1); // v1=color
// Set the fixed attribute (color) to solid white
C3D_FixedAttribSet(1, 1.0, 1.0, 1.0, 1.0);
// Compute the projection matrix
Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
// Create the VBO (vertex buffer object)
vbo_data = linearAlloc(sizeof(vertex_list));
memcpy(vbo_data, vertex_list, sizeof(vertex_list));
// Configure buffers
C3D_BufInfo* bufInfo = C3D_GetBufInfo();
BufInfo_Init(bufInfo);
BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
// Configure the first fragment shading substage to just pass through the vertex color
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
C3D_TexEnv* env = C3D_GetTexEnv(0);
C3D_TexEnvInit(env);
C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, (GPU_TEVSRC)0, (GPU_TEVSRC)0);
C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
}
// Draws one frame of the scene: uploads the projection matrix to the vertex
// shader and issues the draw call for the whole vertex list.
static void sceneRender(void)
{
	// Refresh the projection uniform
	C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);

	// Submit every vertex of the list as triangles, starting at index 0
	const int firstVertex = 0;
	C3D_DrawArrays(GPU_TRIANGLES, firstVertex, vertex_list_count);
}
// Releases the resources created by sceneInit: the vertex buffer, the shader
// program and the parsed DVLB.
// NOTE(review): vshader_shbin (the buffer returned by Pica::AssembleFile) is
// not released here or anywhere else in this file; who owns it is not visible
// from this code -- confirm against the library.
static void sceneExit(void)
{
// Free the VBO
linearFree(vbo_data);
// Free the shader program
shaderProgramFree(&program);
DVLB_Free(vshader_dvlb);
}
int main()
{
// Initialize graphics
gfxInitDefault();
romfsInit();
C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
//vshader_shbin = Pica::AssembleCode(vertShader, vshader_shbin_size);
vshader_shbin = Pica::AssembleFile("romfs:/vshader.pica", vshader_shbin_size);
// Initialize the render target
C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
// Initialize the scene
sceneInit();
// Main loop
while (aptMainLoop())
{
hidScanInput();
// Respond to user input
u32 kDown = hidKeysDown();
if (kDown & KEY_START)
break; // break in order to return to hbmenu
// Render the scene
C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
C3D_FrameDrawOn(target);
sceneRender();
C3D_FrameEnd(0);
}
// Deinitialize the scene
sceneExit();
// Deinitialize graphics
C3D_Fini();
gfxExit();
return 0;
}

View File

@ -1,10 +0,0 @@
#pragma once
#include <iostream>
#include <string>
// Public interface of the picasso shader assembler when used as a library.
namespace Pica
{
// Installs the function that is called when assembling fails. The handler
// receives a short title ("top") and a detail message.
void InstallErrorCallback(void(*ErrorHandler)(const char* top, const char* message));
// Assembles shader source text given directly in memory ("vertex").
// NOTE(review): presumably returns the generated shader binary and stores its
// size in res_size; the units of res_size and the ownership of the returned
// buffer are not visible from this header -- confirm against the implementation.
char* AssembleCode(const char* vertex, int &res_size);
// Same as AssembleCode, but takes the path of a source file.
// NOTE(review): presumably reads the file and assembles its contents -- confirm.
char* AssembleFile(const char* file, int &res_size);
}

View File

@ -1,256 +0,0 @@
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <getopt.h>
#ifdef WIN32
#include <fcntl.h>
#endif
#include "types.h"
#include <vector>
#include <list>
#include <map>
#include <string>
#include <algorithm>
#include "picasso/FileClass.h"
#include "picasso/maestro_opcodes.h"
#if !defined(WIN32) && !defined(stricmp)
#define stricmp strcasecmp
#endif
enum
{
COMP_X = 0,
COMP_Y,
COMP_Z,
COMP_W,
};
// Bit-packing helpers for operand descriptors, instructions and outputs.
//
// SWIZZLE_COMP(n,v): places component selector v (COMP_X..COMP_W) for slot n
// at bit 6-2n, i.e. slot 0 occupies the highest pair of an 8-bit swizzle.
#define SWIZZLE_COMP(n,v) ((v) << (6-(n)*2))
// OPSRC_MAKE: one source-operand field = negate flag in bit 0, swizzle above it.
#define OPSRC_MAKE(neg, sw) ((neg) | ((sw) << 1))
// OPDESC_MAKE: full descriptor = 4-bit 'out' value in bits 0-3 followed by
// three 9-bit source fields starting at bits 4, 13 and 22.
#define OPDESC_MAKE(out, src1, src2, src3) ((out) | ((src1) << 4) | ((src2) << (4+9)) | ((src3) << (4+9*2)))
// FMT_OPCODE: shifts an opcode number up to bit 26.
#define FMT_OPCODE(n) ((n)<<26)
// OUTPUT_MAKE: 64-bit output entry = i in the low bits, reg from bit 16,
// mask from bit 32 (the u64 cast keeps the mask from being shifted out).
#define OUTPUT_MAKE(i, reg, mask) ((i) | ((reg)<<16) | ((u64)(mask)<<32))
// Identity swizzle (x,y,z,w in order) and a non-negated source using it.
#define DEFAULT_SWIZZLE (SWIZZLE_COMP(0,COMP_X) | SWIZZLE_COMP(1,COMP_Y) | SWIZZLE_COMP(2,COMP_Z) | SWIZZLE_COMP(3,COMP_W))
#define DEFAULT_OPSRC OPSRC_MAKE(0, DEFAULT_SWIZZLE)
// Masks selecting which descriptor fields are set; the suffix lists the
// fields covered (D = 'out' field, 1/2/3 = source operands).
#define OPDESC_MASK_D123 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0x1FF)
#define OPDESC_MASK_D12 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0)
#define OPDESC_MASK_D1 OPDESC_MAKE(0xF, 0x1FF, 0, 0)
#define OPDESC_MASK_1 OPDESC_MAKE(0, 0x1FF, 0, 0)
#define OPDESC_MASK_12 OPDESC_MAKE(0, 0x1FF, 0x1FF, 0)
enum
{
COND_EQ = 0,
COND_NE,
COND_LT,
COND_LE,
COND_GT,
COND_GE,
};
//-----------------------------------------------------------------------------
// Global data
//-----------------------------------------------------------------------------
// Output buffer
#define MAX_VSH_SIZE 512
typedef std::vector<u32> outputBufType;
typedef outputBufType::iterator outputBufIter;
extern outputBufType g_outputBuf;
enum
{
SE_PROC,
SE_FOR,
SE_IF,
SE_ARRAY,
};
struct StackEntry
{
int type;
size_t pos;
union
{
const char* strExtra;
size_t uExtra;
};
};
// Stack used to keep track of stuff.
#define MAX_STACK 32
extern StackEntry g_stack[MAX_STACK];
extern int g_stackPos;
// Operand descriptor stuff.
#define MAX_OPDESC 128
extern int g_opdescTable[MAX_OPDESC];
extern int g_opdeskMasks[MAX_OPDESC]; // used to keep track of used bits
extern int g_opdescCount;
enum
{
UTYPE_BOOL = 0,
UTYPE_IVEC,
UTYPE_FVEC,
};
// Description of one named uniform: where it starts, how many registers it
// spans and which UTYPE_* kind it is.
struct Uniform
{
	std::string name;
	int pos, size;
	int type;

	// Orders uniforms by starting position only, so that std::sort arranges
	// a uniform table by position.
	inline bool operator <(const Uniform& other) const
	{
		const bool startsEarlier = this->pos < other.pos;
		return startsEarlier;
	}

	// Fills in every field at once. The parameters intentionally shadow the
	// members, hence the explicit this-> on the left-hand side.
	void init(const char* name, int pos, int size, int type)
	{
		this->type = type;
		this->size = size;
		this->pos = pos;
		this->name.assign(name);
	}
};
// List of uniforms
#define MAX_UNIFORM 0x60
extern Uniform g_uniformTable[MAX_UNIFORM];
extern int g_uniformCount;
struct DVLEData; // Forward declaration
typedef std::pair<size_t, size_t> procedure; // position, size
typedef std::pair<size_t, std::string> relocation; // position, name
typedef std::map<std::string, procedure> procTableType;
typedef std::map<std::string, size_t> labelTableType;
typedef std::map<std::string, int> aliasTableType;
typedef std::vector<relocation> relocTableType;
typedef std::list<DVLEData> dvleTableType;
typedef procTableType::iterator procTableIter;
typedef labelTableType::iterator labelTableIter;
typedef aliasTableType::iterator aliasTableIter;
typedef relocTableType::iterator relocTableIter;
typedef dvleTableType::iterator dvleTableIter;
extern procTableType g_procTable;
extern dvleTableType g_dvleTable;
extern relocTableType g_procRelocTable;
extern int g_totalDvleCount;
// The following are cleared before each file is processed
extern labelTableType g_labels;
extern relocTableType g_labelRelocTable;
extern aliasTableType g_aliases;
extern bool g_autoNop;
int AssembleString(char* str, const char* initialFilename);
int RelocateProduct(void);
//-----------------------------------------------------------------------------
// Local data
//-----------------------------------------------------------------------------
enum
{
OUTTYPE_POS = 0,
OUTTYPE_NQUAT = 1,
OUTTYPE_CLR = 2,
OUTTYPE_TCOORD0 = 3,
OUTTYPE_TCOORD0W = 4,
OUTTYPE_TCOORD1 = 5,
OUTTYPE_TCOORD2 = 6,
OUTTYPE_VIEW = 8,
OUTTYPE_DUMMY = 9,
};
enum
{
GSHTYPE_POINT = 0,
GSHTYPE_VARIABLE = 1,
GSHTYPE_FIXED = 2,
};
// One constant value bound to a uniform register. 'type' is one of the
// UTYPE_* values and selects which member of the union is meaningful.
struct Constant
{
// Register the constant is bound to.
int regId;
// UTYPE_BOOL, UTYPE_IVEC or UTYPE_FVEC.
int type;
union
{
// Four float components (used when type is UTYPE_FVEC).
float fparam[4];
// Four byte components (used when type is UTYPE_IVEC).
u8 iparam[4];
// Single boolean (used when type is UTYPE_BOOL).
bool bparam;
};
};
// Everything collected for one DVLE (one shader entry point): its
// configuration, uniforms, constants and outputs.
struct DVLEData
{
	// General config
	std::string filename;
	std::string entrypoint;
	size_t entryStart, entryEnd;
	bool nodvle, isGeoShader, isCompatGeoShader, isMerge;
	u16 inputMask, outputMask;
	u8 geoShaderType;
	u8 geoShaderFixedStart;
	u8 geoShaderVariableNum;
	u8 geoShaderFixedNum;

	// Uniforms
	Uniform uniformTable[MAX_UNIFORM];
	int uniformCount;
	size_t symbolSize;

	// Constants
	#define MAX_CONSTANT 0x60
	Constant constantTable[MAX_CONSTANT];
	int constantCount;

	// Outputs
	#define MAX_OUTPUT 16
	u64 outputTable[MAX_OUTPUT];
	u32 outputUsedReg;
	int outputCount;

	// True for geometry shaders that are not in compatibility mode.
	bool usesGshSpace() const { return isGeoShader && !isCompatGeoShader; }

	// Returns the lowest output register index whose bit is clear in
	// outputMask, or -1 when all registers below maxOutputReg() are taken.
	int findFreeOutput() const
	{
		for (int i = 0; i < maxOutputReg(); i ++)
			if (!(outputMask & BIT(i)))
				return i;
		return -1;
	}

	// Returns the lowest of the 16 input register indices whose bit is clear
	// in inputMask, or -1 when all are taken.
	int findFreeInput() const
	{
		for (int i = 0; i < 16; i ++)
			if (!(inputMask & BIT(i)))
				return i;
		return -1;
	}

	// Number of output registers available: 7 for geometry shaders, 16 otherwise.
	int maxOutputReg() const
	{
		return isGeoShader ? 0x07 : 0x10;
	}

	// Creates an empty DVLE for the given source file with "main" as the
	// default entry point. entryStart/entryEnd are now zeroed as well, so
	// they are never read in an indeterminate state before being assigned.
	DVLEData(const char* filename) :
		filename(filename), entrypoint("main"),
		entryStart(0), entryEnd(0),
		nodvle(false), isGeoShader(false), isCompatGeoShader(false), isMerge(false),
		inputMask(0), outputMask(0), geoShaderType(0), geoShaderFixedStart(0), geoShaderVariableNum(0), geoShaderFixedNum(0),
		uniformCount(0), symbolSize(0), constantCount(0), outputUsedReg(0), outputCount(0) { }
};

View File

@ -1,44 +1,43 @@
#pragma once
#include <stdio.h>
#include "picasso/types.h"
#include <sstream>
#include <string>
#include "types.h"
class FileClass
{
std::stringstream f;
FILE* f;
bool LittleEndian, own;
int filePos;
size_t _RawRead(void* buffer, size_t size)
{
f.read((char*)buffer, size);
filePos += size;
return size;
size_t x = fread(buffer, 1, size, f);
filePos += x;
return x;
}
size_t _RawWrite(const void* buffer, size_t size)
{
f.write((const char*)buffer, size);
filePos += size;
return size;
size_t x = fwrite(buffer, 1, size, f);
filePos += x;
return x;
}
public:
FileClass(const char* file, const char* mode) : LittleEndian(true), own(true), filePos(0)
{
//Do nothing
f = fopen(file, mode);
}
FileClass(FILE* inf) : f(inf), LittleEndian(true), own(false), filePos(0) { }
~FileClass()
{
//Do nothing
if (f && own) fclose(f);
}
void SetLittleEndian() { LittleEndian = true; }
void SetBigEndian() { LittleEndian = false; }
std::stringstream* get_ptr() { return &f; }
bool openerror() { return false; }
FILE* get_ptr() { return f; }
bool openerror() { return f == NULL; }
dword_t ReadDword()
{
@ -104,11 +103,13 @@ public:
t.f = value;
WriteWord(t.w);
}
bool ReadRaw(void* buffer, size_t size) { return _RawRead(buffer, size) == size; }
bool WriteRaw(const void* buffer, size_t size) { return _RawWrite(buffer, size) == size; }
void Seek(int pos, int mode) { fseek(f, pos, mode); }
int Tell() { return filePos /*ftell(f)*/; }
void Flush() { fflush(f); }
};
static inline char* StringFromFile(const char* filename)
@ -128,4 +129,4 @@ static inline char* StringFromFile(const char* filename)
buf[size] = 0;
fclose(f);
return buf;
}
}

View File

@ -5,10 +5,10 @@ enum
MAESTRO_DP3,
MAESTRO_DP4,
MAESTRO_DPH,
MAESTRO_DST,
MAESTRO_unk4,
MAESTRO_EX2,
MAESTRO_LG2,
MAESTRO_LITP,
MAESTRO_unk7,
MAESTRO_MUL,
MAESTRO_SGE,
MAESTRO_SLT,
@ -27,7 +27,7 @@ enum
MAESTRO_unk16,
MAESTRO_unk17,
MAESTRO_DPHI,
MAESTRO_DSTI,
MAESTRO_unk19,
MAESTRO_SGEI,
MAESTRO_SLTI,
MAESTRO_unk1C,
@ -35,7 +35,7 @@ enum
MAESTRO_unk1E,
MAESTRO_unk1F,
MAESTRO_BREAK,
MAESTRO_unk20,
MAESTRO_NOP,
MAESTRO_END,
MAESTRO_BREAKC,
@ -54,4 +54,4 @@ enum
// Only the upper 3 bits are used for the following opcodes
MAESTRO_MADI = 0x30,
MAESTRO_MAD = 0x38,
};
};

170
source/picasso.h Normal file
View File

@ -0,0 +1,170 @@
#pragma once
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#ifdef WIN32
#include <fcntl.h>
#endif
#include "types.h"
#include <vector>
#include <list>
#include <map>
#include <string>
#include <algorithm>
#include "FileClass.h"
#include "maestro_opcodes.h"
#if !defined(WIN32) && !defined(stricmp)
#define stricmp strcasecmp
#endif
enum
{
COMP_X = 0,
COMP_Y,
COMP_Z,
COMP_W,
};
// Bit-packing helpers for operand descriptors, instructions and outputs.
//
// SWIZZLE_COMP(n,v): places component selector v (COMP_X..COMP_W) for slot n
// at bit 6-2n, i.e. slot 0 occupies the highest pair of an 8-bit swizzle.
#define SWIZZLE_COMP(n,v) ((v) << (6-(n)*2))
// OPSRC_MAKE: one source-operand field = negate flag in bit 0, swizzle above it.
#define OPSRC_MAKE(neg, sw) ((neg) | ((sw) << 1))
// OPDESC_MAKE: full descriptor = 4-bit 'out' value in bits 0-3 followed by
// three 9-bit source fields starting at bits 4, 13 and 22.
#define OPDESC_MAKE(out, src1, src2, src3) ((out) | ((src1) << 4) | ((src2) << (4+9)) | ((src3) << (4+9*2)))
// FMT_OPCODE: shifts an opcode number up to bit 26.
#define FMT_OPCODE(n) ((n)<<26)
// OUTPUT_MAKE: 64-bit output entry = i in the low bits, reg from bit 16,
// mask from bit 32 (the u64 cast keeps the mask from being shifted out).
#define OUTPUT_MAKE(i, reg, mask) ((i) | ((reg)<<16) | ((u64)(mask)<<32))
// Identity swizzle (x,y,z,w in order) and a non-negated source using it.
#define DEFAULT_SWIZZLE (SWIZZLE_COMP(0,COMP_X) | SWIZZLE_COMP(1,COMP_Y) | SWIZZLE_COMP(2,COMP_Z) | SWIZZLE_COMP(3,COMP_W))
#define DEFAULT_OPSRC OPSRC_MAKE(0, DEFAULT_SWIZZLE)
// Masks selecting which descriptor fields are set; the suffix lists the
// fields covered (D = 'out' field, 1/2/3 = source operands).
#define OPDESC_MASK_D123 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0x1FF)
#define OPDESC_MASK_D12 OPDESC_MAKE(0xF, 0x1FF, 0x1FF, 0)
#define OPDESC_MASK_D1 OPDESC_MAKE(0xF, 0x1FF, 0, 0)
#define OPDESC_MASK_1 OPDESC_MAKE(0, 0x1FF, 0, 0)
#define OPDESC_MASK_12 OPDESC_MAKE(0, 0x1FF, 0x1FF, 0)
enum
{
COND_EQ = 0,
COND_NE,
COND_LT,
COND_LE,
COND_GT,
COND_GE,
COND_UNK1,
COND_UNK2,
};
typedef std::vector<u32> outputBufType;
typedef outputBufType::iterator outputBufIter;
extern bool g_isGeoShader;
extern outputBufType g_outputBuf;
enum
{
SE_PROC,
SE_FOR,
SE_IF,
};
struct StackEntry
{
int type;
size_t pos;
union
{
const char* strExtra;
size_t uExtra;
};
};
// Stack used to keep track of stuff.
#define MAX_STACK 32
extern StackEntry g_stack[MAX_STACK];
extern int g_stackPos;
#define MAX_OPDESC 128
extern int g_opdescTable[MAX_OPDESC];
extern int g_opdeskMasks[MAX_OPDESC]; // used to keep track of used bits
extern int g_opdescCount;
enum
{
UTYPE_BOOL = 0,
UTYPE_IVEC,
UTYPE_FVEC,
};
// Description of one named uniform.
struct Uniform
{
// Symbol name; written into the symbol table and the generated header.
std::string name;
// Starting position and number of registers covered. The frontend subtracts
// a type-dependent base (0x20 / 0x80 / 0x88) from pos when it emits header
// defines, so pos is expressed in a single shared numbering.
int pos, size;
// UTYPE_BOOL, UTYPE_IVEC or UTYPE_FVEC.
int type;
};
#define MAX_UNIFORM 0x60
extern Uniform g_uniformTable[MAX_UNIFORM];
extern int g_uniformCount;
enum
{
OUTTYPE_POS = 0,
OUTTYPE_NQUAT,
OUTTYPE_CLR,
OUTTYPE_TCOORD0,
OUTTYPE_TCOORD0W,
OUTTYPE_TCOORD1,
OUTTYPE_TCOORD2,
OUTTYPE_7,
OUTTYPE_VIEW,
};
#define MAX_OUTPUT 8
extern u64 g_outputTable[MAX_OUTPUT];
extern int g_outputCount;
struct Constant
{
int regId;
int type;
union
{
float fparam[4];
u8 iparam[4];
};
};
#define MAX_CONSTANT 0x60
extern Constant g_constantTable[MAX_CONSTANT];
extern int g_constantCount;
extern size_t g_constantSize;
// A reference that has to be patched once the position of its target is known.
struct Relocation
{
// Position of the instruction to patch.
// NOTE(review): presumably an index into g_outputBuf -- confirm.
size_t instPos;
// Name of the referenced procedure or label.
// NOTE(review): stored as a raw pointer; its lifetime/ownership is not
// visible from this header.
const char* target;
// True when the target names a procedure rather than a label.
bool isProc;
};
typedef std::pair<size_t, size_t> procedure; // position, size
typedef std::map<std::string, procedure> procTableType;
typedef std::map<std::string, size_t> labelTableType;
typedef std::map<std::string, int> aliasTableType;
typedef std::vector<Relocation> relocTableType;
typedef procTableType::iterator procTableIter;
typedef labelTableType::iterator labelTableIter;
typedef aliasTableType::iterator aliasTableIter;
typedef relocTableType::iterator relocTableIter;
extern procTableType g_procTable;
extern labelTableType g_labels;
extern aliasTableType g_aliases;
extern relocTableType g_relocs;
int AssembleString(char* str, const char* initialFilename);

File diff suppressed because it is too large Load Diff

218
source/picasso_frontend.cpp Normal file
View File

@ -0,0 +1,218 @@
#include "picasso.h"
// Originally taken from ctrulib.
//
// Converts an IEEE-754 single-precision float to the 24-bit float format
// written into the constant table: sign in bit 23, a 7-bit exponent in bits
// 16-22 and the top 16 mantissa bits in bits 0-15.
//
// Zero maps to 0. Values too small for the 7-bit exponent are flushed to a
// signed zero and values too large saturate to the maximum exponent.
// (The original kept the re-biased exponent in an unsigned variable, so its
// "exp >= 0" underflow test was always true and out-of-range exponents
// corrupted the sign bit.)
u32 f32tof24(float vf)
{
	if (!vf) return 0;

	union { float f; u32 v; } q;
	q.f = vf;

	u32 sign = q.v >> 31;
	// Signed on purpose: re-biasing (127 -> 63, i.e. minus 0x40) can go negative.
	int exp = (int)((q.v >> 23) & 0xFF) - 0x40;
	u32 man = (q.v >> 7) & 0xFFFF;

	if (exp < 0)
		return sign << 23; // Underflow: flush to zero
	if (exp > 0x7F)
		return (sign << 23) | (0x7F << 16); // Overflow: saturate
	return man | ((u32)exp << 16) | (sign << 23);
}
#ifdef WIN32
// Rewrites an MSYS/MinGW-style path of the form "/c/..." (or just "/c") into
// the native "c:/..." form, in place. Anything else is left untouched:
// null pointers, relative paths, a bare "/", and absolute paths whose first
// component is not a single drive letter (e.g. "/usr/lib"), which the
// original unconditionally mangled into "u:sr/lib".
static inline void FixMinGWPath(char* buf)
{
	if (!buf || buf[0] != '/')
		return;

	const char drive = buf[1];
	const bool isLetter = (drive >= 'a' && drive <= 'z') || (drive >= 'A' && drive <= 'Z');
	if (!isLetter)
		return;

	// The drive letter must be a whole path component.
	if (buf[2] != '/' && buf[2] != '\0')
		return;

	buf[0] = drive;
	buf[1] = ':';
}
#endif
// Prints the command-line synopsis to stderr and returns the exit status the
// caller should use. main() returns this value when the argument count is
// wrong, so it is non-zero: a bad invocation must not look like success to
// a build system (the original returned 0).
int usage(const char* prog)
{
	fprintf(stderr,
		"Usage:\n\n"
		"%s shbinFile vshFile [hFile]\n", prog);
	return 1;
}
// Command-line frontend: picasso shbinFile vshFile [hFile]
//
// Assembles one shader source file and writes the result as a SHBIN
// containing a DVLB header, one DVLP (program + operand descriptors) and one
// DVLE (entry point, constants, outputs, uniforms, symbols). Optionally
// writes a C header with defines for the uniforms.
// Returns 0 on success, the assembler's error code if assembling fails, and
// 1 for the other error paths in this function.
int main(int argc, char* argv[])
{
if (argc < 3 || argc > 4)
return usage(argv[0]);
char* shbinFile = argv[1];
char* vshFile = argv[2];
char* hFile = argc > 3 ? argv[3] : NULL;
#ifdef WIN32
FixMinGWPath(shbinFile);
FixMinGWPath(vshFile);
FixMinGWPath(hFile);
#endif
char* sourceCode = StringFromFile(vshFile);
if (!sourceCode)
{
fprintf(stderr, "Cannot open input file!\n");
return 1;
}
int rc = AssembleString(sourceCode, vshFile);
free(sourceCode);
if (rc != 0)
return rc;
// The shader must define a procedure called "main"; it is the DVLE entry point.
procTableIter mainIt = g_procTable.find("main");
if (mainIt == g_procTable.end())
{
fprintf(stderr, "Error: main proc not defined\n");
return 1;
}
FileClass f(shbinFile, "wb");
if (f.openerror())
{
// NOTE(review): unlike the other messages this one has no trailing newline.
fprintf(stderr, "Can't open output file!");
return 1;
}
// --- DVLB header: magic, DVLE count, offset of the single DVLE ---
// The DVLE follows the 3-word DVLB header and the 0x28-byte DVLP header.
f.WriteWord(0x424C5644); // DVLB
f.WriteWord(1); // 1 DVLE
f.WriteWord(3*4 + 0x28); // offset to DVLE
// --- DVLP header (10 words = 0x28 bytes) ---
// paramStart tracks where the next variable-size table will be written,
// relative to the start of the DVLP; the tables are emitted after both the
// DVLP header (0x28) and the DVLE header (0x40).
u32 dvlpStart = f.Tell();
u32 shaderSize = g_outputBuf.size();
u32 paramStart = 0x28 + 0x40;
f.WriteWord(0x504C5644); // DVLP
f.WriteWord(0); // version
f.WriteWord(paramStart); // offset to shader binary blob
f.WriteWord(shaderSize); // size of shader binary blob
paramStart += shaderSize*4;
f.WriteWord(paramStart); // offset to opdesc table
f.WriteWord(g_opdescCount); // number of opdescs
paramStart += g_opdescCount*8;
f.WriteWord(paramStart); // offset to symtable (TODO)
f.WriteWord(0); // ????
f.WriteWord(0); // ????
f.WriteWord(0); // ????
// --- DVLE header (16 words = 0x40 bytes) ---
// From here on offsets are relative to the DVLE, so rebase paramStart.
u32 dvleStart = f.Tell();
paramStart -= dvleStart - dvlpStart;
f.WriteWord(0x454C5644); // DVLE
f.WriteHword(0); // padding?
f.WriteHword(g_isGeoShader ? 1 : 0); // Shader type
f.WriteWord(mainIt->second.first); // offset to main
f.WriteWord(mainIt->second.first+mainIt->second.second); // offset to end of main
f.WriteWord(0); // ???
f.WriteWord(0); // ???
f.WriteWord(paramStart); // offset to constant table
f.WriteWord(g_constantCount); // size of constant table
paramStart += g_constantSize;
f.WriteWord(paramStart); // offset to label table (TODO)
f.WriteWord(0); // size of label table (TODO)
f.WriteWord(paramStart); // offset to output table
f.WriteWord(g_outputCount); // size of output table
paramStart += g_outputCount*8;
f.WriteWord(paramStart); // offset to uniform table
f.WriteWord(g_uniformCount); // size of uniform table
paramStart += g_uniformCount*8;
f.WriteWord(paramStart); // offset to symbol table
// The symbol table size is only known after the uniforms have been walked;
// remember where the field is and write a placeholder for now.
u32 temp = f.Tell();
f.WriteWord(0); // size of symbol table
// Write program
//for (u32 p : g_outputBuf)
for (outputBufIter it = g_outputBuf.begin(); it != g_outputBuf.end(); ++it)
f.WriteWord(*it);
// Write opdescs
for (int i = 0; i < g_opdescCount; i ++)
f.WriteDword(g_opdescTable[i]);
// Write constants
// Each entry starts with its type and a type-relative register index
// (float vectors are numbered from 0x20, integer vectors from 0x80).
for (int i = 0; i < g_constantCount; i ++)
{
Constant& ct = g_constantTable[i];
f.WriteHword(ct.type);
if (ct.type == UTYPE_FVEC)
{
f.WriteHword(ct.regId-0x20);
for (int j = 0; j < 4; j ++)
f.WriteWord(f32tof24(ct.fparam[j]));
} else if (ct.type == UTYPE_IVEC)
{
f.WriteHword(ct.regId-0x80);
for (int j = 0; j < 4; j ++)
f.WriteByte(ct.iparam[j]);
}
}
// Write outputs
for (int i = 0; i < g_outputCount; i ++)
f.WriteDword(g_outputTable[i]);
// Write uniforms
// sp is the running offset of each name inside the symbol table; after the
// loop it equals the total symbol table size.
size_t sp = 0;
for (int i = 0; i < g_uniformCount; i ++)
{
Uniform& u = g_uniformTable[i];
size_t l = u.name.length()+1;
f.WriteWord(sp); sp += l;
f.WriteHword(u.pos-0x10);
f.WriteHword(u.pos+u.size-1-0x10);
}
// Write size of symbol table
// (go back to the placeholder written above, patch it, then return)
u32 temp2 = f.Tell();
f.Seek(temp, SEEK_SET);
f.WriteWord(sp);
f.Seek(temp2, SEEK_SET);
// Write symbols
// Names are written back to back, each including its terminating NUL.
for (int i = 0; i < g_uniformCount; i ++)
{
std::string& u = g_uniformTable[i].name;
size_t l = u.length()+1;
f.WriteRaw(u.c_str(), l);
}
// Optional header: one register define (relative to the per-type base) and
// one length define per uniform, prefixed with VSH or GSH.
if (hFile)
{
FILE* f2 = fopen(hFile, "w");
if (!f2)
{
fprintf(stderr, "Can't open header file!\n");
return 1;
}
fprintf(f2, "// Generated by picasso\n");
fprintf(f2, "#pragma once\n");
const char* prefix = g_isGeoShader ? "GSH" : "VSH";
for (int i = 0; i < g_uniformCount; i ++)
{
Uniform& u = g_uniformTable[i];
const char* name = u.name.c_str();
if (u.type == UTYPE_FVEC)
fprintf(f2, "#define %s_FVEC_%s 0x%02X\n", prefix, name, u.pos-0x20);
else if (u.type == UTYPE_IVEC)
fprintf(f2, "#define %s_IVEC_%s 0x%02X\n", prefix, name, u.pos-0x80);
else if (u.type == UTYPE_BOOL)
{
if (u.size == 1)
fprintf(f2, "#define %s_FLAG_%s BIT(%d)\n", prefix, name, u.pos-0x88);
else
fprintf(f2, "#define %s_FLAG_%s(_n) BIT(%d+(_n))\n", prefix, name, u.pos-0x88);
}
fprintf(f2, "#define %s_ULEN_%s %d\n", prefix, name, u.size);
}
fclose(f2);
}
return 0;
}

View File

@ -1,308 +0,0 @@
#include "picasso.h"
// Converts an IEEE-754 single-precision float to the 24-bit float layout:
//   bit 23      sign
//   bits 16-22  exponent (7 bits, bias 63)
//   bits 0-15   mantissa (the 16 most significant bits of the f32 mantissa)
// Exponents below the representable range give a signed zero; exponents
// above it give the maximum exponent with a zero mantissa.
uint32_t f32tof24(float f)
{
	uint32_t bits;
	memcpy(&bits, &f, sizeof(f));

	// Split the f32 into its three fields.
	const uint32_t signBit = bits >> 31;
	const uint32_t fraction = (bits & 0x007FFFFFu) >> 7; // keep top 16 of 23 bits
	const int32_t biased = (int32_t)((bits >> 23) & 0xFFu) - 127 + 63;

	if (biased < 0)
		return signBit << 23; // Underflow: flush to zero

	if (biased > 0x7F)
		return (signBit << 23) | (0x7F << 16); // Overflow: saturate to infinity

	return (signBit << 23) | (biased << 16) | fraction;
}
#ifdef WIN32
// Rewrites an MSYS/MinGW-style path of the form "/c/..." (or just "/c") into
// the native "c:/..." form, in place. Anything else is left untouched:
// null pointers, relative paths, a bare "/", and absolute paths whose first
// component is not a single drive letter (e.g. "/usr/lib"), which the
// original unconditionally mangled into "u:sr/lib".
static inline void FixMinGWPath(char* buf)
{
	if (!buf || buf[0] != '/')
		return;

	const char drive = buf[1];
	const bool isLetter = (drive >= 'a' && drive <= 'z') || (drive >= 'A' && drive <= 'Z');
	if (!isLetter)
		return;

	// The drive letter must be a whole path component.
	if (buf[2] != '/' && buf[2] != '\0')
		return;

	buf[0] = drive;
	buf[1] = ':';
}
#endif
// Writes the command-line help to stderr, using prog as the program name in
// the synopsis line, and returns EXIT_FAILURE so that error paths can simply
// `return usage(...)`.
int usage(const char* prog)
{
	// Synopsis line is the only part that depends on the program name.
	fprintf(stderr, "Usage: %s [options] files...\n", prog);

	// The option list is fixed text.
	fputs(
		"Options:\n"
		"  -o, --out=<file> Specifies the name of the SHBIN file to generate\n"
		"  -h, --header=<file> Specifies the name of the header file to generate\n"
		"  -n, --no-nop Disables the automatic insertion of padding NOPs\n"
		"  -v, --version Displays version information\n",
		stderr);

	return EXIT_FAILURE;
}
// Command-line frontend: picasso [options] files...
//
// Assembles every input file, relocates the combined program and writes a
// SHBIN containing a DVLB header, one DVLP (program + operand descriptors)
// and one DVLE per entry in g_dvleTable that is not marked nodvle.
// Optionally writes a C header with defines for the uniforms.
// Returns EXIT_SUCCESS on success (and for --help / --version) and a failure
// status otherwise.
int main(int argc, char* argv[])
{
char *shbinFile = NULL, *hFile = NULL;
static struct option long_options[] =
{
{ "out", required_argument, NULL, 'o' },
{ "header", required_argument, NULL, 'h' },
{ "help", no_argument, NULL, '?' },
{ "no-nop", no_argument, NULL, 'n' },
{ "version",no_argument, NULL, 'v' },
{ NULL, 0, NULL, 0 }
};
int opt, optidx = 0;
while ((opt = getopt_long(argc, argv, "o:h:?nv", long_options, &optidx)) != -1)
{
switch (opt)
{
case 'o': shbinFile = optarg; break;
case 'h': hFile = optarg; break;
// NOTE(review): getopt_long conventionally also returns '?' for an
// unrecognised option, in which case this path prints the usage text and
// exits with EXIT_SUCCESS -- confirm whether that is intended.
case '?': usage(argv[0]); return EXIT_SUCCESS;
case 'n': g_autoNop = false; break;
case 'v': printf("%s - Built on %s %s\n", PACKAGE_STRING, __DATE__, __TIME__); return EXIT_SUCCESS;
default: return usage(argv[0]);
}
}
#ifdef WIN32
FixMinGWPath(shbinFile);
FixMinGWPath(hFile);
#endif
// Everything left after option parsing is an input file.
if (optind == argc)
{
fprintf(stderr, "%s: no input files are specified\n", argv[0]);
return usage(argv[0]);
}
if (!shbinFile)
{
fprintf(stderr, "%s: no output file is specified\n", argv[0]);
return usage(argv[0]);
}
int rc = 0;
// Assemble each input in command-line order; stop at the first failure.
for (int i = optind; i < argc; i ++)
{
char* vshFile = argv[i];
#ifdef WIN32
FixMinGWPath(vshFile);
#endif
char* sourceCode = StringFromFile(vshFile);
if (!sourceCode)
{
fprintf(stderr, "error: cannot open input file: %s\n", vshFile);
return EXIT_FAILURE;
}
rc = AssembleString(sourceCode, vshFile);
free(sourceCode);
if (rc != 0)
return EXIT_FAILURE;
}
rc = RelocateProduct();
if (rc != 0)
return EXIT_FAILURE;
FileClass f(shbinFile, "wb");
if (f.openerror())
{
// NOTE(review): unlike the other messages this one has no trailing newline.
fprintf(stderr, "Can't open output file!");
return EXIT_FAILURE;
}
// DVLP size in bytes: 10-word header + program words + 8-byte opdescs.
u32 progSize = g_outputBuf.size();
u32 dvlpSize = 10*4 + progSize*4 + g_opdescCount*8;
// Write DVLB header
f.WriteWord(0x424C5644); // DVLB
f.WriteWord(g_totalDvleCount); // Number of DVLEs
// Calculate and write DVLE offsets
// The first DVLE follows the DVLB header (2 words + one offset per DVLE)
// and the DVLP; each later one follows the word-aligned end of the previous.
// These sizes must match what the DVLE writer below actually emits.
u32 curOff = 2*4 + g_totalDvleCount*4 + dvlpSize;
for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end(); ++dvle)
{
if (dvle->nodvle) continue;
f.WriteWord(curOff);
curOff += 16*4; // Header
curOff += dvle->constantCount*20;
curOff += dvle->outputCount*8;
curOff += dvle->uniformCount*8;
curOff += dvle->symbolSize;
curOff = (curOff + 3) &~ 3; // Word alignment
}
// Write DVLP header
// Offsets in this header are relative to the start of the DVLP.
f.WriteWord(0x504C5644); // DVLP
f.WriteWord(0); // version
f.WriteWord(10*4); // offset to shader binary blob
f.WriteWord(progSize); // size of shader binary blob
f.WriteWord(10*4 + progSize*4); // offset to opdesc table
f.WriteWord(g_opdescCount); // number of opdescs
f.WriteWord(dvlpSize); // offset to symtable (TODO)
f.WriteWord(0); // ????
f.WriteWord(0); // ????
f.WriteWord(0); // ????
// Write program
for (outputBufIter it = g_outputBuf.begin(); it != g_outputBuf.end(); ++it)
f.WriteWord(*it);
// Write opdescs
for (int i = 0; i < g_opdescCount; i ++)
f.WriteDword(g_opdescTable[i]);
// Write DVLEs
for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end(); ++dvle)
{
if (dvle->nodvle) continue;
// curOff is reused here as the running offset of the next table relative
// to the start of this DVLE; the tables begin right after the 16-word header.
curOff = 16*4;
f.WriteWord(0x454C5644); // DVLE
f.WriteHword(0x1002); // maybe version?
f.WriteByte(dvle->isGeoShader ? 1 : 0); // Shader type
f.WriteByte(dvle->isMerge ? 1 : 0);
f.WriteWord(dvle->entryStart); // offset to main
f.WriteWord(dvle->entryEnd); // offset to end of main
f.WriteHword(dvle->inputMask);
f.WriteHword(dvle->outputMask);
f.WriteByte(dvle->geoShaderType);
f.WriteByte(dvle->geoShaderFixedStart);
f.WriteByte(dvle->geoShaderVariableNum);
f.WriteByte(dvle->geoShaderFixedNum);
f.WriteWord(curOff); // offset to constant table
f.WriteWord(dvle->constantCount); // size of constant table
curOff += dvle->constantCount*5*4;
f.WriteWord(curOff); // offset to label table (TODO)
f.WriteWord(0); // size of label table (TODO)
f.WriteWord(curOff); // offset to output table
f.WriteWord(dvle->outputCount); // size of output table
curOff += dvle->outputCount*8;
f.WriteWord(curOff); // offset to uniform table
f.WriteWord(dvle->uniformCount); // size of uniform table
curOff += dvle->uniformCount*8;
f.WriteWord(curOff); // offset to symbol table
f.WriteWord(dvle->symbolSize); // size of symbol table
// Sort uniforms by position
std::sort(dvle->uniformTable, dvle->uniformTable + dvle->uniformCount);
// Write constants
// Every entry is 20 bytes: type, type-relative register index, then either
// four f24 words or a shorter payload followed by three words of padding.
for (int i = 0; i < dvle->constantCount; i ++)
{
Constant& ct = dvle->constantTable[i];
f.WriteHword(ct.type);
if (ct.type == UTYPE_FVEC)
{
f.WriteHword(ct.regId-0x20);
for (int j = 0; j < 4; j ++)
f.WriteWord(f32tof24(ct.fparam[j]));
} else if (ct.type == UTYPE_IVEC)
{
f.WriteHword(ct.regId-0x80);
for (int j = 0; j < 4; j ++)
f.WriteByte(ct.iparam[j]);
} else if (ct.type == UTYPE_BOOL)
{
f.WriteHword(ct.regId-0x88);
f.WriteWord(ct.bparam ? 1 : 0);
}
if (ct.type != UTYPE_FVEC)
for (int j = 0; j < 3; j ++)
f.WriteWord(0); // Padding
}
// Write outputs
for (int i = 0; i < dvle->outputCount; i ++)
f.WriteDword(dvle->outputTable[i]);
// Write uniforms
// sp is the running offset of each name inside the symbol table.
size_t sp = 0;
for (int i = 0; i < dvle->uniformCount; i ++)
{
Uniform& u = dvle->uniformTable[i];
size_t l = u.name.length()+1;
f.WriteWord(sp); sp += l;
// Positions from 0x20 upward are shifted down by 0x10 in the file.
int pos = u.pos;
if (pos >= 0x20)
pos -= 0x10;
f.WriteHword(pos);
f.WriteHword(pos+u.size-1);
}
// Write symbols
// Names are written NUL-terminated, with every '$' replaced by '.'.
for (int i = 0; i < dvle->uniformCount; i ++)
{
std::string u(dvle->uniformTable[i].name);
std::replace(u.begin(), u.end(), '$', '.');
size_t l = u.length()+1;
f.WriteRaw(u.c_str(), l);
}
// Word alignment
int pos = f.Tell();
int pad = ((pos+3)&~3)-pos;
for (int i = 0; i < pad; i ++)
f.WriteByte(0);
}
// Optional header: one register define (relative to the per-type base) and
// one length define per global uniform whose name does not start with '_'.
if (hFile)
{
FILE* f2 = fopen(hFile, "w");
if (!f2)
{
fprintf(stderr, "Can't open header file!\n");
return 1;
}
fprintf(f2, "// Generated by picasso\n");
fprintf(f2, "#pragma once\n");
// The prefix is taken from the first DVLE only, whatever the others are.
const char* prefix = g_dvleTable.front().isGeoShader ? "GSH" : "VSH"; // WARNING: HORRIBLE HACK - PLEASE FIX!!!!!!!
for (int i = 0; i < g_uniformCount; i ++)
{
Uniform& u = g_uniformTable[i];
const char* name = u.name.c_str();
if (*name == '_') continue; // Hidden uniform
if (u.type == UTYPE_FVEC)
fprintf(f2, "#define %s_FVEC_%s 0x%02X\n", prefix, name, u.pos-0x20);
else if (u.type == UTYPE_IVEC)
fprintf(f2, "#define %s_IVEC_%s 0x%02X\n", prefix, name, u.pos-0x80);
else if (u.type == UTYPE_BOOL)
{
if (u.size == 1)
fprintf(f2, "#define %s_FLAG_%s BIT(%d)\n", prefix, name, u.pos-0x88);
else
fprintf(f2, "#define %s_FLAG_%s(_n) BIT(%d+(_n))\n", prefix, name, u.pos-0x88);
}
fprintf(f2, "#define %s_ULEN_%s %d\n", prefix, name, u.size);
}
fclose(f2);
}
return EXIT_SUCCESS;
}

View File

@ -1,199 +0,0 @@
#include <pica.hpp>
#include <picasso/picasso.h>
// Packs an IEEE-754 single-precision float into the 24-bit float layout
// (1 sign bit, 7 exponent bits with bias 63, 16 mantissa bits).
// Too-small exponents yield a signed zero; too-large ones yield the maximum
// exponent with an empty mantissa.
uint32_t f32tof24(float f) {
  uint32_t raw;
  memcpy(&raw, &f, sizeof(f));

  const uint32_t negative = raw >> 31;
  const uint32_t topMantissa = (raw & 0x007FFFFFu) >> 7; // drop the low 7 bits
  const int32_t rebased = (int32_t)((raw >> 23) & 0xFFu) - 127 + 63;

  // Underflow: flush to zero
  if (rebased < 0)
    return negative << 23;

  // Overflow: saturate to infinity
  if (rebased > 0x7F)
    return (negative << 23) | (0x7F << 16);

  return (negative << 23) | (rebased << 16) | topMantissa;
}
// Default error handler: prints the title and then the message to standard
// output, each on its own line and each followed by a flush.
void BasicHandler(const char *top, const char *message) {
  std::cout << top << std::endl;
  std::cout << message << std::endl;
}
static void (*EHND)(const char *top, const char *message) = BasicHandler;
namespace Pica {
// Replaces the currently installed error handler (initially BasicHandler)
// with ErrorHandler. The pointer is stored as given, without a null check,
// so passing a null handler makes the next reported error call through null.
void InstallErrorCallback(void (*ErrorHandler)(const char *top,
const char *message)) {
EHND = ErrorHandler;
}
char *AssembleCode(const char *vertex, int &res_size) {
int rc = 0;
rc = AssembleString((char *)vertex, "llc_npi");
if (rc) {
EHND("Error when Assembling Code", vertex);
}
rc = RelocateProduct();
if (rc) {
EHND("Error when Relocating Product", "0");
}
FileClass f("Dont Care", "wb");
u32 progSize = g_outputBuf.size();
u32 dvlpSize = 10 * 4 + progSize * 4 + g_opdescCount * 8;
// Write DVLB header
f.WriteWord(0x424C5644); // DVLB
f.WriteWord(g_totalDvleCount); // Number of DVLEs
// Calculate and write DVLE offsets
u32 curOff = 2 * 4 + g_totalDvleCount * 4 + dvlpSize;
for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end();
++dvle) {
if (dvle->nodvle)
continue;
f.WriteWord(curOff);
curOff += 16 * 4; // Header
curOff += dvle->constantCount * 20;
curOff += dvle->outputCount * 8;
curOff += dvle->uniformCount * 8;
curOff += dvle->symbolSize;
curOff = (curOff + 3) & ~3; // Word alignment
}
// Write DVLP header
f.WriteWord(0x504C5644); // DVLP
f.WriteWord(0); // version
f.WriteWord(10 * 4); // offset to shader binary blob
f.WriteWord(progSize); // size of shader binary blob
f.WriteWord(10 * 4 + progSize * 4); // offset to opdesc table
f.WriteWord(g_opdescCount); // number of opdescs
f.WriteWord(dvlpSize); // offset to symtable (TODO)
f.WriteWord(0); // ????
f.WriteWord(0); // ????
f.WriteWord(0); // ????
// Write program
for (outputBufIter it = g_outputBuf.begin(); it != g_outputBuf.end(); ++it)
f.WriteWord(*it);
// Write opdescs
for (int i = 0; i < g_opdescCount; i++)
f.WriteDword(g_opdescTable[i]);
// Write DVLEs
for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end();
++dvle) {
if (dvle->nodvle)
continue;
curOff = 16 * 4;
f.WriteWord(0x454C5644); // DVLE
f.WriteHword(0x1002); // maybe version?
f.WriteByte(dvle->isGeoShader ? 1 : 0); // Shader type
f.WriteByte(dvle->isMerge ? 1 : 0);
f.WriteWord(dvle->entryStart); // offset to main
f.WriteWord(dvle->entryEnd); // offset to end of main
f.WriteHword(dvle->inputMask);
f.WriteHword(dvle->outputMask);
f.WriteByte(dvle->geoShaderType);
f.WriteByte(dvle->geoShaderFixedStart);
f.WriteByte(dvle->geoShaderVariableNum);
f.WriteByte(dvle->geoShaderFixedNum);
f.WriteWord(curOff); // offset to constant table
f.WriteWord(dvle->constantCount); // size of constant table
curOff += dvle->constantCount * 5 * 4;
f.WriteWord(curOff); // offset to label table (TODO)
f.WriteWord(0); // size of label table (TODO)
f.WriteWord(curOff); // offset to output table
f.WriteWord(dvle->outputCount); // size of output table
curOff += dvle->outputCount * 8;
f.WriteWord(curOff); // offset to uniform table
f.WriteWord(dvle->uniformCount); // size of uniform table
curOff += dvle->uniformCount * 8;
f.WriteWord(curOff); // offset to symbol table
f.WriteWord(dvle->symbolSize); // size of symbol table
// Sort uniforms by position
std::sort(dvle->uniformTable, dvle->uniformTable + dvle->uniformCount);
// Write constants
for (int i = 0; i < dvle->constantCount; i++) {
Constant &ct = dvle->constantTable[i];
f.WriteHword(ct.type);
if (ct.type == UTYPE_FVEC) {
f.WriteHword(ct.regId - 0x20);
for (int j = 0; j < 4; j++)
f.WriteWord(f32tof24(ct.fparam[j]));
} else if (ct.type == UTYPE_IVEC) {
f.WriteHword(ct.regId - 0x80);
for (int j = 0; j < 4; j++)
f.WriteByte(ct.iparam[j]);
} else if (ct.type == UTYPE_BOOL) {
f.WriteHword(ct.regId - 0x88);
f.WriteWord(ct.bparam ? 1 : 0);
}
if (ct.type != UTYPE_FVEC)
for (int j = 0; j < 3; j++)
f.WriteWord(0); // Padding
}
// Write outputs
for (int i = 0; i < dvle->outputCount; i++)
f.WriteDword(dvle->outputTable[i]);
// Write uniforms
size_t sp = 0;
for (int i = 0; i < dvle->uniformCount; i++) {
Uniform &u = dvle->uniformTable[i];
size_t l = u.name.length() + 1;
f.WriteWord(sp);
sp += l;
int pos = u.pos;
if (pos >= 0x20)
pos -= 0x10;
f.WriteHword(pos);
f.WriteHword(pos + u.size - 1);
}
// Write symbols
for (int i = 0; i < dvle->uniformCount; i++) {
std::string u(dvle->uniformTable[i].name);
std::replace(u.begin(), u.end(), '$', '.');
size_t l = u.length() + 1;
f.WriteRaw(u.c_str(), l);
}
// Word alignment
int pos = f.Tell();
int pad = ((pos + 3) & ~3) - pos;
for (int i = 0; i < pad; i++)
f.WriteByte(0);
}
res_size = f.Tell();
return (char *)f.get_ptr()->str().c_str();
}
// Reads shader source from `file` and assembles it with AssembleCode.
//
// Parameters:
//   file     - path of the source file, passed to StringFromFile.
//   res_size - receives the size of the assembled image (0 on failure).
//
// Returns whatever AssembleCode returns. If the file cannot be read, the
// failure is reported through the installed error handler and nullptr is
// returned, rather than handing a null source pointer to the assembler.
char *AssembleFile(const char *file, int &res_size) {
  char *sourceCode = StringFromFile(file);
  if (!sourceCode) {
    EHND("error:", "cannot open input file!\n");
    res_size = 0;
    return nullptr;
  }
  // NOTE(review): sourceCode is never released here; if StringFromFile
  // allocates the buffer, it should be freed after assembly - confirm how it
  // allocates before adding the matching release.
  return AssembleCode(sourceCode, res_size);
}
} // namespace Pica

View File

@ -16,6 +16,7 @@ typedef uint8_t u8;
#define BIT(n) (1U << (n))
#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)
#ifndef __BYTE_ORDER__
#include <sys/param.h>
#define __BYTE_ORDER__ BYTE_ORDER
@ -23,15 +24,11 @@ typedef uint8_t u8;
#define __ORDER_BIG_ENDIAN__ BIG_ENDIAN
#endif
#ifndef __llvm__
#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
// Fallback 16-bit byte swap, compiled only under the surrounding
// compiler-version guards: returns x with its two bytes exchanged.
static inline uint16_t __builtin_bswap16(uint16_t x)
{
	const uint16_t upper = (uint16_t)((x << 8) & 0xff00);
	const uint16_t lower = (uint16_t)((x >> 8) & 0x00ff);
	return upper | lower;
}
#if defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
static inline uint32_t __builtin_bswap32(uint32_t x)
{
return ((x << 24) & 0xff000000) |
@ -46,8 +43,6 @@ static inline uint64_t __builtin_bswap64(uint64_t x)
((uint64_t)__builtin_bswap32(x&0xFFFFFFFF) << 32);
}
#endif
#endif
#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define be_dword(a) __builtin_bswap64(a)
@ -65,4 +60,4 @@ static inline uint64_t __builtin_bswap64(uint64_t x)
#define le_hword(a) __builtin_bswap16(a)
#else
#error "What's the endianness of the platform you're targeting?"
#endif
#endif