Compare commits

...

60 Commits
v2.0 ... master

Author SHA1 Message Date
98c8639bd5
Merge branch 'devkitPro:master' into master 2024-10-12 18:29:36 +02:00
oreo639
d522455ea5 Print warning when attempting to use swizzling in outmask 2023-10-11 21:32:43 +02:00
4019766d8d Update for better comp 2022-12-25 11:30:26 +01:00
6a6c3ed1cf Fix a Big Mistake 2022-12-25 02:56:46 +01:00
=
b614217a82 File Support 2022-12-23 05:05:50 +01:00
=
6c76004761 DONT USE THIS LIBRARY IF YOU DON'T KNOW HOW TO USE 2022-12-23 04:59:41 +01:00
fincs
82cf7d95fe v2.7.1 2022-09-16 15:53:34 +00:00
Tillmann Karras
03193c0838 Remove unnecessary function call 2022-09-14 17:42:15 +02:00
Tillmann Karras
baf8622281 Update repo address 2022-09-14 17:42:15 +02:00
Tillmann Karras
5219cd6501 Fix segfault on empty .proc (#25) 2022-09-14 17:42:15 +02:00
Tillmann Karras
9e161a5fc1 Fix segfault on invalid input register (#25) 2022-09-14 17:42:15 +02:00
Tillmann Karras
6e0063ea00 Fix missed error on invalid index register (#24) 2022-09-14 17:42:15 +02:00
Dave Murphy
5cf6319a05 use subdir-objects 2018-05-11 13:44:28 +02:00
Dave Murphy
40e262ac32 update bug report url 2018-05-11 13:44:28 +02:00
fincs
45fa4d0fd6 v2.7 2018-02-11 20:20:33 +01:00
Lioncash
bbfbf4c6fd picasso: Mark member functions of DVLEData as const
These only query state about the data, they don't modify it
2017-12-29 01:29:01 +01:00
Lioncash
fbc3f381f3 picasso_assembler: Silence a -Wmissing-prototypes warning
This is only used in this translation unit.
2017-12-29 01:28:50 +01:00
fincs
e74836bffc Fix compilation error after d1a1c0a0 2017-12-28 13:31:39 +01:00
Lioncash
d1a1c0a011 picasso_frontend: Get rid of undefined behavior
Type-punning via a union is well-defined in C (specifically C99 and onwards), but not C++
2017-12-28 13:29:28 +01:00
Lioncash
299c35d6fc picasso_assembler: Silence a -Wlogical-op-parenthesis warning 2017-12-28 03:06:58 +01:00
Lioncash
708ce762e0 picasso_assembler: Correct ending square-bracket check in parseReg
Signed-off-by: Lioncash <mathew1800@gmail.com>
2017-12-28 02:27:09 +01:00
fincs
97814558db lcnt -> aL 2017-12-27 18:43:54 +01:00
fincs
625b28e36a Rename a0/a1 index regs to a0.x/a0.y to match D3D naming convention 2017-12-27 18:06:10 +01:00
fincs
9aac60a683 Index regs can only be used with uniform regs (discovered by @Tilka) 2017-12-27 14:44:20 +01:00
fincs
d19c7b7cd0 Minor simplification 2017-12-27 14:38:59 +01:00
Tillmann Karras
eeda288b29 Add break
Don't use this instruction unless you know what you're doing. It does
*not* touch the if/call stacks, so the following code will behave in an
unexpected way (and cannot be fixed by inserting nops):

```
ifu true
    for ...
        ifu true
            break
        .end
    .end
.else
    ; will be executed since the inner if is still on the stack
.end
```

breakc has the same problem but does not require a separate condition.
2017-12-17 18:23:00 +01:00
Tillmann Karras
89deb50e23 Add litp
This instruction prepares a vector for vs_3_0-style lighting by clamping
to certain bounds.
2017-12-17 17:22:54 +01:00
Tillmann Karras
0629de23e0 Add dst/dsti 2017-12-17 17:22:54 +01:00
Tillmann Karras
6dd682dbd5 Fix off-by-one error in max shader size check 2017-12-17 17:04:51 +01:00
Tillmann Karras
11f2f1e521 Fix typos in manual 2017-12-17 17:04:51 +01:00
fincs
067d0985ce Manual: fix blatantly wrong output color description 2017-08-16 17:33:07 +02:00
fincs
d72bf7585a v2.6.2 2017-06-10 16:02:00 +02:00
fincs
4464084525 Fix some compilation warnings/errors 2017-06-10 13:36:30 +02:00
fincs
644ff23e11 v2.6.1 2017-03-18 20:40:41 +01:00
fincs
346f761609 Fix mad opdesc allocation errors by swapping out lower opdescs 2017-03-18 20:40:29 +01:00
fincs
caf06d4896 v2.6 2017-03-18 00:48:41 +01:00
fincs
9721aac006 Add .in directive for explicit input regs in DVLE uniform table 2017-03-18 00:47:02 +01:00
fincs
0d03822d0a Support (dummy) o7-o15 output registers in vertex shaders 2017-03-18 00:46:37 +01:00
fincs
4c7129925d Support '$' in identifier names (translated to '.' in DVLE) 2017-03-18 00:45:58 +01:00
fincs
17f18aa18d v2.5 2016-07-20 17:21:01 +02:00
fincs
73e8119a43 Overhauled geoshader/outmap support, see details:
- GSH DVLEs now have each an independent uniform space.
- .gsh directive now accepts parameters to configure GSH mode.
- .out supports explicit outreg, outmasks are also now fixed.
- Added dummy attribute, removed 7 attribute.
- Back-compat mode is used for old code.
2016-07-20 17:20:48 +02:00
fincs
05d9e79095 Add auto-NOP insertion to work around flow-of-control PICA errata 2016-07-20 17:16:12 +02:00
fincs
d7a3af7c52 Add rgba/stpq component names 2016-07-20 17:13:53 +02:00
fincs
ed10f00333 Add error message for invalid input register usage (e.g. add r0,v1,v2) 2016-07-20 17:13:26 +02:00
fincs
d4714a5441 Optimize opdesc allocation to take into account unused operands 2016-07-20 17:11:59 +02:00
fincs
3e44d48f38 Correct MAD instruction encoding yet again & other miscellaneous fixes 2016-07-20 17:10:18 +02:00
fincs
bc051ca6c9 v2.4 2016-03-26 14:03:11 +01:00
fincs
08e77dad03 Add command line flag for retrieving the picasso version (fix #11) 2016-03-26 14:03:02 +01:00
fincs
e5e8127a5d Correct MAD instruction encoding 2016-03-26 14:02:02 +01:00
fincs
2da4f1b657 v2.3 2016-02-20 00:24:43 +01:00
fincs
5a597fb870 Add .constfa for creating floating-point vector constant arrays. 2016-02-20 00:24:34 +01:00
fincs
d97de2e8dc v2.2 2016-01-26 13:36:40 +01:00
fincs
d822bd4e26 Add support for inverting the condition in JMPU (addresses #9) 2016-01-25 12:52:35 +01:00
fincs
956a328a6f Add proper support for the MOVA instruction 2016-01-24 11:58:16 +01:00
fincs
8f402ad02a Merge pull request #7 from yuriks/patch-1
Return correct value for a2/lcnt index register
2016-01-22 11:17:33 +01:00
Yuri Kunde Schlesner
02c0cd4b08 Return correct value for a2/lcnt index register 2016-01-21 23:18:05 -08:00
fincs
36fa1cd15a Fix compilation with GCC 4.7 2015-10-25 19:33:29 +01:00
fincs
df90fd16bc Update changelog 2015-09-05 11:44:23 +02:00
fincs
e462d199da Fix #5 2015-09-05 11:42:22 +02:00
fincs
7ae2c49768 Fix #4 2015-09-04 23:45:25 +02:00
20 changed files with 1806 additions and 195 deletions

1
.gitignore vendored
View File

@ -26,6 +26,5 @@ missing
config.log
config.status
Makefile
picasso
.deps/
*.bz2

21
.vscode/c_cpp_properties.json vendored Normal file
View File

@ -0,0 +1,21 @@
{
"configurations": [
{
"name": "3ds",
"includePath": [
"${workspaceFolder}/**",
//"C:/devkitpro/libnx/include/**",
"C:/devkitpro/libctru/include/**",
"/opt/devkitpro/libctru/include/**",
//"C:/devkitpro/portlibs/switch/include/**",
"/opt/devkitpro/portlibs/3ds/include/**",
"C:/devkitpro/portlibs/3ds/include/**"
],
"defines": [],
"cStandard": "gnu17",
"cppStandard": "gnu++17",
"intelliSenseMode": "linux-gcc-x64"
}
],
"version": 4
}

20
3ds.cmake Normal file
View File

@ -0,0 +1,20 @@
#########################################################################################
# CMake toolchain file for cross-compiling with devkitARM (Nintendo 3DS).
# The devkitPro install root is taken from the DEVKITPRO environment variable.
set(DEVKITPRO $ENV{DEVKITPRO})
set(CMAKE_SYSTEM_NAME "Nintendo 3ds")
# Cross compiler, archiver and assembler from the devkitARM bin directory.
set(CMAKE_C_COMPILER "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc")
set(CMAKE_CXX_COMPILER "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-g++")
set(CMAKE_AR "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc-ar" CACHE STRING "")
set(CMAKE_RANLIB "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc-ranlib" CACHE STRING "")
set(CMAKE_ASM_COMPILER "${DEVKITPRO}/devkitARM/bin/arm-none-eabi-gcc")
# Target CPU/ABI flags shared by the C, C++ and ASM flag sets below.
set(ARCH "-march=armv6k -mtune=mpcore -mfloat-abi=hard -mfpu=vfp -mtp=soft -D__3DS__")
set(CMAKE_C_FLAGS "${ARCH} -Wall -mword-relocations -O3 -fomit-frame-pointer -ffunction-sections -fdata-sections" CACHE STRING "C flags")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -fno-rtti -std=gnu++20" CACHE STRING "C++ flags")
set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS}")
# Root paths for find_*() lookups. DEVKITARM is not defined as a CMake variable in
# this file, so every root is expressed relative to DEVKITPRO.
set(CMAKE_FIND_ROOT_PATH ${DEVKITPRO}/devkitARM ${DEVKITPRO}/libctru ${DEVKITPRO}/portlibs/3ds)
set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "Shared libs not available")
# Library search directories (the library is named libctru).
link_directories(${DEVKITPRO}/libctru/lib ${DEVKITPRO}/portlibs/3ds/lib)
#########################################################################################

57
CMakeLists.txt Normal file
View File

@ -0,0 +1,57 @@
# Build script for the picasso library targeting the Nintendo 3DS.
# DEVKITPRO is expected to be defined already (e.g. by the 3ds.cmake toolchain file).
cmake_minimum_required(VERSION 3.22)
project(picasso VERSION 0.5.2 LANGUAGES CXX DESCRIPTION "Picasso Shadercompiler on the Nintendo 3ds")
set(CMAKE_EXE_LINKER_FLAGS "-L${DEVKITPRO}/libctru/lib -L${DEVKITPRO}/picaGL/lib -L${DEVKITPRO}/portlibs/3ds/lib -specs=3dsx.specs -Wl,--gc-sections")
include_directories(${DEVKITPRO}/libctru/include ${DEVKITPRO}/picaGL/include ${DEVKITPRO}/portlibs/3ds/include)
add_definitions("-D__3DS__")
include(CMakePackageConfigHelpers)
# Default to a Release build when the user did not choose a build type.
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release."
FORCE)
endif(NOT CMAKE_BUILD_TYPE)
# Default the install prefix to <build dir>/install unless the user overrode it.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE STRING
"The install location"
FORCE)
endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
include_directories(include)
set(HEADER_FILES
include/pica.hpp
include/picasso/picasso.h
include/picasso/types.h
include/picasso/FileClass.h
include/picasso/maestro_opcodes.h)
set(SOURCE_FILES
source/picasso_assembler.cpp
source/picasso_library.cpp)
add_library(${PROJECT_NAME}
${HEADER_FILES}
${SOURCE_FILES})
# Namespaced alias so in-tree consumers can link picasso::picasso.
add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME})
target_compile_features(${PROJECT_NAME}
# Features required to compile the library itself.
PRIVATE cxx_std_20 cxx_auto_type)
set(PROJECT_PREFIX ${PROJECT_NAME}-${picasso_VERSION})
target_include_directories(${PROJECT_NAME}
PUBLIC
# Used when building the library (picasso_SOURCE_DIR is set by project() above):
$<BUILD_INTERFACE:${picasso_SOURCE_DIR}/include>
# Used when installing the library:
$<INSTALL_INTERFACE:include/${PROJECT_NAME}>
PRIVATE
# Used only when building the library:
# NOTE(review): the sources listed above live under source/, not src/ - confirm this path.
src)

View File

@ -1,5 +1,64 @@
# picasso Changelog
# v2.7.1
- Further improvements to overall system stability and other minor adjustments have been made to enhance the user experience.
# v2.7
- Added `dst`, `litp` and `break` instructions (thanks to @Tilka).
- Added check to enforce index regs being used only with floating point vector uniforms.
- Renamed index registers to match D3D naming conventions (`a0.x`, `a0.y`, `aL`) (old names still accepted).
- Miscellaneous bugfixes and improvements (thanks to @lioncash).
# v2.6.2
- Fixed several compilation errors in some compilers.
# v2.6.1
- Reduced `mad` opdesc allocation errors by automatically swapping out of bounds opdesc entries with other ones in the addressable range (5 bits).
# v2.6
- Added `.in` directive for explicitly specifying (and allocating) input registers and exporting them in the DVLE uniform table.
- Added support for dollar signs (`$`) in identifier names, which are translated to period characters (`.`) in DVLE uniform names.
- Output registers `o7` through `o15` are now allowed in vertex shaders (as dummy outputs).
- DVLE uniform table is now sorted by register position.
# v2.5
- The `.gsh` directive has been enhanced to provide full support for all geometry shader operation modes (point, variable-sized primitive and fixed-size primitive). This also effectively separates vertex shader uniform space from geometry shader uniform space.
- The `.out` directive has been enhanced to allow wiring semantics to any arbitrary output register. Additionally the `dummy` semantic was added while the `7` semantic was removed.
- Added auto-insertion of NOP instruction in corner cases involving flow of control instructions, together with the `--no-nop` directive which instead of adding NOPs warns the user about the corner cases.
- Added support for `rgba` and `stpq` in addition to `xyzw`.
- Added an error message for invalid input register use (e.g. `add r0, v1, v2`).
- The operand descriptor allocation algorithm has been enhanced to take into account unused operands.
- The `6` and `7` conditional operators have been removed since they actually do not exist.
- Really corrected MAD instruction encoding.
- Several miscellaneous issues were fixed.
# v2.4
- Corrected MAD instruction encoding.
- Added command line flag for retrieving the picasso version.
# v2.3
- Added `.constfa` for creating floating-point vector constant arrays.
- Fixed `.nodvle` bug.
# v2.2
- Added proper support for the MOVA instruction.
- Added support for inverting the condition in JMPU.
- Fixed `lcnt` bug.
# v2.1
- Fixed input file open error message.
- Fixed `.constf` misallocation bug.
# v2.0
- (**Breaking change**) Command line format changed.

140
Manual.md
View File

@ -9,7 +9,7 @@ Comments are introduced by the semicolon character. E.g.
.fvec myFloat ; They can also appear in the same line
```
Identifiers follow the same rules as C identifiers.
Identifiers follow the same rules as C identifiers. Additionally, the dollar sign (`$`) is allowed in identifiers; mostly as a substitute for the period character (`.`) since the latter is used in `picasso` syntax.
Labels consist of an identifier plus a colon. E.g.
@ -34,14 +34,14 @@ Directives are special statements that start with a period and control certain a
PICA200 registers are often used as arguments to instructions. There exist the following registers:
- `o0` through `o7`: Output registers (usable as a destination operand).
- `v0` through `v7`: Input registers (usable as a source operand).
- `o0` through `o15`: Output registers (usable as a destination operand). The range `o7` through `o15` is only available in vertex shaders.
- `v0` through `v15`: Input registers (usable as a source operand).
- `r0` through `r15`: Scratch registers (usable as both destination and source operands).
- `c0` through `c95`: Floating-point vector uniforms (usable as a special type of source operand called SRC1).
- `i0` through `i3`: Integer vector uniforms (special purpose).
- `b0` through `b15`: Boolean uniforms (special purpose).
All registers contain floating point vectors (it is currently unknown whether they are 24-bit or 32-bit); except for integer vector uniforms (containing 8-bit integers) and boolean uniforms. Vectors have 4 components: x, y, z and w. Uniforms are special registers that are writable by the CPU; thus they are used to pass configuration parameters to the shader such as transformation matrices. Sometimes they are preloaded with constant values that may be used in the logic of the shader.
All registers contain 24-bit floating point vectors; except for integer vector uniforms (containing 8-bit integers) and boolean uniforms. Vectors have 4 components: x, y, z and w. The components may alternatively be referred to as r, g, b and a (respectively); or s, t, p and q (respectively). Uniforms are special registers that are writable by the CPU; thus they are used to pass configuration parameters to the shader such as transformation matrices. Sometimes they are preloaded with constant values that may be used in the logic of the shader.
In most situations, vectors may be [swizzled](http://en.wikipedia.org/wiki/Swizzling_%28computer_graphics%29); that is, their components may be rearranged. Register arguments support specifying a swizzling mask: `r0.wwxy`. The swizzling mask usually has 4 components (but not more); if it has fewer, the last component is repeated to fill the mask. The default mask applied to registers is `xyzw`; that is, identity (no effect).
@ -51,9 +51,11 @@ Registers may also be assigned additional names in order to make the code more l
For convenience, registers may be addressed using an offset from a known register. This is called indexing. For example, `c8[4]` is equivalent to `c12`; and `r4[-2]` is equivalent to `r2`. Indexing is useful for addressing arrays of registers (such as matrices).
Some source operands of instructions (called SRC1) support relative addressing. This means that it is possible to use one of the three built-in indexing registers (`a0`, `a1` and `a2` aka `lcnt`) to address a register, e.g. `someArray[lcnt]`. Adding an offset is also supported, e.g. `someArray[lcnt+2]`. This is useful in FOR loops.
Some source operands of instructions (called SRC1) support relative addressing. This means that it is possible to use one of the three built-in indexing registers (`a0.x`, `a0.y` and `aL`) to address a register, e.g. `someArray[aL]`. Adding an offset is also supported, e.g. `someArray[aL+2]`. This is useful in FOR loops. Index registers can only be used with floating-point vector uniform registers, though. Note: Older versions of `picasso` called the indexing registers `a0`, `a1` and `a2` respectively (also `lcnt` for `a2`); these names are still accepted for backwards compatibility.
Normal floating-point vector registers may also be negated by prepending a minus sign before it, e.g. `-r2` or `-someArray[lcnt+2]`.
Normal floating-point vector registers may also be negated by prepending a minus sign before it, e.g. `-r2` or `-someArray[aL+2]`.
In geometry shaders, `b15` is automatically set to true *after* each execution of the geometry shader. This can be useful to detect whether program state should be initialized - GPU management code usually resets all unused boolean uniforms to false when setting up the PICA200's shader processing units.
## Command Line Usage
@ -62,19 +64,31 @@ Usage: picasso [options] files...
Options:
-o, --out=<file> Specifies the name of the SHBIN file to generate
-h, --header=<file> Specifies the name of the header file to generate
-n, --no-nop Disables the automatic insertion of padding NOPs
-v, --version Displays version information
```
DVLEs are generated in the same order as the files in the command line.
## Linking Model
`picasso` takes one or more source code files, and assembles them into a single `.shbin` file. A DVLE object is generated for each source code file, unless the `.nodvle` directive is used (see below). Procedures are shared amongst all source code files, and they may be defined and called wherever. Uniform space is also shared, that is, if two source code files declare the same uniform, they are assigned the same location. Constants however are not shared, and the same space is reused for the constants of each DVLE. Outputs and aliases are necessarily not shared either.
`picasso` takes one or more source code files, and assembles them into a single `.shbin` file. A DVLE object is generated for each source code file, unless the `.nodvle` directive is used (see below). Procedures are shared amongst all source code files, and they may be defined and called wherever. Uniform space for vertex shaders is also shared, that is, if two vertex shader source code files declare the same uniform, they are assigned the same location. Geometry shaders however do not share uniforms, and each geometry shader source code file will have its own uniform allocation map. On the other hand, constants are never shared, and the same space is reused for the constants of each DVLE. Outputs and aliases are, by necessity, never shared either.
The entry point of a DVLE may be set with the `.entry` directive. If this directive is not used, `main` is assumed as the entrypoint.
A DVLE is marked by default as a vertex shader, unless `setemit` or `.gsh` are used (in the case of which a geometry shader is assumed).
A DVLE by default is a vertex shader, unless the `.gsh` directive is used (in the case of which a geometry shader is specified).
Uniforms that start with the underscore (`_`) character are not exposed in the DVLE table of uniforms. This allows for creating private uniforms that can be internally used to configure the behaviour of shared procedures.
Uniforms that start with the underscore (`_`) character are not exposed in the DVLE table of uniforms. This allows for creating private uniforms that can be internally used to configure the behaviour of shared procedures. Additionally, dollar signs (`$`) are automatically translated to period characters (`.`) in the DVLE uniform table.
**Note**: Older versions of `picasso` handled geometry shaders in a different way. Specifically, uniform space was shared with vertex shaders and it was possible to use `.gsh` without parameters or `setemit` to flag a DVLE as a geometry shader. For backwards compatibility purposes this functionality has been retained, however its use is not recommended.
## PICA200 Caveats & Errata
The PICA200's shader units have numerous implementation caveats and errata that should be taken into account when designing and writing shader code. Some of these include:
- Certain flow of control statements may not work at the end of another block, including the closing of other nested blocks. picasso detects these situations and automatically inserts padding NOP instructions (unless the `--no-nop` command line flag is used).
- The `mova` instruction is finicky and for instance two consecutive `mova` instructions will freeze the PICA200.
- Only a single input register is able to be referenced reliably at a time in the source registers of an operand. That is, while specifying the same input register in one or more source registers will behave correctly, specifying different input registers will produce incorrect results. picasso detects this situation and displays an error message.
## Supported Directives
@ -147,33 +161,77 @@ Reserves a new floating-point vector uniform to be preloaded with the specified
Reserves a new integer vector uniform to be preloaded with the specified constant; creates an alias for it that points to the allocated register. Example:
```
.constf loopParams(16, 0, 1, 0)
.consti loopParams(16, 0, 1, 0)
```
### .constfa
```
.constfa arrayName[]
.constfa arrayName[size]
.constfa (x, y, z, w)
```
Reserves a new array of floating-point vector uniforms to be preloaded with the specified constants; creates an alias for it that points to the first element. Example:
```
; Create an array of two elements
.constfa myArray[]
.constfa (1.0, 2.0, 3.0, 4.0)
.constfa (5.0, 6.0, 7.0, 8.0)
.end
```
Optionally the size of the array may be specified. If a number of elements less than the size is specified, the missing elements are initialized to zero. Example:
```
.constfa myArray[4]
.constfa (1.0, 2.0, 3.0, 4.0)
.constfa (5.0, 6.0, 7.0, 8.0)
; The remaining two elements are vectors full of zeroes.
.end
```
### .in
```
.in inName
.in inName register
```
Reserves an input register and creates an alias for it called `inName`. If no input register is specified it is automatically allocated. The input register is added to the DVLE's uniform table.
Example:
```
.in position
.in texcoord
.in special v15
```
### .out
```
.out outName propName
.out outName propName register
.out - propName register
```
Allocates a new output register, wires it to a certain output property and creates an alias for it that points to the allocated register. The following property names are supported:
Wires an output register to a certain output property and (optionally) creates an alias for it called `outName` (specify a dash in order not to create the alias). If no output register is specified it is automatically allocated. The following property names are supported:
- `position` (or `pos`): Represents the position of the outputted vertex.
- `normalquat` (or `nquat`): Under investigation.
- `color` (or `clr`): Represents the color of the outputted vertex. Its format is (R, G, B, xx) where R,G,B are values ranging from 0.0 to 1.0. The W component isn't used.
- `texcoord0` (or `tcoord0`): Represents the texture coordinate that is fed to the Texture Unit 0. The Z and W components are not used.
- `texcoord0w` (or `tcoord0w`): Under investigation.
- `texcoord1` (or `tcoord1`): As `texcoord0`, but for the Texture Unit 1.
- `texcoord2` (or `tcoord2`): As `texcoord0`, but for the Texture Unit 2.
- `7`: Under investigation.
- `view`: Under investigation.
- `normalquat` (or `nquat`): Used in fragment lighting, this represents the quaternion associated to the normal vector of the vertex.
- `color` (or `clr`): Represents the color of the outputted vertex. Its format is (R, G, B, A) where R,G,B,A are values ranging from 0.0 to 1.0.
- `texcoord0` (or `tcoord0`): Represents the first texture coordinate, which is always fed to the Texture Unit 0. Only the first two components are used.
- `texcoord0w` (or `tcoord0w`): Represents the third component of the first texture coordinate, used for 3D/cube textures.
- `texcoord1` (or `tcoord1`): Similarly to `texcoord0`, this is the second texture coordinate, which is usually but not always fed to Texture Unit 1.
- `texcoord2` (or `tcoord2`): Similarly to `texcoord0`, this is the third texture coordinate, which is usually but not always fed to Texture Unit 2.
- `view`: Used in fragment lighting, this represents the view vector associated to the vertex. The fourth component is not used.
- `dummy`: Used in vertex shaders to pass generic semanticless parameters to the geometry shader, and in geometry shaders to use the appropriate property type from the output map of the vertex shader, thus 'merging' the output maps.
The properties also accept an output mask, e.g. `texcoord0.xy`.
An output mask that specifies which components of the output register the property should be wired to is also accepted. If the output register is explicitly specified, it attaches to it (e.g. `o2.xy`); otherwise it attaches to the property name (e.g. `texcoord0.xy`).
Example:
```
.out outPos position
.out outClr color
.out outTex texcoord0
.out outClr color.rgba
.out outTex texcoord0.xy
.out - texcoord0w outTex.p
```
### .entry
@ -190,9 +248,27 @@ This directive tells `picasso` not to generate a DVLE for the source code file t
### .gsh
```
.gsh
.gsh point firstReg
.gsh variable firstReg vtxNum
.gsh fixed firstReg arrayStartReg vtxNum
```
This directive explicitly flags the current DVLE as a geometry shader.
This directive flags the current DVLE as a geometry shader and specifies the geometry shader operation mode, which can be one of the following:
- `point` mode: In this mode the geometry shader is called according to the input stride and input permutation configured by the user. On entry, the data is stored starting at the `v0` register. This type of geometry shader can be used with both array-drawing mode (aka `C3D_DrawArrays`) and element-drawing mode (aka `C3D_DrawElements`).
- `variable` mode (also called `subdivision` mode): In this mode the geometry shader processes variable-sized primitives, which are required to have `vtxNum` vertices for which full attribute information will be stored, and **one or more** additional vertices for which only position information will be stored. On entry the register `c0` stores in all its components the total number of vertices of the primitive, and subsequent registers store vertex information in order. This type of geometry shader can only be used with element-drawing mode - inside the index array each primitive is prefixed with the number of vertices in it.
- `fixed` mode (also called `particle` mode): In this mode the geometry shader processes fixed-size primitives, which always have `vtxNum` vertices. On entry, the array of vertex information will be stored starting at the float uniform register `arrayStartReg`. This type of geometry shader can only be used with element-drawing mode.
The `firstReg` parameter specifies the first float uniform register that is available for use in float uniform register allocation (this is especially useful in variable and fixed mode).
Examples:
```
.gsh point c0
.gsh variable c48 3
.gsh fixed c48 c0 4
```
**Note**: For backwards compatibility reasons, a legacy mode which does not accept any parameters is accepted; however it should not be used.
### .setf
```
@ -226,6 +302,7 @@ Syntax | Description
`dp3 rDest, rSrc1, rSrc2` |
`dp4 rDest, rSrc1, rSrc2` |
`dph rDest, rSrc1, rSrc2` |
`dst rDest, rSrc1, rSrc2` |
`mul rDest, rSrc1, rSrc2` |
`sge rDest, rSrc1, rSrc2` |
`slt rDest, rSrc1, rSrc2` |
@ -233,22 +310,23 @@ Syntax | Description
`min rDest, rSrc1, rSrc2` |
`ex2 rDest, rSrc1` |
`lg2 rDest, rSrc1` |
`ex2 rDest, rSrc1` |
`litp rDest, rSrc1` |
`flr rDest, rSrc1` |
`rcp rDest, rSrc1` |
`rsq rDest, rSrc1` |
`mov rDest, rSrc1` |
`mova rSrc1` |
`mova idxReg, rSrc1` |
`cmp rSrc1, opx, opy, rSrc2` |
`call procName` |
`for iReg` |
`break` | (not recommended)
`breakc condExp` |
`callc condExp, procName` |
`ifc condExp` |
`jmpc condExp, labelName` |
`callu bReg, procName` |
`ifu bReg` |
`jmpu bReg, labelName` |
`jmpu [!]bReg, labelName` |
`mad rDest, rSrc1, rSrc2, rSrc3` |
### Description of operands
@ -260,7 +338,8 @@ Syntax | Description
- In instructions that take one source operand, it is always wide.
- In instructions that take two source operands, the first is wide and the second is narrow.
- `dph`/`sge`/`slt` have a special form where the first operand is narrow and the second is wide. This usage is detected automatically by `picasso`.
- `mad`, which takes three source operands, has two forms: the first is wide-wide-narrow, and the second is wide-narrow-wide. This is also detected automatically. Additionally, relative addressing is not supported.
- `mad`, which takes three source operands, has two forms: the first is narrow-wide-narrow, and the second is narrow-narrow-wide. This is also detected automatically.
- `idxReg`: Represents an indexing register to write to using the mova instruction. Can be `a0.x`, `a0.y` or `a0.xy` (the latter writes to both components). Note: Older versions of `picasso` accepted `a0`, `a1` and `a01` respectively; this syntax is still supported for backwards compatibility.
- `iReg`: Represents an integer vector uniform source operand.
- `bReg`: Represents a boolean uniform source operand.
- `procName`: Represents the name of a procedure.
@ -272,12 +351,11 @@ Syntax | Description
- `le`: Less or equal than
- `gt`: Greater than
- `ge`: Greater or equal than
- `6` and `7`: currently unknown, supposedly the result they yield is always true.
- `condExp`: Represents a conditional expression, which uses the conditional flags `cmp.x` and `cmp.y` set by the CMP instruction. These flags may be negated using the `!` symbol, e.g. `!cmp.x`. The conditional expression can take any of the following forms:
- `flag1`: It tests a single flag.
- `flag1 && flag2`: It performs AND between the two flags. Optionally, a single `&` may be specified.
- `flag1 || flag2`: It performs OR between the two flags. Optionally, a single `|` may be specified.
- `vtxId`: An integer ranging from 0 to 3 specifying the vertex ID used in geoshader vertex emission.
- `vtxId`: An integer ranging from 0 to 2 specifying the vertex ID used in geoshader vertex emission.
- `emitFlags`: A space delimited combination of the following words:
- `primitive` (or `prim`): Specifies that after emitting the vertex, a primitive should also be emitted.
- `prim` (or `primitive`): Specifies that after emitting the vertex, a primitive should also be emitted.
- `inv` (or `invert`): Specifies that the order of the vertices in the emitted primitive is inverted.

View File

@ -0,0 +1,14 @@
@PACKAGE_INIT@
# Package configuration template for picasso. The @PACKAGE_INIT@ placeholder above is
# substituted when the file is configured (see CMakePackageConfigHelpers).
# Directory containing this config file; the exported targets file is looked up next to it.
get_filename_component(picasso_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
# This macro enables usage of find_dependency().
# https://cmake.org/cmake/help/v3.11/module/CMakeFindDependencyMacro.html
include(CMakeFindDependencyMacro)
# Include the exported CMake targets file, unless picasso::picasso already exists
# (e.g. the package was already found or the project was added in-tree).
if(NOT TARGET picasso::picasso)
include("${picasso_CMAKE_DIR}/picasso-targets.cmake")
endif()
# Verify the components requested through find_package() for this package.
check_required_components(picasso)

348
compile Executable file
View File

@ -0,0 +1,348 @@
#! /bin/sh
# Wrapper for compilers which do not understand '-c -o'.
scriptversion=2018-03-07.03; # UTC
# Copyright (C) 1999-2021 Free Software Foundation, Inc.
# Written by Tom Tromey <tromey@cygnus.com>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
nl='
'
# We need space, tab and new line, in precisely that order. Quoting is
# there to prevent tools from complaining about whitespace usage.
IFS=" "" $nl"
file_conv=
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
#
# Inputs:  $1 = path to convert, $2 = optional comma separated LAZY list.
# Outputs: $file (global) holds the possibly converted path;
#          $file_conv (global) caches the detected conversion type.
# Paths that do not match the absolute-path pattern below are stored in
# $file unchanged.
func_file_conv ()
{
file=$1
case $file in
/ | /[!/]*) # absolute file, and not a UNC file
if test -z "$file_conv"; then
# lazily determine how to convert abs files
# (detected once from `uname -s`, then reused through $file_conv)
case `uname -s` in
MINGW*)
file_conv=mingw
;;
CYGWIN* | MSYS*)
file_conv=cygwin
;;
*)
file_conv=wine
;;
esac
fi
# The subject has the form "<type>/,<lazy-list>," so that the first pattern
# can detect <type> inside the LAZY list and the remaining patterns can
# dispatch on <type>.
case $file_conv/,$2, in
*,$file_conv,*)
# conversion type is listed in LAZY: leave $file untouched
;;
mingw/*)
file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
;;
cygwin/* | msys/*)
# fall back to the unconverted path if cygpath fails
file=`cygpath -m "$file" || echo "$file"`
;;
wine/*)
# fall back to the unconverted path if winepath fails
file=`winepath -w "$file" || echo "$file"`
;;
esac
;;
esac
}
# func_cl_dashL linkdir
# Make cl look for libraries in LINKDIR
#
# Converts LINKDIR with func_file_conv, appends the result to the
# ';'-separated $lib_path list (later searched by func_cl_dashl) and adds a
# matching -LIBPATH: option to $linker_opts.
func_cl_dashL ()
{
func_file_conv "$1"
if test -z "$lib_path"; then
# first directory: no separator needed
lib_path=$file
else
lib_path="$lib_path;$file"
fi
linker_opts="$linker_opts -LIBPATH:$file"
}
# func_cl_dashl library
# Do a library search-path lookup for cl
#
# Searches every directory in $lib_path and $LIB (both ';'-separated) for
# LIBRARY, trying in order: <dir>/<lib>.dll.lib (only while $shared is true),
# <dir>/<lib>.lib and <dir>/lib<lib>.a. The result is stored in $lib; when
# nothing is found $lib becomes "<library>.lib".
func_cl_dashl ()
{
lib=$1
found=no
save_IFS=$IFS
# split the search path on ';' for the word expansion in the for loop below
IFS=';'
for dir in $lib_path $LIB
do
# restore the normal IFS for the commands inside the loop body
IFS=$save_IFS
if $shared && test -f "$dir/$lib.dll.lib"; then
found=yes
lib=$dir/$lib.dll.lib
break
fi
if test -f "$dir/$lib.lib"; then
found=yes
lib=$dir/$lib.lib
break
fi
if test -f "$dir/lib$lib.a"; then
found=yes
lib=$dir/lib$lib.a
break
fi
done
# also restores IFS when the loop body never ran
IFS=$save_IFS
if test "$found" != yes; then
lib=$lib.lib
fi
}
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# Walks over the original arguments once. Each iteration looks at $1, appends
# its translation (if any) to the end of the positional parameters and then
# shifts $1 away, so that after the loop "$@" holds only the rewritten
# command line. Linker-only options are collected in $linker_opts and passed
# after a single "-link". This function does not return: it exec's the
# rewritten command.
func_cl_wrapper ()
{
# Assume a capable shell
lib_path=
shared=:
linker_opts=
for arg
do
# $eat is set when the previous option already consumed this argument as
# its value ($2 at that time); such an argument is dropped here.
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
eat=1
case $2 in
*.o | *.[oO][bB][jJ])
# object output: -Fo<file>
func_file_conv "$2"
set x "$@" -Fo"$file"
shift
;;
*)
# any other output: -Fe<file>
func_file_conv "$2"
set x "$@" -Fe"$file"
shift
;;
esac
;;
-I)
eat=1
func_file_conv "$2" mingw
set x "$@" -I"$file"
shift
;;
-I*)
func_file_conv "${1#-I}" mingw
set x "$@" -I"$file"
shift
;;
-l)
eat=1
func_cl_dashl "$2"
set x "$@" "$lib"
shift
;;
-l*)
func_cl_dashl "${1#-l}"
set x "$@" "$lib"
shift
;;
-L)
# only recorded in $lib_path / $linker_opts, not re-appended to "$@"
eat=1
func_cl_dashL "$2"
;;
-L*)
func_cl_dashL "${1#-L}"
;;
-static)
# disables the <lib>.dll.lib lookup in func_cl_dashl
shared=false
;;
-Wl,*)
# split the comma separated linker flags into $linker_opts
arg=${1#-Wl,}
save_ifs="$IFS"; IFS=','
for flag in $arg; do
IFS="$save_ifs"
linker_opts="$linker_opts $flag"
done
IFS="$save_ifs"
;;
-Xlinker)
eat=1
linker_opts="$linker_opts $2"
;;
-*)
# unknown option: pass through unchanged
set x "$@" "$1"
shift
;;
*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
# C++ source: pass as -Tp<file>
func_file_conv "$1"
set x "$@" -Tp"$file"
shift
;;
*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
func_file_conv "$1" mingw
set x "$@" "$file"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
# drop the original argument that was just processed
shift
done
if test -n "$linker_opts"; then
linker_opts="-link$linker_opts"
fi
exec "$@" $linker_opts
# only reached when exec itself fails
exit 1
}
eat=
case $1 in
'')
echo "$0: No command. Try '$0 --help' for more information." 1>&2
exit 1;
;;
-h | --h*)
cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]
Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.
If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.
Report bugs to <bug-automake@gnu.org>.
EOF
exit $?
;;
-v | --v*)
echo "compile $scriptversion"
exit $?
;;
cl | *[/\\]cl | cl.exe | *[/\\]cl.exe | \
icl | *[/\\]icl | icl.exe | *[/\\]icl.exe )
func_cl_wrapper "$@" # Doesn't return...
;;
esac
ofile=
cfile=
for arg
do
if test -n "$eat"; then
eat=
else
case $1 in
-o)
# configure might choose to run compile as 'compile cc -o foo foo.c'.
# So we strip '-o arg' only if arg is an object.
eat=1
case $2 in
*.o | *.obj)
ofile=$2
;;
*)
set x "$@" -o "$2"
shift
;;
esac
;;
*.c)
cfile=$1
set x "$@" "$1"
shift
;;
*)
set x "$@" "$1"
shift
;;
esac
fi
shift
done
if test -z "$ofile" || test -z "$cfile"; then
# If no '-o' option was seen then we might have been invoked from a
# pattern rule where we don't need one. That is ok -- this is a
# normal compilation that the losing compiler can handle. If no
# '.c' file was seen then we are probably linking. That is also
# ok.
exec "$@"
fi
# Name of file we expect compiler to create.
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`
# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file. Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
while true; do
if mkdir "$lockdir" >/dev/null 2>&1; then
break
fi
sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
trap "rmdir '$lockdir'; exit 1" 1 2 15
# Run the compile.
"$@"
ret=$?
if test -f "$cofile"; then
test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi
rmdir "$lockdir"
exit $ret
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:

View File

@ -2,10 +2,10 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ(2.61)
AC_INIT([picasso],[2.0.0],[fincs.alt1@gmail.com])
AC_INIT([picasso],[2.7.1],[https://github.com/devkitPro/picasso/issues])
AC_CONFIG_SRCDIR([source/picasso_frontend.cpp])
AM_INIT_AUTOMAKE([1.10])
AM_INIT_AUTOMAKE([subdir-objects])
AC_CANONICAL_BUILD
AC_CANONICAL_HOST

42
example/CMakeLists.txt Normal file
View File

@ -0,0 +1,42 @@
# Build script for the picasso library sample: builds the ELF, then packages
# it (together with the romfs directory and an SMDH icon/metadata file) into a
# .3dsx homebrew executable.
cmake_minimum_required(VERSION 3.22)
project(linpicasso_sample)

# devkitPro library/include locations. DEVKITPRO is read as a CMake variable
# here. NOTE(review): presumably it is supplied by the toolchain file or -D on
# the command line - confirm.
set(CMAKE_EXE_LINKER_FLAGS "-L${DEVKITPRO}/libctru/lib -L${DEVKITPRO}/picaGL/lib -L${DEVKITPRO}/portlibs/3ds/lib -specs=3dsx.specs -Wl,--gc-sections")
include_directories(${DEVKITPRO}/libctru/include ${DEVKITPRO}/picaGL/include ${DEVKITPRO}/portlibs/3ds/include)
add_definitions("-D__3DS__")

# Metadata embedded into the SMDH file
set(APP_TITLE "${PROJECT_NAME}")
set(APP_DESCRIPTION "Example of Lib Picasso")
set(APP_AUTHOR "Tobi-D7, tobid7vx")
set(APP_ICON "/opt/devkitpro/libctru/default_icon.png")
set(APP_ROMFS "${PROJECT_SOURCE_DIR}/romfs")

enable_language(ASM)

# Build the picasso library from the parent directory
set(BASE_CTR ON CACHE BOOL "Enable 3ds")
add_subdirectory(../ picasso)

add_executable(${PROJECT_NAME}.elf src/main.cpp)
target_include_directories(${PROJECT_NAME}.elf PRIVATE src ../include)
target_link_libraries(${PROJECT_NAME}.elf citro2d citro3d ctru m picasso)

# Generated files live in the binary directory; name them once so that the
# OUTPUT and DEPENDS entries below always refer to the same paths.
set(SAMPLE_SMDH "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.smdh")
set(SAMPLE_3DSX "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.3dsx")

add_custom_command(
  OUTPUT ${SAMPLE_SMDH}
  COMMAND smdhtool --create "${APP_TITLE}" "${APP_DESCRIPTION}" "${APP_AUTHOR}" "${APP_ICON}" ${PROJECT_NAME}.smdh
  DEPENDS ${PROJECT_NAME}.elf
  VERBATIM
)

# 3dsxtool reads the .smdh produced above, so it must be listed as a
# dependency; otherwise a parallel build may run 3dsxtool before (or while)
# smdhtool writes the file.
add_custom_command(
  OUTPUT ${SAMPLE_3DSX}
  COMMAND 3dsxtool ${PROJECT_NAME}.elf ${PROJECT_NAME}.3dsx --romfs=${APP_ROMFS} --smdh=${PROJECT_NAME}.smdh
  DEPENDS ${PROJECT_NAME}.elf ${SAMPLE_SMDH}
  VERBATIM
)

add_custom_target( 3ds ALL
  DEPENDS ${SAMPLE_SMDH} ${SAMPLE_3DSX}
)

View File

@ -0,0 +1,36 @@
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
.proc main
; Entry point of the vertex shader: transforms the input position by the
; projection matrix and passes the input color through unchanged.
; Force the w component of inpos to be 1.0
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
; (one dot product per matrix row, each writing one output component)
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outclr = inclr
mov outclr, inclr
; We're finished
end
.end

174
example/src/main.cpp Normal file
View File

@ -0,0 +1,174 @@
#include <3ds.h>
#include <citro3d.h>
#include <pica.hpp>
static const char *const vertShader = R"text(
; Example PICA200 vertex shader
; Uniforms
.fvec projection[4]
; Constants
.constf myconst(0.0, 1.0, -1.0, 0.1)
.constf myconst2(0.3, 0.0, 0.0, 0.0)
.alias zeros myconst.xxxx ; Vector full of zeros
.alias ones myconst.yyyy ; Vector full of ones
; Outputs
.out outpos position
.out outclr color
; Inputs (defined as aliases for convenience)
.alias inpos v0
.alias inclr v1
.proc main
; Force the w component of inpos to be 1.0
mov r0.xyz, inpos
mov r0.w, ones
; outpos = projectionMatrix * inpos
dp4 outpos.x, projection[0], r0
dp4 outpos.y, projection[1], r0
dp4 outpos.z, projection[2], r0
dp4 outpos.w, projection[3], r0
; outclr = inclr
mov outclr, inclr
; We're finished
end
.end
)text";
#define CLEAR_COLOR 0x68B0D8FF
#define DISPLAY_TRANSFER_FLAGS \
(GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
typedef struct { float x, y, z; } vertex;
static const vertex vertex_list[] =
{
{ 200.0f, 200.0f, 0.5f },
{ 100.0f, 40.0f, 0.5f },
{ 300.0f, 40.0f, 0.5f },
};
#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
static DVLB_s* vshader_dvlb;
static shaderProgram_s program;
static int uLoc_projection;
static C3D_Mtx projection;
static char* vshader_shbin;
static int vshader_shbin_size;
static void* vbo_data;
// Sets up everything needed to draw the triangle: parses the assembled
// shader binary, binds the shader program, describes the vertex attributes,
// computes the projection matrix, uploads the vertex data and configures the
// texture environment.
// Reads the globals vshader_shbin / vshader_shbin_size, so the shader must
// have been assembled before this function is called.
static void sceneInit(void)
{
// Load the vertex shader, create a shader program and bind it
vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
shaderProgramInit(&program);
shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
C3D_BindProgram(&program);
// Get the location of the uniforms
// ("projection" is the .fvec uniform declared in the shader source)
uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
// Configure attributes for use with the vertex shader
C3D_AttrInfo* attrInfo = C3D_GetAttrInfo();
AttrInfo_Init(attrInfo);
AttrInfo_AddLoader(attrInfo, 0, GPU_FLOAT, 3); // v0=position
AttrInfo_AddFixed(attrInfo, 1); // v1=color
// Set the fixed attribute (color) to solid white
C3D_FixedAttribSet(1, 1.0, 1.0, 1.0, 1.0);
// Compute the projection matrix
Mtx_OrthoTilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0, true);
// Create the VBO (vertex buffer object)
// The buffer is released again in sceneExit().
vbo_data = linearAlloc(sizeof(vertex_list));
memcpy(vbo_data, vertex_list, sizeof(vertex_list));
// Configure buffers
C3D_BufInfo* bufInfo = C3D_GetBufInfo();
BufInfo_Init(bufInfo);
BufInfo_Add(bufInfo, vbo_data, sizeof(vertex), 1, 0x0);
// Configure the first fragment shading substage to just pass through the vertex color
// See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
C3D_TexEnv* env = C3D_GetTexEnv(0);
C3D_TexEnvInit(env);
C3D_TexEnvSrc(env, C3D_Both, GPU_PRIMARY_COLOR, (GPU_TEVSRC)0, (GPU_TEVSRC)0);
C3D_TexEnvFunc(env, C3D_Both, GPU_REPLACE);
}
// Draws one frame of the scene: uploads the projection matrix computed in
// sceneInit() to the vertex shader uniform and draws every vertex of
// vertex_list as triangles.
static void sceneRender(void)
{
// Update the uniforms
C3D_FVUnifMtx4x4(GPU_VERTEX_SHADER, uLoc_projection, &projection);
// Draw the VBO
C3D_DrawArrays(GPU_TRIANGLES, 0, vertex_list_count);
}
// Releases the resources created by sceneInit(): the vertex buffer, the
// shader program and the parsed DVLB.
// The assembled shader binary (vshader_shbin) is not released here.
static void sceneExit(void)
{
// Free the VBO
linearFree(vbo_data);
// Free the shader program
shaderProgramFree(&program);
DVLB_Free(vshader_dvlb);
}
int main()
{
// Initialize graphics
gfxInitDefault();
romfsInit();
C3D_Init(C3D_DEFAULT_CMDBUF_SIZE);
//vshader_shbin = Pica::AssembleCode(vertShader, vshader_shbin_size);
vshader_shbin = Pica::AssembleFile("romfs:/vshader.pica", vshader_shbin_size);
// Initialize the render target
C3D_RenderTarget* target = C3D_RenderTargetCreate(240, 400, GPU_RB_RGBA8, GPU_RB_DEPTH24_STENCIL8);
C3D_RenderTargetSetOutput(target, GFX_TOP, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
// Initialize the scene
sceneInit();
// Main loop
while (aptMainLoop())
{
hidScanInput();
// Respond to user input
u32 kDown = hidKeysDown();
if (kDown & KEY_START)
break; // break in order to return to hbmenu
// Render the scene
C3D_FrameBegin(C3D_FRAME_SYNCDRAW);
C3D_RenderTargetClear(target, C3D_CLEAR_ALL, CLEAR_COLOR, 0);
C3D_FrameDrawOn(target);
sceneRender();
C3D_FrameEnd(0);
}
// Deinitialize the scene
sceneExit();
// Deinitialize graphics
C3D_Fini();
gfxExit();
return 0;
}

10
include/pica.hpp Normal file
View File

@ -0,0 +1,10 @@
#pragma once
#include <iostream>
#include <string>
// Public interface of the picasso runtime shader assembler library.
namespace Pica
{
// Installs the function that is called to report errors. The handler
// receives two C strings: a short headline (top) and a detail message.
void InstallErrorCallback(void(*ErrorHandler)(const char* top, const char* message));
// Assembles the PICA200 shader source text passed in `vertex` and stores the
// size in bytes of the generated binary in `res_size`. Returns a pointer to
// the generated binary.
// NOTE(review): ownership and lifetime of the returned buffer are not
// expressed by this signature - confirm against the implementation.
char* AssembleCode(const char* vertex, int &res_size);
// Same as AssembleCode(), but reads the shader source from the file named
// by `file`.
char* AssembleFile(const char* file, int &res_size);
}

View File

@ -1,43 +1,44 @@
#pragma once
#include <stdio.h>
#include "types.h"
#include "picasso/types.h"
#include <sstream>
#include <string>
class FileClass
{
FILE* f;
std::stringstream f;
bool LittleEndian, own;
int filePos;
size_t _RawRead(void* buffer, size_t size)
{
size_t x = fread(buffer, 1, size, f);
filePos += x;
return x;
f.read((char*)buffer, size);
filePos += size;
return size;
}
size_t _RawWrite(const void* buffer, size_t size)
{
size_t x = fwrite(buffer, 1, size, f);
filePos += x;
return x;
f.write((const char*)buffer, size);
filePos += size;
return size;
}
public:
FileClass(const char* file, const char* mode) : LittleEndian(true), own(true), filePos(0)
{
f = fopen(file, mode);
//Do nothing
}
FileClass(FILE* inf) : f(inf), LittleEndian(true), own(false), filePos(0) { }
~FileClass()
{
if (f && own) fclose(f);
//Do nothing
}
void SetLittleEndian() { LittleEndian = true; }
void SetBigEndian() { LittleEndian = false; }
FILE* get_ptr() { return f; }
bool openerror() { return f == NULL; }
std::stringstream* get_ptr() { return &f; }
bool openerror() { return false; }
dword_t ReadDword()
{
@ -103,13 +104,11 @@ public:
t.f = value;
WriteWord(t.w);
}
bool ReadRaw(void* buffer, size_t size) { return _RawRead(buffer, size) == size; }
bool WriteRaw(const void* buffer, size_t size) { return _RawWrite(buffer, size) == size; }
void Seek(int pos, int mode) { fseek(f, pos, mode); }
int Tell() { return filePos /*ftell(f)*/; }
void Flush() { fflush(f); }
};
static inline char* StringFromFile(const char* filename)
@ -129,4 +128,4 @@ static inline char* StringFromFile(const char* filename)
buf[size] = 0;
fclose(f);
return buf;
}
}

View File

@ -5,10 +5,10 @@ enum
MAESTRO_DP3,
MAESTRO_DP4,
MAESTRO_DPH,
MAESTRO_unk4,
MAESTRO_DST,
MAESTRO_EX2,
MAESTRO_LG2,
MAESTRO_unk7,
MAESTRO_LITP,
MAESTRO_MUL,
MAESTRO_SGE,
MAESTRO_SLT,
@ -27,7 +27,7 @@ enum
MAESTRO_unk16,
MAESTRO_unk17,
MAESTRO_DPHI,
MAESTRO_unk19,
MAESTRO_DSTI,
MAESTRO_SGEI,
MAESTRO_SLTI,
MAESTRO_unk1C,
@ -35,7 +35,7 @@ enum
MAESTRO_unk1E,
MAESTRO_unk1F,
MAESTRO_unk20,
MAESTRO_BREAK,
MAESTRO_NOP,
MAESTRO_END,
MAESTRO_BREAKC,
@ -54,4 +54,4 @@ enum
// Only the upper 3 bits are used for the following opcodes
MAESTRO_MADI = 0x30,
MAESTRO_MAD = 0x38,
};
};

View File

@ -16,9 +16,9 @@
#include <string>
#include <algorithm>
#include "FileClass.h"
#include "picasso/FileClass.h"
#include "maestro_opcodes.h"
#include "picasso/maestro_opcodes.h"
#if !defined(WIN32) && !defined(stricmp)
#define stricmp strcasecmp
@ -55,8 +55,6 @@ enum
COND_LE,
COND_GT,
COND_GE,
COND_UNK1,
COND_UNK2,
};
//-----------------------------------------------------------------------------
@ -64,6 +62,7 @@ enum
//-----------------------------------------------------------------------------
// Output buffer
#define MAX_VSH_SIZE 512
typedef std::vector<u32> outputBufType;
typedef outputBufType::iterator outputBufIter;
extern outputBufType g_outputBuf;
@ -73,6 +72,7 @@ enum
SE_PROC,
SE_FOR,
SE_IF,
SE_ARRAY,
};
struct StackEntry
@ -109,6 +109,19 @@ struct Uniform
std::string name;
int pos, size;
int type;
inline bool operator <(const Uniform& rhs) const
{
return pos < rhs.pos;
}
void init(const char* name, int pos, int size, int type)
{
this->name = name;
this->pos = pos;
this->size = size;
this->type = type;
}
};
// List of uniforms
@ -143,6 +156,8 @@ extern labelTableType g_labels;
extern relocTableType g_labelRelocTable;
extern aliasTableType g_aliases;
extern bool g_autoNop;
int AssembleString(char* str, const char* initialFilename);
int RelocateProduct(void);
@ -152,15 +167,22 @@ int RelocateProduct(void);
enum
{
OUTTYPE_POS = 0,
OUTTYPE_NQUAT,
OUTTYPE_CLR,
OUTTYPE_TCOORD0,
OUTTYPE_TCOORD0W,
OUTTYPE_TCOORD1,
OUTTYPE_TCOORD2,
OUTTYPE_7,
OUTTYPE_VIEW,
OUTTYPE_POS = 0,
OUTTYPE_NQUAT = 1,
OUTTYPE_CLR = 2,
OUTTYPE_TCOORD0 = 3,
OUTTYPE_TCOORD0W = 4,
OUTTYPE_TCOORD1 = 5,
OUTTYPE_TCOORD2 = 6,
OUTTYPE_VIEW = 8,
OUTTYPE_DUMMY = 9,
};
enum
{
GSHTYPE_POINT = 0,
GSHTYPE_VARIABLE = 1,
GSHTYPE_FIXED = 2,
};
struct Constant
@ -181,7 +203,12 @@ struct DVLEData
std::string filename;
std::string entrypoint;
size_t entryStart, entryEnd;
bool nodvle, isGeoShader;
bool nodvle, isGeoShader, isCompatGeoShader, isMerge;
u16 inputMask, outputMask;
u8 geoShaderType;
u8 geoShaderFixedStart;
u8 geoShaderVariableNum;
u8 geoShaderFixedNum;
// Uniforms
Uniform uniformTable[MAX_UNIFORM];
@ -194,12 +221,36 @@ struct DVLEData
int constantCount;
// Outputs
#define MAX_OUTPUT 8
#define MAX_OUTPUT 16
u64 outputTable[MAX_OUTPUT];
u32 outputUsedReg;
int outputCount;
bool usesGshSpace() const { return isGeoShader && !isCompatGeoShader; }
int findFreeOutput() const
{
for (int i = 0; i < maxOutputReg(); i ++)
if (!(outputMask & BIT(i)))
return i;
return -1;
}
int findFreeInput() const
{
for (int i = 0; i < 16; i ++)
if (!(inputMask & BIT(i)))
return i;
return -1;
}
int maxOutputReg() const
{
return isGeoShader ? 0x07 : 0x10;
}
DVLEData(const char* filename) :
filename(filename), entrypoint("main"),
nodvle(false), isGeoShader(false),
uniformCount(0), symbolSize(0), constantCount(0), outputCount(0) { }
};
nodvle(false), isGeoShader(false), isCompatGeoShader(false), isMerge(false),
inputMask(0), outputMask(0), geoShaderType(0), geoShaderFixedStart(0), geoShaderVariableNum(0), geoShaderFixedNum(0),
uniformCount(0), symbolSize(0), constantCount(0), outputUsedReg(0), outputCount(0) { }
};

View File

@ -16,7 +16,6 @@ typedef uint8_t u8;
#define BIT(n) (1U << (n))
#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 6)
#ifndef __BYTE_ORDER__
#include <sys/param.h>
#define __BYTE_ORDER__ BYTE_ORDER
@ -25,11 +24,14 @@ typedef uint8_t u8;
#endif
#ifndef __llvm__
#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
static inline uint16_t __builtin_bswap16(uint16_t x)
{
return ((x << 8) & 0xff00) | ((x >> 8) & 0x00ff);
}
#if defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ < 7)
static inline uint32_t __builtin_bswap32(uint32_t x)
{
return ((x << 24) & 0xff000000) |
@ -45,6 +47,7 @@ static inline uint64_t __builtin_bswap64(uint64_t x)
}
#endif
#endif
#endif
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define be_dword(a) __builtin_bswap64(a)
@ -62,4 +65,4 @@ static inline uint64_t __builtin_bswap64(uint64_t x)
#define le_hword(a) __builtin_bswap16(a)
#else
#error "What's the endianness of the platform you're targeting?"
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,21 +1,35 @@
#include "picasso.h"
// !! Taken from ctrulib !!
u32 f32tof24(float vf)
// f24 has:
// - 1 sign bit
// - 7 exponent bits
// - 16 mantissa bits
uint32_t f32tof24(float f)
{
if (!vf) return 0;
uint32_t i;
memcpy(&i, &f, sizeof(f));
union { float f; u32 v; } q;
q.f=vf;
uint32_t mantissa = (i << 9) >> 9;
int32_t exponent = (i << 1) >> 24;
uint32_t sign = (i << 0) >> 31;
u8 s = q.v>>31;
u32 exp = ((q.v>>23) & 0xFF) - 0x40;
u32 man = (q.v>>7) & 0xFFFF;
// Truncate mantissa
mantissa >>= 7;
if (exp >= 0)
return man | (exp<<16) | (s<<23);
else
return s<<23;
// Re-bias exponent
exponent = exponent - 127 + 63;
if (exponent < 0)
{
// Underflow: flush to zero
return sign << 23;
}
else if (exponent > 0x7F)
{
// Overflow: saturate to infinity
return (sign << 23) | (0x7F << 16);
}
return (sign << 23) | (exponent << 16) | mantissa;
}
#ifdef WIN32
@ -36,6 +50,8 @@ int usage(const char* prog)
"Options:\n"
" -o, --out=<file> Specifies the name of the SHBIN file to generate\n"
" -h, --header=<file> Specifies the name of the header file to generate\n"
" -n, --no-nop Disables the automatic insertion of padding NOPs\n"
" -v, --version Displays version information\n"
, prog);
return EXIT_FAILURE;
}
@ -49,17 +65,21 @@ int main(int argc, char* argv[])
{ "out", required_argument, NULL, 'o' },
{ "header", required_argument, NULL, 'h' },
{ "help", no_argument, NULL, '?' },
{ "no-nop", no_argument, NULL, 'n' },
{ "version",no_argument, NULL, 'v' },
{ NULL, 0, NULL, 0 }
};
int opt, optidx = 0;
while ((opt = getopt_long(argc, argv, "o:h:?", long_options, &optidx)) != -1)
while ((opt = getopt_long(argc, argv, "o:h:?nv", long_options, &optidx)) != -1)
{
switch (opt)
{
case 'o': shbinFile = optarg; break;
case 'h': hFile = optarg; break;
case '?': usage(argv[0]); return EXIT_SUCCESS;
case '?': usage(argv[0]); return EXIT_SUCCESS;
case 'n': g_autoNop = false; break;
case 'v': printf("%s - Built on %s %s\n", PACKAGE_STRING, __DATE__, __TIME__); return EXIT_SUCCESS;
default: return usage(argv[0]);
}
}
@ -93,7 +113,7 @@ int main(int argc, char* argv[])
char* sourceCode = StringFromFile(vshFile);
if (!sourceCode)
{
fprintf(stderr, "error: cannot open input file: %s\n");
fprintf(stderr, "error: cannot open input file: %s\n", vshFile);
return EXIT_FAILURE;
}
@ -163,12 +183,17 @@ int main(int argc, char* argv[])
curOff = 16*4;
f.WriteWord(0x454C5644); // DVLE
f.WriteHword(0); // padding?
f.WriteHword(dvle->isGeoShader ? 1 : 0); // Shader type
f.WriteHword(0x1002); // maybe version?
f.WriteByte(dvle->isGeoShader ? 1 : 0); // Shader type
f.WriteByte(dvle->isMerge ? 1 : 0);
f.WriteWord(dvle->entryStart); // offset to main
f.WriteWord(dvle->entryEnd); // offset to end of main
f.WriteWord(0); // ???
f.WriteWord(0); // ???
f.WriteHword(dvle->inputMask);
f.WriteHword(dvle->outputMask);
f.WriteByte(dvle->geoShaderType);
f.WriteByte(dvle->geoShaderFixedStart);
f.WriteByte(dvle->geoShaderVariableNum);
f.WriteByte(dvle->geoShaderFixedNum);
f.WriteWord(curOff); // offset to constant table
f.WriteWord(dvle->constantCount); // size of constant table
curOff += dvle->constantCount*5*4;
@ -181,9 +206,11 @@ int main(int argc, char* argv[])
f.WriteWord(dvle->uniformCount); // size of uniform table
curOff += dvle->uniformCount*8;
f.WriteWord(curOff); // offset to symbol table
u32 temp = f.Tell();
f.WriteWord(dvle->symbolSize); // size of symbol table
// Sort uniforms by position
std::sort(dvle->uniformTable, dvle->uniformTable + dvle->uniformCount);
// Write constants
for (int i = 0; i < dvle->constantCount; i ++)
{
@ -220,14 +247,18 @@ int main(int argc, char* argv[])
Uniform& u = dvle->uniformTable[i];
size_t l = u.name.length()+1;
f.WriteWord(sp); sp += l;
f.WriteHword(u.pos-0x10);
f.WriteHword(u.pos+u.size-1-0x10);
int pos = u.pos;
if (pos >= 0x20)
pos -= 0x10;
f.WriteHword(pos);
f.WriteHword(pos+u.size-1);
}
// Write symbols
for (int i = 0; i < dvle->uniformCount; i ++)
{
std::string& u = dvle->uniformTable[i].name;
std::string u(dvle->uniformTable[i].name);
std::replace(u.begin(), u.end(), '$', '.');
size_t l = u.length()+1;
f.WriteRaw(u.c_str(), l);
}

199
source/picasso_library.cpp Normal file
View File

@ -0,0 +1,199 @@
#include <pica.hpp>
#include <picasso/picasso.h>

#include <cstdlib>
#include <cstring>
#include <string>
// Converts an IEEE-754 single precision float into the 24-bit float format
// (stored in the low 24 bits of the result). Layout of the result:
// - bit 23:      sign
// - bits 16..22: exponent (7 bits, bias 63)
// - bits 0..15:  mantissa (16 bits, truncated from the 23 source bits)
// Values too small for the target range keep only their sign; values too
// large become infinity of the same sign.
uint32_t f32tof24(float f) {
  uint32_t bits;
  memcpy(&bits, &f, sizeof(bits));

  // Sign already moved to its final position
  const uint32_t signField = (bits >> 31) << 23;
  // Drop the 7 least significant mantissa bits (truncation, no rounding)
  const uint32_t mantissaField = (bits & 0x7FFFFFu) >> 7;
  // Switch from the f32 bias (127) to the f24 bias (63)
  const int32_t rebased = static_cast<int32_t>((bits >> 23) & 0xFFu) - 127 + 63;

  // Underflow: flush to (signed) zero
  if (rebased < 0)
    return signField;

  // Overflow: saturate to infinity
  if (rebased > 0x7F)
    return signField | (0x7Fu << 16);

  return signField | (static_cast<uint32_t>(rebased) << 16) | mantissaField;
}
// Default error handler: prints the headline and the message to standard
// output, each on its own line, flushing after each line.
void BasicHandler(const char *top, const char *message) {
  std::ostream &out = std::cout;
  out << top;
  out << std::endl;
  out << message;
  out << std::endl;
}
static void (*EHND)(const char *top, const char *message) = BasicHandler;
namespace Pica {
void InstallErrorCallback(void (*ErrorHandler)(const char *top,
const char *message)) {
EHND = ErrorHandler;
}
char *AssembleCode(const char *vertex, int &res_size) {
int rc = 0;
rc = AssembleString((char *)vertex, "llc_npi");
if (rc) {
EHND("Error when Assembling Code", vertex);
}
rc = RelocateProduct();
if (rc) {
EHND("Error when Relocating Product", "0");
}
FileClass f("Dont Care", "wb");
u32 progSize = g_outputBuf.size();
u32 dvlpSize = 10 * 4 + progSize * 4 + g_opdescCount * 8;
// Write DVLB header
f.WriteWord(0x424C5644); // DVLB
f.WriteWord(g_totalDvleCount); // Number of DVLEs
// Calculate and write DVLE offsets
u32 curOff = 2 * 4 + g_totalDvleCount * 4 + dvlpSize;
for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end();
++dvle) {
if (dvle->nodvle)
continue;
f.WriteWord(curOff);
curOff += 16 * 4; // Header
curOff += dvle->constantCount * 20;
curOff += dvle->outputCount * 8;
curOff += dvle->uniformCount * 8;
curOff += dvle->symbolSize;
curOff = (curOff + 3) & ~3; // Word alignment
}
// Write DVLP header
f.WriteWord(0x504C5644); // DVLP
f.WriteWord(0); // version
f.WriteWord(10 * 4); // offset to shader binary blob
f.WriteWord(progSize); // size of shader binary blob
f.WriteWord(10 * 4 + progSize * 4); // offset to opdesc table
f.WriteWord(g_opdescCount); // number of opdescs
f.WriteWord(dvlpSize); // offset to symtable (TODO)
f.WriteWord(0); // ????
f.WriteWord(0); // ????
f.WriteWord(0); // ????
// Write program
for (outputBufIter it = g_outputBuf.begin(); it != g_outputBuf.end(); ++it)
f.WriteWord(*it);
// Write opdescs
for (int i = 0; i < g_opdescCount; i++)
f.WriteDword(g_opdescTable[i]);
// Write DVLEs
for (dvleTableIter dvle = g_dvleTable.begin(); dvle != g_dvleTable.end();
++dvle) {
if (dvle->nodvle)
continue;
curOff = 16 * 4;
f.WriteWord(0x454C5644); // DVLE
f.WriteHword(0x1002); // maybe version?
f.WriteByte(dvle->isGeoShader ? 1 : 0); // Shader type
f.WriteByte(dvle->isMerge ? 1 : 0);
f.WriteWord(dvle->entryStart); // offset to main
f.WriteWord(dvle->entryEnd); // offset to end of main
f.WriteHword(dvle->inputMask);
f.WriteHword(dvle->outputMask);
f.WriteByte(dvle->geoShaderType);
f.WriteByte(dvle->geoShaderFixedStart);
f.WriteByte(dvle->geoShaderVariableNum);
f.WriteByte(dvle->geoShaderFixedNum);
f.WriteWord(curOff); // offset to constant table
f.WriteWord(dvle->constantCount); // size of constant table
curOff += dvle->constantCount * 5 * 4;
f.WriteWord(curOff); // offset to label table (TODO)
f.WriteWord(0); // size of label table (TODO)
f.WriteWord(curOff); // offset to output table
f.WriteWord(dvle->outputCount); // size of output table
curOff += dvle->outputCount * 8;
f.WriteWord(curOff); // offset to uniform table
f.WriteWord(dvle->uniformCount); // size of uniform table
curOff += dvle->uniformCount * 8;
f.WriteWord(curOff); // offset to symbol table
f.WriteWord(dvle->symbolSize); // size of symbol table
// Sort uniforms by position
std::sort(dvle->uniformTable, dvle->uniformTable + dvle->uniformCount);
// Write constants
for (int i = 0; i < dvle->constantCount; i++) {
Constant &ct = dvle->constantTable[i];
f.WriteHword(ct.type);
if (ct.type == UTYPE_FVEC) {
f.WriteHword(ct.regId - 0x20);
for (int j = 0; j < 4; j++)
f.WriteWord(f32tof24(ct.fparam[j]));
} else if (ct.type == UTYPE_IVEC) {
f.WriteHword(ct.regId - 0x80);
for (int j = 0; j < 4; j++)
f.WriteByte(ct.iparam[j]);
} else if (ct.type == UTYPE_BOOL) {
f.WriteHword(ct.regId - 0x88);
f.WriteWord(ct.bparam ? 1 : 0);
}
if (ct.type != UTYPE_FVEC)
for (int j = 0; j < 3; j++)
f.WriteWord(0); // Padding
}
// Write outputs
for (int i = 0; i < dvle->outputCount; i++)
f.WriteDword(dvle->outputTable[i]);
// Write uniforms
size_t sp = 0;
for (int i = 0; i < dvle->uniformCount; i++) {
Uniform &u = dvle->uniformTable[i];
size_t l = u.name.length() + 1;
f.WriteWord(sp);
sp += l;
int pos = u.pos;
if (pos >= 0x20)
pos -= 0x10;
f.WriteHword(pos);
f.WriteHword(pos + u.size - 1);
}
// Write symbols
for (int i = 0; i < dvle->uniformCount; i++) {
std::string u(dvle->uniformTable[i].name);
std::replace(u.begin(), u.end(), '$', '.');
size_t l = u.length() + 1;
f.WriteRaw(u.c_str(), l);
}
// Word alignment
int pos = f.Tell();
int pad = ((pos + 3) & ~3) - pos;
for (int i = 0; i < pad; i++)
f.WriteByte(0);
}
res_size = f.Tell();
return (char *)f.get_ptr()->str().c_str();
}
char *AssembleFile(const char *file, int &res_size) {
char *sourceCode = StringFromFile(file);
if (!sourceCode) {
EHND("error:", "cannot open input file!\n");
}
return AssembleCode(sourceCode, res_size);
}
} // namespace Pica