commit 9d8ec29538fd350518d12b7e6f94d8b103b9bee7 Author: fincs Date: Sun Nov 16 18:59:54 2014 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dc3267e --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*.exe +*.o +*.elf +*~ +*.shbin +*.vsh.h +*.bat +build/ diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..5e11594 --- /dev/null +++ b/COPYING @@ -0,0 +1,19 @@ +Copyright (c) 2014-2015, fincs + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..c51f388 --- /dev/null +++ b/Makefile @@ -0,0 +1,78 @@ +# This Makefile was blatantly ripped off from the devkitPro project + +.SUFFIXES: + +TARGET := $(notdir $(CURDIR)) +BUILD := build +SOURCES := source + +export CC := gcc +export CXX := g++ +export STRIP := strip + +CFLAGS := -g0 -Wall -O2 +CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++0x + +LDFLAGS := -g0 + +UNAME := $(shell uname -s) + +ifneq (,$(findstring MINGW,$(UNAME))) + EXEEXT := .exe +endif + +ifneq (,$(findstring Darwin,$(UNAME))) + SDK := /Developer/SDKs/MacOSX10.4u.sdk + OSXCFLAGS := -mmacosx-version-min=10.4 -isysroot $(SDK) -arch i386 -arch ppc + OSXCXXFLAGS := -fvisibility=hidden -mmacosx-version-min=10.4 -isysroot $(SDK) -arch i386 -arch ppc + OSXLDFLAGS := -mmacosx-version-min=10.4 -Wl,-syslibroot,$(SDK) -arch i386 -arch ppc +endif + +ifneq ($(BUILD),$(notdir $(CURDIR))) + +export OUTPUT := $(CURDIR)/$(TARGET)$(EXEEXT) +export DEPSDIR := $(CURDIR)/$(BUILD) +export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) + +CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c))) +CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp))) + +ifeq ($(strip $(CPPFILES)),) + export LD := $(CC) +else + export LD := $(CXX) +endif + +export OFILES := $(CFILES:.c=.o) $(CPPFILES:.cpp=.o) + +.PHONY: $(BUILD) clean + +$(BUILD): + @[ -d $@ ] || mkdir -p $@ + @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile + +clean: + @rm -fr $(BUILD) $(OUTPUT) + +else + +$(OUTPUT): $(OFILES) + +-include $(DEPSDIR)/*.d + +$(OUTPUT): + @echo Linking... 
+ @$(LD) $(OSXLDFLAGS) $(LDFLAGS) $(OFILES) -o $@ + @$(STRIP) $@ + +%.o: %.c + @echo $(notdir $<) + @$(CC) -E -MMD -MF $(DEPSDIR)/$(*).d $(CFLAGS) $< > /dev/null + @$(CC) $(OSXCFLAGS) $(CFLAGS) -o $@ -c $< + +%.o: %.cpp + @echo $(notdir $<) + @$(CXX) -E -MMD -MF $(DEPSDIR)/$(*).d $(CXXFLAGS) $< > /dev/null + @$(CXX) $(OSXCXXFLAGS) $(CXXFLAGS) -o $@ -c $< + +endif diff --git a/README.md b/README.md new file mode 100644 index 0000000..7b2a977 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +# picasso + +## Introduction + +`picasso` is a PICA200 shader assembler, written in C++. The PICA200 is the GPU used by the Nintendo 3DS. + +Currently there's no documentation; refer to `example.vsh` in order to figure out the syntax. + +## Building + +A working C++11 compiler for the host is required (Windows users: use TDM-GCC). Use `make` to build the program. + +## Shout-outs + +- **smea** for reverse-engineering the PICA200, writing documentation, working hard & making `aemstro_as.py` (the original homebrew PICA200 shader assembler) +- **neobrain** for making `nihstro-assemble`, whose syntax inspired that of `picasso` and whose usage of boost inspired me to make my own assembler without hefty dependencies. 
diff --git a/example.vsh b/example.vsh
new file mode 100644
index 0000000..880399b
--- /dev/null
+++ b/example.vsh
@@ -0,0 +1,53 @@
+; Really simple & stupid PICA200 shader
+; Also serves as an example of picasso syntax
+
+; Uniforms
+.uniform projMtx(4), mdlvMtx(4)
+
+; Constants
+.const myconst(0.0, 1.0, -1.0, 0.0)
+.alias zeros myconst.xxxx
+.alias ones myconst.yyyy
+.alias negones myconst.zzzz
+.alias dummytcoord myconst.xxxy ; (0,0,0,1)
+
+; Outputs
+.out outpos position
+.out outtc0 texcoord0
+.out outtc1 texcoord1
+.out outtc2 texcoord2
+.out outclr color
+
+; Inputs
+.alias inpos v0
+.alias intex v1
+.alias inarg v2
+
+.proc main
+	; r0 = (inpos.x, inpos.y, inpos.z, 1.0)
+	mov r0.xyz, inpos
+	mov r0.w, ones
+
+	; r1 = mdlvMtx * r0
+	dp4 r1.x, mdlvMtx(0), r0
+	dp4 r1.y, mdlvMtx(1), r0
+	dp4 r1.z, mdlvMtx(2), r0
+	dp4 r1.w, mdlvMtx(3), r0
+
+	; outpos = projMtx * r1
+	dp4 outpos.x, projMtx(0), r1
+	dp4 outpos.y, projMtx(1), r1
+	dp4 outpos.z, projMtx(2), r1
+	dp4 outpos.w, projMtx(3), r1
+
+	; Set texcoords
+	mov outtc0, intex
+	mov outtc1, dummytcoord
+	mov outtc2, dummytcoord
+
+	; Set vertex color
+	mov outclr.xyz, inarg
+	mov outclr.w, ones
+
+	end
+.end
diff --git a/source/FileClass.h b/source/FileClass.h
new file mode 100644
index 0000000..ccdd096
--- /dev/null
+++ b/source/FileClass.h
@@ -0,0 +1,132 @@
+#pragma once
+#include <stdio.h>
+#include <stdlib.h> // malloc/free used by StringFromFile
+#include "types.h"
+
+// Thin wrapper around a C FILE* that reads and writes fixed-width integers in a
+// selectable byte order (little-endian by default) and keeps a running byte
+// offset of the data that went through it.
+class FileClass
+{
+	FILE* f;
+	bool LittleEndian, own; // own: the destructor closes f
+	int filePos;            // offset reported by Tell()
+
+	// Reads up to size bytes; advances filePos by the amount actually read.
+	size_t _RawRead(void* buffer, size_t size)
+	{
+		size_t x = fread(buffer, 1, size, f);
+		filePos += x;
+		return x;
+	}
+
+	// Writes up to size bytes; advances filePos by the amount actually written.
+	size_t _RawWrite(const void* buffer, size_t size)
+	{
+		size_t x = fwrite(buffer, 1, size, f);
+		filePos += x;
+		return x;
+	}
+
+public:
+	// Opens the file itself; call openerror() before using the object.
+	FileClass(const char* file, const char* mode) : LittleEndian(true), own(true), filePos(0)
+	{
+		f = fopen(file, mode);
+	}
+	// Wraps an already opened stream without taking ownership of it.
+	FileClass(FILE* inf) : f(inf), LittleEndian(true), own(false), filePos(0) { }
+	~FileClass()
+	{
+		if (f && own) fclose(f);
+	}
+
+	// An owning instance closes its stream on destruction, so a copy would
+	// close the same FILE* twice.
+	FileClass(const FileClass&) = delete;
+	FileClass& operator=(const FileClass&) = delete;
+
+	void SetLittleEndian() { LittleEndian = true; }
+	void SetBigEndian() { LittleEndian = false; }
+
+	FILE* get_ptr() { return f; }
+	bool openerror() { return f == NULL; }
+
+	// The Read* helpers return 0 (instead of an indeterminate value) when the
+	// stream runs short; use ReadRaw() when the failure itself must be detected.
+	dword_t ReadDword()
+	{
+		dword_t value = 0;
+		_RawRead(&value, sizeof(dword_t));
+		return LittleEndian ? le_dword(value) : be_dword(value);
+	}
+
+	void WriteDword(dword_t value)
+	{
+		value = LittleEndian ? le_dword(value) : be_dword(value);
+		_RawWrite(&value, sizeof(dword_t));
+	}
+
+	word_t ReadWord()
+	{
+		word_t value = 0;
+		_RawRead(&value, sizeof(word_t));
+		return LittleEndian ? le_word(value) : be_word(value);
+	}
+
+	void WriteWord(word_t value)
+	{
+		value = LittleEndian ? le_word(value) : be_word(value);
+		_RawWrite(&value, sizeof(word_t));
+	}
+
+	hword_t ReadHword()
+	{
+		hword_t value = 0;
+		_RawRead(&value, sizeof(hword_t));
+		return LittleEndian ? le_hword(value) : be_hword(value);
+	}
+
+	void WriteHword(hword_t value)
+	{
+		value = LittleEndian ? le_hword(value) : be_hword(value);
+		_RawWrite(&value, sizeof(hword_t));
+	}
+
+	byte_t ReadByte()
+	{
+		byte_t value = 0;
+		_RawRead(&value, sizeof(byte_t));
+		return value;
+	}
+
+	void WriteByte(byte_t value)
+	{
+		_RawWrite(&value, sizeof(byte_t));
+	}
+
+	// Floats travel as their 32-bit pattern, honouring the selected byte order.
+	float ReadFloat()
+	{
+		union { word_t w; float f; } t;
+		t.w = ReadWord();
+		return t.f;
+	}
+
+	void WriteFloat(float value)
+	{
+		union { word_t w; float f; } t;
+		t.f = value;
+		WriteWord(t.w);
+	}
+
+	// Both return true only when the whole buffer was transferred.
+	bool ReadRaw(void* buffer, size_t size) { return _RawRead(buffer, size) == size; }
+	bool WriteRaw(const void* buffer, size_t size) { return _RawWrite(buffer, size) == size; }
+
+	// Repositions the stream and resynchronises the offset reported by Tell(),
+	// which would otherwise keep the pre-seek value.
+	void Seek(int pos, int mode)
+	{
+		if (fseek(f, pos, mode) != 0)
+			return;
+		long cur = ftell(f);
+		if (cur >= 0)
+			filePos = (int)cur;
+	}
+	int Tell() { return filePos /*ftell(f)*/; }
+	void Flush() { fflush(f); }
+};
+
+// Loads a whole file into a freshly malloc()ed, NUL-terminated buffer.
+// Returns NULL when the file cannot be opened, sized, allocated or read;
+// otherwise the caller releases the buffer with free().
+static inline char* StringFromFile(const char* filename)
+{
+	FILE* f = fopen(filename, "rb");
+	if (!f) return NULL;
+
+	char* buf = NULL;
+	if (fseek(f, 0, SEEK_END) == 0)
+	{
+		long size = ftell(f); // -1 on failure, e.g. for a non-seekable stream
+		if (size >= 0)
+		{
+			rewind(f);
+			buf = (char*)malloc((size_t)size+1);
+			if (buf)
+			{
+				size_t got = fread(buf, 1, (size_t)size, f);
+				if (ferror(f))
+				{
+					free(buf);
+					buf = NULL;
+				}
+				else
+					buf[got] = 0; // terminate after the bytes actually read
+			}
+		}
+	}
+	fclose(f);
+	return buf;
+}
diff --git a/source/maestro_opcodes.h b/source/maestro_opcodes.h
new file mode 100644
index 0000000..1c9ea37
--- /dev/null
+++ b/source/maestro_opcodes.h
@@ -0,0 +1,31 @@
+#pragma once
+enum
+{
+	MAESTRO_ADD = 0x00,
+	MAESTRO_DP3,
+	MAESTRO_DP4,
+
+	MAESTRO_MUL = 0x08,
+
+	MAESTRO_MAX = 0x0C,
+	MAESTRO_MIN,
+	MAESTRO_RCP,
+	MAESTRO_RSQ,
+
+	MAESTRO_MOV = 0x13,
+
+	MAESTRO_NOP = 0x21,
+	MAESTRO_END,
+
+	MAESTRO_CALL = 0x24,
+
+	MAESTRO_CALLC = 0x26,
+	MAESTRO_IFB,
+	MAESTRO_IF, // ???
+
+	MAESTRO_EMIT = 0x2A, // Geometry shader related
+	MAESTRO_SETEMIT, // Geometry shader related
+
+	MAESTRO_CMP = 0x2E,
+	MAESTRO_CMP2, // ???
+};
diff --git a/source/picasso.h b/source/picasso.h
new file mode 100644
index 0000000..2a7e025
--- /dev/null
+++ b/source/picasso.h
@@ -0,0 +1,104 @@
+#pragma once
+// NOTE(review): the <...> header names and template arguments in this header were
+// lost in extraction. The C headers and std::vector<u32> are reconstructed from
+// usage; <fcntl.h>, <list> and <algorithm> are presumed -- confirm against the repository.
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef WIN32
+#include <fcntl.h>
+#endif
+#include "types.h"
+
+#include <vector>
+#include <list>
+#include <map>
+#include <string>
+#include <algorithm>
+
+#include "FileClass.h"
+
+#include "maestro_opcodes.h"
+
+enum
+{
+	COMP_X = 0,
+	COMP_Y,
+	COMP_Z,
+	COMP_W,
+};
+
+#define SWIZZLE_COMP(n,v) ((v) << (6-(n)*2))
+#define OPDESC_MAKE(out, src1, src2) ((out) | ((src1) << 5) | ((src2) << (5+8+1)))
+#define FMT_OPCODE(n) ((n)<<26)
+#define OUTPUT_MAKE(i, reg, mask) ((i) | ((reg)<<16) | ((u64)(mask)<<32))
+
+#define DEFAULT_SWIZZLE (SWIZZLE_COMP(0,COMP_X) | SWIZZLE_COMP(1,COMP_Y) | SWIZZLE_COMP(2,COMP_Z) | SWIZZLE_COMP(3,COMP_W))
+
+extern std::vector<u32> g_outputBuf;
+
+enum
+{
+	SE_PROC,
+	SE_IFB,
+};
+
+struct StackEntry
+{
+	int type;
+	size_t pos;
+	union
+	{
+		const char* strExtra;
+		size_t uExtra;
+	};
+};
+
+// Stack used to keep track of stuff.
+#define MAX_STACK 32
+extern StackEntry g_stack[MAX_STACK];
+extern int g_stackPos;
+
+#define MAX_OPDESC 128
+extern int g_opdescTable[MAX_OPDESC];
+extern int g_opdescCount;
+
+struct Uniform
+{
+	const char* name;
+	int pos, size;
+};
+
+#define MAX_UNIFORM 0x60
+extern Uniform g_uniformTable[MAX_UNIFORM];
+extern int g_uniformCount;
+
+enum
+{
+	OUTTYPE_POS = 0,
+	OUTTYPE_CLR = 2,
+	OUTTYPE_TCOORD0,
+	OUTTYPE_TCOORD1 = 5,
+	OUTTYPE_TCOORD2,
+};
+
+#define MAX_OUTPUT 8
+extern u64 g_outputTable[MAX_OUTPUT];
+extern int g_outputCount;
+
+struct Constant
+{
+	int regId;
+	float param[4];
+};
+
+#define MAX_CONSTANT 0x60
+extern Constant g_constantTable[MAX_CONSTANT];
+extern int g_constantCount;
+
+// NOTE(review): template arguments were lost in extraction and are reconstructed
+// from usage; the second member of `relocation` is presumed -- confirm.
+typedef std::pair<size_t, size_t> procedure; // position, size
+typedef std::pair<size_t, std::string> relocation;
+extern std::map<std::string, procedure> g_procTable;
+extern std::map<std::string, size_t> g_labels;
+extern std::map<std::string, int> g_aliases;
+
+int AssembleString(char* str, const char* initialFilename);
diff --git a/source/picasso_assembler.cpp b/source/picasso_assembler.cpp
new file mode 100644
index 0000000..3dc00fd
--- /dev/null
+++ b/source/picasso_assembler.cpp
@@ -0,0 +1,764 @@
+#include "picasso.h"
+
+#define BUF g_outputBuf
+#define NO_MORE_STACK (g_stackPos==MAX_STACK)
+
+// Position used by throwError() to prefix diagnostics.
+static const char* curFile = nullptr;
+static int curLine = -1;
+
+std::vector<u32> g_outputBuf;
+
+StackEntry g_stack[MAX_STACK];
+int g_stackPos;
+
+int g_opdescTable[MAX_OPDESC];
+int g_opdescCount;
+
+Uniform g_uniformTable[MAX_UNIFORM];
+int g_uniformCount;
+static int uniformPos = 0x20; // next free register handed out by .uniform/.const
+
+Constant g_constantTable[MAX_CONSTANT];
+int g_constantCount;
+
+u64 g_outputTable[MAX_OUTPUT];
+int g_outputCount;
+
+std::map<std::string, procedure> g_procTable;
+std::map<std::string, size_t> g_labels;
+std::map<std::string, int> g_aliases;
+
+// strtok()-like tokenizer working in place on the source buffer. Unlike
+// strtok() it splits on every single delimiter, so empty tokens are returned.
+// Pass nullptr to continue after the previous token. Returns nullptr once the
+// input is exhausted.
+static char* mystrtok_pos;
+static char* mystrtok(char* str, const char* delim)
+{
+	if (!str) str = mystrtok_pos;
+	if (!str || !*str) return nullptr;
+
+	size_t pos = strcspn(str, delim);
+	auto ret = str;
+	str += pos;
+	if (*str)
+		*str++ = 0;
+	mystrtok_pos = str;
+	return ret;
+}
+
+// Like mystrtok() with blank delimiters, but also swallows the run of
+// whitespace that follows the token.
+static char* mystrtok_spc(char* str)
+{
+	auto ret = mystrtok(str, " \t");
+	if (!ret) return nullptr;
+	// <ctype.h> functions require an unsigned char value
+	while (*mystrtok_pos && isspace((unsigned char)*mystrtok_pos))
+		mystrtok_pos ++;
+	return ret;
+}
+
+// Cuts the line at the first ';' (start of a comment).
+static char* remove_comment(char* buf)
+{
+	char* pos = strchr(buf, ';');
+	if (pos) *pos = 0;
+	return buf;
+}
+
+// Strips trailing whitespace in place and returns a pointer past the leading
+// whitespace. A null pointer is passed through.
+static char* trim_whitespace(char* buf)
+{
+	if (!buf)
+		return nullptr;
+
+	// Remove trailing whitespace
+	int pos;
+	for(pos = strlen(buf)-1; pos >= 0 && isspace((unsigned char)buf[pos]); pos --) buf[pos] = '\0';
+
+	// Remove leading whitespace
+	char* newbuf = buf;
+	for(; isspace((unsigned char)*newbuf); newbuf ++);
+
+	return newbuf;
+}
+
+// Case-insensitive strcmp() replacement; stricmp() is not provided by every
+// host C library.
+static int compareNoCase(const char* a, const char* b)
+{
+	for (;; a ++, b ++)
+	{
+		int ca = tolower((unsigned char)*a);
+		int cb = tolower((unsigned char)*b);
+		if (ca != cb) return ca - cb;
+		if (!ca) return 0;
+	}
+}
+
+// An identifier is a non-empty run of letters, '_' and '.', with digits
+// allowed everywhere except in the first position.
+static bool validateIdentifier(const char* id)
+{
+	int len = strlen(id);
+	bool valid = len > 0;
+	for (int i = 0; valid && i < len; i ++)
+	{
+		int c = (unsigned char)id[i];
+		valid = isalpha(c) || c == '_' || c == '.' || (i > 0 && isdigit(c));
+	}
+	return valid;
+}
+
+// Prints "<file>:<line>: error: <message>" to stderr. Always returns 1 so that
+// callers can write `return throwError(...)`.
+static int throwError(const char* msg, ...)
+{
+	va_list v;
+
+	fprintf(stderr, "%s:%d: error: ", curFile, curLine);
+
+	va_start(v, msg);
+	vfprintf(stderr, msg, v);
+	va_end(v);
+
+	return 1;
+}
+
+// Parses an integer (any base accepted by strtoll with base 0) and checks it
+// against [min, max]. Returns 0 on success, non-zero after reporting an error.
+static int parseInt(char* pos, int& out, long long min, long long max)
+{
+	char* endptr = nullptr;
+	long long res = strtoll(pos, &endptr, 0);
+	if (pos == endptr)
+		return throwError("Invalid value: %s\n", pos);
+	if (res < min || res > max)
+		return throwError("Value out of range (%d..%u): %d\n", (int)min, (unsigned int)max, (int)res);
+	out = res;
+	return 0;
+}
+
+// Propagates a non-zero status to the caller.
+#define safe_call(x) do \
+	{ \
+		int _ = (x); \
+		if (_ != 0) return _; \
+	} while(0)
+
+static int ProcessCommand(const char* cmd);
+
+// Handles a preprocessor line marker: # <line> ["<file>" [flags]].
+// `line` points just past the '#'. Updates curLine and, when a file name is
+// present, curFile (which then points into the source buffer).
+static void processLineMarker(char* line)
+{
+	line = trim_whitespace(line);
+	size_t pos = strcspn(line, " \t");
+	bool hasMore = line[pos] != 0;
+	line[pos] = 0;
+	curLine = atoi(line);
+	if (!hasMore)
+		return; // only a line number: there is nothing valid behind the terminator
+
+	line = trim_whitespace(line + pos + 1);
+	if (*line == '"')
+	{
+		line ++;
+		// Cut at the closing quote; this also drops any flags that follow it
+		char* quotePos = strchr(line, '"');
+		if (quotePos) *quotePos = 0;
+	}
+	curFile = line;
+}
+
+// Assembles a whole source text. `str` is modified in place, and the tables
+// filled in here (e.g. uniform names) keep pointers into it, so the buffer has
+// to outlive their use. Returns 0 on success or a non-zero status after the
+// first error has been reported on stderr.
+int AssembleString(char* str, const char* initialFilename)
+{
+	curFile = initialFilename;
+	curLine = 1;
+
+	int nextLineIncr = 0;
+	char* nextStr = nullptr;
+	for (; str; str = nextStr, curLine += nextLineIncr)
+	{
+		size_t len = strcspn(str, "\n");
+		int linedelim = str[len];
+		str[len] = 0;
+		nextStr = linedelim ? (str + len + 1) : nullptr;
+		nextLineIncr = linedelim == '\n' ? 1 : 0;
+
+		char* line = trim_whitespace(remove_comment(str));
+
+		// Line markers are recognised before labels so that a ':' inside the
+		// file name (e.g. a drive letter) is not mistaken for a label.
+		if (*line == '#')
+		{
+			processLineMarker(line + 1);
+			nextLineIncr = 0; // the marker already set the number of the next line
+			continue;
+		}
+
+		char* colonPos = nullptr;
+		for (;;)
+		{
+			colonPos = strchr(line, ':');
+			if (!colonPos)
+				break;
+			*colonPos = 0;
+			char* labelName = line;
+			line = trim_whitespace(colonPos + 1);
+
+			if (!validateIdentifier(labelName))
+				return throwError("invalid label name: %s\n", labelName);
+
+			auto ret = g_labels.insert( std::pair<std::string,size_t>(labelName, BUF.size()) );
+			if (!ret.second)
+				return throwError("duplicate label: %s\n", labelName);
+
+			//printf("Label: %s\n", labelName);
+		};
+
+		if (!*line)
+			continue;
+
+		// A line marker may still follow a label on the same line
+		if (*line == '#')
+		{
+			processLineMarker(line + 1);
+			nextLineIncr = 0;
+			continue;
+		}
+
+		char* tok = mystrtok_spc(line);
+		safe_call(ProcessCommand(tok));
+	}
+
+	if (g_stackPos)
+		return throwError("unclosed block(s)\n");
+
+	// Bit 31 only marks opdescs whose second operand is still free (see
+	// findOrAddOpdesc); it must not reach the output.
+	for (int i = 0; i < g_opdescCount; i ++)
+		g_opdescTable[i] &= ~BIT(31);
+
+	//safe_call(FixupRelocations());
+
+	return 0;
+}
+
+// --------------------------------------------------------------------
+// Commands
+// --------------------------------------------------------------------
+
+// Next comma-separated argument, trimmed; nullptr when there is none.
+static char* nextArg()
+{
+	return trim_whitespace(mystrtok(nullptr, ","));
+}
+
+// Next argument up to an opening parenthesis, trimmed.
+static char* nextArgCParen()
+{
+	return trim_whitespace(mystrtok(nullptr, "("));
+}
+
+// Next whitespace-separated argument, trimmed.
+static char* nextArgSpc()
+{
+	return trim_whitespace(mystrtok_spc(nullptr));
+}
+
+static int missingParam()
+{
+	return throwError("missing parameter\n");
+}
+
+typedef struct
+{
+	const char* name;
+	int (* func) (const char*, int);
+	int opcode;
+} cmdTableType;
+
+#define NEXT_ARG(_varName) char* _varName; do \
+	{ \
+		_varName = nextArg(); \
+		if (!_varName) return missingParam(); \
+	} while (0)
+
+#define NEXT_ARG_SPC(_varName) char* _varName; do \
+	{ \
+		_varName = nextArgSpc(); \
+		if (!_varName) return missingParam(); \
+	} while (0)
+
+#define NEXT_ARG_CPAREN(_varName) char* _varName; do \
+	{ \
+		_varName = nextArgCParen(); \
+		if (!_varName) return missingParam(); \
+	} while (0)
+
+#define NEXT_ARG_OPT(_varName, _opt) char* _varName; do \
+	{ \
+		_varName = nextArg(); \
+		if (!_varName) _varName = (char*)(_opt); \
+	} while (0)
+
+#define DEF_COMMAND(name) \
+	static int cmd_##name(const char* cmdName, int opcode)
+
+#define DEC_COMMAND(name, fun) \
+	{ #name, cmd_##fun, MAESTRO_##name }
+
+#define DEF_DIRECTIVE(name) \
+	static int dir_##name(const char* cmdName, int _unused)
+
+#define DEC_DIRECTIVE(name) \
+	{ #name, dir_##name, 0 }
+
+static int ensureNoMoreArgs()
+{
+	return nextArg() ? throwError("too many parameters\n") : 0;
+}
+
+/*
+static int ensureLabel(const char* lbl)
+{
+	if (!validateIdentifier(lbl))
+		return throwError("invalid target label: %s\n", lbl);
+	return 0;
+}
+*/
+
+// Register range checks for the three operand slots of an instruction.
+static int ensure_valid_dest(int reg, const char* name)
+{
+	if (reg < 0x00 || reg >= 0x20)
+		return throwError("invalid destination register: %s\n", name);
+	return 0;
+}
+
+static int ensure_valid_src1(int reg, const char* name)
+{
+	if (reg < 0x00 || reg >= 0x80)
+		return throwError("invalid source1 register: %s\n", name);
+	return 0;
+}
+
+static int ensure_valid_src2(int reg, const char* name)
+{
+	if (reg < 0x00 || reg >= 0x20)
+		return throwError("invalid source2 register: %s\n", name);
+	return 0;
+}
+
+#define ENSURE_NO_MORE_ARGS() safe_call(ensureNoMoreArgs())
+
+#define ARG_TO_INT(_varName, _argName, _min, _max) \
+	int _varName = 0; \
+	safe_call(parseInt(_argName, _varName, _min, _max))
+
+#define ARG_TO_REG(_varName, _argName) \
+	int _varName = 0, _varName##Sw = 0; \
+	safe_call(parseReg(_argName, _varName, _varName##Sw));
+
+/*
+#define ARG_LABEL(_argName) \
+	safe_call(ensureLabel(_argName))
+*/
+
+#define ARG_TO_DEST_REG(_reg, _name) \
+	ARG_TO_REG(_reg, _name); \
+	safe_call(ensure_valid_dest(_reg, _name))
+
+#define ARG_TO_SRC1_REG(_reg, _name) \
+	ARG_TO_REG(_reg, _name); \
+	safe_call(ensure_valid_src1(_reg, _name))
+
+#define ARG_TO_SRC2_REG(_reg, _name) \
+	ARG_TO_REG(_reg, _name); \
+	safe_call(ensure_valid_src2(_reg, _name))
+
+// Converts a swizzle string of one to four x/y/z/w letters into its packed
+// form (two bits per component, first component in the top bits). Missing
+// components repeat the last one given. Returns -1 on a malformed string.
+static int parseSwizzling(const char* b)
+{
+	int i, out = 0, q = COMP_X;
+	for (i = 0; b[i] && i < 4; i ++)
+	{
+		switch (tolower((unsigned char)b[i]))
+		{
+			case 'x': q = COMP_X; break;
+			case 'y': q = COMP_Y; break;
+			case 'z': q = COMP_Z; break;
+			case 'w': q = COMP_W; break;
+			default: return -1;
+		}
+		out |= SWIZZLE_COMP(i, q);
+	}
+	if (b[i])
+		return -1;
+	// Fill in missing bits
+	for (int j = i; j < 4; j ++)
+		out |= SWIZZLE_COMP(j, q);
+	return out;
+}
+
+// Builds the 4-bit write mask of the components named by a packed swizzle
+// (x maps to bit 3, w to bit 0).
+static int maskFromSwizzling(int sw)
+{
+	int out = 0;
+	for (int i = 0; i < 4; i ++)
+		out |= BIT(3-((sw>>(i*2))&3));
+	return out;
+}
+
+// Looks up `opdesc` in the operand descriptor table, appending it when needed,
+// and stores its index in `out`. With ignoreOp2 set the second-source bits are
+// not compared, and a newly added entry is tagged with bit 31 so that a later
+// instruction can fill in its own second-source swizzle.
+static int findOrAddOpdesc(int& out, int opdesc, bool ignoreOp2=false)
+{
+	for (int i = 0; i < g_opdescCount; i ++)
+	{
+		int cur_opdesc = g_opdescTable[i];
+		if (ignoreOp2)
+			cur_opdesc &= ~((0xFF << (5+8+1)) | BIT(31)); // clear bits we don't want to compare
+		else if (cur_opdesc & BIT(31))
+		{
+			// We can recycle this opdesc which didn't have an explicit Op2
+			cur_opdesc &= ~BIT(31);
+			int cmp = opdesc &~ (0xFF << (5+8+1)); // partial opdesc used for comparison
+			if (cmp == cur_opdesc)
+			{
+				g_opdescTable[i] = cur_opdesc | (opdesc & (0xFF << (5+8+1)));
+				out = i;
+				return 0;
+			}
+		}
+		if (opdesc == cur_opdesc)
+		{
+			out = i;
+			return 0;
+		}
+	}
+	if (g_opdescCount == MAX_OPDESC)
+		return throwError("too many operand descriptors (limit is %d)\n", MAX_OPDESC);
+	if (ignoreOp2)
+		opdesc |= BIT(31); // will be removed
+	g_opdescTable[g_opdescCount] = opdesc;
+	out = g_opdescCount++;
+	return 0;
+}
+
+// Parses a register operand: <name>[(<offset>)][.<swizzle>], where <name> is
+// an alias or one of o<n>, v<n>, r<n>, c<n>. The text is modified in place.
+// For aliases the requested swizzle is applied on top of the alias' own one.
+static int parseReg(char* pos, int& outReg, int& outSw)
+{
+	outReg = 0;
+	outSw = DEFAULT_SWIZZLE;
+	auto dotPos = strchr(pos, '.');
+	if (dotPos)
+	{
+		*dotPos++ = 0;
+		outSw = parseSwizzling(dotPos);
+		if (outSw < 0)
+			return throwError("invalid swizzling mask: %s\n", dotPos);
+	}
+	int regOffset = 0;
+	auto parenPos = strchr(pos, '(');
+	if (parenPos)
+	{
+		auto closePos = strchr(parenPos, ')');
+		if (!closePos)
+			return throwError("missing close paren: %s\n", pos);
+		*closePos = 0;
+		*parenPos++ = 0;
+		parenPos = trim_whitespace(parenPos);
+		// TODO: support (idx1[+n]), (idx2[+n]), (lcnt[+n])
+		regOffset = atoi(parenPos);
+		if (regOffset < 0)
+			return throwError("invalid register offset: %s\n", parenPos);
+	}
+	auto it = g_aliases.find(pos);
+	if (it != g_aliases.end())
+	{
+		int x = it->second;
+		outReg = x & 0xFF;
+		outReg += regOffset;
+		x >>= 8;
+		// Combine swizzling
+		int temp = 0;
+		for (int j = 0; j < 4; j ++)
+		{
+			int comp = (outSw >> (6 - j*2)) & 3;
+			comp = (x >> (6 - comp*2)) & 3;
+			temp |= SWIZZLE_COMP(j, comp);
+		}
+		outSw = temp;
+		return 0;
+	}
+
+	// An empty name has no register number behind it to parse
+	if (!*pos)
+		return throwError("invalid register: %s\n", pos);
+
+	safe_call(parseInt(pos+1, outReg, 0, 255));
+	switch (*pos)
+	{
+		case 'o': // Output registers
+		case 'v': // Input attributes
+			if (outReg < 0x00 || outReg >= 0x08)
+				return throwError("invalid input/output register: %s(%d)\n", pos, outReg);
+			break;
+		case 'r': // Temporary registers
+			outReg += 0x10;
+			if (outReg < 0x10 || outReg >= 0x20)
+				return throwError("invalid temporary register: %s(%d)\n", pos, outReg - 0x10);
+			break;
+		case 'c': // Vector uniform registers
+			outReg += 0x20;
+			if (outReg < 0x20 || outReg >= 0x80)
+				return throwError("invalid vector uniform register: %s(%d)\n", pos, outReg - 0x20);
+			break;
+		default:
+			return throwError("invalid register: %s\n", pos);
+	}
+	outReg += regOffset;
+	return 0;
+}
+
+// Instructions without operands.
+DEF_COMMAND(format0)
+{
+	ENSURE_NO_MORE_ARGS();
+
+	BUF.push_back(FMT_OPCODE(opcode));
+	return 0;
+}
+
+// Instructions of the form: op dest, src1, src2
+DEF_COMMAND(format1)
+{
+	NEXT_ARG(destName);
+	NEXT_ARG(src1Name);
+	NEXT_ARG(src2Name);
+	ENSURE_NO_MORE_ARGS();
+
+	ARG_TO_DEST_REG(rDest, destName);
+	ARG_TO_SRC1_REG(rSrc1, src1Name);
+	ARG_TO_SRC2_REG(rSrc2, src2Name);
+
+	int opdesc = 0;
+	safe_call(findOrAddOpdesc(opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, rSrc2Sw)));
+
+#ifdef DEBUG
+	printf("%s:%02X d%02X, d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, rSrc2, opdesc);
+#endif
+	BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc2<<7) | (rSrc1<<12) | (rDest<<21));
+
+	return 0;
+}
+
+// Instructions of the form: op dest, src1
+DEF_COMMAND(format2)
+{
+	NEXT_ARG(destName);
+	NEXT_ARG(src1Name);
+	ENSURE_NO_MORE_ARGS();
+
+	ARG_TO_DEST_REG(rDest, destName);
+	ARG_TO_SRC1_REG(rSrc1, src1Name);
+
+	int opdesc = 0;
+	safe_call(findOrAddOpdesc(opdesc, OPDESC_MAKE(maskFromSwizzling(rDestSw), rSrc1Sw, 0), true));
+
+#ifdef DEBUG
+	printf("%s:%02X d%02X, d%02X (0x%X)\n", cmdName, opcode, rDest, rSrc1, opdesc);
+#endif
+	BUF.push_back(FMT_OPCODE(opcode) | opdesc | (rSrc1<<12) | (rDest<<21));
+
+	return 0;
+}
+
+static const cmdTableType cmdTable[] =
+{
+	DEC_COMMAND(NOP, format0),
+	DEC_COMMAND(END, format0),
+
+	DEC_COMMAND(ADD, format1),
+	DEC_COMMAND(DP3, format1),
+	DEC_COMMAND(DP4, format1),
+	DEC_COMMAND(MUL, format1),
+	DEC_COMMAND(MAX, format1),
+	DEC_COMMAND(MIN, format1),
+
+	DEC_COMMAND(RCP, format2),
+	DEC_COMMAND(RSQ, format2),
+	DEC_COMMAND(MOV, format2),
+
+	{ nullptr, nullptr, 0 },
+};
+
+// --------------------------------------------------------------------
+// Directives
+// --------------------------------------------------------------------
+
+// .proc <name> -- opens a procedure block, closed by .end
+DEF_DIRECTIVE(proc)
+{
+	NEXT_ARG(procName);
+	ENSURE_NO_MORE_ARGS();
+
+	if (NO_MORE_STACK)
+		return throwError("too many nested blocks\n");
+
+	// Validate before touching the block stack
+	if (g_procTable.find(procName) != g_procTable.end())
+		return throwError("proc already exists: %s\n", procName);
+
+	auto& elem = g_stack[g_stackPos++];
+	elem.type = SE_PROC;
+	elem.pos = BUF.size();
+	elem.strExtra = procName;
+
+#ifdef DEBUG
+	printf("Defining %s\n", procName);
+#endif
+	return 0;
+}
+
+// .end -- closes the innermost open block
+DEF_DIRECTIVE(end)
+{
+	ENSURE_NO_MORE_ARGS();
+	if (!g_stackPos)
+		return throwError(".end with unmatched block\n");
+
+	auto& elem = g_stack[--g_stackPos];
+	u32 curPos = BUF.size();
+	u32 size = curPos - elem.pos;
+
+	switch (elem.type)
+	{
+		case SE_PROC:
+		{
+#ifdef DEBUG
+			printf("proc: %s(%u, size:%u)\n", elem.strExtra, (u32)elem.pos, size);
+#endif
+			g_procTable.insert( std::pair<std::string,procedure>(elem.strExtra, procedure(elem.pos, size)) );
+			break;
+		}
+	}
+
+	return 0;
+}
+
+// True for the letters that prefix a hardware register name.
+static inline bool isregp(int x)
+{
+	x = tolower(x);
+	return x=='o' || x=='v' || x=='r' || x=='c';
+}
+
+// .alias <name> <register>[.<swizzle>]
+DEF_DIRECTIVE(alias)
+{
+	NEXT_ARG_SPC(aliasName);
+	NEXT_ARG_SPC(aliasReg);
+	ENSURE_NO_MORE_ARGS();
+
+	if (!validateIdentifier(aliasName))
+		return throwError("invalid alias name: %s\n", aliasName);
+	if (isregp((unsigned char)aliasName[0]) && isdigit((unsigned char)aliasName[1]))
+		return throwError("cannot redefine register\n");
+	ARG_TO_REG(rAlias, aliasReg);
+
+	if (g_aliases.find(aliasName) != g_aliases.end())
+		return throwError("identifier already used: %s\n", aliasName);
+
+	g_aliases.insert( std::pair<std::string,int>(aliasName, rAlias | (rAliasSw<<8)) );
+	return 0;
+}
+
+// .uniform <name>[(<size>)], ... -- reserves consecutive uniform registers
+DEF_DIRECTIVE(uniform)
+{
+	for (;;)
+	{
+		char* argText = nextArg();
+		if (!argText) break;
+
+		int uSize = 1;
+		char* parenPos = strchr(argText, '(');
+		if (parenPos)
+		{
+			char* closePos = strchr(parenPos, ')');
+			if (!closePos)
+				return throwError("missing close paren: %s\n", argText);
+			*closePos = 0;
+			*parenPos++ = 0;
+			parenPos = trim_whitespace(parenPos);
+			uSize = atoi(parenPos);
+			if (uSize < 1)
+				return throwError("invalid uniform size: %s(%s)\n", argText, parenPos);
+		}
+		if (!validateIdentifier(argText))
+			return throwError("invalid uniform name: %s\n", argText);
+		// Registers up to 0x7F are usable, so the block may end exactly at 0x80
+		if ((uniformPos+uSize) > 0x80)
+			return throwError("not enough uniform registers: %s(%d)\n", argText, uSize);
+		if (g_uniformCount == MAX_UNIFORM)
+			return throwError("too many uniforms: %s(%d)\n", argText, uSize);
+		if (g_aliases.find(argText) != g_aliases.end())
+			return throwError("identifier already used: %s\n", argText);
+
+		auto& uniform = g_uniformTable[g_uniformCount++];
+		uniform.name = argText; // points into the source buffer
+		uniform.pos = uniformPos;
+		uniform.size = uSize;
+		uniformPos += uSize;
+		g_aliases.insert( std::pair<std::string,int>(argText, uniform.pos | (DEFAULT_SWIZZLE<<8)) );
+
+#ifdef DEBUG
+		printf("uniform %s(%d) @ d%02X:d%02X\n", argText, uSize, uniform.pos, uniform.pos+uSize-1);
+#endif
+	}
+	return 0;
+}
+
+// .const <name>(<x>, <y>, <z>, <w>) -- reserves one register for a constant
+DEF_DIRECTIVE(const)
+{
+	NEXT_ARG_CPAREN(constName);
+	NEXT_ARG(arg0Text);
+	NEXT_ARG(arg1Text);
+	NEXT_ARG(arg2Text);
+	auto arg3Text = mystrtok_pos;
+	if (!mystrtok_pos) return missingParam();
+	auto parenPos = strchr(arg3Text, ')');
+	if (!parenPos) return throwError("invalid syntax\n");
+	*parenPos = 0;
+	arg3Text = trim_whitespace(arg3Text);
+
+	if (!validateIdentifier(constName))
+		return throwError("invalid constant name: %s\n", constName);
+
+	if (g_constantCount == MAX_CONSTANT || uniformPos>=0x80)
+		return throwError("not enough space for constant\n");
+
+	if (g_aliases.find(constName) != g_aliases.end())
+		return throwError("identifier already used: %s\n", constName);
+
+	auto& ct = g_constantTable[g_constantCount++];
+	ct.regId = uniformPos++;
+	ct.param[0] = atof(arg0Text);
+	ct.param[1] = atof(arg1Text);
+	ct.param[2] = atof(arg2Text);
+	ct.param[3] = atof(arg3Text);
+
+	g_aliases.insert( std::pair<std::string,int>(constName, ct.regId | (DEFAULT_SWIZZLE<<8)) );
+
+#ifdef DEBUG
+	printf("constant %s(%f, %f, %f, %f) @ d%02X\n", constName, ct.param[0], ct.param[1], ct.param[2], ct.param[3], ct.regId);
+#endif
+	return 0;
+}
+
+// Maps an output type name (case-insensitive) to its OUTTYPE_* value, or -1.
+static int parseOutType(const char* text)
+{
+	if (compareNoCase(text,"pos")==0 || compareNoCase(text,"position")==0)
+		return OUTTYPE_POS;
+	if (compareNoCase(text,"clr")==0 || compareNoCase(text,"color")==0)
+		return OUTTYPE_CLR;
+	if (compareNoCase(text,"tcoord0")==0 || compareNoCase(text,"texcoord0")==0)
+		return OUTTYPE_TCOORD0;
+	if (compareNoCase(text,"tcoord1")==0 || compareNoCase(text,"texcoord1")==0)
+		return OUTTYPE_TCOORD1;
+	if (compareNoCase(text,"tcoord2")==0 || compareNoCase(text,"texcoord2")==0)
+		return OUTTYPE_TCOORD2;
+	return -1;
+}
+
+// .out <name> <type>[.<mask>] -- declares the next output register
+DEF_DIRECTIVE(out)
+{
+	NEXT_ARG_SPC(outName);
+	NEXT_ARG_SPC(outType);
+	ENSURE_NO_MORE_ARGS();
+
+	if (!validateIdentifier(outName))
+		return throwError("invalid identifier: %s\n", outName);
+
+	int sw = DEFAULT_SWIZZLE;
+	auto dotPos = strchr(outType, '.');
+	if (dotPos)
+	{
+		*dotPos++ = 0;
+		sw = parseSwizzling(dotPos);
+		if (sw < 0)
+			return throwError("invalid output mask: %s\n", dotPos);
+	}
+	int mask = maskFromSwizzling(sw);
+	int type = parseOutType(outType);
+	if (type < 0)
+		return throwError("invalid output type: %s\n", outType);
+
+	if (g_outputCount==MAX_OUTPUT)
+		return throwError("too many outputs\n");
+
+	if (g_aliases.find(outName) != g_aliases.end())
+		return throwError("identifier already used: %s\n", outName);
+
+	int oid = g_outputCount;
+
+#ifdef DEBUG
+	printf("output %s <- o%d (%d:%X)\n", outName, oid, type, mask);
+#endif
+
+	g_outputTable[g_outputCount++] = OUTPUT_MAKE(type, oid, mask);
+	g_aliases.insert( std::pair<std::string,int>(outName, oid | (sw<<8)) );
+	return 0;
+}
+
+static const cmdTableType dirTable[] =
+{
+	DEC_DIRECTIVE(proc),
+	DEC_DIRECTIVE(end),
+	DEC_DIRECTIVE(alias),
+	DEC_DIRECTIVE(uniform),
+	DEC_DIRECTIVE(const),
+	DEC_DIRECTIVE(out),
+	{ nullptr, nullptr, 0 },
+};
+
+// Dispatches one statement: a leading '.' selects the directive table,
+// anything else is an instruction and is only allowed inside a block.
+// Names are matched case-insensitively.
+static int ProcessCommand(const char* cmd)
+{
+	const cmdTableType* table = cmdTable;
+	if (*cmd == '.')
+	{
+		cmd ++;
+		table = dirTable;
+	} else if (!g_stackPos)
+		return throwError("instruction outside block\n");
+
+	for (int i = 0; table[i].name; i ++)
+		if (compareNoCase(table[i].name, cmd) == 0)
+			return table[i].func(cmd, table[i].opcode);
+
+	return throwError("invalid instruction: %s\n", cmd);
+}
diff --git a/source/picasso_frontend.cpp b/source/picasso_frontend.cpp
new file mode 100644
index 0000000..b7dd8f6
--- /dev/null
+++ b/source/picasso_frontend.cpp
@@ -0,0 +1,204 @@
+#include "picasso.h"
+
+// !! Taken from ctrulib !!
+// Converts a 32-bit float to the 24-bit layout written to the constant table:
+// 1 sign bit, 7 exponent bits and the top 16 mantissa bits (truncated).
+u32 f32tof24(float vf)
+{
+	if (!vf) return 0;
+
+	union { float f; u32 v; } q;
+	q.f=vf;
+
+	u32 s = q.v>>31;
+	// Re-biasing the exponent (subtracting 0x40) goes negative for tiny values,
+	// so it has to be signed for the range checks below to mean anything.
+	int exp = (int)((q.v>>23) & 0xFF) - 0x40;
+	u32 man = (q.v>>7) & 0xFFFF;
+
+	if (exp < 0)
+		return s<<23; // too small to represent: flush to (signed) zero
+	if (exp > 0x7F)
+		return (s<<23) | (0x7F<<16); // too large: clamp so the exponent cannot spill into the sign bit
+	return man | ((u32)exp<<16) | (s<<23);
+}
+
+#ifdef WIN32
+// Rewrites an MSYS-style path ("/c/dir" or "/c") in place to its drive-letter
+// form ("c:/dir", "c:"). Other paths, and null pointers, are left untouched.
+static inline void FixMinGWPath(char* buf)
+{
+	if (buf && buf[0] == '/' && isalpha((unsigned char)buf[1]) && (buf[2] == '/' || buf[2] == 0))
+	{
+		buf[0] = buf[1];
+		buf[1] = ':';
+	}
+}
+#endif
+
+// Prints the command line synopsis to stderr.
+int usage(const char* prog)
+{
+	fprintf(stderr,
+		"Usage:\n\n"
+		"%s shbinFile vshFile [hFile]\n", prog);
+	return 0;
+}
+
+// Assembles vshFile into shbinFile and optionally writes a C header with one
+// pair of SHADER_UREG_/SHADER_ULEN_ defines per uniform.
+int main(int argc, char* argv[])
+{
+	if (argc < 3 || argc > 4)
+		return usage(argv[0]);
+
+	char* shbinFile = argv[1];
+	char* vshFile = argv[2];
+	char* hFile = argc > 3 ? argv[3] : NULL;
+
+#ifdef WIN32
+	FixMinGWPath(shbinFile);
+	FixMinGWPath(vshFile);
+	FixMinGWPath(hFile);
+#endif
+
+	char* sourceCode = StringFromFile(vshFile);
+	if (!sourceCode)
+	{
+		fprintf(stderr, "Cannot open input file!\n");
+		return 1;
+	}
+
+	// The assembler keeps pointers into sourceCode (the uniform names written
+	// below), so the buffer must stay alive until the output is complete. The
+	// guard releases it on every way out of main().
+	struct SourceGuard { char* p; ~SourceGuard() { free(p); } } sourceGuard = { sourceCode };
+
+	int rc = AssembleString(sourceCode, vshFile);
+	if (rc != 0)
+		return rc;
+
+	auto mainIt = g_procTable.find("main");
+	if (mainIt == g_procTable.end())
+	{
+		fprintf(stderr, "Error: main proc not defined\n");
+		return 1;
+	}
+
+	//if (g_opdescCount > 9)
+	//	printf("WARNING: currently using more than 9 opdescs -- libctru has a bug\n");
+
+	FileClass f(shbinFile, "wb");
+
+	if (f.openerror())
+	{
+		fprintf(stderr, "Can't open output file!\n");
+		return 1;
+	}
+
+	f.WriteWord(0x424C5644); // DVLB
+	f.WriteWord(1); // 1 DVLE
+	f.WriteWord(3*4 + 0x28); // offset to DVLE
+
+	u32 dvlpStart = f.Tell();
+	u32 shaderSize = g_outputBuf.size();
+	u32 paramStart = 0x28 + 0x40; // size of the DVLP header plus the DVLE header
+
+	f.WriteWord(0x504C5644); // DVLP
+	f.WriteWord(0); // version
+	f.WriteWord(paramStart); // offset to shader binary blob
+	f.WriteWord(shaderSize); // size of shader binary blob
+	paramStart += shaderSize*4;
+	f.WriteWord(paramStart); // offset to opdesc table
+	f.WriteWord(g_opdescCount); // number of opdescs
+	paramStart += g_opdescCount*8;
+	f.WriteWord(paramStart); // offset to symtable (TODO)
+	f.WriteWord(0); // ????
+	f.WriteWord(0); // ????
+	f.WriteWord(0); // ????
+
+	u32 dvleStart = f.Tell();
+	paramStart -= dvleStart - dvlpStart; // the following offsets are relative to the DVLE
+
+	f.WriteWord(0x454C5644); // DVLE
+	f.WriteHword(0); // padding?
+	f.WriteHword(0); // Vertex shader
+	f.WriteWord(mainIt->second.first); // offset to main
+	f.WriteWord(mainIt->second.first+mainIt->second.second); // offset to end of main
+	f.WriteWord(0); // ???
+	f.WriteWord(0); // ???
+	f.WriteWord(paramStart); // offset to constant table
+	f.WriteWord(g_constantCount); // size of constant table
+	paramStart += g_constantCount*0x14;
+	f.WriteWord(paramStart); // offset to label table (TODO)
+	f.WriteWord(0); // size of label table (TODO)
+	f.WriteWord(paramStart); // offset to output table
+	f.WriteWord(g_outputCount); // size of output table
+	paramStart += g_outputCount*8;
+	f.WriteWord(paramStart); // offset to uniform table
+	f.WriteWord(g_uniformCount); // size of uniform table
+	paramStart += g_uniformCount*8;
+	f.WriteWord(paramStart); // offset to symbol table
+	u32 temp = f.Tell(); // patched below once the symbol table size is known
+	f.WriteWord(0); // size of symbol table
+
+	// Write program
+	for (u32 p : g_outputBuf)
+		f.WriteWord(p);
+
+	// Write opdescs
+	for (int i = 0; i < g_opdescCount; i ++)
+	{
+		f.WriteWord(g_opdescTable[i]);
+		f.WriteWord(0x0000000F); // unknown
+	}
+
+	// Write constants
+	for (int i = 0; i < g_constantCount; i ++)
+	{
+		auto& ct = g_constantTable[i];
+		f.WriteHword(0);
+		f.WriteByte(ct.regId-0x20);
+		f.WriteByte(0);
+		for (int j = 0; j < 4; j ++)
+			f.WriteWord(f32tof24(ct.param[j]));
+	}
+
+	// Write outputs
+	for (int i = 0; i < g_outputCount; i ++)
+		f.WriteDword(g_outputTable[i]);
+
+	// Write uniforms
+	size_t sp = 0; // running offset of each name inside the symbol table
+	for (int i = 0; i < g_uniformCount; i ++)
+	{
+		auto& u = g_uniformTable[i];
+		size_t l = strlen(u.name)+1;
+		f.WriteWord(sp); sp += l;
+		f.WriteHword(u.pos-0x20);
+		f.WriteHword(u.pos+u.size-1-0x20);
+	}
+
+	// Write size of symbol table
+	u32 temp2 = f.Tell();
+	f.Seek(temp, SEEK_SET);
+	f.WriteWord(sp);
+	f.Seek(temp2, SEEK_SET);
+
+	// Write symbols
+	for (int i = 0; i < g_uniformCount; i ++)
+	{
+		auto u = g_uniformTable[i].name;
+		size_t l = strlen(u)+1;
+		f.WriteRaw(u, l);
+	}
+
+	if (hFile)
+	{
+		auto f2 = fopen(hFile, "w");
+		if (!f2)
+		{
+			fprintf(stderr, "Can't open header file!\n");
+			return 1;
+		}
+
+		fprintf(f2, "// Generated by picasso\n");
+		fprintf(f2, "#pragma once\n");
+		for (int i = 0; i < g_uniformCount; i ++)
+		{
+			auto& u = g_uniformTable[i];
+			fprintf(f2, "#define SHADER_UREG_%s 0x%02X\n", u.name, u.pos-0x20);
+			fprintf(f2, "#define SHADER_ULEN_%s %d\n", u.name, u.size);
+		}
+
+		fclose(f2);
+	}
+
+	return 0;
+}
diff --git a/source/types.h b/source/types.h
new file mode 100644
index 0000000..3147c5c
--- /dev/null
+++ b/source/types.h
@@ -0,0 +1,35 @@
+#pragma once
+#include <stdint.h>
+
+typedef uint64_t dword_t;
+typedef uint32_t word_t;
+typedef uint16_t hword_t;
+typedef uint8_t byte_t;
+typedef int64_t dlong_t;
+typedef int32_t long_t;
+typedef int16_t short_t;
+typedef int8_t char_t;
+typedef uint64_t u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+#define BIT(n) (1U << (n))
+
+// le_*/be_* convert between host byte order and the named byte order
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define be_dword(a) __builtin_bswap64(a)
+#define be_word(a) __builtin_bswap32(a)
+#define be_hword(a) __builtin_bswap16(a)
+#define le_dword(a) (a)
+#define le_word(a) (a)
+#define le_hword(a) (a)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define be_dword(a) (a)
+#define be_word(a) (a)
+#define be_hword(a) (a)
+#define le_dword(a) __builtin_bswap64(a)
+#define le_word(a) __builtin_bswap32(a)
+#define le_hword(a) __builtin_bswap16(a)
+#else
+#error "What's the endianness of the platform you're targeting?"
+#endif