diff options
71 files changed, 1776 insertions, 3634 deletions
diff --git a/.github/ b/.github/
index 4c9c7975a..9727c5712 100644
--- a/.github/
+++ b/.github/
@@ -4,10 +4,10 @@ Please read the FAQ:
If the FAQ does not answer your question, please go to:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 52a1fd492..ce9a29032 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -271,11 +271,6 @@ if (MSVC)
# process subdirectories
- include_directories(externals/qhexedit)
- add_subdirectory(externals/qhexedit)
diff --git a/ b/
index 7a21eebf8..55f520cb4 100644
--- a/
+++ b/
@@ -1,6 +1,7 @@
# Reporting Issues
-**The issue tracker is not a support forum.** Unless you can provide precise *technical information* regarding an issue, you *should not post in it*. If you need support, first read the [FAQ]( and then either visit our IRC channel, [our forum]( or ask in a general emulation forum such as [/r/emulation]( If you post support questions, generic messages to the developers or vague reports without technical details, they will be closed and locked.
+**The issue tracker is not a support forum.** Unless you can provide precise *technical information* regarding an issue, you *should not post in it*. If you need support, first read the [FAQ]( and then either visit our IRC channel, [our forum]( or ask in a general emulation forum such as [/r/emulation]( If you post support questions, generic messages to the developers or vague reports without technical details, they will be closed and locked.
If you believe you have a valid issue report, please post text or a screenshot from the log (the console window that opens alongside Citra) and build version (hex string visible in the titlebar and zip filename), as well as your hardware and software information if applicable.
diff --git a/ b/
index 7d1e1de0d..461d2e3a7 100644
--- a/
+++ b/
@@ -7,7 +7,7 @@ Citra Emulator
Citra is an experimental open-source Nintendo 3DS emulator/debugger written in C++. It is written with portability in mind, with builds actively maintained for Windows, Linux and macOS. Citra only emulates a subset of 3DS hardware, and therefore is generally only useful for running/debugging homebrew applications. At this time, Citra is even able to boot several commercial games! Most of these do not run to a playable state, but we are working every day to advance the project forward.
-Citra is licensed under the GPLv2 (or any later version). Refer to the license.txt file included. Please read the [FAQ]( before getting started with the project.
+Citra is licensed under the GPLv2 (or any later version). Refer to the license.txt file included. Please read the [FAQ]( before getting started with the project.
Check out our [website](!
diff --git a/externals/nihstro b/externals/nihstro
-Subproject 26a0a04a458df2b9ba6e39608bee183d8a0a00e
+Subproject 7e24743af21a7c2e3cef21ef174ae4269d0cfda
diff --git a/externals/qhexedit/CMakeLists.txt b/externals/qhexedit/CMakeLists.txt
deleted file mode 100644
index e7470dfe4..000000000
--- a/externals/qhexedit/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
- commands.cpp
- qhexedit.cpp
- qhexedit_p.cpp
- xbytearray.cpp
- )
- commands.h
- qhexedit.h
- qhexedit_p.h
- xbytearray.h
- )
-create_directory_groups(${SRCS} ${HEADERS})
-add_library(qhexedit STATIC ${SRCS} ${HEADERS})
-target_link_libraries(qhexedit ${CITRA_QT_LIBS})
diff --git a/externals/qhexedit/commands.cpp b/externals/qhexedit/commands.cpp
deleted file mode 100644
index 303091d1d..000000000
--- a/externals/qhexedit/commands.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-#include "commands.h"
-CharCommand::CharCommand(XByteArray * xData, Cmd cmd, int charPos, char newChar, QUndoCommand *parent)
- : QUndoCommand(parent)
- _xData = xData;
- _charPos = charPos;
- _newChar = newChar;
- _cmd = cmd;
-bool CharCommand::mergeWith(const QUndoCommand *command)
- const CharCommand *nextCommand = static_cast<const CharCommand *>(command);
- bool result = false;
- if (_cmd != remove)
- {
- if (nextCommand->_cmd == replace)
- if (nextCommand->_charPos == _charPos)
- {
- _newChar = nextCommand->_newChar;
- result = true;
- }
- }
- return result;
-void CharCommand::undo()
- switch (_cmd)
- {
- case insert:
- _xData->remove(_charPos, 1);
- break;
- case replace:
- _xData->replace(_charPos, _oldChar);
- _xData->setDataChanged(_charPos, _wasChanged);
- break;
- case remove:
- _xData->insert(_charPos, _oldChar);
- _xData->setDataChanged(_charPos, _wasChanged);
- break;
- }
-void CharCommand::redo()
- switch (_cmd)
- {
- case insert:
- _xData->insert(_charPos, _newChar);
- break;
- case replace:
- _oldChar = _xData->data()[_charPos];
- _wasChanged = _xData->dataChanged(_charPos);
- _xData->replace(_charPos, _newChar);
- break;
- case remove:
- _oldChar = _xData->data()[_charPos];
- _wasChanged = _xData->dataChanged(_charPos);
- _xData->remove(_charPos, 1);
- break;
- }
-ArrayCommand::ArrayCommand(XByteArray * xData, Cmd cmd, int baPos, QByteArray newBa, int len, QUndoCommand *parent)
- : QUndoCommand(parent)
- _cmd = cmd;
- _xData = xData;
- _baPos = baPos;
- _newBa = newBa;
- _len = len;
-void ArrayCommand::undo()
- switch (_cmd)
- {
- case insert:
- _xData->remove(_baPos, _newBa.length());
- break;
- case replace:
- _xData->replace(_baPos, _oldBa);
- _xData->setDataChanged(_baPos, _wasChanged);
- break;
- case remove:
- _xData->insert(_baPos, _oldBa);
- _xData->setDataChanged(_baPos, _wasChanged);
- break;
- }
-void ArrayCommand::redo()
- switch (_cmd)
- {
- case insert:
- _xData->insert(_baPos, _newBa);
- break;
- case replace:
- _oldBa = _xData->data().mid(_baPos, _len);
- _wasChanged = _xData->dataChanged(_baPos, _len);
- _xData->replace(_baPos, _newBa);
- break;
- case remove:
- _oldBa = _xData->data().mid(_baPos, _len);
- _wasChanged = _xData->dataChanged(_baPos, _len);
- _xData->remove(_baPos, _len);
- break;
- }
diff --git a/externals/qhexedit/commands.h b/externals/qhexedit/commands.h
deleted file mode 100644
index 9931b3fb5..000000000
--- a/externals/qhexedit/commands.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#ifndef COMMANDS_H
-#define COMMANDS_H
-/** \cond docNever */
-#include <QUndoCommand>
-#include "xbytearray.h"
-/*! CharCommand is a class to prived undo/redo functionality in QHexEdit.
-A QUndoCommand represents a single editing action on a document. CharCommand
-is responsable for manipulations on single chars. It can insert. replace and
-remove characters. A manipulation stores allways to actions
-1. redo (or do) action
-2. undo action.
-CharCommand also supports command compression via mergeWidht(). This allows
-the user to execute a undo command contation e.g. 3 steps in a single command.
-If you for example insert a new byt "34" this means for the editor doing 3
-steps: insert a "00", replace it with "03" and the replace it with "34". These
-3 steps are combined into a single step, insert a "34".
-class CharCommand : public QUndoCommand
- enum { Id = 1234 };
- enum Cmd {insert, remove, replace};
- CharCommand(XByteArray * xData, Cmd cmd, int charPos, char newChar,
- QUndoCommand *parent=0);
- void undo();
- void redo();
- bool mergeWith(const QUndoCommand *command);
- int id() const { return Id; }
- XByteArray * _xData;
- int _charPos;
- bool _wasChanged;
- char _newChar;
- char _oldChar;
- Cmd _cmd;
-/*! ArrayCommand provides undo/redo functionality for handling binary strings. It
-can undo/redo insert, replace and remove binary strins (QByteArrays).
-class ArrayCommand : public QUndoCommand
- enum Cmd {insert, remove, replace};
- ArrayCommand(XByteArray * xData, Cmd cmd, int baPos, QByteArray newBa=QByteArray(), int len=0,
- QUndoCommand *parent=0);
- void undo();
- void redo();
- Cmd _cmd;
- XByteArray * _xData;
- int _baPos;
- int _len;
- QByteArray _wasChanged;
- QByteArray _newBa;
- QByteArray _oldBa;
-/** \endcond docNever */
-#endif // COMMANDS_H
diff --git a/externals/qhexedit/license.txt b/externals/qhexedit/license.txt
deleted file mode 100644
index f166cc57b..000000000
--- a/externals/qhexedit/license.txt
+++ /dev/null
diff --git a/externals/qhexedit/qhexedit.cpp b/externals/qhexedit/qhexedit.cpp
deleted file mode 100644
index b12624e08..000000000
--- a/externals/qhexedit/qhexedit.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-#include <QtGui>
-#include "qhexedit.h"
-QHexEdit::QHexEdit(QWidget *parent) : QScrollArea(parent)
- qHexEdit_p = new QHexEditPrivate(this);
- setWidget(qHexEdit_p);
- setWidgetResizable(true);
- connect(qHexEdit_p, SIGNAL(currentAddressChanged(int)), this, SIGNAL(currentAddressChanged(int)));
- connect(qHexEdit_p, SIGNAL(currentSizeChanged(int)), this, SIGNAL(currentSizeChanged(int)));
- connect(qHexEdit_p, SIGNAL(dataChanged()), this, SIGNAL(dataChanged()));
- connect(qHexEdit_p, SIGNAL(overwriteModeChanged(bool)), this, SIGNAL(overwriteModeChanged(bool)));
- setFocusPolicy(Qt::NoFocus);
-int QHexEdit::indexOf(const QByteArray & ba, int from) const
- return qHexEdit_p->indexOf(ba, from);
-void QHexEdit::insert(int i, const QByteArray & ba)
- qHexEdit_p->insert(i, ba);
-void QHexEdit::insert(int i, char ch)
- qHexEdit_p->insert(i, ch);
-int QHexEdit::lastIndexOf(const QByteArray & ba, int from) const
- return qHexEdit_p->lastIndexOf(ba, from);
-void QHexEdit::remove(int pos, int len)
- qHexEdit_p->remove(pos, len);
-void QHexEdit::replace( int pos, int len, const QByteArray & after)
- qHexEdit_p->replace(pos, len, after);
-QString QHexEdit::toReadableString()
- return qHexEdit_p->toRedableString();
-QString QHexEdit::selectionToReadableString()
- return qHexEdit_p->selectionToReadableString();
-void QHexEdit::setAddressArea(bool addressArea)
- qHexEdit_p->setAddressArea(addressArea);
-void QHexEdit::redo()
- qHexEdit_p->redo();
-void QHexEdit::undo()
- qHexEdit_p->undo();
-void QHexEdit::setAddressWidth(int addressWidth)
- qHexEdit_p->setAddressWidth(addressWidth);
-void QHexEdit::setAsciiArea(bool asciiArea)
- qHexEdit_p->setAsciiArea(asciiArea);
-void QHexEdit::setHighlighting(bool mode)
- qHexEdit_p->setHighlighting(mode);
-void QHexEdit::setAddressOffset(int offset)
- qHexEdit_p->setAddressOffset(offset);
-int QHexEdit::addressOffset()
- return qHexEdit_p->addressOffset();
-void QHexEdit::setCursorPosition(int cursorPos)
- // cursorPos in QHexEditPrivate is the position of the textcoursor without
- // blanks, means bytePos*2
- qHexEdit_p->setCursorPos(cursorPos*2);
-int QHexEdit::cursorPosition()
- return qHexEdit_p->cursorPos() / 2;
-void QHexEdit::setData(const QByteArray &data)
- qHexEdit_p->setData(data);
-QByteArray QHexEdit::data()
- return qHexEdit_p->data();
-void QHexEdit::setAddressAreaColor(const QColor &color)
- qHexEdit_p->setAddressAreaColor(color);
-QColor QHexEdit::addressAreaColor()
- return qHexEdit_p->addressAreaColor();
-void QHexEdit::setHighlightingColor(const QColor &color)
- qHexEdit_p->setHighlightingColor(color);
-QColor QHexEdit::highlightingColor()
- return qHexEdit_p->highlightingColor();
-void QHexEdit::setSelectionColor(const QColor &color)
- qHexEdit_p->setSelectionColor(color);
-QColor QHexEdit::selectionColor()
- return qHexEdit_p->selectionColor();
-void QHexEdit::setOverwriteMode(bool overwriteMode)
- qHexEdit_p->setOverwriteMode(overwriteMode);
-bool QHexEdit::overwriteMode()
- return qHexEdit_p->overwriteMode();
-void QHexEdit::setReadOnly(bool readOnly)
- qHexEdit_p->setReadOnly(readOnly);
-bool QHexEdit::isReadOnly()
- return qHexEdit_p->isReadOnly();
-void QHexEdit::setFont(const QFont &font)
- qHexEdit_p->setFont(font);
-const QFont & QHexEdit::font() const
- return qHexEdit_p->font();
diff --git a/externals/qhexedit/qhexedit.h b/externals/qhexedit/qhexedit.h
deleted file mode 100644
index 15b6d7603..000000000
--- a/externals/qhexedit/qhexedit.h
+++ /dev/null
@@ -1,240 +0,0 @@
-// Original author: Winfried Simon
-// See
-// Huge thanks!
-#ifndef QHEXEDIT_H
-#define QHEXEDIT_H
-#include <QtGui>
-#include "qhexedit_p.h"
-/** \mainpage
-QHexEdit is a binary editor widget for Qt.
-\version Version 0.6.3
-\image html hexedit.png
-/*! QHexEdit is a hex editor widget written in C++ for the Qt (Qt4) framework.
-It is a simple editor for binary data, just like QPlainTextEdit is for text
-data. There are sip configuration files included, so it is easy to create
-bindings for PyQt and you can use this widget also in python.
-QHexEdit takes the data of a QByteArray (setData()) and shows it. You can use
-the mouse or the keyboard to navigate inside the widget. If you hit the keys
-(0..9, a..f) you will change the data. Changed data is highlighted and can be
-accessed via data().
-Normaly QHexEdit works in the overwrite Mode. You can set overwriteMode(false)
-and insert data. In this case the size of data() increases. It is also possible
-to delete bytes (del or backspace), here the size of data decreases.
-You can select data with keyboard hits or mouse movements. The copy-key will
-copy the selected data into the clipboard. The cut-key copies also but delets
-it afterwards. In overwrite mode, the paste function overwrites the content of
-the (does not change the length) data. In insert mode, clipboard data will be
-inserted. The clipboard content is expected in ASCII Hex notation. Unknown
-characters will be ignored.
-QHexEdit comes with undo/redo functionality. All changes can be undone, by
-pressing the undo-key (usually ctr-z). They can also be redone afterwards.
-The undo/redo framework is cleared, when setData() sets up a new
-content for the editor. You can search data inside the content with indexOf()
-and lastIndexOf(). The replace() function is to change located subdata. This
-'replaced' data can also be undone by the undo/redo framework.
-This widget can only handle small amounts of data. The size has to be below 10
-megabytes, otherwise the scroll sliders ard not shown and you can't scroll any
- class QHexEdit : public QScrollArea
- /*! Property data holds the content of QHexEdit. Call setData() to set the
- content of QHexEdit, data() returns the actual content.
- */
- Q_PROPERTY(QByteArray data READ data WRITE setData)
- /*! Property addressOffset is added to the Numbers of the Address Area.
- A offset in the address area (left side) is sometimes usefull, whe you show
- only a segment of a complete memory picture. With setAddressOffset() you set
- this property - with addressOffset() you get the actual value.
- */
- Q_PROPERTY(int addressOffset READ addressOffset WRITE setAddressOffset)
- /*! Property address area color sets (setAddressAreaColor()) the backgorund
- color of address areas. You can also read the color (addressaAreaColor()).
- */
- Q_PROPERTY(QColor addressAreaColor READ addressAreaColor WRITE setAddressAreaColor)
- /*! Porperty cursorPosition sets or gets the position of the editor cursor
- in QHexEdit.
- */
- Q_PROPERTY(int cursorPosition READ cursorPosition WRITE setCursorPosition)
- /*! Property highlighting color sets (setHighlightingColor()) the backgorund
- color of highlighted text areas. You can also read the color
- (highlightingColor()).
- */
- Q_PROPERTY(QColor highlightingColor READ highlightingColor WRITE setHighlightingColor)
- /*! Property selection color sets (setSelectionColor()) the backgorund
- color of selected text areas. You can also read the color
- (selectionColor()).
- */
- Q_PROPERTY(QColor selectionColor READ selectionColor WRITE setSelectionColor)
- /*! Porperty overwrite mode sets (setOverwriteMode()) or gets (overwriteMode()) the mode
- in which the editor works. In overwrite mode the user will overwrite existing data. The
- size of data will be constant. In insert mode the size will grow, when inserting
- new data.
- */
- Q_PROPERTY(bool overwriteMode READ overwriteMode WRITE setOverwriteMode)
- /*! Porperty readOnly sets (setReadOnly()) or gets (isReadOnly) the mode
- in which the editor works. In readonly mode the the user can only navigate
- through the data and select data; modifying is not possible. This
- property's default is false.
- */
- Q_PROPERTY(bool readOnly READ isReadOnly WRITE setReadOnly)
- /*! Set the font of the widget. Please use fixed width fonts like Mono or Courier.*/
- Q_PROPERTY(QFont font READ font WRITE setFont)
- /*! Creates an instance of QHexEdit.
- \param parent Parent widget of QHexEdit.
- */
- QHexEdit(QWidget *parent = 0);
- /*! Returns the index position of the first occurrence
- of the byte array ba in this byte array, searching forward from index position
- from. Returns -1 if ba could not be found. In addition to this functionality
- of QByteArray the cursorposition is set to the end of found bytearray and
- it will be selected.
- */
- int indexOf(const QByteArray & ba, int from = 0) const;
- /*! Inserts a byte array.
- \param i Index position, where to insert
- \param ba byte array, which is to insert
- In overwrite mode, the existing data will be overwritten, in insertmode ba will be
- inserted and size of data grows.
- */
- void insert(int i, const QByteArray & ba);
- /*! Inserts a char.
- \param i Index position, where to insert
- \param ch Char, which is to insert
- In overwrite mode, the existing data will be overwritten, in insertmode ba will be
- inserted and size of data grows.
- */
- void insert(int i, char ch);
- /*! Returns the index position of the last occurrence
- of the byte array ba in this byte array, searching backwards from index position
- from. Returns -1 if ba could not be found. In addition to this functionality
- of QByteArray the cursorposition is set to the beginning of found bytearray and
- it will be selected.
- */
- int lastIndexOf(const QByteArray & ba, int from = 0) const;
- /*! Removes len bytes from the content.
- \param pos Index position, where to remove
- \param len Amount of bytes to remove
- In overwrite mode, the existing bytes will be overwriten with 0x00.
- */
- void remove(int pos, int len=1);
- /*! Replaces len bytes from index position pos with the byte array after.
- */
- void replace( int pos, int len, const QByteArray & after);
- /*! Gives back a formatted image of the content of QHexEdit
- */
- QString toReadableString();
- /*! Gives back a formatted image of the selected content of QHexEdit
- */
- QString selectionToReadableString();
- /*! \cond docNever */
- void setAddressOffset(int offset);
- int addressOffset();
- void setCursorPosition(int cusorPos);
- int cursorPosition();
- void setData(QByteArray const &data);
- QByteArray data();
- void setAddressAreaColor(QColor const &color);
- QColor addressAreaColor();
- void setHighlightingColor(QColor const &color);
- QColor highlightingColor();
- void setSelectionColor(QColor const &color);
- QColor selectionColor();
- void setOverwriteMode(bool);
- bool overwriteMode();
- void setReadOnly(bool);
- bool isReadOnly();
- const QFont &font() const;
- void setFont(const QFont &);
- /*! \endcond docNever */
-public slots:
- /*! Redoes the last operation. If there is no operation to redo, i.e.
- there is no redo step in the undo/redo history, nothing happens.
- */
- void redo();
- /*! Set the minimum width of the address area.
- \param addressWidth Width in characters.
- */
- void setAddressWidth(int addressWidth);
- /*! Switch the address area on or off.
- \param addressArea true (show it), false (hide it).
- */
- void setAddressArea(bool addressArea);
- /*! Switch the ascii area on or off.
- \param asciiArea true (show it), false (hide it).
- */
- void setAsciiArea(bool asciiArea);
- /*! Switch the highlighting feature on or of.
- \param mode true (show it), false (hide it).
- */
- void setHighlighting(bool mode);
- /*! Undoes the last operation. If there is no operation to undo, i.e.
- there is no undo step in the undo/redo history, nothing happens.
- */
- void undo();
- /*! Contains the address, where the cursor is located. */
- void currentAddressChanged(int address);
- /*! Contains the size of the data to edit. */
- void currentSizeChanged(int size);
- /*! The signal is emited every time, the data is changed. */
- void dataChanged();
- /*! The signal is emited every time, the overwrite mode is changed. */
- void overwriteModeChanged(bool state);
- /*! \cond docNever */
- QHexEditPrivate *qHexEdit_p;
- QHBoxLayout *layout;
- QScrollArea *scrollArea;
- /*! \endcond docNever */
diff --git a/externals/qhexedit/qhexedit_p.cpp b/externals/qhexedit/qhexedit_p.cpp
deleted file mode 100644
index 2a6885de8..000000000
--- a/externals/qhexedit/qhexedit_p.cpp
+++ /dev/null
@@ -1,857 +0,0 @@
-#include "qhexedit_p.h"
-#include "commands.h"
-const int HEXCHARS_IN_LINE = 47;
-const int GAP_ADR_HEX = 10;
-const int GAP_HEX_ASCII = 16;
-const int BYTES_PER_LINE = 16;
-QHexEditPrivate::QHexEditPrivate(QScrollArea *parent) : QWidget(parent)
- _undoStack = new QUndoStack(this);
- _scrollArea = parent;
- setAddressWidth(4);
- setAddressOffset(0);
- setAddressArea(true);
- setAsciiArea(true);
- setHighlighting(true);
- setOverwriteMode(true);
- setReadOnly(false);
- setAddressAreaColor(QColor(0xd4, 0xd4, 0xd4, 0xff));
- setHighlightingColor(QColor(0xff, 0xff, 0x99, 0xff));
- setSelectionColor(QColor(0x6d, 0x9e, 0xff, 0xff));
- setFont(QFont("Courier", 10));
- _size = 0;
- resetSelection(0);
- setFocusPolicy(Qt::StrongFocus);
- connect(&_cursorTimer, SIGNAL(timeout()), this, SLOT(updateCursor()));
- _cursorTimer.setInterval(500);
- _cursorTimer.start();
-void QHexEditPrivate::setAddressOffset(int offset)
- _xData.setAddressOffset(offset);
- adjust();
-int QHexEditPrivate::addressOffset()
- return _xData.addressOffset();
-void QHexEditPrivate::setData(const QByteArray &data)
- _xData.setData(data);
- _undoStack->clear();
- adjust();
- setCursorPos(0);
-QByteArray QHexEditPrivate::data()
- return;
-void QHexEditPrivate::setAddressAreaColor(const QColor &color)
- _addressAreaColor = color;
- update();
-QColor QHexEditPrivate::addressAreaColor()
- return _addressAreaColor;
-void QHexEditPrivate::setHighlightingColor(const QColor &color)
- _highlightingColor = color;
- update();
-QColor QHexEditPrivate::highlightingColor()
- return _highlightingColor;
-void QHexEditPrivate::setSelectionColor(const QColor &color)
- _selectionColor = color;
- update();
-QColor QHexEditPrivate::selectionColor()
- return _selectionColor;
-void QHexEditPrivate::setReadOnly(bool readOnly)
- _readOnly = readOnly;
-bool QHexEditPrivate::isReadOnly()
- return _readOnly;
-XByteArray & QHexEditPrivate::xData()
- return _xData;
-int QHexEditPrivate::indexOf(const QByteArray & ba, int from)
- if (from > ( - 1))
- from = - 1;
- int idx =, from);
- if (idx > -1)
- {
- int curPos = idx*2;
- setCursorPos(curPos + ba.length()*2);
- resetSelection(curPos);
- setSelection(curPos + ba.length()*2);
- ensureVisible();
- }
- return idx;
-void QHexEditPrivate::insert(int index, const QByteArray & ba)
- if (ba.length() > 0)
- {
- if (_overwriteMode)
- {
- QUndoCommand *arrayCommand= new ArrayCommand(&_xData, ArrayCommand::replace, index, ba, ba.length());
- _undoStack->push(arrayCommand);
- emit dataChanged();
- }
- else
- {
- QUndoCommand *arrayCommand= new ArrayCommand(&_xData, ArrayCommand::insert, index, ba, ba.length());
- _undoStack->push(arrayCommand);
- emit dataChanged();
- }
- }
-void QHexEditPrivate::insert(int index, char ch)
- QUndoCommand *charCommand = new CharCommand(&_xData, CharCommand::insert, index, ch);
- _undoStack->push(charCommand);
- emit dataChanged();
-int QHexEditPrivate::lastIndexOf(const QByteArray & ba, int from)
- from -= ba.length();
- if (from < 0)
- from = 0;
- int idx =, from);
- if (idx > -1)
- {
- int curPos = idx*2;
- setCursorPos(curPos);
- resetSelection(curPos);
- setSelection(curPos + ba.length()*2);
- ensureVisible();
- }
- return idx;
-void QHexEditPrivate::remove(int index, int len)
- if (len > 0)
- {
- if (len == 1)
- {
- if (_overwriteMode)
- {
- QUndoCommand *charCommand = new CharCommand(&_xData, CharCommand::replace, index, char(0));
- _undoStack->push(charCommand);
- emit dataChanged();
- }
- else
- {
- QUndoCommand *charCommand = new CharCommand(&_xData, CharCommand::remove, index, char(0));
- _undoStack->push(charCommand);
- emit dataChanged();
- }
- }
- else
- {
- QByteArray ba = QByteArray(len, char(0));
- if (_overwriteMode)
- {
- QUndoCommand *arrayCommand = new ArrayCommand(&_xData, ArrayCommand::replace, index, ba, ba.length());
- _undoStack->push(arrayCommand);
- emit dataChanged();
- }
- else
- {
- QUndoCommand *arrayCommand= new ArrayCommand(&_xData, ArrayCommand::remove, index, ba, len);
- _undoStack->push(arrayCommand);
- emit dataChanged();
- }
- }
- }
-void QHexEditPrivate::replace(int index, char ch)
- QUndoCommand *charCommand = new CharCommand(&_xData, CharCommand::replace, index, ch);
- _undoStack->push(charCommand);
- resetSelection();
- emit dataChanged();
-void QHexEditPrivate::replace(int index, const QByteArray & ba)
- QUndoCommand *arrayCommand= new ArrayCommand(&_xData, ArrayCommand::replace, index, ba, ba.length());
- _undoStack->push(arrayCommand);
- resetSelection();
- emit dataChanged();
-void QHexEditPrivate::replace(int pos, int len, const QByteArray &after)
- QUndoCommand *arrayCommand= new ArrayCommand(&_xData, ArrayCommand::replace, pos, after, len);
- _undoStack->push(arrayCommand);
- resetSelection();
- emit dataChanged();
-void QHexEditPrivate::setAddressArea(bool addressArea)
- _addressArea = addressArea;
- adjust();
- setCursorPos(_cursorPosition);
-void QHexEditPrivate::setAddressWidth(int addressWidth)
- _xData.setAddressWidth(addressWidth);
- setCursorPos(_cursorPosition);
-void QHexEditPrivate::setAsciiArea(bool asciiArea)
- _asciiArea = asciiArea;
- adjust();
-void QHexEditPrivate::setFont(const QFont &font)
- QWidget::setFont(font);
- adjust();
-void QHexEditPrivate::setHighlighting(bool mode)
- _highlighting = mode;
- update();
-void QHexEditPrivate::setOverwriteMode(bool overwriteMode)
- _overwriteMode = overwriteMode;
-bool QHexEditPrivate::overwriteMode()
- return _overwriteMode;
-void QHexEditPrivate::redo()
- _undoStack->redo();
- emit dataChanged();
- setCursorPos(_cursorPosition);
- update();
-void QHexEditPrivate::undo()
- _undoStack->undo();
- emit dataChanged();
- setCursorPos(_cursorPosition);
- update();
-QString QHexEditPrivate::toRedableString()
- return _xData.toRedableString();
-QString QHexEditPrivate::selectionToReadableString()
- return _xData.toRedableString(getSelectionBegin(), getSelectionEnd());
-void QHexEditPrivate::keyPressEvent(QKeyEvent *event)
- int charX = (_cursorX - _xPosHex) / _charWidth;
- int posX = (charX / 3) * 2 + (charX % 3);
- int posBa = (_cursorY / _charHeight) * BYTES_PER_LINE + posX / 2;
-/* Cursor movements */
- if (event->matches(QKeySequence::MoveToNextChar))
- {
- setCursorPos(_cursorPosition + 1);
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToPreviousChar))
- {
- setCursorPos(_cursorPosition - 1);
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToEndOfLine))
- {
- setCursorPos(_cursorPosition | (2 * BYTES_PER_LINE -1));
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToStartOfLine))
- {
- setCursorPos(_cursorPosition - (_cursorPosition % (2 * BYTES_PER_LINE)));
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToPreviousLine))
- {
- setCursorPos(_cursorPosition - (2 * BYTES_PER_LINE));
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToNextLine))
- {
- setCursorPos(_cursorPosition + (2 * BYTES_PER_LINE));
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToNextPage))
- {
- setCursorPos(_cursorPosition + (((_scrollArea->viewport()->height() / _charHeight) - 1) * 2 * BYTES_PER_LINE));
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToPreviousPage))
- {
- setCursorPos(_cursorPosition - (((_scrollArea->viewport()->height() / _charHeight) - 1) * 2 * BYTES_PER_LINE));
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToEndOfDocument))
- {
- setCursorPos(_xData.size() * 2);
- resetSelection(_cursorPosition);
- }
- if (event->matches(QKeySequence::MoveToStartOfDocument))
- {
- setCursorPos(0);
- resetSelection(_cursorPosition);
- }
-/* Select commands */
- if (event->matches(QKeySequence::SelectAll))
- {
- resetSelection(0);
- setSelection(2*_xData.size() + 1);
- }
- if (event->matches(QKeySequence::SelectNextChar))
- {
- int pos = _cursorPosition + 1;
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectPreviousChar))
- {
- int pos = _cursorPosition - 1;
- setSelection(pos);
- setCursorPos(pos);
- }
- if (event->matches(QKeySequence::SelectEndOfLine))
- {
- int pos = _cursorPosition - (_cursorPosition % (2 * BYTES_PER_LINE)) + (2 * BYTES_PER_LINE);
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectStartOfLine))
- {
- int pos = _cursorPosition - (_cursorPosition % (2 * BYTES_PER_LINE));
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectPreviousLine))
- {
- int pos = _cursorPosition - (2 * BYTES_PER_LINE);
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectNextLine))
- {
- int pos = _cursorPosition + (2 * BYTES_PER_LINE);
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectNextPage))
- {
- int pos = _cursorPosition + (((_scrollArea->viewport()->height() / _charHeight) - 1) * 2 * BYTES_PER_LINE);
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectPreviousPage))
- {
- int pos = _cursorPosition - (((_scrollArea->viewport()->height() / _charHeight) - 1) * 2 * BYTES_PER_LINE);
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectEndOfDocument))
- {
- int pos = _xData.size() * 2;
- setCursorPos(pos);
- setSelection(pos);
- }
- if (event->matches(QKeySequence::SelectStartOfDocument))
- {
- int pos = 0;
- setCursorPos(pos);
- setSelection(pos);
- }
-/* Edit Commands */
-if (!_readOnly)
- /* Hex input */
- int key = int(event->text()[0].toLatin1());
- if ((key>='0' && key<='9') || (key>='a' && key <= 'f'))
- {
- if (getSelectionBegin() != getSelectionEnd())
- {
- posBa = getSelectionBegin();
- remove(posBa, getSelectionEnd() - posBa);
- setCursorPos(2*posBa);
- resetSelection(2*posBa);
- }
- // If insert mode, then insert a byte
- if (_overwriteMode == false)
- if ((charX % 3) == 0)
- {
- insert(posBa, char(0));
- }
- // Change content
- if (_xData.size() > 0)
- {
- QByteArray hexValue =, 1).toHex();
- if ((charX % 3) == 0)
- hexValue[0] = key;
- else
- hexValue[1] = key;
- replace(posBa, QByteArray().fromHex(hexValue)[0]);
- setCursorPos(_cursorPosition + 1);
- resetSelection(_cursorPosition);
- }
- }
- /* Cut & Paste */
- if (event->matches(QKeySequence::Cut))
- {
- QString result = QString();
- for (int idx = getSelectionBegin(); idx < getSelectionEnd(); idx++)
- {
- result +=, 1).toHex() + " ";
- if ((idx % 16) == 15)
- result.append("\n");
- }
- remove(getSelectionBegin(), getSelectionEnd() - getSelectionBegin());
- QClipboard *clipboard = QApplication::clipboard();
- clipboard->setText(result);
- setCursorPos(getSelectionBegin());
- resetSelection(getSelectionBegin());
- }
- if (event->matches(QKeySequence::Paste))
- {
- QClipboard *clipboard = QApplication::clipboard();
- QByteArray ba = QByteArray().fromHex(clipboard->text().toLatin1());
- insert(_cursorPosition / 2, ba);
- setCursorPos(_cursorPosition + 2 * ba.length());
- resetSelection(getSelectionBegin());
- }
- /* Delete char */
- if (event->matches(QKeySequence::Delete))
- {
- if (getSelectionBegin() != getSelectionEnd())
- {
- posBa = getSelectionBegin();
- remove(posBa, getSelectionEnd() - posBa);
- setCursorPos(2*posBa);
- resetSelection(2*posBa);
- }
- else
- {
- if (_overwriteMode)
- replace(posBa, char(0));
- else
- remove(posBa, 1);
- }
- }
- /* Backspace */
- if ((event->key() == Qt::Key_Backspace) && (event->modifiers() == Qt::NoModifier))
- {
- if (getSelectionBegin() != getSelectionEnd())
- {
- posBa = getSelectionBegin();
- remove(posBa, getSelectionEnd() - posBa);
- setCursorPos(2*posBa);
- resetSelection(2*posBa);
- }
- else
- {
- if (posBa > 0)
- {
- if (_overwriteMode)
- replace(posBa - 1, char(0));
- else
- remove(posBa - 1, 1);
- setCursorPos(_cursorPosition - 2);
- }
- }
- }
- /* undo */
- if (event->matches(QKeySequence::Undo))
- {
- undo();
- }
- /* redo */
- if (event->matches(QKeySequence::Redo))
- {
- redo();
- }
- }
- if (event->matches(QKeySequence::Copy))
- {
- QString result = QString();
- for (int idx = getSelectionBegin(); idx < getSelectionEnd(); idx++)
- {
- result +=, 1).toHex() + " ";
- if ((idx % 16) == 15)
- result.append('\n');
- }
- QClipboard *clipboard = QApplication::clipboard();
- clipboard->setText(result);
- }
- // Switch between insert/overwrite mode
- if ((event->key() == Qt::Key_Insert) && (event->modifiers() == Qt::NoModifier))
- {
- _overwriteMode = !_overwriteMode;
- setCursorPos(_cursorPosition);
- overwriteModeChanged(_overwriteMode);
- }
- ensureVisible();
- update();
-void QHexEditPrivate::mouseMoveEvent(QMouseEvent * event)
- _blink = false;
- update();
- int actPos = cursorPos(event->pos());
- setCursorPos(actPos);
- setSelection(actPos);
-void QHexEditPrivate::mousePressEvent(QMouseEvent * event)
- _blink = false;
- update();
- int cPos = cursorPos(event->pos());
- resetSelection(cPos);
- setCursorPos(cPos);
-void QHexEditPrivate::paintEvent(QPaintEvent *event)
- QPainter painter(this);
- // draw some patterns if needed
- painter.fillRect(event->rect(), this->palette().color(QPalette::Base));
- if (_addressArea)
- painter.fillRect(QRect(_xPosAdr, event->rect().top(), _xPosHex - GAP_ADR_HEX + 2, height()), _addressAreaColor);
- if (_asciiArea)
- {
- int linePos = _xPosAscii - (GAP_HEX_ASCII / 2);
- painter.setPen(Qt::gray);
- painter.drawLine(linePos, event->rect().top(), linePos, height());
- }
- painter.setPen(this->palette().color(QPalette::WindowText));
- // calc position
- int firstLineIdx = ((event->rect().top()/ _charHeight) - _charHeight) * BYTES_PER_LINE;
- if (firstLineIdx < 0)
- firstLineIdx = 0;
- int lastLineIdx = ((event->rect().bottom() / _charHeight) + _charHeight) * BYTES_PER_LINE;
- if (lastLineIdx > _xData.size())
- lastLineIdx = _xData.size();
- int yPosStart = ((firstLineIdx) / BYTES_PER_LINE) * _charHeight + _charHeight;
- // paint address area
- if (_addressArea)
- {
- for (int lineIdx = firstLineIdx, yPos = yPosStart; lineIdx < lastLineIdx; lineIdx += BYTES_PER_LINE, yPos +=_charHeight)
- {
- QString address = QString("%1")
- .arg(lineIdx + _xData.addressOffset(), _xData.realAddressNumbers(), 16, QChar('0'));
- painter.drawText(_xPosAdr, yPos, address);
- }
- }
- // paint hex area
- QByteArray hexBa(, lastLineIdx - firstLineIdx + 1).toHex());
- QBrush highLighted = QBrush(_highlightingColor);
- QPen colHighlighted = QPen(this->palette().color(QPalette::WindowText));
- QBrush selected = QBrush(_selectionColor);
- QPen colSelected = QPen(Qt::white);
- QPen colStandard = QPen(this->palette().color(QPalette::WindowText));
- painter.setBackgroundMode(Qt::TransparentMode);
- for (int lineIdx = firstLineIdx, yPos = yPosStart; lineIdx < lastLineIdx; lineIdx += BYTES_PER_LINE, yPos +=_charHeight)
- {
- QByteArray hex;
- int xPos = _xPosHex;
- for (int colIdx = 0; ((lineIdx + colIdx) < _xData.size() && (colIdx < BYTES_PER_LINE)); colIdx++)
- {
- int posBa = lineIdx + colIdx;
- if ((getSelectionBegin() <= posBa) && (getSelectionEnd() > posBa))
- {
- painter.setBackground(selected);
- painter.setBackgroundMode(Qt::OpaqueMode);
- painter.setPen(colSelected);
- }
- else
- {
- if (_highlighting)
- {
- // hilight diff bytes
- painter.setBackground(highLighted);
- if (_xData.dataChanged(posBa))
- {
- painter.setPen(colHighlighted);
- painter.setBackgroundMode(Qt::OpaqueMode);
- }
- else
- {
- painter.setPen(colStandard);
- painter.setBackgroundMode(Qt::TransparentMode);
- }
- }
- }
- // render hex value
- if (colIdx == 0)
- {
- hex = hexBa.mid((lineIdx - firstLineIdx) * 2, 2);
- painter.drawText(xPos, yPos, hex);
- xPos += 2 * _charWidth;
- } else {
- hex = hexBa.mid((lineIdx + colIdx - firstLineIdx) * 2, 2).prepend(" ");
- painter.drawText(xPos, yPos, hex);
- xPos += 3 * _charWidth;
- }
- }
- }
- painter.setBackgroundMode(Qt::TransparentMode);
- painter.setPen(this->palette().color(QPalette::WindowText));
- // paint ascii area
- if (_asciiArea)
- {
- for (int lineIdx = firstLineIdx, yPos = yPosStart; lineIdx < lastLineIdx; lineIdx += BYTES_PER_LINE, yPos +=_charHeight)
- {
- int xPosAscii = _xPosAscii;
- for (int colIdx = 0; ((lineIdx + colIdx) < _xData.size() && (colIdx < BYTES_PER_LINE)); colIdx++)
- {
- painter.drawText(xPosAscii, yPos, _xData.asciiChar(lineIdx + colIdx));
- xPosAscii += _charWidth;
- }
- }
- }
- // paint cursor
- if (_blink && !_readOnly && hasFocus())
- {
- if (_overwriteMode)
- painter.fillRect(_cursorX, _cursorY + _charHeight - 2, _charWidth, 2, this->palette().color(QPalette::WindowText));
- else
- painter.fillRect(_cursorX, _cursorY, 2, _charHeight, this->palette().color(QPalette::WindowText));
- }
- if (_size != _xData.size())
- {
- _size = _xData.size();
- emit currentSizeChanged(_size);
- }
-void QHexEditPrivate::setCursorPos(int position)
- // delete cursor
- _blink = false;
- update();
- // cursor in range?
- if (_overwriteMode)
- {
- if (position > (_xData.size() * 2 - 1))
- position = _xData.size() * 2 - 1;
- } else {
- if (position > (_xData.size() * 2))
- position = _xData.size() * 2;
- }
- if (position < 0)
- position = 0;
- // calc position
- _cursorPosition = position;
- _cursorY = (position / (2 * BYTES_PER_LINE)) * _charHeight + 4;
- int x = (position % (2 * BYTES_PER_LINE));
- _cursorX = (((x / 2) * 3) + (x % 2)) * _charWidth + _xPosHex;
- // immiadately draw cursor
- _blink = true;
- update();
- emit currentAddressChanged(_cursorPosition/2);
-int QHexEditPrivate::cursorPos(QPoint pos)
- int result = -1;
- // find char under cursor
- if ((pos.x() >= _xPosHex) && (pos.x() < (_xPosHex + HEXCHARS_IN_LINE * _charWidth)))
- {
- int x = (pos.x() - _xPosHex) / _charWidth;
- if ((x % 3) == 0)
- x = (x / 3) * 2;
- else
- x = ((x / 3) * 2) + 1;
- int y = ((pos.y() - 3) / _charHeight) * 2 * BYTES_PER_LINE;
- result = x + y;
- }
- return result;
-int QHexEditPrivate::cursorPos()
- return _cursorPosition;
-void QHexEditPrivate::resetSelection()
- _selectionBegin = _selectionInit;
- _selectionEnd = _selectionInit;
-void QHexEditPrivate::resetSelection(int pos)
- if (pos < 0)
- pos = 0;
- pos = pos / 2;
- _selectionInit = pos;
- _selectionBegin = pos;
- _selectionEnd = pos;
-void QHexEditPrivate::setSelection(int pos)
- if (pos < 0)
- pos = 0;
- pos = pos / 2;
- if (pos >= _selectionInit)
- {
- _selectionEnd = pos;
- _selectionBegin = _selectionInit;
- }
- else
- {
- _selectionBegin = pos;
- _selectionEnd = _selectionInit;
- }
-int QHexEditPrivate::getSelectionBegin()
- return _selectionBegin;
-int QHexEditPrivate::getSelectionEnd()
- return _selectionEnd;
-void QHexEditPrivate::updateCursor()
- if (_blink)
- _blink = false;
- else
- _blink = true;
- update(_cursorX, _cursorY, _charWidth, _charHeight);
-void QHexEditPrivate::adjust()
- _charWidth = fontMetrics().width(QLatin1Char('9'));
- _charHeight = fontMetrics().height();
- _xPosAdr = 0;
- if (_addressArea)
- _xPosHex = _xData.realAddressNumbers()*_charWidth + GAP_ADR_HEX;
- else
- _xPosHex = 0;
- _xPosAscii = _xPosHex + HEXCHARS_IN_LINE * _charWidth + GAP_HEX_ASCII;
- // tell QAbstractScollbar, how big we are
- setMinimumHeight(((_xData.size()/16 + 1) * _charHeight) + 5);
- if(_asciiArea)
- setMinimumWidth(_xPosAscii + (BYTES_PER_LINE * _charWidth));
- else
- setMinimumWidth(_xPosHex + HEXCHARS_IN_LINE * _charWidth);
- update();
-void QHexEditPrivate::ensureVisible()
- // scrolls to cursorx, cusory (which are set by setCursorPos)
- // x-margin is 3 pixels, y-margin is half of charHeight
- _scrollArea->ensureVisible(_cursorX, _cursorY + _charHeight/2, 3, _charHeight/2 + 2);
diff --git a/externals/qhexedit/qhexedit_p.h b/externals/qhexedit/qhexedit_p.h
deleted file mode 100644
index 1c2c11cc2..000000000
--- a/externals/qhexedit/qhexedit_p.h
+++ /dev/null
@@ -1,128 +0,0 @@
-#ifndef QHEXEDIT_P_H
-#define QHEXEDIT_P_H
-/** \cond docNever */
-#include <QtGui>
-#include <QtWidgets>
-#include "xbytearray.h"
-class QHexEditPrivate : public QWidget
- QHexEditPrivate(QScrollArea *parent);
- void setAddressAreaColor(QColor const &color);
- QColor addressAreaColor();
- void setAddressOffset(int offset);
- int addressOffset();
- void setCursorPos(int position);
- int cursorPos();
- void setData(QByteArray const &data);
- QByteArray data();
- void setHighlightingColor(QColor const &color);
- QColor highlightingColor();
- void setOverwriteMode(bool overwriteMode);
- bool overwriteMode();
- void setReadOnly(bool readOnly);
- bool isReadOnly();
- void setSelectionColor(QColor const &color);
- QColor selectionColor();
- XByteArray & xData();
- int indexOf(const QByteArray & ba, int from = 0);
- void insert(int index, const QByteArray & ba);
- void insert(int index, char ch);
- int lastIndexOf(const QByteArray & ba, int from = 0);
- void remove(int index, int len=1);
- void replace(int index, char ch);
- void replace(int index, const QByteArray & ba);
- void replace(int pos, int len, const QByteArray & after);
- void setAddressArea(bool addressArea);
- void setAddressWidth(int addressWidth);
- void setAsciiArea(bool asciiArea);
- void setHighlighting(bool mode);
- virtual void setFont(const QFont &font);
- void undo();
- void redo();
- QString toRedableString();
- QString selectionToReadableString();
- void currentAddressChanged(int address);
- void currentSizeChanged(int size);
- void dataChanged();
- void overwriteModeChanged(bool state);
- void keyPressEvent(QKeyEvent * event);
- void mouseMoveEvent(QMouseEvent * event);
- void mousePressEvent(QMouseEvent * event);
- void paintEvent(QPaintEvent *event);
- int cursorPos(QPoint pos); // calc cursorpos from graphics position. DOES NOT STORE POSITION
- void resetSelection(int pos); // set selectionStart and selectionEnd to pos
- void resetSelection(); // set selectionEnd to selectionStart
- void setSelection(int pos); // set min (if below init) or max (if greater init)
- int getSelectionBegin();
- int getSelectionEnd();
-private slots:
- void updateCursor();
- void adjust();
- void ensureVisible();
- QColor _addressAreaColor;
- QColor _highlightingColor;
- QColor _selectionColor;
- QScrollArea *_scrollArea;
- QTimer _cursorTimer;
- QUndoStack *_undoStack;
- XByteArray _xData; // Hält den Inhalt des Hex Editors
- bool _blink; // true: then cursor blinks
- bool _renderingRequired; // Flag to store that rendering is necessary
- bool _addressArea; // left area of QHexEdit
- bool _asciiArea; // medium area
- bool _highlighting; // highlighting of changed bytes
- bool _overwriteMode;
- bool _readOnly; // true: the user can only look and navigate
- int _charWidth, _charHeight; // char dimensions (dpendend on font)
- int _cursorX, _cursorY; // graphics position of the cursor
- int _cursorPosition; // character positioin in stream (on byte ends in to steps)
- int _xPosAdr, _xPosHex, _xPosAscii; // graphics x-position of the areas
- int _selectionBegin; // First selected char
- int _selectionEnd; // Last selected char
- int _selectionInit; // That's, where we pressed the mouse button
- int _size;
-/** \endcond docNever */
diff --git a/externals/qhexedit/xbytearray.cpp b/externals/qhexedit/xbytearray.cpp
deleted file mode 100644
index 09a04cfeb..000000000
--- a/externals/qhexedit/xbytearray.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-#include "xbytearray.h"
- _oldSize = -99;
- _addressNumbers = 4;
- _addressOffset = 0;
-int XByteArray::addressOffset()
- return _addressOffset;
-void XByteArray::setAddressOffset(int offset)
- _addressOffset = offset;
-int XByteArray::addressWidth()
- return _addressNumbers;
-void XByteArray::setAddressWidth(int width)
- if ((width >= 0) && (width<=6))
- {
- _addressNumbers = width;
- }
-QByteArray & XByteArray::data()
- return _data;
-void XByteArray::setData(QByteArray data)
- _data = data;
- _changedData = QByteArray(data.length(), char(0));
-bool XByteArray::dataChanged(int i)
- return bool(_changedData[i]);
-QByteArray XByteArray::dataChanged(int i, int len)
- return _changedData.mid(i, len);
-void XByteArray::setDataChanged(int i, bool state)
- _changedData[i] = char(state);
-void XByteArray::setDataChanged(int i, const QByteArray & state)
- int length = state.length();
- int len;
- if ((i + length) > _changedData.length())
- len = _changedData.length() - i;
- else
- len = length;
- _changedData.replace(i, len, state);
-int XByteArray::realAddressNumbers()
- if (_oldSize != _data.size())
- {
- // is addressNumbers wide enought?
- QString test = QString("%1")
- .arg(_data.size() + _addressOffset, _addressNumbers, 16, QChar('0'));
- _realAddressNumbers = test.size();
- }
- return _realAddressNumbers;
-int XByteArray::size()
- return _data.size();
-QByteArray & XByteArray::insert(int i, char ch)
- _data.insert(i, ch);
- _changedData.insert(i, char(1));
- return _data;
-QByteArray & XByteArray::insert(int i, const QByteArray & ba)
- _data.insert(i, ba);
- _changedData.insert(i, QByteArray(ba.length(), char(1)));
- return _data;
-QByteArray & XByteArray::remove(int i, int len)
- _data.remove(i, len);
- _changedData.remove(i, len);
- return _data;
-QByteArray & XByteArray::replace(int index, char ch)
- _data[index] = ch;
- _changedData[index] = char(1);
- return _data;
-QByteArray & XByteArray::replace(int index, const QByteArray & ba)
- int len = ba.length();
- return replace(index, len, ba);
-QByteArray & XByteArray::replace(int index, int length, const QByteArray & ba)
- int len;
- if ((index + length) > _data.length())
- len = _data.length() - index;
- else
- len = length;
- _data.replace(index, len, ba.mid(0, len));
- _changedData.replace(index, len, QByteArray(len, char(1)));
- return _data;
-QChar XByteArray::asciiChar(int index)
- char ch = _data[index];
- if ((ch < 0x20) || (ch > 0x7e))
- ch = '.';
- return QChar(ch);
-QString XByteArray::toRedableString(int start, int end)
- int adrWidth = realAddressNumbers();
- if (_addressNumbers > adrWidth)
- adrWidth = _addressNumbers;
- if (end < 0)
- end = _data.size();
- QString result;
- for (int i=start; i < end; i += 16)
- {
- QString adrStr = QString("%1").arg(_addressOffset + i, adrWidth, 16, QChar('0'));
- QString hexStr;
- QString ascStr;
- for (int j=0; j<16; j++)
- {
- if ((i + j) < _data.size())
- {
- hexStr.append(" ").append(_data.mid(i+j, 1).toHex());
- ascStr.append(asciiChar(i+j));
- }
- }
- result += adrStr + " " + QString("%1").arg(hexStr, -48) + " " + QString("%1").arg(ascStr, -17) + "\n";
- }
- return result;
diff --git a/externals/qhexedit/xbytearray.h b/externals/qhexedit/xbytearray.h
deleted file mode 100644
index 2b67c61b8..000000000
--- a/externals/qhexedit/xbytearray.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/** \cond docNever */
-#include <QtCore>
-/*! XByteArray represents the content of QHexEcit.
-XByteArray comprehend the data itself and informations to store if it was
-changed. The QHexEdit component uses these informations to perform nice
-rendering of the data
-XByteArray also provides some functionality to insert, replace and remove
-single chars and QByteArras. Additionally some functions support rendering
-and converting to readable strings.
-class XByteArray
- explicit XByteArray();
- int addressOffset();
- void setAddressOffset(int offset);
- int addressWidth();
- void setAddressWidth(int width);
- QByteArray & data();
- void setData(QByteArray data);
- bool dataChanged(int i);
- QByteArray dataChanged(int i, int len);
- void setDataChanged(int i, bool state);
- void setDataChanged(int i, const QByteArray & state);
- int realAddressNumbers();
- int size();
- QByteArray & insert(int i, char ch);
- QByteArray & insert(int i, const QByteArray & ba);
- QByteArray & remove(int pos, int len);
- QByteArray & replace(int index, char ch);
- QByteArray & replace(int index, const QByteArray & ba);
- QByteArray & replace(int index, int length, const QByteArray & ba);
- QChar asciiChar(int index);
- QString toRedableString(int start=0, int end=-1);
-public slots:
- QByteArray _data;
- QByteArray _changedData;
- int _addressNumbers; // wanted width of address area
- int _addressOffset; // will be added to the real addres inside bytearray
- int _realAddressNumbers; // real width of address area (can be greater then wanted width)
- int _oldSize; // size of data
-/** \endcond docNever */
-#endif // XBYTEARRAY_H
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index ba6acf28e..84f9c03a7 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -56,20 +56,8 @@ void AddAddressSpace(Kernel::VMManager& address_space) {
void SelectSink(std::string sink_id) {
- auto iter =
- std::find_if(g_sink_details.begin(), g_sink_details.end(),
- [sink_id](const auto& sink_detail) { return == sink_id; });
- if (sink_id == "auto" || iter == g_sink_details.end()) {
- if (sink_id != "auto") {
- LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id %s", sink_id.c_str());
- }
- // Auto-select.
- // g_sink_details is ordered in terms of desirability, with the best choice at the front.
- iter = g_sink_details.begin();
- }
- DSP::HLE::SetSink(iter->factory());
+ const SinkDetails& sink_details = GetSinkDetails(sink_id);
+ DSP::HLE::SetSink(sink_details.factory());
void EnableStretching(bool enable) {
diff --git a/src/audio_core/null_sink.h b/src/audio_core/null_sink.h
index e7668438c..c732926a2 100644
--- a/src/audio_core/null_sink.h
+++ b/src/audio_core/null_sink.h
@@ -23,6 +23,12 @@ public:
size_t SamplesInQueue() const override {
return 0;
+ void SetDevice(int device_id) override {}
+ std::vector<std::string> GetDeviceList() const override {
+ return {};
+ }
} // namespace AudioCore
diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp
index 4b66cd826..933c5f16d 100644
--- a/src/audio_core/sdl2_sink.cpp
+++ b/src/audio_core/sdl2_sink.cpp
@@ -4,12 +4,12 @@
#include <list>
#include <numeric>
-#include <vector>
#include <SDL.h>
#include "audio_core/audio_core.h"
#include "audio_core/sdl2_sink.h"
#include "common/assert.h"
#include "common/logging/log.h"
+#include "core/settings.h"
namespace AudioCore {
@@ -42,10 +42,24 @@ SDL2Sink::SDL2Sink() : impl(std::make_unique<Impl>()) {
SDL_AudioSpec obtained_audiospec;
- impl->audio_device_id =
- SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0);
+ int device_count = SDL_GetNumAudioDevices(0);
+ device_list.clear();
+ for (int i = 0; i < device_count; ++i) {
+ device_list.push_back(SDL_GetAudioDeviceName(i, 0));
+ }
+ const char* device = nullptr;
+ if (device_count >= 1 && Settings::values.audio_device_id != "auto" &&
+ !Settings::values.audio_device_id.empty()) {
+ device = Settings::values.audio_device_id.c_str();
+ }
+ impl->audio_device_id = SDL_OpenAudioDevice(device, false, &desired_audiospec,
+ &obtained_audiospec, SDL_AUDIO_ALLOW_ANY_CHANGE);
if (impl->audio_device_id <= 0) {
- LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed with: %s", SDL_GetError());
+ LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed with code %d for device \"%s\"",
+ impl->audio_device_id, Settings::values.audio_device_id.c_str());
@@ -69,6 +83,10 @@ unsigned int SDL2Sink::GetNativeSampleRate() const {
return impl->sample_rate;
+std::vector<std::string> SDL2Sink::GetDeviceList() const {
+ return device_list;
void SDL2Sink::EnqueueSamples(const s16* samples, size_t sample_count) {
if (impl->audio_device_id <= 0)
@@ -96,6 +114,10 @@ size_t SDL2Sink::SamplesInQueue() const {
return total_size;
+void SDL2Sink::SetDevice(int device_id) {
+ this->device_id = device_id;
void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) {
Impl* impl = reinterpret_cast<Impl*>(impl_);
diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h
index ccd0f7c7e..bcc725369 100644
--- a/src/audio_core/sdl2_sink.h
+++ b/src/audio_core/sdl2_sink.h
@@ -21,9 +21,14 @@ public:
size_t SamplesInQueue() const override;
+ std::vector<std::string> GetDeviceList() const override;
+ void SetDevice(int device_id) override;
struct Impl;
std::unique_ptr<Impl> impl;
+ int device_id;
+ std::vector<std::string> device_list;
} // namespace AudioCore
diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h
index 08f3bab5b..558c8c0fe 100644
--- a/src/audio_core/sink.h
+++ b/src/audio_core/sink.h
@@ -31,6 +31,15 @@ public:
/// Samples enqueued that have not been played yet.
virtual std::size_t SamplesInQueue() const = 0;
+ /**
+ * Sets the desired output device.
+ * @paran device_id Id of the desired device.
+ */
+ virtual void SetDevice(int device_id) = 0;
+ /// Returns the list of available devices.
+ virtual std::vector<std::string> GetDeviceList() const = 0;
} // namespace
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp
index 95ccc9e9d..6972395af 100644
--- a/src/audio_core/sink_details.cpp
+++ b/src/audio_core/sink_details.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <memory>
#include <vector>
#include "audio_core/null_sink.h"
@@ -9,6 +10,7 @@
#ifdef HAVE_SDL2
#include "audio_core/sdl2_sink.h"
+#include "common/logging/log.h"
namespace AudioCore {
@@ -20,4 +22,21 @@ const std::vector<SinkDetails> g_sink_details = {
{"null", []() { return std::make_unique<NullSink>(); }},
+const SinkDetails& GetSinkDetails(std::string sink_id) {
+ auto iter =
+ std::find_if(g_sink_details.begin(), g_sink_details.end(),
+ [sink_id](const auto& sink_detail) { return == sink_id; });
+ if (sink_id == "auto" || iter == g_sink_details.end()) {
+ if (sink_id != "auto") {
+ LOG_ERROR(Audio, "AudioCore::SelectSink given invalid sink_id %s", sink_id.c_str());
+ }
+ // Auto-select.
+ // g_sink_details is ordered in terms of desirability, with the best choice at the front.
+ iter = g_sink_details.begin();
+ }
+ return *iter;
} // namespace AudioCore
diff --git a/src/audio_core/sink_details.h b/src/audio_core/sink_details.h
index 4b30cf835..9d3735171 100644
--- a/src/audio_core/sink_details.h
+++ b/src/audio_core/sink_details.h
@@ -24,4 +24,6 @@ struct SinkDetails {
extern const std::vector<SinkDetails> g_sink_details;
+const SinkDetails& GetSinkDetails(std::string sink_id);
} // namespace AudioCore
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 99c096ac7..76f5caeb1 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -141,6 +141,26 @@ int main(int argc, char** argv) {
case Core::System::ResultStatus::ErrorLoader:
LOG_CRITICAL(Frontend, "Failed to load ROM!");
return -1;
+ case Core::System::ResultStatus::ErrorLoader_ErrorEncrypted:
+ LOG_CRITICAL(Frontend, "The game that you are trying to load must be decrypted before "
+ "being used with Citra. \n\n For more information on dumping and "
+ "decrypting games, please refer to: "
+ "");
+ return -1;
+ case Core::System::ResultStatus::ErrorLoader_ErrorInvalidFormat:
+ LOG_CRITICAL(Frontend, "Error while loading ROM: The ROM format is not supported.");
+ return -1;
+ case Core::System::ResultStatus::ErrorNotInitialized:
+ LOG_CRITICAL(Frontend, "CPUCore not initialized");
+ return -1;
+ case Core::System::ResultStatus::ErrorSystemMode:
+ LOG_CRITICAL(Frontend, "Failed to determine system mode!");
+ return -1;
+ case Core::System::ResultStatus::ErrorVideoCore:
+ LOG_CRITICAL(Frontend, "VideoCore not initialized");
+ return -1;
+ case Core::System::ResultStatus::Success:
+ break; // Expected case
while (emu_window->IsOpen()) {
diff --git a/src/citra/config.cpp b/src/citra/config.cpp
index bd8ac563b..827c90e55 100644
--- a/src/citra/config.cpp
+++ b/src/citra/config.cpp
@@ -63,7 +63,8 @@ void Config::ReadValues() {
// Renderer
Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true);
Settings::values.use_shader_jit = sdl2_config->GetBoolean("Renderer", "use_shader_jit", true);
- Settings::values.resolution_factor = sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
+ Settings::values.resolution_factor =
+ (float)sdl2_config->GetReal("Renderer", "resolution_factor", 1.0);
Settings::values.use_vsync = sdl2_config->GetBoolean("Renderer", "use_vsync", false);
Settings::values.toggle_framelimit =
sdl2_config->GetBoolean("Renderer", "toggle_framelimit", true);
@@ -81,6 +82,7 @@ void Config::ReadValues() {
Settings::values.sink_id = sdl2_config->Get("Audio", "output_engine", "auto");
Settings::values.enable_audio_stretching =
sdl2_config->GetBoolean("Audio", "enable_audio_stretching", true);
+ Settings::values.audio_device_id = sdl2_config->Get("Audio", "output_device", "auto");
// Data Storage
Settings::values.use_virtual_sd =
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index 7996813b4..d728fb9e8 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -91,6 +91,10 @@ output_engine =
# 0: No, 1 (default): Yes
enable_audio_stretching =
+# Which audio device to use.
+# auto (default): Auto-select
+output_device =
[Data Storage]
# Whether to create a virtual SD card.
# 1 (default): Yes, 0: No
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 93f1c339d..d4460bf01 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -14,7 +14,6 @@ set(SRCS
- debugger/ramview.cpp
@@ -48,7 +47,6 @@ set(HEADERS
- debugger/ramview.h
@@ -100,7 +98,7 @@ if (APPLE)
add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS})
-target_link_libraries(citra-qt core video_core audio_core common qhexedit)
+target_link_libraries(citra-qt core video_core audio_core common)
target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
target_link_libraries(citra-qt ${PLATFORM_LIBRARIES} Threads::Threads)
diff --git a/src/citra_qt/config.cpp b/src/citra_qt/config.cpp
index 8021667d0..f776e16b2 100644
--- a/src/citra_qt/config.cpp
+++ b/src/citra_qt/config.cpp
@@ -63,6 +63,8 @@ void Config::ReadValues() {
Settings::values.sink_id = qt_config->value("output_engine", "auto").toString().toStdString();
Settings::values.enable_audio_stretching =
qt_config->value("enable_audio_stretching", true).toBool();
+ Settings::values.audio_device_id =
+ qt_config->value("output_device", "auto").toString().toStdString();
qt_config->beginGroup("Data Storage");
@@ -169,6 +171,7 @@ void Config::SaveValues() {
qt_config->setValue("output_engine", QString::fromStdString(Settings::values.sink_id));
qt_config->setValue("enable_audio_stretching", Settings::values.enable_audio_stretching);
+ qt_config->setValue("output_device", QString::fromStdString(Settings::values.audio_device_id));
qt_config->beginGroup("Data Storage");
diff --git a/src/citra_qt/configure_audio.cpp b/src/citra_qt/configure_audio.cpp
index 3cdd4c780..3ddcf9232 100644
--- a/src/citra_qt/configure_audio.cpp
+++ b/src/citra_qt/configure_audio.cpp
@@ -2,6 +2,9 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <memory>
+#include "audio_core/audio_core.h"
+#include "audio_core/sink.h"
#include "audio_core/sink_details.h"
#include "citra_qt/configure_audio.h"
#include "core/settings.h"
@@ -18,6 +21,8 @@ ConfigureAudio::ConfigureAudio(QWidget* parent)
+ connect(ui->output_sink_combo_box, SIGNAL(currentIndexChanged(int)), this,
+ SLOT(updateAudioDevices(int)));
ConfigureAudio::~ConfigureAudio() {}
@@ -33,6 +38,19 @@ void ConfigureAudio::setConfiguration() {
+ // The device list cannot be pre-populated (nor listed) until the output sink is known.
+ updateAudioDevices(new_sink_index);
+ int new_device_index = -1;
+ for (int index = 0; index < ui->audio_device_combo_box->count(); index++) {
+ if (ui->audio_device_combo_box->itemText(index).toStdString() ==
+ Settings::values.audio_device_id) {
+ new_device_index = index;
+ break;
+ }
+ }
+ ui->audio_device_combo_box->setCurrentIndex(new_device_index);
void ConfigureAudio::applyConfiguration() {
@@ -40,5 +58,20 @@ void ConfigureAudio::applyConfiguration() {
Settings::values.enable_audio_stretching = ui->toggle_audio_stretching->isChecked();
+ Settings::values.audio_device_id =
+ ui->audio_device_combo_box->itemText(ui->audio_device_combo_box->currentIndex())
+ .toStdString();
+void ConfigureAudio::updateAudioDevices(int sink_index) {
+ ui->audio_device_combo_box->clear();
+ ui->audio_device_combo_box->addItem("auto");
+ std::string sink_id = ui->output_sink_combo_box->itemText(sink_index).toStdString();
+ std::vector<std::string> device_list =
+ AudioCore::GetSinkDetails(sink_id).factory()->GetDeviceList();
+ for (const auto& device : device_list) {
+ ui->audio_device_combo_box->addItem(device.c_str());
+ }
diff --git a/src/citra_qt/configure_audio.h b/src/citra_qt/configure_audio.h
index 51df2e27b..8190e694f 100644
--- a/src/citra_qt/configure_audio.h
+++ b/src/citra_qt/configure_audio.h
@@ -20,6 +20,9 @@ public:
void applyConfiguration();
+public slots:
+ void updateAudioDevices(int sink_index);
void setConfiguration();
diff --git a/src/citra_qt/configure_audio.ui b/src/citra_qt/configure_audio.ui
index 3e2b4635f..dd870eb61 100644
--- a/src/citra_qt/configure_audio.ui
+++ b/src/citra_qt/configure_audio.ui
@@ -35,6 +35,21 @@
+ <item>
+ <layout class="QHBoxLayout">
+ <item>
+ <widget class="QLabel">
+ <property name="text">
+ <string>Audio Device:</string>
+ </property>
+ </widget>
+ </item>
+ <item>
+ <widget class="QComboBox" name="audio_device_combo_box">
+ </widget>
+ </item>
+ </layout>
+ </item>
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp
index ff2e7e363..f37524190 100644
--- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.cpp
@@ -18,7 +18,9 @@
#include "citra_qt/util/util.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
+#include "video_core/shader/debug_data.h"
#include "video_core/shader/shader.h"
+#include "video_core/shader/shader_interpreter.h"
using nihstro::OpCode;
using nihstro::Instruction;
@@ -518,8 +520,9 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d
info.labels.insert({entry_point, "main"});
// Generate debug information
- debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config,
- shader_setup);
+ Pica::Shader::InterpreterEngine shader_engine;
+ shader_engine.SetupBatch(shader_setup, entry_point);
+ debug_data = shader_engine.ProduceDebugInfo(shader_setup, input_vertex, num_attributes);
// Reload widget state
for (int attr = 0; attr < num_attributes; ++attr) {
diff --git a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h
index bedea0bed..3292573f3 100644
--- a/src/citra_qt/debugger/graphics/graphics_vertex_shader.h
+++ b/src/citra_qt/debugger/graphics/graphics_vertex_shader.h
@@ -8,6 +8,7 @@
#include <QTreeView>
#include "citra_qt/debugger/graphics/graphics_breakpoint_observer.h"
#include "nihstro/parser_shbin.h"
+#include "video_core/shader/debug_data.h"
#include "video_core/shader/shader.h"
class QLabel;
diff --git a/src/citra_qt/debugger/ramview.cpp b/src/citra_qt/debugger/ramview.cpp
deleted file mode 100644
index 10a09dda8..000000000
--- a/src/citra_qt/debugger/ramview.cpp
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-#include "citra_qt/debugger/ramview.h"
-GRamView::GRamView(QWidget* parent) : QHexEdit(parent) {}
-void GRamView::OnCPUStepped() {
- // TODO: QHexEdit doesn't show vertical scroll bars for > 10MB data streams...
- // setData(QByteArray((const char*)Mem_RAM,sizeof(Mem_RAM)/8));
diff --git a/src/citra_qt/debugger/ramview.h b/src/citra_qt/debugger/ramview.h
deleted file mode 100644
index d01cea93b..000000000
--- a/src/citra_qt/debugger/ramview.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-#pragma once
-#include "qhexedit.h"
-class GRamView : public QHexEdit {
- explicit GRamView(QWidget* parent = nullptr);
-public slots:
- void OnCPUStepped();
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 6d59cf640..f765c0147 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -12,6 +12,7 @@
#include <QFileDialog>
#include <QMessageBox>
#include <QtGui>
+#include <QtWidgets>
#include "citra_qt/bootmanager.h"
#include "citra_qt/config.h"
#include "citra_qt/configure_dialog.h"
@@ -24,7 +25,6 @@
#include "citra_qt/debugger/graphics/graphics_tracing.h"
#include "citra_qt/debugger/graphics/graphics_vertex_shader.h"
#include "citra_qt/debugger/profiler.h"
-#include "citra_qt/debugger/ramview.h"
#include "citra_qt/debugger/registers.h"
#include "citra_qt/debugger/wait_tree.h"
#include "citra_qt/game_list.h"
@@ -46,7 +46,6 @@
#include "core/gdbstub/gdbstub.h"
#include "core/loader/loader.h"
#include "core/settings.h"
-#include "qhexedit.h"
#include "video_core/video_core.h"
diff --git a/src/common/hash.cpp b/src/common/hash.cpp
index 2309320bb..f3d390dc5 100644
--- a/src/common/hash.cpp
+++ b/src/common/hash.cpp
@@ -16,7 +16,7 @@ namespace Common {
// Block read - if your platform needs to do endian-swapping or can only handle aligned reads, do
// the conversion here
-static FORCE_INLINE u64 getblock64(const u64* p, int i) {
+static FORCE_INLINE u64 getblock64(const u64* p, size_t i) {
return p[i];
@@ -34,9 +34,9 @@ static FORCE_INLINE u64 fmix64(u64 k) {
// This is the 128-bit variant of the MurmurHash3 hash function that is targeted for 64-bit
// platforms (MurmurHash3_x64_128). It was taken from:
-void MurmurHash3_128(const void* key, int len, u32 seed, void* out) {
+void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out) {
const u8* data = (const u8*)key;
- const int nblocks = len / 16;
+ const size_t nblocks = len / 16;
u64 h1 = seed;
u64 h2 = seed;
@@ -48,7 +48,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out) {
const u64* blocks = (const u64*)(data);
- for (int i = 0; i < nblocks; i++) {
+ for (size_t i = 0; i < nblocks; i++) {
u64 k1 = getblock64(blocks, i * 2 + 0);
u64 k2 = getblock64(blocks, i * 2 + 1);
diff --git a/src/common/hash.h b/src/common/hash.h
index a3850be68..ee2560dad 100644
--- a/src/common/hash.h
+++ b/src/common/hash.h
@@ -4,11 +4,12 @@
#pragma once
+#include <cstddef>
#include "common/common_types.h"
namespace Common {
-void MurmurHash3_128(const void* key, int len, u32 seed, void* out);
+void MurmurHash3_128(const void* key, size_t len, u32 seed, void* out);
* Computes a 64-bit hash over the specified block of data
@@ -16,7 +17,7 @@ void MurmurHash3_128(const void* key, int len, u32 seed, void* out);
* @param len Length of data (in bytes) to compute hash over
* @returns 64-bit hash value that was computed over the data block
-static inline u64 ComputeHash64(const void* data, int len) {
+static inline u64 ComputeHash64(const void* data, size_t len) {
u64 res[2];
MurmurHash3_128(data, len, 0, res);
return res[0];
diff --git a/src/core/core.h b/src/core/core.h
index 1015e8847..17572a74f 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -115,7 +115,7 @@ private:
static System s_instance;
-static ARM_Interface& CPU() {
+inline ARM_Interface& CPU() {
return System::GetInstance().CPU();
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index a437d0823..276ecfdf6 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -13,7 +13,7 @@
#include "core/core.h"
#include "core/core_timing.h"
-int g_clock_rate_arm11 = 268123480;
+int g_clock_rate_arm11 = BASE_CLOCK_RATE_ARM11;
// is this really necessary?
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index b72a1b500..d2f85cd4d 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -21,6 +21,7 @@
// inside callback:
// ScheduleEvent(periodInCycles - cycles_late, callback, "whatever")
+constexpr int BASE_CLOCK_RATE_ARM11 = 268123480;
extern int g_clock_rate_arm11;
inline s64 msToCycles(int ms) {
diff --git a/src/core/file_sys/archive_extsavedata.cpp b/src/core/file_sys/archive_extsavedata.cpp
index 51ce78435..dd2fb167f 100644
--- a/src/core/file_sys/archive_extsavedata.cpp
+++ b/src/core/file_sys/archive_extsavedata.cpp
@@ -107,6 +107,8 @@ public:
case PathParser::NotFound:
LOG_ERROR(Service_FS, "%s not found", full_path.c_str());
+ case PathParser::FileFound:
+ break; // Expected 'success' case
FileUtil::IOFile file(full_path, "r+b");
diff --git a/src/core/file_sys/archive_sdmc.cpp b/src/core/file_sys/archive_sdmc.cpp
index 333dfb92e..72ff05c65 100644
--- a/src/core/file_sys/archive_sdmc.cpp
+++ b/src/core/file_sys/archive_sdmc.cpp
@@ -72,6 +72,8 @@ ResultVal<std::unique_ptr<FileBackend>> SDMCArchive::OpenFileBase(const Path& pa
+ case PathParser::FileFound:
+ break; // Expected 'success' case
FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb");
@@ -106,6 +108,8 @@ ResultCode SDMCArchive::DeleteFile(const Path& path) const {
case PathParser::DirectoryFound:
LOG_ERROR(Service_FS, "%s is not a file", full_path.c_str());
+ case PathParser::FileFound:
+ break; // Expected 'success' case
if (FileUtil::Delete(full_path)) {
@@ -154,6 +158,8 @@ static ResultCode DeleteDirectoryHelper(const Path& path, const std::string& mou
case PathParser::FileFound:
LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str());
+ case PathParser::DirectoryFound:
+ break; // Expected 'success' case
if (deleter(full_path)) {
@@ -197,6 +203,8 @@ ResultCode SDMCArchive::CreateFile(const FileSys::Path& path, u64 size) const {
case PathParser::FileFound:
LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
+ case PathParser::NotFound:
+ break; // Expected 'success' case
if (size == 0) {
@@ -238,6 +246,8 @@ ResultCode SDMCArchive::CreateDirectory(const Path& path) const {
case PathParser::FileFound:
LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
+ case PathParser::NotFound:
+ break; // Expected 'success' case
if (FileUtil::CreateDir(mount_point + path.AsString())) {
@@ -281,6 +291,8 @@ ResultVal<std::unique_ptr<DirectoryBackend>> SDMCArchive::OpenDirectory(const Pa
case PathParser::FileInPath:
LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str());
+ case PathParser::DirectoryFound:
+ break; // Expected 'success' case
auto directory = std::make_unique<DiskDirectory>(full_path);
diff --git a/src/core/file_sys/savedata_archive.cpp b/src/core/file_sys/savedata_archive.cpp
index f2e6a06bc..f540c4a93 100644
--- a/src/core/file_sys/savedata_archive.cpp
+++ b/src/core/file_sys/savedata_archive.cpp
@@ -57,6 +57,8 @@ ResultVal<std::unique_ptr<FileBackend>> SaveDataArchive::OpenFile(const Path& pa
+ case PathParser::FileFound:
+ break; // Expected 'success' case
FileUtil::IOFile file(full_path, mode.write_flag ? "r+b" : "rb");
@@ -91,6 +93,8 @@ ResultCode SaveDataArchive::DeleteFile(const Path& path) const {
case PathParser::NotFound:
LOG_ERROR(Service_FS, "File not found %s", full_path.c_str());
+ case PathParser::FileFound:
+ break; // Expected 'success' case
if (FileUtil::Delete(full_path)) {
@@ -139,6 +143,8 @@ static ResultCode DeleteDirectoryHelper(const Path& path, const std::string& mou
case PathParser::FileFound:
LOG_ERROR(Service_FS, "Unexpected file or directory %s", full_path.c_str());
+ case PathParser::DirectoryFound:
+ break; // Expected 'success' case
if (deleter(full_path)) {
@@ -182,6 +188,8 @@ ResultCode SaveDataArchive::CreateFile(const FileSys::Path& path, u64 size) cons
case PathParser::FileFound:
LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
+ case PathParser::NotFound:
+ break; // Expected 'success' case
if (size == 0) {
@@ -225,6 +233,8 @@ ResultCode SaveDataArchive::CreateDirectory(const Path& path) const {
case PathParser::FileFound:
LOG_ERROR(Service_FS, "%s already exists", full_path.c_str());
+ case PathParser::NotFound:
+ break; // Expected 'success' case
if (FileUtil::CreateDir(mount_point + path.AsString())) {
@@ -269,6 +279,8 @@ ResultVal<std::unique_ptr<DirectoryBackend>> SaveDataArchive::OpenDirectory(
case PathParser::FileFound:
LOG_ERROR(Service_FS, "Unexpected file in path %s", full_path.c_str());
+ case PathParser::DirectoryFound:
+ break; // Expected 'success' case
auto directory = std::make_unique<DiskDirectory>(full_path);
diff --git a/src/core/frontend/emu_window.cpp b/src/core/frontend/emu_window.cpp
index 1541cc39d..4f0f786ce 100644
--- a/src/core/frontend/emu_window.cpp
+++ b/src/core/frontend/emu_window.cpp
@@ -98,9 +98,9 @@ void EmuWindow::AccelerometerChanged(float x, float y, float z) {
// TODO(wwylele): do a time stretch as it in GyroscopeChanged
// The time stretch formula should be like
// stretched_vector = (raw_vector - gravity) * stretch_ratio + gravity
- accel_x = x * coef;
- accel_y = y * coef;
- accel_z = z * coef;
+ accel_x = static_cast<s16>(x * coef);
+ accel_y = static_cast<s16>(y * coef);
+ accel_z = static_cast<s16>(z * coef);
void EmuWindow::GyroscopeChanged(float x, float y, float z) {
@@ -109,9 +109,9 @@ void EmuWindow::GyroscopeChanged(float x, float y, float z) {
float stretch =
FULL_FPS / Common::Profiling::GetTimingResultsAggregator()->GetAggregatedResults().fps;
std::lock_guard<std::mutex> lock(gyro_mutex);
- gyro_x = x * coef * stretch;
- gyro_y = y * coef * stretch;
- gyro_z = z * coef * stretch;
+ gyro_x = static_cast<s16>(x * coef * stretch);
+ gyro_y = static_cast<s16>(y * coef * stretch);
+ gyro_z = static_cast<s16>(z * coef * stretch);
void EmuWindow::UpdateCurrentFramebufferLayout(unsigned width, unsigned height) {
diff --git a/src/core/hle/service/err_f.cpp b/src/core/hle/service/err_f.cpp
index cd0a1a598..9da55f328 100644
--- a/src/core/hle/service/err_f.cpp
+++ b/src/core/hle/service/err_f.cpp
@@ -227,6 +227,8 @@ static void ThrowFatalError(Interface* self) {
LOG_CRITICAL(Service_ERR, "FINST2: 0x%08X",
+ case ExceptionType::Undefined:
+ break; // Not logging exception_info for this case
LOG_CRITICAL(Service_ERR, "Datetime: %s", GetCurrentSystemTime().c_str());
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index 947958703..a8c1331ed 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -149,7 +149,7 @@ static ResultCode WriteHWRegsWithMask(u32 base_address, u32 size_in_bytes, VAddr
u32 mask = Memory::Read32(masks_vaddr);
// Update the current value of the register only for set mask bits
- reg_value = (reg_value & ~mask) | (data | mask);
+ reg_value = (reg_value & ~mask) | (data & mask);
WriteSingleHWReg(base_address, reg_value);
diff --git a/src/core/hle/service/hid/hid.cpp b/src/core/hle/service/hid/hid.cpp
index 676154bd4..f14ab3811 100644
--- a/src/core/hle/service/hid/hid.cpp
+++ b/src/core/hle/service/hid/hid.cpp
@@ -35,6 +35,15 @@ static u32 next_gyroscope_index;
static int enable_accelerometer_count = 0; // positive means enabled
static int enable_gyroscope_count = 0; // positive means enabled
+static int pad_update_event;
+static int accelerometer_update_event;
+static int gyroscope_update_event;
+// Updating period for each HID device. These empirical values are measured from a 11.2 3DS.
+constexpr u64 pad_update_ticks = BASE_CLOCK_RATE_ARM11 / 234;
+constexpr u64 accelerometer_update_ticks = BASE_CLOCK_RATE_ARM11 / 104;
+constexpr u64 gyroscope_update_ticks = BASE_CLOCK_RATE_ARM11 / 101;
static PadState GetCirclePadDirectionState(s16 circle_pad_x, s16 circle_pad_y) {
// 30 degree and 60 degree are angular thresholds for directions
constexpr float TAN30 = 0.577350269f;
@@ -65,14 +74,9 @@ static PadState GetCirclePadDirectionState(s16 circle_pad_x, s16 circle_pad_y) {
return state;
-void Update() {
+static void UpdatePadCallback(u64 userdata, int cycles_late) {
SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer());
- if (mem == nullptr) {
- LOG_DEBUG(Service_HID, "Cannot update HID prior to mapping shared memory!");
- return;
- }
PadState state = VideoCore::g_emu_window->GetPadState();
// Get current circle pad position and update circle pad direction
@@ -131,59 +135,68 @@ void Update() {
- // Update accelerometer
- if (enable_accelerometer_count > 0) {
- mem->accelerometer.index = next_accelerometer_index;
- next_accelerometer_index =
- (next_accelerometer_index + 1) % mem->accelerometer.entries.size();
- AccelerometerDataEntry& accelerometer_entry =
- mem->accelerometer.entries[mem->accelerometer.index];
- std::tie(accelerometer_entry.x, accelerometer_entry.y, accelerometer_entry.z) =
- VideoCore::g_emu_window->GetAccelerometerState();
- // Make up "raw" entry
- // TODO(wwylele):
- // From hardware testing, the raw_entry values are approximately,
- // but not exactly, as twice as corresponding entries (or with a minus sign).
- // It may caused by system calibration to the accelerometer.
- // Figure out how it works, or, if no game reads raw_entry,
- // the following three lines can be removed and leave raw_entry unimplemented.
- mem->accelerometer.raw_entry.x = -2 * accelerometer_entry.x;
- mem->accelerometer.raw_entry.z = 2 * accelerometer_entry.y;
- mem->accelerometer.raw_entry.y = -2 * accelerometer_entry.z;
- // If we just updated index 0, provide a new timestamp
- if (mem->accelerometer.index == 0) {
- mem->accelerometer.index_reset_ticks_previous = mem->accelerometer.index_reset_ticks;
- mem->accelerometer.index_reset_ticks = (s64)CoreTiming::GetTicks();
- }
+ // Reschedule recurrent event
+ CoreTiming::ScheduleEvent(pad_update_ticks - cycles_late, pad_update_event);
+static void UpdateAccelerometerCallback(u64 userdata, int cycles_late) {
+ SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer());
+ mem->accelerometer.index = next_accelerometer_index;
+ next_accelerometer_index = (next_accelerometer_index + 1) % mem->accelerometer.entries.size();
- event_accelerometer->Signal();
+ AccelerometerDataEntry& accelerometer_entry =
+ mem->accelerometer.entries[mem->accelerometer.index];
+ std::tie(accelerometer_entry.x, accelerometer_entry.y, accelerometer_entry.z) =
+ VideoCore::g_emu_window->GetAccelerometerState();
+ // Make up "raw" entry
+ // TODO(wwylele):
+ // From hardware testing, the raw_entry values are approximately, but not exactly, as twice as
+ // corresponding entries (or with a minus sign). It may caused by system calibration to the
+ // accelerometer. Figure out how it works, or, if no game reads raw_entry, the following three
+ // lines can be removed and leave raw_entry unimplemented.
+ mem->accelerometer.raw_entry.x = -2 * accelerometer_entry.x;
+ mem->accelerometer.raw_entry.z = 2 * accelerometer_entry.y;
+ mem->accelerometer.raw_entry.y = -2 * accelerometer_entry.z;
+ // If we just updated index 0, provide a new timestamp
+ if (mem->accelerometer.index == 0) {
+ mem->accelerometer.index_reset_ticks_previous = mem->accelerometer.index_reset_ticks;
+ mem->accelerometer.index_reset_ticks = (s64)CoreTiming::GetTicks();
- // Update gyroscope
- if (enable_gyroscope_count > 0) {
- mem->gyroscope.index = next_gyroscope_index;
- next_gyroscope_index = (next_gyroscope_index + 1) % mem->gyroscope.entries.size();
+ event_accelerometer->Signal();
- GyroscopeDataEntry& gyroscope_entry = mem->gyroscope.entries[mem->gyroscope.index];
- std::tie(gyroscope_entry.x, gyroscope_entry.y, gyroscope_entry.z) =
- VideoCore::g_emu_window->GetGyroscopeState();
+ // Reschedule recurrent event
+ CoreTiming::ScheduleEvent(accelerometer_update_ticks - cycles_late, accelerometer_update_event);
- // Make up "raw" entry
- mem->gyroscope.raw_entry.x = gyroscope_entry.x;
- mem->gyroscope.raw_entry.z = -gyroscope_entry.y;
- mem->gyroscope.raw_entry.y = gyroscope_entry.z;
+static void UpdateGyroscopeCallback(u64 userdata, int cycles_late) {
+ SharedMem* mem = reinterpret_cast<SharedMem*>(shared_mem->GetPointer());
- // If we just updated index 0, provide a new timestamp
- if (mem->gyroscope.index == 0) {
- mem->gyroscope.index_reset_ticks_previous = mem->gyroscope.index_reset_ticks;
- mem->gyroscope.index_reset_ticks = (s64)CoreTiming::GetTicks();
- }
+ mem->gyroscope.index = next_gyroscope_index;
+ next_gyroscope_index = (next_gyroscope_index + 1) % mem->gyroscope.entries.size();
+ GyroscopeDataEntry& gyroscope_entry = mem->gyroscope.entries[mem->gyroscope.index];
+ std::tie(gyroscope_entry.x, gyroscope_entry.y, gyroscope_entry.z) =
+ VideoCore::g_emu_window->GetGyroscopeState();
+ // Make up "raw" entry
+ mem->gyroscope.raw_entry.x = gyroscope_entry.x;
+ mem->gyroscope.raw_entry.z = -gyroscope_entry.y;
+ mem->gyroscope.raw_entry.y = gyroscope_entry.z;
- event_gyroscope->Signal();
+ // If we just updated index 0, provide a new timestamp
+ if (mem->gyroscope.index == 0) {
+ mem->gyroscope.index_reset_ticks_previous = mem->gyroscope.index_reset_ticks;
+ mem->gyroscope.index_reset_ticks = (s64)CoreTiming::GetTicks();
+ event_gyroscope->Signal();
+ // Reschedule recurrent event
+ CoreTiming::ScheduleEvent(gyroscope_update_ticks - cycles_late, gyroscope_update_event);
void GetIPCHandles(Service::Interface* self) {
@@ -204,7 +217,11 @@ void EnableAccelerometer(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- event_accelerometer->Signal();
+ // Schedules the accelerometer update event if the accelerometer was just enabled
+ if (enable_accelerometer_count == 1) {
+ CoreTiming::ScheduleEvent(accelerometer_update_ticks, accelerometer_update_event);
+ }
cmd_buff[1] = RESULT_SUCCESS.raw;
@@ -215,7 +232,11 @@ void DisableAccelerometer(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- event_accelerometer->Signal();
+ // Unschedules the accelerometer update event if the accelerometer was just disabled
+ if (enable_accelerometer_count == 0) {
+ CoreTiming::UnscheduleEvent(accelerometer_update_event, 0);
+ }
cmd_buff[1] = RESULT_SUCCESS.raw;
@@ -226,7 +247,11 @@ void EnableGyroscopeLow(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- event_gyroscope->Signal();
+ // Schedules the gyroscope update event if the gyroscope was just enabled
+ if (enable_gyroscope_count == 1) {
+ CoreTiming::ScheduleEvent(gyroscope_update_ticks, gyroscope_update_event);
+ }
cmd_buff[1] = RESULT_SUCCESS.raw;
@@ -237,7 +262,11 @@ void DisableGyroscopeLow(Service::Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- event_gyroscope->Signal();
+ // Unschedules the gyroscope update event if the gyroscope was just disabled
+ if (enable_gyroscope_count == 0) {
+ CoreTiming::UnscheduleEvent(gyroscope_update_event, 0);
+ }
cmd_buff[1] = RESULT_SUCCESS.raw;
@@ -291,6 +320,8 @@ void Init() {
next_pad_index = 0;
next_touch_index = 0;
+ next_accelerometer_index = 0;
+ next_gyroscope_index = 0;
// Create event handles
event_pad_or_touch_1 = Event::Create(ResetType::OneShot, "HID:EventPadOrTouch1");
@@ -298,6 +329,15 @@ void Init() {
event_accelerometer = Event::Create(ResetType::OneShot, "HID:EventAccelerometer");
event_gyroscope = Event::Create(ResetType::OneShot, "HID:EventGyroscope");
event_debug_pad = Event::Create(ResetType::OneShot, "HID:EventDebugPad");
+ // Register update callbacks
+ pad_update_event = CoreTiming::RegisterEvent("HID::UpdatePadCallback", UpdatePadCallback);
+ accelerometer_update_event =
+ CoreTiming::RegisterEvent("HID::UpdateAccelerometerCallback", UpdateAccelerometerCallback);
+ gyroscope_update_event =
+ CoreTiming::RegisterEvent("HID::UpdateGyroscopeCallback", UpdateGyroscopeCallback);
+ CoreTiming::ScheduleEvent(pad_update_ticks, pad_update_event);
void Shutdown() {
diff --git a/src/core/hle/service/hid/hid.h b/src/core/hle/service/hid/hid.h
index 7904e7355..21e66dfe0 100644
--- a/src/core/hle/service/hid/hid.h
+++ b/src/core/hle/service/hid/hid.h
@@ -296,9 +296,6 @@ void GetGyroscopeLowRawToDpsCoefficient(Service::Interface* self);
void GetGyroscopeLowCalibrateParam(Service::Interface* self);
-/// Checks for user input updates
-void Update();
/// Initialize HID service
void Init();
diff --git a/src/core/hle/service/mic_u.cpp b/src/core/hle/service/mic_u.cpp
index c62f8afc6..e98388560 100644
--- a/src/core/hle/service/mic_u.cpp
+++ b/src/core/hle/service/mic_u.cpp
@@ -93,7 +93,7 @@ static void StartSampling(Interface* self) {
sample_rate = static_cast<SampleRate>(cmd_buff[2] & 0xFF);
audio_buffer_offset = cmd_buff[3];
audio_buffer_size = cmd_buff[4];
- audio_buffer_loop = static_cast<bool>(cmd_buff[5] & 0xFF);
+ audio_buffer_loop = (cmd_buff[5] & 0xFF) != 0;
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
is_sampling = true;
@@ -202,7 +202,7 @@ static void GetGain(Interface* self) {
static void SetPower(Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- mic_power = static_cast<bool>(cmd_buff[1] & 0xFF);
+ mic_power = (cmd_buff[1] & 0xFF) != 0;
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
LOG_WARNING(Service_MIC, "(STUBBED) called, mic_power=%u", mic_power);
@@ -252,7 +252,7 @@ static void SetIirFilterMic(Interface* self) {
static void SetClamp(Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- clamp = static_cast<bool>(cmd_buff[1] & 0xFF);
+ clamp = (cmd_buff[1] & 0xFF) != 0;
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
LOG_WARNING(Service_MIC, "(STUBBED) called, clamp=%u", clamp);
@@ -282,7 +282,7 @@ static void GetClamp(Interface* self) {
static void SetAllowShellClosed(Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
- allow_shell_closed = static_cast<bool>(cmd_buff[1] & 0xFF);
+ allow_shell_closed = (cmd_buff[1] & 0xFF) != 0;
cmd_buff[1] = RESULT_SUCCESS.raw; // No error
LOG_WARNING(Service_MIC, "(STUBBED) called, allow_shell_closed=%u", allow_shell_closed);
diff --git a/src/core/hle/service/nfc/nfc.cpp b/src/core/hle/service/nfc/nfc.cpp
index e248285f9..fd3c7d9c2 100644
--- a/src/core/hle/service/nfc/nfc.cpp
+++ b/src/core/hle/service/nfc/nfc.cpp
@@ -11,6 +11,81 @@ namespace Service {
namespace NFC {
static Kernel::SharedPtr<Kernel::Event> tag_in_range_event;
+static Kernel::SharedPtr<Kernel::Event> tag_out_of_range_event;
+static TagState nfc_tag_state = TagState::NotInitialized;
+static CommunicationStatus nfc_status = CommunicationStatus::NfcInitialized;
+void Initialize(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ u8 param = static_cast<u8>(cmd_buff[1] & 0xFF);
+ nfc_tag_state = TagState::NotScanning;
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called, param=%u", param);
+void Shutdown(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ u8 param = static_cast<u8>(cmd_buff[1] & 0xFF);
+ nfc_tag_state = TagState::NotInitialized;
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called, param=%u", param);
+void StartCommunication(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
+void StopCommunication(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
+void StartTagScanning(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ nfc_tag_state = TagState::TagInRange;
+ tag_in_range_event->Signal();
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
+void StopTagScanning(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ nfc_tag_state = TagState::NotScanning;
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
+void LoadAmiiboData(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ nfc_tag_state = TagState::TagDataLoaded;
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
+void ResetTagScanState(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ nfc_tag_state = TagState::NotScanning;
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
void GetTagInRangeEvent(Interface* self) {
u32* cmd_buff = Kernel::GetCommandBuffer();
@@ -22,16 +97,46 @@ void GetTagInRangeEvent(Interface* self) {
LOG_WARNING(Service_NFC, "(STUBBED) called");
+void GetTagOutOfRangeEvent(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[0] = IPC::MakeHeader(0xC, 1, 2);
+ cmd_buff[1] = RESULT_SUCCESS.raw;
+ cmd_buff[2] = IPC::CopyHandleDesc();
+ cmd_buff[3] = Kernel::g_handle_table.Create(tag_out_of_range_event).MoveFrom();
+ LOG_WARNING(Service_NFC, "(STUBBED) called");
+void GetTagState(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ cmd_buff[2] = static_cast<u8>(nfc_tag_state);
+ LOG_DEBUG(Service_NFC, "(STUBBED) called");
+void CommunicationGetStatus(Interface* self) {
+ u32* cmd_buff = Kernel::GetCommandBuffer();
+ cmd_buff[1] = RESULT_SUCCESS.raw; // No error
+ cmd_buff[2] = static_cast<u8>(nfc_status);
+ LOG_DEBUG(Service_NFC, "(STUBBED) called");
void Init() {
AddService(new NFC_M());
AddService(new NFC_U());
tag_in_range_event =
Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_in_range_event");
+ tag_out_of_range_event =
+ Kernel::Event::Create(Kernel::ResetType::OneShot, "NFC::tag_out_range_event");
+ nfc_tag_state = TagState::NotInitialized;
void Shutdown() {
tag_in_range_event = nullptr;
+ tag_out_of_range_event = nullptr;
} // namespace NFC
diff --git a/src/core/hle/service/nfc/nfc.h b/src/core/hle/service/nfc/nfc.h
index b02354201..a013bdae7 100644
--- a/src/core/hle/service/nfc/nfc.h
+++ b/src/core/hle/service/nfc/nfc.h
@@ -4,12 +4,103 @@
#pragma once
+#include "common/common_types.h"
namespace Service {
class Interface;
namespace NFC {
+enum class TagState : u8 {
+ NotInitialized = 0,
+ NotScanning = 1,
+ Scanning = 2,
+ TagInRange = 3,
+ TagOutOfRange = 4,
+ TagDataLoaded = 5,
+enum class CommunicationStatus : u8 {
+ AttemptInitialize = 1,
+ NfcInitialized = 2,
+ * NFC::Initialize service function
+ * Inputs:
+ * 0 : Header code [0x00010040]
+ * 1 : (u8) unknown parameter. Can be either value 0x1 or 0x2
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void Initialize(Interface* self);
+ * NFC::Shutdown service function
+ * Inputs:
+ * 0 : Header code [0x00020040]
+ * 1 : (u8) unknown parameter
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void Shutdown(Interface* self);
+ * NFC::StartCommunication service function
+ * Inputs:
+ * 0 : Header code [0x00030000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void StartCommunication(Interface* self);
+ * NFC::StopCommunication service function
+ * Inputs:
+ * 0 : Header code [0x00040000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void StopCommunication(Interface* self);
+ * NFC::StartTagScanning service function
+ * Inputs:
+ * 0 : Header code [0x00050040]
+ * 1 : (u16) unknown. This is normally 0x0
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void StartTagScanning(Interface* self);
+ * NFC::StopTagScanning service function
+ * Inputs:
+ * 0 : Header code [0x00060000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void StopTagScanning(Interface* self);
+ * NFC::LoadAmiiboData service function
+ * Inputs:
+ * 0 : Header code [0x00070000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void LoadAmiiboData(Interface* self);
+ * NFC::ResetTagScanState service function
+ * Inputs:
+ * 0 : Header code [0x00080000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ */
+void ResetTagScanState(Interface* self);
* NFC::GetTagInRangeEvent service function
* Inputs:
@@ -21,6 +112,37 @@ namespace NFC {
void GetTagInRangeEvent(Interface* self);
+ * NFC::GetTagOutOfRangeEvent service function
+ * Inputs:
+ * 0 : Header code [0x000C0000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ * 2 : Copy handle descriptor
+ * 3 : Event Handle
+ */
+void GetTagOutOfRangeEvent(Interface* self);
+ * NFC::GetTagState service function
+ * Inputs:
+ * 0 : Header code [0x000D0000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ * 2 : (u8) Tag state
+ */
+void GetTagState(Interface* self);
+ * NFC::CommunicationGetStatus service function
+ * Inputs:
+ * 0 : Header code [0x000F0000]
+ * Outputs:
+ * 1 : Result of function, 0 on success, otherwise error code
+ * 2 : (u8) Communication state
+ */
+void CommunicationGetStatus(Interface* self);
/// Initialize all NFC services.
void Init();
diff --git a/src/core/hle/service/nfc/nfc_m.cpp b/src/core/hle/service/nfc/nfc_m.cpp
index f43b4029a..ebe637650 100644
--- a/src/core/hle/service/nfc/nfc_m.cpp
+++ b/src/core/hle/service/nfc/nfc_m.cpp
@@ -11,18 +11,19 @@ namespace NFC {
const Interface::FunctionInfo FunctionTable[] = {
// clang-format off
// nfc:u shared commands
- {0x00010040, nullptr, "Initialize"},
- {0x00020040, nullptr, "Shutdown"},
- {0x00030000, nullptr, "StartCommunication"},
- {0x00040000, nullptr, "StopCommunication"},
- {0x00050040, nullptr, "StartTagScanning"},
- {0x00060000, nullptr, "StopTagScanning"},
- {0x00070000, nullptr, "LoadAmiiboData"},
- {0x00080000, nullptr, "ResetTagScanState"},
+ {0x00010040, Initialize, "Initialize"},
+ {0x00020040, Shutdown, "Shutdown"},
+ {0x00030000, StartCommunication, "StartCommunication"},
+ {0x00040000, StopCommunication, "StopCommunication"},
+ {0x00050040, StartTagScanning, "StartTagScanning"},
+ {0x00060000, StopTagScanning, "StopTagScanning"},
+ {0x00070000, LoadAmiiboData, "LoadAmiiboData"},
+ {0x00080000, ResetTagScanState, "ResetTagScanState"},
{0x00090002, nullptr, "UpdateStoredAmiiboData"},
{0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"},
- {0x000D0000, nullptr, "GetTagState"},
- {0x000F0000, nullptr, "CommunicationGetStatus"},
+ {0x000C0000, GetTagOutOfRangeEvent, "GetTagOutOfRangeEvent"},
+ {0x000D0000, GetTagState, "GetTagState"},
+ {0x000F0000, CommunicationGetStatus, "CommunicationGetStatus"},
{0x00100000, nullptr, "GetTagInfo2"},
{0x00110000, nullptr, "GetTagInfo"},
{0x00120000, nullptr, "CommunicationGetResult"},
diff --git a/src/core/hle/service/nfc/nfc_u.cpp b/src/core/hle/service/nfc/nfc_u.cpp
index 4b5200ae8..5a40c7874 100644
--- a/src/core/hle/service/nfc/nfc_u.cpp
+++ b/src/core/hle/service/nfc/nfc_u.cpp
@@ -10,18 +10,19 @@ namespace NFC {
const Interface::FunctionInfo FunctionTable[] = {
// clang-format off
- {0x00010040, nullptr, "Initialize"},
- {0x00020040, nullptr, "Shutdown"},
- {0x00030000, nullptr, "StartCommunication"},
- {0x00040000, nullptr, "StopCommunication"},
- {0x00050040, nullptr, "StartTagScanning"},
- {0x00060000, nullptr, "StopTagScanning"},
- {0x00070000, nullptr, "LoadAmiiboData"},
- {0x00080000, nullptr, "ResetTagScanState"},
+ {0x00010040, Initialize, "Initialize"},
+ {0x00020040, Shutdown, "Shutdown"},
+ {0x00030000, StartCommunication, "StartCommunication"},
+ {0x00040000, StopCommunication, "StopCommunication"},
+ {0x00050040, StartTagScanning, "StartTagScanning"},
+ {0x00060000, StopTagScanning, "StopTagScanning"},
+ {0x00070000, LoadAmiiboData, "LoadAmiiboData"},
+ {0x00080000, ResetTagScanState, "ResetTagScanState"},
{0x00090002, nullptr, "UpdateStoredAmiiboData"},
{0x000B0000, GetTagInRangeEvent, "GetTagInRangeEvent"},
- {0x000D0000, nullptr, "GetTagState"},
- {0x000F0000, nullptr, "CommunicationGetStatus"},
+ {0x000C0000, GetTagOutOfRangeEvent, "GetTagOutOfRangeEvent"},
+ {0x000D0000, GetTagState, "GetTagState"},
+ {0x000F0000, CommunicationGetStatus, "CommunicationGetStatus"},
{0x00100000, nullptr, "GetTagInfo2"},
{0x00110000, nullptr, "GetTagInfo"},
{0x00120000, nullptr, "CommunicationGetResult"},
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 1a1ee90b2..fa8c13d36 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -15,7 +15,6 @@
#include "common/vector_math.h"
#include "core/core_timing.h"
#include "core/hle/service/gsp_gpu.h"
-#include "core/hle/service/hid/hid.h"
#include "core/hw/gpu.h"
#include "core/hw/hw.h"
#include "core/memory.h"
@@ -33,7 +32,7 @@ namespace GPU {
Regs g_regs;
/// 268MHz CPU clocks / 60Hz frames per second
-const u64 frame_ticks = 268123480ull / 60;
+const u64 frame_ticks = BASE_CLOCK_RATE_ARM11 / 60;
/// Event id for CoreTiming
static int vblank_event;
/// Total number of frames drawn
@@ -551,9 +550,6 @@ static void VBlankCallback(u64 userdata, int cycles_late) {
- // Check for user input updates
- Service::HID::Update();
if (!Settings::values.use_vsync && Settings::values.toggle_framelimit) {
diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp
index 1c10740a0..09266e8b0 100644
--- a/src/core/loader/3dsx.cpp
+++ b/src/core/loader/3dsx.cpp
@@ -177,18 +177,34 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr,
pos += table.skip;
s32 num_patches = table.patch;
while (0 < num_patches && pos < end_pos) {
- u32 in_addr =
- static_cast<u32>(reinterpret_cast<u8*>(pos) -;
- u32 addr = TranslateAddr(*pos, &loadinfo, offsets);
- LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)",
- base_addr + in_addr, addr, current_segment_reloc_table, *pos);
+ u32 in_addr = base_addr + static_cast<u32>(reinterpret_cast<u8*>(pos) -
+ u32 orig_data = *pos;
+ u32 sub_type = orig_data >> (32 - 4);
+ u32 addr = TranslateAddr(orig_data & ~0xF0000000, &loadinfo, offsets);
+ LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", in_addr, addr,
+ current_segment_reloc_table, *pos);
switch (current_segment_reloc_table) {
- case 0:
- *pos = (addr);
+ case 0: {
+ if (sub_type != 0)
+ return ERROR_READ;
+ *pos = addr;
- case 1:
- *pos = static_cast<u32>(addr - in_addr);
+ }
+ case 1: {
+ u32 data = addr - in_addr;
+ switch (sub_type) {
+ case 0: // 32-bit signed offset
+ *pos = data;
+ break;
+ case 1: // 31-bit signed offset
+ *pos = data & ~(1U << 31);
+ break;
+ default:
+ return ERROR_READ;
+ }
+ }
break; // this should never happen
diff --git a/src/core/settings.h b/src/core/settings.h
index 8dbda653a..e22ce0f16 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -104,6 +104,7 @@ struct Values {
// Audio
std::string sink_id;
bool enable_audio_stretching;
+ std::string audio_device_id;
// Debugging
bool use_gdbstub;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6ca319b59..d55b84ce0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -50,10 +50,12 @@ set(HEADERS
set(SRCS ${SRCS}
- shader/shader_jit_x64.cpp)
+ shader/shader_jit_x64.cpp
+ shader/shader_jit_x64_compiler.cpp)
- shader/shader_jit_x64.h)
+ shader/shader_jit_x64.h
+ shader/shader_jit_x64_compiler.h)
create_directory_groups(${SRCS} ${HEADERS})
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index ea58e9f54..eb79974a8 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -142,16 +142,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
immediate_attribute_id = 0;
- Shader::UnitState shader_unit;
- g_state.vs.Setup();
+ auto* shader_engine = Shader::GetEngine();
+ shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
// Send to vertex shader
if (g_debug_context)
- g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes + 1);
- Shader::OutputVertex output_vertex =
- shader_unit.output_registers.ToVertex(regs.vs);
+ Shader::UnitState shader_unit;
+ shader_unit.LoadInputVertex(immediate_input, regs.vs.num_input_attributes + 1);
+ shader_engine->Run(g_state.vs, shader_unit);
+ auto output_vertex = Shader::OutputVertex::FromRegisters(
+ shader_unit.registers.output, regs, regs.vs.output_mask);
// Send to renderer
using Pica::Shader::OutputVertex;
@@ -243,8 +245,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
unsigned int vertex_cache_pos = 0;
+ auto* shader_engine = Shader::GetEngine();
Shader::UnitState shader_unit;
- g_state.vs.Setup();
+ shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset);
for (unsigned int index = 0; index < regs.num_vertices; ++index) {
// Indexed rendering doesn't use the start offset
@@ -283,10 +287,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
if (g_debug_context)
- g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
+ shader_unit.LoadInputVertex(input, loader.GetNumTotalAttributes());
+ shader_engine->Run(g_state.vs, shader_unit);
// Retrieve vertex from register data
- output_vertex = shader_unit.output_registers.ToVertex(regs.vs);
+ output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output,
+ regs, regs.vs.output_mask);
if (is_indexed) {
vertex_cache[vertex_cache_pos] = output_vertex;
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ce2bd455e..b4a77c632 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -499,7 +499,7 @@ void Init() {
void Shutdown() {
- Shader::ClearCache();
+ Shader::Shutdown();
template <typename T>
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 5a306a5c8..f3674e965 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -716,8 +716,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
- using PixelFormat = CachedSurface::PixelFormat;
- using SurfaceType = CachedSurface::SurfaceType;
CachedSurface src_params;
src_params.addr = config.GetPhysicalInputAddress();
@@ -748,7 +746,8 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe
// Adjust the source rectangle to take into account parts of the input lines being cropped
if (config.input_width > config.output_width) {
- src_rect.right -= (config.input_width - config.output_width) * src_surface->res_scale_width;
+ src_rect.right -= static_cast<int>((config.input_width - config.output_width) *
+ src_surface->res_scale_width);
// Require destination surface to have same resolution scale as source to preserve scaling
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index e1a9cb361..cc3e4bed5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -76,7 +76,7 @@ union PicaShaderConfig {
state.fog_mode = regs.fog_mode;
- state.fog_flip = regs.fog_flip;
+ state.fog_flip = regs.fog_flip != 0;
state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index ef3b06a7b..1e7eedecb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -172,7 +172,6 @@ bool RasterizerCacheOpenGL::TryBlitSurfaces(CachedSurface* src_surface,
const MathUtil::Rectangle<int>& src_rect,
CachedSurface* dst_surface,
const MathUtil::Rectangle<int>& dst_rect) {
- using SurfaceType = CachedSurface::SurfaceType;
if (!CachedSurface::CheckFormatsBlittable(src_surface->pixel_format,
dst_surface->pixel_format)) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b50e8292b..f57fdb3cc 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -8,7 +8,14 @@
#include <memory>
#include <set>
#include <tuple>
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-local-typedef"
#include <boost/icl/interval_map.hpp>
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
#include <glad/glad.h>
#include "common/assert.h"
#include "common/common_funcs.h"
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index a4aa3c9e0..2da50bd62 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,14 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <atomic>
#include <cmath>
#include <cstring>
-#include <unordered_map>
-#include <utility>
-#include <boost/range/algorithm/fill.hpp>
-#include "common/bit_field.h"
-#include "common/hash.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/pica.h"
@@ -25,7 +19,8 @@ namespace Pica {
namespace Shader {
-OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const {
+OutputVertex OutputVertex::FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs,
+ u32 output_mask) {
// Setup output data
OutputVertex ret;
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
@@ -33,13 +28,13 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const {
unsigned index = 0;
for (unsigned i = 0; i < 7; ++i) {
- if (index >= g_state.regs.vs_output_total)
+ if (index >= regs.vs_output_total)
- if ((config.output_mask & (1 << i)) == 0)
+ if ((output_mask & (1 << i)) == 0)
- const auto& output_register_map = g_state.regs.vs_output_attributes[index];
+ const auto& output_register_map = regs.vs_output_attributes[index];
u32 semantics[4] = {output_register_map.map_x, output_register_map.map_y,
output_register_map.map_z, output_register_map.map_w};
@@ -47,7 +42,7 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const {
for (unsigned comp = 0; comp < 4; ++comp) {
float24* out = ((float24*)&ret) + semantics[comp];
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
- *out = value[i][comp];
+ *out = output_regs[i][comp];
} else {
// Zero output so that attributes which aren't output won't have denormals in them,
// which would slow us down later.
@@ -76,86 +71,41 @@ OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) const {
return ret;
-#ifdef ARCHITECTURE_x86_64
-static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
-static const JitShader* jit_shader;
-#endif // ARCHITECTURE_x86_64
+void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) {
+ // Setup input register table
+ const auto& attribute_register_map = g_state.regs.vs.input_register_map;
+ for (int i = 0; i < num_attributes; i++)
+ registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
+MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
-void ClearCache() {
#ifdef ARCHITECTURE_x86_64
- shader_map.clear();
+static std::unique_ptr<JitX64Engine> jit_engine;
#endif // ARCHITECTURE_x86_64
+static InterpreterEngine interpreter_engine;
-void ShaderSetup::Setup() {
+ShaderEngine* GetEngine() {
#ifdef ARCHITECTURE_x86_64
+ // TODO(yuriks): Re-initialize on each change rather than being persistent
if (VideoCore::g_shader_jit_enabled) {
- u64 cache_key =
- Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^
- Common::ComputeHash64(&g_state.vs.swizzle_data, sizeof(g_state.vs.swizzle_data));
- auto iter = shader_map.find(cache_key);
- if (iter != shader_map.end()) {
- jit_shader = iter->second.get();
- } else {
- auto shader = std::make_unique<JitShader>();
- shader->Compile();
- jit_shader = shader.get();
- shader_map[cache_key] = std::move(shader);
+ if (jit_engine == nullptr) {
+ jit_engine = std::make_unique<JitX64Engine>();
+ return jit_engine.get();
#endif // ARCHITECTURE_x86_64
-MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
-void ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) {
- auto& config = g_state.regs.vs;
- auto& setup = g_state.vs;
- // Setup input register table
- const auto& attribute_register_map = config.input_register_map;
- for (unsigned i = 0; i < num_attributes; i++)
- state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
- state.conditional_code[0] = false;
- state.conditional_code[1] = false;
+ return &interpreter_engine;
+void Shutdown() {
#ifdef ARCHITECTURE_x86_64
- if (VideoCore::g_shader_jit_enabled) {
- jit_shader->Run(setup, state, config.main_offset);
- } else {
- DebugData<false> dummy_debug_data;
- RunInterpreter(setup, state, dummy_debug_data, config.main_offset);
- }
- DebugData<false> dummy_debug_data;
- RunInterpreter(setup, state, dummy_debug_data, config.main_offset);
+ jit_engine = nullptr;
#endif // ARCHITECTURE_x86_64
-DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes,
- const Regs::ShaderConfig& config,
- const ShaderSetup& setup) {
- UnitState state;
- DebugData<true> debug_data;
- // Setup input register table
- boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
- const auto& attribute_register_map = config.input_register_map;
- for (unsigned i = 0; i < num_attributes; i++)
- state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i];
- state.conditional_code[0] = false;
- state.conditional_code[1] = false;
- RunInterpreter(setup, state, debug_data, config.main_offset);
- return debug_data;
} // namespace Shader
} // namespace Pica
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 2b07759b9..44d9f76c3 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -6,7 +6,6 @@
#include <array>
#include <cstddef>
-#include <memory>
#include <type_traits>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
@@ -15,7 +14,6 @@
#include "common/vector_math.h"
#include "video_core/pica.h"
#include "video_core/pica_types.h"
-#include "video_core/shader/debug_data.h"
using nihstro::RegisterType;
using nihstro::SourceRegister;
@@ -75,19 +73,13 @@ struct OutputVertex {
ret.Lerp(factor, v1);
return ret;
+ static OutputVertex FromRegisters(Math::Vec4<float24> output_regs[16], const Regs& regs,
+ u32 output_mask);
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
-struct OutputRegisters {
- OutputRegisters() = default;
- alignas(16) Math::Vec4<float24> value[16];
- OutputVertex ToVertex(const Regs::ShaderConfig& config) const;
-static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
* This structure contains the state information that needs to be unique for a shader unit. The 3DS
* has four shader units that process shaders in parallel. At the present, Citra only implements a
@@ -100,11 +92,10 @@ struct UnitState {
// required to be 16-byte aligned.
alignas(16) Math::Vec4<float24> input[16];
alignas(16) Math::Vec4<float24> temporary[16];
+ alignas(16) Math::Vec4<float24> output[16];
} registers;
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
- OutputRegisters output_registers;
bool conditional_code[2];
// Two Address registers and one loop counter
@@ -130,7 +121,7 @@ struct UnitState {
static size_t OutputOffset(const DestRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Output:
- return offsetof(UnitState, output_registers.value) +
+ return offsetof(UnitState, registers.output) +
reg.GetIndex() * sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
@@ -142,13 +133,17 @@ struct UnitState {
return 0;
-/// Clears the shader cache
-void ClearCache();
+ /**
+ * Loads the unit state with an input vertex.
+ *
+ * @param input Input vertex into the shader
+ * @param num_attributes The number of vertex shader attributes to load
+ */
+ void LoadInputVertex(const InputVertex& input, int num_attributes);
struct ShaderSetup {
struct {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
// therefore required to be 16-byte aligned.
@@ -173,32 +168,37 @@ struct ShaderSetup {
std::array<u32, 1024> program_code;
std::array<u32, 1024> swizzle_data;
+ /// Data private to ShaderEngines
+ struct EngineData {
+ unsigned int entry_point;
+ /// Used by the JIT, points to a compiled shader object.
+ const void* cached_shader = nullptr;
+ } engine_data;
+class ShaderEngine {
+ virtual ~ShaderEngine() = default;
* Performs any shader unit setup that only needs to happen once per shader (as opposed to once
* per vertex, which would happen within the `Run` function).
- void Setup();
- /**
- * Runs the currently setup shader
- * @param state Shader unit state, must be setup per shader and per shader unit
- * @param input Input vertex into the shader
- * @param num_attributes The number of vertex shader attributes
- */
- void Run(UnitState& state, const InputVertex& input, int num_attributes);
+ virtual void SetupBatch(ShaderSetup& setup, unsigned int entry_point) = 0;
- * Produce debug information based on the given shader and input vertex
- * @param input Input vertex into the shader
- * @param num_attributes The number of vertex shader attributes
- * @param config Configuration object for the shader pipeline
- * @param setup Setup object for the shader pipeline
- * @return Debug information for this shader with regards to the given vertex
+ * Runs the currently setup shader.
+ *
+ * @param setup Shader engine state, must be set up with SetupBatch on each shader change.
+ * @param state Shader unit state, must be set up with input data before each shader invocation.
- DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
- const Regs::ShaderConfig& config, const ShaderSetup& setup);
+ virtual void Run(const ShaderSetup& setup, UnitState& state) const = 0;
+// TODO(yuriks): Remove and make it non-global state somewhere
+ShaderEngine* GetEngine();
+void Shutdown();
} // namespace Shader
} // namespace Pica
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 20fb9754b..c0c89b857 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -7,10 +7,12 @@
#include <cmath>
#include <numeric>
#include <boost/container/static_vector.hpp>
+#include <boost/range/algorithm/fill.hpp>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/vector_math.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
@@ -37,12 +39,15 @@ struct CallStackElement {
template <bool Debug>
-void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
- unsigned offset) {
+static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
+ unsigned offset) {
// TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack;
u32 program_counter = offset;
+ state.conditional_code[0] = false;
+ state.conditional_code[1] = false;
auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset,
u8 repeat_count, u8 loop_increment) {
// -1 to make sure when incrementing the PC we end up at the correct offset
@@ -73,9 +78,9 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
- const auto& uniforms = g_state.vs.uniforms;
- const auto& swizzle_data = g_state.vs.swizzle_data;
- const auto& program_code = g_state.vs.program_code;
+ const auto& uniforms = setup.uniforms;
+ const auto& swizzle_data = setup.swizzle_data;
+ const auto& program_code = setup.program_code;
// Placeholder for invalid inputs
static float24 dummy_vec4_float24[4];
@@ -170,7 +175,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
float24* dest =
(instr.common.dest.Value() < 0x10)
- ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
+ ? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
: (instr.common.dest.Value() < 0x20)
? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
@@ -513,7 +518,7 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
float24* dest =
(instr.mad.dest.Value() < 0x10)
- ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
+ ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
: (instr.mad.dest.Value() < 0x20)
? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
@@ -647,9 +652,33 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
-// Explicit instantiation
-template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset);
-template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset);
+void InterpreterEngine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
+ ASSERT(entry_point < 1024);
+ setup.engine_data.entry_point = entry_point;
+void InterpreterEngine::Run(const ShaderSetup& setup, UnitState& state) const {
+ DebugData<false> dummy_debug_data;
+ RunInterpreter(setup, state, dummy_debug_data, setup.engine_data.entry_point);
+DebugData<true> InterpreterEngine::ProduceDebugInfo(const ShaderSetup& setup,
+ const InputVertex& input,
+ int num_attributes) const {
+ UnitState state;
+ DebugData<true> debug_data;
+ // Setup input register table
+ boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
+ state.LoadInputVertex(input, num_attributes);
+ RunInterpreter(setup, state, debug_data, setup.engine_data.entry_point);
+ return debug_data;
} // namespace
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index d31dcd7a6..d6c0e2d8c 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -4,18 +4,28 @@
#pragma once
+#include "video_core/shader/debug_data.h"
+#include "video_core/shader/shader.h"
namespace Pica {
namespace Shader {
-struct UnitState;
-template <bool Debug>
-struct DebugData;
-template <bool Debug>
-void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
- unsigned offset);
+class InterpreterEngine final : public ShaderEngine {
+ void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
+ void Run(const ShaderSetup& setup, UnitState& state) const override;
+ /**
+ * Produce debug information based on the given shader and input vertex
+ * @param input Input vertex into the shader
+ * @param num_attributes The number of vertex shader attributes
+ * @param setup Setup object for the shader pipeline
+ * @return Debug information for this shader with regards to the given vertex
+ */
+ DebugData<true> ProduceDebugInfo(const ShaderSetup& setup, const InputVertex& input,
+ int num_attributes) const;
} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index c588b778b..0ee0dd9ef 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -1,888 +1,48 @@
-// Copyright 2015 Citra Emulator Project
+// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <nihstro/shader_bytecode.h>
-#include <smmintrin.h>
-#include <xmmintrin.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/vector_math.h"
-#include "common/x64/cpu_detect.h"
-#include "common/x64/xbyak_abi.h"
-#include "common/x64/xbyak_util.h"
-#include "video_core/pica_state.h"
-#include "video_core/pica_types.h"
+#include "common/hash.h"
+#include "common/microprofile.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_x64.h"
-using namespace Common::X64;
-using namespace Xbyak::util;
-using Xbyak::Label;
-using Xbyak::Reg32;
-using Xbyak::Reg64;
-using Xbyak::Xmm;
+#include "video_core/shader/shader_jit_x64_compiler.h"
namespace Pica {
namespace Shader {
-typedef void (JitShader::*JitFunction)(Instruction instr);
-const JitFunction instr_table[64] = {
- &JitShader::Compile_ADD, // add
- &JitShader::Compile_DP3, // dp3
- &JitShader::Compile_DP4, // dp4
- &JitShader::Compile_DPH, // dph
- nullptr, // unknown
- &JitShader::Compile_EX2, // ex2
- &JitShader::Compile_LG2, // lg2
- nullptr, // unknown
- &JitShader::Compile_MUL, // mul
- &JitShader::Compile_SGE, // sge
- &JitShader::Compile_SLT, // slt
- &JitShader::Compile_FLR, // flr
- &JitShader::Compile_MAX, // max
- &JitShader::Compile_MIN, // min
- &JitShader::Compile_RCP, // rcp
- &JitShader::Compile_RSQ, // rsq
- nullptr, // unknown
- nullptr, // unknown
- &JitShader::Compile_MOVA, // mova
- &JitShader::Compile_MOV, // mov
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- &JitShader::Compile_DPH, // dphi
- nullptr, // unknown
- &JitShader::Compile_SGE, // sgei
- &JitShader::Compile_SLT, // slti
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- nullptr, // unknown
- &JitShader::Compile_NOP, // nop
- &JitShader::Compile_END, // end
- nullptr, // break
- &JitShader::Compile_CALL, // call
- &JitShader::Compile_CALLC, // callc
- &JitShader::Compile_CALLU, // callu
- &JitShader::Compile_IF, // ifu
- &JitShader::Compile_IF, // ifc
- &JitShader::Compile_LOOP, // loop
- nullptr, // emit
- nullptr, // sete
- &JitShader::Compile_JMP, // jmpc
- &JitShader::Compile_JMP, // jmpu
- &JitShader::Compile_CMP, // cmp
- &JitShader::Compile_CMP, // cmp
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // madi
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
- &JitShader::Compile_MAD, // mad
-// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
-// be used as scratch registers within a compiler function. The other registers have designated
-// purposes, as documented below:
+JitX64Engine::JitX64Engine() = default;
+JitX64Engine::~JitX64Engine() = default;
-/// Pointer to the uniform memory
-static const Reg64 SETUP = r9;
-/// The two 32-bit VS address offset registers set by the MOVA instruction
-static const Reg64 ADDROFFS_REG_0 = r10;
-static const Reg64 ADDROFFS_REG_1 = r11;
-/// VS loop count register (Multiplied by 16)
-static const Reg32 LOOPCOUNT_REG = r12d;
-/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this quicker)
-static const Reg32 LOOPCOUNT = esi;
-/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
-static const Reg32 LOOPINC = edi;
-/// Result of the previous CMP instruction for the X-component comparison
-static const Reg64 COND0 = r13;
-/// Result of the previous CMP instruction for the Y-component comparison
-static const Reg64 COND1 = r14;
-/// Pointer to the UnitState instance for the current VS unit
-static const Reg64 STATE = r15;
-/// SIMD scratch register
-static const Xmm SCRATCH = xmm0;
-/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
-static const Xmm SRC1 = xmm1;
-/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
-static const Xmm SRC2 = xmm2;
-/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
-static const Xmm SRC3 = xmm3;
-/// Additional scratch register
-static const Xmm SCRATCH2 = xmm4;
-/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
-static const Xmm ONE = xmm14;
-/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
-static const Xmm NEGBIT = xmm15;
+void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
+ ASSERT(entry_point < 1024);
+ setup.engine_data.entry_point = entry_point;
-// State registers that must not be modified by external functions calls
-// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
-static const BitSet32 persistent_regs = BuildRegSet({
- // Pointers to register blocks
- // Cached registers
- // Constants
+ u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code));
+ u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data));
-/// Raw constant for the source register selector that indicates no swizzling is performed
-static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
-/// Raw constant for the destination register enable mask that indicates all components are enabled
-static const u8 NO_DEST_REG_MASK = 0xf;
- * Get the vertex shader instruction for a given offset in the current shader program
- * @param offset Offset in the current shader program of the instruction
- * @return Instruction at the specified offset
- */
-static Instruction GetVertexShaderInstruction(size_t offset) {
- return {g_state.vs.program_code[offset]};
-static void LogCritical(const char* msg) {
- LOG_CRITICAL(HW_GPU, "%s", msg);
-void JitShader::Compile_Assert(bool condition, const char* msg) {
- if (!condition) {
- mov(ABI_PARAM1, reinterpret_cast<size_t>(msg));
- CallFarFunction(*this, LogCritical);
- }
- * Loads and swizzles a source register into the specified XMM register.
- * @param instr VS instruction, used for determining how to load the source register
- * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
- * @param src_reg SourceRegister object corresponding to the source register to load
- * @param dest Destination XMM register to store the loaded, swizzled source register
- */
-void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
- Xmm dest) {
- Reg64 src_ptr;
- size_t src_offset;
- if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
- src_ptr = SETUP;
- src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
+ u64 cache_key = code_hash ^ swizzle_hash;
+ auto iter = cache.find(cache_key);
+ if (iter != cache.end()) {
+ setup.engine_data.cached_shader = iter->second.get();
} else {
- src_ptr = STATE;
- src_offset = UnitState::InputOffset(src_reg);
- }
- int src_offset_disp = (int)src_offset;
- ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
- unsigned operand_desc_id;
- const bool is_inverted =
- (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
- unsigned address_register_index;
- unsigned offset_src;
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
- instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
- operand_desc_id = instr.mad.operand_desc_id;
- offset_src = is_inverted ? 3 : 2;
- address_register_index = instr.mad.address_register_index;
- } else {
- operand_desc_id = instr.common.operand_desc_id;
- offset_src = is_inverted ? 2 : 1;
- address_register_index = instr.common.address_register_index;
- }
- if (src_num == offset_src && address_register_index != 0) {
- switch (address_register_index) {
- case 1: // address offset 1
- movaps(dest, xword[src_ptr + ADDROFFS_REG_0 + src_offset_disp]);
- break;
- case 2: // address offset 2
- movaps(dest, xword[src_ptr + ADDROFFS_REG_1 + src_offset_disp]);
- break;
- case 3: // address offset 3
- movaps(dest, xword[src_ptr + LOOPCOUNT_REG.cvt64() + src_offset_disp]);
- break;
- default:
- break;
- }
- } else {
- // Load the source
- movaps(dest, xword[src_ptr + src_offset_disp]);
- }
- SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
- // Generate instructions for source register swizzling as needed
- u8 sel = swiz.GetRawSelector(src_num);
- if (sel != NO_SRC_REG_SWIZZLE) {
- // Selector component order needs to be reversed for the SHUFPS instruction
- sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2);
- // Shuffle inputs for swizzle
- shufps(dest, dest, sel);
- }
- // If the source register should be negated, flip the negative bit using XOR
- const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
- if (negate[src_num - 1]) {
- xorps(dest, NEGBIT);
+ auto shader = std::make_unique<JitShader>();
+ shader->Compile(&setup.program_code, &setup.swizzle_data);
+ setup.engine_data.cached_shader = shader.get();
+ cache.emplace_hint(iter, cache_key, std::move(shader));
-void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
- DestRegister dest;
- unsigned operand_desc_id;
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
- instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
- operand_desc_id = instr.mad.operand_desc_id;
- dest = instr.mad.dest.Value();
- } else {
- operand_desc_id = instr.common.operand_desc_id;
- dest = instr.common.dest.Value();
- }
- SwizzlePattern swiz = {g_state.vs.swizzle_data[operand_desc_id]};
- size_t dest_offset_disp = UnitState::OutputOffset(dest);
- // If all components are enabled, write the result to the destination register
- if (swiz.dest_mask == NO_DEST_REG_MASK) {
- // Store dest back to memory
- movaps(xword[STATE + dest_offset_disp], src);
- } else {
- // Not all components are enabled, so mask the result when storing to the destination
- // register...
- movaps(SCRATCH, xword[STATE + dest_offset_disp]);
- if (Common::GetCPUCaps().sse4_1) {
- u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
- ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
- blendps(SCRATCH, src, mask);
- } else {
- movaps(SCRATCH2, src);
- unpckhps(SCRATCH2, SCRATCH); // Unpack X/Y components of source and destination
- unpcklps(SCRATCH, src); // Unpack Z/W components of source and destination
- // Compute selector to selectively copy source components to destination for SHUFPS
- // instruction
- u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
- ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
- ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
- ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
- shufps(SCRATCH, SCRATCH2, sel);
- }
- // Store dest back to memory
- movaps(xword[STATE + dest_offset_disp], SCRATCH);
- }
-void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) {
- movaps(scratch, src1);
- cmpordps(scratch, src2);
- mulps(src1, src2);
- movaps(src2, src1);
- cmpunordps(src2, src2);
+void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const {
+ ASSERT(setup.engine_data.cached_shader != nullptr);
- xorps(scratch, src2);
- andps(src1, scratch);
-void JitShader::Compile_EvaluateCondition(Instruction instr) {
- // Note: NXOR is used below to check for equality
- switch (instr.flow_control.op) {
- case Instruction::FlowControlType::Or:
- mov(eax, COND0);
- mov(ebx, COND1);
- xor(eax, (instr.flow_control.refx.Value() ^ 1));
- xor(ebx, (instr.flow_control.refy.Value() ^ 1));
- or (eax, ebx);
- break;
- case Instruction::FlowControlType::And:
- mov(eax, COND0);
- mov(ebx, COND1);
- xor(eax, (instr.flow_control.refx.Value() ^ 1));
- xor(ebx, (instr.flow_control.refy.Value() ^ 1));
- and(eax, ebx);
- break;
- case Instruction::FlowControlType::JustX:
- mov(eax, COND0);
- xor(eax, (instr.flow_control.refx.Value() ^ 1));
- break;
- case Instruction::FlowControlType::JustY:
- mov(eax, COND1);
- xor(eax, (instr.flow_control.refy.Value() ^ 1));
- break;
- }
-void JitShader::Compile_UniformCondition(Instruction instr) {
- size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
- cmp(byte[SETUP + offset], 0);
+ const JitShader* shader = static_cast<const JitShader*>(setup.engine_data.cached_shader);
+ shader->Run(setup, state, setup.engine_data.entry_point);
-BitSet32 JitShader::PersistentCallerSavedRegs() {
- return persistent_regs & ABI_ALL_CALLER_SAVED;
-void JitShader::Compile_ADD(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- addps(SRC1, SRC2);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_DP3(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- movaps(SRC2, SRC1);
- shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1));
- movaps(SRC3, SRC1);
- shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2));
- shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0));
- addps(SRC1, SRC2);
- addps(SRC1, SRC3);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_DP4(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- movaps(SRC2, SRC1);
- shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
- addps(SRC1, SRC2);
- movaps(SRC2, SRC1);
- shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
- addps(SRC1, SRC2);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_DPH(Instruction instr) {
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
- } else {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- }
- if (Common::GetCPUCaps().sse4_1) {
- // Set 4th component to 1.0
- blendps(SRC1, ONE, 0b1000);
- } else {
- // Set 4th component to 1.0
- movaps(SCRATCH, SRC1);
- unpckhps(SCRATCH, ONE); // XYZW, 1111 -> Z1__
- unpcklpd(SRC1, SCRATCH); // XYZW, Z1__ -> XYZ1
- }
- Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- movaps(SRC2, SRC1);
- shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> ZWXY
- addps(SRC1, SRC2);
- movaps(SRC2, SRC1);
- shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
- addps(SRC1, SRC2);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_EX2(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- movss(xmm0, SRC1); // ABI_PARAM1
- ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
- CallFarFunction(*this, exp2f);
- ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
- shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
- movaps(SRC1, xmm0);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_LG2(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- movss(xmm0, SRC1); // ABI_PARAM1
- ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
- CallFarFunction(*this, log2f);
- ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
- shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
- movaps(SRC1, xmm0);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_MUL(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_SGE(Instruction instr) {
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
- } else {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- }
- cmpleps(SRC2, SRC1);
- andps(SRC2, ONE);
- Compile_DestEnable(instr, SRC2);
-void JitShader::Compile_SLT(Instruction instr) {
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
- } else {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- }
- cmpltps(SRC1, SRC2);
- andps(SRC1, ONE);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_FLR(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- if (Common::GetCPUCaps().sse4_1) {
- roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
- } else {
- cvttps2dq(SRC1, SRC1);
- cvtdq2ps(SRC1, SRC1);
- }
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_MAX(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
- maxps(SRC1, SRC2);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_MIN(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
- minps(SRC1, SRC2);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_MOVA(Instruction instr) {
- SwizzlePattern swiz = {g_state.vs.swizzle_data[instr.common.operand_desc_id]};
- if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
- return; // NoOp
- }
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // Convert floats to integers using truncation (only care about X and Y components)
- cvttps2dq(SRC1, SRC1);
- // Get result
- movq(rax, SRC1);
- // Handle destination enable
- if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) {
- // Move and sign-extend low 32 bits
- movsxd(ADDROFFS_REG_0, eax);
- // Move and sign-extend high 32 bits
- shr(rax, 32);
- movsxd(ADDROFFS_REG_1, eax);
- // Multiply by 16 to be used as an offset later
- shl(ADDROFFS_REG_0, 4);
- shl(ADDROFFS_REG_1, 4);
- } else {
- if (swiz.DestComponentEnabled(0)) {
- // Move and sign-extend low 32 bits
- movsxd(ADDROFFS_REG_0, eax);
- // Multiply by 16 to be used as an offset later
- shl(ADDROFFS_REG_0, 4);
- } else if (swiz.DestComponentEnabled(1)) {
- // Move and sign-extend high 32 bits
- shr(rax, 32);
- movsxd(ADDROFFS_REG_1, eax);
- // Multiply by 16 to be used as an offset later
- shl(ADDROFFS_REG_1, 4);
- }
- }
-void JitShader::Compile_MOV(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_RCP(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
- // performs this operation more accurately. This should be checked on hardware.
- rcpss(SRC1, SRC1);
- shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_RSQ(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
- // performs this operation more accurately. This should be checked on hardware.
- rsqrtss(SRC1, SRC1);
- shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYWZ -> XXXX
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_NOP(Instruction instr) {}
-void JitShader::Compile_END(Instruction instr) {
- ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8);
- ret();
-void JitShader::Compile_CALL(Instruction instr) {
- // Push offset of the return
- push(qword, (instr.flow_control.dest_offset + instr.flow_control.num_instructions));
- // Call the subroutine
- call(instruction_labels[instr.flow_control.dest_offset]);
- // Skip over the return offset that's on the stack
- add(rsp, 8);
-void JitShader::Compile_CALLC(Instruction instr) {
- Compile_EvaluateCondition(instr);
- Label b;
- jz(b);
- Compile_CALL(instr);
- L(b);
-void JitShader::Compile_CALLU(Instruction instr) {
- Compile_UniformCondition(instr);
- Label b;
- jz(b);
- Compile_CALL(instr);
- L(b);
-void JitShader::Compile_CMP(Instruction instr) {
- using Op = Instruction::Common::CompareOpType::Op;
- Op op_x = instr.common.compare_op.x;
- Op op_y = instr.common.compare_op.y;
- Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
- Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
- // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
- // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
- // because they don't match when used with NaNs.
- static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
- bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
- Xmm lhs_x = invert_op_x ? SRC2 : SRC1;
- Xmm rhs_x = invert_op_x ? SRC1 : SRC2;
- if (op_x == op_y) {
- // Compare X-component and Y-component together
- cmpps(lhs_x, rhs_x, cmp[op_x]);
- movq(COND0, lhs_x);
- mov(COND1, COND0);
- } else {
- bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual);
- Xmm lhs_y = invert_op_y ? SRC2 : SRC1;
- Xmm rhs_y = invert_op_y ? SRC1 : SRC2;
- // Compare X-component
- movaps(SCRATCH, lhs_x);
- cmpss(SCRATCH, rhs_x, cmp[op_x]);
- // Compare Y-component
- cmpps(lhs_y, rhs_y, cmp[op_y]);
- movq(COND0, SCRATCH);
- movq(COND1, lhs_y);
- }
- shr(COND0.cvt32(), 31); // ignores upper 32 bits in source
- shr(COND1, 63);
-void JitShader::Compile_MAD(Instruction instr) {
- Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
- Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2);
- Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3);
- } else {
- Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2);
- Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
- }
- Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
- addps(SRC1, SRC3);
- Compile_DestEnable(instr, SRC1);
-void JitShader::Compile_IF(Instruction instr) {
- Compile_Assert(instr.flow_control.dest_offset >= program_counter,
- "Backwards if-statements not supported");
- Label l_else, l_endif;
- // Evaluate the "IF" condition
- if (instr.opcode.Value() == OpCode::Id::IFU) {
- Compile_UniformCondition(instr);
- } else if (instr.opcode.Value() == OpCode::Id::IFC) {
- Compile_EvaluateCondition(instr);
- }
- jz(l_else, T_NEAR);
- // Compile the code that corresponds to the condition evaluating as true
- Compile_Block(instr.flow_control.dest_offset);
- // If there isn't an "ELSE" condition, we are done here
- if (instr.flow_control.num_instructions == 0) {
- L(l_else);
- return;
- }
- jmp(l_endif, T_NEAR);
- L(l_else);
- // This code corresponds to the "ELSE" condition
- // Comple the code that corresponds to the condition evaluating as false
- Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
- L(l_endif);
-void JitShader::Compile_LOOP(Instruction instr) {
- Compile_Assert(instr.flow_control.dest_offset >= program_counter,
- "Backwards loops not supported");
- Compile_Assert(!looping, "Nested loops not supported");
- looping = true;
- // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
- // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
- // 4 bits) to be used as an offset into the 16-byte vector registers later
- size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
- mov(LOOPCOUNT, dword[SETUP + offset]);
- shr(LOOPCOUNT_REG, 4);
- and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
- shr(LOOPINC, 12);
- and(LOOPINC, 0xFF0); // Z-component is the incrementer
- movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
- add(LOOPCOUNT, 1); // Iteration count is X-component + 1
- Label l_loop_start;
- L(l_loop_start);
- Compile_Block(instr.flow_control.dest_offset + 1);
- add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
- sub(LOOPCOUNT, 1); // Increment loop count by 1
- jnz(l_loop_start); // Loop if not equal
- looping = false;
-void JitShader::Compile_JMP(Instruction instr) {
- if (instr.opcode.Value() == OpCode::Id::JMPC)
- Compile_EvaluateCondition(instr);
- else if (instr.opcode.Value() == OpCode::Id::JMPU)
- Compile_UniformCondition(instr);
- else
- bool inverted_condition =
- (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
- Label& b = instruction_labels[instr.flow_control.dest_offset];
- if (inverted_condition) {
- jz(b, T_NEAR);
- } else {
- jnz(b, T_NEAR);
- }
-void JitShader::Compile_Block(unsigned end) {
- while (program_counter < end) {
- Compile_NextInstr();
- }
-void JitShader::Compile_Return() {
- // Peek return offset on the stack and check if we're at that offset
- mov(rax, qword[rsp + 8]);
- cmp(eax, (program_counter));
- // If so, jump back to before CALL
- Label b;
- jnz(b);
- ret();
- L(b);
-void JitShader::Compile_NextInstr() {
- if (std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter)) {
- Compile_Return();
- }
- L(instruction_labels[program_counter]);
- Instruction instr = GetVertexShaderInstruction(program_counter++);
- OpCode::Id opcode = instr.opcode.Value();
- auto instr_func = instr_table[static_cast<unsigned>(opcode)];
- if (instr_func) {
- // JIT the instruction!
- ((*this).*instr_func)(instr);
- } else {
- // Unhandled instruction
- LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
- instr.opcode.Value().EffectiveOpCode(), instr.hex);
- }
-void JitShader::FindReturnOffsets() {
- return_offsets.clear();
- for (size_t offset = 0; offset < g_state.vs.program_code.size(); ++offset) {
- Instruction instr = GetVertexShaderInstruction(offset);
- switch (instr.opcode.Value()) {
- case OpCode::Id::CALL:
- case OpCode::Id::CALLC:
- case OpCode::Id::CALLU:
- return_offsets.push_back(instr.flow_control.dest_offset +
- instr.flow_control.num_instructions);
- break;
- default:
- break;
- }
- }
- // Sort for efficient binary search later
- std::sort(return_offsets.begin(), return_offsets.end());
-void JitShader::Compile() {
- // Reset flow control state
- program = (CompiledShader*)getCurr();
- program_counter = 0;
- looping = false;
- instruction_labels.fill(Xbyak::Label());
- // Find all `CALL` instructions and identify return locations
- FindReturnOffsets();
- // The stack pointer is 8 modulo 16 at the entry of a procedure
- ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8);
- // Zero address/loop registers
- xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
- xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
- // Used to set a register to one
- static const __m128 one = {1.f, 1.f, 1.f, 1.f};
- mov(rax, reinterpret_cast<size_t>(&one));
- movaps(ONE, xword[rax]);
- // Used to negate registers
- static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
- mov(rax, reinterpret_cast<size_t>(&neg));
- movaps(NEGBIT, xword[rax]);
- // Jump to start of the shader program
- jmp(ABI_PARAM3);
- // Compile entire program
- Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
- // Free memory that's no longer needed
- return_offsets.clear();
- return_offsets.shrink_to_fit();
- ready();
- uintptr_t size = reinterpret_cast<uintptr_t>(getCurr()) - reinterpret_cast<uintptr_t>(program);
- ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
- LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size);
-JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {}
} // namespace Shader
} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index f37548306..078b2cba5 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -1,121 +1,30 @@
-// Copyright 2015 Citra Emulator Project
+// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
-#include <array>
-#include <cstddef>
-#include <utility>
-#include <vector>
-#include <nihstro/shader_bytecode.h>
-#include <xbyak.h>
-#include "common/bit_set.h"
+#include <memory>
+#include <unordered_map>
#include "common/common_types.h"
-#include "common/x64/emitter.h"
#include "video_core/shader/shader.h"
-using nihstro::Instruction;
-using nihstro::OpCode;
-using nihstro::SwizzlePattern;
namespace Pica {
namespace Shader {
-/// Memory allocated for each compiled shader (64Kb)
-constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
+class JitShader;
- * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
- * code that can be executed on the host machine directly.
- */
-class JitShader : public Xbyak::CodeGenerator {
+class JitX64Engine final : public ShaderEngine {
- JitShader();
- void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
- program(&setup, &state, instruction_labels[offset].getAddress());
- }
- void Compile();
+ JitX64Engine();
+ ~JitX64Engine() override;
- void Compile_ADD(Instruction instr);
- void Compile_DP3(Instruction instr);
- void Compile_DP4(Instruction instr);
- void Compile_DPH(Instruction instr);
- void Compile_EX2(Instruction instr);
- void Compile_LG2(Instruction instr);
- void Compile_MUL(Instruction instr);
- void Compile_SGE(Instruction instr);
- void Compile_SLT(Instruction instr);
- void Compile_FLR(Instruction instr);
- void Compile_MAX(Instruction instr);
- void Compile_MIN(Instruction instr);
- void Compile_RCP(Instruction instr);
- void Compile_RSQ(Instruction instr);
- void Compile_MOVA(Instruction instr);
- void Compile_MOV(Instruction instr);
- void Compile_NOP(Instruction instr);
- void Compile_END(Instruction instr);
- void Compile_CALL(Instruction instr);
- void Compile_CALLC(Instruction instr);
- void Compile_CALLU(Instruction instr);
- void Compile_IF(Instruction instr);
- void Compile_LOOP(Instruction instr);
- void Compile_JMP(Instruction instr);
- void Compile_CMP(Instruction instr);
- void Compile_MAD(Instruction instr);
+ void SetupBatch(ShaderSetup& setup, unsigned int entry_point) override;
+ void Run(const ShaderSetup& setup, UnitState& state) const override;
- void Compile_Block(unsigned end);
- void Compile_NextInstr();
- void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
- Xbyak::Xmm dest);
- void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
- /**
- * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
- * zero by inf. Clobbers `src2` and `scratch`.
- */
- void Compile_SanitizedMul(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
- void Compile_EvaluateCondition(Instruction instr);
- void Compile_UniformCondition(Instruction instr);
- /**
- * Emits the code to conditionally return from a subroutine envoked by the `CALL` instruction.
- */
- void Compile_Return();
- BitSet32 PersistentCallerSavedRegs();
- /**
- * Assertion evaluated at compile-time, but only triggered if executed at runtime.
- * @param msg Message to be logged if the assertion fails.
- */
- void Compile_Assert(bool condition, const char* msg);
- /**
- * Analyzes the entire shader program for `CALL` instructions before emitting any code,
- * identifying the locations where a return needs to be inserted.
- */
- void FindReturnOffsets();
- /// Mapping of Pica VS instructions to pointers in the emitted code
- std::array<Xbyak::Label, 1024> instruction_labels;
- /// Offsets in code where a return needs to be inserted
- std::vector<unsigned> return_offsets;
- unsigned program_counter = 0; ///< Offset of the next instruction to decode
- bool looping = false; ///< True if compiling a loop, used to check for nested loops
- using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
- CompiledShader* program = nullptr;
+ std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
-} // Shader
-} // Pica
+} // namespace Shader
+} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
new file mode 100644
index 000000000..49806e8c9
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -0,0 +1,884 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <nihstro/shader_bytecode.h>
+#include <smmintrin.h>
+#include <xmmintrin.h>
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/vector_math.h"
+#include "common/x64/cpu_detect.h"
+#include "common/x64/xbyak_abi.h"
+#include "common/x64/xbyak_util.h"
+#include "video_core/pica_state.h"
+#include "video_core/pica_types.h"
+#include "video_core/shader/shader.h"
+#include "video_core/shader/shader_jit_x64_compiler.h"
+using namespace Common::X64;
+using namespace Xbyak::util;
+using Xbyak::Label;
+using Xbyak::Reg32;
+using Xbyak::Reg64;
+using Xbyak::Xmm;
+namespace Pica {
+namespace Shader {
+typedef void (JitShader::*JitFunction)(Instruction instr);
+const JitFunction instr_table[64] = {
+ &JitShader::Compile_ADD, // add
+ &JitShader::Compile_DP3, // dp3
+ &JitShader::Compile_DP4, // dp4
+ &JitShader::Compile_DPH, // dph
+ nullptr, // unknown
+ &JitShader::Compile_EX2, // ex2
+ &JitShader::Compile_LG2, // lg2
+ nullptr, // unknown
+ &JitShader::Compile_MUL, // mul
+ &JitShader::Compile_SGE, // sge
+ &JitShader::Compile_SLT, // slt
+ &JitShader::Compile_FLR, // flr
+ &JitShader::Compile_MAX, // max
+ &JitShader::Compile_MIN, // min
+ &JitShader::Compile_RCP, // rcp
+ &JitShader::Compile_RSQ, // rsq
+ nullptr, // unknown
+ nullptr, // unknown
+ &JitShader::Compile_MOVA, // mova
+ &JitShader::Compile_MOV, // mov
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ &JitShader::Compile_DPH, // dphi
+ nullptr, // unknown
+ &JitShader::Compile_SGE, // sgei
+ &JitShader::Compile_SLT, // slti
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ nullptr, // unknown
+ &JitShader::Compile_NOP, // nop
+ &JitShader::Compile_END, // end
+ nullptr, // break
+ &JitShader::Compile_CALL, // call
+ &JitShader::Compile_CALLC, // callc
+ &JitShader::Compile_CALLU, // callu
+ &JitShader::Compile_IF, // ifu
+ &JitShader::Compile_IF, // ifc
+ &JitShader::Compile_LOOP, // loop
+ nullptr, // emit
+ nullptr, // sete
+ &JitShader::Compile_JMP, // jmpc
+ &JitShader::Compile_JMP, // jmpu
+ &JitShader::Compile_CMP, // cmp
+ &JitShader::Compile_CMP, // cmp
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // madi
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+ &JitShader::Compile_MAD, // mad
+// The following is used to alias some commonly used registers. Generally, RAX-RDX and XMM0-XMM3 can
+// be used as scratch registers within a compiler function. The other registers have designated
+// purposes, as documented below:
+/// Pointer to the uniform memory
+static const Reg64 SETUP = r9;
+/// The two 32-bit VS address offset registers set by the MOVA instruction
+static const Reg64 ADDROFFS_REG_0 = r10;
+static const Reg64 ADDROFFS_REG_1 = r11;
+/// VS loop count register (Multiplied by 16)
+static const Reg32 LOOPCOUNT_REG = r12d;
+/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
+static const Reg32 LOOPCOUNT = esi;
+/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
+static const Reg32 LOOPINC = edi;
+/// Result of the previous CMP instruction for the X-component comparison
+static const Reg64 COND0 = r13;
+/// Result of the previous CMP instruction for the Y-component comparison
+static const Reg64 COND1 = r14;
+/// Pointer to the UnitState instance for the current VS unit
+static const Reg64 STATE = r15;
+/// SIMD scratch register
+static const Xmm SCRATCH = xmm0;
+/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
+static const Xmm SRC1 = xmm1;
+/// Loaded with the second swizzled source register, otherwise can be used as a scratch register
+static const Xmm SRC2 = xmm2;
+/// Loaded with the third swizzled source register, otherwise can be used as a scratch register
+static const Xmm SRC3 = xmm3;
+/// Additional scratch register
+static const Xmm SCRATCH2 = xmm4;
+/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
+static const Xmm ONE = xmm14;
+/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
+static const Xmm NEGBIT = xmm15;
+// State registers that must not be modified by external function calls
+// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
+static const BitSet32 persistent_regs = BuildRegSet({
+ // Pointers to register blocks
+ // Cached registers
+ // Constants
+/// Raw constant for the source register selector that indicates no swizzling is performed
+static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
+/// Raw constant for the destination register enable mask that indicates all components are enabled
+static const u8 NO_DEST_REG_MASK = 0xf;
+static void LogCritical(const char* msg) {
+ LOG_CRITICAL(HW_GPU, "%s", msg);
+void JitShader::Compile_Assert(bool condition, const char* msg) {
+ if (!condition) {
+ mov(ABI_PARAM1, reinterpret_cast<size_t>(msg));
+ CallFarFunction(*this, LogCritical);
+ }
+/**
+ * Loads and swizzles a source register into the specified XMM register.
+ * @param instr VS instruction, used for determining how to load the source register
+ * @param src_num Number indicating which source register to load (1 = src1, 2 = src2, 3 = src3)
+ * @param src_reg SourceRegister object corresponding to the source register to load
+ * @param dest Destination XMM register to store the loaded, swizzled source register
+ */
+void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
+ Xmm dest) {
+ Reg64 src_ptr; // Base register the source is addressed from (SETUP or STATE)
+ size_t src_offset; // Byte offset of the source register relative to src_ptr
+ if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
+ src_ptr = SETUP;
+ src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
+ } else {
+ src_ptr = STATE;
+ src_offset = UnitState::InputOffset(src_reg);
+ }
+ int src_offset_disp = (int)src_offset; // Narrowed for use as an address displacement
+ ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
+ unsigned operand_desc_id;
+ const bool is_inverted =
+ (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+ unsigned address_register_index;
+ unsigned offset_src; // Number of the source operand that relative addressing applies to
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
+ instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
+ operand_desc_id = instr.mad.operand_desc_id; // MAD/MADI fields are read from instr.mad
+ offset_src = is_inverted ? 3 : 2;
+ address_register_index = instr.mad.address_register_index;
+ } else {
+ operand_desc_id = instr.common.operand_desc_id;
+ offset_src = is_inverted ? 2 : 1;
+ address_register_index = instr.common.address_register_index;
+ }
+ if (src_num == offset_src && address_register_index != 0) {
+ switch (address_register_index) {
+ case 1: // address offset 1
+ movaps(dest, xword[src_ptr + ADDROFFS_REG_0 + src_offset_disp]);
+ break;
+ case 2: // address offset 2
+ movaps(dest, xword[src_ptr + ADDROFFS_REG_1 + src_offset_disp]);
+ break;
+ case 3: // address offset 3
+ movaps(dest, xword[src_ptr + LOOPCOUNT_REG.cvt64() + src_offset_disp]);
+ break;
+ default: // NOTE(review): no load is emitted here; presumably unreachable - confirm
+ break;
+ }
+ } else {
+ // Load the source
+ movaps(dest, xword[src_ptr + src_offset_disp]);
+ }
+ SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]};
+ // Generate instructions for source register swizzling as needed
+ u8 sel = swiz.GetRawSelector(src_num);
+ if (sel != NO_SRC_REG_SWIZZLE) {
+ // Selector component order needs to be reversed for the SHUFPS instruction
+ sel = ((sel & 0xc0) >> 6) | ((sel & 3) << 6) | ((sel & 0xc) << 2) | ((sel & 0x30) >> 2);
+ // Shuffle inputs for swizzle
+ shufps(dest, dest, sel);
+ }
+ // If the source register should be negated, flip the negative bit using XOR
+ const bool negate[] = {swiz.negate_src1, swiz.negate_src2, swiz.negate_src3};
+ if (negate[src_num - 1]) { // src_num is 1-based, the table is 0-based
+ xorps(dest, NEGBIT);
+ }
+void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { // Masked store of src to instr's dest
+ DestRegister dest;
+ unsigned operand_desc_id;
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
+ instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
+ operand_desc_id = instr.mad.operand_desc_id; // MAD/MADI fields are read from instr.mad
+ dest = instr.mad.dest.Value();
+ } else {
+ operand_desc_id = instr.common.operand_desc_id;
+ dest = instr.common.dest.Value();
+ }
+ SwizzlePattern swiz = {(*swizzle_data)[operand_desc_id]};
+ size_t dest_offset_disp = UnitState::OutputOffset(dest);
+ // If all components are enabled, write the result to the destination register
+ if (swiz.dest_mask == NO_DEST_REG_MASK) {
+ // Store dest back to memory
+ movaps(xword[STATE + dest_offset_disp], src);
+ } else {
+ // Not all components are enabled, so mask the result when storing to the destination
+ // register...
+ movaps(SCRATCH, xword[STATE + dest_offset_disp]);
+ if (Common::GetCPUCaps().sse4_1) {
+ u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
+ ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); // bit-reversed 4-bit mask
+ blendps(SCRATCH, src, mask);
+ } else {
+ movaps(SCRATCH2, src);
+ unpckhps(SCRATCH2, SCRATCH); // Interleave Z/W components of source and destination
+ unpcklps(SCRATCH, src); // Interleave X/Y components of destination and source
+ // Compute selector to selectively copy source components to destination for SHUFPS
+ // instruction
+ u8 sel = ((swiz.DestComponentEnabled(0) ? 1 : 0) << 0) |
+ ((swiz.DestComponentEnabled(1) ? 3 : 2) << 2) |
+ ((swiz.DestComponentEnabled(2) ? 0 : 1) << 4) |
+ ((swiz.DestComponentEnabled(3) ? 2 : 3) << 6);
+ shufps(SCRATCH, SCRATCH2, sel);
+ }
+ // Store dest back to memory
+ movaps(xword[STATE + dest_offset_disp], SCRATCH);
+ }
+void JitShader::Compile_SanitizedMul(Xmm src1, Xmm src2, Xmm scratch) { // src1 *= src2, sanitized
+ movaps(scratch, src1);
+ cmpordps(scratch, src2); // scratch = all-ones in lanes where neither input is NaN
+ mulps(src1, src2);
+ movaps(src2, src1);
+ cmpunordps(src2, src2); // src2 = all-ones in lanes where the product is NaN
+ xorps(scratch, src2); // zero only where the inputs were not NaN but the product is
+ andps(src1, scratch); // those lanes become 0; every other lane keeps the product
+void JitShader::Compile_EvaluateCondition(Instruction instr) { // Leaves the combined result in eax
+ // Note: NXOR is used below to check for equality
+ switch (instr.flow_control.op) {
+ case Instruction::FlowControlType::Or:
+ mov(eax, COND0);
+ mov(ebx, COND1);
+ xor(eax, (instr.flow_control.refx.Value() ^ 1)); // 1 iff COND0 == refx, assuming COND0 is 0/1
+ xor(ebx, (instr.flow_control.refy.Value() ^ 1)); // 1 iff COND1 == refy, assuming COND1 is 0/1
+ or (eax, ebx);
+ break;
+ case Instruction::FlowControlType::And:
+ mov(eax, COND0);
+ mov(ebx, COND1);
+ xor(eax, (instr.flow_control.refx.Value() ^ 1));
+ xor(ebx, (instr.flow_control.refy.Value() ^ 1));
+ and(eax, ebx);
+ break;
+ case Instruction::FlowControlType::JustX:
+ mov(eax, COND0);
+ xor(eax, (instr.flow_control.refx.Value() ^ 1)); // only the X comparison is considered
+ break;
+ case Instruction::FlowControlType::JustY:
+ mov(eax, COND1);
+ xor(eax, (instr.flow_control.refy.Value() ^ 1)); // only the Y comparison is considered
+ break;
+ }
+void JitShader::Compile_UniformCondition(Instruction instr) { // Tests a boolean uniform against 0
+ size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
+ cmp(byte[SETUP + offset], 0); // zero flag ends up set when the uniform byte is 0
+BitSet32 JitShader::PersistentCallerSavedRegs() { // Registers in both persistent_regs and the caller-saved set
+ return persistent_regs & ABI_ALL_CALLER_SAVED;
+void JitShader::Compile_ADD(Instruction instr) { // ADD: dest = src1 + src2, per component
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ addps(SRC1, SRC2);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_DP3(Instruction instr) { // DP3: 3-component dot product in every lane
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ movaps(SRC2, SRC1);
+ shufps(SRC2, SRC2, _MM_SHUFFLE(1, 1, 1, 1)); // broadcast the Y product
+ movaps(SRC3, SRC1);
+ shufps(SRC3, SRC3, _MM_SHUFFLE(2, 2, 2, 2)); // broadcast the Z product
+ shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // broadcast the X product
+ addps(SRC1, SRC2);
+ addps(SRC1, SRC3);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_DP4(Instruction instr) { // DP4: 4-component dot product in every lane
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ movaps(SRC2, SRC1);
+ shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ
+ addps(SRC1, SRC2);
+ movaps(SRC2, SRC1);
+ shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ addps(SRC1, SRC2);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_DPH(Instruction instr) { // DPH: DP4 with src1's 4th component forced to 1.0
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::DPHI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+ if (Common::GetCPUCaps().sse4_1) {
+ // Set 4th component to 1.0
+ blendps(SRC1, ONE, 0b1000);
+ } else {
+ // Set 4th component to 1.0
+ movaps(SCRATCH, SRC1);
+ unpckhps(SCRATCH, ONE); // XYZW, 1111 -> Z1__
+ unpcklpd(SRC1, SCRATCH); // XYZW, Z1__ -> XYZ1
+ }
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ movaps(SRC2, SRC1);
+ shufps(SRC1, SRC1, _MM_SHUFFLE(2, 3, 0, 1)); // XYZW -> YXWZ
+ addps(SRC1, SRC2);
+ movaps(SRC2, SRC1);
+ shufps(SRC1, SRC1, _MM_SHUFFLE(0, 1, 2, 3)); // XYZW -> WZYX
+ addps(SRC1, SRC2);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_EX2(Instruction instr) { // EX2: exp2f of src1's first lane, broadcast
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ movss(xmm0, SRC1); // ABI_PARAM1
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); // saved around the call
+ CallFarFunction(*this, exp2f);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
+ movaps(SRC1, xmm0);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_LG2(Instruction instr) { // LG2: log2f of src1's first lane, broadcast
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ movss(xmm0, SRC1); // ABI_PARAM1
+ ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); // saved around the call
+ CallFarFunction(*this, log2f);
+ ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
+ shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); // ABI_RETURN
+ movaps(SRC1, xmm0);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_MUL(Instruction instr) { // MUL: dest = src1 * src2 via the sanitized multiply
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_SGE(Instruction instr) { // SGE: dest = (src1 >= src2) ? 1.0 : 0.0
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SGEI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+ cmpleps(SRC2, SRC1); // operands swapped: src2 <= src1 is src1 >= src2
+ andps(SRC2, ONE); // turn the all-ones mask into 1.0
+ Compile_DestEnable(instr, SRC2);
+void JitShader::Compile_SLT(Instruction instr) { // SLT: dest = (src1 < src2) ? 1.0 : 0.0
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::SLTI) {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1i, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2i, SRC2);
+ } else {
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ }
+ cmpltps(SRC1, SRC2);
+ andps(SRC1, ONE); // turn the all-ones mask into 1.0
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_FLR(Instruction instr) {
+    // FLR: dest = floor(src1), per component
+    Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+    if (Common::GetCPUCaps().sse4_1) {
+        roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
+    } else {
+        // SSE2 fallback. Truncation alone rounds toward zero, which is one too high for negative
+        // non-integers, so subtract 1.0 wherever the truncated value ended up above the input.
+        // Inputs outside the int32 range (and NaN) are still not handled by this path.
+        cvttps2dq(SCRATCH, SRC1);
+        cvtdq2ps(SCRATCH, SCRATCH); // SCRATCH = trunc(src)
+        cmpltps(SRC1, SCRATCH);     // SRC1 = all-ones where src < trunc(src)
+        andps(SRC1, ONE);           // SRC1 = 1.0 in those lanes, 0.0 elsewhere
+        subps(SCRATCH, SRC1);       // SCRATCH = trunc(src) - correction = floor(src)
+        movaps(SRC1, SCRATCH);
+    }
+    Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_MAX(Instruction instr) { // MAX: dest = max(src1, src2), per component
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
+ maxps(SRC1, SRC2);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_MIN(Instruction instr) { // MIN: dest = min(src1, src2), per component
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE semantics match PICA200 ones: In case of NaN, SRC2 is returned.
+ minps(SRC1, SRC2);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_MOVA(Instruction instr) { // MOVA: loads the address offset registers from src1.xy
+ SwizzlePattern swiz = {(*swizzle_data)[instr.common.operand_desc_id]};
+ if (!swiz.DestComponentEnabled(0) && !swiz.DestComponentEnabled(1)) {
+ return; // NoOp
+ }
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ // Convert floats to integers using truncation (only care about X and Y components)
+ cvttps2dq(SRC1, SRC1);
+ // Get result
+ movq(rax, SRC1); // low 32 bits hold X, high 32 bits hold Y
+ // Handle destination enable
+ if (swiz.DestComponentEnabled(0) && swiz.DestComponentEnabled(1)) {
+ // Move and sign-extend low 32 bits
+ movsxd(ADDROFFS_REG_0, eax);
+ // Move and sign-extend high 32 bits
+ shr(rax, 32);
+ movsxd(ADDROFFS_REG_1, eax);
+ // Multiply by 16 to be used as an offset later
+ shl(ADDROFFS_REG_0, 4);
+ shl(ADDROFFS_REG_1, 4);
+ } else {
+ if (swiz.DestComponentEnabled(0)) {
+ // Move and sign-extend low 32 bits
+ movsxd(ADDROFFS_REG_0, eax);
+ // Multiply by 16 to be used as an offset later
+ shl(ADDROFFS_REG_0, 4);
+ } else if (swiz.DestComponentEnabled(1)) {
+ // Move and sign-extend high 32 bits
+ shr(rax, 32);
+ movsxd(ADDROFFS_REG_1, eax);
+ // Multiply by 16 to be used as an offset later
+ shl(ADDROFFS_REG_1, 4);
+ }
+ }
+void JitShader::Compile_MOV(Instruction instr) { // MOV: dest = swizzled src1
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_RCP(Instruction instr) { // RCP: approximate 1/src1.x, broadcast to all lanes
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ // TODO(bunnei): RCPSS is a pretty rough approximation, this might cause problems if Pica
+ // performs this operation more accurately. This should be checked on hardware.
+ rcpss(SRC1, SRC1);
+ shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_RSQ(Instruction instr) { // RSQ: approximate 1/sqrt(src1.x), broadcast to all lanes
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ // TODO(bunnei): RSQRTSS is a pretty rough approximation, this might cause problems if Pica
+ // performs this operation more accurately. This should be checked on hardware.
+ rsqrtss(SRC1, SRC1);
+ shufps(SRC1, SRC1, _MM_SHUFFLE(0, 0, 0, 0)); // XYZW -> XXXX
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_NOP(Instruction instr) {} // NOP: emits no code
+void JitShader::Compile_END(Instruction instr) { // END: restore saved registers and return to the host
+ ABI_PopRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8); // mirrors the push emitted by Compile()
+ ret();
+void JitShader::Compile_CALL(Instruction instr) { // CALL: subroutine call to dest_offset
+ // Push offset of the return
+ push(qword, (instr.flow_control.dest_offset + instr.flow_control.num_instructions));
+ // Call the subroutine
+ call(instruction_labels[instr.flow_control.dest_offset]);
+ // Skip over the return offset that's on the stack
+ add(rsp, 8);
+void JitShader::Compile_CALLC(Instruction instr) {
+    // CALLC: emit the condition test, then a call that is skipped when the zero flag is set
+    Compile_EvaluateCondition(instr);
+    Label skip_call;
+    jz(skip_call);
+    Compile_CALL(instr);
+    L(skip_call);
+void JitShader::Compile_CALLU(Instruction instr) {
+    // CALLU: emit the boolean uniform test, then a call that is skipped when the zero flag is set
+    Compile_UniformCondition(instr);
+    Label skip_call;
+    jz(skip_call);
+    Compile_CALL(instr);
+    L(skip_call);
+void JitShader::Compile_CMP(Instruction instr) { // CMP: sets COND0/COND1 from the X/Y comparisons
+ using Op = Instruction::Common::CompareOpType::Op;
+ Op op_x = instr.common.compare_op.x;
+ Op op_y = instr.common.compare_op.y;
+ Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
+ Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
+ // SSE doesn't have greater-than (GT) or greater-equal (GE) comparison operators. You need to
+ // emulate them by swapping the lhs and rhs and using LT and LE. NLT and NLE can't be used here
+ // because they don't match when used with NaNs.
+ static const u8 cmp[] = {CMP_EQ, CMP_NEQ, CMP_LT, CMP_LE, CMP_LT, CMP_LE};
+ bool invert_op_x = (op_x == Op::GreaterThan || op_x == Op::GreaterEqual);
+ Xmm lhs_x = invert_op_x ? SRC2 : SRC1;
+ Xmm rhs_x = invert_op_x ? SRC1 : SRC2;
+ if (op_x == op_y) {
+ // Compare X-component and Y-component together
+ cmpps(lhs_x, rhs_x, cmp[op_x]);
+ movq(COND0, lhs_x); // low 32 bits hold the X mask, high 32 bits the Y mask
+ mov(COND1, COND0);
+ } else {
+ bool invert_op_y = (op_y == Op::GreaterThan || op_y == Op::GreaterEqual);
+ Xmm lhs_y = invert_op_y ? SRC2 : SRC1;
+ Xmm rhs_y = invert_op_y ? SRC1 : SRC2;
+ // Compare X-component
+ movaps(SCRATCH, lhs_x);
+ cmpss(SCRATCH, rhs_x, cmp[op_x]);
+ // Compare Y-component
+ cmpps(lhs_y, rhs_y, cmp[op_y]);
+ movq(COND0, SCRATCH);
+ movq(COND1, lhs_y);
+ }
+ shr(COND0.cvt32(), 31); // ignores upper 32 bits in source
+ shr(COND1, 63); // keeps only the top bit, which belongs to the Y mask
+void JitShader::Compile_MAD(Instruction instr) { // MAD: dest = src1 * src2 + src3
+ Compile_SwizzleSrc(instr, 1, instr.mad.src1, SRC1);
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
+ Compile_SwizzleSrc(instr, 2, instr.mad.src2i, SRC2);
+ Compile_SwizzleSrc(instr, 3, instr.mad.src3i, SRC3);
+ } else {
+ Compile_SwizzleSrc(instr, 2, instr.mad.src2, SRC2);
+ Compile_SwizzleSrc(instr, 3, instr.mad.src3, SRC3);
+ }
+ Compile_SanitizedMul(SRC1, SRC2, SCRATCH);
+ addps(SRC1, SRC3);
+ Compile_DestEnable(instr, SRC1);
+void JitShader::Compile_IF(Instruction instr) { // IFU/IFC: conditional block with optional else
+ Compile_Assert(instr.flow_control.dest_offset >= program_counter,
+ "Backwards if-statements not supported");
+ Label l_else, l_endif;
+ // Evaluate the "IF" condition
+ if (instr.opcode.Value() == OpCode::Id::IFU) {
+ Compile_UniformCondition(instr);
+ } else if (instr.opcode.Value() == OpCode::Id::IFC) {
+ Compile_EvaluateCondition(instr);
+ }
+ jz(l_else, T_NEAR);
+ // Compile the code that corresponds to the condition evaluating as true
+ Compile_Block(instr.flow_control.dest_offset);
+ // If there isn't an "ELSE" condition, we are done here
+ if (instr.flow_control.num_instructions == 0) {
+ L(l_else);
+ return;
+ }
+ jmp(l_endif, T_NEAR);
+ L(l_else);
+ // This code corresponds to the "ELSE" condition
+ // Compile the code that corresponds to the condition evaluating as false
+ Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
+ L(l_endif);
+void JitShader::Compile_LOOP(Instruction instr) {
+    // LOOP: emits the loop set-up, the loop body (compiled once) and the backwards branch
+    Compile_Assert(instr.flow_control.dest_offset >= program_counter,
+                   "Backwards loops not supported");
+    Compile_Assert(!looping, "Nested loops not supported");
+    looping = true;
+    // This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
+    // The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
+    // 4 bits) to be used as an offset into the 16-byte vector registers later
+    size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
+    mov(LOOPCOUNT, dword[SETUP + offset]);
+    // Each field is extracted from its own copy of the packed uniform; without these copies the
+    // shifts below would operate on whatever the registers happened to hold
+    mov(LOOPCOUNT_REG, LOOPCOUNT);
+    shr(LOOPCOUNT_REG, 4);
+    and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
+    mov(LOOPINC, LOOPCOUNT);
+    shr(LOOPINC, 12);
+    and(LOOPINC, 0xFF0);                // Z-component is the incrementer
+    movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
+    add(LOOPCOUNT, 1);                  // Iteration count is X-component + 1
+    Label l_loop_start;
+    L(l_loop_start);
+    Compile_Block(instr.flow_control.dest_offset + 1);
+    add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
+    sub(LOOPCOUNT, 1);           // Decrement the remaining iteration count
+    jnz(l_loop_start);           // Loop again while iterations remain
+    looping = false;
+void JitShader::Compile_JMP(Instruction instr) {
+    // JMPC/JMPU: emit the test, then a conditional near jump to the label of dest_offset.
+    // Every branch is braced so the trailing `else` can no longer swallow the declaration of
+    // inverted_condition and leave it out of scope for the code below.
+    if (instr.opcode.Value() == OpCode::Id::JMPC) {
+        Compile_EvaluateCondition(instr);
+    } else if (instr.opcode.Value() == OpCode::Id::JMPU) {
+        Compile_UniformCondition(instr);
+    } else {
+        ASSERT_MSG(false, "Compile_JMP called with an opcode that is neither JMPC nor JMPU");
+    }
+    // For JMPU, bit 0 of num_instructions inverts the test: jump when the zero flag is set
+    const bool inverted_condition =
+        (instr.opcode.Value() == OpCode::Id::JMPU) && (instr.flow_control.num_instructions & 1);
+    Label& b = instruction_labels[instr.flow_control.dest_offset];
+    if (inverted_condition) {
+        jz(b, T_NEAR);
+    } else {
+        jnz(b, T_NEAR);
+    }
+void JitShader::Compile_Block(unsigned end) {
+    // Compile instructions until program_counter reaches `end`; program_counter is advanced by
+    // Compile_NextInstr(), so the bound is re-checked after every instruction
+    if (program_counter >= end) {
+        return;
+    }
+    do {
+        Compile_NextInstr();
+    } while (program_counter < end);
+void JitShader::Compile_Return() {
+ // Peek return offset on the stack and check if we're at that offset
+ mov(rax, qword[rsp + 8]); // Compile_CALL pushes the offset before `call` pushes the return address
+ cmp(eax, (program_counter));
+ // If so, jump back to before CALL
+ Label b;
+ jnz(b); // offsets differ: skip the ret and keep executing
+ ret();
+ L(b);
+void JitShader::Compile_NextInstr() {
+    // If some CALL returns to this offset, emit the conditional return before the instruction
+    const bool is_return_site =
+        std::binary_search(return_offsets.begin(), return_offsets.end(), program_counter);
+    if (is_return_site) {
+        Compile_Return();
+    }
+    // Bind this instruction's label to the current position in the emitted code
+    L(instruction_labels[program_counter]);
+    // Fetch the instruction, then step the program counter past it
+    Instruction instr = {(*program_code)[program_counter]};
+    ++program_counter;
+    const OpCode::Id op_id = instr.opcode.Value();
+    auto handler = instr_table[static_cast<unsigned>(op_id)];
+    if (!handler) {
+        // Unhandled instruction
+        LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
+                     instr.opcode.Value().EffectiveOpCode(), instr.hex);
+        return;
+    }
+    // JIT the instruction!
+    (this->*handler)(instr);
+void JitShader::FindReturnOffsets() {
+    // Rebuild the list from scratch: one entry per CALL/CALLC/CALLU in the program
+    return_offsets.clear();
+    for (const u32 raw_instr : *program_code) {
+        Instruction instr = {raw_instr};
+        const OpCode::Id id = instr.opcode.Value();
+        const bool is_call =
+            id == OpCode::Id::CALL || id == OpCode::Id::CALLC || id == OpCode::Id::CALLU;
+        if (is_call) {
+            // The subroutine ends right before this offset
+            return_offsets.push_back(instr.flow_control.dest_offset +
+                                     instr.flow_control.num_instructions);
+        }
+    }
+    // Sort for efficient binary search later
+    std::sort(return_offsets.begin(), return_offsets.end());
+void JitShader::Compile(const std::array<u32, 1024>* program_code_,
+                        const std::array<u32, 1024>* swizzle_data_) {
+    // Compiles the whole shader program: a prologue that sets up registers and jumps to the
+    // requested entry instruction, followed by the code for every instruction.
+    program_code = program_code_;
+    swizzle_data = swizzle_data_;
+    // Reset flow control state
+    program = (CompiledShader*)getCurr();
+    program_counter = 0;
+    looping = false;
+    instruction_labels.fill(Xbyak::Label());
+    // Find all `CALL` instructions and identify return locations
+    FindReturnOffsets();
+    // The stack pointer is 8 modulo 16 at the entry of a procedure
+    ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8);
+    // NOTE(review): SETUP and STATE are not initialised from the entry-point arguments in this
+    // function; confirm they are bound to the first two ABI parameters.
+    // Zero address/loop registers. LOOPCOUNT_REG is also used as an address offset by
+    // Compile_SwizzleSrc, so it must not hold stale data when no LOOP has run yet.
+    xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
+    xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
+    xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
+    // Used to set a register to one
+    static const __m128 one = {1.f, 1.f, 1.f, 1.f};
+    mov(rax, reinterpret_cast<size_t>(&one));
+    movaps(ONE, xword[rax]);
+    // Used to negate registers
+    static const __m128 neg = {-0.f, -0.f, -0.f, -0.f};
+    mov(rax, reinterpret_cast<size_t>(&neg));
+    movaps(NEGBIT, xword[rax]);
+    // Jump to start of the shader program
+    jmp(ABI_PARAM3);
+    // Compile entire program
+    Compile_Block(static_cast<unsigned>(program_code->size()));
+    // Free memory that's no longer needed
+    program_code = nullptr;
+    swizzle_data = nullptr;
+    return_offsets.clear();
+    return_offsets.shrink_to_fit();
+    ready();
+    ASSERT_MSG(getSize() <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
+    // %zu is the size_t conversion; %lu is only 32 bits wide on LLP64 targets such as Win64
+    LOG_DEBUG(HW_GPU, "Compiled shader size=%zu", getSize());
+JitShader::JitShader() : Xbyak::CodeGenerator(MAX_SHADER_SIZE) {} // MAX_SHADER_SIZE is handed to the Xbyak base as its size argument
+} // namespace Shader
+} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64_compiler.h b/src/video_core/shader/shader_jit_x64_compiler.h
new file mode 100644
index 000000000..29e9875ea
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64_compiler.h
@@ -0,0 +1,125 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+#pragma once
+#include <array>
+#include <cstddef>
+#include <utility>
+#include <vector>
+#include <nihstro/shader_bytecode.h>
+#include <xbyak.h>
+#include "common/bit_set.h"
+#include "common/common_types.h"
+#include "common/x64/emitter.h"
+#include "video_core/shader/shader.h"
+using nihstro::Instruction;
+using nihstro::OpCode;
+using nihstro::SwizzlePattern;
+namespace Pica {
+namespace Shader {
+/// Memory allocated for each compiled shader (64 KiB)
+constexpr size_t MAX_SHADER_SIZE = 1024 * 64;
+ * This class implements the shader JIT compiler. It recompiles a Pica shader program into x86_64
+ * code that can be executed on the host machine directly.
+ */
+class JitShader : public Xbyak::CodeGenerator {
+ JitShader();
+ void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const { // Executes the compiled program
+ program(&setup, &state, instruction_labels[offset].getAddress()); // third argument: address of the entry label
+ }
+ void Compile(const std::array<u32, 1024>* program_code,
+ const std::array<u32, 1024>* swizzle_data);
+ void Compile_ADD(Instruction instr); // One Compile_* emitter per shader instruction follows
+ void Compile_DP3(Instruction instr);
+ void Compile_DP4(Instruction instr);
+ void Compile_DPH(Instruction instr);
+ void Compile_EX2(Instruction instr);
+ void Compile_LG2(Instruction instr);
+ void Compile_MUL(Instruction instr);
+ void Compile_SGE(Instruction instr);
+ void Compile_SLT(Instruction instr);
+ void Compile_FLR(Instruction instr);
+ void Compile_MAX(Instruction instr);
+ void Compile_MIN(Instruction instr);
+ void Compile_RCP(Instruction instr);
+ void Compile_RSQ(Instruction instr);
+ void Compile_MOVA(Instruction instr);
+ void Compile_MOV(Instruction instr);
+ void Compile_NOP(Instruction instr);
+ void Compile_END(Instruction instr);
+ void Compile_CALL(Instruction instr);
+ void Compile_CALLC(Instruction instr);
+ void Compile_CALLU(Instruction instr);
+ void Compile_IF(Instruction instr);
+ void Compile_LOOP(Instruction instr);
+ void Compile_JMP(Instruction instr);
+ void Compile_CMP(Instruction instr);
+ void Compile_MAD(Instruction instr);
+ void Compile_Block(unsigned end); // Compiles instructions until program_counter reaches `end`
+ void Compile_NextInstr(); // Compiles the instruction at program_counter and advances it
+ void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg,
+ Xbyak::Xmm dest);
+ void Compile_DestEnable(Instruction instr, Xbyak::Xmm dest);
+ /**
+ * Compiles a `MUL src1, src2` operation, properly handling the PICA semantics when multiplying
+ * zero by inf. Clobbers `src2` and `scratch`.
+ */
+ void Compile_SanitizedMul(Xbyak::Xmm src1, Xbyak::Xmm src2, Xbyak::Xmm scratch);
+ void Compile_EvaluateCondition(Instruction instr);
+ void Compile_UniformCondition(Instruction instr);
+ /**
+ * Emits the code to conditionally return from a subroutine invoked by the `CALL` instruction.
+ */
+ void Compile_Return();
+ BitSet32 PersistentCallerSavedRegs();
+ /**
+ * Assertion evaluated at compile-time, but only triggered if executed at runtime.
+ * @param msg Message to be logged if the assertion fails.
+ */
+ void Compile_Assert(bool condition, const char* msg);
+ /**
+ * Analyzes the entire shader program for `CALL` instructions before emitting any code,
+ * identifying the locations where a return needs to be inserted.
+ */
+ void FindReturnOffsets();
+ const std::array<u32, 1024>* program_code = nullptr;
+ const std::array<u32, 1024>* swizzle_data = nullptr;
+ /// Mapping of Pica VS instructions to pointers in the emitted code
+ std::array<Xbyak::Label, 1024> instruction_labels;
+ /// Offsets in code where a return needs to be inserted
+ std::vector<unsigned> return_offsets;
+ unsigned program_counter = 0; ///< Offset of the next instruction to decode
+ bool looping = false; ///< True if compiling a loop, used to check for nested loops
+ using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
+ CompiledShader* program = nullptr; // Entry point of the emitted code; assigned in Compile()
+} // Shader
+} // Pica