Skip to content

Commit

Permalink
SIMD auto-vectorization for XTEA (otland#2476)
Browse files Browse the repository at this point in the history
  • Loading branch information
ranisalt authored Jul 9, 2018
1 parent 81eb7fc commit e4dfecd
Show file tree
Hide file tree
Showing 11 changed files with 225 additions and 67 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ include(cotire)

add_compile_options(-Wall -Werror -pipe -fvisibility=hidden)

set(CMAKE_CXX_FLAGS_PERFORMANCE "${CMAKE_CXX_FLAGS_RELEASE} -march=native")

if (CMAKE_COMPILER_IS_GNUCXX)
add_compile_options(-fno-strict-aliasing)
endif()
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,6 @@ set(tfs_SRC
${CMAKE_CURRENT_LIST_DIR}/waitlist.cpp
${CMAKE_CURRENT_LIST_DIR}/weapons.cpp
${CMAKE_CURRENT_LIST_DIR}/wildcardtree.cpp
${CMAKE_CURRENT_LIST_DIR}/xtea.cpp
PARENT_SCOPE)

57 changes: 6 additions & 51 deletions src/protocol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "protocol.h"
#include "outputmessage.h"
#include "rsa.h"
#include "xtea.h"

extern RSA g_RSA;

Expand Down Expand Up @@ -60,37 +61,14 @@ OutputMessage_ptr Protocol::getOutputBuffer(int32_t size)

void Protocol::XTEA_encrypt(OutputMessage& msg) const
{
const uint32_t delta = 0x61C88647;

// The message must be a multiple of 8
size_t paddingBytes = msg.getLength() % 8;
size_t paddingBytes = msg.getLength() % 8u;
if (paddingBytes != 0) {
msg.addPaddingBytes(8 - paddingBytes);
}

uint8_t* buffer = msg.getOutputBuffer();
const size_t messageLength = msg.getLength();
size_t readPos = 0;
const uint32_t k[] = {key[0], key[1], key[2], key[3]};
while (readPos < messageLength) {
uint32_t v0;
memcpy(&v0, buffer + readPos, 4);
uint32_t v1;
memcpy(&v1, buffer + readPos + 4, 4);

uint32_t sum = 0;

for (int32_t i = 32; --i >= 0;) {
v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
sum -= delta;
v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[(sum >> 11) & 3]);
}

memcpy(buffer + readPos, &v0, 4);
readPos += 4;
memcpy(buffer + readPos, &v1, 4);
readPos += 4;
}
xtea::encrypt(buffer, msg.getLength(), key);
}

bool Protocol::XTEA_decrypt(NetworkMessage& msg) const
Expand All @@ -99,34 +77,11 @@ bool Protocol::XTEA_decrypt(NetworkMessage& msg) const
return false;
}

const uint32_t delta = 0x61C88647;

uint8_t* buffer = msg.getBuffer() + msg.getBufferPosition();
const size_t messageLength = (msg.getLength() - 6);
size_t readPos = 0;
const uint32_t k[] = {key[0], key[1], key[2], key[3]};
while (readPos < messageLength) {
uint32_t v0;
memcpy(&v0, buffer + readPos, 4);
uint32_t v1;
memcpy(&v1, buffer + readPos + 4, 4);

uint32_t sum = 0xC6EF3720;

for (int32_t i = 32; --i >= 0;) {
v1 -= ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[(sum >> 11) & 3]);
sum += delta;
v0 -= ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
}

memcpy(buffer + readPos, &v0, 4);
readPos += 4;
memcpy(buffer + readPos, &v1, 4);
readPos += 4;
}
xtea::decrypt(buffer, msg.getLength() - 6, key);

int innerLength = msg.get<uint16_t>();
if (innerLength > msg.getLength() - 8) {
uint16_t innerLength = msg.get<uint16_t>();
if (innerLength + 8 > msg.getLength()) {
return false;
}

Expand Down
7 changes: 4 additions & 3 deletions src/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define FS_PROTOCOL_H_D71405071ACF4137A4B1203899DE80E1

#include "connection.h"
#include "xtea.h"

class Protocol : public std::enable_shared_from_this<Protocol>
{
Expand Down Expand Up @@ -71,8 +72,8 @@ class Protocol : public std::enable_shared_from_this<Protocol>
void enableXTEAEncryption() {
encryptionEnabled = true;
}
void setXTEAKey(const uint32_t* key) {
memcpy(this->key, key, sizeof(*key) * 4);
void setXTEAKey(xtea::key key) {
this->key = std::move(key);
}
void disableChecksum() {
checksumEnabled = false;
Expand All @@ -95,7 +96,7 @@ class Protocol : public std::enable_shared_from_this<Protocol>
OutputMessage_ptr outputBuffer;

const ConnectionWeak_ptr connection;
uint32_t key[4] = {};
xtea::key key;
bool encryptionEnabled = false;
bool checksumEnabled = true;
bool rawMessages = false;
Expand Down
4 changes: 2 additions & 2 deletions src/protocolgame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,13 @@ void ProtocolGame::onRecvFirstMessage(NetworkMessage& msg)
return;
}

uint32_t key[4];
xtea::key key;
key[0] = msg.get<uint32_t>();
key[1] = msg.get<uint32_t>();
key[2] = msg.get<uint32_t>();
key[3] = msg.get<uint32_t>();
enableXTEAEncryption();
setXTEAKey(key);
setXTEAKey(std::move(key));

if (operatingSystem >= CLIENTOS_OTCLIENT_LINUX) {
NetworkMessage opcodeMessage;
Expand Down
4 changes: 2 additions & 2 deletions src/protocollogin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,13 +150,13 @@ void ProtocolLogin::onRecvFirstMessage(NetworkMessage& msg)
return;
}

uint32_t key[4];
xtea::key key;
key[0] = msg.get<uint32_t>();
key[1] = msg.get<uint32_t>();
key[2] = msg.get<uint32_t>();
key[3] = msg.get<uint32_t>();
enableXTEAEncryption();
setXTEAKey(key);
setXTEAKey(std::move(key));

if (version < CLIENT_VERSION_MIN || version > CLIENT_VERSION_MAX) {
std::ostringstream ss;
Expand Down
4 changes: 2 additions & 2 deletions src/protocolold.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ void ProtocolOld::onRecvFirstMessage(NetworkMessage& msg)
return;
}

uint32_t key[4];
xtea::key key;
key[0] = msg.get<uint32_t>();
key[1] = msg.get<uint32_t>();
key[2] = msg.get<uint32_t>();
key[3] = msg.get<uint32_t>();
enableXTEAEncryption();
setXTEAKey(key);
setXTEAKey(std::move(key));

if (version <= 822) {
disableChecksum();
Expand Down
140 changes: 140 additions & 0 deletions src/xtea.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/**
* The Forgotten Server - a free and open-source MMORPG server emulator
* Copyright (C) 2018 Mark Samman <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "otpch.h"

#include "xtea.h"

#include <array>
#include <assert.h>

namespace xtea {

namespace {

// Per-round step of the running `sum` used by the XTEA kernels in this file:
// XTEA_encrypt adds it once per round, XTEA_decrypt starts from `delta << 5`
// and subtracts it once per round.
constexpr uint32_t delta = 0x9E3779B9;

// Encrypts BLOCK_SIZE consecutive 8-byte XTEA blocks in place.
//
// Every block is read as two little-endian 32-bit words. The first words of
// all blocks go into `left`, the second words into `right`, so each half-round
// is a plain element-wise loop over BLOCK_SIZE independent lanes.
//
// BLOCK_SIZE  number of 8-byte blocks handled by one call
// data        at least BLOCK_SIZE * 8 readable/writable bytes; overwritten
//             with the ciphertext
// k           the four 32-bit key words
template<size_t BLOCK_SIZE>
void XTEA_encrypt(uint8_t data[BLOCK_SIZE * 8], const key& k)
{
	alignas(16) uint32_t left[BLOCK_SIZE], right[BLOCK_SIZE];

	// Widen each byte to uint32_t *before* shifting. A bare uint8_t operand is
	// promoted to signed int, and shifting a value >= 0x80 left by 24 would
	// then move a bit into the sign bit.
	for (size_t i = 0, j = 0; i < BLOCK_SIZE; ++i, j += 8) {
		left[i] = static_cast<uint32_t>(data[j])
		        | static_cast<uint32_t>(data[j+1]) << 8u
		        | static_cast<uint32_t>(data[j+2]) << 16u
		        | static_cast<uint32_t>(data[j+3]) << 24u;
		right[i] = static_cast<uint32_t>(data[j+4])
		         | static_cast<uint32_t>(data[j+5]) << 8u
		         | static_cast<uint32_t>(data[j+6]) << 16u
		         | static_cast<uint32_t>(data[j+7]) << 24u;
	}

	// 32 rounds; `sum` is shared by all lanes, so it is advanced once per
	// round between the two half-rounds.
	uint32_t sum = 0u;
	for (auto round = 0u; round < 32; ++round) {
		for (size_t j = 0; j < BLOCK_SIZE; ++j) {
			left[j] += (((right[j] << 4) ^ (right[j] >> 5)) + right[j]) ^ (sum + k[sum & 3]);
		}
		sum += delta;
		for (size_t j = 0; j < BLOCK_SIZE; ++j) {
			right[j] += (((left[j] << 4) ^ (left[j] >> 5)) + left[j]) ^ (sum + k[(sum >> 11) & 3]);
		}
	}

	// Scatter the words back into the buffer, least significant byte first.
	for (size_t i = 0, j = 0; i < BLOCK_SIZE; ++i, j += 8) {
		data[j] = static_cast<uint8_t>(left[i]);
		data[j+1] = static_cast<uint8_t>(left[i] >> 8u);
		data[j+2] = static_cast<uint8_t>(left[i] >> 16u);
		data[j+3] = static_cast<uint8_t>(left[i] >> 24u);
		data[j+4] = static_cast<uint8_t>(right[i]);
		data[j+5] = static_cast<uint8_t>(right[i] >> 8u);
		data[j+6] = static_cast<uint8_t>(right[i] >> 16u);
		data[j+7] = static_cast<uint8_t>(right[i] >> 24u);
	}
}

// Decrypts BLOCK_SIZE consecutive 8-byte XTEA blocks in place.
//
// Mirror image of XTEA_encrypt: the same little-endian word layout and lane
// arrays are used, but the half-rounds run in the opposite order and subtract
// instead of add.
//
// BLOCK_SIZE  number of 8-byte blocks handled by one call
// data        at least BLOCK_SIZE * 8 readable/writable bytes; overwritten
//             with the plaintext
// k           the four 32-bit key words
template<size_t BLOCK_SIZE>
void XTEA_decrypt(uint8_t data[BLOCK_SIZE * 8], const key& k)
{
	alignas(16) uint32_t left[BLOCK_SIZE], right[BLOCK_SIZE];

	// Widen each byte to uint32_t *before* shifting. A bare uint8_t operand is
	// promoted to signed int, and shifting a value >= 0x80 left by 24 would
	// then move a bit into the sign bit.
	for (size_t i = 0, j = 0; i < BLOCK_SIZE; ++i, j += 8) {
		left[i] = static_cast<uint32_t>(data[j])
		        | static_cast<uint32_t>(data[j+1]) << 8u
		        | static_cast<uint32_t>(data[j+2]) << 16u
		        | static_cast<uint32_t>(data[j+3]) << 24u;
		right[i] = static_cast<uint32_t>(data[j+4])
		         | static_cast<uint32_t>(data[j+5]) << 8u
		         | static_cast<uint32_t>(data[j+6]) << 16u
		         | static_cast<uint32_t>(data[j+7]) << 24u;
	}

	// Start from delta * 32 (mod 2^32), i.e. the value `sum` reaches after the
	// 32 additions performed by XTEA_encrypt, and walk it back down to zero.
	uint32_t sum = delta << 5;
	for (auto round = 0u; round < 32; ++round) {
		for (size_t j = 0; j < BLOCK_SIZE; ++j) {
			right[j] -= (((left[j] << 4) ^ (left[j] >> 5)) + left[j]) ^ (sum + k[(sum >> 11) & 3]);
		}
		sum -= delta;
		for (size_t j = 0; j < BLOCK_SIZE; ++j) {
			left[j] -= (((right[j] << 4) ^ (right[j] >> 5)) + right[j]) ^ (sum + k[sum & 3]);
		}
	}

	// Scatter the words back into the buffer, least significant byte first.
	for (size_t i = 0, j = 0; i < BLOCK_SIZE; ++i, j += 8) {
		data[j] = static_cast<uint8_t>(left[i]);
		data[j+1] = static_cast<uint8_t>(left[i] >> 8u);
		data[j+2] = static_cast<uint8_t>(left[i] >> 16u);
		data[j+3] = static_cast<uint8_t>(left[i] >> 24u);
		data[j+4] = static_cast<uint8_t>(right[i]);
		data[j+5] = static_cast<uint8_t>(right[i] >> 8u);
		data[j+6] = static_cast<uint8_t>(right[i] >> 16u);
		data[j+7] = static_cast<uint8_t>(right[i] >> 24u);
	}
}

// Number of 8-byte blocks the widest kernel processes per call. It is chosen
// from the instruction-set macros the compiler predefines for the target, and
// must stay a power of two because XTEA::operator() masks the length with
// `step - 1` and halves the width for the remainder.
//
// The value only affects how much work is batched together; the cipher output
// is the same for every width.
//
// NOTE(review): MSVC defines neither __SSE__ nor __x86_64__, so a default
// x64 MSVC build appears to end up in the scalar branch - confirm whether
// that is intended.
constexpr auto InitialBlockSize =
#if defined(__AVX512F__)
	128u;
#elif defined(__AVX__)
	32u;
// __ARM_FEATURE_SIMD32 only covers the ARMv6 packed-integer DSP extension;
// __ARM_NEON is the macro that signals Advanced SIMD (NEON / AArch64).
#elif defined(__SSE__) || defined(__ARM_NEON) || defined(__ARM_FEATURE_SIMD32)
	8u;
#elif defined(__x86_64__)
	2u;
#else
	1u;
#endif

// Functor that runs XTEA in place over a byte buffer.
//
// It consumes as many chunks of `step` bytes (BlockSize 8-byte blocks) as fit,
// then hands whatever is left to the same functor with half the block size,
// down to a single block. Bytes beyond the last whole 8-byte block are left
// untouched.
//
// Encrypt    true selects XTEA_encrypt, false selects XTEA_decrypt
// BlockSize  number of 8-byte blocks passed to the kernel per call
template<bool Encrypt, size_t BlockSize>
struct XTEA {
	// The length mask below is only valid for power-of-two strides, and the
	// halving recursion only reaches 1 from a power of two.
	static_assert(BlockSize != 0 && (BlockSize & (BlockSize - 1)) == 0, "BlockSize must be a power of two");

	static constexpr size_t step = BlockSize * 8u;

	void operator()(uint8_t* input, size_t length, const key& k) const {
		// Byte count covered by whole `step`-sized chunks.
		const size_t blocks = length & ~(step - 1);

		// size_t index: same width as `blocks`, so it cannot wrap before
		// reaching the bound.
		for (size_t i = 0; i < blocks; i += step) {
			if (Encrypt) {
				XTEA_encrypt<BlockSize>(input + i, k);
			} else {
				XTEA_decrypt<BlockSize>(input + i, k);
			}
		}
		input += blocks;
		length -= blocks;

		// For BlockSize == 1 the expression (BlockSize + 1) / 2 names this very
		// specialization again, so the template recursion terminates; the
		// runtime check keeps that call from ever being executed.
		if (BlockSize != 1) {
			XTEA<Encrypt, (BlockSize + 1u) / 2u>()(input, length, k);
		}
	}
};

constexpr auto encrypt_v = XTEA<true, InitialBlockSize>();
constexpr auto decrypt_v = XTEA<false, InitialBlockSize>();

} // anonymous namespace

// Encrypts the buffer in place by forwarding to the encrypt_v functor.
void encrypt(uint8_t* data, size_t length, const key& k)
{
	encrypt_v(data, length, k);
}

// Decrypts the buffer in place by forwarding to the decrypt_v functor.
void decrypt(uint8_t* data, size_t length, const key& k)
{
	decrypt_v(data, length, k);
}

} // namespace xtea
32 changes: 32 additions & 0 deletions src/xtea.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* The Forgotten Server - a free and open-source MMORPG server emulator
* Copyright (C) 2018 Mark Samman <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#ifndef TFS_XTEA_H
#define TFS_XTEA_H

// Included here so the header compiles on its own instead of depending on a
// precompiled header or on the include order of each translation unit.
#include <array>
#include <cstddef>
#include <cstdint>

namespace xtea {

// The four 32-bit words of an XTEA key.
using key = std::array<std::uint32_t, 4>;

// Encrypts / decrypts `length` bytes starting at `data` in place with key `k`.
// The buffer is processed in 8-byte blocks.
void encrypt(std::uint8_t* data, std::size_t length, const key& k);
void decrypt(std::uint8_t* data, std::size_t length, const key& k);

} // namespace xtea

#endif // TFS_XTEA_H
7 changes: 5 additions & 2 deletions vc14/theforgottenserver.sln
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 14
VisualStudioVersion = 14.0.23107.0
# Visual Studio 15
VisualStudioVersion = 15.0.27703.2035
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "theforgottenserver", "theforgottenserver.vcxproj", "{A10F9657-129F-0FEF-14CB-CEE0B0E5AA3E}"
EndProject
Expand All @@ -25,4 +25,7 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {81EB239E-6AEB-4015-9915-256C2C90D3FD}
EndGlobalSection
EndGlobal
Loading

0 comments on commit e4dfecd

Please sign in to comment.