Compare commits

...

15 Commits

Author SHA1 Message Date
Dmitry Maksyoma
8f1f88d6b2 [Skip CI] document current state 2021-09-14 19:05:58 +12:00
Dmitry Maksyoma
2f91fbd235 CI: add set -e 2021-09-11 17:27:50 +12:00
Dmitry Maksyoma
3500489bb8 CI: fix build exists check 2021-09-11 17:26:56 +12:00
Dmitry Maksyoma
22362e73ad CI: refactor 2021-09-11 17:24:36 +12:00
Dmitry Maksyoma
0a1f705637 CI: fix permissions 2021-09-11 17:18:43 +12:00
Dmitry Maksyoma
02dc1a4b53 CI: refactor 2021-09-11 17:14:00 +12:00
Dmitry Maksyoma
33dd45b6e5 CI: extract .ci/check_if_build_was_already_uploaded 2021-09-11 17:13:10 +12:00
Dmitry Maksyoma
c5d61ca63e CI: remove redundant code 2021-09-11 17:10:01 +12:00
Dmitry Maksyoma
bb2dc1787d CI: refactor 2021-09-11 17:09:23 +12:00
Dmitry Maksyoma
2bdfd0f70a CI: add curl for testing existing build 2021-09-11 17:02:20 +12:00
Dmitry Maksyoma
c9a4319ca7 CI: stop pipeline when build already was uploaded spike 2021-09-11 17:00:01 +12:00
Dmitry Maksyoma
b367d1711d CI: fix rpm naming 2021-09-11 16:42:16 +12:00
matt
159d752795 Update noVNC commit 2021-09-10 18:11:35 +00:00
mmcclaskey
0cb2c0ba9f Sse scaling (#52)
* Add CPUID functions for runtime dispatch
* Add SSE2 scaling
2021-09-09 13:55:33 -04:00
mmcclaskey
dc21d5f97c Add a set of self-microbenchmarks (#51)
Co-authored-by: Lauri Kasanen <cand@gmx.com>
2021-09-09 12:46:57 -04:00
19 changed files with 772 additions and 12 deletions

View File

@@ -0,0 +1,14 @@
#!/bin/bash
#
# CI guard: fail (exit 1) when the S3 location derived from S3_BUCKET and
# S3_BUILD_DIRECTORY already answers an HTTP HEAD request, so the rest of the
# pipeline does not run again for a build that was already uploaded.
# Exits 0 when the HEAD request fails for any reason.

set -e

# Succeeds when an HTTP HEAD request for the URL in $1 succeeds.
# curl's --fail turns HTTP error statuses into a non-zero exit code;
# --silent and --output /dev/null suppress all normal output.
check_directory_exists() {
  local remote_dir="$1"
  curl --output /dev/null --silent --head --fail "$remote_dir"
}

S3_URL="https://${S3_BUCKET}.s3.amazonaws.com/${S3_BUILD_DIRECTORY}/"

if check_directory_exists "$S3_URL"; then
  # Say why the job is failing; a bare "exit 1" leaves nothing in the CI log.
  echo "Build was already uploaded to ${S3_URL}; stopping the pipeline." >&2
  exit 1
fi

View File

@@ -1,15 +1,15 @@
#!/bin/bash
is_kasmvnc() {
is_kasmvnc_package() {
local package="$1";
echo "$package" | grep -q 'kasmvncserver_'
echo "$package" | grep -E -q 'kasmvncserver_|rpm'
}
function prepare_upload_filename() {
local package="$1";
if ! is_kasmvnc "$package"; then
if ! is_kasmvnc_package "$package"; then
export upload_filename="$package"
return
fi

View File

@@ -6,11 +6,22 @@ variables:
GITLAB_SHARED_DIND_DIR: /builds/$CI_PROJECT_PATH/shared
GIT_SUBMODULE_STRATEGY: normal
GIT_FETCH_EXTRA_FLAGS: --tags
# S3_BUILD_DIRECTORY: kasmvnc/${CI_COMMIT_SHA}
S3_BUILD_DIRECTORY: kasmvnc/159d7527955f131e096cf1602b7f9f66cc5d66cb
stages:
- check_if_build_was_already_uploaded_for_the_commit
- build
- upload
# Job for the first pipeline stage: installs bash and curl with apk, then runs
# .ci/check_if_build_was_already_uploaded. A non-zero exit from that script
# fails this job.
check_if_build_was_already_uploaded_for_the_commit:
stage: check_if_build_was_already_uploaded_for_the_commit
script:
# The check script has a bash shebang and calls curl, so both are installed first.
- apk add bash
- apk add curl
# - TODO: Try uploading and fail if the file .lock exists.
- .ci/check_if_build_was_already_uploaded
.prepare_build: &prepare_build
- ls -l
- pwd
@@ -112,7 +123,6 @@ upload:
before_script:
- . .ci/upload.sh
script:
- export S3_BUILD_DIRECTORY="kasmvnc/${CI_COMMIT_SHA}"
- prepare_to_run_scripts_and_s3_uploads
- export RELEASE_VERSION=$(.ci/next_release_version "$CI_COMMIT_REF_NAME")
- for package in `find output/ -type f -name '*.deb' -or -name '*.rpm'`; do

View File

@@ -16,6 +16,7 @@ include(CheckLibraryExists)
include(CheckTypeSize)
include(CheckCSourceCompiles)
include(CheckCXXSourceCompiles)
include(CheckCXXCompilerFlag)
include(CheckCSourceRuns)
include(CMakeMacroLibtoolFile)
@@ -208,6 +209,9 @@ if(ENABLE_PAM)
endif()
set(HAVE_PAM ${ENABLE_PAM})
# Check for SSE2
check_cxx_compiler_flag(-msse2 COMPILER_SUPPORTS_SSE2)
# Generate config.h and make sure the source finds it
configure_file(config.h.in config.h)
add_definitions(-DHAVE_CONFIG_H)

View File

@@ -21,6 +21,7 @@
#include <inttypes.h>
#include <network/GetAPI.h>
#include <rfb/ConnParams.h>
#include <rfb/EncodeManager.h>
#include <rfb/LogWriter.h>
#include <rfb/JpegCompressor.h>
#include <rfb/xxhash.h>
@@ -32,10 +33,6 @@ using namespace rfb;
static LogWriter vlog("GetAPIMessager");
PixelBuffer *progressiveBilinearScale(const PixelBuffer *pb,
const uint16_t tgtw, const uint16_t tgth,
const float tgtdiff);
struct TightJPEGConfiguration {
int quality;
int subsampling;

View File

@@ -48,6 +48,7 @@ set(RFB_SOURCES
Security.cxx
SecurityServer.cxx
SecurityClient.cxx
SelfBench.cxx
SSecurityPlain.cxx
SSecurityStack.cxx
SSecurityVncAuth.cxx
@@ -63,6 +64,7 @@ set(RFB_SOURCES
VNCServerST.cxx
ZRLEEncoder.cxx
ZRLEDecoder.cxx
cpuid.cxx
encodings.cxx
util.cxx
xxhash.c)
@@ -96,6 +98,27 @@ if(GNUTLS_FOUND)
)
endif()
# SSE2
#
# scale_sse2.cxx is compiled with -msse2 and added to the rfb sources when the
# compiler accepts that flag (COMPILER_SUPPORTS_SSE2). Otherwise
# scale_dummy.cxx is added in its place.
set(SSE2_SOURCES
  scale_sse2.cxx)

set(SCALE_DUMMY_SOURCES
  scale_dummy.cxx)

if(COMPILER_SUPPORTS_SSE2)
  # PROPERTIES takes strict key/value pairs, so the value is one quoted
  # argument. The previous "${COMPILE_FLAGS} -msse2" relied on a variable named
  # COMPILE_FLAGS being empty; a non-empty value would have broken the pairing.
  set_source_files_properties(${SSE2_SOURCES}
    PROPERTIES COMPILE_FLAGS "-msse2")
  list(APPEND RFB_SOURCES ${SSE2_SOURCES})
else()
  list(APPEND RFB_SOURCES ${SCALE_DUMMY_SOURCES})
endif()
add_library(rfb STATIC ${RFB_SOURCES})
target_link_libraries(rfb ${RFB_LIBRARIES})

View File

@@ -22,10 +22,12 @@
#include <omp.h>
#include <stdlib.h>
#include <rfb/cpuid.h>
#include <rfb/EncCache.h>
#include <rfb/EncodeManager.h>
#include <rfb/Encoder.h>
#include <rfb/Palette.h>
#include <rfb/scale_sse2.h>
#include <rfb/SConnection.h>
#include <rfb/ServerCore.h>
#include <rfb/SMsgWriter.h>
@@ -895,7 +897,7 @@ void EncodeManager::updateVideoStats(const std::vector<Rect> &rects, const Pixel
}
}
static PixelBuffer *nearestScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
PixelBuffer *rfb::nearestScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
const float diff)
{
ManagedPixelBuffer *newpb = new ManagedPixelBuffer(pb->getPF(), w, h);
@@ -920,7 +922,7 @@ static PixelBuffer *nearestScale(const PixelBuffer *pb, const uint16_t w, const
return newpb;
}
static PixelBuffer *bilinearScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
PixelBuffer *rfb::bilinearScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
const float diff)
{
ManagedPixelBuffer *newpb = new ManagedPixelBuffer(pb->getPF(), w, h);
@@ -968,10 +970,68 @@ static PixelBuffer *bilinearScale(const PixelBuffer *pb, const uint16_t w, const
return newpb;
}
PixelBuffer *progressiveBilinearScale(const PixelBuffer *pb,
PixelBuffer *rfb::progressiveBilinearScale(const PixelBuffer *pb,
const uint16_t tgtw, const uint16_t tgth,
const float tgtdiff)
{
if (supportsSSE2()) {
if (tgtdiff >= 0.5f) {
ManagedPixelBuffer *newpb = new ManagedPixelBuffer(pb->getPF(), tgtw, tgth);
int oldstride, newstride;
const rdr::U8 *oldpx = pb->getBuffer(pb->getRect(), &oldstride);
rdr::U8 *newpx = newpb->getBufferRW(newpb->getRect(), &newstride);
SSE2_scale(oldpx, tgtw, tgth, newpx, oldstride, newstride, tgtdiff);
return newpb;
}
PixelBuffer *newpb;
uint16_t neww, newh, oldw, oldh;
bool del = false;
do {
oldw = pb->getRect().width();
oldh = pb->getRect().height();
neww = oldw / 2;
newh = oldh / 2;
newpb = new ManagedPixelBuffer(pb->getPF(), neww, newh);
int oldstride, newstride;
const rdr::U8 *oldpx = pb->getBuffer(pb->getRect(), &oldstride);
rdr::U8 *newpx = ((ManagedPixelBuffer *) newpb)->getBufferRW(newpb->getRect(),
&newstride);
SSE2_halve(oldpx, neww, newh, newpx, oldstride, newstride);
if (del)
delete pb;
del = true;
pb = newpb;
} while (tgtw * 2 < neww);
// Final, non-halving step
if (tgtw != neww || tgth != newh) {
oldw = pb->getRect().width();
oldh = pb->getRect().height();
newpb = new ManagedPixelBuffer(pb->getPF(), tgtw, tgth);
int oldstride, newstride;
const rdr::U8 *oldpx = pb->getBuffer(pb->getRect(), &oldstride);
rdr::U8 *newpx = ((ManagedPixelBuffer *) newpb)->getBufferRW(newpb->getRect(),
&newstride);
SSE2_scale(oldpx, tgtw, tgth, newpx, oldstride, newstride, tgtdiff);
if (del)
delete pb;
}
return newpb;
} // SSE2
if (tgtdiff >= 0.5f)
return bilinearScale(pb, tgtw, tgth, tgtdiff);

View File

@@ -215,6 +215,13 @@ namespace rfb {
virtual rdr::U8* getBufferRW(const Rect& r, int* stride);
};
};
PixelBuffer *nearestScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
const float diff);
PixelBuffer *bilinearScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
const float diff);
PixelBuffer *progressiveBilinearScale(const PixelBuffer *pb, const uint16_t w, const uint16_t h,
const float diff);
}
#endif

197
common/rfb/SelfBench.cxx Normal file
View File

@@ -0,0 +1,197 @@
/* Copyright (C) 2021 Kasm Web
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
#include <rfb/ComparingUpdateTracker.h>
#include <rfb/EncodeManager.h>
#include <rfb/LogWriter.h>
#include <rfb/SConnection.h>
#include <rfb/ServerCore.h>
#include <rfb/PixelBuffer.h>
#include <rfb/TightJPEGEncoder.h>
#include <rfb/TightWEBPEncoder.h>
#include <rfb/util.h>
#include <sys/time.h>
#include <stdint.h>
#include <stdlib.h>
using namespace rfb;
static LogWriter vlog("SelfBench");
static const PixelFormat pfRGBX(32, 24, false, true, 255, 255, 255, 0, 8, 16);
#define RUNS 64
#define W 1600
#define H 1200
// Runs a fixed set of single-threaded micro-benchmarks (JPEG/WebP compression,
// nearest/bilinear/progressive-bilinear scaling, frame comparison) on two
// W x H frames of pseudo-random pixels, logs the elapsed milliseconds for each
// through vlog.info(), and then terminates the process with exit(0).
//
// This function never returns to its caller.
void SelfBench() {
	unsigned i, runs;
	struct timeval start;

	// f1/f2 are the two random source frames; "screen" is the buffer the
	// comparer watches and that the analysis loops overwrite each run.
	ManagedPixelBuffer f1(pfRGBX, W, H);
	ManagedPixelBuffer f2(pfRGBX, W, H);
	ManagedPixelBuffer screen(pfRGBX, W, H);

	int stride;
	rdr::U8 *f1ptr = f1.getBufferRW(f1.getRect(), &stride);
	rdr::U8 *f2ptr = f2.getBufferRW(f2.getRect(), &stride);
	rdr::U8 * const screenptr = screen.getBufferRW(screen.getRect(), &stride);

	rdr::U8 * const f1orig = f1ptr;
	rdr::U8 * const f2orig = f2ptr;

	// Fill both frames, 4 bytes per pixel.
	for (i = 0; i < W * H * 4; i += 4) {
		f1ptr[0] = rand();
		f1ptr[1] = rand();
		f1ptr[2] = rand();
		// The fourth byte was previously never written although the
		// memcpy()/compare() runs below read it. Give it a defined value.
		// NOTE(review): assumes ManagedPixelBuffer does not zero-fill its
		// storage -- confirm; writing 0 is harmless either way.
		f1ptr[3] = 0;

		f2ptr[0] = rand();
		f2ptr[1] = rand();
		f2ptr[2] = rand();
		f2ptr[3] = 0;

		f1ptr += 4;
		f2ptr += 4;
	}

	vlog.info("Running micro-benchmarks (single-threaded, runs depending on task)");

	// Encoding
	std::vector<uint8_t> vec;

	TightJPEGEncoder jpeg(NULL);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		jpeg.compressOnly(&f1, 8, vec, false);
	}
	vlog.info("Jpeg compression at quality 8 took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		jpeg.compressOnly(&f1, 4, vec, false);
	}
	vlog.info("Jpeg compression at quality 4 took %u ms (%u runs)", msSince(&start), runs);

	TightWEBPEncoder webp(NULL);

	// The WebP benchmarks use fewer iterations than the JPEG ones.
	gettimeofday(&start, NULL);
	runs = RUNS / 8;
	for (i = 0; i < runs; i++) {
		webp.compressOnly(&f1, 8, vec, false);
	}
	vlog.info("Webp compression at quality 8 took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS / 4;
	for (i = 0; i < runs; i++) {
		webp.compressOnly(&f1, 4, vec, false);
	}
	vlog.info("Webp compression at quality 4 took %u ms (%u runs)", msSince(&start), runs);

	// Scaling
	// Each scaler returns a newly allocated buffer, which is deleted inside
	// the timed loop.
	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		PixelBuffer *pb = nearestScale(&f1, W * 0.8, H * 0.8, 0.8);
		delete pb;
	}
	vlog.info("Nearest scaling to 80%% took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		PixelBuffer *pb = nearestScale(&f1, W * 0.4, H * 0.4, 0.4);
		delete pb;
	}
	vlog.info("Nearest scaling to 40%% took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		PixelBuffer *pb = bilinearScale(&f1, W * 0.8, H * 0.8, 0.8);
		delete pb;
	}
	vlog.info("Bilinear scaling to 80%% took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		PixelBuffer *pb = bilinearScale(&f1, W * 0.4, H * 0.4, 0.4);
		delete pb;
	}
	vlog.info("Bilinear scaling to 40%% took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		PixelBuffer *pb = progressiveBilinearScale(&f1, W * 0.8, H * 0.8, 0.8);
		delete pb;
	}
	vlog.info("Progressive bilinear scaling to 80%% took %u ms (%u runs)", msSince(&start), runs);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		PixelBuffer *pb = progressiveBilinearScale(&f1, W * 0.4, H * 0.4, 0.4);
		delete pb;
	}
	vlog.info("Progressive bilinear scaling to 40%% took %u ms (%u runs)", msSince(&start), runs);

	// Analysis
	// Every run copies one of the two random frames into "screen" (alternating
	// f2, f1, f2, ...) and asks the comparer to process it, so the copy is part
	// of the measured time.
	ComparingUpdateTracker *comparer = new ComparingUpdateTracker(&screen);
	Region cursorReg;

	Server::detectScrolling.setParam(false);
	Server::detectHorizontal.setParam(false);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		memcpy(screenptr, i % 2 ? f1orig : f2orig, W * H * 4);
		// NOTE(review): this run passes true as the first argument, the two
		// later runs pass false; its meaning is defined by
		// ComparingUpdateTracker, which is not visible here.
		comparer->compare(true, cursorReg);
	}
	vlog.info("Analysis took %u ms (%u runs) (incl. memcpy overhead)", msSince(&start), runs);

	Server::detectScrolling.setParam(true);

	gettimeofday(&start, NULL);
	runs = RUNS;
	for (i = 0; i < runs; i++) {
		memcpy(screenptr, i % 2 ? f1orig : f2orig, W * H * 4);
		comparer->compare(false, cursorReg);
	}
	vlog.info("Analysis w/ scroll detection took %u ms (%u runs) (incl. memcpy overhead)", msSince(&start), runs);

	Server::detectHorizontal.setParam(true);

	// A fresh tracker is created after the parameter change.
	delete comparer;
	comparer = new ComparingUpdateTracker(&screen);

	gettimeofday(&start, NULL);
	runs = RUNS / 2;
	for (i = 0; i < runs; i++) {
		memcpy(screenptr, i % 2 ? f1orig : f2orig, W * H * 4);
		comparer->compare(false, cursorReg);
	}
	vlog.info("Analysis w/ horizontal scroll detection took %u ms (%u runs) (incl. memcpy overhead)", msSince(&start), runs);

	// exit() does not destroy heap objects, so release the tracker explicitly.
	delete comparer;

	exit(0);
}

View File

@@ -113,6 +113,10 @@ rfb::BoolParameter rfb::Server::ignoreClientSettingsKasm
("IgnoreClientSettingsKasm",
"Ignore the additional client settings exposed in Kasm.",
false);
rfb::BoolParameter rfb::Server::selfBench
("SelfBench",
"Run self-benchmarks and exit.",
false);
rfb::IntParameter rfb::Server::dynamicQualityMin
("DynamicQualityMin",
"The minimum dynamic JPEG quality, 0 = low, 9 = high",

View File

@@ -74,6 +74,7 @@ namespace rfb {
static BoolParameter detectScrolling;
static BoolParameter detectHorizontal;
static BoolParameter ignoreClientSettingsKasm;
static BoolParameter selfBench;
static PresetParameter preferBandwidth;
};

View File

@@ -53,6 +53,7 @@
#include <network/GetAPI.h>
#include <rfb/cpuid.h>
#include <rfb/ComparingUpdateTracker.h>
#include <rfb/KeyRemapper.h>
#include <rfb/ListConnInfo.h>
@@ -76,6 +77,8 @@ static LogWriter slog("VNCServerST");
LogWriter VNCServerST::connectionsLog("Connections");
EncCache VNCServerST::encCache;
void SelfBench();
//
// -=- VNCServerST Implementation
//
@@ -132,6 +135,9 @@ VNCServerST::VNCServerST(const char* name_, SDesktop* desktop_)
{
lastUserInputTime = lastDisconnectTime = time(0);
slog.debug("creating single-threaded server %s", name.buf);
slog.info("CPU capability: SSE2 %s, AVX512f %s",
supportsSSE2() ? "yes" : "no",
supportsAVX512f() ? "yes" : "no");
DLPRegion.enabled = DLPRegion.percents = false;
@@ -212,6 +218,9 @@ VNCServerST::VNCServerST(const char* name_, SDesktop* desktop_)
}
trackingClient[0] = 0;
if (Server::selfBench)
SelfBench();
}
VNCServerST::~VNCServerST()

70
common/rfb/cpuid.cxx Normal file
View File

@@ -0,0 +1,70 @@
/* Copyright (C) 2021 Kasm Web
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
#include <stdint.h>
// Cached CPUID register dumps in EAX, EBX, ECX, EDX order.
// cpuid[]    holds leaf 1 (standard feature bits).
// extcpuid[] holds leaf 7, sub-leaf 0 (extended feature bits); it stays all
//            zero when the processor does not implement leaf 7.
static uint32_t cpuid[4] = { 0 };
static uint32_t extcpuid[4] = { 0 };

// Fills cpuid[] and extcpuid[] on the first call; later calls return
// immediately once cpuid[0] is non-zero. On non-x86 targets this is a no-op
// and both arrays stay zero.
static void getcpuid() {
	if (cpuid[0])
		return;

#if defined(__x86_64__) || defined(__i386__)
	uint32_t eax, ecx;
	uint32_t leaf0[4];

	// Leaf 0: EAX reports the highest basic leaf this processor implements.
	eax = 0;
	ecx = 0;
	__asm__ __volatile__(
		"cpuid\n\t"
		: "=a"(leaf0[0]), "=b"(leaf0[1]), "=c"(leaf0[2]), "=d"(leaf0[3])
		: "0"(eax), "2"(ecx)
	);
	const uint32_t maxleaf = leaf0[0];

	if (maxleaf < 1)
		return;

	eax = 1; // normal feature bits
	ecx = 0;
	__asm__ __volatile__(
		"cpuid\n\t"
		: "=a"(cpuid[0]), "=b"(cpuid[1]), "=c"(cpuid[2]), "=d"(cpuid[3])
		: "0"(eax), "2"(ecx)
	);

	// Asking for a leaf above the maximum returns the data of a different
	// leaf, which would be misread as extended feature bits. Only query
	// leaf 7 when it exists; otherwise extcpuid[] keeps its zeros.
	if (maxleaf >= 7) {
		eax = 7; // ext feature bits
		ecx = 0;
		__asm__ __volatile__(
			"cpuid\n\t"
			: "=a"(extcpuid[0]), "=b"(extcpuid[1]), "=c"(extcpuid[2]), "=d"(extcpuid[3])
			: "0"(eax), "2"(ecx)
		);
	}
#endif
}
namespace rfb {

// True when bit 26 of EDX from CPUID leaf 1 (the SSE2 feature flag) is set.
// Always false when not compiled for x86 / x86-64.
bool supportsSSE2() {
	getcpuid();

#if defined(__x86_64__) || defined(__i386__)
	const uint32_t sse2_mask = 1u << 26;
	return (cpuid[3] & sse2_mask) != 0;
#else
	return false;
#endif
}

// True when bit 16 of EBX from CPUID leaf 7, sub-leaf 0 (the AVX-512
// Foundation feature flag) is set. Always false when not compiled for
// x86 / x86-64. Only the CPU flag is examined here.
bool supportsAVX512f() {
	getcpuid();

#if defined(__x86_64__) || defined(__i386__)
	const uint32_t avx512f_mask = 1u << 16;
	return (extcpuid[1] & avx512f_mask) != 0;
#else
	return false;
#endif
}

}; // namespace rfb

28
common/rfb/cpuid.h Normal file
View File

@@ -0,0 +1,28 @@
/* Copyright (C) 2021 Kasm Web
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
#ifndef __RFB_CPUID_H__
#define __RFB_CPUID_H__
// Runtime CPU feature queries, used to pick SIMD code paths at run time.
// Both functions return false when built for a non-x86 target.
namespace rfb {
// True when the CPU reports SSE2 (CPUID leaf 1, EDX bit 26).
bool supportsSSE2();
// True when the CPU reports AVX-512 Foundation (CPUID leaf 7, EBX bit 16).
bool supportsAVX512f();
};
#endif

View File

@@ -0,0 +1,37 @@
/* Copyright (C) 2021 Kasm Web
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
#include <rfb/scale_sse2.h>
// Stand-in definitions of the SSE2 scaling entry points, built in place of
// scale_sse2.cxx when the compiler does not accept -msse2. They exist so the
// library still links; both bodies are intentionally empty and leave newpx
// untouched.
namespace rfb {
// No-op replacement for the SSE2 2:1 downscaler.
void SSE2_halve(const uint8_t *oldpx,
const uint16_t tgtw, const uint16_t tgth,
uint8_t *newpx,
const unsigned oldstride, const unsigned newstride) {
}
// No-op replacement for the SSE2 bilinear scaler.
// Handles factors between 0.5 and 1.0
void SSE2_scale(const uint8_t *oldpx,
const uint16_t tgtw, const uint16_t tgth,
uint8_t *newpx,
const unsigned oldstride, const unsigned newstride,
const float tgtdiff) {
}
}; // namespace rfb

257
common/rfb/scale_sse2.cxx Normal file
View File

@@ -0,0 +1,257 @@
/* Copyright (C) 2021 Kasm Web
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
#include <emmintrin.h>
#include <rfb/scale_sse2.h>
namespace rfb {
/*
static void print128(const char msg[], const __m128i v) {
union {
__m128i v;
uint8_t c[16];
} u;
u.v = v;
printf("%s %02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x\n",
msg,
u.c[0],
u.c[1],
u.c[2],
u.c[3],
u.c[4],
u.c[5],
u.c[6],
u.c[7],
u.c[8],
u.c[9],
u.c[10],
u.c[11],
u.c[12],
u.c[13],
u.c[14],
u.c[15]);
}
*/
// Shrinks an image of 4-byte pixels to half its width and height: every
// output pixel is the per-channel average of a 2x2 block of source pixels.
//
//   oldpx      source pixels; (2 * tgtw) x (2 * tgth) pixels are read
//   tgtw/tgth  size of the destination in pixels
//   newpx      destination pixels
//   oldstride  source row pitch, in pixels (multiplied by 4 to get bytes)
//   newstride  destination row pitch, in pixels
//
// Four source pixels per row are handled at a time with SSE2; whatever is
// left at the end of a row (one output pixel when tgtw is odd) goes through
// the scalar loop, so nothing outside the stated rectangles is read or
// written.
void SSE2_halve(const uint8_t *oldpx,
                const uint16_t tgtw, const uint16_t tgth,
                uint8_t *newpx,
                const unsigned oldstride, const unsigned newstride) {
	uint16_t x, y;
	const uint16_t srcw = tgtw * 2, srch = tgth * 2;
	const __m128i zero = _mm_setzero_si128();
	const __m128i shift = _mm_set_epi32(0, 0, 0, 2);
	// Masks selecting the lower / upper 64 bits (one widened pixel each).
	const __m128i low = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
	const __m128i high = _mm_set_epi32(0xffffffff, 0xffffffff, 0, 0);

	for (y = 0; y < srch; y += 2) {
		const uint8_t * const row0 = oldpx + oldstride * y * 4;
		const uint8_t * const row1 = oldpx + oldstride * (y + 1) * 4;
		uint8_t * const dst = newpx + newstride * (y / 2) * 4;

		// Only take the vector path while a full group of four source
		// pixels remains. The old bound (x < srcw) let the last iteration
		// run past the row when tgtw is odd and made the remainder loop
		// below unreachable.
		for (x = 0; x + 4 <= srcw; x += 4) {
			__m128i lo, hi, a, b, c, d;

			lo = _mm_loadu_si128((const __m128i *) &row0[x * 4]);
			hi = _mm_loadu_si128((const __m128i *) &row1[x * 4]);

			// Widen the bytes to 16 bits so the sums cannot overflow.
			a = _mm_unpacklo_epi8(lo, zero);
			b = _mm_unpackhi_epi8(lo, zero);
			c = _mm_unpacklo_epi8(hi, zero);
			d = _mm_unpackhi_epi8(hi, zero);

			// Vertical sums: a = pixels 0,1; b = pixels 2,3.
			a = _mm_add_epi16(a, c);
			b = _mm_add_epi16(b, d);

			// Horizontal sum of pixels 0+1 into the low half of a.
			c = _mm_srli_si128(a, 8);
			a = _mm_and_si128(a, low);
			a = _mm_add_epi16(a, c);

			// Horizontal sum of pixels 2+3 into the high half of b.
			d = _mm_slli_si128(b, 8);
			b = _mm_and_si128(b, high);
			b = _mm_add_epi16(b, d);

			// Merge, divide by four, narrow back to bytes.
			a = _mm_add_epi16(a, b);
			a = _mm_srl_epi16(a, shift);
			a = _mm_packus_epi16(a, zero);

			// Two output pixels (8 bytes).
			_mm_storel_epi64((__m128i *) &dst[(x / 2) * 4], a);
		}

		for (; x < srcw; x += 2) {
			// Remainder in C
			uint8_t i;
			for (i = 0; i < 4; i++) {
				dst[(x / 2) * 4 + i] =
					(row0[x * 4 + i] +
					row0[(x + 1) * 4 + i] +
					row1[x * 4 + i] +
					row1[(x + 1) * 4 + i]) / 4;
			}
		}
	}
}
// Bilinear downscale of an image of 4-byte pixels.
// Handles factors between 0.5 and 1.0
//
//   oldpx      source pixels
//   tgtw/tgth  size of the destination in pixels
//   newpx      destination pixels
//   oldstride  source row pitch, in pixels (multiplied by 4 to get bytes)
//   newstride  destination row pitch, in pixels
//   tgtdiff    scale factor (destination size / source size); must be non-zero
//
// Each destination pixel (x, y) is sampled at source position
// (x / tgtdiff, y / tgtdiff) and blended from that pixel, its right
// neighbour and the two pixels below, using 8-bit fixed-point weights.
// The source size is not passed in, so the caller has to make sure row
// lowy + 1 and column lowx + 1 are readable for every sample.
//
// Two destination pixels are produced per SSE2 iteration; the last pixel of
// an odd-width row goes through the scalar loop, so nothing is written past
// tgtw pixels in a destination row.
void SSE2_scale(const uint8_t *oldpx,
                const uint16_t tgtw, const uint16_t tgth,
                uint8_t *newpx,
                const unsigned oldstride, const unsigned newstride,
                const float tgtdiff) {

	uint16_t x, y;
	const __m128i zero = _mm_setzero_si128();
	// Masks selecting the lower / upper 64 bits (one widened pixel each).
	const __m128i low = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
	const __m128i high = _mm_set_epi32(0xffffffff, 0xffffffff, 0, 0);
	const float invdiff = 1 / tgtdiff;

	for (y = 0; y < tgth; y++) {
		const float ny = y * invdiff;
		const uint16_t lowy = ny;
		const uint16_t highy = lowy + 1;
		// Vertical weights out of 256: bot for row highy, top for row lowy.
		const uint16_t bot = (ny - lowy) * 256;
		const uint16_t top = 256 - bot;
		const uint32_t * const row0 = (const uint32_t *) (oldpx + oldstride * lowy * 4);
		const uint32_t * const row1 = (const uint32_t *) (oldpx + oldstride * highy * 4);
		const uint8_t * const brow0 = (const uint8_t *) row0;
		const uint8_t * const brow1 = (const uint8_t *) row1;

		uint8_t * const dst = newpx + newstride * y * 4;

		const __m128i vertmul = _mm_set1_epi16(top);
		const __m128i vertmul2 = _mm_set1_epi16(bot);

		// Only take the vector path while two destination pixels remain.
		// The old bound (x < tgtw) let the last iteration of an odd-width
		// row sample and store a pixel at column tgtw, and made the
		// remainder loop below unreachable.
		for (x = 0; x + 1 < tgtw; x += 2) {
			const float nx[2] = {
				x * invdiff,
				(x + 1) * invdiff,
			};
			const uint16_t lowx[2] = {
				(uint16_t) nx[0],
				(uint16_t) nx[1],
			};
			const uint16_t highx[2] = {
				(uint16_t) (lowx[0] + 1),
				(uint16_t) (lowx[1] + 1),
			};
			// Horizontal weights out of 256.
			const uint16_t right[2] = {
				(uint16_t) ((nx[0] - lowx[0]) * 256),
				(uint16_t) ((nx[1] - lowx[1]) * 256),
			};
			const uint16_t left[2] = {
				(uint16_t) (256 - right[0]),
				(uint16_t) (256 - right[1]),
			};

			// _mm_set_epi16 lists lanes from highest to lowest, so the low
			// four lanes carry "left" and the high four carry "right".
			const __m128i horzmul = _mm_set_epi16(
				right[0],
				right[0],
				right[0],
				right[0],
				left[0],
				left[0],
				left[0],
				left[0]
			);
			const __m128i horzmul2 = _mm_set_epi16(
				right[1],
				right[1],
				right[1],
				right[1],
				left[1],
				left[1],
				left[1],
				left[1]
			);

			__m128i lo, hi, a, b, c, d;
			// Gather the left/right neighbours of both samples, per row.
			lo = _mm_setr_epi32(row0[lowx[0]],
						row0[highx[0]],
						row0[lowx[1]],
						row0[highx[1]]);
			hi = _mm_setr_epi32(row1[lowx[0]],
						row1[highx[0]],
						row1[lowx[1]],
						row1[highx[1]]);

			// Widen bytes to 16 bits.
			a = _mm_unpacklo_epi8(lo, zero);
			b = _mm_unpackhi_epi8(lo, zero);
			c = _mm_unpacklo_epi8(hi, zero);
			d = _mm_unpackhi_epi8(hi, zero);

			// Vertical blend.
			a = _mm_mullo_epi16(a, vertmul);
			b = _mm_mullo_epi16(b, vertmul);
			c = _mm_mullo_epi16(c, vertmul2);
			d = _mm_mullo_epi16(d, vertmul2);

			a = _mm_add_epi16(a, c);
			a = _mm_srli_epi16(a, 8);
			b = _mm_add_epi16(b, d);
			b = _mm_srli_epi16(b, 8);

			// Horizontal blend.
			a = _mm_mullo_epi16(a, horzmul);
			b = _mm_mullo_epi16(b, horzmul2);

			// First sample: left + right into the low half of a.
			lo = _mm_srli_si128(a, 8);
			a = _mm_and_si128(a, low);
			a = _mm_add_epi16(a, lo);

			// Second sample: left + right into the high half of b.
			hi = _mm_slli_si128(b, 8);
			b = _mm_and_si128(b, high);
			b = _mm_add_epi16(b, hi);

			a = _mm_add_epi16(a, b);
			a = _mm_srli_epi16(a, 8);

			a = _mm_packus_epi16(a, zero);

			// Two output pixels (8 bytes).
			_mm_storel_epi64((__m128i *) &dst[x * 4], a);
		}

		for (; x < tgtw; x++) {
			// Remainder in C
			const float nx = x * invdiff;
			const uint16_t lowx = nx;
			const uint16_t highx = lowx + 1;
			const uint16_t right = (nx - lowx) * 256;
			const uint16_t left = 256 - right;

			uint8_t i;
			uint32_t val, val2;
			for (i = 0; i < 4; i++) {
				val = brow0[lowx * 4 + i] * left;
				val += brow0[highx * 4 + i] * right;
				val >>= 8;

				val2 = brow1[lowx * 4 + i] * left;
				val2 += brow1[highx * 4 + i] * right;
				val2 >>= 8;

				dst[x * 4 + i] =
					(val * top + val2 * bot) >> 8;
			}
		}
	}
}
}; // namespace rfb

38
common/rfb/scale_sse2.h Normal file
View File

@@ -0,0 +1,38 @@
/* Copyright (C) 2021 Kasm Web
*
* This is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this software; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
* USA.
*/
#ifndef __RFB_SCALE_SSE2_H__
#define __RFB_SCALE_SSE2_H__
#include <stdint.h>
// Scaling helpers for images of 4-byte pixels. Strides are given in pixels,
// not bytes. Callers are expected to check rfb::supportsSSE2() first.
namespace rfb {
// Writes a tgtw x tgth image to newpx in which every pixel is the average of
// the matching 2x2 block of the (2 * tgtw) x (2 * tgth) source at oldpx.
void SSE2_halve(const uint8_t *oldpx,
const uint16_t tgtw, const uint16_t tgth,
uint8_t *newpx,
const unsigned oldstride, const unsigned newstride);
// Writes a tgtw x tgth bilinear-filtered image to newpx. tgtdiff is the scale
// factor (destination size / source size); the implementation is written for
// factors between 0.5 and 1.0.
void SSE2_scale(const uint8_t *oldpx,
const uint16_t tgtw, const uint16_t tgth,
uint8_t *newpx,
const unsigned oldstride, const unsigned newstride,
const float tgtdiff);
};
#endif

Submodule kasmweb updated: ba40cacce0...d1e4bda4b3

View File

@@ -317,6 +317,10 @@ Log clipboard and keyboard actions. Info logs just clipboard direction and size,
verbose adds the contents for both.
.
.TP
.B \-selfBench
Run a set of self-benchmarks and exit.
.
.TP
.B \-noWebsocket
Disable websockets and expose a traditional VNC port (5901, etc.).
.