Merge branch 'master' into KASM-1810_yaml_config
commit
6fa5b9271f
@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -eo pipefail
|
||||||
|
|
||||||
|
# Rewrites the global $new_version in place: the first "<digit>-<letter>"
# sequence becomes "<digit>~<letter>" (e.g. 1.0.0-rc1 -> 1.0.0~rc1).
# Presumably this keeps pre-release suffixes valid for package versioning;
# confirm against the packaging rules in use.
update_version_to_meet_packaging_standards() {
  local hyphen_to_tilde='s/\([0-9]\)-\([a-zA-Z]\)/\1~\2/'

  new_version=$(echo "$new_version" | sed -e "$hyphen_to_tilde")
}
|
||||||
|
|
||||||
|
# Prints the global $new_version followed by the Debian revision suffix "-1".
add_debian_revision_to_new_version() {
  printf '%s-1\n' "$new_version"
}
|
||||||
|
|
||||||
|
# Replaces the "Version:" line of the RPM spec file with the global
# $new_version. The path is relative to the current working directory.
bump_rpm() {
  local spec_file="centos/kasmvncserver.spec"

  sed -i -e "s/^Version:.\+/Version: $new_version/" "$spec_file"
}
|
||||||
|
|
||||||
|
# Bumps the Debian package version by running the in-container helper
# (builder/bump-package-version-inside-docker-deb) inside a freshly built
# docker image, with the current directory mounted at /src and the caller's
# uid:gid so that edited files keep their ownership.
bump_deb() {
  local image="debbump_package_version:dev"
  # Declare first, assign afterwards: "local var=$(cmd)" returns the status
  # of "local", which would hide a failing command from "set -e".
  local L_UID L_GID debian_version
  L_UID=$(id -u)
  L_GID=$(id -g)
  debian_version=$(add_debian_revision_to_new_version)

  docker build -t "$image" -f builder/dockerfile.bump-package-version .
  # The version is handed over as a positional parameter ($1 of the inner
  # shell) rather than spliced into the command string, so it can neither be
  # word-split nor interpreted as shell syntax.
  docker run --rm -v "$PWD":/src --user "$L_UID:$L_GID" \
    "$image" /bin/bash -c \
    'cd /src && builder/bump-package-version-inside-docker-deb "$1"' \
    bump-deb "$debian_version"
}
|
||||||
|
|
||||||
|
# Entry point: takes the new upstream version as the only argument, then
# updates the RPM spec and the Debian packaging from the repository root.
new_version="$1"

[[ -n "$new_version" ]] || {
  echo >&2 "Usage: $(basename "$0") <new_version>"
  exit 1
}

# All paths used by the bump functions are relative to the parent of the
# directory this script lives in.
cd "$(dirname "$0")/.."

update_version_to_meet_packaging_standards
bump_rpm
bump_deb
|
@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
# Records a new upstream version in the Debian changelog and marks the entry
# as released, using dch.
#
# Usage: <script> <new_version>
set -euo pipefail

# Fail with a usage message instead of the bare "unbound variable" error that
# "set -u" would otherwise produce for a missing argument.
if [[ $# -lt 1 || -z "$1" ]]; then
  echo >&2 "Usage: $(basename "$0") <new_version>"
  exit 1
fi

new_version="$1"

# Adds a changelog entry for $new_version. The variable is quoted so that it
# always reaches dch as exactly one argument.
update_version() {
  dch --newversion "$new_version" 'New upstream release.'
}

# Finalises the changelog entry; the empty string is passed as the changelog
# text argument.
mark_as_released() {
  dch --release ""
}

update_version
mark_as_released
|
@ -0,0 +1,6 @@
|
|||||||
|
# Image providing the Debian developer scripts (devscripts) and vim.
FROM debian:buster

# Maintainer identity exported to tools run in the container
# (presumably picked up by dch for changelog entries - confirm).
ENV DEBEMAIL="Kasm Technologies LLC <info@kasmweb.com>"

# Remove the apt package lists in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get -y install vim devscripts && \
    rm -rf /var/lib/apt/lists/*
|
@ -0,0 +1,197 @@
|
|||||||
|
/* Copyright (C) 2021 Kasm Web
|
||||||
|
*
|
||||||
|
* This is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this software; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
* USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <rfb/ComparingUpdateTracker.h>
|
||||||
|
#include <rfb/EncodeManager.h>
|
||||||
|
#include <rfb/LogWriter.h>
|
||||||
|
#include <rfb/SConnection.h>
|
||||||
|
#include <rfb/ServerCore.h>
|
||||||
|
#include <rfb/PixelBuffer.h>
|
||||||
|
#include <rfb/TightJPEGEncoder.h>
|
||||||
|
#include <rfb/TightWEBPEncoder.h>
|
||||||
|
#include <rfb/util.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
using namespace rfb;
|
||||||
|
static LogWriter vlog("SelfBench");
|
||||||
|
|
||||||
|
static const PixelFormat pfRGBX(32, 24, false, true, 255, 255, 255, 0, 8, 16);
|
||||||
|
|
||||||
|
#define RUNS 64
|
||||||
|
|
||||||
|
#define W 1600
|
||||||
|
#define H 1200
|
||||||
|
|
||||||
|
void SelfBench() {
|
||||||
|
|
||||||
|
unsigned i, runs;
|
||||||
|
struct timeval start;
|
||||||
|
|
||||||
|
ManagedPixelBuffer f1(pfRGBX, W, H);
|
||||||
|
ManagedPixelBuffer f2(pfRGBX, W, H);
|
||||||
|
ManagedPixelBuffer screen(pfRGBX, W, H);
|
||||||
|
|
||||||
|
int stride;
|
||||||
|
rdr::U8 *f1ptr = f1.getBufferRW(f1.getRect(), &stride);
|
||||||
|
rdr::U8 *f2ptr = f2.getBufferRW(f2.getRect(), &stride);
|
||||||
|
rdr::U8 * const screenptr = screen.getBufferRW(screen.getRect(), &stride);
|
||||||
|
|
||||||
|
rdr::U8 * const f1orig = f1ptr;
|
||||||
|
rdr::U8 * const f2orig = f2ptr;
|
||||||
|
|
||||||
|
for (i = 0; i < W * H * 4; i += 4) {
|
||||||
|
f1ptr[0] = rand();
|
||||||
|
f1ptr[1] = rand();
|
||||||
|
f1ptr[2] = rand();
|
||||||
|
|
||||||
|
f2ptr[0] = rand();
|
||||||
|
f2ptr[1] = rand();
|
||||||
|
f2ptr[2] = rand();
|
||||||
|
|
||||||
|
f1ptr += 4;
|
||||||
|
f2ptr += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
vlog.info("Running micro-benchmarks (single-threaded, runs depending on task)");
|
||||||
|
|
||||||
|
// Encoding
|
||||||
|
std::vector<uint8_t> vec;
|
||||||
|
|
||||||
|
TightJPEGEncoder jpeg(NULL);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
jpeg.compressOnly(&f1, 8, vec, false);
|
||||||
|
}
|
||||||
|
vlog.info("Jpeg compression at quality 8 took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
jpeg.compressOnly(&f1, 4, vec, false);
|
||||||
|
}
|
||||||
|
vlog.info("Jpeg compression at quality 4 took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
|
||||||
|
TightWEBPEncoder webp(NULL);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS / 8;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
webp.compressOnly(&f1, 8, vec, false);
|
||||||
|
}
|
||||||
|
vlog.info("Webp compression at quality 8 took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS / 4;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
webp.compressOnly(&f1, 4, vec, false);
|
||||||
|
}
|
||||||
|
vlog.info("Webp compression at quality 4 took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
// Scaling
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
PixelBuffer *pb = nearestScale(&f1, W * 0.8, H * 0.8, 0.8);
|
||||||
|
delete pb;
|
||||||
|
}
|
||||||
|
vlog.info("Nearest scaling to 80%% took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
PixelBuffer *pb = nearestScale(&f1, W * 0.4, H * 0.4, 0.4);
|
||||||
|
delete pb;
|
||||||
|
}
|
||||||
|
vlog.info("Nearest scaling to 40%% took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
PixelBuffer *pb = bilinearScale(&f1, W * 0.8, H * 0.8, 0.8);
|
||||||
|
delete pb;
|
||||||
|
}
|
||||||
|
vlog.info("Bilinear scaling to 80%% took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
PixelBuffer *pb = bilinearScale(&f1, W * 0.4, H * 0.4, 0.4);
|
||||||
|
delete pb;
|
||||||
|
}
|
||||||
|
vlog.info("Bilinear scaling to 40%% took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
PixelBuffer *pb = progressiveBilinearScale(&f1, W * 0.8, H * 0.8, 0.8);
|
||||||
|
delete pb;
|
||||||
|
}
|
||||||
|
vlog.info("Progressive bilinear scaling to 80%% took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
PixelBuffer *pb = progressiveBilinearScale(&f1, W * 0.4, H * 0.4, 0.4);
|
||||||
|
delete pb;
|
||||||
|
}
|
||||||
|
vlog.info("Progressive bilinear scaling to 40%% took %u ms (%u runs)", msSince(&start), runs);
|
||||||
|
|
||||||
|
// Analysis
|
||||||
|
ComparingUpdateTracker *comparer = new ComparingUpdateTracker(&screen);
|
||||||
|
Region cursorReg;
|
||||||
|
|
||||||
|
Server::detectScrolling.setParam(false);
|
||||||
|
Server::detectHorizontal.setParam(false);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
memcpy(screenptr, i % 2 ? f1orig : f2orig, W * H * 4);
|
||||||
|
comparer->compare(true, cursorReg);
|
||||||
|
}
|
||||||
|
vlog.info("Analysis took %u ms (%u runs) (incl. memcpy overhead)", msSince(&start), runs);
|
||||||
|
|
||||||
|
Server::detectScrolling.setParam(true);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
memcpy(screenptr, i % 2 ? f1orig : f2orig, W * H * 4);
|
||||||
|
comparer->compare(false, cursorReg);
|
||||||
|
}
|
||||||
|
vlog.info("Analysis w/ scroll detection took %u ms (%u runs) (incl. memcpy overhead)", msSince(&start), runs);
|
||||||
|
|
||||||
|
Server::detectHorizontal.setParam(true);
|
||||||
|
delete comparer;
|
||||||
|
comparer = new ComparingUpdateTracker(&screen);
|
||||||
|
|
||||||
|
gettimeofday(&start, NULL);
|
||||||
|
runs = RUNS / 2;
|
||||||
|
for (i = 0; i < runs; i++) {
|
||||||
|
memcpy(screenptr, i % 2 ? f1orig : f2orig, W * H * 4);
|
||||||
|
comparer->compare(false, cursorReg);
|
||||||
|
}
|
||||||
|
vlog.info("Analysis w/ horizontal scroll detection took %u ms (%u runs) (incl. memcpy overhead)", msSince(&start), runs);
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
@ -0,0 +1,70 @@
|
|||||||
|
/* Copyright (C) 2021 Kasm Web
|
||||||
|
*
|
||||||
|
* This is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this software; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
* USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
// Cached CPUID results, stored in EAX, EBX, ECX, EDX order.
// Both stay all-zero on non-x86 builds.
static uint32_t cpuid[4] = { 0 };    // leaf 1: normal feature bits
static uint32_t extcpuid[4] = { 0 }; // leaf 7, sub-leaf 0: ext feature bits

#if defined(__x86_64__) || defined(__i386__)
// Executes CPUID for the given leaf/sub-leaf and stores EAX..EDX in regs.
static void runcpuid(const uint32_t leaf, const uint32_t subleaf,
			uint32_t regs[4]) {
	__asm__ __volatile__(
		"cpuid\n\t"
		: "=a"(regs[0]), "=b"(regs[1]), "=c"(regs[2]), "=d"(regs[3])
		: "0"(leaf), "2"(subleaf)
	);
}
#endif

// Fills the cpuid/extcpuid caches. Does nothing once cpuid[0] is non-zero,
// and nothing at all on non-x86 builds.
static void getcpuid() {
	if (cpuid[0])
		return;

#if defined(__x86_64__) || defined(__i386__)
	// Leaf 0 reports the highest supported basic leaf in EAX. Asking for a
	// leaf above that maximum does not fail; the processor answers with the
	// data of another leaf, which would make the feature bits meaningless.
	uint32_t basic[4];
	runcpuid(0, 0, basic);
	const uint32_t maxleaf = basic[0];

	if (maxleaf < 1)
		return;

	runcpuid(1, 0, cpuid);

	// extcpuid is left zeroed (no extended features) when leaf 7 is absent.
	if (maxleaf >= 7)
		runcpuid(7, 0, extcpuid);
#endif
}
|
||||||
|
|
||||||
|
namespace rfb {
|
||||||
|
|
||||||
|
bool supportsSSE2() {
|
||||||
|
getcpuid();
|
||||||
|
#if defined(__x86_64__) || defined(__i386__)
|
||||||
|
#define bit_SSE2 (1 << 26)
|
||||||
|
return cpuid[3] & bit_SSE2;
|
||||||
|
#endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool supportsAVX512f() {
|
||||||
|
getcpuid();
|
||||||
|
#if defined(__x86_64__) || defined(__i386__)
|
||||||
|
#define bit_AVX512f (1 << 16)
|
||||||
|
return extcpuid[1] & bit_AVX512f;
|
||||||
|
#endif
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
}; // namespace rfb
|
@ -0,0 +1,28 @@
|
|||||||
|
/* Copyright (C) 2021 Kasm Web
|
||||||
|
*
|
||||||
|
* This is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this software; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
* USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __RFB_CPUID_H__
|
||||||
|
#define __RFB_CPUID_H__
|
||||||
|
|
||||||
|
namespace rfb {

	// Runtime CPU feature queries.

	// Whether SSE2 is reported as supported.
	bool supportsSSE2();

	// Whether AVX-512F is reported as supported.
	bool supportsAVX512f();

} // namespace rfb
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,37 @@
|
|||||||
|
/* Copyright (C) 2021 Kasm Web
|
||||||
|
*
|
||||||
|
* This is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this software; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
* USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <rfb/scale_sse2.h>
|
||||||
|
|
||||||
|
namespace rfb {

// Placeholder implementations: both functions accept the same arguments as
// their SSE2 counterparts and intentionally do nothing.

void SSE2_halve(const uint8_t * /* oldpx */,
		const uint16_t /* tgtw */, const uint16_t /* tgth */,
		uint8_t * /* newpx */,
		const unsigned /* oldstride */, const unsigned /* newstride */) {
	// no-op
}

// Handles factors between 0.5 and 1.0
void SSE2_scale(const uint8_t * /* oldpx */,
		const uint16_t /* tgtw */, const uint16_t /* tgth */,
		uint8_t * /* newpx */,
		const unsigned /* oldstride */, const unsigned /* newstride */,
		const float /* tgtdiff */) {
	// no-op
}

} // namespace rfb
|
@ -0,0 +1,257 @@
|
|||||||
|
/* Copyright (C) 2021 Kasm Web
|
||||||
|
*
|
||||||
|
* This is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this software; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
* USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
#include <rfb/scale_sse2.h>
|
||||||
|
|
||||||
|
namespace rfb {
|
||||||
|
|
||||||
|
/*
|
||||||
|
static void print128(const char msg[], const __m128i v) {
|
||||||
|
union {
|
||||||
|
__m128i v;
|
||||||
|
uint8_t c[16];
|
||||||
|
} u;
|
||||||
|
|
||||||
|
u.v = v;
|
||||||
|
|
||||||
|
printf("%s %02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x\n",
|
||||||
|
msg,
|
||||||
|
u.c[0],
|
||||||
|
u.c[1],
|
||||||
|
u.c[2],
|
||||||
|
u.c[3],
|
||||||
|
u.c[4],
|
||||||
|
u.c[5],
|
||||||
|
u.c[6],
|
||||||
|
u.c[7],
|
||||||
|
u.c[8],
|
||||||
|
u.c[9],
|
||||||
|
u.c[10],
|
||||||
|
u.c[11],
|
||||||
|
u.c[12],
|
||||||
|
u.c[13],
|
||||||
|
u.c[14],
|
||||||
|
u.c[15]);
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Shrinks a 4-bytes-per-pixel image to half its width and height by
 * averaging every 2x2 block of source pixels, byte by byte.
 *
 * oldpx      source pixels; 2*tgtw x 2*tgth pixels are read
 * tgtw,tgth  destination size in pixels
 * newpx      destination pixels
 * oldstride  distance between source rows, in pixels
 * newstride  distance between destination rows, in pixels
 */
void SSE2_halve(const uint8_t *oldpx,
		const uint16_t tgtw, const uint16_t tgth,
		uint8_t *newpx,
		const unsigned oldstride, const unsigned newstride) {
	// The doubled dimensions do not fit in uint16_t for targets above
	// 32767 pixels, so they (and the loop counters) are plain unsigned.
	const unsigned srcw = tgtw * 2u, srch = tgth * 2u;
	unsigned x, y;

	const __m128i zero = _mm_setzero_si128();
	const __m128i shift = _mm_set_epi32(0, 0, 0, 2);
	const __m128i low = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
	const __m128i high = _mm_set_epi32(0xffffffff, 0xffffffff, 0, 0);

	for (y = 0; y < srch; y += 2) {
		const uint8_t * const row0 = oldpx + oldstride * y * 4;
		const uint8_t * const row1 = oldpx + oldstride * (y + 1) * 4;

		uint8_t * const dst = newpx + newstride * (y / 2) * 4;

		// Four source pixels (two destination pixels) per iteration.
		// Written as "x + 3 < srcw" because srcw is unsigned and
		// "srcw - 3" would wrap around for srcw < 3.
		for (x = 0; x + 3 < srcw; x += 4) {
			__m128i lo, hi, a, b, c, d;
			lo = _mm_loadu_si128((const __m128i *) &row0[x * 4]);
			hi = _mm_loadu_si128((const __m128i *) &row1[x * 4]);

			// Widen the bytes to 16 bits so the sums cannot overflow
			a = _mm_unpacklo_epi8(lo, zero);
			b = _mm_unpackhi_epi8(lo, zero);
			c = _mm_unpacklo_epi8(hi, zero);
			d = _mm_unpackhi_epi8(hi, zero);

			// Vertical sums: a = pixels 0,1; b = pixels 2,3
			a = _mm_add_epi16(a, c);
			b = _mm_add_epi16(b, d);

			// Horizontal sum of pixels 0+1 into the low half of a
			c = _mm_srli_si128(a, 8);
			a = _mm_and_si128(a, low);
			a = _mm_add_epi16(a, c);

			// Horizontal sum of pixels 2+3 into the high half of b
			d = _mm_slli_si128(b, 8);
			b = _mm_and_si128(b, high);
			b = _mm_add_epi16(b, d);

			a = _mm_add_epi16(a, b);

			// Divide the four-pixel sums by 4 and narrow back to bytes
			a = _mm_srl_epi16(a, shift);
			a = _mm_packus_epi16(a, zero);

			_mm_storel_epi64((__m128i *) &dst[(x / 2) * 4], a);
		}

		for (; x < srcw; x += 2) {
			// Remainder in C
			uint8_t i;
			for (i = 0; i < 4; i++) {
				dst[(x / 2) * 4 + i] =
					(row0[x * 4 + i] +
					row0[(x + 1) * 4 + i] +
					row1[x * 4 + i] +
					row1[(x + 1) * 4 + i]) / 4;
			}
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Bilinear downscale of a 4-bytes-per-pixel image.
 * Handles factors between 0.5 and 1.0
 *
 * oldpx      source pixels
 * tgtw,tgth  destination size in pixels
 * newpx      destination pixels
 * oldstride  distance between source rows, in pixels
 * newstride  distance between destination rows, in pixels
 * tgtdiff    scale factor; each destination coordinate is divided by it to
 *            find the source position
 *
 * Weights are 8-bit fixed point (0..256). For every destination pixel the
 * source pixel at the truncated position and its right/lower neighbours
 * are read, so those must be readable.
 */
void SSE2_scale(const uint8_t *oldpx,
		const uint16_t tgtw, const uint16_t tgth,
		uint8_t *newpx,
		const unsigned oldstride, const unsigned newstride,
		const float tgtdiff) {

	const float invdiff = 1 / tgtdiff;

	const __m128i zero = _mm_setzero_si128();
	// Masks selecting the lower / upper 64 bits of a vector
	const __m128i low = _mm_set_epi32(0, 0, -1, -1);
	const __m128i high = _mm_set_epi32(-1, -1, 0, 0);

	uint16_t x, y;

	for (y = 0; y < tgth; y++) {
		// Source row position and the blend weights of the two rows
		const float ny = y * invdiff;
		const uint16_t lowy = ny;
		const uint16_t highy = lowy + 1;
		const uint16_t bot = (ny - lowy) * 256;
		const uint16_t top = 256 - bot;

		const uint8_t * const brow0 = oldpx + oldstride * lowy * 4;
		const uint8_t * const brow1 = oldpx + oldstride * highy * 4;
		const uint32_t * const row0 = (const uint32_t *) brow0;
		const uint32_t * const row1 = (const uint32_t *) brow1;

		uint8_t * const dst = newpx + newstride * y * 4;

		const __m128i vertmul = _mm_set1_epi16(top);
		const __m128i vertmul2 = _mm_set1_epi16(bot);

		// Two destination pixels per iteration
		for (x = 0; x < tgtw - 1; x += 2) {
			const float nx0 = x * invdiff;
			const float nx1 = (x + 1) * invdiff;

			const uint16_t lowx0 = (uint16_t) nx0;
			const uint16_t lowx1 = (uint16_t) nx1;
			const uint16_t highx0 = (uint16_t) (lowx0 + 1);
			const uint16_t highx1 = (uint16_t) (lowx1 + 1);

			const uint16_t right0 = (uint16_t) ((nx0 - lowx0) * 256);
			const uint16_t right1 = (uint16_t) ((nx1 - lowx1) * 256);
			const uint16_t left0 = (uint16_t) (256 - right0);
			const uint16_t left1 = (uint16_t) (256 - right1);

			// Lower four lanes weight the left pixel, upper four the right
			const __m128i horzmul = _mm_set_epi16(
				right0, right0, right0, right0,
				left0, left0, left0, left0);
			const __m128i horzmul2 = _mm_set_epi16(
				right1, right1, right1, right1,
				left1, left1, left1, left1);

			__m128i lo, hi, a, b, c, d;

			// Gather the left/right neighbours of both pixels, per row
			lo = _mm_setr_epi32(row0[lowx0], row0[highx0],
						row0[lowx1], row0[highx1]);
			hi = _mm_setr_epi32(row1[lowx0], row1[highx0],
						row1[lowx1], row1[highx1]);

			// Widen to 16 bits: a/c belong to pixel 0, b/d to pixel 1
			a = _mm_unpacklo_epi8(lo, zero);
			b = _mm_unpackhi_epi8(lo, zero);
			c = _mm_unpacklo_epi8(hi, zero);
			d = _mm_unpackhi_epi8(hi, zero);

			// Vertical blend
			a = _mm_mullo_epi16(a, vertmul);
			b = _mm_mullo_epi16(b, vertmul);
			c = _mm_mullo_epi16(c, vertmul2);
			d = _mm_mullo_epi16(d, vertmul2);

			a = _mm_add_epi16(a, c);
			a = _mm_srli_epi16(a, 8);
			b = _mm_add_epi16(b, d);
			b = _mm_srli_epi16(b, 8);

			// Horizontal blend
			a = _mm_mullo_epi16(a, horzmul);
			b = _mm_mullo_epi16(b, horzmul2);

			// Pixel 0: fold the upper half of a onto its lower half
			lo = _mm_srli_si128(a, 8);
			a = _mm_and_si128(a, low);
			a = _mm_add_epi16(a, lo);

			// Pixel 1: fold the lower half of b onto its upper half
			hi = _mm_slli_si128(b, 8);
			b = _mm_and_si128(b, high);
			b = _mm_add_epi16(b, hi);

			a = _mm_add_epi16(a, b);
			a = _mm_srli_epi16(a, 8);

			a = _mm_packus_epi16(a, zero);

			_mm_storel_epi64((__m128i *) &dst[x * 4], a);
		}

		for (; x < tgtw; x++) {
			// Remainder in C
			const float nx = x * invdiff;
			const uint16_t lowx = nx;
			const uint16_t highx = lowx + 1;
			const uint16_t right = (nx - lowx) * 256;
			const uint16_t left = 256 - right;

			uint8_t i;
			uint32_t val, val2;
			for (i = 0; i < 4; i++) {
				// Horizontal blend of each row first, then vertical
				val = brow0[lowx * 4 + i] * left;
				val += brow0[highx * 4 + i] * right;
				val >>= 8;

				val2 = brow1[lowx * 4 + i] * left;
				val2 += brow1[highx * 4 + i] * right;
				val2 >>= 8;

				dst[x * 4 + i] =
					(val * top + val2 * bot) >> 8;
			}
		}
	}
}
|
||||||
|
|
||||||
|
}; // namespace rfb
|
@ -0,0 +1,38 @@
|
|||||||
|
/* Copyright (C) 2021 Kasm Web
|
||||||
|
*
|
||||||
|
* This is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This software is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this software; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
|
||||||
|
* USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __RFB_SCALE_SSE2_H__
|
||||||
|
#define __RFB_SCALE_SSE2_H__
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
namespace rfb {

	// Scaling routines for 4-bytes-per-pixel images.
	// Strides are given in pixels, target sizes in pixels.

	// Shrinks the image to a tgtw x tgth target of half the source size.
	void SSE2_halve(const uint8_t *oldpx,
			uint16_t tgtw, uint16_t tgth,
			uint8_t *newpx,
			unsigned oldstride, unsigned newstride);

	// Scales the image to a tgtw x tgth target by the factor tgtdiff.
	void SSE2_scale(const uint8_t *oldpx,
			uint16_t tgtw, uint16_t tgth,
			uint8_t *newpx,
			unsigned oldstride, unsigned newstride,
			float tgtdiff);

} // namespace rfb
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in New Issue