diff --git a/CMakeLists.txt b/CMakeLists.txt
index 179c278..c32884a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ include(CheckLibraryExists)
 include(CheckTypeSize)
 include(CheckCSourceCompiles)
 include(CheckCXXSourceCompiles)
+include(CheckCXXCompilerFlag)
 include(CheckCSourceRuns)
 
 include(CMakeMacroLibtoolFile)
@@ -208,6 +209,9 @@ if(ENABLE_PAM)
 endif()
 set(HAVE_PAM ${ENABLE_PAM})
 
+# Probe whether the C++ compiler accepts -msse2; the result is stored in COMPILER_SUPPORTS_SSE2
+check_cxx_compiler_flag(-msse2 COMPILER_SUPPORTS_SSE2)
+
 # Generate config.h and make sure the source finds it
 configure_file(config.h.in config.h)
 add_definitions(-DHAVE_CONFIG_H)
diff --git a/common/rfb/CMakeLists.txt b/common/rfb/CMakeLists.txt
index 5e1944f..56d0da6 100644
--- a/common/rfb/CMakeLists.txt
+++ b/common/rfb/CMakeLists.txt
@@ -99,6 +99,29 @@ endif()
 
 add_library(rfb STATIC ${RFB_SOURCES})
 
+# SSE2 scaling kernels; a stub library is linked instead when -msse2 is unavailable.
+# The COMPILE_FLAGS value is quoted so it always reaches CMake as a single argument.
+set(SSE2_SOURCES
+  scale_sse2.cxx)
+
+set(SCALE_DUMMY_SOURCES
+  scale_dummy.cxx)
+
+if(COMPILER_SUPPORTS_SSE2)
+  add_library(scale_sse2 STATIC ${SSE2_SOURCES})
+  set(RFB_LIBRARIES
+    ${RFB_LIBRARIES}
+    scale_sse2
+  )
+  set_target_properties(scale_sse2 PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -msse2")
+else()
+  add_library(scale_dummy STATIC ${SCALE_DUMMY_SOURCES})
+  set(RFB_LIBRARIES
+    ${RFB_LIBRARIES}
+    scale_dummy
+  )
+endif()
+
 target_link_libraries(rfb ${RFB_LIBRARIES})
 
 if(UNIX)
diff --git a/common/rfb/EncodeManager.cxx b/common/rfb/EncodeManager.cxx
index 8785190..b6d7210 100644
--- a/common/rfb/EncodeManager.cxx
+++ b/common/rfb/EncodeManager.cxx
@@ -27,6 +27,7 @@
 #include <rfb/EncodeManager.h>
 #include <rfb/Encoder.h>
 #include <rfb/Palette.h>
+#include <rfb/scale_sse2.h>
 #include <rfb/SConnection.h>
 #include <rfb/ServerCore.h>
 #include <rfb/SMsgWriter.h>
@@ -973,6 +974,64 @@ PixelBuffer *progressiveBilinearScale(const PixelBuffer *pb,
                                  const uint16_t tgtw, const uint16_t tgth,
                                  const float tgtdiff)
 {
+  if (supportsSSE2()) {
+    if (tgtdiff >= 0.5f) { // 0.5 and above: a single SSE2_scale pass
+      ManagedPixelBuffer *newpb = new ManagedPixelBuffer(pb->getPF(), tgtw, tgth);
+
+      int oldstride, newstride;
+      const rdr::U8 *oldpx = pb->getBuffer(pb->getRect(), &oldstride);
+      rdr::U8 *newpx = newpb->getBufferRW(newpb->getRect(), &newstride);
+
+      SSE2_scale(oldpx, tgtw, tgth, newpx, oldstride, newstride, tgtdiff);
+      return newpb;
+    }
+
+    PixelBuffer *newpb;
+    uint16_t neww, newh, oldw, oldh;
+    bool del = false;
+    // Below 0.5: halve repeatedly until tgtw * 2 >= the current width
+    do {
+      oldw = pb->getRect().width();
+      oldh = pb->getRect().height();
+      neww = oldw / 2;
+      newh = oldh / 2;
+
+      newpb = new ManagedPixelBuffer(pb->getPF(), neww, newh);
+
+      int oldstride, newstride;
+      const rdr::U8 *oldpx = pb->getBuffer(pb->getRect(), &oldstride);
+      rdr::U8 *newpx = ((ManagedPixelBuffer *) newpb)->getBufferRW(newpb->getRect(),
+                                                                   &newstride);
+
+      SSE2_halve(oldpx, neww, newh, newpx, oldstride, newstride);
+
+      if (del)
+        delete pb;
+      del = true; // the caller's buffer is never deleted; later ones are ours
+
+      pb = newpb;
+    } while (tgtw * 2 < neww);
+    // Final, non-halving step. Its factor is measured against the halved
+    // buffer rather than the original image, so tgtdiff cannot be reused.
+    if (tgtw != neww || tgth != newh) {
+      oldw = pb->getRect().width();
+      oldh = pb->getRect().height();
+
+      newpb = new ManagedPixelBuffer(pb->getPF(), tgtw, tgth);
+
+      int oldstride, newstride;
+      const rdr::U8 *oldpx = pb->getBuffer(pb->getRect(), &oldstride);
+      rdr::U8 *newpx = ((ManagedPixelBuffer *) newpb)->getBufferRW(newpb->getRect(),
+                                                                   &newstride);
+
+      SSE2_scale(oldpx, tgtw, tgth, newpx, oldstride, newstride, tgtw / (float) oldw);
+      if (del)
+        delete pb;
+    }
+
+    return newpb;
+  } // SSE2
+
   if (tgtdiff >= 0.5f)
     return bilinearScale(pb, tgtw, tgth, tgtdiff);
 
diff --git a/common/rfb/scale_dummy.cxx b/common/rfb/scale_dummy.cxx
new file mode 100644
index 0000000..b978609
--- /dev/null
+++ b/common/rfb/scale_dummy.cxx
@@ -0,0 +1,37 @@
+/* Copyright (C) 2021 Kasm Web
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#include <rfb/scale_sse2.h>
+
+namespace rfb {
+
+void SSE2_halve(const uint8_t *oldpx,
+			const uint16_t tgtw, const uint16_t tgth,
+			uint8_t *newpx,
+			const unsigned oldstride, const unsigned newstride) {
+} // Intentionally empty: this stub is built only when the compiler lacks -msse2
+
+// Stub for the SSE2 scaler (the real one handles factors between 0.5 and 1.0)
+void SSE2_scale(const uint8_t *oldpx,
+		const uint16_t tgtw, const uint16_t tgth,
+		uint8_t *newpx,
+		const unsigned oldstride, const unsigned newstride,
+		const float tgtdiff) {
+} // Intentionally empty: nothing is written to newpx
+
+}; // namespace rfb
diff --git a/common/rfb/scale_sse2.cxx b/common/rfb/scale_sse2.cxx
new file mode 100644
index 0000000..e4c717b
--- /dev/null
+++ b/common/rfb/scale_sse2.cxx
@@ -0,0 +1,257 @@
+/* Copyright (C) 2021 Kasm Web
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#include <emmintrin.h>
+
+#include <rfb/scale_sse2.h>
+
+namespace rfb {
+
+/*
+static void print128(const char msg[], const __m128i v) {
+	union {
+		__m128i v;
+		uint8_t c[16];
+	} u;
+
+	u.v = v;
+
+	printf("%s %02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x,%02x\n",
+		msg,
+		u.c[0],
+		u.c[1],
+		u.c[2],
+		u.c[3],
+		u.c[4],
+		u.c[5],
+		u.c[6],
+		u.c[7],
+		u.c[8],
+		u.c[9],
+		u.c[10],
+		u.c[11],
+		u.c[12],
+		u.c[13],
+		u.c[14],
+		u.c[15]);
+}
+*/
+
+// Halve a 4-byte-per-pixel image: each output pixel is the truncated mean of a
+// 2x2 source block. tgtw/tgth give the output size; strides count pixels.
+void SSE2_halve(const uint8_t *oldpx,
+			const uint16_t tgtw, const uint16_t tgth,
+			uint8_t *newpx,
+			const unsigned oldstride, const unsigned newstride) {
+	uint16_t x, y;
+	const uint16_t srcw = tgtw * 2, srch = tgth * 2;
+	const __m128i zero = _mm_setzero_si128();
+	const __m128i shift = _mm_set_epi32(0, 0, 0, 2);
+	const __m128i low = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+	const __m128i high = _mm_set_epi32(0xffffffff, 0xffffffff, 0, 0);
+
+	for (y = 0; y < srch; y += 2) {
+		const uint8_t * const row0 = oldpx + oldstride * y * 4;
+		const uint8_t * const row1 = oldpx + oldstride * (y + 1) * 4;
+		uint8_t * const dst = newpx + newstride * (y / 2) * 4;
+		// Vector path: 4 source pixels -> 2 output pixels; only runs on a full group of 4
+		for (x = 0; x + 4 <= srcw; x += 4) {
+			__m128i lo, hi, a, b, c, d;
+			lo = _mm_loadu_si128((__m128i *) &row0[x * 4]);
+			hi = _mm_loadu_si128((__m128i *) &row1[x * 4]);
+			// Widen bytes to 16 bits so the four-way sums cannot overflow
+			a = _mm_unpacklo_epi8(lo, zero);
+			b = _mm_unpackhi_epi8(lo, zero);
+			c = _mm_unpacklo_epi8(hi, zero);
+			d = _mm_unpackhi_epi8(hi, zero);
+			// Vertical sums: row0 + row1
+			a = _mm_add_epi16(a, c);
+			b = _mm_add_epi16(b, d);
+			// Source pixels 0 and 1 summed into the low half
+			c = _mm_srli_si128(a, 8);
+			a = _mm_and_si128(a, low);
+			a = _mm_add_epi16(a, c);
+			// Source pixels 2 and 3 summed into the high half
+			d = _mm_slli_si128(b, 8);
+			b = _mm_and_si128(b, high);
+			b = _mm_add_epi16(b, d);
+
+			a = _mm_add_epi16(a, b);
+			// Divide by 4, then narrow back to bytes
+			a = _mm_srl_epi16(a, shift);
+			a = _mm_packus_epi16(a, zero);
+
+			_mm_storel_epi64((__m128i *) &dst[(x / 2) * 4], a);
+		}
+
+		for (; x < srcw; x += 2) {
+			// Remainder in C: the last 2x2 block when tgtw is odd
+			for (uint8_t i = 0; i < 4; i++) {
+				dst[(x / 2) * 4 + i] =
+					(row0[x * 4 + i] +
+					row0[(x + 1) * 4 + i] +
+					row1[x * 4 + i] +
+					row1[(x + 1) * 4 + i]) / 4;
+			}
+		}
+	}
+}
+
+// Bilinear downscale of a 4-byte-per-pixel image to tgtw x tgth.
+// Handles factors between 0.5 and 1.0; tgtdiff is target size / source size,
+// strides count pixels. NOTE(review): source size is unknown here, so the +1 fetches are unclamped.
+void SSE2_scale(const uint8_t *oldpx,
+		const uint16_t tgtw, const uint16_t tgth,
+		uint8_t *newpx,
+		const unsigned oldstride, const unsigned newstride,
+		const float tgtdiff) {
+	uint16_t x, y;
+	const __m128i zero = _mm_setzero_si128();
+	const __m128i low = _mm_set_epi32(0, 0, 0xffffffff, 0xffffffff);
+	const __m128i high = _mm_set_epi32(0xffffffff, 0xffffffff, 0, 0);
+	const float invdiff = 1 / tgtdiff;
+
+	for (y = 0; y < tgth; y++) {
+		const float ny = y * invdiff;
+		const uint16_t lowy = ny;
+		const uint16_t highy = lowy + 1;
+		const uint16_t bot = (ny - lowy) * 256; // weights are in 1/256 units
+		const uint16_t top = 256 - bot;
+		const uint32_t * const row0 = (const uint32_t *) (oldpx + oldstride * lowy * 4);
+		const uint32_t * const row1 = (const uint32_t *) (oldpx + oldstride * highy * 4);
+		const uint8_t * const brow0 = (const uint8_t *) row0;
+		const uint8_t * const brow1 = (const uint8_t *) row1;
+
+		uint8_t * const dst = newpx + newstride * y * 4;
+
+		const __m128i vertmul = _mm_set1_epi16(top);
+		const __m128i vertmul2 = _mm_set1_epi16(bot);
+		// Vector path: two output pixels per pass, so both x and x + 1 must be in the row
+		for (x = 0; x + 1 < tgtw; x += 2) {
+			const float nx[2] = {
+				x * invdiff,
+				(x + 1) * invdiff,
+			};
+			const uint16_t lowx[2] =  {
+				(uint16_t) nx[0],
+				(uint16_t) nx[1],
+			};
+			const uint16_t highx[2] = {
+				(uint16_t) (lowx[0] + 1),
+				(uint16_t) (lowx[1] + 1),
+			};
+			const uint16_t right[2] = {
+				(uint16_t) ((nx[0] - lowx[0]) * 256),
+				(uint16_t) ((nx[1] - lowx[1]) * 256),
+			};
+			const uint16_t left[2] = {
+				(uint16_t) (256 - right[0]),
+				(uint16_t) (256 - right[1]),
+			};
+
+			const __m128i horzmul = _mm_set_epi16(
+				right[0],
+				right[0],
+				right[0],
+				right[0],
+				left[0],
+				left[0],
+				left[0],
+				left[0]
+			);
+			const __m128i horzmul2 = _mm_set_epi16(
+				right[1],
+				right[1],
+				right[1],
+				right[1],
+				left[1],
+				left[1],
+				left[1],
+				left[1]
+			);
+
+			__m128i lo, hi, a, b, c, d;
+			lo = _mm_setr_epi32(row0[lowx[0]],
+						row0[highx[0]],
+						row0[lowx[1]],
+						row0[highx[1]]);
+			hi = _mm_setr_epi32(row1[lowx[0]],
+						row1[highx[0]],
+						row1[lowx[1]],
+						row1[highx[1]]);
+
+			a = _mm_unpacklo_epi8(lo, zero);
+			b = _mm_unpackhi_epi8(lo, zero);
+			c = _mm_unpacklo_epi8(hi, zero);
+			d = _mm_unpackhi_epi8(hi, zero);
+			// Vertical blend: top + bot == 256, so each sum stays within 16 bits
+			a = _mm_mullo_epi16(a, vertmul);
+			b = _mm_mullo_epi16(b, vertmul);
+			c = _mm_mullo_epi16(c, vertmul2);
+			d = _mm_mullo_epi16(d, vertmul2);
+
+			a = _mm_add_epi16(a, c);
+			a = _mm_srli_epi16(a, 8);
+			b = _mm_add_epi16(b, d);
+			b = _mm_srli_epi16(b, 8);
+			// Horizontal weights: left neighbour in the low half, right in the high half
+			a = _mm_mullo_epi16(a, horzmul);
+			b = _mm_mullo_epi16(b, horzmul2);
+			// First output pixel: fold both halves of a into the low half
+			lo = _mm_srli_si128(a, 8);
+			a = _mm_and_si128(a, low);
+			a = _mm_add_epi16(a, lo);
+			// Second output pixel: fold both halves of b into the high half
+			hi = _mm_slli_si128(b, 8);
+			b = _mm_and_si128(b, high);
+			b = _mm_add_epi16(b, hi);
+
+			a = _mm_add_epi16(a, b);
+			a = _mm_srli_epi16(a, 8);
+
+			a = _mm_packus_epi16(a, zero);
+
+			_mm_storel_epi64((__m128i *) &dst[x * 4], a);
+		}
+
+		for (; x < tgtw; x++) {
+			// Remainder in C: the last column when tgtw is odd
+			const float nx = x * invdiff;
+			const uint16_t lowx = nx;
+			const uint16_t highx = lowx + 1;
+			const uint16_t right = (nx - lowx) * 256;
+			const uint16_t left = 256 - right;
+
+			uint32_t val, val2;
+			for (uint8_t i = 0; i < 4; i++) {
+				val = brow0[lowx * 4 + i] * left;
+				val += brow0[highx * 4 + i] * right;
+				val >>= 8;
+
+				val2 = brow1[lowx * 4 + i] * left;
+				val2 += brow1[highx * 4 + i] * right;
+				val2 >>= 8;
+
+				dst[x * 4 + i] =
+					(val * top + val2 * bot) >> 8;
+			}
+		}
+	}
+}
+
+}; // namespace rfb
diff --git a/common/rfb/scale_sse2.h b/common/rfb/scale_sse2.h
new file mode 100644
index 0000000..e60357d
--- /dev/null
+++ b/common/rfb/scale_sse2.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2021 Kasm Web
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this software; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
+ * USA.
+ */
+
+#ifndef __RFB_SCALE_SSE2_H__
+#define __RFB_SCALE_SSE2_H__
+
+#include <stdint.h>
+
+namespace rfb {
+	// Halves the image into newpx (tgtw x tgth output); the non-SSE2 stub build is a no-op.
+	void SSE2_halve(const uint8_t *oldpx,
+			const uint16_t tgtw, const uint16_t tgth,
+			uint8_t *newpx,
+			const unsigned oldstride, const unsigned newstride);
+	// Scales into newpx by tgtdiff (0.5 to 1.0); the non-SSE2 stub build is a no-op.
+	void SSE2_scale(const uint8_t *oldpx,
+			const uint16_t tgtw, const uint16_t tgth,
+			uint8_t *newpx,
+			const unsigned oldstride, const unsigned newstride,
+			const float tgtdiff);
+};
+
+#endif