10 #include "emmintrin.h"
13 #include "tmmintrin.h"
// Type trait: IsTagged<CLASS, TAG>::value is true when TAG is a base class of
// CLASS (or the same class type), exactly as reported by std::is_base_of.
// Note the argument order: the class under test comes first, the tag second.
20 template <
typename CLASS,
typename TAG>
struct IsTagged
21 : std::is_base_of<TAG, CLASS> {};
224 template <
typename Pixel,
unsigned w1 = 1,
unsigned w2 = 1>
class BlendLines
229 Pixel* out,
unsigned width);
236 template<
typename Pixel>
242 Pixel* out,
unsigned outWidth)
const;
261 Pixel* out,
unsigned width);
263 Pixel* out,
unsigned width);
281 template<
typename Pixel>
300 virtual bool isCopy()
const = 0;
309 template<
typename Pixel,
typename Scaler>
323 scaler(in, out, outWidth);
336 template<
typename Pixel,
typename Scaler>
346 scaler(in, out, outWidth);
/// Stretch a line by an integer factor N: each input pixel is written N times.
///
/// @param in     Source pixels; one pixel is consumed per complete group of N
///               output pixels.
/// @param out    Destination buffer with room for `width` pixels.
/// @param width  Number of output pixels to produce.
///
/// Only complete groups of N output pixels are filled from the input. Any
/// trailing (width % N) output pixels are set to 0.
///
/// NOTE(review): the replication statements inside the loops were only partly
/// visible in the reviewed excerpt; confirm against the original body.
template <typename Pixel, unsigned N>
static inline void scale_1onN(
	const Pixel* __restrict in, Pixel* __restrict out, size_t width)
{
	// Use size_t indices and the bound `i + N <= width` instead of
	// `i < width - (N - 1)`: the latter wraps around when width < N - 1
	// (e.g. width == 0), which would run the loop far past the buffers.
	size_t i = 0;
	size_t j = 0;
	for (; i + N <= width; i += N, ++j) {
		const Pixel pix = in[j];
		for (unsigned k = 0; k < N; ++k) {
			out[i + k] = pix;
		}
	}
	// Fewer than N pixels remain; blank them.
	for (; i < width; ++i) {
		out[i] = 0;
	}
}
375 template <
typename Pixel>
378 scale_1onN<Pixel, 3>(in, out, width);
381 template <
typename Pixel>
384 scale_1onN<Pixel, 4>(in, out, width);
387 template <
typename Pixel>
390 scale_1onN<Pixel, 6>(in, out, width);
/// Interleave the pixels from the low 64 bits of `x` and `y`.
///
/// The lane width follows sizeof(Pixel): 32-bit lanes for 4-byte pixels,
/// 16-bit lanes for 2-byte pixels. Passing the same register twice therefore
/// duplicates each pixel of its lower half.
///
/// @return x0 y0 x1 y1 ... for the lower-half pixels of x and y.
template<typename Pixel>
static inline __m128i unpacklo(__m128i x, __m128i y)
{
	// Only these two pixel sizes have a matching unpack instruction here;
	// reject anything else at compile time so every path returns a value.
	static_assert(sizeof(Pixel) == 4 || sizeof(Pixel) == 2,
	              "unpacklo: Pixel must be 2 or 4 bytes wide");
	if (sizeof(Pixel) == 4) {
		return _mm_unpacklo_epi32(x, y);
	}
	return _mm_unpacklo_epi16(x, y);
}
/// Interleave the pixels from the high 64 bits of `x` and `y`.
///
/// The lane width follows sizeof(Pixel): 32-bit lanes for 4-byte pixels,
/// 16-bit lanes for 2-byte pixels. Passing the same register twice therefore
/// duplicates each pixel of its upper half.
///
/// @return x0 y0 x1 y1 ... for the upper-half pixels of x and y.
template<typename Pixel>
static inline __m128i unpackhi(__m128i x, __m128i y)
{
	// Only these two pixel sizes have a matching unpack instruction here;
	// reject anything else at compile time so every path returns a value.
	static_assert(sizeof(Pixel) == 4 || sizeof(Pixel) == 2,
	              "unpackhi: Pixel must be 2 or 4 bytes wide");
	if (sizeof(Pixel) == 4) {
		return _mm_unpackhi_epi32(x, y);
	}
	return _mm_unpackhi_epi16(x, y);
}
// Horizontally doubles pixels using SSE2: every input pixel is written twice
// to the output (each input register is interleaved with itself).
//   in_      : source pixels, must be 16-byte aligned
//   out_     : destination pixels, must be 16-byte aligned, receives twice as
//              many bytes as are read from in_
//   srcWidth : number of source pixels; srcWidth * sizeof(Pixel) must be a
//              multiple of 64 bytes (four __m128i)
// NOTE(review): the loop construct surrounding the load/unpack/store sequence
// is not visible in this excerpt; the comments below only describe one step.
415 template<
typename Pixel>
416 static inline void scale_1on2_SSE(
const Pixel* in_,
Pixel* out_,
size_t srcWidth)
// Aligned load/store intrinsics are used below, so both pointers must be
// aligned to sizeof(__m128i).
418 assert((reinterpret_cast<size_t>(in_ ) %
sizeof(__m128i)) == 0);
419 assert((reinterpret_cast<size_t>(out_) %
sizeof(__m128i)) == 0);
// Total input size in bytes; one step consumes four registers (64 bytes).
421 size_t bytes = srcWidth *
sizeof(
Pixel);
422 assert((bytes % (4 *
sizeof(__m128i))) == 0);
// Both byte cursors point one-past-the-end of their buffers; the data is
// addressed through the negative offset 'x', which moves up towards zero.
425 auto* in =
reinterpret_cast<const char*
>(in_) + bytes;
426 auto* out =
reinterpret_cast< char*
>(out_) + 2 * bytes;
// Negation happens in size_t and is then converted to the signed ptrdiff_t.
428 ptrdiff_t x = -bytes;
// Load 64 bytes of input.
430 __m128i a0 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 0));
431 __m128i a1 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 16));
432 __m128i a2 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 32));
433 __m128i a3 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + x + 48));
// Interleaving a register with itself duplicates every pixel:
// low half -> lN, high half -> hN.
434 __m128i l0 = unpacklo<Pixel>(a0, a0);
435 __m128i h0 = unpackhi<Pixel>(a0, a0);
436 __m128i l1 = unpacklo<Pixel>(a1, a1);
437 __m128i h1 = unpackhi<Pixel>(a1, a1);
438 __m128i l2 = unpacklo<Pixel>(a2, a2);
439 __m128i h2 = unpackhi<Pixel>(a2, a2);
440 __m128i l3 = unpacklo<Pixel>(a3, a3);
441 __m128i h3 = unpackhi<Pixel>(a3, a3);
// Store 128 bytes of output; the output offset is twice the input offset.
442 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 0), l0);
443 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 16), h0);
444 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 32), l1);
445 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 48), h1);
446 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 64), l2);
447 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 80), h2);
448 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 96), l3);
449 _mm_store_si128(reinterpret_cast<__m128i*>(out + 2*x + 112), h3);
// Advance by the 64 input bytes just processed.
450 x += 4 *
sizeof(__m128i);
455 template <
typename Pixel>
457 const Pixel* __restrict in,
Pixel* __restrict out,
size_t dstWidth) __restrict
471 size_t srcWidth = dstWidth / 2;
474 size_t chunk = 4 *
sizeof(__m128i) /
sizeof(
Pixel);
475 size_t srcWidth2 = srcWidth & ~(chunk - 1);
476 scale_1on2_SSE(in, out, srcWidth2);
478 out += 2 * srcWidth2;
479 srcWidth -= srcWidth2;
484 for (
size_t x = 0; x < srcWidth; ++x) {
485 out[x * 2] = out[x * 2 + 1] = in[x];
// SSE helper operating on a byte range of 'size' bytes from in_ to out_.
// Preconditions (checked with assert):
//   - in_ and out_ are 16-byte aligned
//   - size is a multiple of 128 bytes
// NOTE(review): presumably this copies the range in 128-byte steps; the copy
// loop itself is not visible in this excerpt -- confirm against the full body.
493 static inline void memcpy_SSE_128(
494 const void* __restrict in_,
void* __restrict out_,
size_t size)
496 assert((reinterpret_cast<size_t>(in_ ) % 16) == 0);
497 assert((reinterpret_cast<size_t>(out_) % 16) == 0);
498 assert((size % 128) == 0);
// View both buffers as arrays of 128-bit registers.
501 auto* in =
reinterpret_cast<const __m128i*
>(in_);
502 auto* out =
reinterpret_cast< __m128i*
>(out_);
// One past the last input register (size / 16 registers in total).
503 auto* end = in + (size /
sizeof(__m128i));
519 template <
typename Pixel>
521 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
523 size_t nBytes = width *
sizeof(
Pixel);
529 size_t n128 = nBytes & ~127;
530 memcpy_SSE_128(in, out, n128);
532 if (
likely(nBytes == 0))
return;
533 in += n128 /
sizeof(
Pixel);
534 out += n128 /
sizeof(
Pixel);
537 size_t n64 = nBytes & ~63;
538 assert((
size_t(in) & 3) == 0);
539 assert((
size_t(out) & 3) == 0);
540 assert((n64 % 64) == 0);
545 "ldmia %[IN]! ,{r3,r4,r5,r6,r8,r9,r10,r12};\n\t"
546 "stmia %[OUT]!,{r3,r4,r5,r6,r8,r9,r10,r12};\n\t"
547 "ldmia %[IN]! ,{r3,r4,r5,r6,r8,r9,r10,r12};\n\t"
548 "stmia %[OUT]!,{r3,r4,r5,r6,r8,r9,r10,r12};\n\t"
549 "subs %[NUM],%[NUM],#64;\n\t"
558 :
"r3",
"r4",
"r5",
"r6",
"r8",
"r9",
"r10",
"r12"
563 if (
likely(nBytes == 0))
return;
566 memcpy(out, in, nBytes);
570 template <
typename Pixel>
572 : pixelOps(pixelOps_)
// Integer-vector wrapper around the float shuffle instruction: the two low
// 32-bit result lanes are picked from x and the two high ones from y, each
// selected by a 2-bit field of the compile-time immediate IMM8. The casts only
// reinterpret the register type; they do not convert values.
template<int IMM8>
static inline __m128i shuffle(__m128i x, __m128i y)
{
	const __m128 lhs = _mm_castsi128_ps(x);
	const __m128 rhs = _mm_castsi128_ps(y);
	const __m128 picked = _mm_shuffle_ps(lhs, rhs, IMM8);
	return _mm_castps_si128(picked);
}
// Averages horizontally adjacent pixel pairs. x and y together hold 2N
// consecutive pixels; the result holds N pixels, each the average of one pair
// (pairs from x in the low half, pairs from y in the high half).
// The general scheme is: de-interleave into p (even-indexed pixels) and
// q (odd-indexed pixels), then average p and q lane-wise.
// NOTE(review): 'p' and 'q' are defined by two different 16-bit sequences
// below; these look like alternatives selected by conditional compilation
// that is not visible in this excerpt (the first uses _mm_shuffle_epi8, an
// SSSE3 instruction). Confirm against the full source.
583 template<
typename Pixel>
584 static inline __m128i blend(__m128i x, __m128i y,
Pixel mask)
586 if (
sizeof(
Pixel) == 4) {
// 32-bit pixels: 0x88 selects lanes 0 and 2 of x then of y (even pixels),
// 0xDD selects lanes 1 and 3 (odd pixels). 'mask' is not used on this path;
// _mm_avg_epu8 computes a rounded-up average per byte.
588 __m128i p = shuffle<0x88>(x, y);
589 __m128i q = shuffle<0xDD>(x, y);
590 return _mm_avg_epu8(p, q);
// 16-bit pixels, byte-shuffle variant. Control bytes are listed from byte 15
// down to byte 0; an index of 0x80 produces a zero byte.
//   LL: even words of the source into the low 8 bytes
//   HL: even words of the source into the high 8 bytes
//   LH: odd  words of the source into the low 8 bytes
//   HH: odd  words of the source into the high 8 bytes
595 const __m128i LL = _mm_set_epi8(
596 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
597 0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00);
598 const __m128i HL = _mm_set_epi8(
599 0x0D, 0x0C, 0x09, 0x08, 0x05, 0x04, 0x01, 0x00,
600 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
601 const __m128i LH = _mm_set_epi8(
602 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
603 0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02);
604 const __m128i HH = _mm_set_epi8(
605 0x0F, 0x0E, 0x0B, 0x0A, 0x07, 0x06, 0x03, 0x02,
606 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
607 __m128i ll = _mm_shuffle_epi8(x, LL);
608 __m128i hl = _mm_shuffle_epi8(y, HL);
609 __m128i lh = _mm_shuffle_epi8(x, LH);
610 __m128i hh = _mm_shuffle_epi8(y, HH);
// p = even words of x | even words of y, q = odd words of x | odd words of y.
611 __m128i p = _mm_or_si128(ll, hl);
612 __m128i q = _mm_or_si128(lh, hh);
// 16-bit pixels, unpack-only variant. Three rounds of 16-bit interleaving
// yield the same de-interleaved result:
//   p = x0 x2 x4 x6 y0 y2 y4 y6,  q = x1 x3 x5 x7 y1 y3 y5 y7.
616 __m128i s = _mm_unpacklo_epi16(x, y);
617 __m128i t = _mm_unpackhi_epi16(x, y);
618 __m128i u = _mm_unpacklo_epi16(s, t);
619 __m128i v = _mm_unpackhi_epi16(s, t);
620 __m128i p = _mm_unpacklo_epi16(u, v);
621 __m128i q = _mm_unpackhi_epi16(u, v);
// Average without intermediate overflow: (p & q) + (((p ^ q) & mask) >> 1).
// 'mask' is broadcast to all eight 16-bit lanes; presumably it clears the
// lowest bit of each colour component so the shift cannot leak a bit into
// the neighbouring component -- TODO confirm with the mask's producer.
624 __m128i m = _mm_set1_epi16(mask);
625 __m128i a = _mm_and_si128(p, q);
626 __m128i b = _mm_xor_si128(p, q);
627 __m128i c = _mm_and_si128(b, m);
628 __m128i d = _mm_srli_epi16(c, 1);
629 return _mm_add_epi16(a, d);
// Horizontally halves a line using SSE: every pair of adjacent input pixels is
// combined into one output pixel via blend().
//   in_      : source pixels, 16-byte aligned; 2 * dstBytes bytes are read
//   out_     : destination pixels, 16-byte aligned; dstBytes bytes are written
//   dstBytes : output size in bytes; non-zero multiple of 64 (four __m128i)
// NOTE(review): a further parameter ('mask', forwarded to blend() below) and
// the loop construct around the load/blend/store sequence are not visible in
// this excerpt; the comments below only describe one step.
633 template<
typename Pixel>
634 static inline void scale_2on1_SSE(
635 const Pixel* __restrict in_,
Pixel* __restrict out_,
size_t dstBytes,
// Aligned load/store intrinsics are used below, so both pointers must be
// aligned to sizeof(__m128i).
638 assert((reinterpret_cast<size_t>(in_ ) %
sizeof(__m128i)) == 0);
639 assert((reinterpret_cast<size_t>(out_) %
sizeof(__m128i)) == 0);
640 assert((dstBytes % (4 *
sizeof(__m128i))) == 0);
641 assert(dstBytes != 0);
// Both byte cursors point one-past-the-end of their buffers; the data is
// addressed through the negative offset 'x', which moves up towards zero.
643 auto* in =
reinterpret_cast<const char*
>(in_) + 2 * dstBytes;
644 auto* out =
reinterpret_cast< char*
>(out_) + dstBytes;
// Negation happens in size_t and is then converted to the signed ptrdiff_t.
646 ptrdiff_t x = -dstBytes;
// Load 128 bytes of input; the input offset is twice the output offset.
648 __m128i a0 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 0));
649 __m128i a1 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 16));
650 __m128i a2 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 32));
651 __m128i a3 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 48));
652 __m128i a4 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 64));
653 __m128i a5 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 80));
654 __m128i a6 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 96));
655 __m128i a7 = _mm_load_si128(reinterpret_cast<const __m128i*>(in + 2*x + 112));
// Each pair of consecutive input registers collapses into one output register.
656 __m128i b0 = blend(a0, a1, mask);
657 __m128i b1 = blend(a2, a3, mask);
658 __m128i b2 = blend(a4, a5, mask);
659 __m128i b3 = blend(a6, a7, mask);
// Store 64 bytes of output.
660 _mm_store_si128(reinterpret_cast<__m128i*>(out + x + 0), b0);
661 _mm_store_si128(reinterpret_cast<__m128i*>(out + x + 16), b1);
662 _mm_store_si128(reinterpret_cast<__m128i*>(out + x + 32), b2);
663 _mm_store_si128(reinterpret_cast<__m128i*>(out + x + 48), b3);
// Advance by the 64 output bytes just produced.
664 x += 4 *
sizeof(__m128i);
669 template <
typename Pixel>
671 const Pixel* __restrict in,
Pixel* __restrict out,
size_t dstWidth) __restrict
674 size_t n64 = (dstWidth *
sizeof(
Pixel)) & ~63;
675 Pixel mask = pixelOps.getBlendMask();
676 scale_2on1_SSE(in, out, n64, mask);
677 dstWidth &= ((64 /
sizeof(
Pixel)) - 1);
678 if (
likely(dstWidth == 0))
return;
679 in += (2 * n64) /
sizeof(
Pixel);
680 out += n64 /
sizeof(
Pixel);
684 for (
size_t i = 0; i < dstWidth; ++i) {
685 out[i] = pixelOps.template blend<1, 1>(
686 in[2 * i + 0], in[2 * i + 1]);
691 template <
typename Pixel>
693 : pixelOps(pixelOps_)
697 template <
typename Pixel>
699 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
701 for (
unsigned i = 0; i < width; ++i) {
702 out[i] = pixelOps.template blend6<1, 1, 1, 1, 1, 1>(&in[6 * i]);
707 template <
typename Pixel>
709 : pixelOps(pixelOps_)
713 template <
typename Pixel>
715 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
717 for (
unsigned i = 0; i < width; ++i) {
718 out[i] = pixelOps.template blend4<1, 1, 1, 1>(&in[4 * i]);
723 template <
typename Pixel>
725 : pixelOps(pixelOps_)
729 template <
typename Pixel>
731 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
733 for (
unsigned i = 0; i < width; ++i) {
734 out[i] = pixelOps.template blend3<1, 1, 1>(&in[3 * i]);
739 template <
typename Pixel>
741 : pixelOps(pixelOps_)
745 template <
typename Pixel>
747 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
749 unsigned i = 0, j = 0;
750 for (; i < (width - 1); i += 2, j += 3) {
751 out[i + 0] = pixelOps.template blend2<2, 1>(&in[j + 0]);
752 out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 1]);
754 if (i < width) out[i] = 0;
758 template <
typename Pixel>
760 : pixelOps(pixelOps_)
764 template <
typename Pixel>
766 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
768 unsigned i = 0, j = 0;
769 for (; i < (width - 3); i += 4, j += 3) {
770 out[i + 0] = in[j + 0];
771 out[i + 1] = pixelOps.template blend2<1, 2>(&in[j + 0]);
772 out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 1]);
773 out[i + 3] = in[j + 2];
775 for (
unsigned k = 0; k < (4 - 1); ++k) {
776 if ((i + k) < width) out[i + k] = 0;
781 template <
typename Pixel>
783 : pixelOps(pixelOps_)
787 template <
typename Pixel>
789 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
791 unsigned i = 0, j = 0;
792 for (; i < (width - 7); i += 8, j += 3) {
793 out[i + 0] = in[j + 0];
794 out[i + 1] = in[j + 0];
795 out[i + 2] = pixelOps.template blend2<2, 1>(&in[j + 0]);
796 out[i + 3] = in[j + 1];
797 out[i + 4] = in[j + 1];
798 out[i + 5] = pixelOps.template blend2<1, 2>(&in[j + 1]);
799 out[i + 6] = in[j + 2];
800 out[i + 7] = in[j + 2];
802 for (
unsigned k = 0; k < (8 - 1); ++k) {
803 if ((i + k) < width) out[i + k] = 0;
808 template <
typename Pixel>
810 : pixelOps(pixelOps_)
814 template <
typename Pixel>
816 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
818 unsigned i = 0, j = 0;
819 for (; i < (width - 2); i += 3, j += 2) {
820 out[i + 0] = in[j + 0];
821 out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 0]);
822 out[i + 2] = in[j + 1];
824 if ((i + 0) < width) out[i + 0] = 0;
825 if ((i + 1) < width) out[i + 1] = 0;
829 template <
typename Pixel>
831 : pixelOps(pixelOps_)
835 template <
typename Pixel>
837 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
839 unsigned i = 0, j = 0;
840 for (; i < (width - 2); i += 3, j += 4) {
841 out[i + 0] = pixelOps.template blend2<3, 1>(&in[j + 0]);
842 out[i + 1] = pixelOps.template blend2<1, 1>(&in[j + 1]);
843 out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 2]);
845 if ((i + 0) < width) out[i + 0] = 0;
846 if ((i + 1) < width) out[i + 1] = 0;
850 template <
typename Pixel>
852 : pixelOps(pixelOps_)
856 template <
typename Pixel>
858 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
860 unsigned i = 0, j = 0;
861 for (; i < (width - 2); i += 3, j += 8) {
862 out[i + 0] = pixelOps.template blend3<3, 3, 2> (&in[j + 0]);
863 out[i + 1] = pixelOps.template blend4<1, 3, 3, 1>(&in[j + 2]);
864 out[i + 2] = pixelOps.template blend3<2, 3, 3> (&in[j + 5]);
866 if ((i + 0) < width) out[i + 0] = 0;
867 if ((i + 1) < width) out[i + 1] = 0;
871 template <
typename Pixel>
873 : pixelOps(pixelOps_)
877 template <
typename Pixel>
879 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
881 unsigned i = 0, j = 0;
882 for (; i < (width - 8); i += 9, j += 2) {
883 out[i + 0] = in[j + 0];
884 out[i + 1] = in[j + 0];
885 out[i + 2] = in[j + 0];
886 out[i + 3] = in[j + 0];
887 out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 0]);
888 out[i + 5] = in[j + 1];
889 out[i + 6] = in[j + 1];
890 out[i + 7] = in[j + 1];
891 out[i + 8] = in[j + 1];
893 if ((i + 0) < width) out[i + 0] = 0;
894 if ((i + 1) < width) out[i + 1] = 0;
895 if ((i + 2) < width) out[i + 2] = 0;
896 if ((i + 3) < width) out[i + 3] = 0;
897 if ((i + 4) < width) out[i + 4] = 0;
898 if ((i + 5) < width) out[i + 5] = 0;
899 if ((i + 6) < width) out[i + 6] = 0;
900 if ((i + 7) < width) out[i + 7] = 0;
904 template <
typename Pixel>
906 : pixelOps(pixelOps_)
910 template <
typename Pixel>
912 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
914 unsigned i = 0, j = 0;
915 for (; i < (width - 8); i += 9, j += 4) {
916 out[i + 0] = in[j + 0];
917 out[i + 1] = in[j + 0];
918 out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 0]);
919 out[i + 3] = in[j + 1];
920 out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 1]);
921 out[i + 5] = in[j + 2];
922 out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 2]);
923 out[i + 7] = in[j + 3];
924 out[i + 8] = in[j + 3];
926 if ((i + 0) < width) out[i + 0] = 0;
927 if ((i + 1) < width) out[i + 1] = 0;
928 if ((i + 2) < width) out[i + 2] = 0;
929 if ((i + 3) < width) out[i + 3] = 0;
930 if ((i + 4) < width) out[i + 4] = 0;
931 if ((i + 5) < width) out[i + 5] = 0;
932 if ((i + 6) < width) out[i + 6] = 0;
933 if ((i + 7) < width) out[i + 7] = 0;
937 template <
typename Pixel>
939 : pixelOps(pixelOps_)
943 template <
typename Pixel>
945 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
947 unsigned i = 0, j = 0;
948 for (; i < width; i += 9, j += 8) {
949 out[i + 0] = in[j + 0];
950 out[i + 1] = pixelOps.template blend2<1, 7>(&in[j + 0]);
951 out[i + 2] = pixelOps.template blend2<1, 3>(&in[j + 1]);
952 out[i + 3] = pixelOps.template blend2<3, 5>(&in[j + 2]);
953 out[i + 4] = pixelOps.template blend2<1, 1>(&in[j + 3]);
954 out[i + 5] = pixelOps.template blend2<5, 3>(&in[j + 4]);
955 out[i + 6] = pixelOps.template blend2<3, 1>(&in[j + 5]);
956 out[i + 7] = pixelOps.template blend2<7, 1>(&in[j + 6]);
957 out[i + 8] = in[j + 7];
959 if ((i + 0) < width) out[i + 0] = 0;
960 if ((i + 1) < width) out[i + 1] = 0;
961 if ((i + 2) < width) out[i + 2] = 0;
962 if ((i + 3) < width) out[i + 3] = 0;
963 if ((i + 4) < width) out[i + 4] = 0;
964 if ((i + 5) < width) out[i + 5] = 0;
965 if ((i + 6) < width) out[i + 6] = 0;
966 if ((i + 7) < width) out[i + 7] = 0;
970 template <
typename Pixel>
972 : pixelOps(pixelOps_)
976 template <
typename Pixel>
978 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
980 assert((width % 5) == 0);
981 for (
unsigned i = 0, j = 0; i < width; i += 5, j += 4) {
982 out[i + 0] = in[j + 0];
983 out[i + 1] = pixelOps.template blend2<1, 3>(&in[j + 0]);
984 out[i + 2] = pixelOps.template blend2<1, 1>(&in[j + 1]);
985 out[i + 3] = pixelOps.template blend2<3, 1>(&in[j + 2]);
986 out[i + 4] = in[j + 3];
991 template <
typename Pixel>
993 : pixelOps(pixelOps_)
997 template <
typename Pixel>
999 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
1001 assert((width % 8) == 0);
1002 for (
unsigned i = 0, j = 0; i < width; i += 8, j += 7) {
1003 out[i + 0] = in[j + 0];
1004 out[i + 1] = pixelOps.template blend2<1, 6>(&in[j + 0]);
1005 out[i + 2] = pixelOps.template blend2<2, 5>(&in[j + 1]);
1006 out[i + 3] = pixelOps.template blend2<3, 4>(&in[j + 2]);
1007 out[i + 4] = pixelOps.template blend2<4, 3>(&in[j + 3]);
1008 out[i + 5] = pixelOps.template blend2<5, 2>(&in[j + 4]);
1009 out[i + 6] = pixelOps.template blend2<6, 1>(&in[j + 5]);
1010 out[i + 7] = in[j + 6];
1015 template <
typename Pixel>
1017 : pixelOps(pixelOps_)
1021 template <
typename Pixel>
1023 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
1025 assert((width % 20) == 0);
1026 for (
unsigned i = 0, j = 0; i < width; i += 20, j += 17) {
1027 out[i + 0] = in[j + 0];
1028 out[i + 1] = pixelOps.template blend2< 3, 14>(&in[j + 0]);
1029 out[i + 2] = pixelOps.template blend2< 6, 11>(&in[j + 1]);
1030 out[i + 3] = pixelOps.template blend2< 9, 8>(&in[j + 2]);
1031 out[i + 4] = pixelOps.template blend2<12, 5>(&in[j + 3]);
1032 out[i + 5] = pixelOps.template blend2<15, 2>(&in[j + 4]);
1033 out[i + 6] = in[j + 5];
1034 out[i + 7] = pixelOps.template blend2< 1, 16>(&in[j + 5]);
1035 out[i + 8] = pixelOps.template blend2< 4, 13>(&in[j + 6]);
1036 out[i + 9] = pixelOps.template blend2< 7, 10>(&in[j + 7]);
1037 out[i + 10] = pixelOps.template blend2<10, 7>(&in[j + 8]);
1038 out[i + 11] = pixelOps.template blend2<13, 4>(&in[j + 9]);
1039 out[i + 12] = pixelOps.template blend2<16, 1>(&in[j + 10]);
1040 out[i + 13] = in[j + 11];
1041 out[i + 14] = pixelOps.template blend2< 2, 15>(&in[j + 11]);
1042 out[i + 15] = pixelOps.template blend2< 5, 12>(&in[j + 12]);
1043 out[i + 16] = pixelOps.template blend2< 8, 9>(&in[j + 13]);
1044 out[i + 17] = pixelOps.template blend2<11, 6>(&in[j + 14]);
1045 out[i + 18] = pixelOps.template blend2<14, 3>(&in[j + 15]);
1046 out[i + 19] = in[j + 16];
1051 template <
typename Pixel>
1053 : pixelOps(pixelOps_)
1057 template <
typename Pixel>
1059 const Pixel* __restrict in,
Pixel* __restrict out,
size_t width) __restrict
1061 assert((width % 10) == 0);
1062 for (
unsigned i = 0, j = 0; i < width; i += 10, j += 9) {
1063 out[i + 0] = in[j + 0];
1064 out[i + 1] = pixelOps.template blend2<1, 8>(&in[j + 0]);
1065 out[i + 2] = pixelOps.template blend2<2, 7>(&in[j + 1]);
1066 out[i + 3] = pixelOps.template blend2<3, 6>(&in[j + 2]);
1067 out[i + 4] = pixelOps.template blend2<4, 5>(&in[j + 3]);
1068 out[i + 5] = pixelOps.template blend2<5, 4>(&in[j + 4]);
1069 out[i + 6] = pixelOps.template blend2<6, 3>(&in[j + 5]);
1070 out[i + 7] = pixelOps.template blend2<7, 2>(&in[j + 6]);
1071 out[i + 8] = pixelOps.template blend2<8, 1>(&in[j + 7]);
1072 out[i + 9] = in[j + 8];
1077 template <
typename Pixel,
unsigned w1,
unsigned w2>
1079 : pixelOps(pixelOps_)
1083 template <
typename Pixel,
unsigned w1,
unsigned w2>
1090 for (
unsigned i = 0; i < width; ++i) {
1091 out[i] = pixelOps.template blend<w1, w2>(in1[i], in2[i]);
1096 template<
typename Pixel>
1098 : pixelOps(pixelOps_)
1102 template<
typename Pixel>
1104 const Pixel* in,
unsigned inWidth,
1105 Pixel* out,
unsigned outWidth)
const
1107 static const unsigned FACTOR = 256;
1109 unsigned step = FACTOR * inWidth / outWidth;
1110 unsigned i = 0 * FACTOR;
1111 for (
unsigned o = 0; o < outWidth; ++o) {
1112 Pixel p0 = in[(i / FACTOR) + 0];
1113 Pixel p1 = in[(i / FACTOR) + 1];
1114 out[o] = pixelOps.lerp(p0, p1, i % FACTOR);
1120 template <
typename Pixel>
1122 : pixelOps(pixelOps_)
1126 template <
typename Pixel>
1131 for (
unsigned i = 0; i < width; ++i) {
1132 out[i] = pixelOps.alphaBlend(in1[i], in2[i]);
1136 template <
typename Pixel>
1144 assert(
sizeof(
Pixel) == 4);
1146 unsigned alpha = pixelOps.alpha(in1);
1154 Pixel in1M = pixelOps.multiply(in1, alpha);
1155 unsigned alpha2 = 256 - alpha;
1156 for (
unsigned i = 0; i < width; ++i) {
1157 out[i] = in1M + pixelOps.multiply(in2[i], alpha2);