// Sizing of the precomputed noise table. The buffer holds twice NOISE_SHIFT
// bytes, so a read that starts at any offset below NOISE_SHIFT still has at
// least NOISE_SHIFT bytes available behind it.
22 static const unsigned NOISE_SHIFT = 8192;
23 static const unsigned NOISE_BUF_SIZE = 2 * NOISE_SHIFT;
// Table of signed 8-bit noise values, filled by preCalcNoise() and read by
// drawNoise(). NOTE(review): the second ALIGNED argument (16) presumably
// requests 16-byte alignment -- confirm against the macro definition.
24 ALIGNED(
static signed char noiseBuf[NOISE_BUF_SIZE], 16);
// Declaration of a noise-line routine that is implemented outside this
// excerpt. drawNoiseLine() calls it with (in, out, noise, width), so the
// untyped pointers receive the input pixels, output pixels and noise bytes.
// NOTE(review): the name suggests an SSE2 implementation for 4-byte pixels;
// the definition is not visible here -- confirm.
30 void __cdecl FBPostProcessor_drawNoiseLine_4_SSE2(
31 void* in,
void* out,
void* noise,
unsigned long width);
// Fills noiseBuf with noise values for the given strength.
//
// @param factor  Noise strength. A value of exactly 0 makes the function
//                return immediately, leaving noiseBuf untouched.
//
// Each of the four byte positions in a group gets its own scale factor
// (scale[0..3]). NOTE(review): the declaration of 'scale', the source of
// r1/r2 and the exact semantics of Math::clip<-128, 127>(value, factor) are
// outside this excerpt; presumably clip multiplies the value by the factor
// and clamps the result to [-128, 127] -- confirm.
35 template <
class Pixel>
36 void FBPostProcessor<Pixel>::preCalcNoise(
double factor)
// Nothing to precompute when noise is disabled.
40 if (factor == 0)
return;
// 4-byte pixels: work out which byte position holds which color component.
47 if (
sizeof(
Pixel) == 4) {
// Probe pixel built from an endianness-dependent constant (the second operand
// of the conditional is not visible in this excerpt). The red/green/blue
// values extracted from it are used below as indices into scale[].
53 const Pixel p =
Pixel(OPENMSX_BIGENDIAN ? 0x00010203
// Start with all four positions at zero, then enable the three positions
// that the probe reports for red, green and blue; the remaining position
// keeps scale 0.0.
58 scale[0] = scale[1] = scale[2] = scale[3] = 0.0;
59 scale[pixelOps.red (p)] = factor;
60 scale[pixelOps.green(p)] = factor;
61 scale[pixelOps.blue (p)] = factor;
// NOTE(review): presumably the branch for other pixel sizes (the 'else' is
// not visible here). The factor is reduced in proportion to each component's
// maximum value relative to 255.
64 scale[0] = (pixelOps.getMaxRed() / 255.0) * factor;
65 scale[1] = (pixelOps.getMaxGreen() / 255.0) * factor;
66 scale[2] = (pixelOps.getMaxBlue() / 255.0) * factor;
// Fill the table 8 bytes per iteration: two groups of four bytes, the first
// group derived from r1 and the second from r2, each byte using the scale of
// its position within the group.
70 for (
unsigned i = 0; i < NOISE_BUF_SIZE; i += 8) {
73 noiseBuf[i + 0] =
Math::clip<-128, 127>(r1, scale[0]);
74 noiseBuf[i + 1] =
Math::clip<-128, 127>(r1, scale[1]);
75 noiseBuf[i + 2] =
Math::clip<-128, 127>(r1, scale[2]);
76 noiseBuf[i + 3] =
Math::clip<-128, 127>(r1, scale[3]);
77 noiseBuf[i + 4] =
Math::clip<-128, 127>(r2, scale[0]);
78 noiseBuf[i + 5] =
Math::clip<-128, 127>(r2, scale[1]);
79 noiseBuf[i + 6] =
Math::clip<-128, 127>(r2, scale[2]);
80 noiseBuf[i + 7] =
Math::clip<-128, 127>(r2, scale[3]);
// Combines a packed pixel value 'p' with a packed noise value 'n', working on
// all four bytes of the 32-bit word at once, and returns the packed result.
// drawNoiseLine() uses it for 4-byte pixels, passing four signed noise bytes
// reinterpreted as one unsigned word.
//
// NOTE(review): the definitions of s0, s, u1 and o1 are outside this excerpt.
// The comments below state what each visible expression computes and mark
// the assumptions about those missing values.
89 static inline unsigned addNoise4(
unsigned p,
unsigned n)
// Keeps only bits 8, 16 and 24, i.e. the lowest bit of the three upper bytes.
// If s0 is the plain sum p + n, these are exactly the carries that crossed a
// byte boundary -- TODO confirm against the definition of s0.
97 unsigned ci = (p ^ n ^ s0) & 0x01010100;
// Per-byte flag in the top bit: set where the top bit of p differs from the
// top bit of n AND from the top bit of s.
111 unsigned t = (p ^ n) & (p ^ s) & 0x80808080;
// Widens a per-byte top-bit flag into a whole-byte mask: for a byte flagged
// with 0x80, (x << 1) contributes 0x100 and (x >> 7) subtracts 0x01, leaving
// 0xFF in that byte. This assumes u1/o1 only have top bits set, as t does.
117 unsigned u8 = (u1 << 1) - (u1 >> 7);
120 unsigned o8 = (o1 << 1) - (o1 >> 7);
// Bytes selected by u8 are forced to 0x00, bytes selected by o8 to 0xFF; all
// other bytes keep the value from s.
123 return (s & (~u8)) | o8;
// Adds noise to one line of pixels.
//
// @param in     Source pixels.
// @param out    Destination pixels; drawNoise() passes the same pointer as
//               'in', so in-place operation is used by callers.
// @param noise  Signed noise bytes, four per pixel.
// @param width  Number of pixels in the line.
//
// Several implementations are visible below: a call to an external SSE2
// routine, an inline SSE2 loop, two inline MMX loops and a portable C++
// fallback. NOTE(review): the preprocessor/run-time conditions that select
// between them, the 'asm' statement headers and the loop-control
// instructions are outside this excerpt.
126 template <
class Pixel>
127 void FBPostProcessor<Pixel>::drawNoiseLine(
128 Pixel* in,
Pixel* out,
signed char* noise,
unsigned long width)
// The SSE2 loop below handles 64 bytes per pass (four 16-byte registers at
// offsets 0/16/32/48), so the line's byte count (4 bytes per pixel) must be
// a multiple of 64.
133 assert(((4 * width) % 64) == 0);
// Variant that delegates the whole line to the externally implemented routine.
135 FBPostProcessor_drawNoiseLine_4_SSE2(in, out, noise, width);
// --- Inline SSE2 variant ---
// pcmpeqb of a register with itself sets every bit of xmm7.
// NOTE(review): an instruction between the next two lines is not visible;
// presumably xmm7 ends up with 0x80 in every byte so that the XORs below
// convert unsigned bytes to signed-biased bytes and back -- confirm.
141 "pcmpeqb %%xmm7, %%xmm7;"
143 "packsswb %%xmm7, %%xmm7;"
// Loop body: load 64 bytes of pixels, XOR with xmm7, add the noise bytes with
// signed saturation (paddsb), XOR with xmm7 again and store. Loads, XORs,
// adds and stores of the four registers are interleaved.
146 "movdqa (%[IN], %[CNT]), %%xmm0;"
147 "movdqa 16(%[IN], %[CNT]), %%xmm1;"
148 "movdqa 32(%[IN], %[CNT]), %%xmm2;"
149 "pxor %%xmm7, %%xmm0;"
150 "movdqa 48(%[IN], %[CNT]), %%xmm3;"
151 "pxor %%xmm7, %%xmm1;"
152 "pxor %%xmm7, %%xmm2;"
153 "paddsb (%[NOISE], %[CNT]), %%xmm0;"
154 "pxor %%xmm7, %%xmm3;"
155 "paddsb 16(%[NOISE], %[CNT]), %%xmm1;"
156 "paddsb 32(%[NOISE], %[CNT]), %%xmm2;"
157 "pxor %%xmm7, %%xmm0;"
158 "paddsb 48(%[NOISE], %[CNT]), %%xmm3;"
159 "pxor %%xmm7, %%xmm1;"
160 "pxor %%xmm7, %%xmm2;"
161 "movdqa %%xmm0, (%[OUT], %[CNT]);"
162 "pxor %%xmm7, %%xmm3;"
163 "movdqa %%xmm1, 16(%[OUT], %[CNT]);"
164 "movdqa %%xmm2, 32(%[OUT], %[CNT]);"
165 "movdqa %%xmm3, 48(%[OUT], %[CNT]);"
// Operands: all three pointers are set to the END of their data and CNT starts
// at minus the byte count, so (pointer, CNT) addresses the first element.
// Presumably CNT is advanced toward zero by loop-control code that is not
// visible here. The factor 4 matches 4 bytes per pixel.
170 : [IN]
"r" (in + width)
171 , [OUT]
"r" (out + width)
172 , [NOISE]
"r" (noise + 4 * width)
173 ,
"[CNT]" (-4 * width)
// Registers modified by the assembly above.
176 ,
"xmm0",
"xmm1",
"xmm2",
"xmm3",
"xmm7"
// --- Inline MMX variant with prefetch ---
// Handles 32 bytes per pass (four 8-byte registers at offsets 0/8/16/24), so
// the line's byte count must be a multiple of 32.
183 assert(((4 * width) % 32) == 0);
// Same constant setup as the SSE2 variant, using mm7 (the instruction between
// these two lines is not visible).
186 "pcmpeqb %%mm7, %%mm7;"
188 "packsswb %%mm7, %%mm7;"
// Non-temporal prefetch of input data 320 bytes ahead of the current position.
191 "prefetchnta 320(%[IN], %[CNT]);"
// Load 32 bytes of pixels, add noise with signed saturation, store.
// NOTE(review): no XOR instructions are visible in this variant; they are
// presumably among the lines missing from this excerpt.
192 "movq (%[IN], %[CNT]), %%mm0;"
193 "movq 8(%[IN], %[CNT]), %%mm1;"
194 "movq 16(%[IN], %[CNT]), %%mm2;"
196 "movq 24(%[IN], %[CNT]), %%mm3;"
199 "paddsb (%[NOISE], %[CNT]), %%mm0;"
201 "paddsb 8(%[NOISE], %[CNT]), %%mm1;"
202 "paddsb 16(%[NOISE], %[CNT]), %%mm2;"
204 "paddsb 24(%[NOISE], %[CNT]), %%mm3;"
207 "movq %%mm0, (%[OUT], %[CNT]);"
209 "movq %%mm1, 8(%[OUT], %[CNT]);"
210 "movq %%mm2, 16(%[OUT], %[CNT]);"
211 "movq %%mm3, 24(%[OUT], %[CNT]);"
// Same end-pointer / negative-counter operand scheme as the SSE2 variant.
217 : [IN]
"r" (in + width)
218 , [OUT]
"r" (out + width)
219 , [NOISE]
"r" (noise + 4 * width)
220 ,
"[CNT]" (-4 * width)
// Registers modified by the assembly above.
223 ,
"mm0",
"mm1",
"mm2",
"mm3",
"mm7"
// --- Inline MMX variant without prefetch ---
// Identical to the previous variant in every visible line except that the
// prefetchnta instruction is absent.
230 assert((4 * width % 32) == 0);
233 "pcmpeqb %%mm7, %%mm7;"
235 "packsswb %%mm7, %%mm7;"
238 "movq (%[IN], %[CNT]), %%mm0;"
239 "movq 8(%[IN], %[CNT]), %%mm1;"
240 "movq 16(%[IN], %[CNT]), %%mm2;"
242 "movq 24(%[IN], %[CNT]), %%mm3;"
245 "paddsb (%[NOISE], %[CNT]), %%mm0;"
247 "paddsb 8(%[NOISE], %[CNT]), %%mm1;"
248 "paddsb 16(%[NOISE], %[CNT]), %%mm2;"
250 "paddsb 24(%[NOISE], %[CNT]), %%mm3;"
253 "movq %%mm0, (%[OUT], %[CNT]);"
255 "movq %%mm1, 8(%[OUT], %[CNT]);"
256 "movq %%mm2, 16(%[OUT], %[CNT]);"
257 "movq %%mm3, 24(%[OUT], %[CNT]);"
263 : [IN]
"r" (in + width)
264 , [OUT]
"r" (out + width)
265 , [NOISE]
"r" (noise + 4 * width)
266 ,
"[CNT]" (-4 * width)
269 ,
"mm0",
"mm1",
"mm2",
"mm3",
"mm7"
// --- Portable C++ fallback ---
// 4-byte pixels: treat each group of four noise bytes as one 32-bit word and
// let addNoise4() process all four bytes of the pixel at once.
278 if (
sizeof(
Pixel) == 4) {
280 auto noise4 =
reinterpret_cast<unsigned*
>(noise);
281 for (
unsigned i = 0; i < width; ++i) {
282 out[i] = addNoise4(in[i], noise4[i]);
// Other pixel sizes (presumably the 'else' branch, which is not visible):
// split each pixel into components, add the first three noise bytes of the
// pixel's 4-byte noise group, clamp each component to [0, max] and recombine.
285 int mr = pixelOps.getMaxRed();
286 int mg = pixelOps.getMaxGreen();
287 int mb = pixelOps.getMaxBlue();
288 for (
unsigned i = 0; i < width; ++i) {
// 'p' is defined on a line outside this excerpt; presumably it is in[i].
290 int r = pixelOps.red(p);
291 int g = pixelOps.green(p);
292 int b = pixelOps.blue(p);
// The fourth noise byte of each group (index 4 * i + 3) is not used here.
294 r += noise[4 * i + 0];
295 g += noise[4 * i + 1];
296 b += noise[4 * i + 2];
298 r = std::min(std::max(r, 0), mr);
299 g = std::min(std::max(g, 0), mg);
300 b = std::min(std::max(b, 0), mb);
302 out[i] = pixelOps.combine(r, g, b);
// Applies noise to every line of the given output surface.
//
// @param output  Surface whose lines are modified in place.
//
// Does nothing when the noise setting's value is 0.
307 template <
class Pixel>
308 void FBPostProcessor<Pixel>::drawNoise(OutputSurface& output)
310 if (renderSettings.getNoise().getValue() == 0)
return;
312 unsigned height = output.getHeight();
313 unsigned width = output.getWidth();
315 for (
unsigned y = 0; y < height; ++y) {
316 Pixel* buf = output.getLinePtrDirect<
Pixel>(y);
// Input and output are the same buffer; each line reads the noise table from
// its own start offset, noiseShift[y].
317 drawNoiseLine(buf, buf, &noiseBuf[noiseShift[y]], width);
// Setting-change callback.
//
// @param setting  The setting being reported.
//
// Forwards the notification to VideoLayer::update() first, then recomputes
// the noise table when the reported setting is the noise setting itself
// (compared by address).
321 template <
class Pixel>
322 void FBPostProcessor<Pixel>::update(
const Setting& setting)
324 VideoLayer::update(setting);
325 FloatSetting& noiseSetting = renderSettings.getNoise();
326 if (&setting == &noiseSetting) {
327 preCalcNoise(noiseSetting.getValue());
// NOTE(review): the line naming this function is outside this excerpt; the
// member-initializer list below indicates it is the FBPostProcessor
// constructor.
332 template <
class Pixel>
335 unsigned maxWidth,
unsigned height,
bool canDoInterlace)
// Arguments forwarded to another initializer (presumably the base class; its
// name is not visible here).
337 motherBoard, display, screen_, videoSource, maxWidth, height,
// One noise offset per screen line.
339 , noiseShift(screen.getHeight())
340 , pixelOps(screen.getSDLFormat())
// unsigned(-1) is the largest unsigned value. NOTE(review): presumably a
// sentinel that cannot equal a real scale factor, so the comparison in the
// paint routine detects a change on first use -- confirm.
343 scaleFactor = unsigned(-1);
// Register for change notifications on the noise setting (handled by update())
// and build the initial noise table from its current value.
346 noiseSetting.
attach(*
this);
347 preCalcNoise(noiseSetting.
getValue());
// NOTE(review): the line naming this function is outside this excerpt;
// presumably the destructor, since it undoes the attach() performed on the
// noise setting during construction.
351 template <
class Pixel>
// Stop receiving change notifications for the noise setting.
355 noiseSetting.
detach(*
this);
// NOTE(review): the line naming this function is outside this excerpt
// (presumably the paint routine taking the 'output' surface used below).
// It scales the current frame ('paintFrame') onto 'output' in horizontal
// bands of source lines that share the same line width.
358 template <
class Pixel>
// Nothing to draw without a frame.
361 if (!paintFrame)
return;
// Re-read the scaler settings and update the cached copies when either one
// changed. 'algo' is declared on a line not visible here; what else happens
// on a change (e.g. creating a new scaler) is also outside this excerpt.
365 renderSettings.getScaleAlgorithm().getValue();
366 unsigned factor = renderSettings.getScaleFactor().getValue();
367 if ((scaleAlgorithm != algo) || (scaleFactor != factor)) {
368 scaleAlgorithm = algo;
369 scaleFactor = factor;
376 const unsigned srcHeight = paintFrame->getHeight();
377 const unsigned dstHeight = output.
getHeight();
// Reduce the height ratio by its gcd: srcStep source lines correspond to
// dstStep destination lines, and these are the smallest whole-number counts
// with that ratio.
379 unsigned g =
Math::gcd(srcHeight, dstHeight);
380 unsigned srcStep = srcHeight / g;
381 unsigned dstStep = dstHeight / g;
// Walk over the destination from top to bottom, one band per iteration.
385 unsigned srcStartY = 0;
386 unsigned dstStartY = 0;
387 while (dstStartY < dstHeight) {
390 assert(srcStartY < srcHeight);
// Start with a band of one step, then keep extending it while both ranges
// stay inside their images and the next step has the same line width. The
// statements inside the inner loop are not visible in this excerpt.
393 unsigned lineWidth = getLineWidth(paintFrame, srcStartY, srcStep);
394 unsigned srcEndY = srcStartY + srcStep;
395 unsigned dstEndY = dstStartY + dstStep;
396 while ((srcEndY < srcHeight) && (dstEndY < dstHeight) &&
397 (getLineWidth(paintFrame, srcEndY, srcStep) == lineWidth)) {
// Horizontal stretch setting rounded to the nearest whole number (adding 0.5
// before the truncating conversion).
406 double horStretch = renderSettings.getHorizontalStretch().getValue();
407 unsigned inWidth = unsigned(horStretch + 0.5);
// Scaler output object for this band; the expression creating it is only
// partly visible here.
408 std::unique_ptr<ScalerOutput<Pixel>> dst(
410 output, pixelOps, inWidth));
// Scale source lines [srcStartY, srcEndY) into destination lines
// [dstStartY, dstEndY) -- NOTE(review): half-open ranges are presumed from
// the loop conditions above; confirm against scaleImage().
411 currScaler->scaleImage(
412 *paintFrame, superImposeVideoFrame,
413 srcStartY, srcEndY, lineWidth,
414 *dst, dstStartY, dstEndY);
415 paintFrame->freeLineBuffers();
427 template <
class Pixel>
432 for (
auto y :
xrange(screen.getHeight())) {
433 noiseShift[y] = rand() & (NOISE_SHIFT - 1) & ~15;