openMSX
HQCommon.hh
Go to the documentation of this file.
1 #ifndef HQCOMMON_HH
2 #define HQCOMMON_HH
3 
4 #include "FrameSource.hh"
5 #include "ScalerOutput.hh"
6 #include "LineScalers.hh"
7 #include "PixelOperations.hh"
8 #include "vla.hh"
9 #include "build-info.hh"
10 #include <algorithm>
11 #include <cassert>
12 #include <cstdint>
13 
14 namespace openmsx {
15 
/**
 * Convert a pixel into the 32-bit working value used by the edge operators
 * in this file.
 *
 * - 2-byte pixels: the 5-, 6- and 5-bit fields are moved to the upper five
 *   bits of three separate bytes (the lowest bit of the middle, green, field
 *   is discarded).
 * - Any other pixel size: the value is returned with the low three bits of
 *   every byte cleared.
 *
 * @param p Pixel value as stored in the source line.
 * @return The masked/expanded 32-bit value.
 */
template <typename Pixel>
static inline uint32_t readPixel(Pixel p)
{
	// TODO: Use surface info instead.
	if (sizeof(Pixel) != 2) {
		return p & 0xF8F8F8F8;
	}
	const uint32_t upperField  = (p & 0xF800) << 8;
	const uint32_t middleField = (p & 0x07C0) << 5; // drop lowest green bit
	const uint32_t lowerField  = (p & 0x001F) << 3;
	return upperField | middleField | lowerField;
}
28 
/**
 * Convert a 32-bit working value (see readPixel) back into a pixel.
 *
 * - 2-byte pixels: the three byte-aligned fields are packed back into a
 *   5/6/5-bit layout.
 * - Any other pixel size: the upper five bits of every byte are kept and the
 *   upper three bits are copied into the low three bits of the same byte.
 *
 * @param p 32-bit working value.
 * @return The pixel value, implicitly converted to Pixel.
 */
template <typename Pixel>
static inline Pixel writePixel(uint32_t p)
{
	// TODO: Use surface info instead.
	if (sizeof(Pixel) != 2) {
		const uint32_t upperFive  = p & 0xF8F8F8F8;
		const uint32_t lowerThree = (p & 0xE0E0E0E0) >> 5;
		return upperFive | lowerThree;
	}
	const uint32_t upperField  = (p & 0xF80000) >> 8;
	const uint32_t middleField = (p & 0x00FC00) >> 5;
	const uint32_t lowerField  = (p & 0x0000F8) >> 3;
	return upperField | middleField | lowerField;
}
41 
/**
 * Edge predicate: decides whether two 32-bit pixel values differ enough to
 * be treated as an edge.
 *
 * The constructor takes the bit offsets of the 8-bit R, G and B channels
 * inside the 32-bit value. Two values form an edge when any of three
 * combinations of the per-channel differences leaves its allowed range:
 *   dy = dr + dg + db      allowed range [-0xC0, 0xC0]
 *   du = dr - db           allowed range [-0x1C, 0x1C]
 *   dv = 3 * dg - dy       allowed range [-0x30, 0x30]
 */
class EdgeHQ
{
public:
	EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
		: shiftR(shiftR_), shiftG(shiftG_), shiftB(shiftB_)
	{
	}

	/** Return true when c1 and c2 are considered different. */
	inline bool operator()(uint32_t c1, uint32_t c2) const
	{
		// Bit-identical values never form an edge.
		if (c1 == c2) return false;

		const int dr = channelDelta(c1, c2, shiftR);
		const int dg = channelDelta(c1, c2, shiftG);
		const int db = channelDelta(c1, c2, shiftB);

		const int dy = dr + dg + db;
		return outOfRange(dy, 0xC0)
		    || outOfRange(dr - db, 0x1C)
		    || outOfRange(3 * dg - dy, 0x30);
	}

private:
	/** Signed difference of the 8-bit channel found at bit offset 'shift'. */
	static int channelDelta(uint32_t c1, uint32_t c2, unsigned shift)
	{
		const unsigned v1 = (c1 >> shift) & 0xFF;
		const unsigned v2 = (c2 >> shift) & 0xFF;
		return v1 - v2;
	}

	/** True when 'value' lies outside the closed interval [-limit, limit]. */
	static bool outOfRange(int value, int limit)
	{
		return value < -limit || value > limit;
	}

	const unsigned shiftR;
	const unsigned shiftG;
	const unsigned shiftB;
};
82 
83 template<typename Pixel>
85 {
86  if (sizeof(Pixel) == 2) {
87  return EdgeHQ(0, 8, 16);
88  } else {
89  return EdgeHQ(pixelOps.getRshift(),
90  pixelOps.getGshift(),
91  pixelOps.getBshift());
92  }
93 }
94 
/**
 * Cheap edge predicate: two pixel values form an edge as soon as they are
 * not exactly equal.
 */
struct EdgeHQLite
{
	inline bool operator()(uint32_t c1, uint32_t c2) const
	{
		return !(c1 == c2);
	}
};
102 
// Update the packed edge patterns for two vertically adjacent lines.
//
// 'curr' and 'next' are the two input lines, 'edgeOp(a, b)' returns true when
// pixels a and b count as different. 'edges2' is read and rewritten in place:
// it holds 160 32-bit words, and every word packs the 16-bit patterns of two
// horizontally adjacent pixels. The old content of each word is shifted right
// by 5 and masked with 0x01F0 per half before the newly computed bits are
// OR-ed in, so 'edges2' must already hold valid data on entry
// (NOTE(review): presumably the result for the line pair directly above --
// confirm against the caller).
//
// The line width (320 pixels, 160 words) is hard-coded. The two branches are
// mirror images of each other: OPENMSX_BIGENDIAN only selects which 16-bit
// half of a word belongs to the even pixel and which to the odd pixel.
103 template <typename EdgeOp>
104 void calcEdgesGL(const uint32_t* __restrict curr, const uint32_t* __restrict next,
105  uint32_t* __restrict edges2, EdgeOp edgeOp)
106 {
107  typedef uint32_t Pixel;
108  if (OPENMSX_BIGENDIAN) {
  // Even pixel lives in the upper 16 bits, odd pixel in the lower 16 bits.
109  unsigned pattern = 0;
110  Pixel c5 = curr[0];
111  Pixel c8 = next[0];
  // Seed the bits that the first iteration will carry over (see the shift
  // at the top of the loop) using the leftmost column only.
112  if (edgeOp(c5, c8)) pattern |= 0x1400;
113 
  // All pixel pairs except the last one; c5/c8 are the even column,
  // c6/c9 the odd column, and c5/c8 are reloaded with the next even
  // column halfway through the iteration.
114  for (unsigned xx = 0; xx < (320 - 2) / 2; ++xx) {
  // Carry three bits of the previous odd pixel into the even half.
115  pattern = (pattern << (16 + 1)) & 0xA8000000;
  // Merge in the bits kept from the old content of this word.
116  pattern |= ((edges2[xx] >> 5) & 0x01F001F0);
117 
118  if (edgeOp(c5, c8)) pattern |= 0x02000000;
119  Pixel c6 = curr[2 * xx + 1];
  // Comparisons between the even and odd column set a bit in both halves.
120  if (edgeOp(c6, c8)) pattern |= 0x10002000;
121  if (edgeOp(c5, c6)) pattern |= 0x40008000;
122  Pixel c9 = next[2 * xx + 1];
123  if (edgeOp(c5, c9)) pattern |= 0x04000800;
124 
125  if (edgeOp(c6, c9)) pattern |= 0x0200;
  // From here on c5/c8 refer to the even column of the NEXT pair.
126  c5 = curr[2 * xx + 2];
127  if (edgeOp(c5, c9)) pattern |= 0x1000;
128  if (edgeOp(c6, c5)) pattern |= 0x4000;
129  c8 = next[2 * xx + 2];
130  if (edgeOp(c6, c8)) pattern |= 0x0400;
131 
132  edges2[xx] = pattern;
133  }
134 
  // Last pixel pair (columns 318 and 319): there is no column to the right.
135  pattern = (pattern << (16 + 1)) & 0xA8000000;
136  pattern |= ((edges2[159] >> 5) & 0x01F001F0);
137 
138  if (edgeOp(c5, c8)) pattern |= 0x02000000;
139  Pixel c6 = curr[319];
140  if (edgeOp(c6, c8)) pattern |= 0x10002000;
141  if (edgeOp(c5, c6)) pattern |= 0x40008000;
142  Pixel c9 = next[319];
143  if (edgeOp(c5, c9)) pattern |= 0x04000800;
144 
  // 0x1600 == 0x0200 | 0x0400 | 0x1000: the three bits that the loop
  // derives from the right-hand neighbour all come from (c6, c9) here.
145  if (edgeOp(c6, c9)) pattern |= 0x1600;
146 
147  edges2[159] = pattern;
148  } else {
  // Same algorithm with the halves swapped: even pixel in the lower
  // 16 bits, odd pixel in the upper 16 bits.
149  unsigned pattern = 0;
150  Pixel c5 = curr[0];
151  Pixel c8 = next[0];
152  if (edgeOp(c5, c8)) pattern |= 0x14000000;
153 
154  for (unsigned xx = 0; xx < (320 - 2) / 2; ++xx) {
  // Carry three bits of the previous odd pixel into the even half.
155  pattern = (pattern >> (16 -1)) & 0xA800;
  // Merge in the bits kept from the old content of this word.
156  pattern |= ((edges2[xx] >> 5) & 0x01F001F0);
157 
158  if (edgeOp(c5, c8)) pattern |= 0x0200;
159  Pixel c6 = curr[2 * xx + 1];
160  if (edgeOp(c6, c8)) pattern |= 0x20001000;
161  if (edgeOp(c5, c6)) pattern |= 0x80004000;
162  Pixel c9 = next[2 * xx + 1];
163  if (edgeOp(c5, c9)) pattern |= 0x08000400;
164 
165  if (edgeOp(c6, c9)) pattern |= 0x02000000;
  // From here on c5/c8 refer to the even column of the NEXT pair.
166  c5 = curr[2 * xx + 2];
167  if (edgeOp(c5, c9)) pattern |= 0x10000000;
168  if (edgeOp(c6, c5)) pattern |= 0x40000000;
169  c8 = next[2 * xx + 2];
170  if (edgeOp(c6, c8)) pattern |= 0x04000000;
171 
172  edges2[xx] = pattern;
173  }
174 
  // Last pixel pair (columns 318 and 319): there is no column to the right.
175  pattern = (pattern >> (16 -1)) & 0xA800;
176  pattern |= ((edges2[159] >> 5) & 0x01F001F0);
177 
178  if (edgeOp(c5, c8)) pattern |= 0x0200;
179  Pixel c6 = curr[319];
180  if (edgeOp(c6, c8)) pattern |= 0x20001000;
181  if (edgeOp(c5, c6)) pattern |= 0x80004000;
182  Pixel c9 = next[319];
183  if (edgeOp(c5, c9)) pattern |= 0x08000400;
184 
  // 0x16000000 combines the three right-neighbour bits, all taken from
  // the (c6, c9) comparison because no further column exists.
185  if (edgeOp(c6, c9)) pattern |= 0x16000000;
186 
187  edges2[159] = pattern;
188  }
189 }
190 
/**
 * Fill edgeBuf with the edge patterns between two vertically adjacent
 * source lines.
 *
 * For every column x one value is written to edgeBuf[x]:
 *   bit 5      : srcPrev[x]   differs from srcCurr[x]
 *   bit 6      : srcPrev[x]   differs from srcCurr[x + 1]
 *   bit 7      : srcCurr[x]   differs from srcPrev[x + 1]
 *   bits 0..1  : bits 6..7 of the previous column (shifted down by 6)
 * At the left border the carried bits, and at the right border bits 6 and 7,
 * are derived from the vertical comparison of that column alone, because
 * there is no neighbouring column.
 *
 * @param srcPrev  Upper line, srcWidth pixels.
 * @param srcCurr  Lower line, srcWidth pixels.
 * @param srcWidth Number of pixels per line; nothing is written when 0.
 * @param edgeBuf  Output, srcWidth entries.
 * @param edgeOp   Predicate returning true when two readPixel() values differ.
 */
template <typename Pixel, typename EdgeOp>
static void calcInitialEdges(
	const Pixel* __restrict srcPrev, const Pixel* __restrict srcCurr,
	unsigned srcWidth, unsigned* __restrict edgeBuf, EdgeOp edgeOp)
{
	// An empty line has no edges. Returning here also prevents the unsigned
	// 'srcWidth - 1' below from wrapping around and running off the buffers.
	if (srcWidth == 0) return;

	unsigned x = 0;
	uint32_t c1 = readPixel(srcPrev[x]);
	uint32_t c2 = readPixel(srcCurr[x]);
	// Left border: pre-load bits 6 and 7 so that the first shift below turns
	// them into the carried bits 0 and 1 of column 0.
	unsigned pattern = edgeOp(c1, c2) ? ((1 << 6) | (1 << 7)) : 0;
	for (/* */; x < (srcWidth - 1); ++x) {
		pattern >>= 6; // keep only the two diagonal bits of the previous column
		uint32_t n1 = readPixel(srcPrev[x + 1]);
		uint32_t n2 = readPixel(srcCurr[x + 1]);
		if (edgeOp(c1, c2)) pattern |= (1 << 5);
		if (edgeOp(c1, n2)) pattern |= (1 << 6);
		if (edgeOp(c2, n1)) pattern |= (1 << 7);
		edgeBuf[x] = pattern;
		c1 = n1; c2 = n2;
	}
	// Right border: no column x + 1, so all three bits follow the vertical
	// comparison of the last column.
	pattern >>= 6;
	if (edgeOp(c1, c2)) pattern |= (1 << 5) | (1 << 6) | (1 << 7);
	edgeBuf[x] = pattern;
}
214 
215 template <typename Pixel, typename HQScale, typename EdgeOp>
216 static void doHQScale2(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
217  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
218  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
219 {
220  VLA(unsigned, edgeBuf, srcWidth);
221  VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
222  VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
223  VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
224  VLA_SSE_ALIGNED(Pixel, bufA, 2 * srcWidth);
225  VLA_SSE_ALIGNED(Pixel, bufB, 2 * srcWidth);
226 
227  int srcY = srcStartY;
228  auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
229  auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
230 
231  calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
232 
233  bool isCopy = postScale.isCopy();
234  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 2) {
235  auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
236  auto* dst0 = dst.acquireLine(dstY + 0);
237  auto* dst1 = dst.acquireLine(dstY + 1);
238  if (isCopy) {
239  hqScale(srcPrev, srcCurr, srcNext, dst0, dst1,
240  srcWidth, edgeBuf, edgeOp);
241  } else {
242  hqScale(srcPrev, srcCurr, srcNext, bufA, bufB,
243  srcWidth, edgeBuf, edgeOp);
244  postScale(bufA, dst0, dstWidth);
245  postScale(bufB, dst1, dstWidth);
246  }
247  dst.releaseLine(dstY + 0, dst0);
248  dst.releaseLine(dstY + 1, dst1);
249  srcPrev = srcCurr;
250  srcCurr = srcNext;
251  std::swap(buf1, buf2);
252  std::swap(buf2, buf3);
253  }
254 }
255 
256 template <typename Pixel, typename HQScale, typename EdgeOp>
257 static void doHQScale3(HQScale hqScale, EdgeOp edgeOp, PolyLineScaler<Pixel>& postScale,
258  FrameSource& src, unsigned srcStartY, unsigned /*srcEndY*/, unsigned srcWidth,
259  ScalerOutput<Pixel>& dst, unsigned dstStartY, unsigned dstEndY, unsigned dstWidth)
260 {
261  VLA(unsigned, edgeBuf, srcWidth);
262  VLA_SSE_ALIGNED(Pixel, buf1_, srcWidth); auto* buf1 = buf1_;
263  VLA_SSE_ALIGNED(Pixel, buf2_, srcWidth); auto* buf2 = buf2_;
264  VLA_SSE_ALIGNED(Pixel, buf3_, srcWidth); auto* buf3 = buf3_;
265  VLA_SSE_ALIGNED(Pixel, bufA, 3 * srcWidth);
266  VLA_SSE_ALIGNED(Pixel, bufB, 3 * srcWidth);
267  VLA_SSE_ALIGNED(Pixel, bufC, 3 * srcWidth);
268 
269  int srcY = srcStartY;
270  auto* srcPrev = src.getLinePtr(srcY - 1, srcWidth, buf1);
271  auto* srcCurr = src.getLinePtr(srcY + 0, srcWidth, buf2);
272 
273  calcInitialEdges(srcPrev, srcCurr, srcWidth, edgeBuf, edgeOp);
274 
275  bool isCopy = postScale.isCopy();
276  for (unsigned dstY = dstStartY; dstY < dstEndY; srcY += 1, dstY += 3) {
277  auto* srcNext = src.getLinePtr(srcY + 1, srcWidth, buf3);
278  auto* dst0 = dst.acquireLine(dstY + 0);
279  auto* dst1 = dst.acquireLine(dstY + 1);
280  auto* dst2 = dst.acquireLine(dstY + 2);
281  if (isCopy) {
282  hqScale(srcPrev, srcCurr, srcNext, dst0, dst1, dst2,
283  srcWidth, edgeBuf, edgeOp);
284  } else {
285  hqScale(srcPrev, srcCurr, srcNext, bufA, bufB, bufC,
286  srcWidth, edgeBuf, edgeOp);
287  postScale(bufA, dst0, dstWidth);
288  postScale(bufB, dst1, dstWidth);
289  postScale(bufC, dst2, dstWidth);
290  }
291  dst.releaseLine(dstY + 0, dst0);
292  dst.releaseLine(dstY + 1, dst1);
293  dst.releaseLine(dstY + 2, dst2);
294  srcPrev = srcCurr;
295  srcCurr = srcNext;
296  std::swap(buf1, buf2);
297  std::swap(buf2, buf3);
298  }
299 }
300 
301 } // namespace openmsx
302 
303 #endif
EdgeHQ(unsigned shiftR_, unsigned shiftG_, unsigned shiftB_)
Definition: HQCommon.hh:45
void calcEdgesGL(const uint32_t *curr, const uint32_t *next, uint32_t *edges2, EdgeOp edgeOp)
Definition: HQCommon.hh:104
unsigned Pixel
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:97
EdgeHQ createEdgeHQ(const PixelOperations< Pixel > &pixelOps)
Definition: HQCommon.hh:84
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:7
bool operator()(uint32_t c1, uint32_t c2) const
Definition: HQCommon.hh:50
#define VLA(TYPE, NAME, LENGTH)
Definition: vla.hh:10
#define VLA_SSE_ALIGNED(TYPE, NAME, LENGTH)
Definition: vla.hh:44