openMSX
CharacterConverter.cc
Go to the documentation of this file.
1 /*
2 TODO:
3 - Clean up renderGraphic2, it is currently very hard to understand
4  with all the masks and quarters etc.
5 - Correctly implement vertical scroll in text modes.
6  Can be implemented by reordering blitting, but uses a smaller
7  wrap than GFX modes: 8 lines instead of 256 lines.
8 */
9 
10 #include "CharacterConverter.hh"
11 #include "VDP.hh"
12 #include "VDPVRAM.hh"
13 #include "build-info.hh"
14 #include "components.hh"
15 #include <cstdint>
16 
17 #ifdef __SSE2__
18 #include "emmintrin.h" // SSE2
19 #endif
20 
21 namespace openmsx {
22 
23 template <class Pixel>
25  VDP& vdp_, const Pixel* palFg_, const Pixel* palBg_)
26  : vdp(vdp_), vram(vdp.getVRAM()), palFg(palFg_), palBg(palBg_)
27 {
28  modeBase = 0; // not strictly needed, but avoids Coverity warning
29 }
30 
31 template <class Pixel>
33 {
34  modeBase = mode.getBase();
35  assert(modeBase < 0x0C);
36 }
37 
38 template <class Pixel>
40 {
41  // TODO: Support YJK on modes other than Graphic 6/7.
42  switch (modeBase) {
43  case DisplayMode::GRAPHIC1: // screen 1
44  renderGraphic1(linePtr, line);
45  break;
46  case DisplayMode::TEXT1: // screen 0, width 40
47  renderText1(linePtr, line);
48  break;
49  case DisplayMode::MULTICOLOR: // screen 3
50  renderMulti(linePtr, line);
51  break;
52  case DisplayMode::GRAPHIC2: // screen 2
53  renderGraphic2(linePtr, line);
54  break;
55  case DisplayMode::GRAPHIC3: // screen 4
56  renderGraphic2(linePtr, line); // graphic3, actually
57  break;
58  case DisplayMode::TEXT2: // screen 0, width 80
59  renderText2(linePtr, line);
60  break;
61  case DisplayMode::TEXT1Q: // TMSxxxx only
62  if (vdp.isMSX1VDP()) {
63  renderText1Q(linePtr, line);
64  } else {
65  renderBlank (linePtr);
66  }
67  break;
68  case DisplayMode::MULTIQ: // TMSxxxx only
69  if (vdp.isMSX1VDP()) {
70  renderMultiQ(linePtr, line);
71  } else {
72  renderBlank (linePtr);
73  }
74  break;
75  default: // remaining (non-bitmap) modes
76  if (vdp.isMSX1VDP()) {
77  renderBogus(linePtr);
78  } else {
79  renderBlank(linePtr);
80  }
81  }
82 }
83 
#ifdef __SSE2__
// Copied from Scale2xScaler.cc, TODO move to common location?
/** Bitwise blend of two vectors.
  * For every bit position the result takes the bit from 'a1' where 'mask'
  * is 1 and the bit from 'a0' where 'mask' is 0.
  */
static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
	// (a0 ^ a1) has a 1 wherever the inputs differ; keeping only the
	// masked positions and xor-ing with a0 flips exactly those bits of
	// a0 into the corresponding bits of a1.
	const __m128i differing = _mm_xor_si128(a0, a1);
	const __m128i toFlip    = _mm_and_si128(differing, mask);
	return _mm_xor_si128(toFlip, a0);
}
#endif
91 
92 template<typename Pixel> static inline void draw6(
93  Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
94 {
95  pixelPtr[0] = (pattern & 0x80) ? fg : bg;
96  pixelPtr[1] = (pattern & 0x40) ? fg : bg;
97  pixelPtr[2] = (pattern & 0x20) ? fg : bg;
98  pixelPtr[3] = (pattern & 0x10) ? fg : bg;
99  pixelPtr[4] = (pattern & 0x08) ? fg : bg;
100  pixelPtr[5] = (pattern & 0x04) ? fg : bg;
101  pixelPtr += 6;
102 }
103 
104 template<typename Pixel> static inline void draw8(
105  Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern,
106  bool misAligned, uint32_t& partial)
107 {
108 #ifdef __arm__
109  // ARM version, 16bpp, (32-bit aligned/unaligned destination)
110  if (sizeof(Pixel) == 2) {
111  if (misAligned) {
112  asm volatile (
113  "mov r0,%[PART]\n\t"
114  "tst %[PAT],#128\n\t"
115  "ite eq\n\t"
116  "orreq r0,r0,%[BG], lsl #16\n\t"
117  "orrne r0,r0,%[FG], lsl #16\n\t"
118  "tst %[PAT],#64\n\t"
119  "ite eq\n\t"
120  "moveq r1,%[BG]\n\t"
121  "movne r1,%[FG]\n\t"
122  "tst %[PAT],#32\n\t"
123  "ite eq\n\t"
124  "orreq r1,r1,%[BG], lsl #16\n\t"
125  "orrne r1,r1,%[FG], lsl #16\n\t"
126  "tst %[PAT],#16\n\t"
127  "ite eq\n\t"
128  "moveq r2,%[BG]\n\t"
129  "movne r2,%[FG]\n\t"
130  "tst %[PAT],#8\n\t"
131  "ite eq\n\t"
132  "orreq r2,r2,%[BG], lsl #16\n\t"
133  "orrne r2,r2,%[FG], lsl #16\n\t"
134  "tst %[PAT],#4\n\t"
135  "ite eq\n\t"
136  "moveq r3,%[BG]\n\t"
137  "movne r3,%[FG]\n\t"
138  "tst %[PAT],#2\n\t"
139  "ite eq\n\t"
140  "orreq r3,r3,%[BG], lsl #16\n\t"
141  "orrne r3,r3,%[FG], lsl #16\n\t"
142  "tst %[PAT],#1\n\t"
143  "ite eq\n\t"
144  "moveq %[PART],%[BG]\n\t"
145  "movne %[PART],%[FG]\n\t"
146  "stmia %[OUT]!,{r0-r3}\n\t"
147  : [OUT] "=r" (pixelPtr)
148  , [PART] "=r" (partial)
149  : "[OUT]" (pixelPtr)
150  , "[PART]" (partial)
151  , [PAT] "r" (pattern)
152  , [FG] "r" (uint32_t(fg))
153  , [BG] "r" (uint32_t(bg))
154  : "r0","r1","r2","r3","memory"
155  );
156  } else {
157  asm volatile (
158  "tst %[PAT],#128\n\t"
159  "ite eq\n\t"
160  "moveq r0,%[BG]\n\t"
161  "movne r0,%[FG]\n\t"
162  "tst %[PAT],#64\n\t"
163  "ite eq\n\t"
164  "orreq r0,r0,%[BG], lsl #16\n\t"
165  "orrne r0,r0,%[FG], lsl #16\n\t"
166  "tst %[PAT],#32\n\t"
167  "ite eq\n\t"
168  "moveq r1,%[BG]\n\t"
169  "movne r1,%[FG]\n\t"
170  "tst %[PAT],#16\n\t"
171  "ite eq\n\t"
172  "orreq r1,r1,%[BG], lsl #16\n\t"
173  "orrne r1,r1,%[FG], lsl #16\n\t"
174  "tst %[PAT],#8\n\t"
175  "ite eq\n\t"
176  "moveq r2,%[BG]\n\t"
177  "movne r2,%[FG]\n\t"
178  "tst %[PAT],#4\n\t"
179  "ite eq\n\t"
180  "orreq r2,r2,%[BG], lsl #16\n\t"
181  "orrne r2,r2,%[FG], lsl #16\n\t"
182  "tst %[PAT],#2\n\t"
183  "ite eq\n\t"
184  "moveq r3,%[BG]\n\t"
185  "movne r3,%[FG]\n\t"
186  "tst %[PAT],#1\n\t"
187  "ite eq\n\t"
188  "orreq r3,r3,%[BG], lsl #16\n\t"
189  "orrne r3,r3,%[FG], lsl #16\n\t"
190  "stmia %[OUT]!,{r0-r3}\n\t"
191 
192  : [OUT] "=r" (pixelPtr)
193  : "[OUT]" (pixelPtr)
194  , [PAT] "r" (pattern)
195  , [FG] "r" (uint32_t(fg))
196  , [BG] "r" (uint32_t(bg))
197  : "r0","r1","r2","r3","memory"
198  );
199  }
200  return;
201  }
202 #endif
203  (void)misAligned; (void)partial;
204 
205 #ifdef __SSE2__
206  // SSE2 version, 32bpp (16bpp is possible, but not worth it anymore)
207  if (sizeof(Pixel) == 4) {
208  const __m128i m74 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
209  const __m128i m30 = _mm_set_epi32(0x01, 0x02, 0x04, 0x08);
210  const __m128i zero = _mm_setzero_si128();
211 
212  __m128i fg4 = _mm_set1_epi32(fg);
213  __m128i bg4 = _mm_set1_epi32(bg);
214  __m128i pat = _mm_set1_epi32(pattern);
215 
216  __m128i b74 = _mm_cmpeq_epi32(_mm_and_si128(pat, m74), zero);
217  __m128i b30 = _mm_cmpeq_epi32(_mm_and_si128(pat, m30), zero);
218 
219  __m128i* out = reinterpret_cast<__m128i*>(pixelPtr);
220  _mm_storeu_si128(out + 0, select(fg4, bg4, b74));
221  _mm_storeu_si128(out + 1, select(fg4, bg4, b30));
222  pixelPtr += 8;
223  return;
224  }
225 #endif
226 
227  // C++ version
228  pixelPtr[0] = (pattern & 0x80) ? fg : bg;
229  pixelPtr[1] = (pattern & 0x40) ? fg : bg;
230  pixelPtr[2] = (pattern & 0x20) ? fg : bg;
231  pixelPtr[3] = (pattern & 0x10) ? fg : bg;
232  pixelPtr[4] = (pattern & 0x08) ? fg : bg;
233  pixelPtr[5] = (pattern & 0x04) ? fg : bg;
234  pixelPtr[6] = (pattern & 0x02) ? fg : bg;
235  pixelPtr[7] = (pattern & 0x01) ? fg : bg;
236  pixelPtr += 8;
237 }
238 
239 template <class Pixel>
240 void CharacterConverter<Pixel>::renderText1(
241  Pixel* __restrict pixelPtr, int line)
242 {
243  Pixel fg = palFg[vdp.getForegroundColor()];
244  Pixel bg = palFg[vdp.getBackgroundColor()];
245 
246  // 8 * 256 is small enough to always be contiguous
247  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
248  patternArea += (line + vdp.getVerticalScroll()) & 7;
249 
250  // Note: Because line width is not a power of two, reading an entire line
251  // from a VRAM pointer returned by readArea will not wrap the index
252  // correctly. Therefore we read one character at a time.
253  unsigned nameStart = (line / 8) * 40;
254  unsigned nameEnd = nameStart + 40;
255  for (unsigned name = nameStart; name < nameEnd; ++name) {
256  unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
257  unsigned pattern = patternArea[charcode * 8];
258  draw6(pixelPtr, fg, bg, pattern);
259  }
260 }
261 
262 template <class Pixel>
263 void CharacterConverter<Pixel>::renderText1Q(
264  Pixel* __restrict pixelPtr, int line)
265 {
266  Pixel fg = palFg[vdp.getForegroundColor()];
267  Pixel bg = palFg[vdp.getBackgroundColor()];
268 
269  unsigned patternBaseLine = (~0u << 13) | ((line + vdp.getVerticalScroll()) & 7);
270 
271  // Note: Because line width is not a power of two, reading an entire line
272  // from a VRAM pointer returned by readArea will not wrap the index
273  // correctly. Therefore we read one character at a time.
274  unsigned nameStart = (line / 8) * 40;
275  unsigned nameEnd = nameStart + 40;
276  unsigned patternQuarter = (line & 0xC0) << 2;
277  for (unsigned name = nameStart; name < nameEnd; ++name) {
278  unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
279  unsigned patternNr = patternQuarter | charcode;
280  unsigned pattern = vram.patternTable.readNP(
281  patternBaseLine | (patternNr * 8));
282  draw6(pixelPtr, fg, bg, pattern);
283  }
284 }
285 
286 template <class Pixel>
287 void CharacterConverter<Pixel>::renderText2(
288  Pixel* __restrict pixelPtr, int line)
289 {
290  Pixel plainFg = palFg[vdp.getForegroundColor()];
291  Pixel plainBg = palFg[vdp.getBackgroundColor()];
292  Pixel blinkFg, blinkBg;
293  if (vdp.getBlinkState()) {
294  int fg = vdp.getBlinkForegroundColor();
295  blinkFg = palBg[fg ? fg : vdp.getBlinkBackgroundColor()];
296  blinkBg = palBg[vdp.getBlinkBackgroundColor()];
297  } else {
298  blinkFg = plainFg;
299  blinkBg = plainBg;
300  }
301 
302  // 8 * 256 is small enough to always be contiguous
303  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
304  patternArea += (line + vdp.getVerticalScroll()) & 7;
305 
306  unsigned colorStart = (line / 8) * (80 / 8);
307  unsigned nameStart = (line / 8) * 80;
308  for (unsigned i = 0; i < (80 / 8); ++i) {
309  unsigned colorPattern = vram.colorTable.readNP(
310  (colorStart + i) | (~0u << 9));
311  const byte* nameArea = vram.nameTable.getReadArea(
312  (nameStart + 8 * i) | (~0u << 12), 8);
313  draw6(pixelPtr,
314  (colorPattern & 0x80) ? blinkFg : plainFg,
315  (colorPattern & 0x80) ? blinkBg : plainBg,
316  patternArea[nameArea[0] * 8]);
317  draw6(pixelPtr,
318  (colorPattern & 0x40) ? blinkFg : plainFg,
319  (colorPattern & 0x40) ? blinkBg : plainBg,
320  patternArea[nameArea[1] * 8]);
321  draw6(pixelPtr,
322  (colorPattern & 0x20) ? blinkFg : plainFg,
323  (colorPattern & 0x20) ? blinkBg : plainBg,
324  patternArea[nameArea[2] * 8]);
325  draw6(pixelPtr,
326  (colorPattern & 0x10) ? blinkFg : plainFg,
327  (colorPattern & 0x10) ? blinkBg : plainBg,
328  patternArea[nameArea[3] * 8]);
329  draw6(pixelPtr,
330  (colorPattern & 0x08) ? blinkFg : plainFg,
331  (colorPattern & 0x08) ? blinkBg : plainBg,
332  patternArea[nameArea[4] * 8]);
333  draw6(pixelPtr,
334  (colorPattern & 0x04) ? blinkFg : plainFg,
335  (colorPattern & 0x04) ? blinkBg : plainBg,
336  patternArea[nameArea[5] * 8]);
337  draw6(pixelPtr,
338  (colorPattern & 0x02) ? blinkFg : plainFg,
339  (colorPattern & 0x02) ? blinkBg : plainBg,
340  patternArea[nameArea[6] * 8]);
341  draw6(pixelPtr,
342  (colorPattern & 0x01) ? blinkFg : plainFg,
343  (colorPattern & 0x01) ? blinkBg : plainBg,
344  patternArea[nameArea[7] * 8]);
345  }
346 }
347 
348 template <class Pixel>
349 const byte* CharacterConverter<Pixel>::getNamePtr(int line, int scroll)
350 {
351  // no need to test whether multi-page scrolling is enabled,
352  // indexMask in the nameTable already takes care of it
353  return vram.nameTable.getReadArea(
354  ((line / 8) * 32) | ((scroll & 0x20) ? 0x8000 : 0), 32);
355 }
356 template <class Pixel>
357 void CharacterConverter<Pixel>::renderGraphic1(
358  Pixel* __restrict pixelPtr, int line)
359 {
360  bool misAligned = false; // initialize with dummy
361  uint32_t partial = 0; // values to avoid warning
362 #ifdef __arm__
363  misAligned = sizeof(Pixel) == 2 && (reinterpret_cast<uintptr_t>(pixelPtr) & 3);
364  if (misAligned) pixelPtr--;
365  partial = *pixelPtr;
366 #endif
367 
368  const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
369  patternArea += line & 7;
370  const byte* colorArea = vram.colorTable.getReadArea(0, 256 / 8);
371 
372  int scroll = vdp.getHorizontalScrollHigh();
373  const byte* namePtr = getNamePtr(line, scroll);
374  for (unsigned n = 0; n < 32; ++n) {
375  unsigned charcode = namePtr[scroll & 0x1F];
376  unsigned pattern = patternArea[charcode * 8];
377  unsigned color = colorArea[charcode / 8];
378  Pixel fg = palFg[color >> 4];
379  Pixel bg = palFg[color & 0x0F];
380  draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
381  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
382  }
383 
384 #ifdef __arm__
385  if (misAligned) *pixelPtr = static_cast<Pixel>(partial);
386 #endif
387 }
388 
389 template <class Pixel>
390 void CharacterConverter<Pixel>::renderGraphic2(
391  Pixel* __restrict pixelPtr, int line)
392 {
393  bool misAligned = false; // initialize with dummy
394  uint32_t partial = 0; // values to avoid warning
395 #ifdef __arm__
396  misAligned = sizeof(Pixel) == 2 && (reinterpret_cast<uintptr_t>(pixelPtr) & 3);
397  if (misAligned) pixelPtr--;
398  partial = *pixelPtr;
399 #endif
400 
401  int quarter8 = (((line / 8) * 32) & ~0xFF) * 8;
402  int line7 = line & 7;
403  int scroll = vdp.getHorizontalScrollHigh();
404  const byte* namePtr = getNamePtr(line, scroll);
405 
406  if (vram.colorTable .isContinuous((8 * 256) - 1) &&
407  vram.patternTable.isContinuous((8 * 256) - 1) &&
408  ((scroll & 0x1f) == 0)) {
409  // Both color and pattern table can be accessed contiguously
410  // (no mirroring) and there's no v9958 horizontal scrolling.
411  // This is very common, so make an optimized version for this.
412  const byte* patternArea = vram.patternTable.getReadArea(quarter8, 8 * 256) + line7;
413  const byte* colorArea = vram.colorTable .getReadArea(quarter8, 8 * 256) + line7;
414  for (unsigned n = 0; n < 32; ++n) {
415  unsigned charCode8 = namePtr[n] * 8;
416  unsigned pattern = patternArea[charCode8];
417  unsigned color = colorArea [charCode8];
418  Pixel fg = palFg[color >> 4];
419  Pixel bg = palFg[color & 0x0F];
420  draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
421  }
422  } else {
423  // Slower variant, also works when:
424  // - there is mirroring in the color table
425  // - there is mirroring in the pattern table (TMS9929)
426  // - V9958 horizontal scroll feature is used
427  int baseLine = (~0u << 13) | quarter8 | line7;
428  for (unsigned n = 0; n < 32; ++n) {
429  unsigned charCode8 = namePtr[scroll & 0x1F] * 8;
430  unsigned index = charCode8 | baseLine;
431  unsigned pattern = vram.patternTable.readNP(index);
432  unsigned color = vram.colorTable .readNP(index);
433  Pixel fg = palFg[color >> 4];
434  Pixel bg = palFg[color & 0x0F];
435  draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
436  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
437  }
438  }
439 
440 #ifdef __arm__
441  if (misAligned) *pixelPtr = static_cast<Pixel>(partial);
442 #endif
443 }
444 
445 template <class Pixel>
446 void CharacterConverter<Pixel>::renderMultiHelper(
447  Pixel* __restrict pixelPtr, int line,
448  int mask, int patternQuarter)
449 {
450  unsigned baseLine = mask | ((line / 4) & 7);
451  unsigned scroll = vdp.getHorizontalScrollHigh();
452  const byte* namePtr = getNamePtr(line, scroll);
453  for (unsigned n = 0; n < 32; ++n) {
454  unsigned patternNr = patternQuarter | namePtr[scroll & 0x1F];
455  unsigned color = vram.patternTable.readNP((patternNr * 8) | baseLine);
456  Pixel cl = palFg[color >> 4];
457  Pixel cr = palFg[color & 0x0F];
458  pixelPtr[0] = cl; pixelPtr[1] = cl;
459  pixelPtr[2] = cl; pixelPtr[3] = cl;
460  pixelPtr[4] = cr; pixelPtr[5] = cr;
461  pixelPtr[6] = cr; pixelPtr[7] = cr;
462  pixelPtr += 8;
463  if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
464  }
465 }
466 template <class Pixel>
467 void CharacterConverter<Pixel>::renderMulti(
468  Pixel* __restrict pixelPtr, int line)
469 {
470  int mask = (~0u << 11);
471  renderMultiHelper(pixelPtr, line, mask, 0);
472 }
473 
474 template <class Pixel>
475 void CharacterConverter<Pixel>::renderMultiQ(
476  Pixel* __restrict pixelPtr, int line)
477 {
478  int mask = (~0u << 13);
479  int patternQuarter = (line * 4) & ~0xFF; // (line / 8) * 32
480  renderMultiHelper(pixelPtr, line, mask, patternQuarter);
481 }
482 
483 template <class Pixel>
484 void CharacterConverter<Pixel>::renderBogus(
485  Pixel* __restrict pixelPtr)
486 {
487  Pixel fg = palFg[vdp.getForegroundColor()];
488  Pixel bg = palFg[vdp.getBackgroundColor()];
489  for (int n = 8; n--; ) *pixelPtr++ = bg;
490  for (int c = 40; c--; ) {
491  for (int n = 4; n--; ) *pixelPtr++ = fg;
492  for (int n = 2; n--; ) *pixelPtr++ = bg;
493  }
494  for (int n = 8; n--; ) *pixelPtr++ = bg;
495 }
496 
497 template <class Pixel>
498 void CharacterConverter<Pixel>::renderBlank(
499  Pixel* __restrict pixelPtr)
500 {
501  // when this is in effect, the VRAM is not refreshed anymore, but that
502  // is not emulated
503  for (int n = 256; n--; ) *pixelPtr++ = palFg[15];
504 }
505 
// Force template instantiation for every pixel format that is enabled in
// this build; the member definitions above only live in this file.
#if HAVE_32BPP || COMPONENT_GL
template class CharacterConverter<uint32_t>;
#endif
#if HAVE_16BPP
template class CharacterConverter<uint16_t>;
#endif
513 
514 } // namespace openmsx
unsigned char byte
8 bit unsigned integer
Definition: openmsx.hh:27
Represents a VDP display mode.
Definition: DisplayMode.hh:14
unsigned Pixel
CharacterConverter(VDP &vdp, const Pixel *palFg, const Pixel *palBg)
Create a new bitmap scanline converter.
Thanks to enen for testing this on a real cartridge:
Definition: Autofire.cc:7
void convertLine(Pixel *linePtr, int line)
Convert a line of V9938 VRAM to 512 host pixels.
Unified implementation of MSX Video Display Processors (VDPs).
Definition: VDP.hh:67
byte getBase() const
Get the base display mode as an integer: M5..M1 combined.
Definition: DisplayMode.hh:123
void setDisplayMode(DisplayMode mode)
Select the display mode to use for scanline conversion.