openMSX
CharacterConverter.cc
/*
TODO:
- Clean up renderGraphic2, it is currently very hard to understand
  with all the masks and quarters etc.
- Correctly implement vertical scroll in text modes.
  Can be implemented by reordering blitting, but uses a smaller
  wrap than GFX modes: 8 lines instead of 256 lines.
*/

#include "CharacterConverter.hh"
#include "VDP.hh"
#include "VDPVRAM.hh"
#include "build-info.hh"
#include "components.hh"
#include <cassert> // for assert() in setDisplayMode()
#include <cstdint>

#ifdef __SSE2__
#include <emmintrin.h> // SSE2
#endif

namespace openmsx {

template <class Pixel>
CharacterConverter<Pixel>::CharacterConverter(
    VDP& vdp_, const Pixel* palFg_, const Pixel* palBg_)
    : vdp(vdp_), vram(vdp.getVRAM()), palFg(palFg_), palBg(palBg_)
{
    modeBase = 0; // not strictly needed, but avoids Coverity warning
}

template <class Pixel>
void CharacterConverter<Pixel>::setDisplayMode(DisplayMode mode)
{
    modeBase = mode.getBase();
    assert(modeBase < 0x0C);
}

template <class Pixel>
void CharacterConverter<Pixel>::convertLine(Pixel* linePtr, int line)
{
    // TODO: Support YJK on modes other than Graphic 6/7.
    switch (modeBase) {
    case DisplayMode::GRAPHIC1: // screen 1
        renderGraphic1(linePtr, line);
        break;
    case DisplayMode::TEXT1: // screen 0, width 40
        renderText1(linePtr, line);
        break;
    case DisplayMode::MULTICOLOR: // screen 3
        renderMulti(linePtr, line);
        break;
    case DisplayMode::GRAPHIC2: // screen 2
        renderGraphic2(linePtr, line);
        break;
    case DisplayMode::GRAPHIC3: // screen 4
        renderGraphic2(linePtr, line); // graphic3, actually
        break;
    case DisplayMode::TEXT2: // screen 0, width 80
        renderText2(linePtr, line);
        break;
    case DisplayMode::TEXT1Q: // TMSxxxx only
        if (vdp.isMSX1VDP()) {
            renderText1Q(linePtr, line);
        } else {
            renderBlank(linePtr);
        }
        break;
    case DisplayMode::MULTIQ: // TMSxxxx only
        if (vdp.isMSX1VDP()) {
            renderMultiQ(linePtr, line);
        } else {
            renderBlank(linePtr);
        }
        break;
    default: // remaining (non-bitmap) modes
        if (vdp.isMSX1VDP()) {
            renderBogus(linePtr);
        } else {
            renderBlank(linePtr);
        }
    }
}

#ifdef __SSE2__
// Copied from Scale2xScaler.cc, TODO move to common location?
static inline __m128i select(__m128i a0, __m128i a1, __m128i mask)
{
    return _mm_xor_si128(_mm_and_si128(_mm_xor_si128(a0, a1), mask), a0);
}
#endif

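// Expand one pattern byte into the 6 visible pixels of a text-mode character
// cell (bits 7..2; the lowest two bits of the pattern are not displayed) and
// advance the destination pointer.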
template<typename Pixel> static inline void draw6(
    Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern)
{
    pixelPtr[0] = (pattern & 0x80) ? fg : bg;
    pixelPtr[1] = (pattern & 0x40) ? fg : bg;
    pixelPtr[2] = (pattern & 0x20) ? fg : bg;
    pixelPtr[3] = (pattern & 0x10) ? fg : bg;
    pixelPtr[4] = (pattern & 0x08) ? fg : bg;
    pixelPtr[5] = (pattern & 0x04) ? fg : bg;
    pixelPtr += 6;
}

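// Expand one pattern byte into 8 pixels, most significant bit first. The ARM
// 16bpp path writes the pixels as four 32-bit stores; 'misAligned' and
// 'partial' carry the spill half-word when the destination is only 16-bit
// aligned. Other platforms ignore these two parameters.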
template<typename Pixel> static inline void draw8(
    Pixel* __restrict & pixelPtr, Pixel fg, Pixel bg, byte pattern,
    bool misAligned, uint32_t& partial)
{
#ifdef __arm__
    // ARM version, 16bpp, (32-bit aligned/unaligned destination)
    if (sizeof(Pixel) == 2) {
        if (misAligned) {
            asm volatile (
                "mov r0,%[PART]\n\t"
                "tst %[PAT],#128\n\t"
                "ite eq\n\t"
                "orreq r0,r0,%[BG], lsl #16\n\t"
                "orrne r0,r0,%[FG], lsl #16\n\t"
                "tst %[PAT],#64\n\t"
                "ite eq\n\t"
                "moveq r1,%[BG]\n\t"
                "movne r1,%[FG]\n\t"
                "tst %[PAT],#32\n\t"
                "ite eq\n\t"
                "orreq r1,r1,%[BG], lsl #16\n\t"
                "orrne r1,r1,%[FG], lsl #16\n\t"
                "tst %[PAT],#16\n\t"
                "ite eq\n\t"
                "moveq r2,%[BG]\n\t"
                "movne r2,%[FG]\n\t"
                "tst %[PAT],#8\n\t"
                "ite eq\n\t"
                "orreq r2,r2,%[BG], lsl #16\n\t"
                "orrne r2,r2,%[FG], lsl #16\n\t"
                "tst %[PAT],#4\n\t"
                "ite eq\n\t"
                "moveq r3,%[BG]\n\t"
                "movne r3,%[FG]\n\t"
                "tst %[PAT],#2\n\t"
                "ite eq\n\t"
                "orreq r3,r3,%[BG], lsl #16\n\t"
                "orrne r3,r3,%[FG], lsl #16\n\t"
                "tst %[PAT],#1\n\t"
                "ite eq\n\t"
                "moveq %[PART],%[BG]\n\t"
                "movne %[PART],%[FG]\n\t"
                "stmia %[OUT]!,{r0-r3}\n\t"
                : [OUT] "=r" (pixelPtr)
                , [PART] "=r" (partial)
                : "[OUT]" (pixelPtr)
                , "[PART]" (partial)
                , [PAT] "r" (pattern)
                , [FG] "r" (uint32_t(fg))
                , [BG] "r" (uint32_t(bg))
                : "r0","r1","r2","r3","memory"
            );
        } else {
            asm volatile (
                "tst %[PAT],#128\n\t"
                "ite eq\n\t"
                "moveq r0,%[BG]\n\t"
                "movne r0,%[FG]\n\t"
                "tst %[PAT],#64\n\t"
                "ite eq\n\t"
                "orreq r0,r0,%[BG], lsl #16\n\t"
                "orrne r0,r0,%[FG], lsl #16\n\t"
                "tst %[PAT],#32\n\t"
                "ite eq\n\t"
                "moveq r1,%[BG]\n\t"
                "movne r1,%[FG]\n\t"
                "tst %[PAT],#16\n\t"
                "ite eq\n\t"
                "orreq r1,r1,%[BG], lsl #16\n\t"
                "orrne r1,r1,%[FG], lsl #16\n\t"
                "tst %[PAT],#8\n\t"
                "ite eq\n\t"
                "moveq r2,%[BG]\n\t"
                "movne r2,%[FG]\n\t"
                "tst %[PAT],#4\n\t"
                "ite eq\n\t"
                "orreq r2,r2,%[BG], lsl #16\n\t"
                "orrne r2,r2,%[FG], lsl #16\n\t"
                "tst %[PAT],#2\n\t"
                "ite eq\n\t"
                "moveq r3,%[BG]\n\t"
                "movne r3,%[FG]\n\t"
                "tst %[PAT],#1\n\t"
                "ite eq\n\t"
                "orreq r3,r3,%[BG], lsl #16\n\t"
                "orrne r3,r3,%[FG], lsl #16\n\t"
                "stmia %[OUT]!,{r0-r3}\n\t"

                : [OUT] "=r" (pixelPtr)
                : "[OUT]" (pixelPtr)
                , [PAT] "r" (pattern)
                , [FG] "r" (uint32_t(fg))
                , [BG] "r" (uint32_t(bg))
                : "r0","r1","r2","r3","memory"
            );
        }
        return;
    }
#endif
    (void)misAligned; (void)partial;

#ifdef __SSE2__
    // SSE2 version, 32bpp (16bpp is possible, but not worth it anymore)
    if (sizeof(Pixel) == 4) {
        const __m128i m74 = _mm_set_epi32(0x10, 0x20, 0x40, 0x80);
        const __m128i m30 = _mm_set_epi32(0x01, 0x02, 0x04, 0x08);
        const __m128i zero = _mm_setzero_si128();

        __m128i fg4 = _mm_set1_epi32(fg);
        __m128i bg4 = _mm_set1_epi32(bg);
        __m128i pat = _mm_set1_epi32(pattern);

        __m128i b74 = _mm_cmpeq_epi32(_mm_and_si128(pat, m74), zero);
        __m128i b30 = _mm_cmpeq_epi32(_mm_and_si128(pat, m30), zero);

        __m128i* out = reinterpret_cast<__m128i*>(pixelPtr);
        _mm_storeu_si128(out + 0, select(fg4, bg4, b74));
        _mm_storeu_si128(out + 1, select(fg4, bg4, b30));
        pixelPtr += 8;
        return;
    }
#endif

    // C++ version
    pixelPtr[0] = (pattern & 0x80) ? fg : bg;
    pixelPtr[1] = (pattern & 0x40) ? fg : bg;
    pixelPtr[2] = (pattern & 0x20) ? fg : bg;
    pixelPtr[3] = (pattern & 0x10) ? fg : bg;
    pixelPtr[4] = (pattern & 0x08) ? fg : bg;
    pixelPtr[5] = (pattern & 0x04) ? fg : bg;
    pixelPtr[6] = (pattern & 0x02) ? fg : bg;
    pixelPtr[7] = (pattern & 0x01) ? fg : bg;
    pixelPtr += 8;
}

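// TEXT1 (screen 0, width 40): 40 character columns, 6 pixels per character.
// The name table supplies the character code and the pattern table one byte
// per character row; foreground/background come from the VDP color register
// rather than from a color table.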
template <class Pixel>
void CharacterConverter<Pixel>::renderText1(
    Pixel* __restrict pixelPtr, int line)
{
    Pixel fg = palFg[vdp.getForegroundColor()];
    Pixel bg = palFg[vdp.getBackgroundColor()];

    // 8 * 256 is small enough to always be contiguous
    const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
    patternArea += (line + vdp.getVerticalScroll()) & 7;

    // Note: Because line width is not a power of two, reading an entire line
    // from a VRAM pointer returned by readArea will not wrap the index
    // correctly. Therefore we read one character at a time.
    unsigned nameStart = (line / 8) * 40;
    unsigned nameEnd = nameStart + 40;
    for (unsigned name = nameStart; name < nameEnd; ++name) {
        unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
        unsigned pattern = patternArea[charcode * 8];
        draw6(pixelPtr, fg, bg, pattern);
    }
}

template <class Pixel>
void CharacterConverter<Pixel>::renderText1Q(
    Pixel* __restrict pixelPtr, int line)
{
    Pixel fg = palFg[vdp.getForegroundColor()];
    Pixel bg = palFg[vdp.getBackgroundColor()];

    unsigned patternBaseLine = (~0u << 13) | ((line + vdp.getVerticalScroll()) & 7);

    // Note: Because line width is not a power of two, reading an entire line
    // from a VRAM pointer returned by readArea will not wrap the index
    // correctly. Therefore we read one character at a time.
    unsigned nameStart = (line / 8) * 40;
    unsigned nameEnd = nameStart + 40;
    unsigned patternQuarter = (line & 0xC0) << 2;
    for (unsigned name = nameStart; name < nameEnd; ++name) {
        unsigned charcode = vram.nameTable.readNP((name + 0xC00) | (~0u << 12));
        unsigned patternNr = patternQuarter | charcode;
        unsigned pattern = vram.patternTable.readNP(
            patternBaseLine | (patternNr * 8));
        draw6(pixelPtr, fg, bg, pattern);
    }
}

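// TEXT2 (screen 0, width 80): like TEXT1 but with 80 columns, and the color
// table supplies one bit per character (80 / 8 = 10 bytes per row) that
// selects the blink color pair while the blink state is active.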
template <class Pixel>
void CharacterConverter<Pixel>::renderText2(
    Pixel* __restrict pixelPtr, int line)
{
    Pixel plainFg = palFg[vdp.getForegroundColor()];
    Pixel plainBg = palFg[vdp.getBackgroundColor()];
    Pixel blinkFg, blinkBg;
    if (vdp.getBlinkState()) {
        int fg = vdp.getBlinkForegroundColor();
        blinkFg = palBg[fg ? fg : vdp.getBlinkBackgroundColor()];
        blinkBg = palBg[vdp.getBlinkBackgroundColor()];
    } else {
        blinkFg = plainFg;
        blinkBg = plainBg;
    }

    // 8 * 256 is small enough to always be contiguous
    const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
    patternArea += (line + vdp.getVerticalScroll()) & 7;

    unsigned colorStart = (line / 8) * (80 / 8);
    unsigned nameStart = (line / 8) * 80;
    for (unsigned i = 0; i < (80 / 8); ++i) {
        unsigned colorPattern = vram.colorTable.readNP(
            (colorStart + i) | (~0u << 9));
        const byte* nameArea = vram.nameTable.getReadArea(
            (nameStart + 8 * i) | (~0u << 12), 8);
        draw6(pixelPtr,
            (colorPattern & 0x80) ? blinkFg : plainFg,
            (colorPattern & 0x80) ? blinkBg : plainBg,
            patternArea[nameArea[0] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x40) ? blinkFg : plainFg,
            (colorPattern & 0x40) ? blinkBg : plainBg,
            patternArea[nameArea[1] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x20) ? blinkFg : plainFg,
            (colorPattern & 0x20) ? blinkBg : plainBg,
            patternArea[nameArea[2] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x10) ? blinkFg : plainFg,
            (colorPattern & 0x10) ? blinkBg : plainBg,
            patternArea[nameArea[3] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x08) ? blinkFg : plainFg,
            (colorPattern & 0x08) ? blinkBg : plainBg,
            patternArea[nameArea[4] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x04) ? blinkFg : plainFg,
            (colorPattern & 0x04) ? blinkBg : plainBg,
            patternArea[nameArea[5] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x02) ? blinkFg : plainFg,
            (colorPattern & 0x02) ? blinkBg : plainBg,
            patternArea[nameArea[6] * 8]);
        draw6(pixelPtr,
            (colorPattern & 0x01) ? blinkFg : plainFg,
            (colorPattern & 0x01) ? blinkBg : plainBg,
            patternArea[nameArea[7] * 8]);
    }
}

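// The horizontal scroll value is applied per character column: the low 5 bits
// select the starting column, bit 5 selects the second name-table page (see
// getNamePtr below).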
template <class Pixel>
const byte* CharacterConverter<Pixel>::getNamePtr(int line, int scroll)
{
    // no need to test whether multi-page scrolling is enabled,
    // indexMask in the nameTable already takes care of it
    return vram.nameTable.getReadArea(
        ((line / 8) * 32) | ((scroll & 0x20) ? 0x8000 : 0), 32);
}
template <class Pixel>
void CharacterConverter<Pixel>::renderGraphic1(
    Pixel* __restrict pixelPtr, int line)
{
    bool misAligned; uint32_t partial;
#ifdef __arm__
    misAligned = sizeof(Pixel) == 2 && (reinterpret_cast<uintptr_t>(pixelPtr) & 3);
    if (misAligned) pixelPtr--;
    partial = *pixelPtr;
#endif

    const byte* patternArea = vram.patternTable.getReadArea(0, 256 * 8);
    patternArea += line & 7;
    const byte* colorArea = vram.colorTable.getReadArea(0, 256 / 8);

    int scroll = vdp.getHorizontalScrollHigh();
    const byte* namePtr = getNamePtr(line, scroll);
    for (unsigned n = 0; n < 32; ++n) {
        unsigned charcode = namePtr[scroll & 0x1F];
        unsigned pattern = patternArea[charcode * 8];
        unsigned color = colorArea[charcode / 8];
        Pixel fg = palFg[color >> 4];
        Pixel bg = palFg[color & 0x0F];
        draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
        if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
    }

#ifdef __arm__
    if (misAligned) *pixelPtr = static_cast<Pixel>(partial);
#endif
}

template <class Pixel>
void CharacterConverter<Pixel>::renderGraphic2(
    Pixel* __restrict pixelPtr, int line)
{
    bool misAligned; uint32_t partial;
#ifdef __arm__
    misAligned = sizeof(Pixel) == 2 && (reinterpret_cast<uintptr_t>(pixelPtr) & 3);
    if (misAligned) pixelPtr--;
    partial = *pixelPtr;
#endif

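    // GRAPHIC2/3 splits the screen into three bands of 8 character rows; each
    // band uses its own 0x800-byte block of the pattern and color tables.
    // 'quarter8' is the byte offset of the current band's block (0x0000,
    // 0x0800 or 0x1000) and 'line7' the row within the character.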
    int quarter8 = (((line / 8) * 32) & ~0xFF) * 8;
    int line7 = line & 7;
    int scroll = vdp.getHorizontalScrollHigh();
    const byte* namePtr = getNamePtr(line, scroll);

    if (vram.colorTable  .isContinuous((8 * 256) - 1) &&
        vram.patternTable.isContinuous((8 * 256) - 1) &&
        ((scroll & 0x1f) == 0)) {
        // Both color and pattern table can be accessed contiguously
        // (no mirroring) and there's no v9958 horizontal scrolling.
        // This is very common, so make an optimized version for this.
        const byte* patternArea = vram.patternTable.getReadArea(quarter8, 8 * 256) + line7;
        const byte* colorArea   = vram.colorTable  .getReadArea(quarter8, 8 * 256) + line7;
        for (unsigned n = 0; n < 32; ++n) {
            unsigned charCode8 = namePtr[n] * 8;
            unsigned pattern = patternArea[charCode8];
            unsigned color   = colorArea  [charCode8];
            Pixel fg = palFg[color >> 4];
            Pixel bg = palFg[color & 0x0F];
            draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
        }
    } else {
        // Slower variant, also works when:
        // - there is mirroring in the color table
        // - there is mirroring in the pattern table (TMS9929)
        // - V9958 horizontal scroll feature is used
        int baseLine = (~0u << 13) | quarter8 | line7;
        for (unsigned n = 0; n < 32; ++n) {
            unsigned charCode8 = namePtr[scroll & 0x1F] * 8;
            unsigned index = charCode8 | baseLine;
            unsigned pattern = vram.patternTable.readNP(index);
            unsigned color   = vram.colorTable  .readNP(index);
            Pixel fg = palFg[color >> 4];
            Pixel bg = palFg[color & 0x0F];
            draw8(pixelPtr, fg, bg, pattern, misAligned, partial);
            if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
        }
    }

#ifdef __arm__
    if (misAligned) *pixelPtr = static_cast<Pixel>(partial);
#endif
}

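// MULTICOLOR (screen 3): each pattern-table byte holds two 4-bit colors, each
// shown as a 4x4 pixel block; (line / 4) & 7 selects the byte row inside the
// pattern for the current scanline.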
template <class Pixel>
void CharacterConverter<Pixel>::renderMultiHelper(
    Pixel* __restrict pixelPtr, int line,
    int mask, int patternQuarter)
{
    unsigned baseLine = mask | ((line / 4) & 7);
    unsigned scroll = vdp.getHorizontalScrollHigh();
    const byte* namePtr = getNamePtr(line, scroll);
    for (unsigned n = 0; n < 32; ++n) {
        unsigned patternNr = patternQuarter | namePtr[scroll & 0x1F];
        unsigned color = vram.patternTable.readNP((patternNr * 8) | baseLine);
        Pixel cl = palFg[color >> 4];
        Pixel cr = palFg[color & 0x0F];
        pixelPtr[0] = cl; pixelPtr[1] = cl;
        pixelPtr[2] = cl; pixelPtr[3] = cl;
        pixelPtr[4] = cr; pixelPtr[5] = cr;
        pixelPtr[6] = cr; pixelPtr[7] = cr;
        pixelPtr += 8;
        if (!(++scroll & 0x1F)) namePtr = getNamePtr(line, scroll);
    }
}
template <class Pixel>
void CharacterConverter<Pixel>::renderMulti(
    Pixel* __restrict pixelPtr, int line)
{
    int mask = (~0u << 11);
    renderMultiHelper(pixelPtr, line, mask, 0);
}

template <class Pixel>
void CharacterConverter<Pixel>::renderMultiQ(
    Pixel* __restrict pixelPtr, int line)
{
    int mask = (~0u << 13);
    int patternQuarter = (line * 4) & ~0xFF; // (line / 8) * 32
    renderMultiHelper(pixelPtr, line, mask, patternQuarter);
}

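// Fixed pattern shown by MSX1 VDPs for the remaining (invalid) mode
// combinations: 8 background pixels, 40 groups of 4 foreground plus
// 2 background pixels, and 8 background pixels again.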
template <class Pixel>
void CharacterConverter<Pixel>::renderBogus(
    Pixel* __restrict pixelPtr)
{
    Pixel fg = palFg[vdp.getForegroundColor()];
    Pixel bg = palFg[vdp.getBackgroundColor()];
    for (int n = 8; n--; ) *pixelPtr++ = bg;
    for (int c = 40; c--; ) {
        for (int n = 4; n--; ) *pixelPtr++ = fg;
        for (int n = 2; n--; ) *pixelPtr++ = bg;
    }
    for (int n = 8; n--; ) *pixelPtr++ = bg;
}

template <class Pixel>
void CharacterConverter<Pixel>::renderBlank(
    Pixel* __restrict pixelPtr)
{
    // when this is in effect, the VRAM is not refreshed anymore, but that
    // is not emulated
    for (int n = 256; n--; ) *pixelPtr++ = palFg[15];
}

// Force template instantiation.
#if HAVE_16BPP
template class CharacterConverter<uint16_t>;
#endif
#if HAVE_32BPP || COMPONENT_GL
template class CharacterConverter<uint32_t>;
#endif

} // namespace openmsx
Referenced symbols:
- byte: unsigned char, an 8-bit unsigned integer (openmsx.hh:33)
- DisplayMode: represents a VDP display mode (DisplayMode.hh:14)
- Pixel: unsigned
- CharacterConverter(VDP& vdp, const Pixel* palFg, const Pixel* palBg): create a new bitmap scanline converter
- convertLine(Pixel* linePtr, int line): convert a line of V9938 VRAM to 512 host pixels
- VDP: unified implementation of MSX Video Display Processors (VDPs) (VDP.hh:66)
- DisplayMode::getBase(): get the base display mode as an integer: M5..M1 combined (DisplayMode.hh:123)
- setDisplayMode(DisplayMode mode): select the display mode to use for scanline conversion