/**
 * Implement the elliptic gradient fill style. dplug:canvas internals.
 *
 * Copyright: Copyright Chris Jones 2020.
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 */
module dplug.canvas.ellipticalblit;

import dplug.canvas.rasterizer;
import dplug.canvas.gradient;
import dplug.canvas.misc;

/**
 * Scanline blitter that fills covered pixels with an elliptical gradient.
 *
 * For every pixel two linear terms t0 and t1 are evaluated; the gradient
 * lookup index is `sqrt(t0*t0 + t1*t1)`, truncated to an integer and clamped
 * to `[0, lutLength - 1]`.
 */
struct EllipticalBlit
{
nothrow:
@nogc:

    /**
     * Prepares the blitter for a gradient.
     *
     * Params:
     *   g      = gradient supplying the color lookup table, must not be null
     *   x0, y0 = stored as the center (`xctr`, `yctr`) of the gradient
     *   x1, y1 = second point; `(x1-x0, y1-y0)` is the direction of the t0 term
     *   r2     = divisor applied to the perpendicular (t1) term. It is used
     *            as a denominator, so a zero value yields non-finite steps.
     *            NOTE(review): presumably the extent along the second axis of
     *            the ellipse - confirm against the caller.
     */
    void init(Gradient g, float x0, float y0, float x1, float y1, float r2)
    {
        assert(g !is null);
        this.gradient = g;
        int lutsize = g.lutLength;

        xctr = x0;
        yctr = y0;
        float w = x1-x0;
        float h = y1-y0;
        float hyp = w*w + h*h;
        // Clamp the squared length so the divisions below stay well defined
        // when both points (nearly) coincide.
        if (hyp < 1.0) hyp = 1.0;
        // t0 steps: projection onto (w,h), scaled by lutsize / squared length.
        xstep0 = lutsize * w / hyp;
        ystep0 = lutsize * h / hyp;
        hyp = sqrt(hyp);
        // t1 steps: projection onto the perpendicular (h,-w), scaled by
        // lutsize / (r2 * length).
        xstep1 = lutsize * h / (r2*hyp);
        ystep1 = lutsize * -w / (r2*hyp);
    }

private:

    /**
     * Blits one scanline, processing pixels in groups of four.
     *
     * Params:
     *   wr    = winding rule used to turn the accumulated winding into coverage
     *   dest  = destination pixels of the scanline, 32 bits per pixel
     *   delta = per-pixel winding deltas; they are integrated left to right
     *           and the chunks consumed by the integration loop are reset to 0
     *   mask  = bit mask searched with `nextSetBit` for the next 4-pixel chunk
     *           that needs delta integration
     *           NOTE(review): exact mask layout is defined in the rasterizer
     *   x0    = first pixel, must be >= 0 and a multiple of 4
     *   x1    = end pixel (exclusive), must be a multiple of 4
     *   y     = scanline index, must be >= 0
     */
    void color_blit(WindingRule wr)(uint* dest, int* delta, DMWord* mask, int x0, int x1, int y)
    {
        assert(x0 >= 0);
        assert(y >= 0);
        assert((x0 & 3) == 0);
        assert((x1 & 3) == 0);

        // main blit variables

        // bpos / endbit count 4-pixel chunks, not pixels.
        int bpos = x0 / 4;
        int endbit = x1 / 4;

        // Running winding value carried from chunk to chunk (all lanes equal).
        __m128i xmWinding = 0;
        uint* lut = gradient.getLookup.ptr;
        short lutMax = cast(short)(gradient.lutLength - 1);
        // Hard-coded to false, so the opaque fast path below is never taken.
        bool isopaque = false;//gradient.isOpaque

        // XMM constants

        immutable __m128i XMZERO = 0;
        immutable __m128i XMFFFF = [0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF];
        immutable __m128i XMMSK16 = [0xFFFF,0xFFFF,0xFFFF,0xFFFF];

        // paint variables

        // xmT0 / xmT1 hold the two gradient terms for the 4 pixels of the
        // current chunk; xmStep0 / xmStep1 advance them by one chunk.
        float t0 = (bpos*4-xctr)*xstep0 + (y-yctr)*ystep0;
        __m128 xmT0 = _mm_mul_ps(_mm_set1_ps(xstep0), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT0 = _mm_add_ps(xmT0, _mm_set1_ps(t0));
        __m128 xmStep0 = _mm_set1_ps(xstep0*4);

        float t1 = (bpos*4-xctr)*xstep1 + (y-yctr)*ystep1;
        __m128 xmT1 = _mm_mul_ps(_mm_set1_ps(xstep1), _mm_setr_ps(0.0f,1.0f,2.0f,3.0f));
        xmT1 = _mm_add_ps(xmT1, _mm_set1_ps(t1));
        __m128 xmStep1 = _mm_set1_ps(xstep1*4);

        // main loop

        while (bpos < endbit)
        {
            int nsb = nextSetBit(mask, bpos, endbit);

            // do we have a span of unchanging coverage?

            if (bpos < nsb)
            {
                // Calc coverage of first pixel

                static if (wr == WindingRule.NonZero)
                {
                    // |winding| * 2, saturated to 16 bits
                    int cover = xmWinding[3]+delta[bpos*4];
                    cover = abs(cover)*2;
                    if (cover > 0xFFFF) cover = 0xFFFF;
                }
                else
                {
                    // Fold the low 16 bits of the winding: negative values
                    // are bitwise inverted, then the result is doubled.
                    int cover = xmWinding[3]+delta[bpos*4];
                    short tsc = cast(short) cover;
                    cover = (tsc ^ (tsc >> 15)) * 2;
                }

                // We can skip the span

                if (cover == 0)
                {
                    // Nothing to draw: only advance the gradient terms by
                    // the number of skipped chunks.
                    __m128 tsl = _mm_set1_ps(nsb-bpos);
                    xmT0 = _mm_add_ps(xmT0, _mm_mul_ps(tsl,xmStep0));
                    xmT1 = _mm_add_ps(xmT1, _mm_mul_ps(tsl,xmStep1));
                    bpos = nsb;
                }

                // Or fill span with solid color

                else if (isopaque && (cover > 0xFF00))
                {
                    uint* ptr = &dest[bpos*4];
                    uint* end = ptr + ((nsb-bpos)*4);

                    while (ptr < end)
                    {
                        __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        rad = _mm_sqrt_ps(rad);
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        __m128i ipos = _mm_cvttps_epi32 (rad);
                        ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);

                        // Copy the gradient colors straight to the destination.
                        ptr[0] = lut[ ipos.array[0] ];
                        ptr[1] = lut[ ipos.array[1] ];
                        ptr[2] = lut[ ipos.array[2] ];
                        ptr[3] = lut[ ipos.array[3] ];

                        ptr+=4;
                    }

                    bpos = nsb;
                }

                // Or fill span with transparent color

                else
                {
                    // Same coverage for every pixel of the span.
                    __m128i tqcvr = _mm_set1_epi16 (cast(ushort) cover);

                    uint* ptr = &dest[bpos*4];
                    uint* end = &dest[nsb*4];

                    while (ptr < end)
                    {
                        __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                        xmT0 = xmT0 + xmStep0;
                        xmT1 = xmT1 + xmStep1;
                        rad = _mm_sqrt_ps(rad);

                        // Load 4 destination pixels, widened to 16 bits per
                        // channel (2 pixels per register).
                        __m128i d0 = _mm_loadu_si64 (ptr);
                        d0 = _mm_unpacklo_epi8 (d0, XMZERO);
                        __m128i d1 = _mm_loadu_si64 (ptr+2);
                        d1 = _mm_unpacklo_epi8 (d1, XMZERO);

                        __m128i ipos = _mm_cvttps_epi32 (rad);
                        ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);

                        // Gather gradient colors for pixels 0,1 and scale
                        // their alpha by the coverage.
                        __m128i c0 = _mm_loadu_si32 (&lut[ ipos.array[0] ]);
                        __m128i tnc = _mm_loadu_si32 (&lut[ ipos.array[1] ]);
                        c0 = _mm_unpacklo_epi32 (c0, tnc);
                        c0 = _mm_unpacklo_epi8 (c0, XMZERO);
                        __m128i a0 = _mm_broadcast_alpha(c0);
                        a0 = _mm_mulhi_epu16(a0, tqcvr);

                        // Same for pixels 2,3.
                        __m128i c1 = _mm_loadu_si32 (&lut[ ipos.array[2] ]);
                        tnc = _mm_loadu_si32 (&lut[ ipos.array[3] ]);
                        c1 = _mm_unpacklo_epi32 (c1, tnc);
                        c1 = _mm_unpacklo_epi8 (c1, XMZERO);
                        __m128i a1 = _mm_broadcast_alpha(c1);
                        a1 = _mm_mulhi_epu16(a1, tqcvr);

                        // alpha*source + dest - alpha*dest

                        c0 = _mm_mulhi_epu16 (c0,a0);
                        c1 = _mm_mulhi_epu16 (c1,a1);
                        c0 = _mm_adds_epi16 (c0,d0);
                        c1 = _mm_adds_epi16 (c1,d1);
                        d0 = _mm_mulhi_epu16 (d0,a0);
                        d1 = _mm_mulhi_epu16 (d1,a1);
                        c0 = _mm_subs_epi16 (c0, d0);
                        c1 = _mm_subs_epi16 (c1, d1);

                        // Narrow back to 8 bits per channel with saturation.
                        d0 = _mm_packus_epi16 (c0,c1);

                        _mm_storeu_si128 (cast(__m128i*)ptr,d0);

                        ptr+=4;
                    }

                    bpos = nsb;
                }
            }

            // At this point we need to integrate scandelta

            uint* ptr = &dest[bpos*4];
            int* dlptr = &delta[bpos*4];

            while (bpos < endbit)
            {
                __m128 rad = _mm_add_ps(_mm_mul_ps(xmT0, xmT0),_mm_mul_ps(xmT1, xmT1));
                rad = _mm_sqrt_ps(rad);

                // Integrate delta values

                // Prefix sum over the 4 deltas plus the carried winding;
                // lane 3 becomes the winding carried into the next chunk.
                // The consumed deltas are cleared.
                __m128i tqw = _mm_loadu_si128(cast(__m128i*)dlptr);
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!4(tqw));
                tqw = _mm_add_epi32(tqw, _mm_slli_si128!8(tqw));
                tqw = _mm_add_epi32(tqw, xmWinding);
                xmWinding = _mm_shuffle_epi32!255(tqw);
                _mm_storeu_si128(cast(__m128i*)dlptr,XMZERO);

                // convert grad pos to integer

                __m128i ipos = _mm_cvttps_epi32(rad);
                ipos = _mm_clamp_0_to_N_epi32(ipos, lutMax);
                xmT0 = xmT0 + xmStep0;
                xmT1 = xmT1 + xmStep1;

                // Process coverage values taking account of winding rule

                static if (wr == WindingRule.NonZero)
                {
                    __m128i tcvr = _mm_srai_epi32(tqw,31);
                    tqw = _mm_add_epi32(tcvr,tqw);
                    tqw = _mm_xor_si128(tqw,tcvr);        // abs
                    tcvr = _mm_packs_epi32(tqw,XMZERO);   // saturate/pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }
                else
                {
                    __m128i tcvr = _mm_and_si128(tqw,XMMSK16);
                    tqw = _mm_srai_epi16(tcvr,15);        // mask
                    tcvr = _mm_xor_si128(tcvr,tqw);       // fold in half
                    tcvr = _mm_packs_epi32(tcvr,XMZERO);  // pack to int16
                    tcvr = _mm_slli_epi16(tcvr, 1);       // << to uint16
                }

                // Load destination pixels

                __m128i d0 = _mm_loadu_si64 (ptr);
                d0 = _mm_unpacklo_epi8 (d0, XMZERO);
                __m128i d1 = _mm_loadu_si64 (ptr+2);
                d1 = _mm_unpacklo_epi8 (d1, XMZERO);

                // load grad colors

                // Replicate each pixel's 16-bit coverage across 4 lanes:
                // tcvr covers pixels 0,1 and tcvr2 covers pixels 2,3.
                tcvr = _mm_unpacklo_epi16 (tcvr, tcvr);
                __m128i tcvr2 = _mm_unpackhi_epi32 (tcvr, tcvr);
                tcvr = _mm_unpacklo_epi32 (tcvr, tcvr);

                __m128i c0 = _mm_loadu_si32 (&lut[ ipos.array[0] ]);
                __m128i tnc = _mm_loadu_si32 (&lut[ ipos.array[1] ]);
                c0 = _mm_unpacklo_epi32 (c0, tnc);
                c0 = _mm_unpacklo_epi8 (c0, XMZERO);
                __m128i a0 = _mm_broadcast_alpha(c0);
                a0 = _mm_mulhi_epu16(a0, tcvr);

                __m128i c1 = _mm_loadu_si32 (&lut[ ipos.array[2] ]);
                tnc = _mm_loadu_si32 (&lut[ ipos.array[3] ]);
                c1 = _mm_unpacklo_epi32 (c1, tnc);
                c1 = _mm_unpacklo_epi8 (c1, XMZERO);
                __m128i a1 = _mm_broadcast_alpha(c1);
                a1 = _mm_mulhi_epu16(a1, tcvr2);

                // alpha*source + dest - alpha*dest

                c0 = _mm_mulhi_epu16 (c0,a0);
                c1 = _mm_mulhi_epu16 (c1,a1);
                c0 = _mm_adds_epi16 (c0,d0);
                c1 = _mm_adds_epi16 (c1,d1);
                d0 = _mm_mulhi_epu16 (d0,a0);
                d1 = _mm_mulhi_epu16 (d1,a1);
                c0 = _mm_subs_epi16 (c0, d0);
                c1 = _mm_subs_epi16 (c1, d1);

                d0 = _mm_packus_epi16 (c0,c1);

                _mm_storeu_si128 (cast(__m128i*)ptr,d0);

                bpos++;
                ptr+=4;
                dlptr+=4;

                // End of the requested range: stop before peeking at the next
                // chunk so that delta[x1 ..] is never read. Both loops would
                // terminate here anyway.
                if (bpos >= endbit) break;

                // If the next 4 deltas are all zero the coverage is constant
                // again; go back to the span handling of the outer loop.
                if (((cast(ulong*)dlptr)[0] | (cast(ulong*)dlptr)[1]) == 0) break;
            }
        }
    }

    // Member variables

    Gradient gradient;       // source of the color lookup table
    float xctr,yctr;         // gradient center
    float xstep0,ystep0;     // per-pixel x/y increments of the first term (t0)
    float xstep1,ystep1;     // per-pixel x/y increments of the second term (t1)
}

/// Blit callback for the non-zero winding rule: `userData` is cast to an
/// `EllipticalBlit*` and the call is forwarded to its `color_blit`.
void doBlit_EllipticalBlit_NonZero(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y) nothrow @nogc
{
    EllipticalBlit* cb = cast(EllipticalBlit*)userData;
    return cb.color_blit!(WindingRule.NonZero)(dest, delta, mask, x0, x1, y);
}

/// Blit callback for the even-odd winding rule: `userData` is cast to an
/// `EllipticalBlit*` and the call is forwarded to its `color_blit`.
void doBlit_EllipticalBlit_EvenOdd(void* userData, uint* dest, int* delta, DMWord* mask, int x0, int x1, int y) nothrow @nogc
{
    EllipticalBlit* cb = cast(EllipticalBlit*)userData;
    return cb.color_blit!(WindingRule.EvenOdd)(dest, delta, mask, x0, x1, y);
}