summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'lcms2mt/src/cmsintrp.c')
-rw-r--r--lcms2mt/src/cmsintrp.c996
1 files changed, 586 insertions, 410 deletions
diff --git a/lcms2mt/src/cmsintrp.c b/lcms2mt/src/cmsintrp.c
index 651e25c3..555125e3 100644
--- a/lcms2mt/src/cmsintrp.c
+++ b/lcms2mt/src/cmsintrp.c
@@ -85,6 +85,10 @@ cmsBool _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Data)
cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
{
_cmsInterpPluginChunkType* ptr = (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+ cmsUInt32Number flags = 0;
+
+ if (ContextID->dwFlags & cmsFLAGS_FORCE_LINEARINTERP)
+ flags = CMS_LERP_FLAGS_TRILINEAR;
p ->Interpolation.Lerp16 = NULL;
@@ -95,7 +99,7 @@ cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
// If unsupported by the plug-in, go for the LittleCMS default.
// If happens only if an extern plug-in is being used
if (p ->Interpolation.Lerp16 == NULL)
- p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags);
+ p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags | flags);
// Check for valid interpolator (we just check one member of the union)
if (p ->Interpolation.Lerp16 == NULL) {
@@ -607,6 +611,149 @@ void TrilinearInterp16(cmsContext ContextID, register const cmsUInt16Number Inpu
# undef DENS
}
+static
+void QuadrilinearInterpFloat(cmsContext ContextID,
+ const cmsFloat32Number Input[],
+ cmsFloat32Number Output[],
+ const cmsInterpParams *p)
+
+{
+ cmsFloat32Number rest;
+ cmsFloat32Number pk;
+ int k0;
+ cmsUInt32Number i, n;
+ cmsFloat32Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p;
+ cmsFloat32Number i0 = fclamp(Input[0]);
+
+ pk = i0 * p->Domain[0];
+ k0 = _cmsQuickFloor(pk);
+ rest = pk - (cmsFloat32Number) k0;
+
+ memmove(&p1.Domain[0], &p ->Domain[1], 3*sizeof(cmsUInt32Number));
+ p1.Table = ((cmsFloat32Number*) p -> Table) + p -> opta[3] * k0;
+
+ TrilinearInterpFloat(ContextID, Input + 1, Output, &p1);
+
+ if (i0 == 1.0)
+ return;
+
+ p1.Table = ((cmsFloat32Number*) p1.Table) + p->opta[3];
+ TrilinearInterpFloat(ContextID, Input + 1, Tmp, &p1);
+
+ n = p -> nOutputs;
+ for (i=0; i < n; i++) {
+ cmsFloat32Number y0 = Output[i];
+ cmsFloat32Number y1 = Tmp[i];
+
+ Output[i] = y0 + (y1 - y0) * rest;
+ }
+}
+
+static CMS_NO_SANITIZE
+void QuadrilinearInterp16(cmsContext ContextID,
+ register const cmsUInt16Number Input[],
+ register cmsUInt16Number Output[],
+ register const cmsInterpParams *p)
+
+{
+#define DENS(i,j,k,l) (LutTable[(i)+(j)+(k)+(l)+OutChan])
+#define LERP(a,l,h) (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+
+ const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+ int OutChan, TotalOut;
+ cmsS15Fixed16Number fx, fy, fz, fk;
+ register int rx, ry, rz, rk;
+ int x0, y0, z0, k0;
+ register int X0, X1, Y0, Y1, Z0, Z1, K0, K1;
+ int d0000, d0001, d0010, d0011,
+ d0100, d0101, d0110, d0111,
+ d1000, d1001, d1010, d1011,
+ d1100, d1101, d1110, d1111,
+ d000, d001, d010, d011,
+ d100, d101, d110, d111,
+ dx00, dx01, dx10, dx11,
+ dxy0, dxy1, dxyz;
+ cmsUNUSED_PARAMETER(ContextID);
+
+ TotalOut = p -> nOutputs;
+
+ fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+ x0 = FIXED_TO_INT(fx);
+ rx = FIXED_REST_TO_INT(fx); // Rest in 0..1.0 domain
+
+
+ fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+ y0 = FIXED_TO_INT(fy);
+ ry = FIXED_REST_TO_INT(fy);
+
+ fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+ z0 = FIXED_TO_INT(fz);
+ rz = FIXED_REST_TO_INT(fz);
+
+ fk = _cmsToFixedDomain((int) Input[3] * p -> Domain[3]);
+ k0 = FIXED_TO_INT(fk);
+ rk = FIXED_REST_TO_INT(fk);
+
+
+ X0 = p -> opta[3] * x0;
+ X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[3]);
+
+ Y0 = p -> opta[2] * y0;
+ Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[2]);
+
+ Z0 = p -> opta[1] * z0;
+ Z1 = Z0 + (Input[2] == 0xFFFFU ? 0 : p->opta[1]);
+
+ K0 = p -> opta[0] * k0;
+ K1 = K0 + (Input[3] == 0xFFFFU ? 0 : p->opta[0]);
+
+ for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+ d0000 = DENS(X0, Y0, Z0, K0);
+ d0001 = DENS(X0, Y0, Z0, K1);
+ d000 = LERP(rk, d0000, d0001);
+ d0010 = DENS(X0, Y0, Z1, K0);
+ d0011 = DENS(X0, Y0, Z1, K1);
+ d001 = LERP(rk, d0010, d0011);
+ d0100 = DENS(X0, Y1, Z0, K0);
+ d0101 = DENS(X0, Y1, Z0, K1);
+ d010 = LERP(rk, d0100, d0101);
+ d0110 = DENS(X0, Y1, Z1, K0);
+ d0111 = DENS(X0, Y1, Z1, K1);
+ d011 = LERP(rk, d0110, d0111);
+
+ d1000 = DENS(X1, Y0, Z0, K0);
+ d1001 = DENS(X1, Y0, Z0, K1);
+ d100 = LERP(rk, d1000, d1001);
+ d1010 = DENS(X1, Y0, Z1, K0);
+ d1011 = DENS(X1, Y0, Z1, K1);
+ d101 = LERP(rk, d1010, d1011);
+ d1100 = DENS(X1, Y1, Z0, K0);
+ d1101 = DENS(X1, Y1, Z0, K1);
+ d110 = LERP(rk, d1100, d1101);
+ d1110 = DENS(X1, Y1, Z1, K0);
+ d1111 = DENS(X1, Y1, Z1, K1);
+ d111 = LERP(rk, d1110, d1111);
+
+ dx00 = LERP(rx, d000, d100);
+ dx01 = LERP(rx, d001, d101);
+ dx10 = LERP(rx, d010, d110);
+ dx11 = LERP(rx, d011, d111);
+
+ dxy0 = LERP(ry, dx00, dx10);
+ dxy1 = LERP(ry, dx01, dx11);
+
+ dxyz = LERP(rz, dxy0, dxy1);
+
+ Output[OutChan] = (cmsUInt16Number) dxyz;
+ }
+
+
+# undef LERP
+# undef DENS
+}
+
// Tetrahedral interpolation, using Sakamoto algorithm.
#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
@@ -846,27 +993,25 @@ void TetrahedralInterp16(cmsContext ContextID, register const cmsUInt16Number In
}
-#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+/* Pentachoronal Interpolation */
static CMS_NO_SANITIZE
void Eval4Inputs(cmsContext ContextID, register const cmsUInt16Number Input[],
register cmsUInt16Number Output[],
register const cmsInterpParams* p16)
{
- const cmsUInt16Number* LutTable;
- cmsS15Fixed16Number fk;
- cmsS15Fixed16Number k0, rk;
- int K0, K1;
- cmsS15Fixed16Number fx, fy, fz;
- cmsS15Fixed16Number rx, ry, rz;
- int x0, y0, z0;
- cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1;
- cmsUInt32Number i;
- cmsS15Fixed16Number c0, c1, c2, c3, Rest;
+ const cmsUInt16Number *LutTable;
+ cmsS15Fixed16Number fx, fy, fz, fk;
+ cmsS15Fixed16Number rx, ry, rz, rk;
+ cmsS15Fixed16Number m1, m2, m3, m4;
+ int x0, y0, z0, k0;
+ cmsS15Fixed16Number X0, X1, Y0, Y1, Z0, Z1, K0, K1;
+ cmsS15Fixed16Number o1, o2, o3, o4;
+ cmsS15Fixed16Number c0, c1, c2, c3, c4, Rest;
cmsUInt32Number OutChan;
- cmsUInt16Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
+ cmsUInt16Number *Out = Output;
+ int which;
cmsUNUSED_PARAMETER(ContextID);
-
fk = _cmsToFixedDomain((int) Input[0] * p16 -> Domain[0]);
fx = _cmsToFixedDomain((int) Input[1] * p16 -> Domain[1]);
fy = _cmsToFixedDomain((int) Input[2] * p16 -> Domain[2]);
@@ -883,240 +1028,326 @@ void Eval4Inputs(cmsContext ContextID, register const cmsUInt16Number Input[],
rz = FIXED_REST_TO_INT(fz);
K0 = p16 -> opta[3] * k0;
- K1 = K0 + (Input[0] == 0xFFFFU ? 0 : p16->opta[3]);
+ K1 = (Input[0] == 0xFFFFU ? 0 : p16->opta[3]);
X0 = p16 -> opta[2] * x0;
- X1 = X0 + (Input[1] == 0xFFFFU ? 0 : p16->opta[2]);
+ X1 = (Input[1] == 0xFFFFU ? 0 : p16->opta[2]);
Y0 = p16 -> opta[1] * y0;
- Y1 = Y0 + (Input[2] == 0xFFFFU ? 0 : p16->opta[1]);
+ Y1 = (Input[2] == 0xFFFFU ? 0 : p16->opta[1]);
Z0 = p16 -> opta[0] * z0;
- Z1 = Z0 + (Input[3] == 0xFFFFU ? 0 : p16->opta[0]);
+ Z1 = (Input[3] == 0xFFFFU ? 0 : p16->opta[0]);
LutTable = (cmsUInt16Number*) p16 -> Table;
- LutTable += K0;
-
- for (OutChan=0; OutChan < p16 -> nOutputs; OutChan++) {
-
- c0 = DENS(X0, Y0, Z0);
-
- if (rx >= ry && ry >= rz) {
-
- c1 = DENS(X1, Y0, Z0) - c0;
- c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
- c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
-
- }
- else
- if (rx >= rz && rz >= ry) {
-
- c1 = DENS(X1, Y0, Z0) - c0;
- c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
- c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
-
- }
- else
- if (rz >= rx && rx >= ry) {
-
- c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
- c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
- c3 = DENS(X0, Y0, Z1) - c0;
-
- }
- else
- if (ry >= rx && rx >= rz) {
-
- c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
- c2 = DENS(X0, Y1, Z0) - c0;
- c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
-
- }
- else
- if (ry >= rz && rz >= rx) {
-
- c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
- c2 = DENS(X0, Y1, Z0) - c0;
- c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
-
- }
- else
- if (rz >= ry && ry >= rx) {
-
- c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
- c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
- c3 = DENS(X0, Y0, Z1) - c0;
-
- }
- else {
- c1 = c2 = c3 = 0;
- }
-
- Rest = c1 * rx + c2 * ry + c3 * rz;
-
- Tmp1[OutChan] = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+ LutTable += K0 + X0 + Y0 + Z0;
+
+ /* We carefully choose the following tests, a) cos these
+ * work nicely in SSE (see CAL), and b) because, as well
+ * as the standard 24 pentachorons, we get some useful
+ * special cases. */
+ which = (rx > ry ? 1 : 0) +
+ (ry > rz ? 2 : 0) +
+ (rz > rk ? 4 : 0) +
+ (rk > rx ? 8 : 0) +
+ (rz > rx ? 16 : 0) +
+ (rk > ry ? 32 : 0);
+
+ o4 = X1+Y1+Z1+K1;
+ switch(which)
+ {
+ default: /* Never happens, but stops the compiler complaining of uninitialised vars */
+ case 0x00: /* x == y == z == k - special case */
+ m1 = rx; goto one_lerp;
+ case 0x01: /* x > k == z == y - special case */
+ o1 = X1; m1 = rx; m2 = ry; goto two_lerps;
+ case 0x18: /* y == z == k > x - special case */
+ o1 = Y1+Z1+K1; m1 = ry; m2 = rx; goto two_lerps;
+ case 0x04: /* z == y == x > k - special case */
+ o1 = X1+Y1+Z1; m1 = ry; m2 = rk; goto two_lerps;
+ case 0x28: /* k > z == y == x - special case */
+ o1 = K1; m1 = rk; m2 = ry; goto two_lerps;
+ case 0x02: /* y >= x >= k >= z */
+ o1 = Y1; o2 = X1; o3 = K1; m1 = ry; m2 = rx; m3 = rk; m4 = rz; break;
+ case 0x03: /* x > y >= k >= z */
+ o1 = X1; o2 = Y1; o3 = K1; m1 = rx; m2 = ry; m3 = rk; m4 = rz; break;
+ case 0x05: /* x >= z >= y >= k */
+ o1 = X1; o2 = Z1; o3 = Y1; m1 = rx; m2 = rz; m3 = ry; m4 = rk; break;
+ case 0x06: /* y >= x >= z > k */
+ o1 = Y1; o2 = X1; o3 = Z1; m1 = ry; m2 = rx; m3 = rz; m4 = rk; break;
+ case 0x07: /* x > y > z > k */
+ o1 = X1; o2 = Y1; o3 = Z1; m1 = rx; m2 = ry; m3 = rz; m4 = rk; break;
+ case 0x0a: /* y >= k > x >= z */
+ o1 = Y1; o2 = K1; o3 = X1; m1 = ry; m2 = rk; m3 = rx; m4 = rz; break;
+ case 0x14: /* z >= y >= x >= k */
+ o1 = Z1; o2 = Y1; o3 = X1; m1 = rz; m2 = ry; m3 = rx; m4 = rk; break;
+ case 0x15: /* z > x >= y >= k */
+ o1 = Z1; o2 = X1; o3 = Y1; m1 = rz; m2 = rx; m3 = ry; m4 = rk; break;
+ case 0x16: /* y >= z > x >= k */
+ o1 = Y1; o2 = Z1; o3 = X1; m1 = ry; m2 = rz; m3 = rx; m4 = rk; break;
+ case 0x1a: /* y >= k >= z > x */
+ o1 = Y1; o2 = K1; o3 = Z1; m1 = ry; m2 = rk; m3 = rz; m4 = rx; break;
+ case 0x1c: /* z >= y >= k > x */
+ o1 = Z1; o2 = Y1; o3 = K1; m1 = rz; m2 = ry; m3 = rk; m4 = rx; break;
+ case 0x1e: /* y > z > k > x */
+ o1 = Y1; o2 = Z1; o3 = K1; m1 = ry; m2 = rz; m3 = rk; m4 = rx; break;
+ case 0x21: /* x >= k >= z >= y */
+ o1 = X1; o2 = K1; o3 = Z1; m1 = rx; m2 = rk; m3 = rz; m4 = ry; break;
+ case 0x23: /* x >= k > y > z */
+ o1 = X1; o2 = K1; o3 = Y1; m1 = rx; m2 = rk; m3 = ry; m4 = rz; break;
+ case 0x25: /* x >= z > k > y */
+ o1 = X1; o2 = Z1; o3 = K1; m1 = rx; m2 = rz; m3 = rk; m4 = ry; break;
+ case 0x29: /* k > x >= z >= y */
+ o1 = K1; o2 = X1; o3 = Z1; m1 = rk; m2 = rx; m3 = rz; m4 = ry; break;
+ case 0x2a: /* k > y >= x >= z */
+ o1 = K1; o2 = Y1; o3 = X1; m1 = rk; m2 = ry; m3 = rx; m4 = rz; break;
+ case 0x2b: /* k > x > y > z */
+ o1 = K1; o2 = X1; o3 = Y1; m1 = rk; m2 = rx; m3 = ry; m4 = rz; break;
+ case 0x35: /* z > x >= k > y */
+ o1 = Z1; o2 = X1; o3 = K1; m1 = rz; m2 = rx; m3 = rk; m4 = ry; break;
+ case 0x38: /* k >= z >= y >= x */
+ o1 = K1; o2 = Z1; o3 = Y1; m1 = rk; m2 = rz; m3 = ry; m4 = rx; break;
+ case 0x39: /* k >= z > x > y */
+ o1 = K1; o2 = Z1; o3 = X1; m1 = rk; m2 = rz; m3 = rx; m4 = ry; break;
+ case 0x3a: /* k > y > z > x */
+ o1 = K1; o2 = Y1; o3 = Z1; m1 = rk; m2 = ry; m3 = rz; m4 = rx; break;
+ case 0x3c: /* z > k > y >= x */
+ o1 = Z1; o2 = K1; o3 = Y1; m1 = rz; m2 = rk; m3 = ry; m4 = rx; break;
+ case 0x3d: /* z > k > x > y */
+ o1 = Z1; o2 = K1; o3 = X1; m1 = rz; m2 = rk; m3 = rx; m4 = ry; break;
}
+ assert(m1 >= m2 && m2 >= m3 && m3 >= m4);
+ o2 += o1;
+ o3 += o2;
+ for (OutChan=p16 -> nOutputs; OutChan != 0; OutChan--) {
+ c1 = LutTable[o1];
+ c2 = LutTable[o2];
+ c3 = LutTable[o3];
+ c4 = LutTable[o4] - c3;
+ c0 = *LutTable++;
+ c3 -= c2;
+ c2 -= c1;
+ c1 -= c0;
+
+ Rest = c1 * m1 + c2 * m2 + c3 * m3 + c4 * m4;
+
+ *Out++ = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+ }
+ return;
+two_lerps:
+ assert(m1 >= m2);
+ for (OutChan=p16 -> nOutputs; OutChan != 0; OutChan--) {
+ c1 = LutTable[o1];
+ c2 = LutTable[o4] - c1;
+ c0 = *LutTable++;
+ c1 -= c0;
- LutTable = (cmsUInt16Number*) p16 -> Table;
- LutTable += K1;
-
- for (OutChan=0; OutChan < p16 -> nOutputs; OutChan++) {
-
- c0 = DENS(X0, Y0, Z0);
-
- if (rx >= ry && ry >= rz) {
-
- c1 = DENS(X1, Y0, Z0) - c0;
- c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
- c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
-
- }
- else
- if (rx >= rz && rz >= ry) {
-
- c1 = DENS(X1, Y0, Z0) - c0;
- c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
- c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
-
- }
- else
- if (rz >= rx && rx >= ry) {
-
- c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
- c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
- c3 = DENS(X0, Y0, Z1) - c0;
-
- }
- else
- if (ry >= rx && rx >= rz) {
-
- c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
- c2 = DENS(X0, Y1, Z0) - c0;
- c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
-
- }
- else
- if (ry >= rz && rz >= rx) {
-
- c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
- c2 = DENS(X0, Y1, Z0) - c0;
- c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
-
- }
- else
- if (rz >= ry && ry >= rx) {
-
- c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
- c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
- c3 = DENS(X0, Y0, Z1) - c0;
-
- }
- else {
- c1 = c2 = c3 = 0;
- }
+ Rest = c1 * m1 + c2 * m2;
- Rest = c1 * rx + c2 * ry + c3 * rz;
-
- Tmp2[OutChan] = (cmsUInt16Number) (c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
+ *Out++ = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
}
+ return;
+one_lerp:
+ for (OutChan=p16 -> nOutputs; OutChan != 0; OutChan--) {
+ c1 = LutTable[o4];
+ c0 = *LutTable++;
+ c1 -= c0;
+ Rest = c1 * m1;
- for (i=0; i < p16 -> nOutputs; i++) {
- Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
+ *Out++ = (cmsUInt16Number)(c0 + ROUND_FIXED_TO_INT(_cmsToFixedDomain(Rest)));
}
+ return;
}
-#undef DENS
-
-
-// For more that 3 inputs (i.e., CMYK)
-// evaluate two 3-dimensional interpolations and then linearly interpolate between them.
-
static
void Eval4InputsFloat(cmsContext ContextID, const cmsFloat32Number Input[],
cmsFloat32Number Output[],
const cmsInterpParams* p)
{
- const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
- cmsFloat32Number rest;
- cmsFloat32Number pk;
- int k0, K0, K1;
- const cmsFloat32Number* T;
- cmsUInt32Number i;
- cmsFloat32Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
+ const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
+ cmsFloat32Number px, py, pz, pk;
+ int x0, y0, z0, k0;
+ int X0, Y0, Z0, K0, X1, Y1, Z1, K1;
+ cmsFloat32Number rx, ry, rz, rk;
+ cmsFloat32Number m1, m2, m3, m4;
+ cmsFloat32Number c0, c1, c2, c3, c4;
+ int o1, o2, o3, o4;
+ int OutChan, TotalOut;
+ cmsFloat32Number *Out = Output;
+ int which;
+ cmsUNUSED_PARAMETER(ContextID);
- pk = fclamp(Input[0]) * p->Domain[0];
- k0 = _cmsQuickFloor(pk);
- rest = pk - (cmsFloat32Number) k0;
+ TotalOut = p -> nOutputs;
- K0 = p -> opta[3] * k0;
- K1 = K0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[3]);
+ // We need some clipping here
+ pk = fclamp(Input[0]) * p->Domain[0];
+ px = fclamp(Input[1]) * p->Domain[1];
+ py = fclamp(Input[2]) * p->Domain[2];
+ pz = fclamp(Input[3]) * p->Domain[3];
- p1 = *p;
- memmove(&p1.Domain[0], &p ->Domain[1], 3*sizeof(cmsUInt32Number));
+ k0 = (int) floor(pk); rk = (pk - (cmsFloat32Number) k0);
+ x0 = (int) floor(px); rx = (px - (cmsFloat32Number) x0); // We need full floor functionality here
+ y0 = (int) floor(py); ry = (py - (cmsFloat32Number) y0);
+ z0 = (int) floor(pz); rz = (pz - (cmsFloat32Number) z0);
- T = LutTable + K0;
- p1.Table = T;
+ K0 = p -> opta[3] * k0;
+ K1 = (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[3]);
- TetrahedralInterpFloat(ContextID, Input + 1, Tmp1, &p1);
+ X0 = p -> opta[2] * x0;
+ X1 = (fclamp(Input[1]) >= 1.0 ? 0 : p->opta[2]);
- T = LutTable + K1;
- p1.Table = T;
- TetrahedralInterpFloat(ContextID, Input + 1, Tmp2, &p1);
+ Y0 = p -> opta[1] * y0;
+ Y1 = (fclamp(Input[2]) >= 1.0 ? 0 : p->opta[1]);
- for (i=0; i < p -> nOutputs; i++)
- {
- cmsFloat32Number y0 = Tmp1[i];
- cmsFloat32Number y1 = Tmp2[i];
+ Z0 = p -> opta[0] * z0;
+ Z1 = (fclamp(Input[3]) >= 1.0 ? 0 : p->opta[0]);
+
+ LutTable = (cmsFloat32Number*) p -> Table;
+ LutTable += K0 + X0 + Y0 + Z0;
+
+ /* We carefully choose the following tests, a) cos these
+ * work nicely in SSE (see CAL), and b) because, as well
+ * as the standard 24 pentachorons, we get some useful
+ * special cases. */
+ which = (rx > ry ? 1 : 0) +
+ (ry > rz ? 2 : 0) +
+ (rz > rk ? 4 : 0) +
+ (rk > rx ? 8 : 0) +
+ (rz > rx ? 16 : 0) +
+ (rk > ry ? 32 : 0);
+
+ o4 = X1+Y1+Z1+K1;
+ switch(which)
+ {
+ default: /* Never happens, but stops the compiler complaining of uninitialised vars */
+ case 0x00: /* x == y == z == k - special case */
+ m1 = rx; goto one_lerp;
+ case 0x01: /* x > k == z == y - special case */
+ o1 = X1; m1 = rx; m2 = ry; goto two_lerps;
+ case 0x18: /* y == z == k > x - special case */
+ o1 = Y1+Z1+K1; m1 = ry; m2 = rx; goto two_lerps;
+ case 0x04: /* z == y == x > k - special case */
+ o1 = X1+Y1+Z1; m1 = ry; m2 = rk; goto two_lerps;
+ case 0x28: /* k > z == y == x - special case */
+ o1 = K1; m1 = rk; m2 = ry; goto two_lerps;
+ case 0x02: /* y >= x >= k >= z */
+ o1 = Y1; o2 = X1; o3 = K1; m1 = ry; m2 = rx; m3 = rk; m4 = rz; break;
+ case 0x03: /* x > y >= k >= z */
+ o1 = X1; o2 = Y1; o3 = K1; m1 = rx; m2 = ry; m3 = rk; m4 = rz; break;
+ case 0x05: /* x >= z >= y >= k */
+ o1 = X1; o2 = Z1; o3 = Y1; m1 = rx; m2 = rz; m3 = ry; m4 = rk; break;
+ case 0x06: /* y >= x >= z > k */
+ o1 = Y1; o2 = X1; o3 = Z1; m1 = ry; m2 = rx; m3 = rz; m4 = rk; break;
+ case 0x07: /* x > y > z > k */
+ o1 = X1; o2 = Y1; o3 = Z1; m1 = rx; m2 = ry; m3 = rz; m4 = rk; break;
+ case 0x0a: /* y >= k > x >= z */
+ o1 = Y1; o2 = K1; o3 = X1; m1 = ry; m2 = rk; m3 = rx; m4 = rz; break;
+ case 0x14: /* z >= y >= x >= k */
+ o1 = Z1; o2 = Y1; o3 = X1; m1 = rz; m2 = ry; m3 = rx; m4 = rk; break;
+ case 0x15: /* z > x >= y >= k */
+ o1 = Z1; o2 = X1; o3 = Y1; m1 = rz; m2 = rx; m3 = ry; m4 = rk; break;
+ case 0x16: /* y >= z > x >= k */
+ o1 = Y1; o2 = Z1; o3 = X1; m1 = ry; m2 = rz; m3 = rx; m4 = rk; break;
+ case 0x1a: /* y >= k >= z > x */
+ o1 = Y1; o2 = K1; o3 = Z1; m1 = ry; m2 = rk; m3 = rz; m4 = rx; break;
+ case 0x1c: /* z >= y >= k > x */
+ o1 = Z1; o2 = Y1; o3 = K1; m1 = rz; m2 = ry; m3 = rk; m4 = rx; break;
+ case 0x1e: /* y > z > k > x */
+ o1 = Y1; o2 = Z1; o3 = K1; m1 = ry; m2 = rz; m3 = rk; m4 = rx; break;
+ case 0x21: /* x >= k >= z >= y */
+ o1 = X1; o2 = K1; o3 = Z1; m1 = rx; m2 = rk; m3 = rz; m4 = ry; break;
+ case 0x23: /* x >= k > y > z */
+ o1 = X1; o2 = K1; o3 = Y1; m1 = rx; m2 = rk; m3 = ry; m4 = rz; break;
+ case 0x25: /* x >= z > k > y */
+ o1 = X1; o2 = Z1; o3 = K1; m1 = rx; m2 = rz; m3 = rk; m4 = ry; break;
+ case 0x29: /* k > x >= z >= y */
+ o1 = K1; o2 = X1; o3 = Z1; m1 = rk; m2 = rx; m3 = rz; m4 = ry; break;
+ case 0x2a: /* k > y >= x >= z */
+ o1 = K1; o2 = Y1; o3 = X1; m1 = rk; m2 = ry; m3 = rx; m4 = rz; break;
+ case 0x2b: /* k > x > y > z */
+ o1 = K1; o2 = X1; o3 = Y1; m1 = rk; m2 = rx; m3 = ry; m4 = rz; break;
+ case 0x35: /* z > x >= k > y */
+ o1 = Z1; o2 = X1; o3 = K1; m1 = rz; m2 = rx; m3 = rk; m4 = ry; break;
+ case 0x38: /* k >= z >= y >= x */
+ o1 = K1; o2 = Z1; o3 = Y1; m1 = rk; m2 = rz; m3 = ry; m4 = rx; break;
+ case 0x39: /* k >= z > x > y */
+ o1 = K1; o2 = Z1; o3 = X1; m1 = rk; m2 = rz; m3 = rx; m4 = ry; break;
+ case 0x3a: /* k > y > z > x */
+ o1 = K1; o2 = Y1; o3 = Z1; m1 = rk; m2 = ry; m3 = rz; m4 = rx; break;
+ case 0x3c: /* z > k > y >= x */
+ o1 = Z1; o2 = K1; o3 = Y1; m1 = rz; m2 = rk; m3 = ry; m4 = rx; break;
+ case 0x3d: /* z > k > x > y */
+ o1 = Z1; o2 = K1; o3 = X1; m1 = rz; m2 = rk; m3 = rx; m4 = ry; break;
+ }
+ assert(m1 >= m2 && m2 >= m3 && m3 >= m4);
+ o2 += o1;
+ o3 += o2;
+ for (OutChan=TotalOut; OutChan != 0; OutChan--) {
+ c1 = LutTable[o1];
+ c2 = LutTable[o2];
+ c3 = LutTable[o3];
+ c4 = LutTable[o4] - c3;
+ c0 = *LutTable++;
+ c3 -= c2;
+ c2 -= c1;
+ c1 -= c0;
+
+ *Out++ = c0 + c1 * m1 + c2 * m2 + c3 * m3 + c4 * m4;
+ }
+ return;
- Output[i] = y0 + (y1 - y0) * rest;
- }
-}
+two_lerps:
+ assert(m1 >= m2);
+ for (OutChan=TotalOut; OutChan != 0; OutChan--) {
+ c1 = LutTable[o1];
+ c2 = LutTable[o4] - c1;
+ c0 = *LutTable++;
+ c1 -= c0;
+
+ *Out++ = c0 + c1 * m1 + c2 * m2;
+ }
+ return;
+
+one_lerp:
+ for (OutChan=TotalOut; OutChan != 0; OutChan--) {
+ c1 = LutTable[o4];
+ c0 = *LutTable++;
+ c1 -= c0;
+ *Out++ = c0 + c1 * m1;
+ }
+ return;
+}
static CMS_NO_SANITIZE
void Eval5Inputs(cmsContext ContextID, register const cmsUInt16Number Input[],
register cmsUInt16Number Output[],
register const cmsInterpParams* p16)
{
- const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
- cmsS15Fixed16Number fk;
- cmsS15Fixed16Number k0, rk;
- int K0, K1;
- const cmsUInt16Number* T;
- cmsUInt32Number i;
- cmsUInt16Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
-
- fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
- k0 = FIXED_TO_INT(fk);
- rk = FIXED_REST_TO_INT(fk);
-
- K0 = p16 -> opta[4] * k0;
- K1 = p16 -> opta[4] * (k0 + (Input[0] != 0xFFFFU ? 1 : 0));
-
- p1 = *p16;
- memmove(&p1.Domain[0], &p16 ->Domain[1], 4*sizeof(cmsUInt32Number));
-
- T = LutTable + K0;
- p1.Table = T;
-
- Eval4Inputs(ContextID, Input + 1, Tmp1, &p1);
+ cmsS15Fixed16Number fk;
+ cmsUInt32Number i, n;
+ cmsUInt16Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p16;
- T = LutTable + K1;
- p1.Table = T;
+ memmove(&p1.Domain[0], &p16 ->Domain[1], 4*sizeof(cmsUInt32Number));
+ fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+ p1.Table = ((cmsUInt16Number*)p16 -> Table) + p16 -> opta[4] * FIXED_TO_INT(fk);
- Eval4Inputs(ContextID, Input + 1, Tmp2, &p1);
+ Eval4Inputs(ContextID, Input + 1, Output, &p1);
- for (i=0; i < p16 -> nOutputs; i++) {
+ if (Input[0] == 0xFFFFU)
+ return;
- Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
- }
+ p1.Table = ((cmsUInt16Number*)p1.Table) + p16 -> opta[4];
+ Eval4Inputs(ContextID, Input + 1, Tmp, &p1);
+ fk = FIXED_REST_TO_INT(fk);
+ n = p16 -> nOutputs;
+ for (i=0; i < n; i++)
+ Output[i] = LinearInterp(fk, Output[i], Tmp[i]);
}
@@ -1125,42 +1356,36 @@ void Eval5InputsFloat(cmsContext ContextID, const cmsFloat32Number Input[],
cmsFloat32Number Output[],
const cmsInterpParams* p)
{
- const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
- cmsFloat32Number rest;
- cmsFloat32Number pk;
- int k0, K0, K1;
- const cmsFloat32Number* T;
- cmsUInt32Number i;
- cmsFloat32Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
+ cmsFloat32Number rest;
+ cmsFloat32Number pk;
+ int k0;
+ cmsUInt32Number i, n;
+ cmsFloat32Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p;
+ cmsFloat32Number i0 = fclamp(Input[0]);
- pk = fclamp(Input[0]) * p->Domain[0];
- k0 = _cmsQuickFloor(pk);
- rest = pk - (cmsFloat32Number) k0;
+ pk = i0 * p->Domain[0];
+ k0 = _cmsQuickFloor(pk);
+ rest = pk - (cmsFloat32Number) k0;
- K0 = p -> opta[4] * k0;
- K1 = K0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[4]);
+ memmove(&p1.Domain[0], &p ->Domain[1], 4*sizeof(cmsUInt32Number));
+ p1.Table = ((cmsFloat32Number*) p -> Table) + p -> opta[4] * k0;
- p1 = *p;
- memmove(&p1.Domain[0], &p ->Domain[1], 4*sizeof(cmsUInt32Number));
+ Eval4InputsFloat(ContextID, Input + 1, Output, &p1);
- T = LutTable + K0;
- p1.Table = T;
+ if (i0 == 1.0)
+ return;
- Eval4InputsFloat(ContextID, Input + 1, Tmp1, &p1);
+ p1.Table = ((cmsFloat32Number*) p1.Table) + p->opta[4];
+ Eval4InputsFloat(ContextID, Input + 1, Tmp, &p1);
- T = LutTable + K1;
- p1.Table = T;
+ n = p -> nOutputs;
+ for (i=0; i < n; i++) {
+ cmsFloat32Number y0 = Output[i];
+ cmsFloat32Number y1 = Tmp[i];
- Eval4InputsFloat(ContextID, Input + 1, Tmp2, &p1);
-
- for (i=0; i < p -> nOutputs; i++) {
-
- cmsFloat32Number y0 = Tmp1[i];
- cmsFloat32Number y1 = Tmp2[i];
-
- Output[i] = y0 + (y1 - y0) * rest;
- }
+ Output[i] = y0 + (y1 - y0) * rest;
+ }
}
@@ -1170,40 +1395,27 @@ void Eval6Inputs(cmsContext ContextID, register const cmsUInt16Number Input[],
register cmsUInt16Number Output[],
register const cmsInterpParams* p16)
{
- const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
- cmsS15Fixed16Number fk;
- cmsS15Fixed16Number k0, rk;
- int K0, K1;
- const cmsUInt16Number* T;
- cmsUInt32Number i;
- cmsUInt16Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
- fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
- k0 = FIXED_TO_INT(fk);
- rk = FIXED_REST_TO_INT(fk);
-
- K0 = p16 -> opta[5] * k0;
- K1 = p16 -> opta[5] * (k0 + (Input[0] != 0xFFFFU ? 1 : 0));
-
- p1 = *p16;
- memmove(&p1.Domain[0], &p16 ->Domain[1], 5*sizeof(cmsUInt32Number));
-
- T = LutTable + K0;
- p1.Table = T;
+ cmsS15Fixed16Number fk;
+ cmsUInt32Number i, n;
+ cmsUInt16Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p16;
- Eval5Inputs(ContextID, Input + 1, Tmp1, &p1);
+ memmove(&p1.Domain[0], &p16 ->Domain[1], 5*sizeof(cmsUInt32Number));
+ fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+ p1.Table = ((cmsUInt16Number*)p16 -> Table) + p16 -> opta[5] * FIXED_TO_INT(fk);
- T = LutTable + K1;
- p1.Table = T;
+ Eval5Inputs(ContextID, Input + 1, Output, &p1);
- Eval5Inputs(ContextID, Input + 1, Tmp2, &p1);
+ if (Input[0] == 0xFFFFU)
+ return;
- for (i=0; i < p16 -> nOutputs; i++) {
-
- Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
- }
+ p1.Table = ((cmsUInt16Number*)p1.Table) + p16 -> opta[5];
+ Eval5Inputs(ContextID, Input + 1, Tmp, &p1);
+ fk = FIXED_REST_TO_INT(fk);
+ n = p16 -> nOutputs;
+ for (i=0; i < n; i++)
+ Output[i] = LinearInterp(fk, Output[i], Tmp[i]);
}
@@ -1212,42 +1424,36 @@ void Eval6InputsFloat(cmsContext ContextID, const cmsFloat32Number Input[],
cmsFloat32Number Output[],
const cmsInterpParams* p)
{
- const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
- cmsFloat32Number rest;
- cmsFloat32Number pk;
- int k0, K0, K1;
- const cmsFloat32Number* T;
- cmsUInt32Number i;
- cmsFloat32Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
- pk = fclamp(Input[0]) * p->Domain[0];
- k0 = _cmsQuickFloor(pk);
- rest = pk - (cmsFloat32Number) k0;
-
- K0 = p -> opta[5] * k0;
- K1 = K0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[5]);
-
- p1 = *p;
- memmove(&p1.Domain[0], &p ->Domain[1], 5*sizeof(cmsUInt32Number));
+ cmsFloat32Number rest;
+ cmsFloat32Number pk;
+ int k0;
+ cmsUInt32Number i, n;
+ cmsFloat32Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p;
+ cmsFloat32Number i0 = fclamp(Input[0]);
- T = LutTable + K0;
- p1.Table = T;
+ pk = i0 * p->Domain[0];
+ k0 = _cmsQuickFloor(pk);
+ rest = pk - (cmsFloat32Number) k0;
- Eval5InputsFloat(ContextID, Input + 1, Tmp1, &p1);
+ memmove(&p1.Domain[0], &p ->Domain[1], 5*sizeof(cmsUInt32Number));
+ p1.Table = ((cmsFloat32Number*) p -> Table) + p -> opta[5] * k0;
- T = LutTable + K1;
- p1.Table = T;
+ Eval5InputsFloat(ContextID, Input + 1, Output, &p1);
- Eval5InputsFloat(ContextID, Input + 1, Tmp2, &p1);
+ if (i0 == 1.0)
+ return;
- for (i=0; i < p -> nOutputs; i++) {
+ p1.Table = ((cmsFloat32Number*) p1.Table) + p->opta[5];
+ Eval5InputsFloat(ContextID, Input + 1, Tmp, &p1);
- cmsFloat32Number y0 = Tmp1[i];
- cmsFloat32Number y1 = Tmp2[i];
+ n = p -> nOutputs;
+ for (i=0; i < n; i++) {
+ cmsFloat32Number y0 = Output[i];
+ cmsFloat32Number y1 = Tmp[i];
- Output[i] = y0 + (y1 - y0) * rest;
- }
+ Output[i] = y0 + (y1 - y0) * rest;
+ }
}
@@ -1256,39 +1462,27 @@ void Eval7Inputs(cmsContext ContextID, register const cmsUInt16Number Input[],
register cmsUInt16Number Output[],
register const cmsInterpParams* p16)
{
- const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
- cmsS15Fixed16Number fk;
- cmsS15Fixed16Number k0, rk;
- int K0, K1;
- const cmsUInt16Number* T;
- cmsUInt32Number i;
- cmsUInt16Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
-
- fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
- k0 = FIXED_TO_INT(fk);
- rk = FIXED_REST_TO_INT(fk);
-
- K0 = p16 -> opta[6] * k0;
- K1 = p16 -> opta[6] * (k0 + (Input[0] != 0xFFFFU ? 1 : 0));
-
- p1 = *p16;
- memmove(&p1.Domain[0], &p16 ->Domain[1], 6*sizeof(cmsUInt32Number));
+ cmsS15Fixed16Number fk;
+ cmsUInt32Number i, n;
+ cmsUInt16Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p16;
- T = LutTable + K0;
- p1.Table = T;
+ memmove(&p1.Domain[0], &p16 ->Domain[1], 6*sizeof(cmsUInt32Number));
+ fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+ p1.Table = ((cmsUInt16Number*)p16 -> Table) + p16 -> opta[6] * FIXED_TO_INT(fk);
- Eval6Inputs(ContextID, Input + 1, Tmp1, &p1);
+ Eval6Inputs(ContextID, Input + 1, Output, &p1);
- T = LutTable + K1;
- p1.Table = T;
+ if (Input[0] == 0xFFFFU)
+ return;
- Eval6Inputs(ContextID, Input + 1, Tmp2, &p1);
+ p1.Table = ((cmsUInt16Number*)p1.Table) + p16 -> opta[6];
+ Eval6Inputs(ContextID, Input + 1, Tmp, &p1);
- for (i=0; i < p16 -> nOutputs; i++) {
- Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
- }
+ fk = FIXED_REST_TO_INT(fk);
+ n = p16 -> nOutputs;
+ for (i=0; i < n; i++)
+ Output[i] = LinearInterp(fk, Output[i], Tmp[i]);
}
@@ -1297,44 +1491,36 @@ void Eval7InputsFloat(cmsContext ContextID, const cmsFloat32Number Input[],
cmsFloat32Number Output[],
const cmsInterpParams* p)
{
- const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
- cmsFloat32Number rest;
- cmsFloat32Number pk;
- int k0, K0, K1;
- const cmsFloat32Number* T;
- cmsUInt32Number i;
- cmsFloat32Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
- pk = fclamp(Input[0]) * p->Domain[0];
- k0 = _cmsQuickFloor(pk);
- rest = pk - (cmsFloat32Number) k0;
-
- K0 = p -> opta[6] * k0;
- K1 = K0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[6]);
+ cmsFloat32Number rest;
+ cmsFloat32Number pk;
+ int k0;
+ cmsUInt32Number i, n;
+ cmsFloat32Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p;
+ cmsFloat32Number i0 = fclamp(Input[0]);
- p1 = *p;
- memmove(&p1.Domain[0], &p ->Domain[1], 6*sizeof(cmsUInt32Number));
+ pk = i0 * p->Domain[0];
+ k0 = _cmsQuickFloor(pk);
+ rest = pk - (cmsFloat32Number) k0;
- T = LutTable + K0;
- p1.Table = T;
+ memmove(&p1.Domain[0], &p ->Domain[1], 6*sizeof(cmsUInt32Number));
+ p1.Table = ((cmsFloat32Number*) p -> Table) + p -> opta[6] * k0;
- Eval6InputsFloat(ContextID, Input + 1, Tmp1, &p1);
+ Eval6InputsFloat(ContextID, Input + 1, Output, &p1);
- T = LutTable + K1;
- p1.Table = T;
+ if (i0 == 1.0)
+ return;
- Eval6InputsFloat(ContextID, Input + 1, Tmp2, &p1);
+ p1.Table = ((cmsFloat32Number*) p1.Table) + p->opta[6];
+ Eval6InputsFloat(ContextID, Input + 1, Tmp, &p1);
+ n = p -> nOutputs;
+ for (i=0; i < n; i++) {
+ cmsFloat32Number y0 = Output[i];
+ cmsFloat32Number y1 = Tmp[i];
- for (i=0; i < p -> nOutputs; i++) {
-
- cmsFloat32Number y0 = Tmp1[i];
- cmsFloat32Number y1 = Tmp2[i];
-
- Output[i] = y0 + (y1 - y0) * rest;
-
- }
+ Output[i] = y0 + (y1 - y0) * rest;
+ }
}
static CMS_NO_SANITIZE
@@ -1342,37 +1528,27 @@ void Eval8Inputs(cmsContext ContextID, register const cmsUInt16Number Input[],
register cmsUInt16Number Output[],
register const cmsInterpParams* p16)
{
- const cmsUInt16Number* LutTable = (cmsUInt16Number*) p16 -> Table;
- cmsS15Fixed16Number fk;
- cmsS15Fixed16Number k0, rk;
- int K0, K1;
- const cmsUInt16Number* T;
- cmsUInt32Number i;
- cmsUInt16Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
- fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
- k0 = FIXED_TO_INT(fk);
- rk = FIXED_REST_TO_INT(fk);
-
- K0 = p16 -> opta[7] * k0;
- K1 = p16 -> opta[7] * (k0 + (Input[0] != 0xFFFFU ? 1 : 0));
+ cmsS15Fixed16Number fk;
+ cmsUInt32Number i, n;
+ cmsUInt16Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p16;
- p1 = *p16;
- memmove(&p1.Domain[0], &p16 ->Domain[1], 7*sizeof(cmsUInt32Number));
+ memmove(&p1.Domain[0], &p16 ->Domain[1], 7*sizeof(cmsUInt32Number));
+ fk = _cmsToFixedDomain((cmsS15Fixed16Number) Input[0] * p16 -> Domain[0]);
+ p1.Table = ((cmsUInt16Number*)p16 -> Table) + p16 -> opta[7] * FIXED_TO_INT(fk);
- T = LutTable + K0;
- p1.Table = T;
+ Eval7Inputs(ContextID, Input + 1, Output, &p1);
- Eval7Inputs(ContextID, Input + 1, Tmp1, &p1);
+ if (Input[0] == 0xFFFFU)
+ return;
- T = LutTable + K1;
- p1.Table = T;
- Eval7Inputs(ContextID, Input + 1, Tmp2, &p1);
+ p1.Table = ((cmsUInt16Number*)p1.Table) + p16 -> opta[7];
+ Eval7Inputs(ContextID, Input + 1, Tmp, &p1);
- for (i=0; i < p16 -> nOutputs; i++) {
- Output[i] = LinearInterp(rk, Tmp1[i], Tmp2[i]);
- }
+ fk = FIXED_REST_TO_INT(fk);
+ n = p16 -> nOutputs;
+ for (i=0; i < n; i++)
+ Output[i] = LinearInterp(fk, Output[i], Tmp[i]);
}
@@ -1382,43 +1558,36 @@ void Eval8InputsFloat(cmsContext ContextID, const cmsFloat32Number Input[],
cmsFloat32Number Output[],
const cmsInterpParams* p)
{
- const cmsFloat32Number* LutTable = (cmsFloat32Number*) p -> Table;
- cmsFloat32Number rest;
- cmsFloat32Number pk;
- int k0, K0, K1;
- const cmsFloat32Number* T;
- cmsUInt32Number i;
- cmsFloat32Number Tmp1[MAX_STAGE_CHANNELS], Tmp2[MAX_STAGE_CHANNELS];
- cmsInterpParams p1;
-
- pk = fclamp(Input[0]) * p->Domain[0];
- k0 = _cmsQuickFloor(pk);
- rest = pk - (cmsFloat32Number) k0;
-
- K0 = p -> opta[7] * k0;
- K1 = K0 + (fclamp(Input[0]) >= 1.0 ? 0 : p->opta[7]);
+ cmsFloat32Number rest;
+ cmsFloat32Number pk;
+ int k0;
+ cmsUInt32Number i, n;
+ cmsFloat32Number Tmp[MAX_STAGE_CHANNELS];
+ cmsInterpParams p1 = *p;
+ cmsFloat32Number i0 = fclamp(Input[0]);
- p1 = *p;
- memmove(&p1.Domain[0], &p ->Domain[1], 7*sizeof(cmsUInt32Number));
+ pk = i0 * p->Domain[0];
+ k0 = _cmsQuickFloor(pk);
+ rest = pk - (cmsFloat32Number) k0;
- T = LutTable + K0;
- p1.Table = T;
+ memmove(&p1.Domain[0], &p ->Domain[1], 7*sizeof(cmsUInt32Number));
+ p1.Table = ((cmsFloat32Number*) p -> Table) + p -> opta[7] * k0;
- Eval7InputsFloat(ContextID, Input + 1, Tmp1, &p1);
+ Eval7InputsFloat(ContextID, Input + 1, Output, &p1);
- T = LutTable + K1;
- p1.Table = T;
+ if (i0 == 1.0)
+ return;
- Eval7InputsFloat(ContextID, Input + 1, Tmp2, &p1);
+ p1.Table = ((cmsFloat32Number*) p1.Table) + p->opta[7];
+ Eval7InputsFloat(ContextID, Input + 1, Tmp, &p1);
+ n = p -> nOutputs;
+ for (i=0; i < n; i++) {
+ cmsFloat32Number y0 = Output[i];
+ cmsFloat32Number y1 = Tmp[i];
- for (i=0; i < p -> nOutputs; i++) {
-
- cmsFloat32Number y0 = Tmp1[i];
- cmsFloat32Number y1 = Tmp2[i];
-
- Output[i] = y0 + (y1 - y0) * rest;
- }
+ Output[i] = y0 + (y1 - y0) * rest;
+ }
}
// The default factory
@@ -1486,10 +1655,17 @@ cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cm
case 4: // CMYK lut
- if (IsFloat)
- Interpolation.LerpFloat = Eval4InputsFloat;
- else
- Interpolation.Lerp16 = Eval4Inputs;
+ if (IsTrilinear) {
+ if (IsFloat)
+ Interpolation.LerpFloat = QuadrilinearInterpFloat;
+ else
+ Interpolation.Lerp16 = QuadrilinearInterp16;
+ } else {
+ if (IsFloat)
+ Interpolation.LerpFloat = Eval4InputsFloat;
+ else
+ Interpolation.Lerp16 = Eval4Inputs;
+ }
break;
case 5: // 5 Inks