Skip to content
This repository has been archived by the owner on May 14, 2024. It is now read-only.

Commit

Permalink
Handle variable saturation
Browse files: browse the repository at this point in the history
Change-Id: I167c357e31caa6ebda763fbcaa4ab71729190340
  • Loading branch information
b-sumner committed Jan 23, 2019
1 parent 76afd78 commit 7d0dded
Showing 1 changed file with 25 additions and 7 deletions.
32 changes: 25 additions & 7 deletions ockl/src/dots.cl
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ __ockl_fdot2(half2 a, half2 b, float c, bool s)
if (SWDOT)
return fmuladd((float)a.s1, (float)b.s1, fmuladd((float)a.s0, (float)b.s0, c));
else
- return __llvm_amdgcn_fdot2(a, b, c, s);
+ return __llvm_amdgcn_fdot2(a, b, c, true);
}

ATTR int
Expand All @@ -67,7 +67,10 @@ __ockl_sdot2(short2 a, short2 b, int c, bool s)
else
return (int)r;
} else {
- return __llvm_amdgcn_sdot2(a, b, c, s);
+ if (s)
+ return __llvm_amdgcn_sdot2(a, b, c, true);
+ else
+ return __llvm_amdgcn_sdot2(a, b, c, false);
}
}

Expand All @@ -80,7 +83,10 @@ __ockl_udot2(ushort2 a, ushort2 b, uint c, bool s)
ulong r = (ulong)c + (ulong)p0 + (ulong)p1;
return (s & (r > (ulong)0xffffffff)) ? 0xffffffff : (uint)r;
} else {
- return __llvm_amdgcn_udot2(a, b, c, s);
+ if (s)
+ return __llvm_amdgcn_udot2(a, b, c, true);
+ else
+ return __llvm_amdgcn_udot2(a, b, c, false);
}
}

Expand All @@ -96,7 +102,10 @@ __ockl_sdot4(char4 a, char4 b, int c, bool s)
(int)a.s3 * (int)b.s3;
return s ? __ockl_add_sat_i32(t, c) : (t + c);
} else {
- return __llvm_amdgcn_sdot4(AS_INT(a), AS_INT(b), c, s);
+ if (s)
+ return __llvm_amdgcn_sdot4(AS_INT(a), AS_INT(b), c, true);
+ else
+ return __llvm_amdgcn_sdot4(AS_INT(a), AS_INT(b), c, false);
}
}

Expand All @@ -111,7 +120,10 @@ __ockl_udot4(uchar4 a, uchar4 b, uint c, bool s)
(uint)a.s3 * (uint)b.s3;
return s ? __ockl_add_sat_u32(t, c) : (t + c);
} else {
- return __llvm_amdgcn_udot4(AS_UINT(a), AS_UINT(b), c, s);
+ if (s)
+ return __llvm_amdgcn_udot4(AS_UINT(a), AS_UINT(b), c, true);
+ else
+ return __llvm_amdgcn_udot4(AS_UINT(a), AS_UINT(b), c, false);
}
}

Expand All @@ -131,7 +143,10 @@ __ockl_sdot8(int a, int b, int c, bool s)
( a >> 28) * ( b >> 28);
return s ? __ockl_add_sat_i32(t, c) : (t + c);
} else {
- return __llvm_amdgcn_sdot8(a, b, c, s);
+ if (s)
+ return __llvm_amdgcn_sdot8(a, b, c, true);
+ else
+ return __llvm_amdgcn_sdot8(a, b, c, false);
}
}

Expand All @@ -150,7 +165,10 @@ __ockl_udot8(uint a, uint b, uint c, bool s)
((a >> 28) ) * ((b >> 28) );
return s ? __ockl_add_sat_u32(t, c) : (t + c);
} else {
- return __llvm_amdgcn_udot8(a, b, c, s);
+ if (s)
+ return __llvm_amdgcn_udot8(a, b, c, true);
+ else
+ return __llvm_amdgcn_udot8(a, b, c, false);
}
}

0 comments on commit 7d0dded

Please sign in to comment.