Part of Slepp's ProjectsPastebinTURLImagebinFilebin
Feedback -- English French German Japanese
Create Upload Newest Tools Donate

Advertising

FP64 emulation
Sunday, March 21st, 2010 at 8:30:42pm UTC 

  1. #ifndef _DSMATH_H_
  2. #define _DSMATH_H_
  3.  
  4. // File date: 29 - 07 - 2008
  5.  
  6. // Double single functions based on DSFUN90 package:
  7. // http://crd.lbl.gov/~dhbailey/mpdist/index.html
  8. // Partially adapted from NVIDIA's CUDA Mandelbrot demo
  9.  
  10. // Set functions for agnostic arithmetic
  11. __host__ void set(float2 &a, double b);
  12. __host__ void set(float &a, double b);
  13. __device__ __host__ void set(float2 &a, const float b);
  14. __device__ __host__ void set(float &a, const float2 b);
  15. __device__ __host__ void set(float2 &a, const float2 b);
  16. __device__ __host__ void set(float &a, const float b);
  17.  
  18. // Arithmetic operators
  19. __device__ float2 operator-(const float2 a);
  20. __device__ float2 operator+(const float2 a, const float2 b);
  21. __device__ float2 operator-(const float2 a, const float2 b);
  22. __device__ float2 operator*(const float2 a, const float2 b);
  23. __device__ float2 operator/(const float2 a, const float2 b);
  24.  
  25. // Note that the / operator potentially makes use of the FMAD
  26. // instruction and may lose some accuracy as a result.
  27.  
  28. // This function sets the DS number A equal to the double precision floating point number B.
  29. __host__ inline void set(float2 &a, double b) {
  30.         a.x = (float)b;
  31.         a.y = (float)(b - a.x);
  32. }
  33.  
  34. // Store a (truncated) double in a float
  35. __host__ inline void set(float &a, double b) {
  36.         a = (float)b;
  37. }
  38.  
  39. // Store a float into a double single
  40. __device__ __host__ inline void set(float2 &a, const float b) {
  41.         a.x = b;
  42.         a.y = 0;
  43. }
  44.  
  45. // Store the hi word of the double single in a float
  46. __device__ __host__ inline void set(float &a, const float2 b) {
  47.         a = b.x;
  48. }
  49.  
  50. // Double single assignment
  51. __device__ __host__ inline void set(float2 &a, const float2 b) {
  52.         a = b;
  53. }
  54.  
  55. // Float assignment
  56. __device__ __host__ inline void set(float &a, const float b) {
  57.         a = b;
  58. }
  59.  
  60. // This function computes b = -a.
  61. __device__ inline float2 operator-(const float2 a) {
  62.         float2 b;
  63.         b.x = -a.x;
  64.         b.y = -a.y;
  65.  
  66.         return b;
  67. }
  68.  
  69. // Based on dsadd from DSFUN90, analysis by Norbert Juffa from NVIDIA.
  70. // For a and b of opposite sign whose magnitude is within a factor of two
  71. // of each other either variant below loses accuracy. Otherwise the result
  72. // is within 1.5 ulps of the correctly rounded result with 48-bit mantissa.
  73. // This function computes c = a + b.
  74. __device__ inline float2 operator+(const float2 a, const float2 b) {
  75.         float2 c;
  76. #if defined(__DEVICE_EMULATION__)
  77.         volatile float t1, e, t2;
  78. #else
  79.         float t1, e, t2;
  80. #endif
  81.  
  82.         // Compute dsa + dsb using Knuth's trick.
  83.         t1 = a.x + b.x;
  84.         e = t1 - a.x;
  85.         t2 = ((b.x - e) + (a.x - (t1 - e))) + a.y + b.y;
  86.  
  87.         // The result is t1 + t2, after normalization.
  88.         c.x = e = t1 + t2;
  89.         c.y = t2 - (e - t1);
  90.  
  91.         return c;
  92. }
  93.  
  94. // Based on dssub from DSFUN90
  95. // This function computes c = a - b.
  96. __device__ inline float2 operator-(const float2 a, const float2 b) {
  97.         float2 c;
  98. #if defined(__DEVICE_EMULATION__)
  99.         volatile float t1, e, t2;
  100. #else
  101.         float t1, e, t2;
  102. #endif
  103.  
  104.         // Compute dsa - dsb using Knuth's trick.
  105.         t1 = a.x - b.x;
  106.         e = t1 - a.x;
  107.         t2 = ((-b.x - e) + (a.x - (t1 - e))) + a.y - b.y;
  108.  
  109.         // The result is t1 + t2, after normalization.
  110.         c.x = e = t1 + t2;
  111.         c.y = t2 - (e - t1);
  112.  
  113.         return c;
  114. }
  115.  
  116. // This function multiplies DS numbers A and B to yield the DS product C.
  117. // Based on: Guillaume Da Graça, David Defour. Implementation of Float-Float
  118. // Operators on Graphics Hardware. RNC'7, pp. 23-32, 2006.
  119. __device__ inline float2 operator*(const float2 a, const float2 b) {
  120.         float2 c;
  121. #if defined(__DEVICE_EMULATION__)
  122.         volatile float up, vp, u1, u2, v1, v2, mh, ml;
  123.         volatile uint32_t tmp;
  124. #else
  125.         float up, vp, u1, u2, v1, v2, mh, ml;
  126.         uint32_t tmp;
  127. #endif
  128.         // This splits a.x and b.x into high-order and low-order words.
  129.         tmp = (*(uint32_t *)&a.x) & ~0xFFF;     // Bit-style splitting from Reimar
  130.         u1 = *(float *)&tmp;
  131.         //up  = a.x * 4097.0f;
  132.         //u1  = (a.x - up) + up;
  133.         u2  = a.x - u1;
  134.         tmp = (*(uint32_t *)&b.x) & ~0xFFF;
  135.         v1 = *(float *)&tmp;
  136.         //vp  = b.x * 4097.0f;
  137.         //v1  = (b.x - vp) + vp;
  138.         v2  = b.x - v1;
  139.  
  140.         // Multilply a.x * b.x using Dekker's method.
  141.         mh  = __fmul_rn(a.x, b.x);
  142.         ml  = (((__fmul_rn(u1, v1) - mh) + __fmul_rn(u1, v2)) + __fmul_rn(u2, v1)) + __fmul_rn(u2, v2);
  143.  
  144.         // Compute a.x * b.y + a.y * b.x
  145.         ml  = (__fmul_rn(a.x, b.y) + __fmul_rn(a.y, b.x)) + ml;
  146.  
  147.         // The result is mh + ml, after normalization.
  148.         c.x = up = mh + ml;
  149.         c.y = (mh - up) + ml;
  150.         return c;
  151. }
  152.  
  153. // Based on dsdiv from DSFUN90.
  154. // This function divides the DS number A by the DS number B to yield the DS
  155. // quotient DSC.
  156. __device__ inline float2 operator/(const float2 a, const float2 b) {
  157.         float2 c;
  158. #if defined(__DEVICE_EMULATION__)
  159.         volatile float s1, cona, conb, a1, b1, a2, b2, c11, c21;
  160.         volatile float c2, t1, e, t2, t12, t22, t11, t21, s2;
  161. #else
  162.         float s1, cona, conb, a1, b1, a2, b2, c11, c21;
  163.         float c2, t1, e, t2, t12, t22, t11, t21, s2;
  164. #endif
  165.         // Compute a DP approximation to the quotient.
  166.         s1 = a.x / b.x;
  167.  
  168.         // This splits s1 and b.x into high-order and low-order words.
  169.         cona = __fmul_rn(s1, 8193.0f);
  170.         conb = __fmul_rn(b.x, 8193.0f);
  171.         a1 = cona - (cona - s1);
  172.         b1 = conb - (conb - b.x);
  173.         a2 = s1 - a1;
  174.         b2 = b.x - b1;
  175.  
  176.         // Multiply s1 * dsb(1) using Dekker's method.
  177.         c11 = __fmul_rn(s1, b.x);
  178.         c21 = (((__fmul_rn(a1, b1) - c11) + __fmul_rn(a1, b2)) + __fmul_rn(a2, b1)) + __fmul_rn(a2, b2);
  179.  
  180.         // Compute s1 * b.y (only high-order word is needed).
  181.         c2 = s1 * b.y;
  182.  
  183.         // Compute (c11, c21) + c2 using Knuth's trick.
  184.         t1 = c11 + c2;
  185.         e = t1 - c11;
  186.         t2 = ((c2 - e) + (c11 - (t1 - e))) + c21;
  187.  
  188.         // The result is t1 + t2, after normalization.
  189.         t12 = t1 + t2;
  190.         t22 = t2 - (t12 - t1);
  191.  
  192.         // Compute dsa - (t12, t22) using Knuth's trick.
  193.  
  194.         t11 = a.x - t12;
  195.         e = t11 - a.x;
  196.         t21 = ((-t12 - e) + (a.x - (t11 - e))) + a.y - t22;
  197.  
  198.         // Compute high-order word of (t11, t21) and divide by b.x.
  199.         s2 = (t11 + t21) / b.x;
  200.  
  201.         // The result is s1 + s2, after normalization.
  202.         c.x = s1 + s2;
  203.         c.y = s2 - (c.x - s1);
  204.  
  205.         return c;
  206. }
  207.  
  208. #endif // _DSMATH_H_

Paste Details

Tags: fp64

advertising

Update the Post

Either update this post and resubmit it with changes, or make a new post.

You may also comment on this post.

update paste below
details of the post (optional)

Note: Only the paste content is required, though the following information can be useful to others.

Save name / title?

(space separated, optional)



Please note that information posted here will not expire by default. If you do not want it to expire, please set the expiry time above. If it is set to expire, web search engines will not be allowed to index it prior to it expiring. Items that are not marked to expire will be indexable by search engines. Be careful with your passwords. All illegal activities will be reported and any information will be handed over to the authorities, so be good.

comments powered by Disqus
worth-right