libstdc++
|
00001 // Optimizations for random number functions, x86 version -*- C++ -*- 00002 00003 // Copyright (C) 2012-2016 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/opt_random.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{random} 00028 */ 00029 00030 #ifndef _BITS_OPT_RANDOM_H 00031 #define _BITS_OPT_RANDOM_H 1 00032 00033 #ifdef __SSE3__ 00034 #include <pmmintrin.h> 00035 #endif 00036 00037 00038 #pragma GCC system_header 00039 00040 00041 namespace std _GLIBCXX_VISIBILITY(default) 00042 { 00043 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00044 00045 #ifdef __SSE3__ 00046 template<> 00047 template<typename _UniformRandomNumberGenerator> 00048 void 00049 normal_distribution<double>:: 00050 __generate(typename normal_distribution<double>::result_type* __f, 00051 typename normal_distribution<double>::result_type* __t, 00052 _UniformRandomNumberGenerator& __urng, 00053 const param_type& __param) 00054 { 00055 typedef uint64_t __uctype; 00056 00057 if (__f == __t) 00058 return; 00059 00060 if (_M_saved_available) 00061 { 00062 _M_saved_available = false; 00063 *__f++ = _M_saved * __param.stddev() + __param.mean(); 00064 00065 if (__f == __t) 00066 return; 00067 } 00068 00069 constexpr uint64_t __maskval = 0xfffffffffffffull; 00070 static const __m128i __mask = _mm_set1_epi64x(__maskval); 00071 static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull); 00072 static const __m128d __three = _mm_set1_pd(3.0); 00073 const __m128d __av = _mm_set1_pd(__param.mean()); 00074 00075 const __uctype __urngmin = __urng.min(); 00076 const __uctype __urngmax = __urng.max(); 00077 const __uctype __urngrange = __urngmax - __urngmin; 00078 const __uctype __uerngrange = __urngrange + 1; 00079 00080 while (__f + 1 < __t) 00081 { 00082 double __le; 00083 __m128d __x; 00084 do 00085 { 00086 union 00087 { 00088 __m128i __i; 00089 __m128d __d; 00090 } __v; 00091 00092 if (__urngrange > __maskval) 00093 { 00094 if (__detail::_Power_of_2(__uerngrange)) 00095 __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(), 00096 __urng()), 00097 __mask); 00098 else 00099 { 00100 const __uctype __uerange = __maskval + 1; 00101 const __uctype __scaling = __urngrange / __uerange; 00102 const __uctype __past = __uerange * __scaling; 00103 uint64_t __v1; 00104 do 00105 __v1 = __uctype(__urng()) - __urngmin; 00106 while (__v1 >= __past); 00107 __v1 /= __scaling; 00108 uint64_t __v2; 00109 do 00110 __v2 = __uctype(__urng()) - __urngmin; 00111 while (__v2 >= __past); 00112 __v2 /= __scaling; 00113 00114 __v.__i = _mm_set_epi64x(__v1, __v2); 00115 } 00116 } 00117 else if (__urngrange == __maskval) 00118 __v.__i = _mm_set_epi64x(__urng(), __urng()); 00119 else if ((__urngrange + 2) * __urngrange >= __maskval 00120 && __detail::_Power_of_2(__uerngrange)) 00121 { 00122 uint64_t __v1 = __urng() * __uerngrange + __urng(); 00123 uint64_t __v2 = __urng() * __uerngrange + __urng(); 00124 00125 __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2), 00126 __mask); 00127 } 00128 else 00129 { 00130 size_t __nrng = 2; 00131 __uctype __high = __maskval / __uerngrange / __uerngrange; 00132 while (__high > __uerngrange) 00133 { 00134 ++__nrng; 00135 __high /= __uerngrange; 00136 } 00137 const __uctype __highrange = __high + 1; 00138 const __uctype __scaling = __urngrange / __highrange; 00139 const __uctype __past = __highrange * __scaling; 00140 __uctype __tmp; 00141 00142 uint64_t __v1; 00143 do 00144 { 00145 do 00146 __tmp = __uctype(__urng()) - __urngmin; 00147 while (__tmp >= __past); 00148 __v1 = __tmp / __scaling; 00149 for (size_t __cnt = 0; __cnt < __nrng; ++__cnt) 00150 { 00151 __tmp = __v1; 00152 __v1 *= __uerngrange; 00153 __v1 += __uctype(__urng()) - __urngmin; 00154 } 00155 } 00156 while (__v1 > __maskval || __v1 < __tmp); 00157 00158 uint64_t __v2; 00159 do 00160 { 00161 do 00162 __tmp = __uctype(__urng()) - __urngmin; 00163 while (__tmp >= __past); 00164 __v2 = __tmp / __scaling; 00165 for (size_t __cnt = 0; __cnt < __nrng; ++__cnt) 00166 { 00167 __tmp = __v2; 00168 __v2 *= __uerngrange; 00169 __v2 += __uctype(__urng()) - __urngmin; 00170 } 00171 } 00172 while (__v2 > __maskval || __v2 < __tmp); 00173 00174 __v.__i = _mm_set_epi64x(__v1, __v2); 00175 } 00176 00177 __v.__i = _mm_or_si128(__v.__i, __two); 00178 __x = _mm_sub_pd(__v.__d, __three); 00179 __m128d __m = _mm_mul_pd(__x, __x); 00180 __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m)); 00181 } 00182 while (__le == 0.0 || __le >= 1.0); 00183 00184 double __mult = (std::sqrt(-2.0 * std::log(__le) / __le) 00185 * __param.stddev()); 00186 00187 __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av); 00188 00189 _mm_storeu_pd(__f, __x); 00190 __f += 2; 00191 } 00192 00193 if (__f != __t) 00194 { 00195 result_type __x, __y, __r2; 00196 00197 __detail::_Adaptor<_UniformRandomNumberGenerator, result_type> 00198 __aurng(__urng); 00199 00200 do 00201 { 00202 __x = result_type(2.0) * __aurng() - 1.0; 00203 __y = result_type(2.0) * __aurng() - 1.0; 00204 __r2 = __x * __x + __y * __y; 00205 } 00206 while (__r2 > 1.0 || __r2 == 0.0); 00207 00208 const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2); 00209 _M_saved = __x * __mult; 00210 _M_saved_available = true; 00211 *__f = __y * __mult * __param.stddev() + __param.mean(); 00212 } 00213 } 00214 #endif 00215 00216 00217 _GLIBCXX_END_NAMESPACE_VERSION 00218 } // namespace 00219 00220 00221 #endif // _BITS_OPT_RANDOM_H