libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2010-2017 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_compiler.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00034 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00035 00036 template<typename> 00037 class regex_traits; 00038 00039 _GLIBCXX_END_NAMESPACE_CXX11 00040 _GLIBCXX_END_NAMESPACE_VERSION 00041 00042 namespace __detail 00043 { 00044 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00045 00046 /** 00047 * @addtogroup regex-detail 00048 * @{ 00049 */ 00050 00051 template<typename, bool, bool> 00052 struct _BracketMatcher; 00053 00054 /** 00055 * @brief Builds an NFA from an input iterator range. 00056 * 00057 * The %_TraitsT type should fulfill requirements [28.3]. 00058 */ 00059 template<typename _TraitsT> 00060 class _Compiler 00061 { 00062 public: 00063 typedef typename _TraitsT::char_type _CharT; 00064 typedef const _CharT* _IterT; 00065 typedef _NFA<_TraitsT> _RegexT; 00066 typedef regex_constants::syntax_option_type _FlagT; 00067 00068 _Compiler(_IterT __b, _IterT __e, 00069 const typename _TraitsT::locale_type& __traits, _FlagT __flags); 00070 00071 shared_ptr<const _RegexT> 00072 _M_get_nfa() 00073 { return std::move(_M_nfa); } 00074 00075 private: 00076 typedef _Scanner<_CharT> _ScannerT; 00077 typedef typename _TraitsT::string_type _StringT; 00078 typedef typename _ScannerT::_TokenT _TokenT; 00079 typedef _StateSeq<_TraitsT> _StateSeqT; 00080 typedef std::stack<_StateSeqT> _StackT; 00081 typedef std::ctype<_CharT> _CtypeT; 00082 00083 // accepts a specific token or returns false. 00084 bool 00085 _M_match_token(_TokenT __token); 00086 00087 void 00088 _M_disjunction(); 00089 00090 void 00091 _M_alternative(); 00092 00093 bool 00094 _M_term(); 00095 00096 bool 00097 _M_assertion(); 00098 00099 bool 00100 _M_quantifier(); 00101 00102 bool 00103 _M_atom(); 00104 00105 bool 00106 _M_bracket_expression(); 00107 00108 template<bool __icase, bool __collate> 00109 void 00110 _M_insert_any_matcher_ecma(); 00111 00112 template<bool __icase, bool __collate> 00113 void 00114 _M_insert_any_matcher_posix(); 00115 00116 template<bool __icase, bool __collate> 00117 void 00118 _M_insert_char_matcher(); 00119 00120 template<bool __icase, bool __collate> 00121 void 00122 _M_insert_character_class_matcher(); 00123 00124 template<bool __icase, bool __collate> 00125 void 00126 _M_insert_bracket_matcher(bool __neg); 00127 00128 // Returns true if successfully matched one term and should continue. 00129 // Returns false if the compiler should move on. 00130 template<bool __icase, bool __collate> 00131 bool 00132 _M_expression_term(pair<bool, _CharT>& __last_char, 00133 _BracketMatcher<_TraitsT, __icase, __collate>& 00134 __matcher); 00135 00136 int 00137 _M_cur_int_value(int __radix); 00138 00139 bool 00140 _M_try_char(); 00141 00142 _StateSeqT 00143 _M_pop() 00144 { 00145 auto ret = _M_stack.top(); 00146 _M_stack.pop(); 00147 return ret; 00148 } 00149 00150 _FlagT _M_flags; 00151 _ScannerT _M_scanner; 00152 shared_ptr<_RegexT> _M_nfa; 00153 _StringT _M_value; 00154 _StackT _M_stack; 00155 const _TraitsT& _M_traits; 00156 const _CtypeT& _M_ctype; 00157 }; 00158 00159 template<typename _Tp> 00160 struct __has_contiguous_iter : std::false_type { }; 00161 00162 template<typename _Ch, typename _Tr, typename _Alloc> 00163 struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>> 00164 : std::true_type 00165 { }; 00166 00167 template<typename _Tp, typename _Alloc> 00168 struct __has_contiguous_iter<std::vector<_Tp, _Alloc>> 00169 : std::true_type 00170 { }; 00171 00172 template<typename _Tp> 00173 struct __is_contiguous_normal_iter : std::false_type { }; 00174 00175 template<typename _CharT> 00176 struct __is_contiguous_normal_iter<_CharT*> : std::true_type { }; 00177 00178 template<typename _Tp, typename _Cont> 00179 struct 00180 __is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>> 00181 : __has_contiguous_iter<_Cont>::type 00182 { }; 00183 00184 template<typename _Iter, typename _TraitsT> 00185 using __enable_if_contiguous_normal_iter 00186 = typename enable_if< __is_contiguous_normal_iter<_Iter>::value, 00187 std::shared_ptr<const _NFA<_TraitsT>> >::type; 00188 00189 template<typename _Iter, typename _TraitsT> 00190 using __disable_if_contiguous_normal_iter 00191 = typename enable_if< !__is_contiguous_normal_iter<_Iter>::value, 00192 std::shared_ptr<const _NFA<_TraitsT>> >::type; 00193 00194 template<typename _FwdIter, typename _TraitsT> 00195 inline __enable_if_contiguous_normal_iter<_FwdIter, _TraitsT> 00196 __compile_nfa(_FwdIter __first, _FwdIter __last, 00197 const typename _TraitsT::locale_type& __loc, 00198 regex_constants::syntax_option_type __flags) 00199 { 00200 size_t __len = __last - __first; 00201 const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr; 00202 using _Cmplr = _Compiler<_TraitsT>; 00203 return _Cmplr(__cfirst, __cfirst + __len, __loc, __flags)._M_get_nfa(); 00204 } 00205 00206 template<typename _FwdIter, typename _TraitsT> 00207 inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT> 00208 __compile_nfa(_FwdIter __first, _FwdIter __last, 00209 const typename _TraitsT::locale_type& __loc, 00210 regex_constants::syntax_option_type __flags) 00211 { 00212 using char_type = typename _TraitsT::char_type; 00213 const basic_string<char_type> __str(__first, __last); 00214 return __compile_nfa<const char_type*, _TraitsT>(__str.data(), 00215 __str.data() + __str.size(), __loc, __flags); 00216 } 00217 00218 // [28.13.14] 00219 template<typename _TraitsT, bool __icase, bool __collate> 00220 class _RegexTranslatorBase 00221 { 00222 public: 00223 typedef typename _TraitsT::char_type _CharT; 00224 typedef typename _TraitsT::string_type _StringT; 00225 typedef _StringT _StrTransT; 00226 00227 explicit 00228 _RegexTranslatorBase(const _TraitsT& __traits) 00229 : _M_traits(__traits) 00230 { } 00231 00232 _CharT 00233 _M_translate(_CharT __ch) const 00234 { 00235 if (__icase) 00236 return _M_traits.translate_nocase(__ch); 00237 else if (__collate) 00238 return _M_traits.translate(__ch); 00239 else 00240 return __ch; 00241 } 00242 00243 _StrTransT 00244 _M_transform(_CharT __ch) const 00245 { 00246 _StrTransT __str(1, __ch); 00247 return _M_traits.transform(__str.begin(), __str.end()); 00248 } 00249 00250 // See LWG 523. It's not efficiently implementable when _TraitsT is not 00251 // std::regex_traits<>, and __collate is true. See specializations for 00252 // implementations of other cases. 00253 bool 00254 _M_match_range(const _StrTransT& __first, const _StrTransT& __last, 00255 const _StrTransT& __s) const 00256 { return __first <= __s && __s <= __last; } 00257 00258 protected: 00259 bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const 00260 { 00261 typedef std::ctype<_CharT> __ctype_type; 00262 const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc()); 00263 auto __lower = __fctyp.tolower(__ch); 00264 auto __upper = __fctyp.toupper(__ch); 00265 return (__first <= __lower && __lower <= __last) 00266 || (__first <= __upper && __upper <= __last); 00267 } 00268 00269 const _TraitsT& _M_traits; 00270 }; 00271 00272 template<typename _TraitsT, bool __icase, bool __collate> 00273 class _RegexTranslator 00274 : public _RegexTranslatorBase<_TraitsT, __icase, __collate> 00275 { 00276 public: 00277 typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base; 00278 using _Base::_Base; 00279 }; 00280 00281 template<typename _TraitsT, bool __icase> 00282 class _RegexTranslator<_TraitsT, __icase, false> 00283 : public _RegexTranslatorBase<_TraitsT, __icase, false> 00284 { 00285 public: 00286 typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base; 00287 typedef typename _Base::_CharT _CharT; 00288 typedef _CharT _StrTransT; 00289 00290 using _Base::_Base; 00291 00292 _StrTransT 00293 _M_transform(_CharT __ch) const 00294 { return __ch; } 00295 00296 bool 00297 _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const 00298 { 00299 if (!__icase) 00300 return __first <= __ch && __ch <= __last; 00301 return this->_M_in_range_icase(__first, __last, __ch); 00302 } 00303 }; 00304 00305 template<typename _CharType> 00306 class _RegexTranslator<std::regex_traits<_CharType>, true, true> 00307 : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true> 00308 { 00309 public: 00310 typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true> 00311 _Base; 00312 typedef typename _Base::_CharT _CharT; 00313 typedef typename _Base::_StrTransT _StrTransT; 00314 00315 using _Base::_Base; 00316 00317 bool 00318 _M_match_range(const _StrTransT& __first, const _StrTransT& __last, 00319 const _StrTransT& __str) const 00320 { 00321 __glibcxx_assert(__first.size() == 1); 00322 __glibcxx_assert(__last.size() == 1); 00323 __glibcxx_assert(__str.size() == 1); 00324 return this->_M_in_range_icase(__first[0], __last[0], __str[0]); 00325 } 00326 }; 00327 00328 template<typename _TraitsT> 00329 class _RegexTranslator<_TraitsT, false, false> 00330 { 00331 public: 00332 typedef typename _TraitsT::char_type _CharT; 00333 typedef _CharT _StrTransT; 00334 00335 explicit 00336 _RegexTranslator(const _TraitsT&) 00337 { } 00338 00339 _CharT 00340 _M_translate(_CharT __ch) const 00341 { return __ch; } 00342 00343 _StrTransT 00344 _M_transform(_CharT __ch) const 00345 { return __ch; } 00346 00347 bool 00348 _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const 00349 { return __first <= __ch && __ch <= __last; } 00350 }; 00351 00352 template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate> 00353 struct _AnyMatcher; 00354 00355 template<typename _TraitsT, bool __icase, bool __collate> 00356 struct _AnyMatcher<_TraitsT, false, __icase, __collate> 00357 { 00358 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00359 typedef typename _TransT::_CharT _CharT; 00360 00361 explicit 00362 _AnyMatcher(const _TraitsT& __traits) 00363 : _M_translator(__traits) 00364 { } 00365 00366 bool 00367 operator()(_CharT __ch) const 00368 { 00369 static auto __nul = _M_translator._M_translate('\0'); 00370 return _M_translator._M_translate(__ch) != __nul; 00371 } 00372 00373 _TransT _M_translator; 00374 }; 00375 00376 template<typename _TraitsT, bool __icase, bool __collate> 00377 struct _AnyMatcher<_TraitsT, true, __icase, __collate> 00378 { 00379 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00380 typedef typename _TransT::_CharT _CharT; 00381 00382 explicit 00383 _AnyMatcher(const _TraitsT& __traits) 00384 : _M_translator(__traits) 00385 { } 00386 00387 bool 00388 operator()(_CharT __ch) const 00389 { return _M_apply(__ch, typename is_same<_CharT, char>::type()); } 00390 00391 bool 00392 _M_apply(_CharT __ch, true_type) const 00393 { 00394 auto __c = _M_translator._M_translate(__ch); 00395 auto __n = _M_translator._M_translate('\n'); 00396 auto __r = _M_translator._M_translate('\r'); 00397 return __c != __n && __c != __r; 00398 } 00399 00400 bool 00401 _M_apply(_CharT __ch, false_type) const 00402 { 00403 auto __c = _M_translator._M_translate(__ch); 00404 auto __n = _M_translator._M_translate('\n'); 00405 auto __r = _M_translator._M_translate('\r'); 00406 auto __u2028 = _M_translator._M_translate(u'\u2028'); 00407 auto __u2029 = _M_translator._M_translate(u'\u2029'); 00408 return __c != __n && __c != __r && __c != __u2028 && __c != __u2029; 00409 } 00410 00411 _TransT _M_translator; 00412 }; 00413 00414 template<typename _TraitsT, bool __icase, bool __collate> 00415 struct _CharMatcher 00416 { 00417 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00418 typedef typename _TransT::_CharT _CharT; 00419 00420 _CharMatcher(_CharT __ch, const _TraitsT& __traits) 00421 : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch)) 00422 { } 00423 00424 bool 00425 operator()(_CharT __ch) const 00426 { return _M_ch == _M_translator._M_translate(__ch); } 00427 00428 _TransT _M_translator; 00429 _CharT _M_ch; 00430 }; 00431 00432 /// Matches a character range (bracket expression) 00433 template<typename _TraitsT, bool __icase, bool __collate> 00434 struct _BracketMatcher 00435 { 00436 public: 00437 typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT; 00438 typedef typename _TransT::_CharT _CharT; 00439 typedef typename _TransT::_StrTransT _StrTransT; 00440 typedef typename _TraitsT::string_type _StringT; 00441 typedef typename _TraitsT::char_class_type _CharClassT; 00442 00443 public: 00444 _BracketMatcher(bool __is_non_matching, 00445 const _TraitsT& __traits) 00446 : _M_class_set(0), _M_translator(__traits), _M_traits(__traits), 00447 _M_is_non_matching(__is_non_matching) 00448 { } 00449 00450 bool 00451 operator()(_CharT __ch) const 00452 { 00453 _GLIBCXX_DEBUG_ASSERT(_M_is_ready); 00454 return _M_apply(__ch, _UseCache()); 00455 } 00456 00457 void 00458 _M_add_char(_CharT __c) 00459 { 00460 _M_char_set.push_back(_M_translator._M_translate(__c)); 00461 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00462 } 00463 00464 _StringT 00465 _M_add_collate_element(const _StringT& __s) 00466 { 00467 auto __st = _M_traits.lookup_collatename(__s.data(), 00468 __s.data() + __s.size()); 00469 if (__st.empty()) 00470 __throw_regex_error(regex_constants::error_collate, 00471 "Invalid collate element."); 00472 _M_char_set.push_back(_M_translator._M_translate(__st[0])); 00473 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00474 return __st; 00475 } 00476 00477 void 00478 _M_add_equivalence_class(const _StringT& __s) 00479 { 00480 auto __st = _M_traits.lookup_collatename(__s.data(), 00481 __s.data() + __s.size()); 00482 if (__st.empty()) 00483 __throw_regex_error(regex_constants::error_collate, 00484 "Invalid equivalence class."); 00485 __st = _M_traits.transform_primary(__st.data(), 00486 __st.data() + __st.size()); 00487 _M_equiv_set.push_back(__st); 00488 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00489 } 00490 00491 // __neg should be true for \D, \S and \W only. 00492 void 00493 _M_add_character_class(const _StringT& __s, bool __neg) 00494 { 00495 auto __mask = _M_traits.lookup_classname(__s.data(), 00496 __s.data() + __s.size(), 00497 __icase); 00498 if (__mask == 0) 00499 __throw_regex_error(regex_constants::error_collate, 00500 "Invalid character class."); 00501 if (!__neg) 00502 _M_class_set |= __mask; 00503 else 00504 _M_neg_class_set.push_back(__mask); 00505 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00506 } 00507 00508 void 00509 _M_make_range(_CharT __l, _CharT __r) 00510 { 00511 if (__l > __r) 00512 __throw_regex_error(regex_constants::error_range, 00513 "Invalid range in bracket expression."); 00514 _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), 00515 _M_translator._M_transform(__r))); 00516 _GLIBCXX_DEBUG_ONLY(_M_is_ready = false); 00517 } 00518 00519 void 00520 _M_ready() 00521 { 00522 std::sort(_M_char_set.begin(), _M_char_set.end()); 00523 auto __end = std::unique(_M_char_set.begin(), _M_char_set.end()); 00524 _M_char_set.erase(__end, _M_char_set.end()); 00525 _M_make_cache(_UseCache()); 00526 _GLIBCXX_DEBUG_ONLY(_M_is_ready = true); 00527 } 00528 00529 private: 00530 // Currently we only use the cache for char 00531 typedef typename std::is_same<_CharT, char>::type _UseCache; 00532 00533 static constexpr size_t 00534 _S_cache_size() 00535 { 00536 return 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value)); 00537 } 00538 00539 struct _Dummy { }; 00540 typedef typename std::conditional<_UseCache::value, 00541 std::bitset<_S_cache_size()>, 00542 _Dummy>::type _CacheT; 00543 typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT; 00544 00545 bool 00546 _M_apply(_CharT __ch, false_type) const; 00547 00548 bool 00549 _M_apply(_CharT __ch, true_type) const 00550 { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; } 00551 00552 void 00553 _M_make_cache(true_type) 00554 { 00555 for (unsigned __i = 0; __i < _M_cache.size(); __i++) 00556 _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type()); 00557 } 00558 00559 void 00560 _M_make_cache(false_type) 00561 { } 00562 00563 private: 00564 std::vector<_CharT> _M_char_set; 00565 std::vector<_StringT> _M_equiv_set; 00566 std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; 00567 std::vector<_CharClassT> _M_neg_class_set; 00568 _CharClassT _M_class_set; 00569 _TransT _M_translator; 00570 const _TraitsT& _M_traits; 00571 bool _M_is_non_matching; 00572 _CacheT _M_cache; 00573 #ifdef _GLIBCXX_DEBUG 00574 bool _M_is_ready = false; 00575 #endif 00576 }; 00577 00578 //@} regex-detail 00579 _GLIBCXX_END_NAMESPACE_VERSION 00580 } // namespace __detail 00581 } // namespace std 00582 00583 #include <bits/regex_compiler.tcc>