libfly  6.2.2
C++20 utility library for Linux, macOS, and Windows
string.hpp
1 #pragma once
2 
3 #include "fly/concepts/concepts.hpp"
4 #include "fly/types/string/concepts.hpp"
5 #include "fly/types/string/detail/classifier.hpp"
6 #include "fly/types/string/detail/converter.hpp"
7 #include "fly/types/string/detail/format_context.hpp"
8 #include "fly/types/string/detail/format_parameters.hpp"
9 #include "fly/types/string/detail/format_parse_context.hpp"
10 #include "fly/types/string/detail/format_specifier.hpp"
11 #include "fly/types/string/detail/format_string.hpp"
12 #include "fly/types/string/detail/traits.hpp"
13 #include "fly/types/string/detail/unicode.hpp"
14 #include "fly/types/string/formatters.hpp"
15 #include "fly/types/string/literals.hpp"
16 
17 #include <algorithm>
18 #include <array>
19 #include <cctype>
20 #include <chrono>
21 #include <cmath>
22 #include <cstdint>
23 #include <cstdlib>
24 #include <ios>
25 #include <iterator>
26 #include <optional>
27 #include <random>
28 #include <string>
29 #include <type_traits>
30 #include <vector>
31 
32 namespace fly {
33 
34 template <StandardCharacter CharType>
35 class BasicString;
36 
37 using String = BasicString<char>;
38 using WString = BasicString<wchar_t>;
39 using String8 = BasicString<char8_t>;
40 using String16 = BasicString<char16_t>;
41 using String32 = BasicString<char32_t>;
42 
49 template <StandardCharacter CharType>
51 {
54 
55 public:
56  using string_type = typename traits::string_type;
57  using size_type = typename traits::size_type;
58  using char_type = typename traits::char_type;
59  using view_type = typename traits::view_type;
60  using int_type = typename traits::int_type;
61  using codepoint_type = typename traits::codepoint_type;
62 
63  template <typename... ParameterTypes>
64  using FormatString =
66 
77  template <StandardStringLike T>
78  static constexpr size_type size(T &&value);
79 
92  static constexpr bool is_alpha(char_type ch);
93 
106  static constexpr bool is_upper(char_type ch);
107 
120  static constexpr bool is_lower(char_type ch);
121 
134  static constexpr char_type to_upper(char_type ch);
135 
148  static constexpr char_type to_lower(char_type ch);
149 
161  static constexpr bool is_digit(char_type ch);
162 
174  static constexpr bool is_x_digit(char_type ch);
175 
188  static constexpr bool is_space(char_type ch);
189 
198  static std::vector<string_type> split(view_type input, char_type delimiter);
199 
210  static std::vector<string_type> split(view_type input, char_type delimiter, size_type count);
211 
217  static void trim(string_type &target);
218 
226  static void replace_all(string_type &target, view_type search, char_type replace);
227 
235  static void replace_all(string_type &target, view_type search, view_type replace);
236 
243  static void remove_all(string_type &target, view_type search);
244 
253  static bool wildcard_match(view_type source, view_type search);
254 
262  static bool validate(view_type value);
263 
276  template <typename IteratorType>
277  static std::optional<codepoint_type>
278  decode_codepoint(IteratorType &it, const IteratorType &end);
279 
288  static std::optional<string_type> encode_codepoint(codepoint_type codepoint);
289 
314  template <char UnicodePrefix = 'U'>
315  requires fly::UnicodePrefixCharacter<UnicodePrefix>
316  static std::optional<string_type> escape_all_codepoints(view_type value);
317 
346  template <char UnicodePrefix = 'U', typename IteratorType>
347  requires fly::UnicodePrefixCharacter<UnicodePrefix>
348  static std::optional<string_type> escape_codepoint(IteratorType &it, const IteratorType &end);
349 
364  static std::optional<string_type> unescape_all_codepoints(view_type value);
365 
385  template <typename IteratorType>
386  static std::optional<string_type> unescape_codepoint(IteratorType &it, const IteratorType &end);
387 
395  static string_type generate_random_string(size_type length);
396 
503  template <typename... ParameterTypes>
504  static string_type
505  format(FormatString<ParameterTypes...> &&fmt, ParameterTypes &&...parameters);
506 
522  template <typename OutputIterator, typename... ParameterTypes>
523  static void format_to(
524  OutputIterator output,
526  ParameterTypes &&...parameters);
527 
538  template <typename... Args>
539  static string_type join(char_type separator, Args &&...args);
540 
552  template <typename T>
553  static std::optional<T> convert(const string_type &value);
554 
555 private:
559  template <typename T, typename... Args>
560  static void join_internal(string_type &result, char_type separator, T &&value, Args &&...args);
561 
565  template <typename T>
566  static void join_internal(string_type &result, char_type separator, T &&value);
567 
571  static constexpr const char_type *s_alpha_num =
572  FLY_STR(char_type, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
573 
574  static constexpr size_type s_alpha_num_length =
575  std::char_traits<char_type>::length(s_alpha_num);
576 
577  static constexpr const auto s_left_brace = FLY_CHR(char_type, '{');
578  static constexpr const auto s_right_brace = FLY_CHR(char_type, '}');
579 };
580 
581 //==================================================================================================
582 template <StandardCharacter CharType>
583 template <StandardStringLike T>
584 constexpr auto BasicString<CharType>::size(T &&value) -> size_type
585 {
586  return detail::BasicClassifier<char_type>::size(std::forward<T>(value));
587 }
588 
589 //==================================================================================================
590 template <StandardCharacter CharType>
591 constexpr bool BasicString<CharType>::is_alpha(char_type ch)
592 {
594 }
595 
596 //==================================================================================================
597 template <StandardCharacter CharType>
598 constexpr bool BasicString<CharType>::is_upper(char_type ch)
599 {
601 }
602 
603 //==================================================================================================
604 template <StandardCharacter CharType>
605 constexpr bool BasicString<CharType>::is_lower(char_type ch)
606 {
608 }
609 
610 //==================================================================================================
611 template <StandardCharacter CharType>
612 constexpr bool BasicString<CharType>::is_digit(char_type ch)
613 {
615 }
616 
617 //==================================================================================================
618 template <StandardCharacter CharType>
619 constexpr auto BasicString<CharType>::to_upper(char_type ch) -> char_type
620 {
622 }
623 
624 //==================================================================================================
625 template <StandardCharacter CharType>
626 constexpr auto BasicString<CharType>::to_lower(char_type ch) -> char_type
627 {
629 }
630 
631 //==================================================================================================
632 template <StandardCharacter CharType>
633 constexpr bool BasicString<CharType>::is_x_digit(char_type ch)
634 {
636 }
637 
638 //==================================================================================================
639 template <StandardCharacter CharType>
640 constexpr bool BasicString<CharType>::is_space(char_type ch)
641 {
643 }
644 
645 //==================================================================================================
646 template <StandardCharacter CharType>
647 auto BasicString<CharType>::split(view_type input, char_type delimiter) -> std::vector<string_type>
648 {
649  return split(input, delimiter, 0);
650 }
651 
652 //==================================================================================================
653 template <StandardCharacter CharType>
654 auto BasicString<CharType>::split(view_type input, char_type delimiter, size_type count)
655  -> std::vector<string_type>
656 {
657  std::vector<string_type> elements;
658  string_type item;
659 
660  size_type start = 0;
661  size_type end = input.find(delimiter);
662 
663  auto push_item = [&elements, &count, &delimiter](view_type str) {
664  if (!str.empty())
665  {
666  if ((count > 0) && (elements.size() == count))
667  {
668  elements.back() += delimiter;
669  elements.back() += str;
670  }
671  else
672  {
673  elements.push_back(string_type(str));
674  }
675  }
676  };
677 
678  while (end != string_type::npos)
679  {
680  item = input.substr(start, end - start);
681  push_item(item);
682 
683  start = end + 1;
684  end = input.find(delimiter, start);
685  }
686 
687  item = input.substr(start, end);
688  push_item(item);
689 
690  return elements;
691 }
692 
693 //==================================================================================================
694 template <StandardCharacter CharType>
695 void BasicString<CharType>::trim(string_type &target)
696 {
697  auto is_non_space = [](auto ch) {
698  return !is_space(ch);
699  };
700 
701  // Remove leading whitespace.
702  target.erase(target.begin(), std::find_if(target.begin(), target.end(), is_non_space));
703 
704  // Remove trailing whitespace.
705  target.erase(std::find_if(target.rbegin(), target.rend(), is_non_space).base(), target.end());
706 }
707 
708 //==================================================================================================
709 template <StandardCharacter CharType>
710 void BasicString<CharType>::replace_all(string_type &target, view_type search, char_type replace)
711 {
712  size_type index = target.find(search);
713 
714  while (!search.empty() && (index != string_type::npos))
715  {
716  target.replace(index, search.size(), 1, replace);
717  index = target.find(search);
718  }
719 }
720 
721 //==================================================================================================
722 template <StandardCharacter CharType>
723 void BasicString<CharType>::replace_all(string_type &target, view_type search, view_type replace)
724 {
725  size_type index = target.find(search);
726 
727  while (!search.empty() && (index != string_type::npos))
728  {
729  target.replace(index, search.size(), replace);
730  index = target.find(search);
731  }
732 }
733 
734 //==================================================================================================
735 template <StandardCharacter CharType>
736 void BasicString<CharType>::remove_all(string_type &target, view_type search)
737 {
738  replace_all(target, search, view_type {});
739 }
740 
741 //==================================================================================================
742 template <StandardCharacter CharType>
743 bool BasicString<CharType>::wildcard_match(view_type source, view_type search)
744 {
745  static constexpr char_type s_wildcard = '*';
746  bool result = !search.empty();
747 
748  const std::vector<string_type> segments = split(search, s_wildcard);
749  size_type index = 0;
750 
751  if (!segments.empty())
752  {
753  if (result && (search.front() != s_wildcard))
754  {
755  result = source.starts_with(segments.front());
756  }
757  if (result && (search.back() != s_wildcard))
758  {
759  result = source.ends_with(segments.back());
760  }
761 
762  for (auto it = segments.begin(); result && (it != segments.end()); ++it)
763  {
764  index = source.find(*it, index);
765 
766  if (index == string_type::npos)
767  {
768  result = false;
769  }
770  }
771  }
772 
773  return result;
774 }
775 
776 //==================================================================================================
777 template <StandardCharacter CharType>
778 inline bool BasicString<CharType>::validate(view_type value)
779 {
780  auto it = value.cbegin();
781  const auto end = value.cend();
782 
783  return unicode::validate_encoding(it, end);
784 }
785 
786 //==================================================================================================
787 template <StandardCharacter CharType>
788 template <typename IteratorType>
789 inline auto BasicString<CharType>::decode_codepoint(IteratorType &it, const IteratorType &end)
790  -> std::optional<codepoint_type>
791 {
792  return unicode::decode_codepoint(it, end);
793 }
794 
795 //==================================================================================================
796 template <StandardCharacter CharType>
797 inline auto BasicString<CharType>::encode_codepoint(codepoint_type codepoint)
798  -> std::optional<string_type>
799 {
800  return unicode::encode_codepoint(codepoint);
801 }
802 
803 //==================================================================================================
804 template <StandardCharacter CharType>
805 template <char UnicodePrefix>
806 requires fly::UnicodePrefixCharacter<UnicodePrefix>
807 auto BasicString<CharType>::escape_all_codepoints(view_type value) -> std::optional<string_type>
808 {
809  string_type result;
810  result.reserve(value.size());
811 
812  const auto end = value.cend();
813 
814  for (auto it = value.cbegin(); it != end;)
815  {
816  if (auto escaped = escape_codepoint<UnicodePrefix>(it, end); escaped)
817  {
818  result += *std::move(escaped);
819  }
820  else
821  {
822  return std::nullopt;
823  }
824  }
825 
826  return result;
827 }
828 
829 //==================================================================================================
830 template <StandardCharacter CharType>
831 template <char UnicodePrefix, typename IteratorType>
832 requires fly::UnicodePrefixCharacter<UnicodePrefix>
833 inline auto BasicString<CharType>::escape_codepoint(IteratorType &it, const IteratorType &end)
834  -> std::optional<string_type>
835 {
836  return unicode::template escape_codepoint<UnicodePrefix>(it, end);
837 }
838 
839 //==================================================================================================
840 template <StandardCharacter CharType>
841 auto BasicString<CharType>::unescape_all_codepoints(view_type value) -> std::optional<string_type>
842 {
843  string_type result;
844  result.reserve(value.size());
845 
846  const auto end = value.cend();
847 
848  for (auto it = value.cbegin(); it != end;)
849  {
850  if ((*it == '\\') && ((it + 1) != end))
851  {
852  switch (*(it + 1))
853  {
854  case FLY_CHR(char_type, 'u'):
855  case FLY_CHR(char_type, 'U'):
856  {
857  if (auto unescaped = unescape_codepoint(it, end); unescaped)
858  {
859  result += *std::move(unescaped);
860  }
861  else
862  {
863  return std::nullopt;
864  }
865 
866  break;
867  }
868 
869  default:
870  result += *(it++);
871  break;
872  }
873  }
874  else
875  {
876  result += *(it++);
877  }
878  }
879 
880  return result;
881 }
882 
883 //==================================================================================================
884 template <StandardCharacter CharType>
885 template <typename IteratorType>
886 inline auto BasicString<CharType>::unescape_codepoint(IteratorType &it, const IteratorType &end)
887  -> std::optional<string_type>
888 {
889  return unicode::unescape_codepoint(it, end);
890 }
891 
892 //==================================================================================================
893 template <StandardCharacter CharType>
894 auto BasicString<CharType>::generate_random_string(size_type length) -> string_type
895 {
896  using short_distribution = std::uniform_int_distribution<short>;
897 
898  constexpr auto limit = static_cast<short_distribution::result_type>(s_alpha_num_length - 1);
899  static_assert(limit > 0);
900 
901  static thread_local const auto s_now = std::chrono::system_clock::now().time_since_epoch();
902  static thread_local const auto s_seed = static_cast<std::mt19937::result_type>(s_now.count());
903 
904  static thread_local std::mt19937 s_engine(s_seed);
905  short_distribution distribution(0, limit);
906 
907  string_type result;
908  result.reserve(length);
909 
910  while (length-- != 0)
911  {
912  result.push_back(s_alpha_num[distribution(s_engine)]);
913  }
914 
915  return result;
916 }
917 
918 //==================================================================================================
919 template <StandardCharacter CharType>
920 template <typename... ParameterTypes>
921 inline auto
922 BasicString<CharType>::format(FormatString<ParameterTypes...> &&fmt, ParameterTypes &&...parameters)
923  -> string_type
924 {
925  string_type formatted;
926  formatted.reserve(fmt.context().view().size() * 2);
927 
928  format_to(
929  std::back_inserter(formatted),
930  std::move(fmt),
931  std::forward<ParameterTypes>(parameters)...);
932 
933  return formatted;
934 }
935 
936 //==================================================================================================
937 template <StandardCharacter CharType>
938 template <typename OutputIterator, typename... ParameterTypes>
940  OutputIterator output,
942  ParameterTypes &&...parameters)
943 {
945  using FormatParseContext = detail::BasicFormatParseContext<char_type>;
946 
947  FormatParseContext &parse_context = fmt.context();
948  const view_type view = parse_context.view();
949 
950  if (parse_context.has_error())
951  {
952  format_to(
953  output,
954  FLY_ARR(char_type, "Ignored invalid formatter: {}"),
955  parse_context.error());
956 
957  return;
958  }
959 
960  auto params =
961  detail::make_format_parameters<FormatContext>(std::forward<ParameterTypes>(parameters)...);
962  FormatContext context(output, params);
963 
964  for (std::size_t pos = 0; pos < view.size();)
965  {
966  switch (const auto &ch = view[pos])
967  {
968  case s_left_brace:
969  if (view[pos + 1] == s_left_brace)
970  {
971  *context.out()++ = ch;
972  pos += 2;
973  }
974  else
975  {
976  auto specifier = *std::move(fmt.next_specifier());
977  pos += specifier.m_size;
978 
979  const auto parameter = context.arg(specifier.m_position);
980  parameter.format(parse_context, context, std::move(specifier));
981  }
982  break;
983 
984  case s_right_brace:
985  *context.out()++ = ch;
986  pos += 2;
987  break;
988 
989  default:
990  *context.out()++ = ch;
991  ++pos;
992  break;
993  }
994  }
995 }
996 
997 //==================================================================================================
998 template <StandardCharacter CharType>
999 template <typename... Args>
1000 inline auto BasicString<CharType>::join(char_type separator, Args &&...args) -> string_type
1001 {
1002  string_type result;
1003  join_internal(result, separator, std::forward<Args>(args)...);
1004 
1005  return result;
1006 }
1007 
1008 //==================================================================================================
1009 template <StandardCharacter CharType>
1010 template <typename T, typename... Args>
1011 inline void BasicString<CharType>::join_internal(
1012  string_type &result,
1013  char_type separator,
1014  T &&value,
1015  Args &&...args)
1016 {
1017  result += format(FLY_ARR(char_type, "{}{}"), std::forward<T>(value), separator);
1018  join_internal(result, separator, std::forward<Args>(args)...);
1019 }
1020 
1021 //==================================================================================================
1022 template <StandardCharacter CharType>
1023 template <typename T>
1024 inline void BasicString<CharType>::join_internal(string_type &result, char_type, T &&value)
1025 {
1026  result += format(FLY_ARR(char_type, "{}"), std::forward<T>(value));
1027 }
1028 
1029 //==================================================================================================
1030 template <StandardCharacter CharType>
1031 template <typename T>
1032 std::optional<T> BasicString<CharType>::convert(const string_type &value)
1033 {
1034  if constexpr (StandardString<T>)
1035  {
1036  return unicode::template convert_encoding<T>(value);
1037  }
1038  else if constexpr (fly::SameAs<char_type, char>)
1039  {
1040  return detail::Converter<T>::convert(value);
1041  }
1042  else
1043  {
1044  if (auto result = unicode::template convert_encoding<std::string>(value); result)
1045  {
1046  return detail::Converter<T>::convert(*result);
1047  }
1048 
1049  return std::nullopt;
1050  }
1051 }
1052 
1053 } // namespace fly
Definition: string.hpp:51
static void remove_all(string_type &target, view_type search)
Definition: string.hpp:736
static string_type format(FormatString< ParameterTypes... > &&fmt, ParameterTypes &&...parameters)
static constexpr bool is_upper(char_type ch)
Definition: string.hpp:598
static std::optional< string_type > unescape_codepoint(IteratorType &it, const IteratorType &end)
static std::optional< T > convert(const string_type &value)
Definition: string.hpp:1032
static void replace_all(string_type &target, view_type search, char_type replace)
Definition: string.hpp:710
requires static fly::UnicodePrefixCharacter< UnicodePrefix > std::optional< string_type > escape_all_codepoints(view_type value)
static constexpr bool is_space(char_type ch)
Definition: string.hpp:640
static constexpr size_type size(T &&value)
static std::optional< string_type > unescape_all_codepoints(view_type value)
Definition: string.hpp:841
static constexpr bool is_alpha(char_type ch)
Definition: string.hpp:591
static string_type generate_random_string(size_type length)
Definition: string.hpp:894
static std::optional< string_type > encode_codepoint(codepoint_type codepoint)
Definition: string.hpp:797
static constexpr char_type to_lower(char_type ch)
Definition: string.hpp:626
static constexpr bool is_digit(char_type ch)
Definition: string.hpp:612
static bool validate(view_type value)
Definition: string.hpp:778
static void trim(string_type &target)
Definition: string.hpp:695
static std::optional< codepoint_type > decode_codepoint(IteratorType &it, const IteratorType &end)
static void format_to(OutputIterator output, FormatString< ParameterTypes... > &&fmt, ParameterTypes &&...parameters)
Definition: string.hpp:939
static constexpr char_type to_upper(char_type ch)
Definition: string.hpp:619
static string_type join(char_type separator, Args &&...args)
static std::vector< string_type > split(view_type input, char_type delimiter)
Definition: string.hpp:647
requires static fly::UnicodePrefixCharacter< UnicodePrefix > std::optional< string_type > escape_codepoint(IteratorType &it, const IteratorType &end)
static constexpr bool is_lower(char_type ch)
Definition: string.hpp:605
static bool wildcard_match(view_type source, view_type search)
Definition: string.hpp:743
static constexpr bool is_x_digit(char_type ch)
Definition: string.hpp:633
static constexpr bool is_digit(CharType ch)
Definition: classifier.hpp:261
static constexpr bool is_lower(CharType ch)
Definition: classifier.hpp:230
static constexpr bool is_space(CharType ch)
Definition: classifier.hpp:276
static constexpr CharType to_upper(CharType ch)
Definition: classifier.hpp:237
static constexpr CharType to_lower(CharType ch)
Definition: classifier.hpp:249
static constexpr bool is_alpha(CharType ch)
Definition: classifier.hpp:216
static constexpr bool is_x_digit(CharType ch)
Definition: classifier.hpp:268
static constexpr size_type size(T &&value)
static constexpr bool is_upper(CharType ch)
Definition: classifier.hpp:223
Definition: format_context.hpp:21
Definition: format_parse_context.hpp:22
constexpr view_type view() const
Definition: format_parse_context.hpp:179
Definition: format_string.hpp:36
Definition: unicode.hpp:31
Definition: traits.hpp:18
Definition: converter.hpp:26