29 #include <boost/algorithm/string.hpp>
30 #include <boost/lexical_cast.hpp>
31 #include <boost/tokenizer.hpp>
38 using NdArray::NdArray;
50 size_t comment_pos = line.
find(comment);
51 if (comment_pos != std::string::npos) {
52 line = line.
substr(0, comment_pos);
73 if (keyword ==
"bool" || keyword ==
"boolean") {
75 }
else if (keyword ==
"int" || keyword ==
"int32") {
76 return typeid(int32_t);
77 }
else if (keyword ==
"long" || keyword ==
"int64") {
78 return typeid(int64_t);
79 }
else if (keyword ==
"float") {
81 }
else if (keyword ==
"double") {
82 return typeid(double);
83 }
else if (keyword ==
"string") {
85 }
else if (keyword ==
"[bool]" || keyword ==
"[boolean]") {
87 }
else if (keyword ==
"[int]" || keyword ==
"[int32]") {
89 }
else if (keyword ==
"[long]" || keyword ==
"[int64]") {
91 }
else if (keyword ==
"[float]") {
93 }
else if (keyword ==
"[double]") {
95 }
else if (keyword ==
"[int+]" || keyword ==
"[int32+]") {
96 return typeid(NdArray<int32_t>);
97 }
else if (keyword ==
"[long+]" || keyword ==
"[int64+]") {
98 return typeid(NdArray<int64_t>);
99 }
else if (keyword ==
"[float+]") {
100 return typeid(NdArray<float>);
101 }
else if (keyword ==
"[double+]") {
102 return typeid(NdArray<double>);
117 if (boost::starts_with(line, comment)) {
120 boost::replace_all(line, comment,
"");
122 if (boost::starts_with(line,
"Column:")) {
130 if (descriptions.
count(name) != 0) {
136 if (!boost::starts_with(token,
"(") && token !=
"-") {
143 if (boost::starts_with(token,
"(")) {
150 if (line_stream && token ==
"-") {
154 while (line_stream) {
155 desc << token <<
' ';
159 boost::trim(desc_str);
186 if (boost::starts_with(line, comment)) {
189 boost::replace_all(line, comment,
"");
194 if (boost::starts_with(line,
"Column:")) {
198 auto space_i = temp.
find(
' ');
200 temp = temp.
substr(0, space_i);
202 desc_names.emplace_back(
std::move(temp));
210 if (!last_comment.empty()) {
214 while (line_stream) {
215 names.push_back(token);
218 if (names.size() != columns_number) {
225 if (desc_names.size() != 0 && desc_names.size() != columns_number) {
226 logger.
warn() <<
"Number of column descriptions does not matches the number"
227 <<
" of the columns";
232 if (names.size() < columns_number) {
233 for (
size_t i = names.size() + 1; i <= columns_number; ++i) {
239 for (
auto name : names) {
240 if (!set.insert(name).second) {
249 template <
typename T>
252 boost::char_separator<char> sep{
","};
253 boost::tokenizer<boost::char_separator<char>> tok{str, sep};
254 for (
auto& s : tok) {
260 template <
typename T>
261 NdArray<T> convertStringToNdArray(
const std::string& str) {
264 }
else if (str[0] !=
'<') {
268 auto closing_char = str.
find(
'>');
269 if (closing_char == std::string::npos) {
273 auto shape_str = str.
substr(1, closing_char - 1);
274 auto shape_i = convertStringToVector<int32_t>(shape_str);
275 auto data = convertStringToVector<T>(str.
substr(closing_char + 1));
279 return NdArray<T>(shape_u, data);
286 if (type ==
typeid(
bool)) {
287 if (value ==
"true" || value ==
"t" || value ==
"yes" || value ==
"y" || value ==
"1") {
290 if (value ==
"false" || value ==
"f" || value ==
"no" || value ==
"n" || value ==
"0") {
293 }
else if (type ==
typeid(int32_t)) {
295 }
else if (type ==
typeid(int64_t)) {
297 }
else if (type ==
typeid(
float)) {
299 }
else if (type ==
typeid(
double)) {
313 }
else if (type ==
typeid(NdArray<int32_t>)) {
315 }
else if (type ==
typeid(NdArray<int64_t>)) {
317 }
else if (type ==
typeid(NdArray<float>)) {
319 }
else if (type ==
typeid(NdArray<double>)) {
322 }
catch (boost::bad_lexical_cast
const&) {
333 size_t comment_pos = line.
find(comment);
334 if (comment_pos != std::string::npos) {
335 line = line.
substr(0, comment_pos);
351 size_t comment_pos = line.
find(comment);
352 if (comment_pos != std::string::npos) {
353 line = line.
substr(0, comment_pos);
std::size_t countRemainingRows(std::istream &in, const std::string &comment)
T forward_as_tuple(T...args)
static Elements::Logging logger
Row::cell_type convertToCellType(const std::string &value, std::type_index type)
Converts the given value to a Row::cell_type of the given type.
std::type_index keywordToType(const std::string &keyword)
void warn(const std::string &logMessage)
std::map< std::string, ColumnDescription > autoDetectColumnDescriptions(std::istream &in, const std::string &comment)
Reads the column descriptions of the given stream.
This class gets a stream as argument during construction and when it is deleted it sets the position ...
boost::variant< bool, int32_t, int64_t, float, double, std::string, std::vector< bool >, std::vector< int32_t >, std::vector< int64_t >, std::vector< float >, std::vector< double >, NdArray::NdArray< int32_t >, NdArray::NdArray< int64_t >, NdArray::NdArray< float >, NdArray::NdArray< double > > cell_type
The possible cell types.
bool hasNextRow(std::istream &in, const std::string &comment)
std::string quoted(const std::string &str)
T back_inserter(T...args)
static Logging getLogger(const std::string &name="")
size_t countColumns(std::istream &in, const std::string &comment)
Returns the number of whitespace-separated tokens on the first non-commented line.
std::vector< std::string > autoDetectColumnNames(std::istream &in, const std::string &comment, size_t columns_number)
Reads the column names of the given stream.