Line data Source code
1 : % Copyright (C) 2012-2021,2022 John E. Davis
2 : %
3 : % This file is part of the S-Lang Library and may be distributed under the
4 : % terms of the GNU General Public License. See the file COPYING for
5 : % more information.
6 : %---------------------------------------------------------------------------
7 1 : import ("csv");
8 :
% Reader callback used by the CSV decoder when the input is a File_Type.
% Returns the next input line, or NULL at EOF.  Lines beginning with the
% configured comment string are skipped, but only while the decoder is
% outside a quoted field (in_quote == 0), so quoted data may contain the
% comment character.  info is the func_data struct built in csv_decoder_new.
9 : private define read_fp_callback (in_quote, info)
10 : {
11 117 : variable line, comment_char = info.comment_char;
12 : forever
13 : {
14 119 : if (-1 == fgets (&line, info.fp))
15 14 : return NULL;
16 :
% Maintain the current line number for error reporting in read_cols.
17 105 : info.line_num++;
% Cheap first-byte test before the full comment-prefix comparison.
18 105 : if ((line[0] == comment_char)
19 : && (in_quote == 0)
20 : && (0 == strnbytecmp (line, info.comment, info.comment_len)))
21 2 : continue;
22 :
23 103 : return line;
24 : }
25 : }
26 :
% Reader callback used when the decoder input is an array/list of strings.
% Returns the next string, or NULL when the list is exhausted.  If the
% previously returned string lacked a trailing newline, a bare "\n" is
% emitted on the next call so the decoder still sees a line terminator.
% NOTE(review): in_quote is unused and the comment fields are not consulted
% here, so comment lines are not filtered for string input -- confirm intended.
27 : private define read_strings_callback (in_quote, str_info)
28 : {
29 : variable line;
30 :
% Pending newline from a string that did not end in '\n'.
31 39 : if (str_info.output_crlf)
32 : {
33 0 : str_info.output_crlf = 0;
34 0 : return "\n";
35 : }
36 39 : variable i = str_info.i;
37 39 : if (i >= str_info.n)
38 3 : return NULL;
39 36 : line = str_info.strings[i];
40 36 : str_info.i = i+1;
% line[-1] is the last byte; schedule a synthetic newline if missing.
41 36 : if (line[-1] != '\n')
42 0 : str_info.output_crlf = 1;
43 :
44 36 : str_info.line_num++;
45 36 : return line;
46 : }
47 :
% Resize each array in `arrays` (an array/list of arrays) to n elements:
% longer arrays are truncated, shorter ones grown.  New tail elements of
% a grown array get the element type's default value.
48 : private define resize_arrays (arrays, n)
49 : {
% _for with no loop variable pushes the index; it is popped with ().
50 15 : _for (0, length(arrays)-1, 1)
51 : {
52 39 : variable i = ();
53 39 : variable a = arrays[i];
54 39 : variable m = length(a);
55 54 : if (m == n) continue;
56 24 : if (m > n)
57 : {
% Truncate: keep the first n elements.
58 24 : arrays[i] = a[[:n-1]];
59 24 : continue;
60 : }
% Grow: allocate a larger array of the same type and copy the data.
61 0 : variable b = _typeof(a)[n];
62 0 : b[[:m-1]] = a;
63 0 : arrays[i] = b;
64 : }
65 : }
66 :
% Concatenate the per-chunk column arrays produced by read_cols into a
% single array per column.  Input: a list of chunks, each holding ncols
% column arrays.  Returns a list of ncols merged arrays.
67 : private define merge_column_arrays (list_of_column_arrays)
68 : {
69 15 : variable j, n = length (list_of_column_arrays);
70 15 : variable column_arrays = list_of_column_arrays[0];
71 15 : variable i, ncols = length (column_arrays);
72 15 : variable merged = {};
73 15 : _for i (0, ncols-1, 1)
74 : {
% Gather column i from every chunk, then concatenate via [__push_list(...)].
75 39 : variable array_list = {};
76 39 : _for j (0, n-1, 1)
77 : {
78 48 : column_arrays = list_of_column_arrays[j];
79 48 : list_append (array_list, column_arrays[i]);
80 : }
81 39 : list_append (merged, [__push_list(__tmp(array_list))]);
82 : }
83 15 : return merged;
84 : }
85 :
% Convert the string x to a Float_Type value.  atof yields Double_Type;
% the typecast narrows the result to single precision.
private define atofloat (x)
{
   return typecast (atof (x), Float_Type);
}
90 :
% Map a "blankrows" qualifier value to the decoder flag bits:
%   "skip" -> CSV_SKIP_BLANK_ROWS, "stop" -> CSV_STOP_BLANK_ROWS,
%   anything else -> 0 (blank rows are returned as data).
private define get_blankrows_bits (val)
{
   switch (val)
   { case "skip": return CSV_SKIP_BLANK_ROWS; }
   { case "stop": return CSV_STOP_BLANK_ROWS; }
   return 0;
}
97 :
% readrow method: decode and return the next row as an array of field
% strings, or NULL at end of input.  A "blankrows" qualifier
% ("skip"/"stop") overrides the blank-row behavior that the decoder
% object was created with.
98 : private define read_row (csv)
99 : {
100 : % The blank row handling default is to use that of the csv object.
101 15 : if (qualifier_exists ("blankrows"))
102 : {
103 0 : return _csv_decode_row (csv.decoder,
104 : get_blankrows_bits (qualifier("blankrows")));
105 : }
106 15 : return _csv_decode_row (csv.decoder);
107 : }
108 :
% Canonicalize header/column names: lower-case, trim surrounding
% whitespace, and replace empty names with "colN" (1-based position).
% Accepts a scalar string, an array, or a list (converted to an array);
% non-string input (e.g. integer column numbers or NULL) is returned
% unchanged.
109 : private define fixup_header_names (names)
110 : {
111 50 : if (names == NULL) return names;
112 30 : if (typeof (names) == List_Type)
113 0 : names = list_to_array (names);
114 30 : if (_typeof(names) != String_Type)
115 3 : return names;
116 :
% Work on an array internally; remember whether to unwrap the result.
117 27 : variable is_scalar = (typeof (names) != Array_Type);
118 27 : if (is_scalar)
119 8 : names = [names];
120 :
121 27 : names = strlow (names);
% Empty names become "col1", "col2", ... based on position.
122 27 : variable i = where (names == "");
123 27 : names[i] = array_map (String_Type, &sprintf, "col%d", i+1);
124 :
125 27 : names = strtrim (names); % strip leading/trailing WS
126 35 : if (is_scalar) names = names[0];
127 27 : return names;
128 : }
129 :
% Pop the n column-specifier arguments off the stack and return them as
% a single flat array.  The arguments may mix scalars and arrays, but
% must all be of one element type (all ints or all strings).
130 : private define pop_columns_as_array (n)
131 : {
132 6 : if (n == 0)
133 0 : return String_Type[0];
134 :
135 : try
136 : {
137 : % allow a mixture of arrays and scalars
138 6 : variable columns = __pop_list (n);
% [__push_list(...)] flattens the list items into one array; it raises
% TypeMismatchError when ints and strings are mixed.
139 6 : columns = [__push_list(columns)];
140 6 : return columns;
141 : }
142 : catch TypeMismatchError:
143 : {
144 0 : throw TypeMismatchError, "Column arguments cannot be a mixture of ints and strings";
145 : }
146 : }
147 :
148 :
% readcol method: read the remaining rows of the CSV stream and return
% them as a struct of column arrays.  Optional arguments select columns
% by 1-based number or by (canonicalized) header name; qualifiers control
% per-column types and empty-field replacement values -- see the usage
% message below.  Fixes applied: "specifiy" -> "specify" and the missing
% ")" in the nanNTH usage line.
149 : private define read_cols ()
150 : {
151 15 : if ((_NARGS == 0) || (qualifier_exists ("help")))
152 : {
153 0 : usage("struct = .readcol ([columns] ; qualifiers)\n\
154 : where columns is an optional 1-based array of column numbers,\n\
155 : or array of column names.\n\
156 : Qualifiers:\n\
157 : header=header, fields=[array of field names],\n\
158 : type=value|array|string of 's','i','l','f','d' (str,int,long,float,dbl)\n\
159 : typeNTH=val (specify type for NTH column)\n\
160 : snan=\"\", inan=0, lnan=0L, fnan=_NaN, dnan=_NaN (defaults for empty fields),\n\
161 : nanNTH=val (value used for an empty field in the NTH column)\n\
162 : init_size=int (number of rows to initially read)\n\
163 : "
164 : );
165 : }
166 :
167 15 : variable columns = NULL;
168 15 : if (_NARGS > 1)
169 : {
170 6 : columns = pop_columns_as_array (_NARGS-1);
171 : }
% The csv object is the deepest argument on the stack.
172 15 : variable csv = ();
173 :
174 15 : variable fields = qualifier ("fields");
175 15 : variable header = qualifier ("header");
176 15 : variable types = qualifier ("type");
177 15 : variable snan = qualifier ("snan", "");
178 15 : variable dnan = qualifier ("dnan", _NaN);
179 15 : variable fnan = qualifier ("fnan", typecast(_NaN,Float_Type));
180 15 : variable inan = qualifier ("inan", 0);
181 15 : variable lnan = qualifier ("lnan", 0L);
182 15 : variable init_size = qualifier ("init_size", 0x8000);
183 15 : if (init_size <= 0) init_size = 0x8000;
184 :
185 15 : if ((fields != NULL) && (columns != NULL)
186 : && (length(fields) != length(columns)))
187 0 : throw InvalidParmError, "The fields qualifier must be the same size as the number of columns";
188 :
189 15 : variable flags = get_blankrows_bits (qualifier("blankrows", "skip"));
190 :
191 15 : header = fixup_header_names (header);
192 15 : columns = fixup_header_names (columns);
193 :
194 15 : variable columns_are_string = _typeof(columns) == String_Type;
195 :
196 15 : if ((header == NULL) && columns_are_string)
197 0 : throw InvalidParmError, "No header was supplied to map column names";
198 :
% Map column names to 1-based column indices using the header.
199 15 : variable column_ints = columns, col, i, j;
200 15 : if (columns_are_string)
201 : {
202 3 : column_ints = Int_Type[length(columns)];
203 3 : _for i (0, length(columns)-1, 1)
204 : {
205 6 : col = columns[i];
206 6 : j = wherefirst (col == header);
207 6 : if (j == NULL)
208 0 : throw InvalidParmError, "Unknown (canonical) column name $col"$;
209 6 : column_ints[i] = j+1;
210 : }
211 : }
212 :
% Read the first data row; decode errors are rethrown with line number.
213 15 : variable datastruct = NULL, ncols, row_data, e;
214 : try (e)
215 : {
216 15 : row_data = _csv_decode_row (csv.decoder, flags);
217 : }
218 : catch AnyError:
219 : {
220 0 : throw e.error, sprintf ("Error encountered decoding line %S: %S", csv.func_data.line_num, e.message);
221 : }
222 :
223 15 : variable nread = 0;
224 15 : if (row_data != NULL)
225 : {
226 13 : nread++;
227 :
% No explicit columns: default to every column of the first row.
228 13 : if (column_ints == NULL)
229 7 : column_ints = [1:length(row_data)];
230 :
231 13 : if (any(column_ints>length(row_data)))
232 : {
233 0 : throw InvalidParmError, "column number is too large for data";
234 : }
235 : }
236 :
% Empty stream: infer the number of columns from the qualifiers.
237 15 : if (column_ints == NULL)
238 : {
239 2 : if (fields != NULL)
240 0 : ncols = length(fields);
241 2 : else if (columns_are_string)
242 0 : ncols = length(columns);
243 2 : else if (header != NULL)
244 2 : ncols = length (header);
245 : else
246 0 : throw RunTimeError, "Insufficient information to determine the number of columns in the CSV file";
247 :
248 2 : column_ints = [1:ncols];
249 : }
250 :
% Derive struct field names when the fields qualifier was not given.
251 15 : if (fields == NULL)
252 : {
253 15 : if (columns_are_string)
254 3 : fields = columns;
255 12 : else if (header != NULL)
256 11 : fields = header[column_ints-1];
257 : else
258 1 : fields = array_map(String_Type, &sprintf, "col%d", column_ints);
259 : }
260 15 : ncols = length(fields);
261 15 : datastruct = @Struct_Type(fields);
262 :
263 15 : column_ints -= 1; % make 0-based
264 :
% Per-column conversion functions and empty-field replacement values.
265 15 : variable convert_funcs = Ref_Type[ncols], convert_func, val;
266 69 : variable nan_values = {}; loop(ncols) list_append(nan_values, snan);
267 :
268 15 : if (types == NULL)
269 : {
270 13 : types = qualifier_exists ("auto") ? 'A' : 's';
271 : }
272 :
273 15 : if (typeof(types) == List_Type)
274 0 : types = list_to_array (types);
275 :
276 15 : if (typeof(types) == String_Type)
277 1 : types = bstring_to_array (types);
278 :
279 15 : if ((typeof(types) == Array_Type) && (length(types) != ncols))
280 0 : throw InvalidParmError, "types array must be equal to the number of columns";
281 :
282 15 : if (typeof (types) != Array_Type)
283 14 : types = types[Int_Type[ncols]]; % single (default) type specified
284 :
% Allow per-column overrides via typeN qualifiers (1-based N).
285 : variable i1;
286 15 : _for i (1, ncols, 1)
287 : {
288 39 : i1 = i-1;
289 39 : val = qualifier ("type$i"$, types[i1]);
290 :
291 39 : types[i1] = val;
292 : }
293 :
% Bind the conversion function and empty-field value for each type code.
294 15 : i = where(types=='i');
295 30 : convert_funcs[i] = &atoi; nan_values[i] = typecast(inan, Int_Type);
296 15 : i = where(types=='l');
297 30 : convert_funcs[i] = &atol; nan_values[i] = typecast(lnan, Long_Type);
298 15 : i = where(types=='f');
299 30 : convert_funcs[i] = &atofloat; nan_values[i] = typecast (fnan, Float_Type);
300 15 : i = where(types=='d');
301 30 : convert_funcs[i] = &atof; nan_values[i] = typecast(dnan, Double_Type);
302 :
303 15 : _for i (1, ncols, 1)
304 : {
305 39 : i1 = i-1;
306 :
% 'A' (auto) columns: guess the type from the first row's value.
307 39 : if (types[i1] == 'A')
308 : {
309 3 : variable type = _slang_guess_type (row_data[i1]);
310 3 : if (type == Double_Type)
311 : {
312 2 : convert_funcs[i1] = &atof;
313 2 : nan_values[i1] = dnan;
314 : }
315 1 : else if (type == Int_Type)
316 : {
317 1 : convert_funcs[i1] = &atoi;
318 1 : nan_values[i1] = inan;
319 : }
320 : }
321 :
% Per-column nanN qualifier, coerced to the column's value type.
322 39 : val = nan_values[i1];
323 39 : nan_values[i1] = typecast (qualifier ("nan$i"$, val), typeof(val));
324 : }
325 :
% Column storage grows in chunks of max_allocated rows; full chunks are
% collected in list_of_column_arrays and concatenated at the end.
326 15 : variable column_arrays = Array_Type[ncols], array;
327 15 : variable dsize = init_size;
328 15 : variable max_allocated = init_size;
329 15 : variable list_of_column_arrays = {};
330 15 : _for i (0, ncols-1, 1)
331 : {
332 39 : if (row_data == NULL)
333 : {
% No data at all: produce empty arrays of the proper type.
334 6 : column_arrays[i] = typeof(nan_values[i])[0];
335 6 : continue;
336 : }
337 :
338 33 : val = row_data[column_ints[i]];
339 33 : array = typeof(nan_values[i])[max_allocated];
% Empty field: substitute the column's nan value; otherwise convert.
340 33 : ifnot (strbytelen(val))
341 0 : val = nan_values[i];
342 : else
343 : {
344 33 : convert_func = convert_funcs[i];
345 33 : if (convert_func != NULL)
346 5 : val = (@convert_func)(val);
347 : }
348 33 : array[0] = val;
349 33 : column_arrays[i] = array;
350 : }
351 15 : list_append (list_of_column_arrays, column_arrays);
352 :
% Rows shorter than the largest requested column index are skipped.
353 15 : variable min_row_size = 1+max(column_ints);
354 : forever
355 : {
356 : try (e)
357 : {
358 43 : row_data = _csv_decode_row (csv.decoder, flags);
359 : }
360 : catch AnyError:
361 : {
362 0 : throw e.error, sprintf ("Error encountered decoding line %S: %S", csv.func_data.line_num, e.message);
363 : }
364 58 : if (row_data == NULL) break;
365 :
366 28 : if (length (row_data) < min_row_size)
367 : {
368 : % FIXME-- make what to do here configurable
369 0 : if (length(row_data) == 0)
370 0 : break;
371 :
372 0 : continue;
373 : }
374 :
% Current chunk full: start a new chunk and reset the row counter.
375 28 : if (nread >= max_allocated)
376 : {
377 3 : column_arrays = Array_Type[ncols];
378 3 : _for i (0, ncols-1, 1)
379 9 : column_arrays[i] = _typeof(list_of_column_arrays[0][i])[max_allocated];
380 3 : list_append (list_of_column_arrays, column_arrays);
381 3 : nread = 0;
382 : }
383 :
384 28 : _for i (0, ncols-1, 1)
385 : {
386 94 : val = row_data[column_ints[i]];
387 94 : ifnot (strbytelen(val))
388 : {
389 0 : column_arrays[i][nread] = nan_values[i];
390 0 : continue;
391 : }
392 94 : convert_func = convert_funcs[i];
393 94 : if (convert_func == NULL)
394 : {
395 49 : column_arrays[i][nread] = val;
396 49 : continue;
397 : }
398 45 : column_arrays[i][nread] = (@convert_func)(val);
399 : }
400 28 : nread++;
401 : }
% Trim the last (partial) chunk, then concatenate chunks per column.
402 15 : resize_arrays (__tmp(column_arrays), nread);
403 15 : list_of_column_arrays = merge_column_arrays (__tmp(list_of_column_arrays));
404 :
405 15 : set_struct_fields (datastruct, __push_list(list_of_column_arrays));
406 15 : return datastruct;
407 : }
408 :
% Create a CSV decoder object for a filename, an open File_Type, or an
% array/list of strings.  The returned struct carries the low-level
% decoder handle plus readrow/readcol methods and the reader state.
409 : define csv_decoder_new ()
410 : {
411 15 : if (_NARGS != 1)
412 0 : usage ("\
413 : obj = csv_decoder_new (file|fp|strings ; qualifiers);\n\
414 : Qualifiers:\n\
415 : quote='\"', delim=',', skiplines=0, comment=string");
416 :
417 15 : variable fp = ();
418 30 : variable type = typeof(fp), file = fp;
419 15 : variable func = &read_fp_callback;
420 : variable func_data;
421 :
422 15 : variable skiplines = qualifier("skiplines", 0);
423 15 : variable delim = qualifier("delim", ',');
424 15 : variable quote = qualifier("quote", '"');
425 15 : variable comment = qualifier("comment", NULL);
426 15 : variable comment_char = (comment == NULL) ? NULL : comment[0];
427 15 : variable flags = get_blankrows_bits (qualifier("blankrows", "skip"));
428 :
% String-array input: iterate over the strings instead of reading a file.
429 15 : if ((type == Array_Type) || (type == List_Type))
430 : {
431 3 : func = &read_strings_callback;
432 3 : func_data = struct
433 : {
434 3 : strings = fp,
435 3 : line_num = skiplines,
436 6 : i = skiplines, n = length(fp),
437 3 : output_crlf = 0,
% NOTE(review): read_strings_callback does not consult these comment
% fields, so comment lines are not skipped for string input -- confirm.
438 3 : comment_char = comment_char,
439 3 : comment = comment,
440 : };
441 : }
442 : else
443 : {
444 : variable line;
% A filename was given: open it and skip a UTF-8 BOM if present.
445 12 : if (type != File_Type)
446 : {
447 9 : fp = fopen (file, "r");
448 9 : if (fp == NULL)
449 0 : throw OpenError, "Unable to open CSV file '$file'"$;
450 :
451 : % Ignore a BOM if it exists
452 9 : if (-1 != fgets (&line, fp))
453 : {
454 8 : if (0 == strnbytecmp (line, "\xEF\xBB\xBF", 3))
455 0 : () = fseek (fp, 3, SEEK_SET);
456 : else
457 8 : () = fseek (fp, 0, SEEK_SET);
458 : }
459 : }
460 :
461 12 : func_data = struct
462 : {
463 12 : fp = fp,
464 12 : line_num = skiplines,
465 12 : comment_char = comment_char,
466 12 : comment = comment,
467 12 : comment_len = ((comment == NULL) ? 0 : strbytelen(comment)),
468 : };
% Discard the requested number of leading lines.
469 12 : loop (skiplines)
470 0 : () = fgets (&line, fp);
471 : }
472 :
473 15 : variable csv = struct
474 : {
475 15 : decoder = _csv_decoder_new (func, func_data, delim, quote, flags),
476 15 : readrow = &read_row,
477 15 : readcol = &read_cols,
478 15 : func_data = func_data,
479 : };
480 :
481 15 : return csv;
482 : }
483 :
484 : % Encoder
485 :
% writecol method of a csv encoder object: write columns of data as CSV
% rows to file|fp.  data may be a struct (fields become columns), a
% list/array of columns, or separate col1,col2,... arguments.  Fix
% applied: removed the unused local `tmp` that was never referenced.
486 : private define writecol ()
487 : {
488 3 : if ((_NARGS < 3) || qualifier_exists("help"))
489 : {
490 0 : usage("\
491 : writecol (file|fp, list_of_column_data | datastruct | col1,col2,...)\n\
492 : Qualifiers:\n\
493 : names=array-of-column-names, noheader, quoteall, quotesome, rdb\n\
494 : "
495 : );
496 : }
497 :
498 : variable csv, data, file;
499 3 : if (_NARGS == 3)
500 : {
501 3 : (csv, file, data) = ();
502 : }
503 : else
504 : {
% Multiple column arguments: gather them into a list.
505 0 : data = __pop_list (_NARGS-2);
506 0 : (csv, file) = ();
507 : }
508 :
% Promote a single bare column to a one-element list.
509 3 : variable type = typeof (data);
510 3 : if ((type != List_Type) && (type != Array_Type)
511 : && not is_struct_type (data))
512 0 : data = {data};
513 :
514 3 : variable flags = 0;
515 3 : if (qualifier_exists ("quoteall")) flags |= CSV_QUOTE_ALL;
516 3 : if (qualifier_exists ("quotesome")) flags |= CSV_QUOTE_SOME;
517 3 : variable rdb = qualifier_exists ("rdb");
518 :
519 3 : variable fp = file;
520 3 : if (typeof(file) != File_Type)
521 3 : fp = fopen (file, "wb");
522 3 : if (fp == NULL)
523 0 : throw OpenError, "Error opening $file in write mode"$;
524 :
525 3 : variable names = NULL;
526 3 : ifnot (qualifier_exists ("noheader"))
527 : {
528 3 : names = qualifier ("names");
529 3 : if ((names == NULL) && is_struct_type (data))
530 3 : names = get_struct_field_names (data);
531 : }
532 :
% Convert a struct into a list of its field values; pop() discards the
% count left by _push_struct_field_values, and list_reverse restores
% the declared field order.
533 3 : if (is_struct_type (data))
534 : {
536 3 : data = {(_push_struct_field_values(data), pop())};
537 3 : list_reverse (data);
538 : }
539 :
% Close fp on any return below, but only if this function opened it.
540 : EXIT_BLOCK
541 : {
542 3 : ifnot (__is_same(file, fp))
543 : {
544 3 : if (-1 == fclose (fp))
545 0 : throw WriteError, "Error closing $file"$;
546 : }
547 : }
548 :
549 3 : variable i, ncols = length(data);
550 3 : if (ncols == 0)
551 : return;
552 :
553 : % The following assumes that data is a list or array of lists or
554 : % array.
% Shallow-copy so the scalar promotion below cannot modify the caller's
% container.
555 3 : data = @data;
556 3 : _for i (0, ncols-1, 1)
557 : {
558 10 : variable t = typeof(data[i]);
559 10 : if ((t != List_Type) && (t != Array_Type))
560 0 : data[i] = [data[i]];
561 : }
562 :
% All columns must have the same number of rows.
563 3 : variable nrows = length(data[0]), j;
564 3 : _for i (1, ncols-1, 1)
565 : {
566 7 : if (nrows != length(data[i]))
567 0 : throw InvalidParmError, "CSV data columns must be the same length";
568 : }
569 :
570 3 : variable str, encoder = csv.encoder;
571 :
% Optional header row; for rdb output, also a row of per-column type
% codes ("N" = numeric, "S" = string).
572 3 : if (names != NULL)
573 : {
574 3 : if (typeof (names) == List_Type)
575 0 : names = list_to_array (names);
576 3 : str = _csv_encode_row (encoder, names, flags);
577 3 : if (-1 == fputs (str, fp))
578 0 : throw WriteError, "Write to CSV file failed";
579 3 : if (rdb)
580 : {
581 1 : variable types = String_Type[ncols];
582 1 : _for i (0, ncols-1, 1)
583 4 : types[i] = __is_datatype_numeric (_typeof(data[i])) ? "N" : "S";
584 :
585 1 : str = _csv_encode_row (encoder, types, flags);
586 1 : if (-1 == fputs (str, fp))
587 0 : throw WriteError, "Write to CSV file failed";
588 : }
589 : }
590 :
% Emit the data rows, stringifying each cell.
591 3 : variable row_data = String_Type[ncols];
592 3 : _for i (0, nrows-1, 1)
593 : {
594 12 : _for j (0, ncols-1, 1)
595 46 : row_data[j] = string (data[j][i]);
596 :
597 12 : str = _csv_encode_row (encoder, row_data, flags);
598 12 : if (-1 == fputs (str, fp))
599 0 : throw WriteError, "Write to CSV file failed";
600 : }
601 : }
602 :
% Create a CSV encoder object.  The returned struct carries the
% low-level encoder handle and a writecol method.  With the rdb
% qualifier the default delimiter becomes a tab.
603 : define csv_encoder_new ()
604 : {
605 3 : if (qualifier_exists ("help"))
606 : {
607 0 : usage ("csv = csv_encoder_new ();\n\
608 : Qualifiers:\n\
609 : delim=','\n\
610 : quote='\"'\n\
611 : quotesome, quoteall\n\
612 : rdb\n\
613 : "
614 : );
615 : }
616 :
% Translate the quoting qualifiers into encoder flag bits.
617 3 : variable flags = 0;
618 3 : if (qualifier_exists ("quoteall")) flags |= CSV_QUOTE_ALL;
619 3 : if (qualifier_exists ("quotesome")) flags |= CSV_QUOTE_SOME;
620 3 : variable quotechar = qualifier ("quote", '"');
621 3 : variable delimchar = qualifier ("delim",
622 : qualifier_exists ("rdb") ? '\t' : ',');
623 :
624 3 : variable csv = struct
625 : {
626 3 : encoder = _csv_encoder_new (delimchar, quotechar, flags),
627 3 : writecol = &writecol,
628 : };
629 :
630 3 : return csv;
631 : }
632 :
% Convenience wrapper: create an encoder from the given qualifiers and
% write the columns to file|fp in a single call.
633 : define csv_writecol ()
634 : {
635 3 : if ((_NARGS < 2) || qualifier_exists("help"))
636 : {
637 0 : usage("\
638 : csv_writecol (file|fp, list_of_column_data | datastruct | col1,col2,...)\n\
639 : Qualifiers:\n\
640 : names=array-of-column-names, noheader, quote=val, quoteall, quotesome\n\
641 : "
642 : );
643 : }
644 :
% Collect all arguments, then forward them (and the qualifiers) to the
% encoder's writecol method.
645 3 : variable args = __pop_list (_NARGS);
646 3 : variable csv = csv_encoder_new (;;__qualifiers);
647 3 : csv.writecol (__push_list(args);;__qualifiers);
648 : }
649 :
% In-place best-effort conversion of a string-array struct field to a
% numeric array.  If any element looks like a float, the whole column is
% converted with atof; if any looks like a non-numeric string, the
% column is left as strings; otherwise integers are used, widening to
% Long_Type when Int_Type would overflow.
650 : private define convert_to_numeric (s, name)
651 : {
652 8 : variable val = get_struct_field (s, name);
653 8 : variable num = length (val);
654 8 : if ((num == 0) || (_typeof (val) != String_Type))
655 : return;
656 :
% The EXIT_BLOCK runs on every return below, storing the (possibly
% converted) value back into the struct field.
657 : EXIT_BLOCK
658 : {
659 3 : set_struct_field (s, name, val);
660 : }
661 :
% _for with no loop variable pushes the index; it is popped with ().
662 3 : _for (0, length (val)-1, 1)
663 : {
664 12 : variable i = ();
665 12 : variable type = _slang_guess_type (val[i]);
666 12 : if ((type == Double_Type) || (type == Float_Type))
667 : {
% atof applied to the whole string array converts every element.
668 2 : val = atof (val);
669 : return;
670 : }
671 10 : if (type == String_Type)
672 : return;
673 : % Otherwise an integer
674 : }
675 :
% All elements are integers: prefer Int_Type unless the values differ
% from their Long_Type conversion (i.e. atoi overflowed).
676 1 : variable lval = atol (val);
677 1 : val = atoi (val);
678 1 : if (any(val != lval))
679 0 : val = lval;
680 : }
681 :
% High-level reader: open file|fp, optionally consume a header row (and,
% for rdb files, a type row), then read the requested columns into a
% struct.  rdb files are tab-delimited with '#' comments.  Fixes applied:
% the usage message named the function "csvreadcol" (actual name is
% csv_readcol), "specifiy" -> "specify", missing ")" in the nanNTH line,
% and the comment typo "numberic" -> "numeric".
682 : define csv_readcol ()
683 : {
684 15 : if ((_NARGS == 0) || qualifier_exists("help"))
685 : {
686 0 : usage ("struct = csv_readcol (file|fp [,columns] ;qualifier)\n\
687 : where columns is an optional 1-based array of column numbers,\n\
688 : or array of column names.\n\
689 : Qualifiers:\n\
690 : quote='\"', delim=',', skiplines=0, comment=string, has_header,\n\
691 : header=header, fields=[array of field names],\n\
692 : type=value|array of 's','i','l','f','d' (string,int,long,float,double)\n\
693 : typeNTH=val (specify type for NTH column)\n\
694 : snan=\"\", inan=0, lnan=0L, fnan=_NaN, dnan=_NaN (defaults for empty fields),\n\
695 : nanNTH=val (value used for an empty field in the NTH column)\n\
696 : "
697 : );
698 : }
699 :
700 : variable file, columns;
701 15 : columns = __pop_list (_NARGS-1);
702 15 : file = ();
703 :
704 15 : variable q = __qualifiers ();
705 15 : variable rdb = qualifier_exists ("rdb");
706 :
707 : % rdb files are tab-delimited files, # is a comment character,
708 : % the first non-comment line contains the field names, the
709 : % second line gives the field types.
710 15 : if (rdb)
711 : {
712 8 : q = struct { @q, comment = "#", delim = '\t' };
713 : }
714 15 : variable types = NULL;
715 15 : variable csv = csv_decoder_new (file ;; q);
716 15 : if (rdb || qualifier_exists ("has_header"))
717 : {
718 13 : variable header = csv.readrow ();
719 13 : if (header == NULL)
720 0 : throw ReadError, "Unable to read a CSV header row";
721 :
% Pass the header on to readcol via the qualifier struct.
722 39 : q = struct { header=header, @q };
723 13 : if (rdb)
724 : {
725 : % The type field consists of an integer, followed by a
726 : % type specifier, and a justification character. The
727 : % integer and justification characters are for display
728 : % purposes. The type specifier is N for numeric, S for
729 : % string, M for month. Here, M and S will be treated the
730 : % same.
731 2 : types = csv.readrow ();
732 2 : types = strtrans (types, "0-9<>", "");
733 : }
734 : }
735 :
736 15 : variable s = csv.readcol (__push_list(columns) ;; q);
737 15 : if (rdb)
738 : {
% rdb columns are read as strings; convert the "N" (numeric) ones.
739 2 : ifnot (length (columns))
740 2 : columns = header;
741 :
742 2 : header = fixup_header_names (header);
743 2 : foreach (columns)
744 : {
745 8 : variable col = ();
746 8 : if (typeof (col) == String_Type)
747 8 : col = fixup_header_names (col);
748 : else
749 0 : col = header[col-1];
750 :
751 8 : variable i = wherefirst (col == header);
752 8 : if ((i == NULL) || (types[i] != "N"))
753 0 : continue;
754 :
755 8 : convert_to_numeric (s, col);
756 : }
757 : }
758 15 : return s;
759 : }
760 :
|