LCOV - all.lcov - modules/csv.sl

LCOV - code coverage report

Current view:	top level - modules - csv.sl (source / functions)		Hit	Total	Coverage
Test:	all.lcov	Lines:	327	380	86.1 %
Date:	2022-08-02 14:41:00	Functions:	16	16	100.0 %

          Line data    Source code

       1             : % Copyright (C) 2012-2021,2022 John E. Davis
       2             : %
       3             : % This file is part of the S-Lang Library and may be distributed under the
       4             : % terms of the GNU General Public License.  See the file COPYING for
       5             : % more information.
       6             : %---------------------------------------------------------------------------
       7           1 : import ("csv");
       8             : 
       9             : private define read_fp_callback (in_quote, info)
      10             : {
      11         117 :    variable line, comment_char = info.comment_char;
      12             :    forever
      13             :      {
      14         119 :         if (-1 == fgets (&line, info.fp))
      15          14 :           return NULL;
      16             : 
      17         105 :         info.line_num++;
      18         105 :         if ((line[0] == comment_char)
      19             :             && (in_quote == 0)
      20             :             && (0 == strnbytecmp (line, info.comment, info.comment_len)))
      21           2 :           continue;
      22             : 
      23         103 :         return line;
      24             :      }
      25             : }
      26             : 
      27             : private define read_strings_callback (in_quote, str_info)
      28             : {
      29             :    variable line;
      30             : 
      31          39 :    if (str_info.output_crlf)
      32             :      {
      33           0 :         str_info.output_crlf = 0;
      34           0 :         return "\n";
      35             :      }
      36          39 :    variable i = str_info.i;
      37          39 :    if (i >= str_info.n)
      38           3 :      return NULL;
      39          36 :    line = str_info.strings[i];
      40          36 :    str_info.i = i+1;
      41          36 :    if (line[-1] != '\n')
      42           0 :      str_info.output_crlf = 1;
      43             : 
      44          36 :    str_info.line_num++;
      45          36 :    return line;
      46             : }
      47             : 
      48             : private define resize_arrays (arrays, n)
      49             : {
      50          15 :    _for (0, length(arrays)-1, 1)
      51             :      {
      52          39 :         variable i = ();
      53          39 :         variable a = arrays[i];
      54          39 :         variable m = length(a);
      55          54 :         if (m == n) continue;
      56          24 :         if (m > n)
      57             :           {
      58          24 :              arrays[i] = a[[:n-1]];
      59          24 :              continue;
      60             :           }
      61           0 :         variable b = _typeof(a)[n];
      62           0 :         b[[:m-1]] = a;
      63           0 :         arrays[i] = b;
      64             :      }
      65             : }
      66             : 
      67             : private define merge_column_arrays (list_of_column_arrays)
      68             : {
      69          15 :    variable j, n = length (list_of_column_arrays);
      70          15 :    variable column_arrays = list_of_column_arrays[0];
      71          15 :    variable i, ncols = length (column_arrays);
      72          15 :    variable merged = {};
      73          15 :    _for i (0, ncols-1, 1)
      74             :      {
      75          39 :         variable array_list = {};
      76          39 :         _for j (0, n-1, 1)
      77             :           {
      78          48 :              column_arrays = list_of_column_arrays[j];
      79          48 :              list_append (array_list, column_arrays[i]);
      80             :           }
      81          39 :         list_append (merged, [__push_list(__tmp(array_list))]);
      82             :      }
      83          15 :    return merged;
      84             : }
      85             : 
      86             : private define atofloat (x)
      87             : {
      88          20 :    typecast (atof(x), Float_Type);
      89             : }
      90             : 
      91             : private define get_blankrows_bits (val)
      92             : {
      93          60 :    if (val == "skip") return CSV_SKIP_BLANK_ROWS;
      94           0 :    if (val == "stop") return CSV_STOP_BLANK_ROWS;
      95           0 :    return 0;
      96             : }
      97             : 
      98             : private define read_row (csv)
      99             : {
     100             :    % The blank row handling default is to use that of the csv object.
     101          15 :    if (qualifier_exists ("blankrows"))
     102             :      {
     103           0 :         return _csv_decode_row (csv.decoder,
     104             :                                get_blankrows_bits (qualifier("blankrows")));
     105             :      }
     106          15 :    return _csv_decode_row (csv.decoder);
     107             : }
     108             : 
     109             : private define fixup_header_names (names)
     110             : {
     111          50 :    if (names == NULL) return names;
     112          30 :    if (typeof (names) == List_Type)
     113           0 :      names = list_to_array (names);
     114          30 :    if (_typeof(names) != String_Type)
     115           3 :      return names;
     116             : 
     117          27 :    variable is_scalar = (typeof (names) != Array_Type);
     118          27 :    if (is_scalar)
     119           8 :      names = [names];
     120             : 
     121          27 :    names = strlow (names);
     122          27 :    variable i = where (names == "");
     123          27 :    names[i] = array_map (String_Type, &sprintf, "col%d", i+1);
     124             : 
     125          27 :    names = strtrim (names);            %  strip leading/trailing WS
     126          35 :    if (is_scalar) names = names[0];
     127          27 :    return names;
     128             : }
     129             : 
     130             : private define pop_columns_as_array (n)
     131             : {
     132           6 :    if (n == 0)
     133           0 :      return String_Type[0];
     134             : 
     135             :    try
     136             :      {
     137             :         % allow a mixture of arrays and scalars
     138           6 :         variable columns = __pop_list (n);
     139           6 :         columns = [__push_list(columns)];
     140           6 :         return columns;
     141             :      }
     142             :    catch TypeMismatchError:
     143             :      {
     144           0 :         throw TypeMismatchError, "Column arguments cannot be a mixture of ints and strings";
     145             :      }
     146             : }
     147             : 
     148             : 
     149             : private define read_cols ()
     150             : {
     151          15 :    if ((_NARGS == 0) || (qualifier_exists ("help")))
     152             :      {
     153           0 :         usage("struct = .readcol ([columns] ; qualifiers)\n\
     154             : where columns is an optional 1-based array of column numbers,\n\
     155             :  or array of column names.\n\
     156             : Qualifiers:\n\
     157             :  header=header, fields=[array of field names],\n\
     158             :  type=value|array|string of 's','i','l','f','d' (str,int,long,float,dbl)\n\
     159             :  typeNTH=val (specifiy type for NTH column)\n\
     160             :  snan=\"\", inan=0, lnan=0L, fnan=_NaN, dnan=_NaN (defaults for empty fields),\n\
     161             :  nanNTH=val (value used for an empty field in the NTH column\n\
     162             :  init_size=int (number of rows to initially read)\n\
     163             : "
     164             :              );
     165             :      }
     166             : 
     167          15 :    variable columns = NULL;
     168          15 :    if (_NARGS > 1)
     169             :      {
     170           6 :         columns = pop_columns_as_array (_NARGS-1);
     171             :      }
     172          15 :    variable csv = ();
     173             : 
     174          15 :    variable fields = qualifier ("fields");
     175          15 :    variable header = qualifier ("header");
     176          15 :    variable types = qualifier ("type");
     177          15 :    variable snan = qualifier ("snan", "");
     178          15 :    variable dnan = qualifier ("dnan", _NaN);
     179          15 :    variable fnan = qualifier ("fnan", typecast(_NaN,Float_Type));
     180          15 :    variable inan = qualifier ("inan", 0);
     181          15 :    variable lnan = qualifier ("lnan", 0L);
     182          15 :    variable init_size = qualifier ("init_size", 0x8000);
     183          15 :    if (init_size <= 0) init_size = 0x8000;
     184             : 
     185          15 :    if ((fields != NULL) && (columns != NULL)
     186             :        && (length(fields) != length(columns)))
     187           0 :      throw InvalidParmError, "The fields qualifier must be the same size as the number of columns";
     188             : 
     189          15 :    variable flags = get_blankrows_bits (qualifier("blankrows", "skip"));
     190             : 
     191          15 :    header = fixup_header_names (header);
     192          15 :    columns = fixup_header_names (columns);
     193             : 
     194          15 :    variable columns_are_string = _typeof(columns) == String_Type;
     195             : 
     196          15 :    if ((header == NULL) && columns_are_string)
     197           0 :      throw InvalidParmError, "No header was supplied to map column names";
     198             : 
     199          15 :    variable column_ints = columns, col, i, j;
     200          15 :    if (columns_are_string)
     201             :      {
     202           3 :         column_ints = Int_Type[length(columns)];
     203           3 :         _for i (0, length(columns)-1, 1)
     204             :           {
     205           6 :              col = columns[i];
     206           6 :              j = wherefirst (col == header);
     207           6 :              if (j == NULL)
     208           0 :                throw InvalidParmError, "Unknown (canonical) column name $col"$;
     209           6 :              column_ints[i] = j+1;
     210             :           }
     211             :      }
     212             : 
     213          15 :    variable datastruct = NULL, ncols, row_data, e;
     214             :    try (e)
     215             :      {
     216          15 :         row_data = _csv_decode_row (csv.decoder, flags);
     217             :      }
     218             :    catch AnyError:
     219             :      {
     220           0 :         throw e.error, sprintf ("Error encountered decoding line %S: %S", csv.func_data.line_num, e.message);
     221             :      }
     222             : 
     223          15 :    variable nread = 0;
     224          15 :    if (row_data != NULL)
     225             :      {
     226          13 :         nread++;
     227             : 
     228          13 :         if (column_ints == NULL)
     229           7 :           column_ints = [1:length(row_data)];
     230             : 
     231          13 :         if (any(column_ints>length(row_data)))
     232             :           {
     233           0 :              throw InvalidParmError, "column number is too large for data";
     234             :           }
     235             :      }
     236             : 
     237          15 :    if (column_ints == NULL)
     238             :      {
     239           2 :         if (fields != NULL)
     240           0 :           ncols = length(fields);
     241           2 :         else if (columns_are_string)
     242           0 :           ncols = length(columns);
     243           2 :         else if (header != NULL)
     244           2 :           ncols = length (header);
     245             :         else
     246           0 :           throw RunTimeError, "Insufficient information to determine the number of columns in the CSV file";
     247             : 
     248           2 :        column_ints = [1:ncols];
     249             :      }
     250             : 
     251          15 :    if (fields == NULL)
     252             :      {
     253          15 :         if (columns_are_string)
     254           3 :           fields = columns;
     255          12 :         else if (header != NULL)
     256          11 :           fields = header[column_ints-1];
     257             :         else
     258           1 :           fields = array_map(String_Type, &sprintf, "col%d", column_ints);
     259             :      }
     260          15 :    ncols = length(fields);
     261          15 :    datastruct = @Struct_Type(fields);
     262             : 
     263          15 :    column_ints -= 1;                   %  make 0-based
     264             : 
     265          15 :    variable convert_funcs = Ref_Type[ncols], convert_func, val;
     266          69 :    variable nan_values = {}; loop(ncols) list_append(nan_values, snan);
     267             : 
     268          15 :    if (types == NULL)
     269             :      {
     270          13 :         types = qualifier_exists ("auto") ? 'A' : 's';
     271             :      }
     272             : 
     273          15 :    if (typeof(types) == List_Type)
     274           0 :      types = list_to_array (types);
     275             : 
     276          15 :    if (typeof(types) == String_Type)
     277           1 :      types = bstring_to_array (types);
     278             : 
     279          15 :    if ((typeof(types) == Array_Type) && (length(types) != ncols))
     280           0 :      throw InvalidParmError, "types array must be equal to the number of columns";
     281             : 
     282          15 :    if (typeof (types) != Array_Type)
     283          14 :      types = types[Int_Type[ncols]];   %  single (default) type specified
     284             : 
     285             :    variable i1;
     286          15 :    _for i (1, ncols, 1)
     287             :      {
     288          39 :         i1 = i-1;
     289          39 :         val = qualifier ("type$i"$, types[i1]);
     290             : 
     291          39 :         types[i1] = val;
     292             :      }
     293             : 
     294          15 :    i = where(types=='i');
     295          30 :    convert_funcs[i] = &atoi; nan_values[i] = typecast(inan, Int_Type);
     296          15 :    i = where(types=='l');
     297          30 :    convert_funcs[i] = &atol; nan_values[i] = typecast(lnan, Long_Type);
     298          15 :    i = where(types=='f');
     299          30 :    convert_funcs[i] = &atofloat; nan_values[i] = typecast (fnan, Float_Type);
     300          15 :    i = where(types=='d');
     301          30 :    convert_funcs[i] = &atof; nan_values[i] = typecast(dnan, Double_Type);
     302             : 
     303          15 :    _for i (1, ncols, 1)
     304             :      {
     305          39 :         i1 = i-1;
     306             : 
     307          39 :         if (types[i1] == 'A')
     308             :           {
     309           3 :              variable type = _slang_guess_type (row_data[i1]);
     310           3 :              if (type == Double_Type)
     311             :                {
     312           2 :                   convert_funcs[i1] = &atof;
     313           2 :                   nan_values[i1] = dnan;
     314             :                }
     315           1 :              else if (type == Int_Type)
     316             :                {
     317           1 :                   convert_funcs[i1] = &atoi;
     318           1 :                   nan_values[i1] = inan;
     319             :                }
     320             :           }
     321             : 
     322          39 :         val = nan_values[i1];
     323          39 :         nan_values[i1] = typecast (qualifier ("nan$i"$, val), typeof(val));
     324             :      }
     325             : 
     326          15 :    variable column_arrays = Array_Type[ncols], array;
     327          15 :    variable dsize = init_size;
     328          15 :    variable max_allocated = init_size;
     329          15 :    variable list_of_column_arrays = {};
     330          15 :    _for i (0, ncols-1, 1)
     331             :      {
     332          39 :         if (row_data == NULL)
     333             :           {
     334           6 :              column_arrays[i] = typeof(nan_values[i])[0];
     335           6 :              continue;
     336             :           }
     337             : 
     338          33 :         val = row_data[column_ints[i]];
     339          33 :         array = typeof(nan_values[i])[max_allocated];
     340          33 :         ifnot (strbytelen(val))
     341           0 :           val = nan_values[i];
     342             :         else
     343             :           {
     344          33 :              convert_func = convert_funcs[i];
     345          33 :              if (convert_func != NULL)
     346           5 :                val = (@convert_func)(val);
     347             :           }
     348          33 :         array[0] = val;
     349          33 :         column_arrays[i] = array;
     350             :      }
     351          15 :    list_append (list_of_column_arrays, column_arrays);
     352             : 
     353          15 :    variable min_row_size = 1+max(column_ints);
     354             :    forever
     355             :      {
     356             :         try (e)
     357             :           {
     358          43 :              row_data = _csv_decode_row (csv.decoder, flags);
     359             :           }
     360             :         catch AnyError:
     361             :           {
     362           0 :              throw e.error, sprintf ("Error encountered decoding line %S: %S", csv.func_data.line_num, e.message);
     363             :           }
     364          58 :         if (row_data == NULL) break;
     365             : 
     366          28 :         if (length (row_data) < min_row_size)
     367             :           {
     368             :              % FIXME-- make what to do here configurable
     369           0 :              if (length(row_data) == 0)
     370           0 :                break;
     371             : 
     372           0 :              continue;
     373             :           }
     374             : 
     375          28 :         if (nread >= max_allocated)
     376             :           {
     377           3 :              column_arrays = Array_Type[ncols];
     378           3 :              _for i (0, ncols-1, 1)
     379           9 :                column_arrays[i] = _typeof(list_of_column_arrays[0][i])[max_allocated];
     380           3 :              list_append (list_of_column_arrays, column_arrays);
     381           3 :              nread = 0;
     382             :           }
     383             : 
     384          28 :         _for i (0, ncols-1, 1)
     385             :           {
     386          94 :              val = row_data[column_ints[i]];
     387          94 :              ifnot (strbytelen(val))
     388             :                {
     389           0 :                   column_arrays[i][nread] = nan_values[i];
     390           0 :                   continue;
     391             :                }
     392          94 :              convert_func = convert_funcs[i];
     393          94 :              if (convert_func == NULL)
     394             :                {
     395          49 :                   column_arrays[i][nread] = val;
     396          49 :                   continue;
     397             :                }
     398          45 :              column_arrays[i][nread] = (@convert_func)(val);
     399             :           }
     400          28 :         nread++;
     401             :      }
     402          15 :    resize_arrays (__tmp(column_arrays), nread);
     403          15 :    list_of_column_arrays = merge_column_arrays (__tmp(list_of_column_arrays));
     404             : 
     405          15 :    set_struct_fields (datastruct, __push_list(list_of_column_arrays));
     406          15 :    return datastruct;
     407             : }
     408             : 
     409             : define csv_decoder_new ()
     410             : {
     411          15 :    if (_NARGS != 1)
     412           0 :      usage ("\
     413             : obj = csv_decoder_new (file|fp|strings ; qualifiers);\n\
     414             : Qualifiers:\n\
     415             :   quote='\"', delim=',', skiplines=0, comment=string");
     416             : 
     417          15 :    variable fp = ();
     418          30 :    variable type = typeof(fp), file = fp;
     419          15 :    variable func = &read_fp_callback;
     420             :    variable func_data;
     421             : 
     422          15 :    variable skiplines = qualifier("skiplines", 0);
     423          15 :    variable delim = qualifier("delim", ',');
     424          15 :    variable quote = qualifier("quote", '"');
     425          15 :    variable comment = qualifier("comment", NULL);
     426          15 :    variable comment_char = (comment == NULL) ? NULL : comment[0];
     427          15 :    variable flags = get_blankrows_bits (qualifier("blankrows", "skip"));
     428             : 
     429          15 :    if ((type == Array_Type) || (type == List_Type))
     430             :      {
     431           3 :         func = &read_strings_callback;
     432           3 :         func_data = struct
     433             :           {
     434           3 :              strings = fp,
     435           3 :              line_num = skiplines,
     436           6 :              i = skiplines, n = length(fp),
     437           3 :              output_crlf = 0,
     438           3 :              comment_char = comment_char,
     439           3 :              comment = comment,
     440             :           };
     441             :      }
     442             :    else
     443             :      {
     444             :         variable line;
     445          12 :         if (type != File_Type)
     446             :           {
     447           9 :              fp = fopen (file, "r");
     448           9 :              if (fp == NULL)
     449           0 :                throw OpenError, "Unable to open CSV file '$file'"$;
     450             : 
     451             :              % Ignore a BOM if it exists
     452           9 :              if (-1 != fgets (&line, fp))
     453             :                {
     454           8 :                   if (0 == strnbytecmp (line, "\xEF\xBB\xBF", 3))
     455           0 :                     () = fseek (fp, 3, SEEK_SET);
     456             :                   else
     457           8 :                     () = fseek (fp, 0, SEEK_SET);
     458             :                }
     459             :           }
     460             : 
     461          12 :         func_data = struct
     462             :           {
     463          12 :              fp = fp,
     464          12 :              line_num = skiplines,
     465          12 :              comment_char = comment_char,
     466          12 :              comment = comment,
     467          12 :              comment_len = ((comment == NULL) ? 0 : strbytelen(comment)),
     468             :           };
     469          12 :         loop (skiplines)
     470           0 :           () = fgets (&line, fp);
     471             :      }
     472             : 
     473          15 :    variable csv = struct
     474             :      {
     475          15 :         decoder = _csv_decoder_new (func, func_data, delim, quote, flags),
     476          15 :         readrow = &read_row,
     477          15 :         readcol = &read_cols,
     478          15 :         func_data = func_data,
     479             :      };
     480             : 
     481          15 :    return csv;
     482             : }
     483             : 
     484             : % Encoder
     485             : 
     486             : private define writecol ()
     487             : {
     488           3 :    if ((_NARGS < 3) || qualifier_exists("help"))
     489             :      {
     490           0 :         usage("\
     491             : writecol (file|fp, list_of_column_data | datastruct | col1,col2,...)\n\
     492             : Qualifiers:\n\
     493             :   names=array-of-column-names, noheader, quoteall, quotesome, rdb\n\
     494             : "
     495             :              );
     496             :      }
     497             : 
     498             :    variable csv, data, file;
     499           3 :    if (_NARGS == 3)
     500             :      {
     501           3 :         (csv, file, data) = ();
     502             :      }
     503             :    else
     504             :      {
     505           0 :         data = __pop_list (_NARGS-2);
     506           0 :         (csv, file) = ();
     507             :      }
     508             : 
     509           3 :    variable type = typeof (data);
     510           3 :    if ((type != List_Type) && (type != Array_Type)
     511             :        && not is_struct_type (data))
     512           0 :      data = {data};
     513             : 
     514           3 :    variable flags = 0;
     515           3 :    if (qualifier_exists ("quoteall")) flags |= CSV_QUOTE_ALL;
     516           3 :    if (qualifier_exists ("quotesome")) flags |= CSV_QUOTE_SOME;
     517           3 :    variable rdb = qualifier_exists ("rdb");
     518             : 
     519           3 :    variable fp = file;
     520           3 :    if (typeof(file) != File_Type)
     521           3 :      fp = fopen (file, "wb");
     522           3 :    if (fp == NULL)
     523           0 :      throw OpenError, "Error opening $file in write mode"$;
     524             : 
     525           3 :    variable names = NULL;
     526           3 :    ifnot (qualifier_exists ("noheader"))
     527             :      {
     528           3 :         names = qualifier ("names");
     529           3 :         if ((names == NULL) && is_struct_type (data))
     530           3 :           names = get_struct_field_names (data);
     531             :      }
     532             : 
     533           3 :    if (is_struct_type (data))
     534             :      {
     535           3 :         variable tmp = {};
     536           3 :         data = {(_push_struct_field_values(data), pop())};
     537           3 :         list_reverse (data);
     538             :      }
     539             : 
     540             :    EXIT_BLOCK
     541             :      {
     542           3 :         ifnot (__is_same(file, fp))
     543             :           {
     544           3 :              if (-1 == fclose (fp))
     545           0 :                throw WriteError, "Error closing $file"$;
     546             :           }
     547             :      }
     548             : 
     549           3 :    variable i, ncols = length(data);
     550           3 :    if (ncols == 0)
     551             :      return;
     552             : 
     553             :    % The following assumes that data is a list or array of lists or
     554             :    % array.
     555           3 :    data = @data;
     556           3 :    _for i (0, ncols-1, 1)
     557             :      {
     558          10 :         variable t = typeof(data[i]);
     559          10 :         if ((t != List_Type) && (t != Array_Type))
     560           0 :           data[i] = [data[i]];
     561             :      }
     562             : 
     563           3 :    variable nrows = length(data[0]), j;
     564           3 :    _for i (1, ncols-1, 1)
     565             :      {
     566           7 :         if (nrows != length(data[i]))
     567           0 :           throw InvalidParmError, "CSV data columns must be the same length";
     568             :      }
     569             : 
     570           3 :    variable str, encoder = csv.encoder;
     571             : 
     572           3 :    if (names != NULL)
     573             :      {
     574           3 :         if (typeof (names) == List_Type)
     575           0 :           names = list_to_array (names);
     576           3 :         str = _csv_encode_row (encoder, names, flags);
     577           3 :         if (-1 == fputs (str, fp))
     578           0 :           throw WriteError, "Write to CSV file failed";
     579           3 :         if (rdb)
     580             :           {
     581           1 :              variable types = String_Type[ncols];
     582           1 :              _for i (0, ncols-1, 1)
     583           4 :                types[i] = __is_datatype_numeric (_typeof(data[i])) ? "N" : "S";
     584             : 
     585           1 :              str = _csv_encode_row (encoder, types, flags);
     586           1 :              if (-1 == fputs (str, fp))
     587           0 :                throw WriteError, "Write to CSV file failed";
     588             :           }
     589             :      }
     590             : 
     591           3 :    variable row_data = String_Type[ncols];
     592           3 :    _for i (0, nrows-1, 1)
     593             :      {
     594          12 :         _for j (0, ncols-1, 1)
     595          46 :           row_data[j] = string (data[j][i]);
     596             : 
     597          12 :         str = _csv_encode_row (encoder, row_data, flags);
     598          12 :         if (-1 == fputs (str, fp))
     599           0 :           throw WriteError, "Write to CSV file failed";
     600             :      }
     601             : }
     602             : 
     603             : define csv_encoder_new ()
     604             : {
     605           3 :    if (qualifier_exists ("help"))
     606             :      {
     607           0 :         usage ("csv = csv_encoder_new ();\n\
     608             : Qualifiers:\n\
     609             :   delim=','\n\
     610             :   quote='\"'\n\
     611             :   quotesome, quoteall\n\
     612             :   rdb\n\
     613             : "
     614             :               );
     615             :      }
     616             : 
     617           3 :    variable flags = 0;
     618           3 :    if (qualifier_exists ("quoteall")) flags |= CSV_QUOTE_ALL;
     619           3 :    if (qualifier_exists ("quotesome")) flags |= CSV_QUOTE_SOME;
     620           3 :    variable quotechar = qualifier ("quote", '"');
     621           3 :    variable delimchar = qualifier ("delim",
     622             :                                    qualifier_exists ("rdb") ? '\t' : ',');
     623             : 
     624           3 :    variable csv = struct
     625             :      {
     626           3 :         encoder = _csv_encoder_new (delimchar, quotechar, flags),
     627           3 :         writecol = &writecol,
     628             :      };
     629             : 
     630           3 :    return csv;
     631             : }
     632             : 
     633             : define csv_writecol ()
     634             : {
     635           3 :    if ((_NARGS < 2) || qualifier_exists("help"))
     636             :      {
     637           0 :         usage("\
     638             : csv_writecol (file|fp, list_of_column_data | datastruct | col1,col2,...)\n\
     639             : Qualifiers:\n\
     640             :   names=array-of-column-names, noheader, quote=val, quoteall, quotesome\n\
     641             : "
     642             :              );
     643             :      }
     644             : 
     645           3 :    variable args = __pop_list (_NARGS);
     646           3 :    variable csv = csv_encoder_new (;;__qualifiers);
     647           3 :    csv.writecol (__push_list(args);;__qualifiers);
     648             : }
     649             : 
     650             : private define convert_to_numeric (s, name)
     651             : {
     652           8 :    variable val = get_struct_field (s, name);
     653           8 :    variable num = length (val);
     654           8 :    if ((num == 0) || (_typeof (val) != String_Type))
     655             :      return;
     656             : 
     657             :    EXIT_BLOCK
     658             :      {
     659           3 :         set_struct_field (s, name, val);
     660             :      }
     661             : 
     662           3 :    _for (0, length (val)-1, 1)
     663             :      {
     664          12 :         variable i = ();
     665          12 :         variable type = _slang_guess_type (val[i]);
     666          12 :         if ((type == Double_Type) || (type == Float_Type))
     667             :           {
     668           2 :              val = atof (val);
     669             :              return;
     670             :           }
     671          10 :         if (type == String_Type)
     672             :           return;
     673             :         % Otherwise an integer
     674             :      }
     675             : 
     676           1 :    variable lval = atol (val);
     677           1 :    val = atoi (val);
     678           1 :    if (any(val != lval))
     679           0 :      val = lval;
     680             : }
     681             : 
     682             : define csv_readcol ()
     683             : {
     684          15 :    if ((_NARGS == 0) || qualifier_exists("help"))
     685             :      {
     686           0 :         usage ("struct = csv_readcol (file|fp [,columns] ;qualifier)\n\
     687             : where columns is an optional 1-based array of column numbers,\n\
     688             :  or array of column names.\n\
     689             : Qualifiers:\n\
     690             :  quote='\"', delim=',', skiplines=0, comment=string, has_header,\n\
     691             :  header=header, fields=[array of field names],\n\
     692             :  type=value|array of 's','i','l','f','d' (string,int,long,float,double)\n\
     693             :  typeNTH=val (specify type for NTH column)\n\
     694             :  snan=\"\", inan=0, lnan=0L, fnan=_NaN, dnan=_NaN (defaults for empty fields),\n\
     695             :  nanNTH=val (value used for an empty field in the NTH column)\n\
     696             : "
     697             :               );
     698             :      }
     699             : 
     700             :    variable file, columns;
     701          15 :    columns = __pop_list (_NARGS-1);
     702          15 :    file = ();
     703             : 
     704          15 :    variable q = __qualifiers ();
     705          15 :    variable rdb = qualifier_exists ("rdb");
     706             : 
     707             :    % rdb files are tab-delimited files, # is a comment character,
     708             :    % the first non-comment line contains the field names, the
     709             :    % second line gives the field types.
     710          15 :    if (rdb)
     711             :      {
     712           8 :         q = struct { @q, comment = "#", delim = '\t' };
     713             :      }
     714          15 :    variable types = NULL;
     715          15 :    variable csv = csv_decoder_new (file ;; q);
     716          15 :    if (rdb || qualifier_exists ("has_header"))
     717             :      {
     718          13 :         variable header = csv.readrow ();
     719          13 :         if (header == NULL)
     720           0 :           throw ReadError, "Unable to read a CSV header row";
     721             : 
     722          39 :         q = struct { header=header, @q };
     723          13 :         if (rdb)
     724             :           {
     725             :              % The type field consists of an integer, followed by a
     726             :              % type specifier, and a justification character.  The
     727             :              % integer and justification characters are for display
     728             :              % purposes. The type specifier is N for numeric, S for
     729             :              % string, M for month.  Here, M and S will be treated the
     730             :              % same.
     731           2 :              types = csv.readrow ();
     732           2 :              types = strtrans (types, "0-9<>", "");
     733             :           }
     734             :      }
     735             : 
     736          15 :    variable s = csv.readcol (__push_list(columns) ;; q);
     737          15 :    if (rdb)
     738             :      {
     739           2 :         ifnot (length (columns))
     740           2 :           columns = header;
     741             : 
     742           2 :         header = fixup_header_names (header);
     743           2 :         foreach (columns)
     744             :           {
     745           8 :              variable col = ();
     746           8 :              if (typeof (col) == String_Type)
     747           8 :                col = fixup_header_names (col);
     748             :              else
     749           0 :                col = header[col-1];
     750             : 
     751           8 :              variable i = wherefirst (col == header);
     752           8 :              if ((i == NULL) || (types[i] != "N"))
     753           0 :                continue;
     754             : 
     755           8 :              convert_to_numeric (s, col);
     756             :           }
     757             :      }
     758          15 :    return s;
     759             : }
     760             :

Generated by: LCOV version 1.13