Python-2.7.3/Modules/_csv.c

No issues found

   1 /* csv module */
   2 
   3 /*
   4 
   5 This module provides the low-level underpinnings of a CSV reading/writing
   6 module.  Users should not use this module directly, but import the csv.py
   7 module instead.
   8 
   9 **** For people modifying this code, please note that as of this writing
  10 **** (2003-03-23), it is intended that this code should work with Python
  11 **** 2.2.
  12 
  13 */
  14 
  15 #define MODULE_VERSION "1.0"
  16 
  17 #include "Python.h"
  18 #include "structmember.h"
  19 
  20 
  21 /* begin 2.2 compatibility macros */
  22 #ifndef PyDoc_STRVAR
  23 /* Define macros for inline documentation. */
  24 #define PyDoc_VAR(name) static char name[]
  25 #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
  26 #ifdef WITH_DOC_STRINGS
  27 #define PyDoc_STR(str) str
  28 #else
  29 #define PyDoc_STR(str) ""
  30 #endif
  31 #endif /* ifndef PyDoc_STRVAR */
  32 
  33 #ifndef PyMODINIT_FUNC
  34 #       if defined(__cplusplus)
  35 #               define PyMODINIT_FUNC extern "C" void
  36 #       else /* __cplusplus */
  37 #               define PyMODINIT_FUNC void
  38 #       endif /* __cplusplus */
  39 #endif
  40 
  41 #ifndef Py_CLEAR
  42 #define Py_CLEAR(op)                                            \
  43     do {                                                        \
  44         if (op) {                                               \
  45             PyObject *tmp = (PyObject *)(op);                   \
  46             (op) = NULL;                                        \
  47             Py_DECREF(tmp);                                     \
  48         }                                                       \
  49     } while (0)
  50 #endif
  51 #ifndef Py_VISIT
  52 #define Py_VISIT(op)                                                    \
  53     do {                                                                \
  54         if (op) {                                                       \
  55             int vret = visit((PyObject *)(op), arg);                    \
  56             if (vret)                                                   \
  57                 return vret;                                            \
  58         }                                                               \
  59     } while (0)
  60 #endif
  61 
  62 /* end 2.2 compatibility macros */
  63 
  64 #define IS_BASESTRING(o) \
  65     PyObject_TypeCheck(o, &PyBaseString_Type)
  66 
  67 static PyObject *error_obj;     /* CSV exception */
  68 static PyObject *dialects;      /* Dialect registry */
  69 static long field_limit = 128 * 1024;   /* max parsed field size */
  70 
  71 typedef enum {
  72     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
  73     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
  74     EAT_CRNL
  75 } ParserState;
  76 
  77 typedef enum {
  78     QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
  79 } QuoteStyle;
  80 
  81 typedef struct {
  82     QuoteStyle style;
  83     char *name;
  84 } StyleDesc;
  85 
  86 static StyleDesc quote_styles[] = {
  87     { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
  88     { QUOTE_ALL,        "QUOTE_ALL" },
  89     { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
  90     { QUOTE_NONE,       "QUOTE_NONE" },
  91     { 0 }
  92 };
  93 
  94 typedef struct {
  95     PyObject_HEAD
  96 
  97     int doublequote;            /* is " represented by ""? */
  98     char delimiter;             /* field separator */
  99     char quotechar;             /* quote character */
 100     char escapechar;            /* escape character */
 101     int skipinitialspace;       /* ignore spaces following delimiter? */
 102     PyObject *lineterminator; /* string to write between records */
 103     int quoting;                /* style of quoting to write */
 104 
 105     int strict;                 /* raise exception on bad CSV */
 106 } DialectObj;
 107 
 108 staticforward PyTypeObject Dialect_Type;
 109 
 110 typedef struct {
 111     PyObject_HEAD
 112 
 113     PyObject *input_iter;   /* iterate over this for input lines */
 114 
 115     DialectObj *dialect;    /* parsing dialect */
 116 
 117     PyObject *fields;           /* field list for current record */
 118     ParserState state;          /* current CSV parse state */
 119     char *field;                /* build current field in here */
 120     int field_size;             /* size of allocated buffer */
 121     int field_len;              /* length of current field */
 122     int numeric_field;          /* treat field as numeric */
 123     unsigned long line_num;     /* Source-file line number */
 124 } ReaderObj;
 125 
 126 staticforward PyTypeObject Reader_Type;
 127 
 128 #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
 129 
 130 typedef struct {
 131     PyObject_HEAD
 132 
 133     PyObject *writeline;    /* write output lines to this file */
 134 
 135     DialectObj *dialect;    /* parsing dialect */
 136 
 137     char *rec;                  /* buffer for parser.join */
 138     int rec_size;               /* size of allocated record */
 139     int rec_len;                /* length of record */
 140     int num_fields;             /* number of fields in record */
 141 } WriterObj;
 142 
 143 staticforward PyTypeObject Writer_Type;
 144 
 145 /*
 146  * DIALECT class
 147  */
 148 
 149 static PyObject *
 150 get_dialect_from_registry(PyObject * name_obj)
 151 {
 152     PyObject *dialect_obj;
 153 
 154     dialect_obj = PyDict_GetItem(dialects, name_obj);
 155     if (dialect_obj == NULL) {
 156         if (!PyErr_Occurred())
 157             PyErr_Format(error_obj, "unknown dialect");
 158     }
 159     else
 160         Py_INCREF(dialect_obj);
 161     return dialect_obj;
 162 }
 163 
 164 static PyObject *
 165 get_string(PyObject *str)
 166 {
 167     Py_XINCREF(str);
 168     return str;
 169 }
 170 
 171 static PyObject *
 172 get_nullchar_as_None(char c)
 173 {
 174     if (c == '\0') {
 175         Py_INCREF(Py_None);
 176         return Py_None;
 177     }
 178     else
 179         return PyString_FromStringAndSize((char*)&c, 1);
 180 }
 181 
 182 static PyObject *
 183 Dialect_get_lineterminator(DialectObj *self)
 184 {
 185     return get_string(self->lineterminator);
 186 }
 187 
 188 static PyObject *
 189 Dialect_get_escapechar(DialectObj *self)
 190 {
 191     return get_nullchar_as_None(self->escapechar);
 192 }
 193 
 194 static PyObject *
 195 Dialect_get_quotechar(DialectObj *self)
 196 {
 197     return get_nullchar_as_None(self->quotechar);
 198 }
 199 
 200 static PyObject *
 201 Dialect_get_quoting(DialectObj *self)
 202 {
 203     return PyInt_FromLong(self->quoting);
 204 }
 205 
 206 static int
 207 _set_bool(const char *name, int *target, PyObject *src, int dflt)
 208 {
 209     if (src == NULL)
 210         *target = dflt;
 211     else
 212         *target = PyObject_IsTrue(src);
 213     return 0;
 214 }
 215 
 216 static int
 217 _set_int(const char *name, int *target, PyObject *src, int dflt)
 218 {
 219     if (src == NULL)
 220         *target = dflt;
 221     else {
 222         if (!PyInt_Check(src)) {
 223             PyErr_Format(PyExc_TypeError,
 224                          "\"%s\" must be an integer", name);
 225             return -1;
 226         }
 227         *target = PyInt_AsLong(src);
 228     }
 229     return 0;
 230 }
 231 
 232 static int
 233 _set_char(const char *name, char *target, PyObject *src, char dflt)
 234 {
 235     if (src == NULL)
 236         *target = dflt;
 237     else {
 238         if (src == Py_None || PyString_Size(src) == 0)
 239             *target = '\0';
 240         else if (!PyString_Check(src) || PyString_Size(src) != 1) {
 241             PyErr_Format(PyExc_TypeError,
 242                          "\"%s\" must be an 1-character string",
 243                          name);
 244             return -1;
 245         }
 246         else {
 247             char *s = PyString_AsString(src);
 248             if (s == NULL)
 249                 return -1;
 250             *target = s[0];
 251         }
 252     }
 253     return 0;
 254 }
 255 
 256 static int
 257 _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
 258 {
 259     if (src == NULL)
 260         *target = PyString_FromString(dflt);
 261     else {
 262         if (src == Py_None)
 263             *target = NULL;
 264         else if (!IS_BASESTRING(src)) {
 265             PyErr_Format(PyExc_TypeError,
 266                          "\"%s\" must be an string", name);
 267             return -1;
 268         }
 269         else {
 270             Py_XDECREF(*target);
 271             Py_INCREF(src);
 272             *target = src;
 273         }
 274     }
 275     return 0;
 276 }
 277 
 278 static int
 279 dialect_check_quoting(int quoting)
 280 {
 281     StyleDesc *qs = quote_styles;
 282 
 283     for (qs = quote_styles; qs->name; qs++) {
 284         if (qs->style == quoting)
 285             return 0;
 286     }
 287     PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
 288     return -1;
 289 }
 290 
 291 #define D_OFF(x) offsetof(DialectObj, x)
 292 
 293 static struct PyMemberDef Dialect_memberlist[] = {
 294     { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
 295     { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
 296     { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
 297     { "strict",             T_INT, D_OFF(strict), READONLY },
 298     { NULL }
 299 };
 300 
 301 static PyGetSetDef Dialect_getsetlist[] = {
 302     { "escapechar",             (getter)Dialect_get_escapechar},
 303     { "lineterminator",         (getter)Dialect_get_lineterminator},
 304     { "quotechar",              (getter)Dialect_get_quotechar},
 305     { "quoting",                (getter)Dialect_get_quoting},
 306     {NULL},
 307 };
 308 
 309 static void
 310 Dialect_dealloc(DialectObj *self)
 311 {
 312     Py_XDECREF(self->lineterminator);
 313     Py_TYPE(self)->tp_free((PyObject *)self);
 314 }
 315 
 316 static char *dialect_kws[] = {
 317     "dialect",
 318     "delimiter",
 319     "doublequote",
 320     "escapechar",
 321     "lineterminator",
 322     "quotechar",
 323     "quoting",
 324     "skipinitialspace",
 325     "strict",
 326     NULL
 327 };
 328 
/* tp_new for the Dialect type.
 *
 * Accepts an optional "dialect" (a registered name or any object carrying
 * dialect attributes) plus optional per-option overrides.  Each option is
 * taken from the explicit argument if given, else from the matching
 * attribute of "dialect" if it has one, else from a built-in default.
 *
 * Returns a new reference to a Dialect instance, or NULL with an
 * exception set.  When "dialect" is already a Dialect_Type instance and
 * no overrides were given, that same object is returned instead of a
 * fresh one. */
static PyObject *
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    DialectObj *self;
    PyObject *ret = NULL;
    PyObject *dialect = NULL;
    PyObject *delimiter = NULL;
    PyObject *doublequote = NULL;
    PyObject *escapechar = NULL;
    PyObject *lineterminator = NULL;
    PyObject *quotechar = NULL;
    PyObject *quoting = NULL;
    PyObject *skipinitialspace = NULL;
    PyObject *strict = NULL;

    /* All arguments are optional; ones not supplied stay NULL. */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|OOOOOOOOO", dialect_kws,
                                     &dialect,
                                     &delimiter,
                                     &doublequote,
                                     &escapechar,
                                     &lineterminator,
                                     &quotechar,
                                     &quoting,
                                     &skipinitialspace,
                                     &strict))
        return NULL;

    if (dialect != NULL) {
        /* Either branch leaves this function owning one reference to
         * dialect: the registry lookup returns a new reference, otherwise
         * one is taken explicitly. */
        if (IS_BASESTRING(dialect)) {
            dialect = get_dialect_from_registry(dialect);
            if (dialect == NULL)
                return NULL;
        }
        else
            Py_INCREF(dialect);
        /* Can we reuse this instance? */
        if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
            delimiter == 0 &&
            doublequote == 0 &&
            escapechar == 0 &&
            lineterminator == 0 &&
            quotechar == 0 &&
            quoting == 0 &&
            skipinitialspace == 0 &&
            strict == 0)
            return dialect;
    }

    self = (DialectObj *)type->tp_alloc(type, 0);
    if (self == NULL) {
        Py_XDECREF(dialect);
        return NULL;
    }
    self->lineterminator = NULL;

    /* The parsed arguments are borrowed.  Take references so that every
     * option pointer is owned, whether it came from the arguments or from
     * the attribute lookups below, and can be released uniformly at err. */
    Py_XINCREF(delimiter);
    Py_XINCREF(doublequote);
    Py_XINCREF(escapechar);
    Py_XINCREF(lineterminator);
    Py_XINCREF(quotechar);
    Py_XINCREF(quoting);
    Py_XINCREF(skipinitialspace);
    Py_XINCREF(strict);
    if (dialect != NULL) {
        /* Fill each option that was not given explicitly from the
         * attribute of the same name on dialect.  A failed lookup leaves
         * the option NULL; the resulting errors are discarded by the
         * PyErr_Clear() after the last lookup. */
#define DIALECT_GETATTR(v, n) \
        if (v == NULL) \
            v = PyObject_GetAttrString(dialect, n)
        DIALECT_GETATTR(delimiter, "delimiter");
        DIALECT_GETATTR(doublequote, "doublequote");
        DIALECT_GETATTR(escapechar, "escapechar");
        DIALECT_GETATTR(lineterminator, "lineterminator");
        DIALECT_GETATTR(quotechar, "quotechar");
        DIALECT_GETATTR(quoting, "quoting");
        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
        DIALECT_GETATTR(strict, "strict");
        PyErr_Clear();
    }

    /* check types and convert to C values */
    /* DIASET calls one _set_* helper and jumps to err when it reports
     * failure; the last argument is the default used for a NULL source. */
#define DIASET(meth, name, target, src, dflt) \
    if (meth(name, target, src, dflt)) \
        goto err
    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
    DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
    DIASET(_set_bool, "strict", &self->strict, strict, 0);

    /* validate options */
    if (dialect_check_quoting(self->quoting))
        goto err;
    if (self->delimiter == 0) {
        PyErr_SetString(PyExc_TypeError, "delimiter must be set");
        goto err;
    }
    /* A quotechar of None with no quoting value given switches quoting
     * off instead of failing the check that follows. */
    if (quotechar == Py_None && quoting == NULL)
        self->quoting = QUOTE_NONE;
    if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "quotechar must be set if quoting enabled");
        goto err;
    }
    if (self->lineterminator == 0) {
        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
        goto err;
    }

    /* Success: the extra reference taken here offsets the Py_XDECREF(self)
     * in the shared cleanup below.  On failure ret stays NULL and that
     * decref releases the half-built object. */
    ret = (PyObject *)self;
    Py_INCREF(self);
err:
    Py_XDECREF(self);
    Py_XDECREF(dialect);
    Py_XDECREF(delimiter);
    Py_XDECREF(doublequote);
    Py_XDECREF(escapechar);
    Py_XDECREF(lineterminator);
    Py_XDECREF(quotechar);
    Py_XDECREF(quoting);
    Py_XDECREF(skipinitialspace);
    Py_XDECREF(strict);
    return ret;
}
 455 
 456 
 457 PyDoc_STRVAR(Dialect_Type_doc,
 458 "CSV dialect\n"
 459 "\n"
 460 "The Dialect type records CSV parsing and generation options.\n");
 461 
 462 static PyTypeObject Dialect_Type = {
 463     PyVarObject_HEAD_INIT(NULL, 0)
 464     "_csv.Dialect",                         /* tp_name */
 465     sizeof(DialectObj),                     /* tp_basicsize */
 466     0,                                      /* tp_itemsize */
 467     /*  methods  */
 468     (destructor)Dialect_dealloc,            /* tp_dealloc */
 469     (printfunc)0,                           /* tp_print */
 470     (getattrfunc)0,                         /* tp_getattr */
 471     (setattrfunc)0,                         /* tp_setattr */
 472     (cmpfunc)0,                             /* tp_compare */
 473     (reprfunc)0,                            /* tp_repr */
 474     0,                                      /* tp_as_number */
 475     0,                                      /* tp_as_sequence */
 476     0,                                      /* tp_as_mapping */
 477     (hashfunc)0,                            /* tp_hash */
 478     (ternaryfunc)0,                         /* tp_call */
 479     (reprfunc)0,                                /* tp_str */
 480     0,                                      /* tp_getattro */
 481     0,                                      /* tp_setattro */
 482     0,                                      /* tp_as_buffer */
 483     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
 484     Dialect_Type_doc,                       /* tp_doc */
 485     0,                                      /* tp_traverse */
 486     0,                                      /* tp_clear */
 487     0,                                      /* tp_richcompare */
 488     0,                                      /* tp_weaklistoffset */
 489     0,                                      /* tp_iter */
 490     0,                                      /* tp_iternext */
 491     0,                                          /* tp_methods */
 492     Dialect_memberlist,                     /* tp_members */
 493     Dialect_getsetlist,                     /* tp_getset */
 494     0,                                          /* tp_base */
 495     0,                                          /* tp_dict */
 496     0,                                          /* tp_descr_get */
 497     0,                                          /* tp_descr_set */
 498     0,                                          /* tp_dictoffset */
 499     0,                                          /* tp_init */
 500     0,                                          /* tp_alloc */
 501     dialect_new,                                /* tp_new */
 502     0,                                          /* tp_free */
 503 };
 504 
 505 /*
 506  * Return an instance of the dialect type, given a Python instance or kwarg
 507  * description of the dialect
 508  */
 509 static PyObject *
 510 _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
 511 {
 512     PyObject *ctor_args;
 513     PyObject *dialect;
 514 
 515     ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
 516     if (ctor_args == NULL)
 517         return NULL;
 518     dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
 519     Py_DECREF(ctor_args);
 520     return dialect;
 521 }
 522 
 523 /*
 524  * READER
 525  */
 526 static int
 527 parse_save_field(ReaderObj *self)
 528 {
 529     PyObject *field;
 530 
 531     field = PyString_FromStringAndSize(self->field, self->field_len);
 532     if (field == NULL)
 533         return -1;
 534     self->field_len = 0;
 535     if (self->numeric_field) {
 536         PyObject *tmp;
 537 
 538         self->numeric_field = 0;
 539         tmp = PyNumber_Float(field);
 540         if (tmp == NULL) {
 541             Py_DECREF(field);
 542             return -1;
 543         }
 544         Py_DECREF(field);
 545         field = tmp;
 546     }
 547     PyList_Append(self->fields, field);
 548     Py_DECREF(field);
 549     return 0;
 550 }
 551 
 552 static int
 553 parse_grow_buff(ReaderObj *self)
 554 {
 555     if (self->field_size == 0) {
 556         self->field_size = 4096;
 557         if (self->field != NULL)
 558             PyMem_Free(self->field);
 559         self->field = PyMem_Malloc(self->field_size);
 560     }
 561     else {
 562         if (self->field_size > INT_MAX / 2) {
 563             PyErr_NoMemory();
 564             return 0;
 565         }
 566         self->field_size *= 2;
 567         self->field = PyMem_Realloc(self->field, self->field_size);
 568     }
 569     if (self->field == NULL) {
 570         PyErr_NoMemory();
 571         return 0;
 572     }
 573     return 1;
 574 }
 575 
 576 static int
 577 parse_add_char(ReaderObj *self, char c)
 578 {
 579     if (self->field_len >= field_limit) {
 580         PyErr_Format(error_obj, "field larger than field limit (%ld)",
 581                      field_limit);
 582         return -1;
 583     }
 584     if (self->field_len == self->field_size && !parse_grow_buff(self))
 585         return -1;
 586     self->field[self->field_len++] = c;
 587     return 0;
 588 }
 589 
/* Advance the parser by one input character.
 *
 * c is the next character of the current line.  A c of '\0' stands for
 * "end of this input line": Reader_iternext passes 0 after the last
 * character of each line and rejects real NUL bytes before they get here.
 *
 * Depending on self->state the character is added to the current field,
 * completes the field (parse_save_field), or only changes the state.  A
 * record is finished when the state is START_RECORD again after the
 * end-of-line call.
 *
 * Returns 0 on success, -1 with an exception set on error. */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            /* under QUOTE_NONNUMERIC an unquoted field is flagged so that
               parse_save_field converts it to a float */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following an escape in an unquoted field; an escape
           at end of line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        /* end of line inside quotes: stay in this state, so the caller's
           loop fetches another line for the same record */
        if (c == '\0')
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* character following an escape inside quotes; an escape at end
           of line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* non-strict: keep the character and continue as an
               unquoted field */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after a line break only further line-break characters or the
           end-of-line marker are accepted */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
 759 
 760 static int
 761 parse_reset(ReaderObj *self)
 762 {
 763     Py_XDECREF(self->fields);
 764     self->fields = PyList_New(0);
 765     if (self->fields == NULL)
 766         return -1;
 767     self->field_len = 0;
 768     self->state = START_RECORD;
 769     self->numeric_field = 0;
 770     return 0;
 771 }
 772 
 773 static PyObject *
 774 Reader_iternext(ReaderObj *self)
 775 {
 776     PyObject *lineobj;
 777     PyObject *fields = NULL;
 778     char *line, c;
 779     int linelen;
 780 
 781     if (parse_reset(self) < 0)
 782         return NULL;
 783     do {
 784         lineobj = PyIter_Next(self->input_iter);
 785         if (lineobj == NULL) {
 786             /* End of input OR exception */
 787             if (!PyErr_Occurred() && self->field_len != 0)
 788                 PyErr_Format(error_obj,
 789                              "newline inside string");
 790             return NULL;
 791         }
 792         ++self->line_num;
 793 
 794         line = PyString_AsString(lineobj);
 795         linelen = PyString_Size(lineobj);
 796 
 797         if (line == NULL || linelen < 0) {
 798             Py_DECREF(lineobj);
 799             return NULL;
 800         }
 801         while (linelen--) {
 802             c = *line++;
 803             if (c == '\0') {
 804                 Py_DECREF(lineobj);
 805                 PyErr_Format(error_obj,
 806                              "line contains NULL byte");
 807                 goto err;
 808             }
 809             if (parse_process_char(self, c) < 0) {
 810                 Py_DECREF(lineobj);
 811                 goto err;
 812             }
 813         }
 814         Py_DECREF(lineobj);
 815         if (parse_process_char(self, 0) < 0)
 816             goto err;
 817     } while (self->state != START_RECORD);
 818 
 819     fields = self->fields;
 820     self->fields = NULL;
 821 err:
 822     return fields;
 823 }
 824 
 825 static void
 826 Reader_dealloc(ReaderObj *self)
 827 {
 828     PyObject_GC_UnTrack(self);
 829     Py_XDECREF(self->dialect);
 830     Py_XDECREF(self->input_iter);
 831     Py_XDECREF(self->fields);
 832     if (self->field != NULL)
 833         PyMem_Free(self->field);
 834     PyObject_GC_Del(self);
 835 }
 836 
 837 static int
 838 Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
 839 {
 840     Py_VISIT(self->dialect);
 841     Py_VISIT(self->input_iter);
 842     Py_VISIT(self->fields);
 843     return 0;
 844 }
 845 
 846 static int
 847 Reader_clear(ReaderObj *self)
 848 {
 849     Py_CLEAR(self->dialect);
 850     Py_CLEAR(self->input_iter);
 851     Py_CLEAR(self->fields);
 852     return 0;
 853 }
 854 
 855 PyDoc_STRVAR(Reader_Type_doc,
 856 "CSV reader\n"
 857 "\n"
 858 "Reader objects are responsible for reading and parsing tabular data\n"
 859 "in CSV format.\n"
 860 );
 861 
 862 static struct PyMethodDef Reader_methods[] = {
 863     { NULL, NULL }
 864 };
 865 #define R_OFF(x) offsetof(ReaderObj, x)
 866 
 867 static struct PyMemberDef Reader_memberlist[] = {
 868     { "dialect", T_OBJECT, R_OFF(dialect), RO },
 869     { "line_num", T_ULONG, R_OFF(line_num), RO },
 870     { NULL }
 871 };
 872 
 873 
 874 static PyTypeObject Reader_Type = {
 875     PyVarObject_HEAD_INIT(NULL, 0)
 876     "_csv.reader",                          /*tp_name*/
 877     sizeof(ReaderObj),                      /*tp_basicsize*/
 878     0,                                      /*tp_itemsize*/
 879     /* methods */
 880     (destructor)Reader_dealloc,             /*tp_dealloc*/
 881     (printfunc)0,                           /*tp_print*/
 882     (getattrfunc)0,                         /*tp_getattr*/
 883     (setattrfunc)0,                         /*tp_setattr*/
 884     (cmpfunc)0,                             /*tp_compare*/
 885     (reprfunc)0,                            /*tp_repr*/
 886     0,                                      /*tp_as_number*/
 887     0,                                      /*tp_as_sequence*/
 888     0,                                      /*tp_as_mapping*/
 889     (hashfunc)0,                            /*tp_hash*/
 890     (ternaryfunc)0,                         /*tp_call*/
 891     (reprfunc)0,                                /*tp_str*/
 892     0,                                      /*tp_getattro*/
 893     0,                                      /*tp_setattro*/
 894     0,                                      /*tp_as_buffer*/
 895     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
 896         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
 897     Reader_Type_doc,                        /*tp_doc*/
 898     (traverseproc)Reader_traverse,          /*tp_traverse*/
 899     (inquiry)Reader_clear,                  /*tp_clear*/
 900     0,                                      /*tp_richcompare*/
 901     0,                                      /*tp_weaklistoffset*/
 902     PyObject_SelfIter,                          /*tp_iter*/
 903     (getiterfunc)Reader_iternext,           /*tp_iternext*/
 904     Reader_methods,                         /*tp_methods*/
 905     Reader_memberlist,                      /*tp_members*/
 906     0,                                      /*tp_getset*/
 907 
 908 };
 909 
 910 static PyObject *
 911 csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
 912 {
 913     PyObject * iterator, * dialect = NULL;
 914     ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
 915 
 916     if (!self)
 917         return NULL;
 918 
 919     self->dialect = NULL;
 920     self->fields = NULL;
 921     self->input_iter = NULL;
 922     self->field = NULL;
 923     self->field_size = 0;
 924     self->line_num = 0;
 925 
 926     if (parse_reset(self) < 0) {
 927         Py_DECREF(self);
 928         return NULL;
 929     }
 930 
 931     if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
 932         Py_DECREF(self);
 933         return NULL;
 934     }
 935     self->input_iter = PyObject_GetIter(iterator);
 936     if (self->input_iter == NULL) {
 937         PyErr_SetString(PyExc_TypeError,
 938                         "argument 1 must be an iterator");
 939         Py_DECREF(self);
 940         return NULL;
 941     }
 942     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
 943     if (self->dialect == NULL) {
 944         Py_DECREF(self);
 945         return NULL;
 946     }
 947 
 948     PyObject_GC_Track(self);
 949     return (PyObject *)self;
 950 }
 951 
 952 /*
 953  * WRITER
 954  */
 955 /* ---------------------------------------------------------------- */
 956 static void
 957 join_reset(WriterObj *self)
 958 {
 959     self->rec_len = 0;
 960     self->num_fields = 0;
 961 }
 962 
 963 #define MEM_INCR 32768
 964 
 965 /* Calculate new record length or append field to record.  Return new
 966  * record length.
 967  */
 968 static int
 969 join_append_data(WriterObj *self, char *field, int quote_empty,
 970                  int *quoted, int copy_phase)
 971 {
 972     DialectObj *dialect = self->dialect;
 973     int i, rec_len;
 974     char *lineterm;
 975 
 976 #define ADDCH(c) \
 977     do {\
 978         if (copy_phase) \
 979             self->rec[rec_len] = c;\
 980         rec_len++;\
 981     } while(0)
 982 
 983     lineterm = PyString_AsString(dialect->lineterminator);
 984     if (lineterm == NULL)
 985         return -1;
 986 
 987     rec_len = self->rec_len;
 988 
 989     /* If this is not the first field we need a field separator */
 990     if (self->num_fields > 0)
 991         ADDCH(dialect->delimiter);
 992 
 993     /* Handle preceding quote */
 994     if (copy_phase && *quoted)
 995         ADDCH(dialect->quotechar);
 996 
 997     /* Copy/count field data */
 998     for (i = 0;; i++) {
 999         char c = field[i];
1000         int want_escape = 0;
1001 
1002         if (c == '\0')
1003             break;
1004 
1005         if (c == dialect->delimiter ||
1006             c == dialect->escapechar ||
1007             c == dialect->quotechar ||
1008             strchr(lineterm, c)) {
1009             if (dialect->quoting == QUOTE_NONE)
1010                 want_escape = 1;
1011             else {
1012                 if (c == dialect->quotechar) {
1013                     if (dialect->doublequote)
1014                         ADDCH(dialect->quotechar);
1015                     else
1016                         want_escape = 1;
1017                 }
1018                 if (!want_escape)
1019                     *quoted = 1;
1020             }
1021             if (want_escape) {
1022                 if (!dialect->escapechar) {
1023                     PyErr_Format(error_obj,
1024                                  "need to escape, but no escapechar set");
1025                     return -1;
1026                 }
1027                 ADDCH(dialect->escapechar);
1028             }
1029         }
1030         /* Copy field character into record buffer.
1031          */
1032         ADDCH(c);
1033     }
1034 
1035     /* If field is empty check if it needs to be quoted.
1036      */
1037     if (i == 0 && quote_empty) {
1038         if (dialect->quoting == QUOTE_NONE) {
1039             PyErr_Format(error_obj,
1040                          "single empty field record must be quoted");
1041             return -1;
1042         }
1043         else
1044             *quoted = 1;
1045     }
1046 
1047     if (*quoted) {
1048         if (copy_phase)
1049             ADDCH(dialect->quotechar);
1050         else
1051             rec_len += 2;
1052     }
1053     return rec_len;
1054 #undef ADDCH
1055 }
1056 
1057 static int
1058 join_check_rec_size(WriterObj *self, int rec_len)
1059 {
1060 
1061     if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1062         PyErr_NoMemory();
1063         return 0;
1064     }
1065 
1066     if (rec_len > self->rec_size) {
1067         if (self->rec_size == 0) {
1068             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1069             if (self->rec != NULL)
1070                 PyMem_Free(self->rec);
1071             self->rec = PyMem_Malloc(self->rec_size);
1072         }
1073         else {
1074             char *old_rec = self->rec;
1075 
1076             self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077             self->rec = PyMem_Realloc(self->rec, self->rec_size);
1078             if (self->rec == NULL)
1079                 PyMem_Free(old_rec);
1080         }
1081         if (self->rec == NULL) {
1082             PyErr_NoMemory();
1083             return 0;
1084         }
1085     }
1086     return 1;
1087 }
1088 
1089 static int
1090 join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1091 {
1092     int rec_len;
1093 
1094     rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1095     if (rec_len < 0)
1096         return 0;
1097 
1098     /* grow record buffer if necessary */
1099     if (!join_check_rec_size(self, rec_len))
1100         return 0;
1101 
1102     self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1103     self->num_fields++;
1104 
1105     return 1;
1106 }
1107 
1108 static int
1109 join_append_lineterminator(WriterObj *self)
1110 {
1111     int terminator_len;
1112     char *terminator;
1113 
1114     terminator_len = PyString_Size(self->dialect->lineterminator);
1115     if (terminator_len == -1)
1116         return 0;
1117 
1118     /* grow record buffer if necessary */
1119     if (!join_check_rec_size(self, self->rec_len + terminator_len))
1120         return 0;
1121 
1122     terminator = PyString_AsString(self->dialect->lineterminator);
1123     if (terminator == NULL)
1124         return 0;
1125     memmove(self->rec + self->rec_len, terminator, terminator_len);
1126     self->rec_len += terminator_len;
1127 
1128     return 1;
1129 }
1130 
1131 PyDoc_STRVAR(csv_writerow_doc,
1132 "writerow(sequence)\n"
1133 "\n"
1134 "Construct and write a CSV record from a sequence of fields.  Non-string\n"
1135 "elements will be converted to string.");
1136 
1137 static PyObject *
1138 csv_writerow(WriterObj *self, PyObject *seq)
1139 {
1140     DialectObj *dialect = self->dialect;
1141     int len, i;
1142 
1143     if (!PySequence_Check(seq))
1144         return PyErr_Format(error_obj, "sequence expected");
1145 
1146     len = PySequence_Length(seq);
1147     if (len < 0)
1148         return NULL;
1149 
1150     /* Join all fields in internal buffer.
1151      */
1152     join_reset(self);
1153     for (i = 0; i < len; i++) {
1154         PyObject *field;
1155         int append_ok;
1156         int quoted;
1157 
1158         field = PySequence_GetItem(seq, i);
1159         if (field == NULL)
1160             return NULL;
1161 
1162         switch (dialect->quoting) {
1163         case QUOTE_NONNUMERIC:
1164             quoted = !PyNumber_Check(field);
1165             break;
1166         case QUOTE_ALL:
1167             quoted = 1;
1168             break;
1169         default:
1170             quoted = 0;
1171             break;
1172         }
1173 
1174         if (PyString_Check(field)) {
1175             append_ok = join_append(self,
1176                                     PyString_AS_STRING(field),
1177                                     &quoted, len == 1);
1178             Py_DECREF(field);
1179         }
1180         else if (field == Py_None) {
1181             append_ok = join_append(self, "", &quoted, len == 1);
1182             Py_DECREF(field);
1183         }
1184         else {
1185             PyObject *str;
1186 
1187             if (PyFloat_Check(field)) {
1188                 str = PyObject_Repr(field);
1189             } else {
1190                 str = PyObject_Str(field);
1191             }
1192             Py_DECREF(field);
1193             if (str == NULL)
1194                 return NULL;
1195 
1196             append_ok = join_append(self, PyString_AS_STRING(str),
1197                                     &quoted, len == 1);
1198             Py_DECREF(str);
1199         }
1200         if (!append_ok)
1201             return NULL;
1202     }
1203 
1204     /* Add line terminator.
1205      */
1206     if (!join_append_lineterminator(self))
1207         return 0;
1208 
1209     return PyObject_CallFunction(self->writeline,
1210                                  "(s#)", self->rec, self->rec_len);
1211 }
1212 
1213 PyDoc_STRVAR(csv_writerows_doc,
1214 "writerows(sequence of sequences)\n"
1215 "\n"
1216 "Construct and write a series of sequences to a csv file.  Non-string\n"
1217 "elements will be converted to string.");
1218 
1219 static PyObject *
1220 csv_writerows(WriterObj *self, PyObject *seqseq)
1221 {
1222     PyObject *row_iter, *row_obj, *result;
1223 
1224     row_iter = PyObject_GetIter(seqseq);
1225     if (row_iter == NULL) {
1226         PyErr_SetString(PyExc_TypeError,
1227                         "writerows() argument must be iterable");
1228         return NULL;
1229     }
1230     while ((row_obj = PyIter_Next(row_iter))) {
1231         result = csv_writerow(self, row_obj);
1232         Py_DECREF(row_obj);
1233         if (!result) {
1234             Py_DECREF(row_iter);
1235             return NULL;
1236         }
1237         else
1238              Py_DECREF(result);
1239     }
1240     Py_DECREF(row_iter);
1241     if (PyErr_Occurred())
1242         return NULL;
1243     Py_INCREF(Py_None);
1244     return Py_None;
1245 }
1246 
1247 static struct PyMethodDef Writer_methods[] = {
1248     { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1249     { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1250     { NULL, NULL }
1251 };
1252 
1253 #define W_OFF(x) offsetof(WriterObj, x)
1254 
1255 static struct PyMemberDef Writer_memberlist[] = {
1256     { "dialect", T_OBJECT, W_OFF(dialect), RO },
1257     { NULL }
1258 };
1259 
1260 static void
1261 Writer_dealloc(WriterObj *self)
1262 {
1263     PyObject_GC_UnTrack(self);
1264     Py_XDECREF(self->dialect);
1265     Py_XDECREF(self->writeline);
1266     if (self->rec != NULL)
1267         PyMem_Free(self->rec);
1268     PyObject_GC_Del(self);
1269 }
1270 
1271 static int
1272 Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1273 {
1274     Py_VISIT(self->dialect);
1275     Py_VISIT(self->writeline);
1276     return 0;
1277 }
1278 
1279 static int
1280 Writer_clear(WriterObj *self)
1281 {
1282     Py_CLEAR(self->dialect);
1283     Py_CLEAR(self->writeline);
1284     return 0;
1285 }
1286 
1287 PyDoc_STRVAR(Writer_Type_doc,
1288 "CSV writer\n"
1289 "\n"
1290 "Writer objects are responsible for generating tabular data\n"
1291 "in CSV format from sequence input.\n"
1292 );
1293 
1294 static PyTypeObject Writer_Type = {
1295     PyVarObject_HEAD_INIT(NULL, 0)
1296     "_csv.writer",                          /*tp_name*/
1297     sizeof(WriterObj),                      /*tp_basicsize*/
1298     0,                                      /*tp_itemsize*/
1299     /* methods */
1300     (destructor)Writer_dealloc,             /*tp_dealloc*/
1301     (printfunc)0,                           /*tp_print*/
1302     (getattrfunc)0,                         /*tp_getattr*/
1303     (setattrfunc)0,                         /*tp_setattr*/
1304     (cmpfunc)0,                             /*tp_compare*/
1305     (reprfunc)0,                            /*tp_repr*/
1306     0,                                      /*tp_as_number*/
1307     0,                                      /*tp_as_sequence*/
1308     0,                                      /*tp_as_mapping*/
1309     (hashfunc)0,                            /*tp_hash*/
1310     (ternaryfunc)0,                         /*tp_call*/
1311     (reprfunc)0,                            /*tp_str*/
1312     0,                                      /*tp_getattro*/
1313     0,                                      /*tp_setattro*/
1314     0,                                      /*tp_as_buffer*/
1315     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1316         Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/
1317     Writer_Type_doc,
1318     (traverseproc)Writer_traverse,          /*tp_traverse*/
1319     (inquiry)Writer_clear,                  /*tp_clear*/
1320     0,                                      /*tp_richcompare*/
1321     0,                                      /*tp_weaklistoffset*/
1322     (getiterfunc)0,                         /*tp_iter*/
1323     (getiterfunc)0,                         /*tp_iternext*/
1324     Writer_methods,                         /*tp_methods*/
1325     Writer_memberlist,                      /*tp_members*/
1326     0,                                      /*tp_getset*/
1327 };
1328 
1329 static PyObject *
1330 csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1331 {
1332     PyObject * output_file, * dialect = NULL;
1333     WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
1334 
1335     if (!self)
1336         return NULL;
1337 
1338     self->dialect = NULL;
1339     self->writeline = NULL;
1340 
1341     self->rec = NULL;
1342     self->rec_size = 0;
1343     self->rec_len = 0;
1344     self->num_fields = 0;
1345 
1346     if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1347         Py_DECREF(self);
1348         return NULL;
1349     }
1350     self->writeline = PyObject_GetAttrString(output_file, "write");
1351     if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1352         PyErr_SetString(PyExc_TypeError,
1353                         "argument 1 must have a \"write\" method");
1354         Py_DECREF(self);
1355         return NULL;
1356     }
1357     self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1358     if (self->dialect == NULL) {
1359         Py_DECREF(self);
1360         return NULL;
1361     }
1362     PyObject_GC_Track(self);
1363     return (PyObject *)self;
1364 }
1365 
1366 /*
1367  * DIALECT REGISTRY
1368  */
1369 static PyObject *
1370 csv_list_dialects(PyObject *module, PyObject *args)
1371 {
1372     return PyDict_Keys(dialects);
1373 }
1374 
1375 static PyObject *
1376 csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
1377 {
1378     PyObject *name_obj, *dialect_obj = NULL;
1379     PyObject *dialect;
1380 
1381     if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1382         return NULL;
1383     if (!IS_BASESTRING(name_obj)) {
1384         PyErr_SetString(PyExc_TypeError,
1385                         "dialect name must be a string or unicode");
1386         return NULL;
1387     }
1388     dialect = _call_dialect(dialect_obj, kwargs);
1389     if (dialect == NULL)
1390         return NULL;
1391     if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1392         Py_DECREF(dialect);
1393         return NULL;
1394     }
1395     Py_DECREF(dialect);
1396     Py_INCREF(Py_None);
1397     return Py_None;
1398 }
1399 
1400 static PyObject *
1401 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
1402 {
1403     if (PyDict_DelItem(dialects, name_obj) < 0)
1404         return PyErr_Format(error_obj, "unknown dialect");
1405     Py_INCREF(Py_None);
1406     return Py_None;
1407 }
1408 
1409 static PyObject *
1410 csv_get_dialect(PyObject *module, PyObject *name_obj)
1411 {
1412     return get_dialect_from_registry(name_obj);
1413 }
1414 
1415 static PyObject *
1416 csv_field_size_limit(PyObject *module, PyObject *args)
1417 {
1418     PyObject *new_limit = NULL;
1419     long old_limit = field_limit;
1420 
1421     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1422         return NULL;
1423     if (new_limit != NULL) {
1424         if (!PyInt_Check(new_limit)) {
1425             PyErr_Format(PyExc_TypeError,
1426                          "limit must be an integer");
1427             return NULL;
1428         }
1429         field_limit = PyInt_AsLong(new_limit);
1430     }
1431     return PyInt_FromLong(old_limit);
1432 }
1433 
1434 /*
1435  * MODULE
1436  */
1437 
/* Docstring of the _csv module itself; init_csv() passes it to
 * Py_InitModule3().  It summarises the dialect registration mechanism and
 * the individual dialect settings.
 */
1438 PyDoc_STRVAR(csv_module_doc,
1439 "CSV parsing and writing.\n"
1440 "\n"
1441 "This module provides classes that assist in the reading and writing\n"
1442 "of Comma Separated Value (CSV) files, and implements the interface\n"
1443 "described by PEP 305.  Although many CSV files are simple to parse,\n"
1444 "the format is not formally defined by a stable specification and\n"
1445 "is subtle enough that parsing lines of a CSV file with something\n"
1446 "like line.split(\",\") is bound to fail.  The module supports three\n"
1447 "basic APIs: reading, writing, and registration of dialects.\n"
1448 "\n"
1449 "\n"
1450 "DIALECT REGISTRATION:\n"
1451 "\n"
1452 "Readers and writers support a dialect argument, which is a convenient\n"
1453 "handle on a group of settings.  When the dialect argument is a string,\n"
1454 "it identifies one of the dialects previously registered with the module.\n"
1455 "If it is a class or instance, the attributes of the argument are used as\n"
1456 "the settings for the reader or writer:\n"
1457 "\n"
1458 "    class excel:\n"
1459 "        delimiter = ','\n"
1460 "        quotechar = '\"'\n"
1461 "        escapechar = None\n"
1462 "        doublequote = True\n"
1463 "        skipinitialspace = False\n"
1464 "        lineterminator = '\\r\\n'\n"
1465 "        quoting = QUOTE_MINIMAL\n"
1466 "\n"
1467 "SETTINGS:\n"
1468 "\n"
1469 "    * quotechar - specifies a one-character string to use as the \n"
1470 "        quoting character.  It defaults to '\"'.\n"
1471 "    * delimiter - specifies a one-character string to use as the \n"
1472 "        field separator.  It defaults to ','.\n"
1473 "    * skipinitialspace - specifies how to interpret whitespace which\n"
1474 "        immediately follows a delimiter.  It defaults to False, which\n"
1475 "        means that whitespace immediately following a delimiter is part\n"
1476 "        of the following field.\n"
1477 "    * lineterminator -  specifies the character sequence which should \n"
1478 "        terminate rows.\n"
1479 "    * quoting - controls when quotes should be generated by the writer.\n"
1480 "        It can take on any of the following module constants:\n"
1481 "\n"
1482 "        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1483 "            field contains either the quotechar or the delimiter\n"
1484 "        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1485 "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1486 "            fields which do not parse as integers or floating point\n"
1487 "            numbers.\n"
1488 "        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1489 "    * escapechar - specifies a one-character string used to escape \n"
1490 "        the delimiter when quoting is set to QUOTE_NONE.\n"
1491 "    * doublequote - controls the handling of quotes inside fields.  When\n"
1492 "        True, two consecutive quotes are interpreted as one during read,\n"
1493 "        and when writing, each quote character embedded in the data is\n"
1494 "        written as two quotes\n");
1495 
/* Docstring attached to the module-level "reader" entry in csv_methods. */
1496 PyDoc_STRVAR(csv_reader_doc,
1497 "    csv_reader = reader(iterable [, dialect='excel']\n"
1498 "                        [optional keyword args])\n"
1499 "    for row in csv_reader:\n"
1500 "        process(row)\n"
1501 "\n"
1502 "The \"iterable\" argument can be any object that returns a line\n"
1503 "of input for each iteration, such as a file object or a list.  The\n"
1504 "optional \"dialect\" parameter is discussed below.  The function\n"
1505 "also accepts optional keyword arguments which override settings\n"
1506 "provided by the dialect.\n"
1507 "\n"
1508 "The returned object is an iterator.  Each iteration returns a row\n"
1509 "of the CSV file (which can span multiple input lines):\n");
1510 
/* Docstring attached to the module-level "writer" entry in csv_methods. */
1511 PyDoc_STRVAR(csv_writer_doc,
1512 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1513 "                            [optional keyword args])\n"
1514 "    for row in sequence:\n"
1515 "        csv_writer.writerow(row)\n"
1516 "\n"
1517 "    [or]\n"
1518 "\n"
1519 "    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1520 "                            [optional keyword args])\n"
1521 "    csv_writer.writerows(rows)\n"
1522 "\n"
1523 "The \"fileobj\" argument can be any object that supports the file API.\n");
1524 
1525 PyDoc_STRVAR(csv_list_dialects_doc,
1526 "Return a list of all know dialect names.\n"
1527 "    names = csv.list_dialects()");
1528 
1529 PyDoc_STRVAR(csv_get_dialect_doc,
1530 "Return the dialect instance associated with name.\n"
1531 "    dialect = csv.get_dialect(name)");
1532 
1533 PyDoc_STRVAR(csv_register_dialect_doc,
1534 "Create a mapping from a string name to a dialect class.\n"
1535 "    dialect = csv.register_dialect(name, dialect)");
1536 
1537 PyDoc_STRVAR(csv_unregister_dialect_doc,
1538 "Delete the name/dialect mapping associated with a string name.\n"
1539 "    csv.unregister_dialect(name)");
1540 
1541 PyDoc_STRVAR(csv_field_size_limit_doc,
1542 "Sets an upper limit on parsed fields.\n"
1543 "    csv.field_size_limit([limit])\n"
1544 "\n"
1545 "Returns old limit. If limit is not given, no new limit is set and\n"
1546 "the old limit is returned");
1547 
1548 static struct PyMethodDef csv_methods[] = {
1549     { "reader", (PyCFunction)csv_reader,
1550         METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1551     { "writer", (PyCFunction)csv_writer,
1552         METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1553     { "list_dialects", (PyCFunction)csv_list_dialects,
1554         METH_NOARGS, csv_list_dialects_doc},
1555     { "register_dialect", (PyCFunction)csv_register_dialect,
1556         METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1557     { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1558         METH_O, csv_unregister_dialect_doc},
1559     { "get_dialect", (PyCFunction)csv_get_dialect,
1560         METH_O, csv_get_dialect_doc},
1561     { "field_size_limit", (PyCFunction)csv_field_size_limit,
1562         METH_VARARGS, csv_field_size_limit_doc},
1563     { NULL, NULL }
1564 };
1565 
1566 PyMODINIT_FUNC
1567 init_csv(void)
1568 {
1569     PyObject *module;
1570     StyleDesc *style;
1571 
1572     if (PyType_Ready(&Dialect_Type) < 0)
1573         return;
1574 
1575     if (PyType_Ready(&Reader_Type) < 0)
1576         return;
1577 
1578     if (PyType_Ready(&Writer_Type) < 0)
1579         return;
1580 
1581     /* Create the module and add the functions */
1582     module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1583     if (module == NULL)
1584         return;
1585 
1586     /* Add version to the module. */
1587     if (PyModule_AddStringConstant(module, "__version__",
1588                                    MODULE_VERSION) == -1)
1589         return;
1590 
1591     /* Add _dialects dictionary */
1592     dialects = PyDict_New();
1593     if (dialects == NULL)
1594         return;
1595     if (PyModule_AddObject(module, "_dialects", dialects))
1596         return;
1597 
1598     /* Add quote styles into dictionary */
1599     for (style = quote_styles; style->name; style++) {
1600         if (PyModule_AddIntConstant(module, style->name,
1601                                     style->style) == -1)
1602             return;
1603     }
1604 
1605     /* Add the Dialect type */
1606     Py_INCREF(&Dialect_Type);
1607     if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1608         return;
1609 
1610     /* Add the CSV exception object to the module. */
1611     error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1612     if (error_obj == NULL)
1613         return;
1614     PyModule_AddObject(module, "Error", error_obj);
1615 }