00001
00019 #ifdef STANDALONE
00020 #include <sqlite3.h>
00021 #else
00022 #include <sqlite3ext.h>
00023 static SQLITE_EXTENSION_INIT1
00024 #endif
00025
00026 #include <stdio.h>
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <limits.h>
00030
00031 #ifdef _WIN32
00032 #include <windows.h>
00033 #define strcasecmp _stricmp
00034 #define strncasecmp _strnicmp
00035 #endif
00036
00043 typedef struct csv_file {
00044 FILE *f;
00045 char *sep;
00046 char *quot;
00047 int isdos;
00048 int maxl;
00049 char *line;
00050 long pos0;
00051 int maxc;
00052 int ncols;
00053 char **cols;
00054 } csv_file;
00055
00062 typedef struct csv_guess_fmt {
00063 int nlines;
00064 int hist[256];
00065 } csv_guess_fmt;
00066
00073 typedef struct csv_vtab {
00074 sqlite3_vtab vtab;
00075 csv_file *csv;
00076 int convert;
00077 char coltypes[1];
00078 } csv_vtab;
00079
00086 typedef struct {
00087 sqlite3_vtab_cursor cursor;
00088 long pos;
00089 } csv_cursor;
00090
00096 static void
00097 append_free(char **in)
00098 {
00099 long *p = (long *) *in;
00100
00101 if (p) {
00102 p -= 2;
00103 sqlite3_free(p);
00104 *in = 0;
00105 }
00106 }
00107
00117 static char *
00118 append(char **in, char const *append, char quote)
00119 {
00120 long *p = (long *) *in;
00121 long len, maxlen, actlen;
00122 int i;
00123 char *pp;
00124 int nappend = append ? strlen(append) : 0;
00125
00126 if (p) {
00127 p -= 2;
00128 maxlen = p[0];
00129 actlen = p[1];
00130 } else {
00131 maxlen = actlen = 0;
00132 }
00133 len = nappend + actlen;
00134 if (quote) {
00135 len += 2;
00136 for (i = 0; i < nappend; i++) {
00137 if (append[i] == quote) {
00138 len++;
00139 }
00140 }
00141 } else if (!nappend) {
00142 return *in;
00143 }
00144 if (len >= maxlen - 1) {
00145 long *q;
00146
00147 maxlen = (len + 0x03ff) & (~0x3ff);
00148 q = (long *) sqlite3_realloc(p, maxlen + 1 + 2 * sizeof (long));
00149 if (!q) {
00150 return 0;
00151 }
00152 if (!p) {
00153 q[1] = 0;
00154 }
00155 p = q;
00156 p[0] = maxlen;
00157 *in = (char *) (p + 2);
00158 }
00159 pp = *in + actlen;
00160 if (quote) {
00161 *pp++ = quote;
00162 for (i = 0; i < nappend; i++) {
00163 *pp++ = append[i];
00164 if (append[i] == quote) {
00165 *pp++ = quote;
00166 }
00167 }
00168 *pp++ = quote;
00169 *pp = '\0';
00170 } else {
00171 if (nappend) {
00172 memcpy(pp, append, nappend);
00173 pp += nappend;
00174 *pp = '\0';
00175 }
00176 }
00177 p[1] = pp - *in;
00178 return *in;
00179 }
00180
00187 static char *
00188 unquote(char const *in)
00189 {
00190 char c, *ret;
00191 int i;
00192
00193 ret = sqlite3_malloc(strlen(in) + 1);
00194 if (ret) {
00195 c = in[0];
00196 if ((c == '"') || (c == '\'')) {
00197 i = strlen(in + 1);
00198 if ((i > 0) && (in[i] == c)) {
00199 strcpy(ret, in + 1);
00200 ret[i - 1] = '\0';
00201 return ret;
00202 }
00203 }
00204 strcpy(ret, in);
00205 }
00206 return ret;
00207 }
00208
00215 static int
00216 maptype(char const *type)
00217 {
00218 int typelen = type ? strlen(type) : 0;
00219
00220 if ((typelen >= 3) &&
00221 (strncasecmp(type, "integer", 7) == 0)) {
00222 return SQLITE_INTEGER;
00223 }
00224 if ((typelen >= 6) &&
00225 (strncasecmp(type, "double", 6) == 0)) {
00226 return SQLITE_FLOAT;
00227 }
00228 if ((typelen >= 5) &&
00229 (strncasecmp(type, "float", 5) == 0)) {
00230 return SQLITE_FLOAT;
00231 }
00232 if ((typelen >= 4) &&
00233 (strncasecmp(type, "real", 4) == 0)) {
00234 return SQLITE_FLOAT;
00235 }
00236 return SQLITE_TEXT;
00237 }
00238
00245 static void
00246 conv_names(char **names, int ncols)
00247 {
00248 int i;
00249 char *p, *q;
00250 static const char ws[] = "\n\t\r\b\v ";
00251
00252 if (!names || ncols <= 0) {
00253 return;
00254 }
00255 for (i = 0; i < ncols; i++) {
00256 p = names[i];
00257
00258 while (*p) {
00259 if (strchr(ws, *p)) {
00260 *p++ = '_';
00261 q = p;
00262 while (*q && strchr(ws, *q)) {
00263 ++q;
00264 }
00265 if (*q && q > p) {
00266 strcpy(p, q);
00267 }
00268 continue;
00269 }
00270 ++p;
00271 }
00272 }
00273 }
00274
00285 static void
00286 result_or_bind(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00287 char *data, int len, int type)
00288 {
00289 char *endp;
00290
00291 if (!data) {
00292 if (ctx) {
00293 sqlite3_result_null(ctx);
00294 } else {
00295 sqlite3_bind_null(stmt, idx);
00296 }
00297 return;
00298 }
00299 if (type == SQLITE_INTEGER) {
00300 sqlite_int64 val;
00301 #if defined(_WIN32) || defined(_WIN64)
00302 char endc;
00303
00304 if (sscanf(data, "%I64d%c", &val, &endc) == 1) {
00305 if (ctx) {
00306 sqlite3_result_int64(ctx, val);
00307 } else {
00308 sqlite3_bind_int64(stmt, idx, val);
00309 }
00310 return;
00311 }
00312 #else
00313 endp = 0;
00314 #ifdef __osf__
00315 val = strtol(data, &endp, 0);
00316 #else
00317 val = strtoll(data, &endp, 0);
00318 #endif
00319 if (endp && (endp != data) && !*endp) {
00320 if (ctx) {
00321 sqlite3_result_int64(ctx, val);
00322 } else {
00323 sqlite3_bind_int64(stmt, idx, val);
00324 }
00325 return;
00326 }
00327 #endif
00328 } else if (type == SQLITE_FLOAT) {
00329 double val;
00330
00331 endp = 0;
00332 val = strtod(data, &endp);
00333 if (endp && (endp != data) && !*endp) {
00334 if (ctx) {
00335 sqlite3_result_double(ctx, val);
00336 } else {
00337 sqlite3_bind_double(stmt, idx, val);
00338 }
00339 return;
00340 }
00341 }
00342 if (ctx) {
00343 sqlite3_result_text(ctx, data, len, SQLITE_TRANSIENT);
00344 } else {
00345 sqlite3_bind_text(stmt, idx, data, len, SQLITE_TRANSIENT);
00346 }
00347 }
00348
00359 static int
00360 process_col(sqlite3_context *ctx, sqlite3_stmt *stmt, int idx,
00361 char *data, int type, int conv)
00362 {
00363 char c, *p;
00364 const char flchars[] = "Ee+-.,0123456789";
00365
00366 if (!data) {
00367 goto putdata;
00368 }
00369
00370
00371
00372
00373
00374 c = data[0];
00375 if ((c != '\0') && strchr(flchars + 2, c)) {
00376 p = data + 1;
00377 while (*p && strchr(flchars, *p)) {
00378 ++p;
00379 }
00380 if (*p == '\0') {
00381 char *first = 0;
00382 int n = 0;
00383
00384 p = data;
00385 while (p) {
00386 p = strchr(p, ',');
00387 if (!p) {
00388 break;
00389 }
00390 if (++n == 1) {
00391 first = p;
00392 }
00393 ++p;
00394 }
00395 if (first) {
00396 *first = '.';
00397 goto putdata;
00398 }
00399 }
00400 }
00401 if (conv) {
00402 char *utf = sqlite3_malloc(strlen(data) * 2 + 2);
00403
00404 if (utf) {
00405 p = utf;
00406 while ((c = *data) != '\0') {
00407 if (((conv & 10) == 10) && (c == '\\')) {
00408 if (data[1] == 'q') {
00409 *p++ = '\'';
00410 data += 2;
00411 continue;
00412 }
00413 }
00414 if ((conv & 2) && (c == '\\')) {
00415 char c2 = data[1];
00416
00417 switch (c2) {
00418 case '\0':
00419 goto convdone;
00420 case 'n':
00421 *p = '\n';
00422 break;
00423 case 't':
00424 *p = '\t';
00425 break;
00426 case 'r':
00427 *p = '\r';
00428 break;
00429 case 'f':
00430 *p = '\f';
00431 break;
00432 case 'v':
00433 *p = '\v';
00434 break;
00435 case 'b':
00436 *p = '\b';
00437 break;
00438 case 'a':
00439 *p = '\a';
00440 break;
00441 case '?':
00442 *p = '\?';
00443 break;
00444 case '\'':
00445 *p = '\'';
00446 break;
00447 case '"':
00448 *p = '\"';
00449 break;
00450 case '\\':
00451 *p = '\\';
00452 break;
00453 default:
00454 *p++ = c;
00455 *p = c2;
00456 break;
00457 }
00458 p++;
00459 data += 2;
00460 continue;
00461 }
00462 if ((conv & 1) && (c & 0x80)) {
00463 *p++ = 0xc0 | ((c >> 6) & 0x1f);
00464 *p++ = 0x80 | (c & 0x3f);
00465 } else {
00466 *p++ = c;
00467 }
00468 data++;
00469 }
00470 convdone:
00471 *p = '\0';
00472 result_or_bind(ctx, stmt, idx, utf, p - utf, type);
00473 sqlite3_free(utf);
00474 return SQLITE_OK;
00475 } else {
00476 if (ctx) {
00477 sqlite3_result_error(ctx, "out of memory", -1);
00478 }
00479 return SQLITE_NOMEM;
00480 }
00481 }
00482 putdata:
00483 result_or_bind(ctx, stmt, idx, data, -1, type);
00484 return SQLITE_OK;
00485 }
00486
00495 static csv_file *
00496 csv_open(const char *filename, const char *sep, const char *quot)
00497 {
00498 FILE *f;
00499 csv_file *csv;
00500
00501 #ifdef _WIN32
00502 f = fopen(filename, "rb");
00503 #else
00504 f = fopen(filename, "r");
00505 #endif
00506 if (!f) {
00507 return 0;
00508 }
00509 csv = sqlite3_malloc(sizeof (csv_file));
00510 if (!csv) {
00511 error0:
00512 fclose(f);
00513 return 0;
00514 }
00515 csv->f = f;
00516 if (sep && sep[0]) {
00517 csv->sep = sqlite3_malloc(strlen(sep) + 1);
00518 if (!csv->sep) {
00519 error1:
00520 sqlite3_free(csv);
00521 goto error0;
00522 }
00523 strcpy(csv->sep, sep);
00524 } else {
00525 csv->sep = 0;
00526 }
00527 if (quot && quot[0]) {
00528 csv->quot = sqlite3_malloc(strlen(quot) + 1);
00529 if (!csv->quot) {
00530 if (csv->sep) {
00531 sqlite3_free(csv->sep);
00532 }
00533 goto error1;
00534 }
00535 strcpy(csv->quot, quot);
00536 } else {
00537 csv->quot = 0;
00538 }
00539 csv->isdos = 0;
00540 csv->maxl = 0;
00541 csv->line = 0;
00542 csv->pos0 = 0;
00543 csv->maxc = 0;
00544 csv->ncols = 0;
00545 csv->cols = 0;
00546 return csv;
00547 }
00548
00554 static void
00555 csv_close(csv_file *csv)
00556 {
00557 if (csv) {
00558 if (csv->sep) {
00559 sqlite3_free(csv->sep);
00560 }
00561 if (csv->quot) {
00562 sqlite3_free(csv->quot);
00563 }
00564 if (csv->line) {
00565 sqlite3_free(csv->line);
00566 }
00567 if (csv->cols) {
00568 sqlite3_free(csv->cols);
00569 }
00570 if (csv->f) {
00571 fclose(csv->f);
00572 }
00573 sqlite3_free(csv);
00574 }
00575 }
00576
00583 static int
00584 csv_eof(csv_file *csv)
00585 {
00586 if (csv && csv->f) {
00587 return feof(csv->f);
00588 }
00589 return 1;
00590 }
00591
00599 static long
00600 csv_seek(csv_file *csv, long pos)
00601 {
00602 if (csv && csv->f) {
00603 return fseek(csv->f, pos, SEEK_SET);
00604 }
00605 return EOF;
00606 }
00607
00613 static void
00614 csv_rewind(csv_file *csv)
00615 {
00616 if (csv && csv->f) {
00617 csv_seek(csv, csv->pos0);
00618 }
00619 }
00620
00627 static long
00628 csv_tell(csv_file *csv)
00629 {
00630 if (csv && csv->f) {
00631 return ftell(csv->f);
00632 }
00633 return EOF;
00634 }
00635
00643 static int
00644 csv_getline(csv_file *csv, csv_guess_fmt *guess)
00645 {
00646 int i, index = 0, inq = 0, c, col;
00647 char *p, *sep;
00648
00649 if (!csv || !csv->f) {
00650 return EOF;
00651 }
00652 while (1) {
00653 c = fgetc(csv->f);
00654 if (c == EOF) {
00655 if (index > 0) {
00656 break;
00657 }
00658 return EOF;
00659 }
00660 if (c == '\0') {
00661 continue;
00662 }
00663 if (c == '\r') {
00664 int c2 = fgetc(csv->f);
00665 c = '\n';
00666
00667 if (c2 == '\n') {
00668 csv->isdos = 1;
00669 } else if (c2 != EOF) {
00670 ungetc(c2, csv->f);
00671 }
00672 }
00673
00674 if (csv->isdos && (c == '\032')) {
00675 int c2 = fgetc(csv->f);
00676
00677 if (c2 == EOF) {
00678 if (index > 0) {
00679 break;
00680 }
00681 return EOF;
00682 }
00683 ungetc(c2, csv->f);
00684 }
00685 if (index >= csv->maxl - 1) {
00686 int n = csv->maxl * 2;
00687 char *line;
00688
00689 if (n <= 0) {
00690 n = 4096;
00691 }
00692 line = sqlite3_malloc(n);
00693 if (!line) {
00694 return EOF;
00695 }
00696 if (csv->line) {
00697 memcpy(line, csv->line, index);
00698 sqlite3_free(csv->line);
00699 }
00700 csv->maxl = n;
00701 csv->line = line;
00702 }
00703 csv->line[index++] = c;
00704 if (csv->quot && (p = strchr(csv->quot, c))) {
00705 if (inq) {
00706 if (*p == inq) {
00707 inq = 0;
00708 }
00709 } else {
00710 inq = *p;
00711 }
00712 }
00713 if (!inq && (c == '\n')) {
00714 break;
00715 }
00716 }
00717 if (guess) {
00718 for (i = 0; i < index; i++) {
00719 guess->hist[csv->line[i] & 0xFF] += 1;
00720 }
00721 guess->nlines += 1;
00722 csv->ncols = 0;
00723 return 0;
00724 }
00725
00726 for (i = index - 1; i >= 0; i--) {
00727 if (csv->line[i] != '\n') {
00728 break;
00729 }
00730 }
00731 index = i + 1;
00732 csv->line[index] = '\0';
00733 i = inq = col = 0;
00734 sep = csv->sep ? csv->sep : ";";
00735 if (!csv->cols) {
00736 int n = 128;
00737
00738 csv->cols = sqlite3_malloc(sizeof (char *) * n);
00739 if (!csv->cols) {
00740 return EOF;
00741 }
00742 csv->maxc = n;
00743 }
00744 csv->cols[col++] = csv->line;
00745 while (i < index) {
00746 if (csv->quot && (p = strchr(csv->quot, csv->line[i]))) {
00747 if (inq) {
00748 if (*p == inq) {
00749 inq = 0;
00750 }
00751 } else {
00752 inq = *p;
00753 }
00754 }
00755 if (!inq && (p = strchr(sep, csv->line[i]))) {
00756 p = csv->line + i;
00757 *p = '\0';
00758 if (col >= csv->maxc) {
00759 int n = csv->maxc * 2;
00760 char **cols;
00761
00762 cols = sqlite3_realloc(csv->cols, sizeof (char *) * n);
00763 if (!cols) {
00764 return EOF;
00765 }
00766 csv->cols = cols;
00767 csv->maxc = n;
00768 }
00769 csv->cols[col++] = p + 1;
00770 }
00771 ++i;
00772 }
00773 csv->ncols = col;
00774
00775
00776 if (csv->quot) {
00777 for (i = 0; i < col; i++) {
00778 if (*csv->cols[i]) {
00779 p = strchr(csv->quot, *csv->cols[i]);
00780 if (p) {
00781 char *src, *dst;
00782
00783 c = *p;
00784 csv->cols[i] += 1;
00785 sep = csv->cols[i] + strlen(csv->cols[i]) - 1;
00786 if ((sep >= csv->cols[i]) && (*sep == c)) {
00787 *sep = '\0';
00788 }
00789
00790 src = csv->cols[i];
00791 dst = 0;
00792 while (*src) {
00793 if ((*src == c) && (src[1] == c)) {
00794 if (!dst) {
00795 dst = src;
00796 }
00797 src++;
00798 while (*src) {
00799 *dst++ = *src++;
00800 if (*src == c) {
00801 --src;
00802 break;
00803 }
00804 }
00805 }
00806 ++src;
00807 }
00808 if (dst) {
00809 *dst++ = '\0';
00810 }
00811 }
00812 }
00813 }
00814 }
00815 return col;
00816 }
00817
00824 static int
00825 csv_ncols(csv_file *csv)
00826 {
00827 if (csv && csv->cols) {
00828 return csv->ncols;
00829 }
00830 return 0;
00831 }
00832
00840 static char *
00841 csv_coldata(csv_file *csv, int n)
00842 {
00843 if (csv && csv->cols && (n >= 0) && (n < csv->ncols)) {
00844 return csv->cols[n];
00845 }
00846 return 0;
00847 }
00848
00855 static int
00856 csv_guess(csv_file *csv)
00857 {
00858 csv_guess_fmt guess;
00859 int i, n;
00860 char *p, sep[32], quot[4];
00861 const struct {
00862 int c;
00863 int min;
00864 } sep_test[] = {
00865 { ',', 2 },
00866 { ';', 2 },
00867 { '\t', 2 },
00868 { ' ', 4 },
00869 { '|', 2 }
00870 };
00871
00872 if (!csv) {
00873 return EOF;
00874 }
00875 memset(&guess, 0, sizeof (guess));
00876 csv->pos0 = 0;
00877 csv_rewind(csv);
00878 for (i = n = 0; i < 10; i++) {
00879 n = csv_getline(csv, &guess);
00880 if (n == EOF) {
00881 break;
00882 }
00883 }
00884 csv_rewind(csv);
00885 if (n && !i) {
00886 return EOF;
00887 }
00888 p = quot;
00889 n = '"';
00890 if (guess.hist[n] > 1) {
00891 *p++ = n;
00892 }
00893 n = '\'';
00894 if (guess.hist[n] > 1) {
00895 *p++ = n;
00896 }
00897 *p = '\0';
00898 p = sep;
00899 for (i = 0; i < sizeof (sep_test) / sizeof (sep_test[0]); i++) {
00900 if (guess.hist[sep_test[i].c] > sep_test[i].min * guess.nlines) {
00901 *p++ = sep_test[i].c;
00902 }
00903 }
00904 *p = '\0';
00905 if (quot[0]) {
00906 p = sqlite3_malloc(strlen(quot) + 1);
00907 if (p) {
00908 strcpy(p, quot);
00909 if (csv->quot) {
00910 sqlite3_free(csv->quot);
00911 }
00912 csv->quot = p;
00913 } else {
00914 return EOF;
00915 }
00916 }
00917 if (sep[0]) {
00918 p = sqlite3_malloc(strlen(sep) + 1);
00919 if (p) {
00920 strcpy(p, sep);
00921 if (csv->sep) {
00922 sqlite3_free(csv->sep);
00923 }
00924 csv->sep = p;
00925 } else {
00926 return EOF;
00927 }
00928 }
00929 return 0;
00930 }
00931
00965 static int
00966 csv_vtab_connect(sqlite3* db, void *aux, int argc, const char * const *argv,
00967 sqlite3_vtab **vtabp, char **errp)
00968 {
00969 csv_file *csv;
00970 int rc = SQLITE_ERROR, i, ncnames, row1;
00971 char **cnames, *schema = 0, **nargv;
00972 csv_vtab *vtab;
00973
00974 if (argc < 4) {
00975 *errp = sqlite3_mprintf("input file name missing");
00976 return SQLITE_ERROR;
00977 }
00978 nargv = sqlite3_malloc(sizeof (char *) * argc);
00979 memset(nargv, 0, sizeof (char *) * argc);
00980 for (i = 3; i < argc; i++) {
00981 nargv[i] = unquote(argv[i]);
00982 }
00983 csv = csv_open(nargv[3], (argc > 6) ? nargv[6] : 0,
00984 (argc > 7) ? nargv[7] : 0);
00985 if (!csv) {
00986 *errp = sqlite3_mprintf("unable to open input file");
00987 cleanup:
00988 append_free(&schema);
00989 for (i = 3; i < argc; i++) {
00990 if (nargv[i]) {
00991 sqlite3_free(nargv[i]);
00992 }
00993 }
00994 return rc;
00995 }
00996 if (!csv->sep && !csv->quot) {
00997 csv_guess(csv);
00998 }
00999 csv->pos0 = 0;
01000 row1 = 0;
01001 if (argc > 4) {
01002 row1 = strtol(nargv[4], 0, 10);
01003 }
01004 if (row1) {
01005
01006 csv_getline(csv, 0);
01007 if (csv->ncols < 1) {
01008 csv_close(csv);
01009 *errp = sqlite3_mprintf("unable to get column names");
01010 goto cleanup;
01011 }
01012 csv->pos0 = csv_tell(csv);
01013 csv_rewind(csv);
01014 ncnames = csv_ncols(csv);
01015 cnames = csv->cols;
01016 } else if (argc > 8) {
01017 ncnames = argc - 8;
01018 cnames = (char **) nargv + 8;
01019 } else {
01020
01021 csv_getline(csv, 0);
01022 if (csv->ncols < 1) {
01023 csv_close(csv);
01024 *errp = sqlite3_mprintf("unable to get column names");
01025 goto cleanup;
01026 }
01027 csv_rewind(csv);
01028 ncnames = csv_ncols(csv);
01029 cnames = 0;
01030 }
01031 vtab = sqlite3_malloc(sizeof(csv_vtab) + ncnames);
01032 if (!vtab) {
01033 csv_close(csv);
01034 *errp = sqlite3_mprintf("out of memory");
01035 goto cleanup;
01036 }
01037 memset(vtab, 0, sizeof (*vtab));
01038 vtab->convert = 0;
01039 if (argc > 5) {
01040 vtab->convert = strtol(nargv[5], 0, 10);
01041 if (row1 && (vtab->convert & 4)) {
01042 conv_names(cnames, ncnames);
01043 }
01044 }
01045 vtab->csv = csv;
01046 append(&schema, "CREATE TABLE x(", 0);
01047 for (i = 0; i < ncnames; i++) {
01048 vtab->coltypes[i] = SQLITE_TEXT;
01049 if (!cnames || !cnames[i]) {
01050 char colname[64];
01051
01052 sprintf(colname, "column_%d", i + 1);
01053 append(&schema, colname, '"');
01054 } else if (row1 > 0) {
01055 append(&schema, cnames[i], '"');
01056 } else if (row1 < 0) {
01057 append(&schema, cnames[i], '"');
01058 if (i + 8 < argc) {
01059 char *type = nargv[i + 8];
01060
01061 append(&schema, " ", 0);
01062 append(&schema, type, 0);
01063 vtab->coltypes[i] = maptype(type);
01064 }
01065 } else {
01066 char *type = cnames[i];
01067
01068 append(&schema, cnames[i], 0);
01069 while (*type && !strchr(" \t", *type)) {
01070 type++;
01071 }
01072 while (*type && strchr(" \t", *type)) {
01073 type++;
01074 }
01075 vtab->coltypes[i] = maptype(type);
01076 }
01077 if (i < ncnames - 1) {
01078 append(&schema, ",", 0);
01079 }
01080 }
01081 append(&schema, ")", 0);
01082 rc = sqlite3_declare_vtab(db, schema);
01083 if (rc != SQLITE_OK) {
01084 csv_close(csv);
01085 sqlite3_free(vtab);
01086 *errp = sqlite3_mprintf("table definition failed, error %d, "
01087 "schema '%s'", rc, schema);
01088 goto cleanup;
01089 }
01090 *vtabp = &vtab->vtab;
01091 *errp = 0;
01092 goto cleanup;
01093 }
01094
01106 static int
01107 csv_vtab_create(sqlite3* db, void *aux, int argc,
01108 const char *const *argv,
01109 sqlite3_vtab **vtabp, char **errp)
01110 {
01111 return csv_vtab_connect(db, aux, argc, argv, vtabp, errp);
01112 }
01113
01120 static int
01121 csv_vtab_disconnect(sqlite3_vtab *vtab)
01122 {
01123 csv_vtab *tab = (csv_vtab *) vtab;
01124
01125 csv_close(tab->csv);
01126 sqlite3_free(tab);
01127 return SQLITE_OK;
01128 }
01129
01136 static int
01137 csv_vtab_destroy(sqlite3_vtab *vtab)
01138 {
01139 return csv_vtab_disconnect(vtab);
01140 }
01141
01149 static int
01150 csv_vtab_bestindex(sqlite3_vtab *vtab, sqlite3_index_info *info)
01151 {
01152 return SQLITE_OK;
01153 }
01154
01162 static int
01163 csv_vtab_open(sqlite3_vtab *vtab, sqlite3_vtab_cursor **cursorp)
01164 {
01165 csv_cursor *cur = sqlite3_malloc(sizeof(*cur));
01166 csv_vtab *tab = (csv_vtab *) vtab;
01167
01168 if (!cur) {
01169 return SQLITE_ERROR;
01170 }
01171 cur->cursor.pVtab = vtab;
01172 csv_rewind(tab->csv);
01173 cur->pos = csv_tell(tab->csv);
01174 *cursorp = &cur->cursor;
01175 return SQLITE_OK;
01176 }
01177
01184 static int
01185 csv_vtab_close(sqlite3_vtab_cursor *cursor)
01186 {
01187 sqlite3_free(cursor);
01188 return SQLITE_OK;
01189 }
01190
01197 static int
01198 csv_vtab_next(sqlite3_vtab_cursor *cursor)
01199 {
01200 csv_cursor *cur = (csv_cursor *) cursor;
01201 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01202
01203 cur->pos = csv_tell(tab->csv);
01204 csv_getline(tab->csv, 0);
01205 return SQLITE_OK;
01206 }
01207
01218 static int
01219 csv_vtab_filter(sqlite3_vtab_cursor *cursor, int idxNum,
01220 const char *idxStr, int argc, sqlite3_value **argv)
01221 {
01222 csv_cursor *cur = (csv_cursor *) cursor;
01223 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01224
01225 csv_rewind(tab->csv);
01226 return csv_vtab_next(cursor);
01227 }
01228
01235 static int
01236 csv_vtab_eof(sqlite3_vtab_cursor *cursor)
01237 {
01238 csv_cursor *cur = (csv_cursor *) cursor;
01239 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01240
01241 return csv_eof(tab->csv);
01242 }
01243
01252 static int
01253 csv_vtab_column(sqlite3_vtab_cursor *cursor, sqlite3_context *ctx, int n)
01254 {
01255 csv_cursor *cur = (csv_cursor *) cursor;
01256 csv_vtab *tab = (csv_vtab *) cur->cursor.pVtab;
01257 char *data = csv_coldata(tab->csv, n);
01258
01259 return process_col(ctx, 0, 0, data, tab->coltypes[n], tab->convert);
01260 }
01261
01269 static int
01270 csv_vtab_rowid(sqlite3_vtab_cursor *cursor, sqlite_int64 *rowidp)
01271 {
01272 csv_cursor *cur = (csv_cursor *) cursor;
01273
01274 *rowidp = cur->pos;
01275 return SQLITE_OK;
01276 }
01277
01278 #if (SQLITE_VERSION_NUMBER > 3004000)
01279
01286 static int
01287 csv_vtab_rename(sqlite3_vtab *vtab, const char *newname)
01288 {
01289 return SQLITE_OK;
01290 }
01291
01292 #endif
01293
01298 static const sqlite3_module csv_vtab_mod = {
01299 1,
01300 csv_vtab_create,
01301 csv_vtab_connect,
01302 csv_vtab_bestindex,
01303 csv_vtab_disconnect,
01304 csv_vtab_destroy,
01305 csv_vtab_open,
01306 csv_vtab_close,
01307 csv_vtab_filter,
01308 csv_vtab_next,
01309 csv_vtab_eof,
01310 csv_vtab_column,
01311 csv_vtab_rowid,
01312 0,
01313 0,
01314 0,
01315 0,
01316 0,
01317 0,
01318 #if (SQLITE_VERSION_NUMBER > 3004000)
01319 csv_vtab_rename,
01320 #endif
01321 };
01322
01350 static void
01351 csv_import_func(sqlite3_context *ctx, int argc, sqlite3_value **argv)
01352 {
01353 csv_file *csv;
01354 int rc, i, ncnames, row1, convert = 0, useargs = 0;
01355 char *tname, *fname, *sql = 0, **cnames, *coltypes = 0;
01356 sqlite3 *db = (sqlite3 *) sqlite3_user_data(ctx);
01357 sqlite3_stmt *stmt = 0;
01358
01359 if (argc < 2) {
01360 sqlite3_result_error(ctx, "need at least 2 arguments", -1);
01361 return;
01362 }
01363 tname = (char *) sqlite3_value_text(argv[0]);
01364 if (!tname) {
01365 sqlite3_result_error(ctx, "table name is NULL", -1);
01366 return;
01367 }
01368 fname = (char *) sqlite3_value_text(argv[1]);
01369 if (!fname) {
01370 sqlite3_result_error(ctx, "file name is NULL", -1);
01371 return;
01372 }
01373 csv = csv_open(fname,
01374 (argc > 4) ? (char *) sqlite3_value_text(argv[4]) : 0,
01375 (argc > 5) ? (char *) sqlite3_value_text(argv[5]) : 0);
01376 if (!csv) {
01377 sqlite3_result_error(ctx, "unable to open input file", -1);
01378 cleanup:
01379 if (stmt) {
01380 sqlite3_finalize(stmt);
01381 }
01382 append_free(&sql);
01383 if (coltypes) {
01384 sqlite3_free(coltypes);
01385 }
01386 if (csv) {
01387 csv_close(csv);
01388 }
01389 return;
01390 }
01391 if (!csv->sep && !csv->quot) {
01392 csv_guess(csv);
01393 }
01394 csv->pos0 = 0;
01395 row1 = 0;
01396 if (argc > 2) {
01397 row1 = sqlite3_value_int(argv[2]);
01398 }
01399 if (row1) {
01400
01401 csv_getline(csv, 0);
01402 if (csv->ncols < 1) {
01403 sqlite3_result_error(ctx, "unable to get column names", -1);
01404 goto cleanup;
01405 }
01406 csv->pos0 = csv_tell(csv);
01407 csv_rewind(csv);
01408 ncnames = csv_ncols(csv);
01409 cnames = csv->cols;
01410 } else if (argc > 6) {
01411 ncnames = argc - 6;
01412 cnames = 0;
01413 useargs = 1;
01414 } else {
01415
01416 csv_getline(csv, 0);
01417 if (csv->ncols < 1) {
01418 sqlite3_result_error(ctx, "unable to get column names", -1);
01419 goto cleanup;
01420 }
01421 csv_rewind(csv);
01422 ncnames = csv_ncols(csv);
01423 cnames = 0;
01424 }
01425 convert = 0;
01426 if (argc > 3) {
01427 convert = sqlite3_value_int(argv[3]);
01428 if (row1 && (convert & 4)) {
01429 conv_names(cnames, ncnames);
01430 }
01431 }
01432
01433 append(&sql, "PRAGMA table_info(", 0);
01434 append(&sql, tname, '"');
01435 append(&sql, ")", 0);
01436 if (!sql) {
01437 oom:
01438 sqlite3_result_error(ctx, "out of memory", -1);
01439 goto cleanup;
01440 }
01441 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01442 append_free(&sql);
01443 if (rc != SQLITE_OK) {
01444 prepfail:
01445 sqlite3_result_error(ctx, "prepare failed", -1);
01446 goto cleanup;
01447 }
01448
01449 i = 0;
01450 rc = sqlite3_step(stmt);
01451 while (rc == SQLITE_ROW) {
01452 i++;
01453 rc = sqlite3_step(stmt);
01454 }
01455 if (rc != SQLITE_DONE) {
01456 selfail:
01457 sqlite3_result_error(ctx, "select failed", -1);
01458 goto cleanup;
01459 }
01460 if (i > 0) {
01461
01462 sqlite3_reset(stmt);
01463 ncnames = i;
01464 coltypes = sqlite3_malloc(ncnames);
01465 if (!coltypes) {
01466 goto oom;
01467 }
01468 rc = sqlite3_step(stmt);
01469 i = 0;
01470 while (rc == SQLITE_ROW) {
01471 coltypes[i++] = maptype((char *) sqlite3_column_text(stmt, 2));
01472 rc = sqlite3_step(stmt);
01473 }
01474 if (rc != SQLITE_DONE) {
01475 goto selfail;
01476 }
01477 } else {
01478
01479 sqlite3_finalize(stmt);
01480 stmt = 0;
01481 coltypes = sqlite3_malloc(ncnames);
01482 if (!coltypes) {
01483 goto oom;
01484 }
01485 append(&sql, "CREATE TABLE ", 0);
01486 append(&sql, tname, '"');
01487 append(&sql, "(", 0);
01488 for (i = 0; i < ncnames; i++) {
01489 char colname[64];
01490
01491 coltypes[i] = SQLITE_TEXT;
01492 if (useargs) {
01493 char *type = (char *) sqlite3_value_text(argv[i + 6]);
01494
01495 if (!type) {
01496 goto defcol;
01497 }
01498 append(&sql, type, 0);
01499 while (*type && !strchr(" \t", *type)) {
01500 type++;
01501 }
01502 while (*type && strchr(" \t", *type)) {
01503 type++;
01504 }
01505 coltypes[i] = maptype(type);
01506 } else if (!cnames || !cnames[i]) {
01507 defcol:
01508 sprintf(colname, "column_%d", i + 1);
01509 append(&sql, colname, '"');
01510 } else if (row1 > 0) {
01511 append(&sql, cnames[i], '"');
01512 } else if (row1 < 0) {
01513 append(&sql, cnames[i], '"');
01514 if (i + 6 < argc) {
01515 char *type = (char *) sqlite3_value_text(argv[i + 6]);
01516
01517 if (type) {
01518 append(&sql, " ", 0);
01519 append(&sql, type, 0);
01520 coltypes[i] = maptype(type);
01521 }
01522 }
01523 }
01524 if (i < ncnames - 1) {
01525 append(&sql, ",", 0);
01526 }
01527 }
01528 append(&sql, ")", 0);
01529 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01530 if (rc != SQLITE_OK) {
01531 goto prepfail;
01532 }
01533 rc = sqlite3_step(stmt);
01534 if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01535 sqlite3_result_error(ctx, "create table failed", -1);
01536 goto cleanup;
01537 }
01538 append_free(&sql);
01539 }
01540 sqlite3_finalize(stmt);
01541 stmt = 0;
01542
01543 append(&sql, "INSERT INTO ", 0);
01544 append(&sql, tname, '"');
01545 append(&sql, " VALUES(", 0);
01546 for (i = 0; i < ncnames; i++) {
01547 append(&sql, (i < ncnames - 1) ? "?," : "?)", 0);
01548 }
01549 rc = sqlite3_prepare_v2(db, sql, -1, &stmt, 0);
01550 if (rc != SQLITE_OK) {
01551 goto prepfail;
01552 }
01553 append_free(&sql);
01554
01555 row1 = 0;
01556 while (csv_getline(csv, 0) != EOF) {
01557 for (i = 0; i < ncnames; i++) {
01558 char *data = csv_coldata(csv, i);
01559
01560 rc = process_col(0, stmt, i + 1, data, coltypes[i], convert);
01561 if (rc != SQLITE_OK) {
01562 goto inserr;
01563 }
01564 }
01565 rc = sqlite3_step(stmt);
01566 if ((rc != SQLITE_DONE) && (rc != SQLITE_OK)) {
01567 if ((rc != SQLITE_MISMATCH) && (rc != SQLITE_CONSTRAINT)) {
01568 inserr:
01569 sqlite3_result_error(ctx, "insert failed", -1);
01570 goto cleanup;
01571 }
01572 } else {
01573 row1++;
01574 }
01575 sqlite3_reset(stmt);
01576 }
01577 sqlite3_result_int(ctx, row1);
01578 goto cleanup;
01579 }
01580
01587 #ifndef STANDALONE
01588 static
01589 #endif
01590 int
01591 csv_vtab_init(sqlite3 *db)
01592 {
01593 sqlite3_create_function(db, "import_csv", -1, SQLITE_UTF8,
01594 (void *) db, csv_import_func, 0, 0);
01595 return sqlite3_create_module(db, "csvtable", &csv_vtab_mod, 0);
01596 }
01597
01598 #ifndef STANDALONE
01599
01608 int
01609 sqlite3_extension_init(sqlite3 *db, char **errmsg,
01610 const sqlite3_api_routines *api)
01611 {
01612 SQLITE_EXTENSION_INIT2(api);
01613 return csv_vtab_init(db);
01614 }
01615
01616 #endif