public import "ecere"
+// Decodes C-style backslash escapes found in the first `len` characters of `s`
+// (stopping early at a NUL terminator) and writes the result to `d`.
+//
+//   d    destination buffer; must have room for at least len + 1 characters
+//        (the output is never longer than the input, plus the terminator)
+//   s    source text; only s[0] .. s[len-1] are ever examined
+//   len  maximum number of source characters to consume
+//
+// Returns the number of characters written to `d`, not counting the terminating NUL.
+//
+// Recognized escapes: \n \t \a \b \f \r \v \\ \" \'
+// Any other escaped character is copied without its backslash.
+// A backslash that is the last available character (end of `len` or end of
+// string) has nothing to escape and is copied through literally.
+public int UnescapeString(char * d, char * s, int len)
+{
+   int j = 0, k = 0;
+   char ch;
+   while(j < len && (ch = s[j]))
+   {
+      // Only treat the backslash as an escape introducer when the escaped
+      // character lies inside the [0, len) window and before the terminator;
+      // otherwise we would read (and emit) data that is not part of this value.
+      if(ch == '\\' && j + 1 < len && s[j + 1])
+      {
+         ch = s[++j];
+         switch(ch)
+         {
+            case 'n': d[k] = '\n'; break;
+            case 't': d[k] = '\t'; break;
+            case 'a': d[k] = '\a'; break;
+            case 'b': d[k] = '\b'; break;
+            case 'f': d[k] = '\f'; break;
+            case 'r': d[k] = '\r'; break;
+            case 'v': d[k] = '\v'; break;
+            case '\\': d[k] = '\\'; break;
+            case '\"': d[k] = '\"'; break;
+            case '\'': d[k] = '\''; break;
+            // Unknown escape: emit the character itself, dropping the backslash.
+            // NOTE(review): the previous code stored '\\' into d[k] and then
+            // immediately overwrote it with ch; if keeping the backslash was the
+            // intent, this should become `d[k++] = '\\'; d[k] = ch;`.
+            default: d[k] = ch;
+         }
+      }
+      else
+         d[k] = ch;   // ordinary character, or a lone trailing backslash
+      j++, k++;
+   }
+   d[k] = '\0';
+   return k;
+}
+
// to be moved in ecere?
public class FileHandler
{
char valueQuotes; // NOTE(review): Process() compares input against options.valueQuotes to close a quoted value — presumably this field; confirm
int expectedFieldCount; // NOTE(review): Process() stops adding values once values.count reaches options.expectedFieldCount — confirm it is this field
bool tolerateNewLineInValues; // NOTE(review): when options.tolerateNewLineInValues is set, Process() ends a row on '\n' only once the last expected field is reached — confirm
+ bool escaped; // NOTE(review): options.escaped makes Process() track backslash escapes and decode values with UnescapeString() — confirm
+ bool lastFieldEndsWithNewLine; // NOTE(review): read by Process() ("For Git import") so a '"' inside the last expected field does not close it — confirm
//bool checkNulls;
//bool checkCurlies;
};
public class CSVParser : public FileHandler
{
public:
- CSVParserParameters options;
+ CSVParserParameters options { ',', '\"', 0, false };
CSVParserState info;
void PrintMessage(typed_object object, ...)
OnMessage(buffer);
}
- virtual bool OnMessage(String message)
+ virtual void OnMessage(const String message)
{
::PrintLn(this._class.name, ": ", message,
" lineNum=", info.lineNum,
virtual void Process()
{
bool quoted = false, status = true;
+ bool escaped = false;
Array<String> values { };
+ bool started = false;
int start = 0, end = 0;
int readCount = 0;
Array<char> buffer { minAllocSize = 4096 };
- //info.charNum = 0;
+ info.charNum = 0;
info.lineNum = 0;
info.rowNum = 0;
info.fieldNum = 0;
{
int c, offset = 0;
- if(start)
+ if(started)
{
offset = readCount - start;
if(offset > buffer.minAllocSize / 2)
char ch = buffer[c];
if(quoted)
{
- if(ch == options.valueQuotes)
+ // For Git import...
+ bool inTextQuote = false;
+
+ if(options.lastFieldEndsWithNewLine && info.fieldNum == options.expectedFieldCount - 1 && ch == '\"' && info.charNum > 0)
+ inTextQuote = true;
+
+ if(!inTextQuote && !escaped && ch == options.valueQuotes)
{
- quoted = false;
- end = c;
+ if(buffer[c+1] == options.valueQuotes)
+ c++;
+ else
+ {
+ quoted = false;
+ end = c;
+ }
}
+ if(!escaped && options.escaped && ch == '\\')
+ escaped = true;
+ else
+ escaped = false;
}
else
{
{
quoted = true;
start = c + 1;
+ started = true;
}
//else if(ch == options.fieldSeparator || ch == '\n')
else if(ch == options.fieldSeparator ||
- (ch == '\n' && (!options.tolerateNewLineInValues || info.fieldNum == options.expectedFieldCount-1)))
+ (ch == '\n' && (!options.tolerateNewLineInValues || info.fieldNum >= options.expectedFieldCount-1)))
{
- int len = end-start;
- String value = new char[len+1];
- memcpy(value, &buffer[start], len);
- value[len] = 0;
- values.Add(value);
+ if(values.count < options.expectedFieldCount)
+ {
+ int len = started ? (end-start) : 0;
+ String value = new char[len+1];
+ if(options.escaped) // Escaped with a backslash
+ UnescapeString(value, &buffer[start], len);
+ else
+ {
+ String dq;
+ memcpy(value, &buffer[start], len);
+ value[len] = 0;
+ while((dq = strstr(value, "\"\"")))
+ {
+ memmove(dq + 1, dq + 2, len - (uint)(dq + 2 - value) + 1);
+ len--;
+ value[len] = 0;
+ }
+ }
+ values.Add(value);
+ }
start = end = 0;
+ started = false;
info.fieldNum++;
if(ch == '\n')
{
info.rowNum++;
status = OnRowStrings(values);
values.Free();
- //info.charNum = 0;
info.fieldNum = 0;
}
}
else if(ch == '\r');
else
{
- if(!start)
+ if(!started)
+ {
start = c;
- end = c;
+ started = true;
+ }
+ end = c+1;
}
}
- //info.charNum++;
+ if(ch == '\r' || ch == '\n')
+ info.charNum = 0;
+ else
+ info.charNum++;
}
}
if(end > start)
{
int len = end-start;
+
String value = new char[len+1];
- memcpy(value, &buffer[start], len);
- value[len] = 0;
+ if(options.escaped)
+ UnescapeString(value, &buffer[start], len);
+ else
+ {
+ String dq;
+ memcpy(value, &buffer[start], len);
+ value[len] = 0;
+ while((dq = strstr(value, "\"\"")))
+ {
+ memmove(dq + 1, dq + 2, len - (uint)(dq + 2 - value) + 1);
+ len--;
+ value[len] = 0;
+ }
+ }
values.Add(value);
}
if(values.count && status)