Skip to content

Commit

Permalink
Fix #31: add CsvParser.Feature.TRIM_HEADER_SPACES
Browse files Browse the repository at this point in the history
  • Loading branch information
cowtowncoder committed Jan 5, 2025
1 parent c78cfc4 commit c2d3ab2
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,29 @@ public enum Feature
* values (white space outside of double-quotes is never included regardless
* of trimming).
*<p>
* NOTE: this setting has no effect on header rows: see {@link #TRIM_HEADER_SPACES}
* for corresponding setting.
*<p>
* Default value is false, as per <a href="http://tools.ietf.org/html/rfc4180">RFC-4180</a>.
*/
TRIM_SPACES(false),

/**
* Feature that determines whether spaces around separator characters
* (commas) in header line entries (header names) are to be automatically
* trimmed before being reported or not.
* Note that this does NOT force trimming of possible white space from
* within double-quoted values, but only those surrounding unquoted
* values (white space outside of double-quotes is never included regardless
* of trimming).
*<p>
* Default value is {@code true} for backwards compatibility (before 2.19 trimming
* was always performed).
*
* @since 2.19
*/
TRIM_HEADER_SPACES(true),

/**
* Feature that determines how stream of records (usually CSV lines, but sometimes
* multiple lines when linefeeds are included in quoted values) is exposed:
Expand Down Expand Up @@ -898,10 +917,13 @@ protected void _readHeaderLine() throws IOException {
CsvSchema.Builder builder = _schema.rebuild().clearColumns();
int count = 0;

final boolean trimHeaderNames = Feature.TRIM_HEADER_SPACES.enabledIn(_formatFeatures);
while ((name = _reader.nextString()) != null) {
// one more thing: always trim names, regardless of config settings
// TODO!!! [dataformats-text#31]: Allow disabling of trimming
name = name.trim();
// [dataformats-text#31]: Allow disabling of trimming
if (trimHeaderNames) {
name = name.trim();
}
// See if "old" schema defined type; if so, use that type...
CsvSchema.Column prev = _schema.column(name);
if (prev != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,30 @@ public void testSimpleHeader() throws Exception
}

// But! Can change header name trimming:
// [dataformats-text#31]: Allow disabling header name trimming
// [dataformats-text#31]: Allow disabling header row trimming
try (CsvParser parser = (CsvParser) MAPPER.reader()
.without(CsvParser.Feature.TRIM_HEADER_SPACES)
.createParser(
"name, age,other \nfoo,2,xyz\n")) {
// need to enable first-line-as-schema handling:
parser.setSchema(CsvSchema.emptySchema().withHeader());
assertToken(JsonToken.START_OBJECT, parser.nextToken());
CsvSchema schema = parser.getSchema();
assertEquals(3, schema.size());

// Verify header names are NOT trimmed when disabled
assertEquals("name", schema.columnName(0));
assertEquals(" age", schema.columnName(1));
assertEquals("other ", schema.columnName(2));

assertEquals("name", parser.nextFieldName());
assertEquals("foo", parser.nextTextValue());
assertEquals(" age", parser.nextFieldName());
assertEquals("2", parser.nextTextValue());
assertEquals("other ", parser.nextFieldName());
assertEquals("xyz", parser.nextTextValue());
assertToken(JsonToken.END_OBJECT, parser.nextToken());
}
}

public void testSimpleQuotes() throws Exception
Expand Down
4 changes: 4 additions & 0 deletions release-notes/CREDITS-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,7 @@ Burdyug Pavel (@Pavel38l)
* Reported #485: (csv) CSVDecoder: No Long and Int out of range exceptions
(2.18.0)

Robert DiFalco (@rdifalco)

* Reported #31: Header names seem to be trimmed
(2.19.0)
2 changes: 2 additions & 0 deletions release-notes/VERSION-2.x
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ Active Maintainers:

2.19.0 (not yet released)

#31: Header names seem to be trimmed (add `CsvParser.Feature.TRIM_HEADER_SPACES`)
(reported by Robert D)
#502: Add an optional extended parser subclass (`YAMLAnchorReplayingFactory`)
able to inline anchors
(contributed by Heiko B)
Expand Down

0 comments on commit c2d3ab2

Please sign in to comment.