001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang;
018
019 import java.io.IOException;
020 import java.io.StringWriter;
021 import java.io.Writer;
022
023 import org.apache.commons.lang.exception.NestableRuntimeException;
024
025 /**
026 * <p>Escapes and unescapes <code>String</code>s for
027 * Java, Java Script, HTML, XML, and SQL.</p>
028 *
029 * @author Apache Jakarta Turbine
030 * @author Purple Technology
031 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
032 * @author Antony Riley
033 * @author Helge Tesgaard
034 * @author <a href="sean@boohai.com">Sean Brown</a>
035 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
036 * @author Phil Steitz
037 * @author Pete Gieser
038 * @since 2.0
039 * @version $Id: StringEscapeUtils.java 612880 2008-01-17 17:34:43Z ggregory $
040 */
041 public class StringEscapeUtils {
042
043 private static final char CSV_DELIMITER = ',';
044 private static final char CSV_QUOTE = '"';
045 private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE);
046 private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF};
047
048 /**
049 * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
050 * standard programming.</p>
051 *
052 * <p>Instead, the class should be used as:
053 * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
054 *
055 * <p>This constructor is public to permit tools that require a JavaBean
056 * instance to operate.</p>
057 */
058 public StringEscapeUtils() {
059 super();
060 }
061
062 // Java and JavaScript
063 //--------------------------------------------------------------------------
064 /**
065 * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
066 *
067 * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
068 *
069 * <p>So a tab becomes the characters <code>'\\'</code> and
070 * <code>'t'</code>.</p>
071 *
072 * <p>The only difference between Java strings and JavaScript strings
073 * is that in JavaScript, a single quote must be escaped.</p>
074 *
075 * <p>Example:
076 * <pre>
077 * input string: He didn't say, "Stop!"
078 * output string: He didn't say, \"Stop!\"
079 * </pre>
080 * </p>
081 *
082 * @param str String to escape values in, may be null
083 * @return String with escaped values, <code>null</code> if null string input
084 */
085 public static String escapeJava(String str) {
086 return escapeJavaStyleString(str, false);
087 }
088
089 /**
090 * <p>Escapes the characters in a <code>String</code> using Java String rules to
091 * a <code>Writer</code>.</p>
092 *
093 * <p>A <code>null</code> string input has no effect.</p>
094 *
095 * @see #escapeJava(java.lang.String)
096 * @param out Writer to write escaped string into
097 * @param str String to escape values in, may be null
098 * @throws IllegalArgumentException if the Writer is <code>null</code>
099 * @throws IOException if error occurs on underlying Writer
100 */
101 public static void escapeJava(Writer out, String str) throws IOException {
102 escapeJavaStyleString(out, str, false);
103 }
104
105 /**
106 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
107 * <p>Escapes any values it finds into their JavaScript String form.
108 * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
109 *
110 * <p>So a tab becomes the characters <code>'\\'</code> and
111 * <code>'t'</code>.</p>
112 *
113 * <p>The only difference between Java strings and JavaScript strings
114 * is that in JavaScript, a single quote must be escaped.</p>
115 *
116 * <p>Example:
117 * <pre>
118 * input string: He didn't say, "Stop!"
119 * output string: He didn\'t say, \"Stop!\"
120 * </pre>
121 * </p>
122 *
123 * @param str String to escape values in, may be null
124 * @return String with escaped values, <code>null</code> if null string input
125 */
126 public static String escapeJavaScript(String str) {
127 return escapeJavaStyleString(str, true);
128 }
129
130 /**
131 * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
132 * to a <code>Writer</code>.</p>
133 *
134 * <p>A <code>null</code> string input has no effect.</p>
135 *
136 * @see #escapeJavaScript(java.lang.String)
137 * @param out Writer to write escaped string into
138 * @param str String to escape values in, may be null
139 * @throws IllegalArgumentException if the Writer is <code>null</code>
140 * @throws IOException if error occurs on underlying Writer
141 **/
142 public static void escapeJavaScript(Writer out, String str) throws IOException {
143 escapeJavaStyleString(out, str, true);
144 }
145
146 /**
147 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
148 *
149 * @param str String to escape values in, may be null
150 * @param escapeSingleQuotes escapes single quotes if <code>true</code>
151 * @return the escaped string
152 */
153 private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes) {
154 if (str == null) {
155 return null;
156 }
157 try {
158 StringWriter writer = new StringWriter(str.length() * 2);
159 escapeJavaStyleString(writer, str, escapeSingleQuotes);
160 return writer.toString();
161 } catch (IOException ioe) {
162 // this should never ever happen while writing to a StringWriter
163 ioe.printStackTrace();
164 return null;
165 }
166 }
167
168 /**
169 * <p>Worker method for the {@link #escapeJavaScript(String)} method.</p>
170 *
171 * @param out write to receieve the escaped string
172 * @param str String to escape values in, may be null
173 * @param escapeSingleQuote escapes single quotes if <code>true</code>
174 * @throws IOException if an IOException occurs
175 */
176 private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote) throws IOException {
177 if (out == null) {
178 throw new IllegalArgumentException("The Writer must not be null");
179 }
180 if (str == null) {
181 return;
182 }
183 int sz;
184 sz = str.length();
185 for (int i = 0; i < sz; i++) {
186 char ch = str.charAt(i);
187
188 // handle unicode
189 if (ch > 0xfff) {
190 out.write("\\u" + hex(ch));
191 } else if (ch > 0xff) {
192 out.write("\\u0" + hex(ch));
193 } else if (ch > 0x7f) {
194 out.write("\\u00" + hex(ch));
195 } else if (ch < 32) {
196 switch (ch) {
197 case '\b':
198 out.write('\\');
199 out.write('b');
200 break;
201 case '\n':
202 out.write('\\');
203 out.write('n');
204 break;
205 case '\t':
206 out.write('\\');
207 out.write('t');
208 break;
209 case '\f':
210 out.write('\\');
211 out.write('f');
212 break;
213 case '\r':
214 out.write('\\');
215 out.write('r');
216 break;
217 default :
218 if (ch > 0xf) {
219 out.write("\\u00" + hex(ch));
220 } else {
221 out.write("\\u000" + hex(ch));
222 }
223 break;
224 }
225 } else {
226 switch (ch) {
227 case '\'':
228 if (escapeSingleQuote) {
229 out.write('\\');
230 }
231 out.write('\'');
232 break;
233 case '"':
234 out.write('\\');
235 out.write('"');
236 break;
237 case '\\':
238 out.write('\\');
239 out.write('\\');
240 break;
241 case '/':
242 out.write('\\');
243 out.write('/');
244 break;
245 default :
246 out.write(ch);
247 break;
248 }
249 }
250 }
251 }
252
253 /**
254 * <p>Returns an upper case hexadecimal <code>String</code> for the given
255 * character.</p>
256 *
257 * @param ch The character to convert.
258 * @return An upper case hexadecimal <code>String</code>
259 */
260 private static String hex(char ch) {
261 return Integer.toHexString(ch).toUpperCase();
262 }
263
264 /**
265 * <p>Unescapes any Java literals found in the <code>String</code>.
266 * For example, it will turn a sequence of <code>'\'</code> and
267 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
268 * is preceded by another <code>'\'</code>.</p>
269 *
270 * @param str the <code>String</code> to unescape, may be null
271 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
272 */
273 public static String unescapeJava(String str) {
274 if (str == null) {
275 return null;
276 }
277 try {
278 StringWriter writer = new StringWriter(str.length());
279 unescapeJava(writer, str);
280 return writer.toString();
281 } catch (IOException ioe) {
282 // this should never ever happen while writing to a StringWriter
283 ioe.printStackTrace();
284 return null;
285 }
286 }
287
288 /**
289 * <p>Unescapes any Java literals found in the <code>String</code> to a
290 * <code>Writer</code>.</p>
291 *
292 * <p>For example, it will turn a sequence of <code>'\'</code> and
293 * <code>'n'</code> into a newline character, unless the <code>'\'</code>
294 * is preceded by another <code>'\'</code>.</p>
295 *
296 * <p>A <code>null</code> string input has no effect.</p>
297 *
298 * @param out the <code>Writer</code> used to output unescaped characters
299 * @param str the <code>String</code> to unescape, may be null
300 * @throws IllegalArgumentException if the Writer is <code>null</code>
301 * @throws IOException if error occurs on underlying Writer
302 */
303 public static void unescapeJava(Writer out, String str) throws IOException {
304 if (out == null) {
305 throw new IllegalArgumentException("The Writer must not be null");
306 }
307 if (str == null) {
308 return;
309 }
310 int sz = str.length();
311 StringBuffer unicode = new StringBuffer(4);
312 boolean hadSlash = false;
313 boolean inUnicode = false;
314 for (int i = 0; i < sz; i++) {
315 char ch = str.charAt(i);
316 if (inUnicode) {
317 // if in unicode, then we're reading unicode
318 // values in somehow
319 unicode.append(ch);
320 if (unicode.length() == 4) {
321 // unicode now contains the four hex digits
322 // which represents our unicode character
323 try {
324 int value = Integer.parseInt(unicode.toString(), 16);
325 out.write((char) value);
326 unicode.setLength(0);
327 inUnicode = false;
328 hadSlash = false;
329 } catch (NumberFormatException nfe) {
330 throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
331 }
332 }
333 continue;
334 }
335 if (hadSlash) {
336 // handle an escaped value
337 hadSlash = false;
338 switch (ch) {
339 case '\\':
340 out.write('\\');
341 break;
342 case '\'':
343 out.write('\'');
344 break;
345 case '\"':
346 out.write('"');
347 break;
348 case 'r':
349 out.write('\r');
350 break;
351 case 'f':
352 out.write('\f');
353 break;
354 case 't':
355 out.write('\t');
356 break;
357 case 'n':
358 out.write('\n');
359 break;
360 case 'b':
361 out.write('\b');
362 break;
363 case 'u':
364 {
365 // uh-oh, we're in unicode country....
366 inUnicode = true;
367 break;
368 }
369 default :
370 out.write(ch);
371 break;
372 }
373 continue;
374 } else if (ch == '\\') {
375 hadSlash = true;
376 continue;
377 }
378 out.write(ch);
379 }
380 if (hadSlash) {
381 // then we're in the weird case of a \ at the end of the
382 // string, let's output it anyway.
383 out.write('\\');
384 }
385 }
386
387 /**
388 * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
389 *
390 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
391 * into a newline character, unless the <code>'\'</code> is preceded by another
392 * <code>'\'</code>.</p>
393 *
394 * @see #unescapeJava(String)
395 * @param str the <code>String</code> to unescape, may be null
396 * @return A new unescaped <code>String</code>, <code>null</code> if null string input
397 */
398 public static String unescapeJavaScript(String str) {
399 return unescapeJava(str);
400 }
401
402 /**
403 * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
404 * <code>Writer</code>.</p>
405 *
406 * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
407 * into a newline character, unless the <code>'\'</code> is preceded by another
408 * <code>'\'</code>.</p>
409 *
410 * <p>A <code>null</code> string input has no effect.</p>
411 *
412 * @see #unescapeJava(Writer,String)
413 * @param out the <code>Writer</code> used to output unescaped characters
414 * @param str the <code>String</code> to unescape, may be null
415 * @throws IllegalArgumentException if the Writer is <code>null</code>
416 * @throws IOException if error occurs on underlying Writer
417 */
418 public static void unescapeJavaScript(Writer out, String str) throws IOException {
419 unescapeJava(out, str);
420 }
421
422 // HTML and XML
423 //--------------------------------------------------------------------------
424 /**
425 * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
426 *
427 * <p>
428 * For example:
429 * </p>
430 * <p><code>"bread" & "butter"</code></p>
431 * becomes:
432 * <p>
433 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
434 * </p>
435 *
436 * <p>Supports all known HTML 4.0 entities, including funky accents.
437 * Note that the commonly used apostrophe escape character (&apos;)
438 * is not a legal entity and so is not supported). </p>
439 *
440 * @param str the <code>String</code> to escape, may be null
441 * @return a new escaped <code>String</code>, <code>null</code> if null string input
442 *
443 * @see #unescapeHtml(String)
444 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
445 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
446 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
447 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
448 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
449 */
450 public static String escapeHtml(String str) {
451 if (str == null) {
452 return null;
453 }
454 try {
455 StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
456 escapeHtml(writer, str);
457 return writer.toString();
458 } catch (IOException e) {
459 //assert false;
460 //should be impossible
461 e.printStackTrace();
462 return null;
463 }
464 }
465
466 /**
467 * <p>Escapes the characters in a <code>String</code> using HTML entities and writes
468 * them to a <code>Writer</code>.</p>
469 *
470 * <p>
471 * For example:
472 * </p>
473 * <code>"bread" & "butter"</code>
474 * <p>becomes:</p>
475 * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>.
476 *
477 * <p>Supports all known HTML 4.0 entities, including funky accents.
478 * Note that the commonly used apostrophe escape character (&apos;)
479 * is not a legal entity and so is not supported). </p>
480 *
481 * @param writer the writer receiving the escaped string, not null
482 * @param string the <code>String</code> to escape, may be null
483 * @throws IllegalArgumentException if the writer is null
484 * @throws IOException when <code>Writer</code> passed throws the exception from
485 * calls to the {@link Writer#write(int)} methods.
486 *
487 * @see #escapeHtml(String)
488 * @see #unescapeHtml(String)
489 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
490 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
491 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
492 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
493 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
494 */
495 public static void escapeHtml(Writer writer, String string) throws IOException {
496 if (writer == null ) {
497 throw new IllegalArgumentException ("The Writer must not be null.");
498 }
499 if (string == null) {
500 return;
501 }
502 Entities.HTML40.escape(writer, string);
503 }
504
505 //-----------------------------------------------------------------------
506 /**
507 * <p>Unescapes a string containing entity escapes to a string
508 * containing the actual Unicode characters corresponding to the
509 * escapes. Supports HTML 4.0 entities.</p>
510 *
511 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
512 * will become "<Français>"</p>
513 *
514 * <p>If an entity is unrecognized, it is left alone, and inserted
515 * verbatim into the result string. e.g. "&gt;&zzzz;x" will
516 * become ">&zzzz;x".</p>
517 *
518 * @param str the <code>String</code> to unescape, may be null
519 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
520 * @see #escapeHtml(Writer, String)
521 */
522 public static String unescapeHtml(String str) {
523 if (str == null) {
524 return null;
525 }
526 try {
527 StringWriter writer = new StringWriter ((int)(str.length() * 1.5));
528 unescapeHtml(writer, str);
529 return writer.toString();
530 } catch (IOException e) {
531 //assert false;
532 //should be impossible
533 e.printStackTrace();
534 return null;
535 }
536 }
537
538 /**
539 * <p>Unescapes a string containing entity escapes to a string
540 * containing the actual Unicode characters corresponding to the
541 * escapes. Supports HTML 4.0 entities.</p>
542 *
543 * <p>For example, the string "&lt;Fran&ccedil;ais&gt;"
544 * will become "<Français>"</p>
545 *
546 * <p>If an entity is unrecognized, it is left alone, and inserted
547 * verbatim into the result string. e.g. "&gt;&zzzz;x" will
548 * become ">&zzzz;x".</p>
549 *
550 * @param writer the writer receiving the unescaped string, not null
551 * @param string the <code>String</code> to unescape, may be null
552 * @throws IllegalArgumentException if the writer is null
553 * @throws IOException if an IOException occurs
554 * @see #escapeHtml(String)
555 */
556 public static void unescapeHtml(Writer writer, String string) throws IOException {
557 if (writer == null ) {
558 throw new IllegalArgumentException ("The Writer must not be null.");
559 }
560 if (string == null) {
561 return;
562 }
563 Entities.HTML40.unescape(writer, string);
564 }
565
566 //-----------------------------------------------------------------------
567 /**
568 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
569 *
570 * <p>For example: <tt>"bread" & "butter"</tt> =>
571 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
572 * </p>
573 *
574 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
575 * Does not support DTDs or external entities.</p>
576 *
577 * <p>Note that unicode characters greater than 0x7f are currently escaped to
578 * their numerical \\u equivalent. This may change in future releases. </p>
579 *
580 * @param writer the writer receiving the unescaped string, not null
581 * @param str the <code>String</code> to escape, may be null
582 * @throws IllegalArgumentException if the writer is null
583 * @throws IOException if there is a problem writing
584 * @see #unescapeXml(java.lang.String)
585 */
586 public static void escapeXml(Writer writer, String str) throws IOException {
587 if (writer == null ) {
588 throw new IllegalArgumentException ("The Writer must not be null.");
589 }
590 if (str == null) {
591 return;
592 }
593 Entities.XML.escape(writer, str);
594 }
595
596 /**
597 * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
598 *
599 * <p>For example: <tt>"bread" & "butter"</tt> =>
600 * <tt>&quot;bread&quot; &amp; &quot;butter&quot;</tt>.
601 * </p>
602 *
603 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
604 * Does not support DTDs or external entities.</p>
605 *
606 * <p>Note that unicode characters greater than 0x7f are currently escaped to
607 * their numerical \\u equivalent. This may change in future releases. </p>
608 *
609 * @param str the <code>String</code> to escape, may be null
610 * @return a new escaped <code>String</code>, <code>null</code> if null string input
611 * @see #unescapeXml(java.lang.String)
612 */
613 public static String escapeXml(String str) {
614 if (str == null) {
615 return null;
616 }
617 return Entities.XML.escape(str);
618 }
619
620 //-----------------------------------------------------------------------
621 /**
622 * <p>Unescapes a string containing XML entity escapes to a string
623 * containing the actual Unicode characters corresponding to the
624 * escapes.</p>
625 *
626 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
627 * Does not support DTDs or external entities.</p>
628 *
629 * <p>Note that numerical \\u unicode codes are unescaped to their respective
630 * unicode characters. This may change in future releases. </p>
631 *
632 * @param writer the writer receiving the unescaped string, not null
633 * @param str the <code>String</code> to unescape, may be null
634 * @throws IllegalArgumentException if the writer is null
635 * @throws IOException if there is a problem writing
636 * @see #escapeXml(String)
637 */
638 public static void unescapeXml(Writer writer, String str) throws IOException {
639 if (writer == null ) {
640 throw new IllegalArgumentException ("The Writer must not be null.");
641 }
642 if (str == null) {
643 return;
644 }
645 Entities.XML.unescape(writer, str);
646 }
647
648 /**
649 * <p>Unescapes a string containing XML entity escapes to a string
650 * containing the actual Unicode characters corresponding to the
651 * escapes.</p>
652 *
653 * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
654 * Does not support DTDs or external entities.</p>
655 *
656 * <p>Note that numerical \\u unicode codes are unescaped to their respective
657 * unicode characters. This may change in future releases. </p>
658 *
659 * @param str the <code>String</code> to unescape, may be null
660 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
661 * @see #escapeXml(String)
662 */
663 public static String unescapeXml(String str) {
664 if (str == null) {
665 return null;
666 }
667 return Entities.XML.unescape(str);
668 }
669
670 //-----------------------------------------------------------------------
671 /**
672 * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
673 * an SQL query.</p>
674 *
675 * <p>For example,
676 * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
677 * StringEscapeUtils.escapeSql("McHale's Navy") +
678 * "'");</pre>
679 * </p>
680 *
681 * <p>At present, this method only turns single-quotes into doubled single-quotes
682 * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
683 * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
684 *
685 * see http://www.jguru.com/faq/view.jsp?EID=8881
686 * @param str the string to escape, may be null
687 * @return a new String, escaped for SQL, <code>null</code> if null string input
688 */
689 public static String escapeSql(String str) {
690 if (str == null) {
691 return null;
692 }
693 return StringUtils.replace(str, "'", "''");
694 }
695
696 //-----------------------------------------------------------------------
697
698 /**
699 * <p>Returns a <code>String</code> value for a CSV column enclosed in double quotes,
700 * if required.</p>
701 *
702 * <p>If the value contains a comma, newline or double quote, then the
703 * String value is returned enclosed in double quotes.</p>
704 * </p>
705 *
706 * <p>Any double quote characters in the value are escaped with another double quote.</p>
707 *
708 * <p>If the value does not contain a comma, newline or double quote, then the
709 * String value is returned unchanged.</p>
710 * </p>
711 *
712 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
713 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
714 *
715 * @param str the input CSV column String, may be null
716 * @return the input String, enclosed in double quotes if the value contains a comma,
717 * newline or double quote, <code>null</code> if null string input
718 * @since 2.4
719 */
720 public static String escapeCsv(String str) {
721 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
722 return str;
723 }
724 try {
725 StringWriter writer = new StringWriter();
726 escapeCsv(writer, str);
727 return writer.toString();
728 } catch (IOException ioe) {
729 // this should never ever happen while writing to a StringWriter
730 ioe.printStackTrace();
731 return null;
732 }
733 }
734
735 /**
736 * <p>Writes a <code>String</code> value for a CSV column enclosed in double quotes,
737 * if required.</p>
738 *
739 * <p>If the value contains a comma, newline or double quote, then the
740 * String value is written enclosed in double quotes.</p>
741 * </p>
742 *
743 * <p>Any double quote characters in the value are escaped with another double quote.</p>
744 *
745 * <p>If the value does not contain a comma, newline or double quote, then the
746 * String value is written unchanged (null values are ignored).</p>
747 * </p>
748 *
749 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
750 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
751 *
752 * @param str the input CSV column String, may be null
753 * @param out Writer to write input string to, enclosed in double quotes if it contains
754 * a comma, newline or double quote
755 * @throws IOException if error occurs on underlying Writer
756 * @since 2.4
757 */
758 public static void escapeCsv(Writer out, String str) throws IOException {
759 if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) {
760 if (str != null) {
761 out.write(str);
762 }
763 return;
764 }
765 out.write(CSV_QUOTE);
766 for (int i = 0; i < str.length(); i++) {
767 char c = str.charAt(i);
768 if (c == CSV_QUOTE) {
769 out.write(CSV_QUOTE); // escape double quote
770 }
771 out.write(c);
772 }
773 out.write(CSV_QUOTE);
774 }
775
776 /**
777 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
778 *
779 * <p>If the value is enclosed in double quotes, and contains a comma, newline
780 * or double quote, then quotes are removed.
781 * </p>
782 *
783 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
784 * to just one double quote. </p>
785 *
786 * <p>If the value is not enclosed in double quotes, or is and does not contain a
787 * comma, newline or double quote, then the String value is returned unchanged.</p>
788 * </p>
789 *
790 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
791 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
792 *
793 * @param str the input CSV column String, may be null
794 * @return the input String, with enclosing double quotes removed and embedded double
795 * quotes unescaped, <code>null</code> if null string input
796 * @since 2.4
797 */
798 public static String unescapeCsv(String str) {
799 if (str == null) {
800 return null;
801 }
802 try {
803 StringWriter writer = new StringWriter();
804 unescapeCsv(writer, str);
805 return writer.toString();
806 } catch (IOException ioe) {
807 // this should never ever happen while writing to a StringWriter
808 ioe.printStackTrace();
809 return null;
810 }
811 }
812
813 /**
814 * <p>Returns a <code>String</code> value for an unescaped CSV column. </p>
815 *
816 * <p>If the value is enclosed in double quotes, and contains a comma, newline
817 * or double quote, then quotes are removed.
818 * </p>
819 *
820 * <p>Any double quote escaped characters (a pair of double quotes) are unescaped
821 * to just one double quote. </p>
822 *
823 * <p>If the value is not enclosed in double quotes, or is and does not contain a
824 * comma, newline or double quote, then the String value is returned unchanged.</p>
825 * </p>
826 *
827 * see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
828 * <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
829 *
830 * @param str the input CSV column String, may be null
831 * @param out Writer to write the input String to, with enclosing double quotes
832 * removed and embedded double quotes unescaped, <code>null</code> if null string input
833 * @throws IOException if error occurs on underlying Writer
834 * @since 2.4
835 */
836 public static void unescapeCsv(Writer out, String str) throws IOException {
837 if (str == null) {
838 return;
839 }
840 if (str.length() < 2) {
841 out.write(str);
842 return;
843 }
844 if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) {
845 out.write(str);
846 return;
847 }
848
849 // strip quotes
850 String quoteless = str.substring(1, str.length() - 1);
851
852 if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) {
853 // deal with escaped quotes; ie) ""
854 str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR);
855 }
856
857 out.write(str);
858 }
859
860 }