EscapeUnicodeReader.java

  1. /*
  2.  * Copyright 2014 Polago AB.
  3.  *
  4.  * Licensed under the Apache License, Version 2.0 (the "License");
  5.  * you may not use this file except in compliance with the License.
  6.  * You may obtain a copy of the License at
  7.  *
  8.  *      http://www.apache.org/licenses/LICENSE-2.0
  9.  *
  10.  * Unless required by applicable law or agreed to in writing, software
  11.  * distributed under the License is distributed on an "AS IS" BASIS,
  12.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13.  * See the License for the specific language governing permissions and
  14.  * limitations under the License.
  15.  */

  16. package org.polago.maven.shared.filtering.escapeunicode;

  17. import java.io.IOException;
  18. import java.io.Reader;

  /**
   * A Reader that translates all non-ASCII characters to the corresponding java
   * escape sequence.
   * <p>
   * Every character read from the wrapped Reader whose value is 0x80 or above is
   * replaced by six characters: a backslash followed by {@code uXXXX}, where
   * {@code XXXX} is the zero-padded, lower-case hexadecimal value of the
   * character. All other characters are passed through unchanged. An escape
   * sequence may be split across consecutive calls to
   * {@link #read(char[], int, int)}; the part that did not fit is kept and
   * delivered first by the next call.
   */
  public class EscapeUnicodeReader extends Reader {

      /** Template for the part of an escape sequence following the backslash. */
      private static final String UNICODE_PLACEHOLDER = "u0000";

      /** First character value that is written as an escape sequence. */
      private static final char FIRST_NON_ASCII = '\u0080';

      private final Reader reader;

      /**
       * Characters of the current escape sequence that have not yet been
       * handed to the caller. Empty when no escape sequence is in progress.
       */
      private final StringBuilder unicodeBuffer;

      /**
       * Public Constructor.
       *
       * @param reader the Reader to wrap
       * @throws NullPointerException if reader is null
       */
      public EscapeUnicodeReader(Reader reader) {
          if (reader == null) {
              throw new NullPointerException("reader");
          }
          this.reader = reader;
          this.unicodeBuffer = new StringBuilder(UNICODE_PLACEHOLDER.length());
      }

      /**
       * Reads escaped characters into a portion of an array.
       * <p>
       * Characters are pulled from the wrapped Reader one at a time until
       * {@code len} characters have been stored or the wrapped Reader reports
       * EOF.
       *
       * @param cbuf destination buffer
       * @param off offset at which to start storing characters
       * @param len maximum number of characters to read
       * @return the number of characters stored, or -1 if EOF was reached
       * before any character could be stored
       * @throws IOException indicating IO Error
       * @throws IndexOutOfBoundsException if off or len is negative, or len is
       * greater than {@code cbuf.length - off}
       */
      @Override
      public int read(char[] cbuf, int off, int len) throws IOException {
          // Validate before touching the wrapped Reader so that a bad call
          // neither consumes input nor returns a bogus negative count.
          if (off < 0 || len < 0 || len > cbuf.length - off) {
              throw new IndexOutOfBoundsException("off=" + off + ", len=" + len
                  + ", cbuf.length=" + cbuf.length);
          }

          int count = 0;
          while (count < len) {
              int ch = readChar();
              if (ch == -1) {
                  // Report what was stored so far; EOF itself is signalled
                  // only when nothing could be delivered.
                  return count > 0 ? count : -1;
              }
              cbuf[off + count] = (char) ch;
              count++;
          }

          return count;
      }

      /**
       * Closes the wrapped Reader and discards any escape sequence that was
       * only partially delivered.
       *
       * @throws IOException indicating IO Error
       */
      @Override
      public void close() throws IOException {
          // Drop the pending tail first so it is gone even if close() throws.
          unicodeBuffer.setLength(0);
          reader.close();
      }

      /**
       * Read a single char from the wrapped Reader and handle any non-ascii
       * chars.
       *
       * @return a single character or -1 if EOF
       * @throws IOException indicating IO Error
       */
      private int readChar() throws IOException {
          // Finish delivering an escape sequence that is already in progress.
          if (unicodeBuffer.length() > 0) {
              char pending = unicodeBuffer.charAt(0);
              unicodeBuffer.deleteCharAt(0);
              return pending;
          }

          int ch = reader.read();
          if (ch != -1 && ch >= FIRST_NON_ASCII) {
              // Emit the backslash now and queue the rest of the sequence.
              unicodeBuffer.append(escapeUnicode((char) ch));
              return '\\';
          }

          return ch;
      }

      /**
       * Escape a single Unicode character to a Java escape sequence.
       *
       * @param ch the character to process
       * @return a StringBuilder containing the escape sequence without its
       * leading backslash
       */
      private StringBuilder escapeUnicode(char ch) {
          StringBuilder result = new StringBuilder(UNICODE_PLACEHOLDER);

          // A char yields at most four hex digits; right-align them over the
          // zeros of the placeholder.
          String hex = Integer.toHexString(ch);
          int start = result.length() - hex.length();
          for (int i = 0; i < hex.length(); i++) {
              result.setCharAt(start + i, hex.charAt(i));
          }

          return result;
      }

  }