View Javadoc

1   /*
2    * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/util/EncodingUtil.java,v 1.8 2004/05/13 04:01:22 mbecke Exp $
3    * $Revision: 155418 $
4    * $Date: 2005-02-26 08:01:52 -0500 (Sat, 26 Feb 2005) $
5    *
6    * ====================================================================
7    *
8    *  Copyright 1999-2004 The Apache Software Foundation
9    *
10   *  Licensed under the Apache License, Version 2.0 (the "License");
11   *  you may not use this file except in compliance with the License.
12   *  You may obtain a copy of the License at
13   *
14   *      http://www.apache.org/licenses/LICENSE-2.0
15   *
16   *  Unless required by applicable law or agreed to in writing, software
17   *  distributed under the License is distributed on an "AS IS" BASIS,
18   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   *  See the License for the specific language governing permissions and
20   *  limitations under the License.
21   * ====================================================================
22   *
23   * This software consists of voluntary contributions made by many
24   * individuals on behalf of the Apache Software Foundation.  For more
25   * information on the Apache Software Foundation, please see
26   * <http://www.apache.org/>.
27   *
28   */
29  package org.apache.commons.httpclient.util;
30  
31  import java.io.UnsupportedEncodingException;
32  
33  import org.apache.commons.codec.net.URLCodec;
34  import org.apache.commons.httpclient.HttpClientError;
35  import org.apache.commons.httpclient.NameValuePair;
36  import org.apache.commons.logging.Log;
37  import org.apache.commons.logging.LogFactory;
38  
39  /***
40   * The home for utility methods that handle various encoding tasks.
41   * 
42   * @author Michael Becke
43   * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
44   * 
45   * @since 2.0 final
46   */
47  public class EncodingUtil {
48  
49      /*** Default content encoding chatset */
50      private static final String DEFAULT_CHARSET = "ISO-8859-1";
51  
52      /*** Log object for this class. */
53      private static final Log LOG = LogFactory.getLog(EncodingUtil.class);
54  
55      /***
56       * Form-urlencoding routine.
57       *
58       * The default encoding for all forms is `application/x-www-form-urlencoded'. 
59       * A form data set is represented in this media type as follows:
60       *
61       * The form field names and values are escaped: space characters are replaced 
62       * by `+', and then reserved characters are escaped as per [URL]; that is, 
63       * non-alphanumeric characters are replaced by `%HH', a percent sign and two 
64       * hexadecimal digits representing the ASCII code of the character. Line breaks, 
65       * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
66       * 
67       * <p>
68       * if the given charset is not supported, ISO-8859-1 is used instead.
69       * </p>
70       * 
71       * @param pairs the values to be encoded
72       * @param charset the character set of pairs to be encoded
73       * 
74       * @return the urlencoded pairs
75       * 
76       * @since 2.0 final
77       */
78       public static String formUrlEncode(NameValuePair[] pairs, String charset) {
79          try {
80              return doFormUrlEncode(pairs, charset);
81          } catch (UnsupportedEncodingException e) {
82              LOG.error("Encoding not supported: " + charset);
83              try {
84                  return doFormUrlEncode(pairs, DEFAULT_CHARSET);
85              } catch (UnsupportedEncodingException fatal) {
86                  // Should never happen. ISO-8859-1 must be supported on all JVMs
87                  throw new HttpClientError("Encoding not supported: " + 
88                      DEFAULT_CHARSET);
89              }
90          }
91      }
92  
93      /***
94       * Form-urlencoding routine.
95       *
96       * The default encoding for all forms is `application/x-www-form-urlencoded'. 
97       * A form data set is represented in this media type as follows:
98       *
99       * The form field names and values are escaped: space characters are replaced 
100      * by `+', and then reserved characters are escaped as per [URL]; that is, 
101      * non-alphanumeric characters are replaced by `%HH', a percent sign and two 
102      * hexadecimal digits representing the ASCII code of the character. Line breaks, 
103      * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
104      * 
105      * @param pairs the values to be encoded
106      * @param charset the character set of pairs to be encoded
107      * 
108      * @return the urlencoded pairs
109      * @throws UnsupportedEncodingException if charset is not supported
110      * 
111      * @since 2.0 final
112      */
113      private static String doFormUrlEncode(NameValuePair[] pairs, String charset)
114         throws UnsupportedEncodingException 
115      {
116         StringBuffer buf = new StringBuffer();
117         for (int i = 0; i < pairs.length; i++) {
118             URLCodec codec = new URLCodec();
119             NameValuePair pair = pairs[i];
120             if (pair.getName() != null) {
121                 if (i > 0) {
122                     buf.append("&");
123                 }
124                 buf.append(codec.encode(pair.getName(), charset));
125                 buf.append("=");
126                 if (pair.getValue() != null) {
127                     buf.append(codec.encode(pair.getValue(), charset));
128                 }
129             }
130         }
131         return buf.toString();
132     }
133     
134     /***
135      * Converts the byte array of HTTP content characters to a string. If
136      * the specified charset is not supported, default system encoding
137      * is used.
138      *
139      * @param data the byte array to be encoded
140      * @param offset the index of the first byte to encode
141      * @param length the number of bytes to encode 
142      * @param charset the desired character encoding
143      * @return The result of the conversion.
144      * 
145      * @since 3.0
146      */
147     public static String getString(
148         final byte[] data, 
149         int offset, 
150         int length, 
151         String charset
152     ) {
153 
154         if (data == null) {
155             throw new IllegalArgumentException("Parameter may not be null");
156         }
157 
158         if (charset == null || charset.length() == 0) {
159             throw new IllegalArgumentException("charset may not be null or empty");
160         }
161 
162         try {
163             return new String(data, offset, length, charset);
164         } catch (UnsupportedEncodingException e) {
165 
166             if (LOG.isWarnEnabled()) {
167                 LOG.warn("Unsupported encoding: " + charset + ". System encoding used");
168             }
169             return new String(data, offset, length);
170         }
171     }
172 
173 
174     /***
175      * Converts the byte array of HTTP content characters to a string. If
176      * the specified charset is not supported, default system encoding
177      * is used.
178      *
179      * @param data the byte array to be encoded
180      * @param charset the desired character encoding
181      * @return The result of the conversion.
182      * 
183      * @since 3.0
184      */
185     public static String getString(final byte[] data, String charset) {
186         return getString(data, 0, data.length, charset);
187     }
188 
189     /***
190      * Converts the specified string to a byte array.  If the charset is not supported the
191      * default system charset is used.
192      *
193      * @param data the string to be encoded
194      * @param charset the desired character encoding
195      * @return The resulting byte array.
196      * 
197      * @since 3.0
198      */
199     public static byte[] getBytes(final String data, String charset) {
200 
201         if (data == null) {
202             throw new IllegalArgumentException("data may not be null");
203         }
204 
205         if (charset == null || charset.length() == 0) {
206             throw new IllegalArgumentException("charset may not be null or empty");
207         }
208 
209         try {
210             return data.getBytes(charset);
211         } catch (UnsupportedEncodingException e) {
212 
213             if (LOG.isWarnEnabled()) {
214                 LOG.warn("Unsupported encoding: " + charset + ". System encoding used.");
215             }
216             
217             return data.getBytes();
218         }
219     }    
220     
221     /***
222      * Converts the specified string to byte array of ASCII characters.
223      *
224      * @param data the string to be encoded
225      * @return The string as a byte array.
226      * 
227      * @since 3.0
228      */
229     public static byte[] getAsciiBytes(final String data) {
230 
231         if (data == null) {
232             throw new IllegalArgumentException("Parameter may not be null");
233         }
234 
235         try {
236             return data.getBytes("US-ASCII");
237         } catch (UnsupportedEncodingException e) {
238             throw new HttpClientError("HttpClient requires ASCII support");
239         }
240     }
241 
242     /***
243      * Converts the byte array of ASCII characters to a string. This method is
244      * to be used when decoding content of HTTP elements (such as response
245      * headers)
246      *
247      * @param data the byte array to be encoded
248      * @param offset the index of the first byte to encode
249      * @param length the number of bytes to encode 
250      * @return The string representation of the byte array
251      * 
252      * @since 3.0
253      */
254     public static String getAsciiString(final byte[] data, int offset, int length) {
255 
256         if (data == null) {
257             throw new IllegalArgumentException("Parameter may not be null");
258         }
259 
260         try {
261             return new String(data, offset, length, "US-ASCII");
262         } catch (UnsupportedEncodingException e) {
263             throw new HttpClientError("HttpClient requires ASCII support");
264         }
265     }
266 
267     /***
268      * Converts the byte array of ASCII characters to a string. This method is
269      * to be used when decoding content of HTTP elements (such as response
270      * headers)
271      *
272      * @param data the byte array to be encoded
273      * @return The string representation of the byte array
274      * 
275      * @since 3.0
276      */
277     public static String getAsciiString(final byte[] data) {
278         return getAsciiString(data, 0, data.length);
279     }
280 
281     /***
282      * This class should not be instantiated.
283      */
284     private EncodingUtil() {
285     }
286 
287 }