1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 package org.apache.commons.httpclient.util;
30
31 import java.io.UnsupportedEncodingException;
32
33 import org.apache.commons.codec.net.URLCodec;
34 import org.apache.commons.httpclient.HttpClientError;
35 import org.apache.commons.httpclient.NameValuePair;
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38
39 /***
40 * The home for utility methods that handle various encoding tasks.
41 *
42 * @author Michael Becke
43 * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
44 *
45 * @since 2.0 final
46 */
47 public class EncodingUtil {
48
49 /*** Default content encoding chatset */
50 private static final String DEFAULT_CHARSET = "ISO-8859-1";
51
52 /*** Log object for this class. */
53 private static final Log LOG = LogFactory.getLog(EncodingUtil.class);
54
55 /***
56 * Form-urlencoding routine.
57 *
58 * The default encoding for all forms is `application/x-www-form-urlencoded'.
59 * A form data set is represented in this media type as follows:
60 *
61 * The form field names and values are escaped: space characters are replaced
62 * by `+', and then reserved characters are escaped as per [URL]; that is,
63 * non-alphanumeric characters are replaced by `%HH', a percent sign and two
64 * hexadecimal digits representing the ASCII code of the character. Line breaks,
65 * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
66 *
67 * <p>
68 * if the given charset is not supported, ISO-8859-1 is used instead.
69 * </p>
70 *
71 * @param pairs the values to be encoded
72 * @param charset the character set of pairs to be encoded
73 *
74 * @return the urlencoded pairs
75 *
76 * @since 2.0 final
77 */
78 public static String formUrlEncode(NameValuePair[] pairs, String charset) {
79 try {
80 return doFormUrlEncode(pairs, charset);
81 } catch (UnsupportedEncodingException e) {
82 LOG.error("Encoding not supported: " + charset);
83 try {
84 return doFormUrlEncode(pairs, DEFAULT_CHARSET);
85 } catch (UnsupportedEncodingException fatal) {
86
87 throw new HttpClientError("Encoding not supported: " +
88 DEFAULT_CHARSET);
89 }
90 }
91 }
92
93 /***
94 * Form-urlencoding routine.
95 *
96 * The default encoding for all forms is `application/x-www-form-urlencoded'.
97 * A form data set is represented in this media type as follows:
98 *
99 * The form field names and values are escaped: space characters are replaced
100 * by `+', and then reserved characters are escaped as per [URL]; that is,
101 * non-alphanumeric characters are replaced by `%HH', a percent sign and two
102 * hexadecimal digits representing the ASCII code of the character. Line breaks,
103 * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
104 *
105 * @param pairs the values to be encoded
106 * @param charset the character set of pairs to be encoded
107 *
108 * @return the urlencoded pairs
109 * @throws UnsupportedEncodingException if charset is not supported
110 *
111 * @since 2.0 final
112 */
113 private static String doFormUrlEncode(NameValuePair[] pairs, String charset)
114 throws UnsupportedEncodingException
115 {
116 StringBuffer buf = new StringBuffer();
117 for (int i = 0; i < pairs.length; i++) {
118 URLCodec codec = new URLCodec();
119 NameValuePair pair = pairs[i];
120 if (pair.getName() != null) {
121 if (i > 0) {
122 buf.append("&");
123 }
124 buf.append(codec.encode(pair.getName(), charset));
125 buf.append("=");
126 if (pair.getValue() != null) {
127 buf.append(codec.encode(pair.getValue(), charset));
128 }
129 }
130 }
131 return buf.toString();
132 }
133
134 /***
135 * Converts the byte array of HTTP content characters to a string. If
136 * the specified charset is not supported, default system encoding
137 * is used.
138 *
139 * @param data the byte array to be encoded
140 * @param offset the index of the first byte to encode
141 * @param length the number of bytes to encode
142 * @param charset the desired character encoding
143 * @return The result of the conversion.
144 *
145 * @since 3.0
146 */
147 public static String getString(
148 final byte[] data,
149 int offset,
150 int length,
151 String charset
152 ) {
153
154 if (data == null) {
155 throw new IllegalArgumentException("Parameter may not be null");
156 }
157
158 if (charset == null || charset.length() == 0) {
159 throw new IllegalArgumentException("charset may not be null or empty");
160 }
161
162 try {
163 return new String(data, offset, length, charset);
164 } catch (UnsupportedEncodingException e) {
165
166 if (LOG.isWarnEnabled()) {
167 LOG.warn("Unsupported encoding: " + charset + ". System encoding used");
168 }
169 return new String(data, offset, length);
170 }
171 }
172
173
174 /***
175 * Converts the byte array of HTTP content characters to a string. If
176 * the specified charset is not supported, default system encoding
177 * is used.
178 *
179 * @param data the byte array to be encoded
180 * @param charset the desired character encoding
181 * @return The result of the conversion.
182 *
183 * @since 3.0
184 */
185 public static String getString(final byte[] data, String charset) {
186 return getString(data, 0, data.length, charset);
187 }
188
189 /***
190 * Converts the specified string to a byte array. If the charset is not supported the
191 * default system charset is used.
192 *
193 * @param data the string to be encoded
194 * @param charset the desired character encoding
195 * @return The resulting byte array.
196 *
197 * @since 3.0
198 */
199 public static byte[] getBytes(final String data, String charset) {
200
201 if (data == null) {
202 throw new IllegalArgumentException("data may not be null");
203 }
204
205 if (charset == null || charset.length() == 0) {
206 throw new IllegalArgumentException("charset may not be null or empty");
207 }
208
209 try {
210 return data.getBytes(charset);
211 } catch (UnsupportedEncodingException e) {
212
213 if (LOG.isWarnEnabled()) {
214 LOG.warn("Unsupported encoding: " + charset + ". System encoding used.");
215 }
216
217 return data.getBytes();
218 }
219 }
220
221 /***
222 * Converts the specified string to byte array of ASCII characters.
223 *
224 * @param data the string to be encoded
225 * @return The string as a byte array.
226 *
227 * @since 3.0
228 */
229 public static byte[] getAsciiBytes(final String data) {
230
231 if (data == null) {
232 throw new IllegalArgumentException("Parameter may not be null");
233 }
234
235 try {
236 return data.getBytes("US-ASCII");
237 } catch (UnsupportedEncodingException e) {
238 throw new HttpClientError("HttpClient requires ASCII support");
239 }
240 }
241
242 /***
243 * Converts the byte array of ASCII characters to a string. This method is
244 * to be used when decoding content of HTTP elements (such as response
245 * headers)
246 *
247 * @param data the byte array to be encoded
248 * @param offset the index of the first byte to encode
249 * @param length the number of bytes to encode
250 * @return The string representation of the byte array
251 *
252 * @since 3.0
253 */
254 public static String getAsciiString(final byte[] data, int offset, int length) {
255
256 if (data == null) {
257 throw new IllegalArgumentException("Parameter may not be null");
258 }
259
260 try {
261 return new String(data, offset, length, "US-ASCII");
262 } catch (UnsupportedEncodingException e) {
263 throw new HttpClientError("HttpClient requires ASCII support");
264 }
265 }
266
267 /***
268 * Converts the byte array of ASCII characters to a string. This method is
269 * to be used when decoding content of HTTP elements (such as response
270 * headers)
271 *
272 * @param data the byte array to be encoded
273 * @return The string representation of the byte array
274 *
275 * @since 3.0
276 */
277 public static String getAsciiString(final byte[] data) {
278 return getAsciiString(data, 0, data.length);
279 }
280
281 /***
282 * This class should not be instantiated.
283 */
284 private EncodingUtil() {
285 }
286
287 }