1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.apache.commons.httpclient.util;
31
32 import java.util.BitSet;
33
34 import org.apache.commons.codec.DecoderException;
35 import org.apache.commons.codec.net.URLCodec;
36 import org.apache.commons.httpclient.URI;
37 import org.apache.commons.httpclient.URIException;
38
39 /***
40 * The URI escape and character encoding and decoding utility.
41 * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
42 * than {@link org.apache.commons.httpclient.URI}.
43 *
44 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
45 * @version $Revision: 155418 $ $Date: 2002/03/14 15:14:01
46 */
47 public class URIUtil {
48
49
50
/**
 * A bit set created with no bits set. {@link #encodeAll(String, String)}
 * passes it as the <code>allowed</code> argument of
 * {@link #encode(String, BitSet, String)}, i.e. as a set that exempts no
 * character from escaping.
 */
protected static final BitSet empty = new BitSet(1);
52
53
54
55 /***
56 * Get the basename of an URI. It's possibly an empty string.
57 *
58 * @param uri a string regarded an URI
59 * @return the basename string; an empty string if the path ends with slash
60 */
61 public static String getName(String uri) {
62 if (uri == null || uri.length() == 0) { return uri; }
63 String path = URIUtil.getPath(uri);
64 int at = path.lastIndexOf("/");
65 int to = path.length();
66 return (at >= 0) ? path.substring(at + 1, to) : path;
67 }
68
69
70 /***
71 * Get the query of an URI.
72 *
73 * @param uri a string regarded an URI
74 * @return the query string; <code>null</code> if empty or undefined
75 */
76 public static String getQuery(String uri) {
77 if (uri == null || uri.length() == 0) { return null; }
78
79 int at = uri.indexOf("//");
80 int from = uri.indexOf(
81 "/",
82 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
83 );
84
85 int to = uri.length();
86
87 at = uri.indexOf("?", from);
88 if (at >= 0) {
89 from = at + 1;
90 } else {
91 return null;
92 }
93
94 if (uri.lastIndexOf("#") > from) {
95 to = uri.lastIndexOf("#");
96 }
97
98 return (from < 0 || from == to) ? null : uri.substring(from, to);
99 }
100
101
102 /***
103 * Get the path of an URI.
104 *
105 * @param uri a string regarded an URI
106 * @return the path string
107 */
108 public static String getPath(String uri) {
109 if (uri == null) {
110 return null;
111 }
112
113 int at = uri.indexOf("//");
114 int from = uri.indexOf(
115 "/",
116 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
117 );
118
119 int to = uri.length();
120
121 if (uri.indexOf('?', from) != -1) {
122 to = uri.indexOf('?', from);
123 }
124
125 if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
126 to = uri.lastIndexOf("#");
127 }
128
129 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
130 }
131
132
133 /***
134 * Get the path and query of an URI.
135 *
136 * @param uri a string regarded an URI
137 * @return the path and query string
138 */
139 public static String getPathQuery(String uri) {
140 if (uri == null) {
141 return null;
142 }
143
144 int at = uri.indexOf("//");
145 int from = uri.indexOf(
146 "/",
147 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
148 );
149
150 int to = uri.length();
151
152
153 if (uri.lastIndexOf("#") > from) {
154 to = uri.lastIndexOf("#");
155 }
156
157 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
158 }
159
160
161 /***
162 * Get the path of an URI and its rest part.
163 *
164 * @param uri a string regarded an URI
165 * @return the string from the path part
166 */
167 public static String getFromPath(String uri) {
168 if (uri == null) {
169 return null;
170 }
171
172 int at = uri.indexOf("//");
173 int from = uri.indexOf(
174 "/",
175 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
176 );
177
178 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
179 }
180
181
182
183 /***
184 * Get the all escaped and encoded string with the default protocl charset.
185 * It's the same function to use <code>encode(String unescaped, Bitset
186 * empty, URI.getDefaultProtocolCharset())</code>.
187 *
188 * @param unescaped an unescaped string
189 * @return the escaped string
190 *
191 * @throws URIException if the default protocol charset is not supported
192 *
193 * @see URI#getDefaultProtocolCharset
194 * @see #encode
195 */
196 public static String encodeAll(String unescaped) throws URIException {
197 return encodeAll(unescaped, URI.getDefaultProtocolCharset());
198 }
199
200
201 /***
202 * Get the all escaped and encoded string with a given charset.
203 * It's the same function to use <code>encode(String unescaped, Bitset
204 * empty, String charset)</code>.
205 *
206 * @param unescaped an unescaped string
207 * @param charset the charset
208 * @return the escaped string
209 *
210 * @throws URIException if the charset is not supported
211 *
212 * @see #encode
213 */
214 public static String encodeAll(String unescaped, String charset)
215 throws URIException {
216
217 return encode(unescaped, empty, charset);
218 }
219
220
221 /***
222 * Escape and encode a string regarded as within the authority component of
223 * an URI with the default protocol charset.
224 * Within the authority component, the characters ";", ":", "@", "?", and
225 * "/" are reserved.
226 *
227 * @param unescaped an unescaped string
228 * @return the escaped string
229 *
230 * @throws URIException if the default protocol charset is not supported
231 *
232 * @see URI#getDefaultProtocolCharset
233 * @see #encode
234 */
235 public static String encodeWithinAuthority(String unescaped)
236 throws URIException {
237
238 return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
239 }
240
241
242 /***
243 * Escape and encode a string regarded as within the authority component of
244 * an URI with a given charset.
245 * Within the authority component, the characters ";", ":", "@", "?", and
246 * "/" are reserved.
247 *
248 * @param unescaped an unescaped string
249 * @param charset the charset
250 * @return the escaped string
251 *
252 * @throws URIException if the charset is not supported
253 *
254 * @see #encode
255 */
256 public static String encodeWithinAuthority(String unescaped, String charset)
257 throws URIException {
258
259 return encode(unescaped, URI.allowed_within_authority, charset);
260 }
261
262
263 /***
264 * Escape and encode a string regarded as the path and query components of
265 * an URI with the default protocol charset.
266 *
267 * @param unescaped an unescaped string
268 * @return the escaped string
269 *
270 * @throws URIException if the default protocol charset is not supported
271 *
272 * @see URI#getDefaultProtocolCharset
273 * @see #encode
274 */
275 public static String encodePathQuery(String unescaped) throws URIException {
276 return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
277 }
278
279
280 /***
281 * Escape and encode a string regarded as the path and query components of
282 * an URI with a given charset.
283 *
284 * @param unescaped an unescaped string
285 * @param charset the charset
286 * @return the escaped string
287 *
288 * @throws URIException if the charset is not supported
289 *
290 * @see #encode
291 */
292 public static String encodePathQuery(String unescaped, String charset)
293 throws URIException {
294
295 int at = unescaped.indexOf('?');
296 if (at < 0) {
297 return encode(unescaped, URI.allowed_abs_path, charset);
298 }
299
300 return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
301 + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
302 }
303
304
305 /***
306 * Escape and encode a string regarded as within the path component of an
307 * URI with the default protocol charset.
308 * The path may consist of a sequence of path segments separated by a
309 * single slash "/" character. Within a path segment, the characters
310 * "/", ";", "=", and "?" are reserved.
311 *
312 * @param unescaped an unescaped string
313 * @return the escaped string
314 *
315 * @throws URIException if the default protocol charset is not supported
316 *
317 * @see URI#getDefaultProtocolCharset
318 * @see #encode
319 */
320 public static String encodeWithinPath(String unescaped)
321 throws URIException {
322
323 return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
324 }
325
326
327 /***
328 * Escape and encode a string regarded as within the path component of an
329 * URI with a given charset.
330 * The path may consist of a sequence of path segments separated by a
331 * single slash "/" character. Within a path segment, the characters
332 * "/", ";", "=", and "?" are reserved.
333 *
334 * @param unescaped an unescaped string
335 * @param charset the charset
336 * @return the escaped string
337 *
338 * @throws URIException if the charset is not supported
339 *
340 * @see #encode
341 */
342 public static String encodeWithinPath(String unescaped, String charset)
343 throws URIException {
344
345 return encode(unescaped, URI.allowed_within_path, charset);
346 }
347
348
349 /***
350 * Escape and encode a string regarded as the path component of an URI with
351 * the default protocol charset.
352 *
353 * @param unescaped an unescaped string
354 * @return the escaped string
355 *
356 * @throws URIException if the default protocol charset is not supported
357 *
358 * @see URI#getDefaultProtocolCharset
359 * @see #encode
360 */
361 public static String encodePath(String unescaped) throws URIException {
362 return encodePath(unescaped, URI.getDefaultProtocolCharset());
363 }
364
365
366 /***
367 * Escape and encode a string regarded as the path component of an URI with
368 * a given charset.
369 *
370 * @param unescaped an unescaped string
371 * @param charset the charset
372 * @return the escaped string
373 *
374 * @throws URIException if the charset is not supported
375 *
376 * @see #encode
377 */
378 public static String encodePath(String unescaped, String charset)
379 throws URIException {
380
381 return encode(unescaped, URI.allowed_abs_path, charset);
382 }
383
384
385 /***
386 * Escape and encode a string regarded as within the query component of an
387 * URI with the default protocol charset.
388 * When a query comprise the name and value pairs, it is used in order
389 * to encode each name and value string. The reserved special characters
390 * within a query component are being included in encoding the query.
391 *
392 * @param unescaped an unescaped string
393 * @return the escaped string
394 *
395 * @throws URIException if the default protocol charset is not supported
396 *
397 * @see URI#getDefaultProtocolCharset
398 * @see #encode
399 */
400 public static String encodeWithinQuery(String unescaped)
401 throws URIException {
402
403 return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
404 }
405
406
407 /***
408 * Escape and encode a string regarded as within the query component of an
409 * URI with a given charset.
410 * When a query comprise the name and value pairs, it is used in order
411 * to encode each name and value string. The reserved special characters
412 * within a query component are being included in encoding the query.
413 *
414 * @param unescaped an unescaped string
415 * @param charset the charset
416 * @return the escaped string
417 *
418 * @throws URIException if the charset is not supported
419 *
420 * @see #encode
421 */
422 public static String encodeWithinQuery(String unescaped, String charset)
423 throws URIException {
424
425 return encode(unescaped, URI.allowed_within_query, charset);
426 }
427
428
429 /***
430 * Escape and encode a string regarded as the query component of an URI with
431 * the default protocol charset.
432 * When a query string is not misunderstood the reserved special characters
433 * ("&", "=", "+", ",", and "$") within a query component, this method
434 * is recommended to use in encoding the whole query.
435 *
436 * @param unescaped an unescaped string
437 * @return the escaped string
438 *
439 * @throws URIException if the default protocol charset is not supported
440 *
441 * @see URI#getDefaultProtocolCharset
442 * @see #encode
443 */
444 public static String encodeQuery(String unescaped) throws URIException {
445 return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
446 }
447
448
449 /***
450 * Escape and encode a string regarded as the query component of an URI with
451 * a given charset.
452 * When a query string is not misunderstood the reserved special characters
453 * ("&", "=", "+", ",", and "$") within a query component, this method
454 * is recommended to use in encoding the whole query.
455 *
456 * @param unescaped an unescaped string
457 * @param charset the charset
458 * @return the escaped string
459 *
460 * @throws URIException if the charset is not supported
461 *
462 * @see #encode
463 */
464 public static String encodeQuery(String unescaped, String charset)
465 throws URIException {
466
467 return encode(unescaped, URI.allowed_query, charset);
468 }
469
470
471 /***
472 * Escape and encode a given string with allowed characters not to be
473 * escaped and the default protocol charset.
474 *
475 * @param unescaped a string
476 * @param allowed allowed characters not to be escaped
477 * @return the escaped string
478 *
479 * @throws URIException if the default protocol charset is not supported
480 *
481 * @see URI#getDefaultProtocolCharset
482 */
483 public static String encode(String unescaped, BitSet allowed)
484 throws URIException {
485
486 return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
487 }
488
489
490 /***
491 * Escape and encode a given string with allowed characters not to be
492 * escaped and a given charset.
493 *
494 * @param unescaped a string
495 * @param allowed allowed characters not to be escaped
496 * @param charset the charset
497 * @return the escaped string
498 */
499 public static String encode(String unescaped, BitSet allowed,
500 String charset) throws URIException {
501 byte[] rawdata = URLCodec.encodeUrl(allowed,
502 EncodingUtil.getBytes(unescaped, charset));
503 return EncodingUtil.getAsciiString(rawdata);
504 }
505
506
507 /***
508 * Unescape and decode a given string regarded as an escaped string with the
509 * default protocol charset.
510 *
511 * @param escaped a string
512 * @return the unescaped string
513 *
514 * @throws URIException if the string cannot be decoded (invalid)
515 *
516 * @see URI#getDefaultProtocolCharset
517 */
518 public static String decode(String escaped) throws URIException {
519 try {
520 byte[] rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(escaped));
521 return EncodingUtil.getString(rawdata, URI.getDefaultProtocolCharset());
522 } catch (DecoderException e) {
523 throw new URIException(e.getMessage());
524 }
525 }
526
527 /***
528 * Unescape and decode a given string regarded as an escaped string.
529 *
530 * @param escaped a string
531 * @param charset the charset
532 * @return the unescaped string
533 *
534 * @throws URIException if the charset is not supported
535 *
536 * @see Coder#decode
537 */
538 public static String decode(String escaped, String charset)
539 throws URIException {
540
541 return Coder.decode(escaped.toCharArray(), charset);
542 }
543
544
545
546 /***
547 * The basic and internal utility for URI escape and character encoding and
548 * decoding.
549 *
550 * @deprecated use org.apache.commons.codec.net.URLCodec
551 */
552 protected static class Coder extends URI {
553
554 /***
555 * Escape and encode a given string with allowed characters not to be
556 * escaped.
557 *
558 * @param unescapedComponent an unescaped component
559 * @param allowed allowed characters not to be escaped
560 * @param charset the charset to encode
561 * @return the escaped and encoded string
562 *
563 * @throws URIException if the charset is not supported
564 *
565 * @deprecated use org.apache.commons.codec.net.URLCodec
566 */
567 public static char[] encode(String unescapedComponent, BitSet allowed, String charset)
568 throws URIException {
569
570 return URI.encode(unescapedComponent, allowed, charset);
571 }
572
573
574 /***
575 * Unescape and decode a given string.
576 *
577 * @param escapedComponent an being-unescaped component
578 * @param charset the charset to decode
579 * @return the escaped and encoded string
580 *
581 * @throws URIException if the charset is not supported
582 *
583 * @deprecated use org.apache.commons.codec.net.URLCodec
584 */
585 public static String decode(char[] escapedComponent, String charset)
586 throws URIException {
587
588 return URI.decode(escapedComponent, charset);
589 }
590
591
592 /***
593 * Verify whether a given string is escaped or not
594 *
595 * @param original given characters
596 * @return true if the given character array is 7 bit ASCII-compatible.
597 */
598 public static boolean verifyEscaped(char[] original) {
599 for (int i = 0; i < original.length; i++) {
600 int c = original[i];
601 if (c > 128) {
602 return false;
603 } else if (c == '%') {
604 if (Character.digit(original[++i], 16) == -1
605 || Character.digit(original[++i], 16) == -1) {
606 return false;
607 }
608 }
609 }
610 return true;
611 }
612
613
614 /***
615 * Replace from a given character to given character in an array order
616 * for a given string.
617 *
618 * @param original a given string
619 * @param from a replacing character array
620 * @param to a replaced character array
621 * @return the replaced string
622 */
623 public static String replace(String original, char[] from, char[] to) {
624 for (int i = from.length; i > 0; --i) {
625 original = replace(original, from[i], to[i]);
626 }
627 return original.toString();
628 }
629
630
631 /***
632 * Replace from a given character to given character for a given string.
633 *
634 * @param original a given string
635 * @param from a replacing character array
636 * @param to a replaced character array
637 * @return the replaced string
638 */
639 public static String replace(String original, char from, char to) {
640 StringBuffer result = new StringBuffer(original.length());
641 int at, saved = 0;
642 do {
643 at = original.indexOf(from);
644 if (at >= 0) {
645 result.append(original.substring(0, at));
646 result.append(to);
647 } else {
648 result.append(original.substring(saved));
649 }
650 saved = at;
651 } while (at >= 0);
652 return result.toString();
653 }
654 }
655
656 }
657