1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 package org.apache.commons.httpclient;
31
32 import java.io.ByteArrayOutputStream;
33 import java.io.IOException;
34 import java.io.InputStream;
35
36 import org.apache.commons.httpclient.util.EncodingUtil;
37 import org.apache.commons.httpclient.util.ExceptionUtil;
38 import org.apache.commons.logging.Log;
39 import org.apache.commons.logging.LogFactory;
40
41
42 /***
 * <p>Transparently coalesces chunks of an HTTP stream that uses
 * Transfer-Encoding chunked.</p>
45 *
46 * <p>Note that this class NEVER closes the underlying stream, even when close
47 * gets called. Instead, it will read until the "end" of its chunking on close,
48 * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
49 * not requiring the client to remember to read the entire contents of the
50 * response.</p>
51 *
52 * @author Ortwin Glueck
53 * @author Sean C. Sullivan
54 * @author Martin Elwin
55 * @author Eric Johnson
56 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
57 * @author Michael Becke
58 * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
59 *
60 * @since 2.0
61 *
62 */
63 public class ChunkedInputStream extends InputStream {
64 /*** The inputstream that we're wrapping */
65 private InputStream in;
66
67 /*** The chunk size */
68 private int chunkSize;
69
70 /*** The current position within the current chunk */
71 private int pos;
72
73 /*** True if we'are at the beginning of stream */
74 private boolean bof = true;
75
76 /*** True if we've reached the end of stream */
77 private boolean eof = false;
78
79 /*** True if this stream is closed */
80 private boolean closed = false;
81
82 /*** The method that this stream came from */
83 private HttpMethod method = null;
84
85 /*** Log object for this class. */
86 private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);
87
88 /***
89 * ChunkedInputStream constructor that associates the chunked input stream with a
90 * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod
91 * HTTP method} the chunked input stream originates from. If chunked input stream
92 * contains any footers (trailing headers), they will be added to the associated
93 * {@link HttpMethod HTTP method}.
94 *
95 * @param in the raw input stream
96 * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.
97 *
98 * @throws IOException If an IO error occurs
99 */
100 public ChunkedInputStream(
101 final InputStream in, final HttpMethod method) throws IOException {
102
103 if (in == null) {
104 throw new IllegalArgumentException("InputStream parameter may not be null");
105 }
106 this.in = in;
107 this.method = method;
108 this.pos = 0;
109 }
110
111 /***
112 * ChunkedInputStream constructor
113 *
114 * @param in the raw input stream
115 *
116 * @throws IOException If an IO error occurs
117 */
118 public ChunkedInputStream(final InputStream in) throws IOException {
119 this(in, null);
120 }
121
122 /***
123 * <p> Returns all the data in a chunked stream in coalesced form. A chunk
124 * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
125 * is detected.</p>
126 *
127 * <p> Trailer headers are read automcatically at the end of the stream and
128 * can be obtained with the getResponseFooters() method.</p>
129 *
130 * @return -1 of the end of the stream has been reached or the next data
131 * byte
132 * @throws IOException If an IO problem occurs
133 *
134 * @see HttpMethod#getResponseFooters()
135 */
136 public int read() throws IOException {
137
138 if (closed) {
139 throw new IOException("Attempted read from closed stream.");
140 }
141 if (eof) {
142 return -1;
143 }
144 if (pos >= chunkSize) {
145 nextChunk();
146 if (eof) {
147 return -1;
148 }
149 }
150 pos++;
151 return in.read();
152 }
153
154 /***
155 * Read some bytes from the stream.
156 * @param b The byte array that will hold the contents from the stream.
157 * @param off The offset into the byte array at which bytes will start to be
158 * placed.
159 * @param len the maximum number of bytes that can be returned.
160 * @return The number of bytes returned or -1 if the end of stream has been
161 * reached.
162 * @see java.io.InputStream#read(byte[], int, int)
163 * @throws IOException if an IO problem occurs.
164 */
165 public int read (byte[] b, int off, int len) throws IOException {
166
167 if (closed) {
168 throw new IOException("Attempted read from closed stream.");
169 }
170
171 if (eof) {
172 return -1;
173 }
174 if (pos >= chunkSize) {
175 nextChunk();
176 if (eof) {
177 return -1;
178 }
179 }
180 len = Math.min(len, chunkSize - pos);
181 int count = in.read(b, off, len);
182 pos += count;
183 return count;
184 }
185
186 /***
187 * Read some bytes from the stream.
188 * @param b The byte array that will hold the contents from the stream.
189 * @return The number of bytes returned or -1 if the end of stream has been
190 * reached.
191 * @see java.io.InputStream#read(byte[])
192 * @throws IOException if an IO problem occurs.
193 */
194 public int read (byte[] b) throws IOException {
195 return read(b, 0, b.length);
196 }
197
198 /***
199 * Read the CRLF terminator.
200 * @throws IOException If an IO error occurs.
201 */
202 private void readCRLF() throws IOException {
203 int cr = in.read();
204 int lf = in.read();
205 if ((cr != '\r') || (lf != '\n')) {
206 throw new IOException(
207 "CRLF expected at end of chunk: " + cr + "/" + lf);
208 }
209 }
210
211
212 /***
213 * Read the next chunk.
214 * @throws IOException If an IO error occurs.
215 */
216 private void nextChunk() throws IOException {
217 if (!bof) {
218 readCRLF();
219 }
220 chunkSize = getChunkSizeFromInputStream(in);
221 bof = false;
222 pos = 0;
223 if (chunkSize == 0) {
224 eof = true;
225 parseTrailerHeaders();
226 }
227 }
228
229 /***
230 * Expects the stream to start with a chunksize in hex with optional
231 * comments after a semicolon. The line must end with a CRLF: "a3; some
232 * comment\r\n" Positions the stream at the start of the next line.
233 *
234 * @param in The new input stream.
235 * @param required <tt>true<tt/> if a valid chunk must be present,
236 * <tt>false<tt/> otherwise.
237 *
238 * @return the chunk size as integer
239 *
240 * @throws IOException when the chunk size could not be parsed
241 */
242 private static int getChunkSizeFromInputStream(final InputStream in)
243 throws IOException {
244
245 ByteArrayOutputStream baos = new ByteArrayOutputStream();
246
247 int state = 0;
248 while (state != -1) {
249 int b = in.read();
250 if (b == -1) {
251 throw new IOException("chunked stream ended unexpectedly");
252 }
253 switch (state) {
254 case 0:
255 switch (b) {
256 case '\r':
257 state = 1;
258 break;
259 case '\"':
260 state = 2;
261
262 default:
263 baos.write(b);
264 }
265 break;
266
267 case 1:
268 if (b == '\n') {
269 state = -1;
270 } else {
271
272 throw new IOException("Protocol violation: Unexpected"
273 + " single newline character in chunk size");
274 }
275 break;
276
277 case 2:
278 switch (b) {
279 case '//':
280 b = in.read();
281 baos.write(b);
282 break;
283 case '\"':
284 state = 0;
285
286 default:
287 baos.write(b);
288 }
289 break;
290 default: throw new RuntimeException("assertion failed");
291 }
292 }
293
294
295 String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
296 int separator = dataString.indexOf(';');
297 dataString = (separator > 0)
298 ? dataString.substring(0, separator).trim()
299 : dataString.trim();
300
301 int result;
302 try {
303 result = Integer.parseInt(dataString.trim(), 16);
304 } catch (NumberFormatException e) {
305 throw new IOException ("Bad chunk size: " + dataString);
306 }
307 return result;
308 }
309
310 /***
311 * Reads and stores the Trailer headers.
312 * @throws IOException If an IO problem occurs
313 */
314 private void parseTrailerHeaders() throws IOException {
315 Header[] footers = null;
316 try {
317 String charset = "US-ASCII";
318 if (this.method != null) {
319 charset = this.method.getParams().getHttpElementCharset();
320 }
321 footers = HttpParser.parseHeaders(in, charset);
322 } catch(HttpException e) {
323 LOG.error("Error parsing trailer headers", e);
324 IOException ioe = new IOException(e.getMessage());
325 ExceptionUtil.initCause(ioe, e);
326 throw ioe;
327 }
328 if (this.method != null) {
329 for (int i = 0; i < footers.length; i++) {
330 this.method.addResponseFooter(footers[i]);
331 }
332 }
333 }
334
335 /***
336 * Upon close, this reads the remainder of the chunked message,
337 * leaving the underlying socket at a position to start reading the
338 * next response without scanning.
339 * @throws IOException If an IO problem occurs.
340 */
341 public void close() throws IOException {
342 if (!closed) {
343 try {
344 if (!eof) {
345 exhaustInputStream(this);
346 }
347 } finally {
348 eof = true;
349 closed = true;
350 }
351 }
352 }
353
354 /***
355 * Exhaust an input stream, reading until EOF has been encountered.
356 *
357 * <p>Note that this function is intended as a non-public utility.
358 * This is a little weird, but it seemed silly to make a utility
359 * class for this one function, so instead it is just static and
360 * shared that way.</p>
361 *
362 * @param inStream The {@link InputStream} to exhaust.
363 * @throws IOException If an IO problem occurs
364 */
365 static void exhaustInputStream(InputStream inStream) throws IOException {
366
367 byte buffer[] = new byte[1024];
368 while (inStream.read(buffer) >= 0) {
369 ;
370 }
371 }
372 }