View Javadoc

1   /*
2    * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/ChunkedInputStream.java,v 1.24 2004/10/10 15:18:55 olegk Exp $
3    * $Revision: 291181 $
4    * $Date: 2005-09-23 14:13:25 -0400 (Fri, 23 Sep 2005) $
5    *
6    * ====================================================================
7    *
8    *  Copyright 2002-2004 The Apache Software Foundation
9    *
10   *  Licensed under the Apache License, Version 2.0 (the "License");
11   *  you may not use this file except in compliance with the License.
12   *  You may obtain a copy of the License at
13   *
14   *      http://www.apache.org/licenses/LICENSE-2.0
15   *
16   *  Unless required by applicable law or agreed to in writing, software
17   *  distributed under the License is distributed on an "AS IS" BASIS,
18   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19   *  See the License for the specific language governing permissions and
20   *  limitations under the License.
21   * ====================================================================
22   *
23   * This software consists of voluntary contributions made by many
24   * individuals on behalf of the Apache Software Foundation.  For more
25   * information on the Apache Software Foundation, please see
26   * <http://www.apache.org/>.
27   *
28   */
29  
30  package org.apache.commons.httpclient;
31  
32  import java.io.ByteArrayOutputStream;
33  import java.io.IOException;
34  import java.io.InputStream;
35  
36  import org.apache.commons.httpclient.util.EncodingUtil;
37  import org.apache.commons.httpclient.util.ExceptionUtil;
38  import org.apache.commons.logging.Log;
39  import org.apache.commons.logging.LogFactory;
40  
41  
42  /***
43   * <p>Transparently coalesces chunks of a HTTP stream that uses
44   * Transfer-Encoding chunked.</p>
45   *
46   * <p>Note that this class NEVER closes the underlying stream, even when close
47   * gets called.  Instead, it will read until the "end" of its chunking on close,
48   * which allows for the seamless invocation of subsequent HTTP 1.1 calls, while
49   * not requiring the client to remember to read the entire contents of the
50   * response.</p>
51   *
52   * @author Ortwin Glueck
53   * @author Sean C. Sullivan
54   * @author Martin Elwin
55   * @author Eric Johnson
56   * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
57   * @author Michael Becke
58   * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
59   *
60   * @since 2.0
61   *
62   */
63  public class ChunkedInputStream extends InputStream {
64      /*** The inputstream that we're wrapping */
65      private InputStream in;
66  
67      /*** The chunk size */
68      private int chunkSize;
69  
70      /*** The current position within the current chunk */
71      private int pos;
72  
73      /*** True if we'are at the beginning of stream */
74      private boolean bof = true;
75  
76      /*** True if we've reached the end of stream */
77      private boolean eof = false;
78  
79      /*** True if this stream is closed */
80      private boolean closed = false;
81  
82      /*** The method that this stream came from */
83      private HttpMethod method = null;
84  
85      /*** Log object for this class. */
86      private static final Log LOG = LogFactory.getLog(ChunkedInputStream.class);
87  
88      /***
89       * ChunkedInputStream constructor that associates the chunked input stream with a 
90       * {@link HttpMethod HTTP method}. Usually it should be the same {@link HttpMethod 
91       * HTTP method} the chunked input stream originates from. If chunked input stream 
92       * contains any footers (trailing headers), they will be added to the associated 
93       * {@link HttpMethod HTTP method}.
94       *
95       * @param in the raw input stream
96       * @param method the HTTP method to associate this input stream with. Can be <tt>null</tt>.  
97       *
98       * @throws IOException If an IO error occurs
99       */
100     public ChunkedInputStream(
101         final InputStream in, final HttpMethod method) throws IOException {
102             
103     	if (in == null) {
104     		throw new IllegalArgumentException("InputStream parameter may not be null");
105     	}
106         this.in = in;
107         this.method = method;
108         this.pos = 0;
109     }
110 
111     /***
112      * ChunkedInputStream constructor
113      *
114      * @param in the raw input stream
115      *
116      * @throws IOException If an IO error occurs
117      */
118     public ChunkedInputStream(final InputStream in) throws IOException {
119     	this(in, null);
120     }
121     
122     /***
123      * <p> Returns all the data in a chunked stream in coalesced form. A chunk
124      * is followed by a CRLF. The method returns -1 as soon as a chunksize of 0
125      * is detected.</p>
126      * 
127      * <p> Trailer headers are read automcatically at the end of the stream and
128      * can be obtained with the getResponseFooters() method.</p>
129      *
130      * @return -1 of the end of the stream has been reached or the next data
131      * byte
132      * @throws IOException If an IO problem occurs
133      * 
134      * @see HttpMethod#getResponseFooters()
135      */
136     public int read() throws IOException {
137 
138         if (closed) {
139             throw new IOException("Attempted read from closed stream.");
140         }
141         if (eof) {
142             return -1;
143         } 
144         if (pos >= chunkSize) {
145             nextChunk();
146             if (eof) { 
147                 return -1;
148             }
149         }
150         pos++;
151         return in.read();
152     }
153 
154     /***
155      * Read some bytes from the stream.
156      * @param b The byte array that will hold the contents from the stream.
157      * @param off The offset into the byte array at which bytes will start to be
158      * placed.
159      * @param len the maximum number of bytes that can be returned.
160      * @return The number of bytes returned or -1 if the end of stream has been
161      * reached.
162      * @see java.io.InputStream#read(byte[], int, int)
163      * @throws IOException if an IO problem occurs.
164      */
165     public int read (byte[] b, int off, int len) throws IOException {
166 
167         if (closed) {
168             throw new IOException("Attempted read from closed stream.");
169         }
170 
171         if (eof) { 
172             return -1;
173         }
174         if (pos >= chunkSize) {
175             nextChunk();
176             if (eof) { 
177                 return -1;
178             }
179         }
180         len = Math.min(len, chunkSize - pos);
181         int count = in.read(b, off, len);
182         pos += count;
183         return count;
184     }
185 
186     /***
187      * Read some bytes from the stream.
188      * @param b The byte array that will hold the contents from the stream.
189      * @return The number of bytes returned or -1 if the end of stream has been
190      * reached.
191      * @see java.io.InputStream#read(byte[])
192      * @throws IOException if an IO problem occurs.
193      */
194     public int read (byte[] b) throws IOException {
195         return read(b, 0, b.length);
196     }
197 
198     /***
199      * Read the CRLF terminator.
200      * @throws IOException If an IO error occurs.
201      */
202     private void readCRLF() throws IOException {
203         int cr = in.read();
204         int lf = in.read();
205         if ((cr != '\r') || (lf != '\n')) { 
206             throw new IOException(
207                 "CRLF expected at end of chunk: " + cr + "/" + lf);
208         }
209     }
210 
211 
212     /***
213      * Read the next chunk.
214      * @throws IOException If an IO error occurs.
215      */
216     private void nextChunk() throws IOException {
217         if (!bof) {
218             readCRLF();
219         }
220         chunkSize = getChunkSizeFromInputStream(in);
221         bof = false;
222         pos = 0;
223         if (chunkSize == 0) {
224             eof = true;
225             parseTrailerHeaders();
226         }
227     }
228 
229     /***
230      * Expects the stream to start with a chunksize in hex with optional
231      * comments after a semicolon. The line must end with a CRLF: "a3; some
232      * comment\r\n" Positions the stream at the start of the next line.
233      *
234      * @param in The new input stream.
235      * @param required <tt>true<tt/> if a valid chunk must be present,
236      *                 <tt>false<tt/> otherwise.
237      * 
238      * @return the chunk size as integer
239      * 
240      * @throws IOException when the chunk size could not be parsed
241      */
242     private static int getChunkSizeFromInputStream(final InputStream in) 
243       throws IOException {
244             
245         ByteArrayOutputStream baos = new ByteArrayOutputStream();
246         // States: 0=normal, 1=\r was scanned, 2=inside quoted string, -1=end
247         int state = 0; 
248         while (state != -1) {
249         int b = in.read();
250             if (b == -1) { 
251                 throw new IOException("chunked stream ended unexpectedly");
252             }
253             switch (state) {
254                 case 0: 
255                     switch (b) {
256                         case '\r':
257                             state = 1;
258                             break;
259                         case '\"':
260                             state = 2;
261                             /* fall through */
262                         default:
263                             baos.write(b);
264                     }
265                     break;
266 
267                 case 1:
268                     if (b == '\n') {
269                         state = -1;
270                     } else {
271                         // this was not CRLF
272                         throw new IOException("Protocol violation: Unexpected"
273                             + " single newline character in chunk size");
274                     }
275                     break;
276 
277                 case 2:
278                     switch (b) {
279                         case '//':
280                             b = in.read();
281                             baos.write(b);
282                             break;
283                         case '\"':
284                             state = 0;
285                             /* fall through */
286                         default:
287                             baos.write(b);
288                     }
289                     break;
290                 default: throw new RuntimeException("assertion failed");
291             }
292         }
293 
294         //parse data
295         String dataString = EncodingUtil.getAsciiString(baos.toByteArray());
296         int separator = dataString.indexOf(';');
297         dataString = (separator > 0)
298             ? dataString.substring(0, separator).trim()
299             : dataString.trim();
300 
301         int result;
302         try {
303             result = Integer.parseInt(dataString.trim(), 16);
304         } catch (NumberFormatException e) {
305             throw new IOException ("Bad chunk size: " + dataString);
306         }
307         return result;
308     }
309 
310     /***
311      * Reads and stores the Trailer headers.
312      * @throws IOException If an IO problem occurs
313      */
314     private void parseTrailerHeaders() throws IOException {
315         Header[] footers = null;
316         try {
317             String charset = "US-ASCII";
318             if (this.method != null) {
319                 charset = this.method.getParams().getHttpElementCharset();
320             }
321             footers = HttpParser.parseHeaders(in, charset);
322         } catch(HttpException e) {
323             LOG.error("Error parsing trailer headers", e);
324             IOException ioe = new IOException(e.getMessage());
325             ExceptionUtil.initCause(ioe, e); 
326             throw ioe;
327         }
328         if (this.method != null) {
329             for (int i = 0; i < footers.length; i++) {
330                 this.method.addResponseFooter(footers[i]);
331             }
332         }
333     }
334 
335     /***
336      * Upon close, this reads the remainder of the chunked message,
337      * leaving the underlying socket at a position to start reading the
338      * next response without scanning.
339      * @throws IOException If an IO problem occurs.
340      */
341     public void close() throws IOException {
342         if (!closed) {
343             try {
344                 if (!eof) {
345                     exhaustInputStream(this);
346                 }
347             } finally {
348                 eof = true;
349                 closed = true;
350             }
351         }
352     }
353 
354     /***
355      * Exhaust an input stream, reading until EOF has been encountered.
356      *
357      * <p>Note that this function is intended as a non-public utility.
358      * This is a little weird, but it seemed silly to make a utility
359      * class for this one function, so instead it is just static and
360      * shared that way.</p>
361      *
362      * @param inStream The {@link InputStream} to exhaust.
363      * @throws IOException If an IO problem occurs
364      */
365     static void exhaustInputStream(InputStream inStream) throws IOException {
366         // read and discard the remainder of the message
367         byte buffer[] = new byte[1024];
368         while (inStream.read(buffer) >= 0) {
369             ;
370         }
371     }
372 }