/* ======================================================================
The Bodington System Software License, Version 1.0
  
Copyright (c) 2001 The University of Leeds.  All rights reserved.
  
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1.  Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2.  Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3.  The end-user documentation included with the redistribution, if any,
must include the following acknowledgement:  "This product includes
software developed by the University of Leeds
(http://www.bodington.org/)."  Alternately, this acknowledgement may
appear in the software itself, if and wherever such third-party
acknowledgements normally appear.

4.  The names "Bodington", "Nathan Bodington", "Bodington System",
"Bodington Open Source Project", and "The University of Leeds" must not be
used to endorse or promote products derived from this software without
prior written permission. For written permission, please contact
d.gardner@leeds.ac.uk.

5.  The name "Bodington" may not appear in the name of products derived
from this software without prior written permission of the University of
Leeds.

THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  TITLE,  THE IMPLIED WARRANTIES 
OF QUALITY  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO 
EVENT SHALL THE UNIVERSITY OF LEEDS OR ITS CONTRIBUTORS BE LIABLE FOR 
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE.
=========================================================

This software was originally created by the University of Leeds and may contain voluntary 
contributions from others.  For more information on the Bodington Open Source Project, please 
see http://bodington.org/

====================================================================== */
package org.bodington.util;

import java.io.*;

/**
 * Minimal streaming reader for comma separated value text.
 * <p>
 * Usage is a two level loop: call {@link #nextLine()} to start a line, then
 * call {@link #readField()} repeatedly until it returns <code>null</code>,
 * which marks the end of that line.  {@link #nextLine()} may only be called
 * again once <code>readField()</code> has returned <code>null</code>.
 * <p>
 * Format rules implemented here:
 * <ul>
 * <li>Fields are separated by commas; lines end with LF, CR or CR LF.</li>
 * <li>A field may be enclosed in double quotes, in which case it can contain
 *     commas and line breaks, and a doubled quote ("") stands for one literal
 *     quote character.  White space around the quoted section is dropped.</li>
 * <li>Unquoted fields are returned exactly as read, including any
 *     surrounding white space.</li>
 * <li>Characters for which
 *     <code>Character.isIdentifierIgnorable()</code> is true are skipped
 *     wherever they occur.</li>
 * <li>A line containing nothing but white space yields no fields.</li>
 * </ul>
 * Because the end of the input is only detected while reading a field,
 * <code>nextLine()</code> returns <code>true</code> once more after the
 * final line terminator; the following <code>readField()</code> then
 * returns <code>null</code> straight away.
 * <p>
 * The class keeps parsing state in its fields and does no synchronisation.
 */
public class CsvReader
	{
	BufferedReader reader;
	boolean start_of_line, end_of_line, end_of_file;
	
	/**
	 * Creates a CSV reader on top of the given character stream.
	 * The stream is wrapped in a <code>BufferedReader</code>.
	 *
	 * @param reader source of CSV text.
	 * @throws IOException declared for compatibility with existing callers.
	 */
	public CsvReader( Reader reader )
		throws IOException
		{
		this.reader = new BufferedReader( reader );
		// Starting "at end of line" lets the first nextLine() call succeed.
		end_of_line = true;
		end_of_file = false;
		}
		
	/**
	 * Moves on to the next line of input.
	 *
	 * @return <code>false</code> if the end of the input has already been
	 *         reached, otherwise <code>true</code>.
	 * @throws IOException if the current line has not been read to its end,
	 *         i.e. <code>readField()</code> has not yet returned
	 *         <code>null</code>.
	 */
	public boolean nextLine()
		throws IOException
		{
		if ( !end_of_line )
			throw new IOException( "Can only call nextLine() at end of line." );
		// Once the input is exhausted leave the state alone so that
		// further calls keep returning false instead of throwing.
		if ( end_of_file )
			return false;
		end_of_line=false;
		start_of_line=true;
		return true;
		}

	/**
	 * Reads the next field of the current line.
	 *
	 * @return the field text (possibly empty), or <code>null</code> when
	 *         there are no more fields on the current line.
	 * @throws IOException if the underlying reader fails, if non-white
	 *         space text follows a closing quote, or if the input ends
	 *         inside a quoted field.
	 */
	public String readField()
		throws IOException
		{
		if ( end_of_line || end_of_file )
			return null;
		
		StringBuffer field = new StringBuffer();
		// in_quote      - currently between an opening and closing quote
		// is_quoted     - a quote has been seen somewhere in this field
		// in_whitespace - only white space has been seen so far in this field
		boolean in_quote=false, is_quoted=false, in_whitespace=true;
		int c, last_c=-1;
		
		while ( (c=reader.read()) != -1 )
			{
			// just skip past control characters no matter where they are
			// placed
			if ( Character.isIdentifierIgnorable( (char)c ) )
				continue;
				
			if ( c=='\"' )
				{
				if ( !is_quoted )
					{
					// first quote found in the field:
					// erase leading white space, but keep any real text
					// that came before the quote.
					if ( in_whitespace )
						{
						in_whitespace=false;
						field.setLength( 0 );
						}
					is_quoted=true;
					in_quote=true;
					}
				else
					{
					// An opening quote immediately after a closing quote
					// is really an escaped literal quote character.
					if ( !in_quote && last_c == '\"' )
						field.append( '\"' );
					// Either this is the first or second character of an
					// escaped quote "" or it is the end of the quoted
					// section; just flip the flag and deal with what
					// comes next.
					in_quote=!in_quote;
					}
				}
			else if ( is_quoted && in_quote )
				{
				// anything inside the quotes is part of the field,
				// including white space, commas and line breaks.
				field.append( (char)c );
				}
			else
				{
				// at field delimiter?
				if ( c == ',' )
					{
					// A comma proves the line has content, even when the
					// field before it is empty, so whatever follows the
					// comma counts as a field too.
					start_of_line=false;
					return field.toString();
					}
					
				// CR and CR LF end the line just like a bare LF.
				if ( c == '\r' )
					{
					reader.mark( 1 );
					if ( reader.read() != '\n' )
						reader.reset();
					c = '\n';
					}
					
				// at line delimiter?
				if ( c == '\n' )
					{
					end_of_line=true;
					// if the line had nothing but white space
					// (we are still at start) there is no field
					// to return.
					if ( start_of_line )
						return null;
					return field.toString();
					}
					
				// not a delimiter...
				if ( is_quoted )
					{
					// between or after quoted sections only white space
					// is allowed, and it is discarded.
					if ( !Character.isWhitespace( (char)c ) )
						throw new IOException( "Invalid file format. Misplaced quotes." );
					}
				else
					{
					// plain unquoted text is added as is
					if ( !Character.isWhitespace( (char)c ) )
						in_whitespace=false;
					field.append( (char)c );
					}
				}
			
			// whatever turns up that isn't white space - we aren't
			// at the start of the line anymore.
			if ( !Character.isWhitespace( (char)c ) )
				start_of_line=false;
				
			last_c = c;
			}
		
		// reached end of file
		end_of_file = true;
		end_of_line = true;
		
		// an unterminated quoted field is an error
		if ( in_quote )
			throw new IOException( "Unexpected end of file." );
		
		if ( start_of_line )
			return null;
			
		return field.toString();
		}
	}
	
