package org.bodington.util;

import java.io.*;

/**
 * Pull-style reader for comma separated value (CSV) text.
 * <p>
 * Usage: call {@link #nextLine()} to start a line, then call
 * {@link #readField()} repeatedly until it returns <code>null</code>,
 * which marks the end of that line.  Repeat until
 * {@link #nextLine()} returns <code>false</code>.
 * <p>
 * Fields are separated by commas and lines by <code>'\n'</code>.  A field
 * may be enclosed in double quotes, in which case it may contain commas
 * and line breaks, and a doubled quote (<code>""</code>) stands for one
 * literal quote character.  White space around a quoted section is
 * discarded; white space in an unquoted field is kept as part of the field.
 */
public class CsvReader
	{
	/** Buffered source of characters. */
	BufferedReader reader;
	/**
	 * start_of_line: nothing but white space has been seen on the current line.
	 * end_of_line:   the current line is finished; nextLine() must be called
	 *                before more fields can be read.
	 * end_of_file:   a read has hit the end of the input.
	 */
	boolean start_of_line, end_of_line, end_of_file;
	
	/**
	 * Wraps the given reader in a BufferedReader.  The new object is
	 * positioned "before the first line", so {@link #nextLine()} must be
	 * called before the first {@link #readField()}.
	 *
	 * @param reader Source of CSV text.
	 * @throws IOException Declared for callers; nothing here reads the input.
	 */
	public CsvReader( Reader reader )
		throws IOException
		{
		this.reader = new BufferedReader( reader );
		end_of_line = true;
		end_of_file = false;
		}
		
	/**
	 * Moves on to the next line.  May only be called before the first
	 * line or after {@link #readField()} has returned <code>null</code>
	 * (or the last field of a line).
	 * <p>
	 * End of input is only detected by {@link #readField()}, so when the
	 * input ends with a line break this method returns <code>true</code>
	 * one more time and the following {@link #readField()} returns
	 * <code>null</code> straight away.
	 *
	 * @return <code>false</code> if a previous read already reached the
	 *         end of the input, <code>true</code> otherwise.
	 * @throws IOException If the current line has not been read to its end.
	 */
	public boolean nextLine()
		throws IOException
		{
		if ( !end_of_line )
			throw new IOException( "Can only call nextLine() at end of line." );
		end_of_line=false;
		start_of_line=true;
		return !end_of_file;
		}

	/**
	 * Reads the next field of the current line.
	 *
	 * @return The field text (possibly empty), or <code>null</code> if the
	 *         current line has no more fields, if the line contained
	 *         nothing but white space, or if the end of the input was
	 *         reached before any field content.
	 * @throws IOException If the underlying reader fails, if non-white
	 *         space follows a closed quoted section, or if the input ends
	 *         inside an open quoted section.
	 */
	public String readField()
		throws IOException
		{
		if ( end_of_line )
			return null;
		if ( end_of_file )
			return null;
			
		
		StringBuffer field = new StringBuffer();
		// in_quote:      currently between an opening and a closing quote
		// is_quoted:     at least one quote has been seen in this field
		// in_whitespace: only white space has been seen in this field so far
		boolean in_quote=false, is_quoted=false, in_whitespace=true;
		int c, last_c=-1;
		

		while ( (c=reader.read()) != -1 )
			{
			// Skip characters that Character.isIdentifierIgnorable() reports
			// (non-white-space control characters and format characters)
			// no matter where they are placed.  They do not update last_c.
			if ( Character.isIdentifierIgnorable( (char)c ) )
				continue;
				
			if ( c=='\"' )
				{
				if ( !is_quoted )
					{
					// first quote found in the field
					
					// erase leading white space, but only if that is all
					// that has been collected so far
					if ( in_whitespace )
						{
						in_whitespace=false;
						field.setLength( 0 );
						}
					is_quoted=true;
					in_quote=true;
					}
				else
					{
					// An opening quote immediately after a closing quote
					// is actually an escaped literal quote character.
					if ( !in_quote && last_c == '\"' )
						field.append( "\"" );
					// Either this is the first or second character of an
					// escaped quote "" or it is the end of the quoted
					// section; just flip the flag and let the next
					// character decide.
					in_quote=!in_quote;
					}
				}
			else
				{
				// anything other than a quote:
				// action depends on whether we are inside quotes
				
				if ( is_quoted && in_quote )
					{
					// everything inside the quotes is part of the field,
					// including white space, commas and line breaks
					field.append( (char)c );
					}
				else
					{
					// at field delimiter?
					if ( c == ',' )
						{
						// A comma means the line holds more than white
						// space, and that another field (possibly empty)
						// follows, so we are no longer at start of line.
						// Without this an empty field after a leading
						// comma would be dropped.
						start_of_line=false;
						return field.toString();
						}
						
					// at line delimiter?
					if ( c == '\n' )
						{
						end_of_line=true;
						// if the line had nothing but white space
						// (we are still at start) there is no field
						// to return.
						if ( start_of_line )
							return null;
						
						// With CR LF line endings the CR has been collected
						// as part of an unquoted field; drop it.
						int length = field.length();
						if ( !is_quoted && length > 0 && field.charAt( length-1 ) == '\r' )
							field.setLength( length-1 );
							
						return field.toString();
						}
						
					// not a delimiter...
					
					// between or after quoted section?
					if ( is_quoted )
						{
						// non-white space is invalid
						if ( !Character.isWhitespace( (char)c ) )
							throw new IOException( "Invalid file format. Misplaced quotes." );
						// white space discarded
						}
					else
						{
						// Real content ends the run of leading white space,
						// so a later quote must not erase it.
						if ( !Character.isWhitespace( (char)c ) )
							in_whitespace=false;
						// any characters inside field are added on
						field.append( (char)c );
						}
					}
				}
			
			// whatever turns up that isn't white space - we aren't
			// at the start of the line anymore.
			if ( !Character.isWhitespace( (char)c ) )
				start_of_line=false;
				
			last_c = c;
			}
			
		
		// reached end of file
		end_of_file = true;
		end_of_line = true;
		
		// input ended inside an open quoted section
		if ( in_quote )
			throw new IOException( "Unexpected end of file." );
		
		// nothing but white space since the line started: no field
		if ( start_of_line )
			return null;
			
		return field.toString();
		}
	}
	
