/* ======================================================================
The Bodington System Software License, Version 1.0
  
Copyright (c) 2001 The University of Leeds.  All rights reserved.
  
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1.  Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2.  Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3.  The end-user documentation included with the redistribution, if any,
must include the following acknowledgement:  "This product includes
software developed by the University of Leeds
(http://www.bodington.org/)."  Alternately, this acknowledgement may
appear in the software itself, if and wherever such third-party
acknowledgements normally appear.

4.  The names "Bodington", "Nathan Bodington", "Bodington System",
"Bodington Open Source Project", and "The University of Leeds" must not be
used to endorse or promote products derived from this software without
prior written permission. For written permission, please contact
d.gardner@leeds.ac.uk.

5.  The name "Bodington" may not appear in the name of products derived
from this software without prior written permission of the University of
Leeds.

THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  TITLE,  THE IMPLIED WARRANTIES 
OF QUALITY  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO 
EVENT SHALL THE UNIVERSITY OF LEEDS OR ITS CONTRIBUTORS BE LIABLE FOR 
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE.
=========================================================

This software was originally created by the University of Leeds and may contain voluntary 
contributions from others.  For more information on the Bodington Open Source Project, please 
see http://bodington.org/

====================================================================== */

package org.bodington.xml;

import java.util.logging.*;



import java.sql.*;
import java.io.*;

import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;

import org.xml.sax.*;
import org.xml.sax.helpers.*;
import org.w3c.dom.*;

import org.apache.crimson.tree.*;

import javax.xml.parsers.*;  
import java.util.Stack;
import java.util.Vector;
import java.util.Hashtable;

public class XMLRepository extends DefaultHandler
	{
	// Values written to the "state" column of the object table.
	// DEPOSITING is set when the object row is first inserted and
	// DEPOSITED once the whole document has been stored.
	public final int XML_OBJECT_DEPOSITING		= 1;
	public final int XML_OBJECT_DEPOSITED		= 2;
	public final int XML_OBJECT_UPDATING		= 3;
	public final int XML_OBJECT_DELETED			= 4;
	
	// Names of the database tables: objects, elements, attributes,
	// character data, search tokens and words (token occurrences).
	public String o_table, e_table, a_table, c_table, t_table, w_table;


	// Connection used by the deposit currently in progress.
	private Connection con;
	// SAX reader that feeds parse events back into this handler.
	private XMLReader xml_reader;

	// these statements are recreated every time an object is deposited
	PreparedStatement insert_xml_object, update_xml_object, insert_xml_element, 
	            insert_xml_attribute, insert_xml_cdata, update_xml_element,
	            find_xml_token, insert_xml_token, insert_xml_word;
	            
	// Highest id handed out so far in each table; seeded from the database
	// at the start of a deposit and incremented for every row inserted.
	private int max_xml_object_id, max_xml_element_id, max_xml_attribute_id, 
	            max_xml_cdata_id, max_xml_token_id, max_xml_word_id;

	// Ids (Integer) of the elements that are currently open during a deposit.
	private Stack stack;
	// Locator supplied by the SAX parser, if any.
	private Locator loc;
	// Running counter that supplies the left/right index of each element.
	private int visitation;
	
	// True while character data is being accumulated in cdata_buffer.
	private boolean in_text;
	private StringBuffer cdata_buffer;

	// word_cache maps token id (Integer) to token (String);
	// reverse_word_cache maps token (String) to token id (Integer).
	private Hashtable word_cache;
	private Hashtable reverse_word_cache;

	// When true, character data is read back with getCharacterStream();
	// otherwise a binary stream is decoded using db_character_encoding.
	private boolean use_character_stream;
	private String db_character_encoding;

	// Directory for temporary files; null means the system default.
	private File tempdir=null;
	
	public XMLRepository( String driver_class_name, String o_table, String e_table, String a_table, String c_table )
		throws SAXException, ParserConfigurationException, TransformerConfigurationException
		{
		this.o_table = o_table;
		this.e_table = e_table;
		this.a_table = a_table;
		this.c_table = c_table;
		this.t_table = "xml_tokens";
		this.w_table = "xml_words";
		
		use_character_stream = true;
		db_character_encoding = "UTF-16LE";
		
		if ( driver_class_name == null )
			xml_reader = XMLReaderFactory.createXMLReader();
		else
			xml_reader = XMLReaderFactory.createXMLReader( driver_class_name );
		
		xml_reader.setContentHandler( this );
		xml_reader.setFeature( "http://xml.org/sax/features/validation", false );
		loc=null;
		}

	/**
	 * Sets the directory in which temporary files are created.
	 *
	 * @param base an existing directory, or null to clear the setting.
	 * @throws IOException if base doesn't exist or isn't a directory.
	 */
	public void setTempDirectory( File base )
	    throws IOException
	    {
	    // null is accepted without any checks
	    if ( base == null )
		{
		tempdir = null;
		return;
		}

	    if ( !base.exists() )
		throw new IOException( "Temporary file directory doesn't exist." );

	    if ( !base.isDirectory() )
		throw new IOException( "Temporary file directory isn't a directory." );

	    tempdir = base;
	    }
		
	/**
	 * Loads every row of the token table into the two in-memory caches
	 * (id to token and token to id).  Does nothing if the caches have
	 * already been loaded.
	 *
	 * The caches are only published once the whole table has been read so
	 * that a failed load can be retried by a later call instead of leaving
	 * a partially filled cache behind.
	 *
	 * @param con connection used to read the token table.
	 * @throws SQLException if the token table can't be read.
	 */
	void initWordCache( Connection con )
		throws SQLException
		{
		if ( word_cache!=null )
			return;
			
		Logger log = Logger.getLogger( "org.bodington" );
		Hashtable tokens_by_id = new Hashtable();
		Hashtable ids_by_token = new Hashtable();
		
		Statement st = con.createStatement();
		try
			{
			ResultSet results = st.executeQuery( "SELECT xml_token_id, token FROM " + t_table );
			try
				{
				int id;
				Integer nid;
				String t;
				log.fine( "Loading cache of search tokens." );
				while ( results.next() )
					{
					id = results.getInt( 1 );
					t = results.getString( 2 );
					// Hashtable rejects null keys and values
					if ( t == null )
						continue;
					nid = new Integer( id );
					tokens_by_id.put( nid, t );
					ids_by_token.put( t, nid );
					}
				}
			finally
				{
				results.close();
				}
			}
		finally
			{
			st.close();
			}
		
		// word_cache is assigned last because it is the "already loaded" flag
		reverse_word_cache = ids_by_token;
		word_cache = tokens_by_id;
		log.fine( "Loaded cache of search tokens. There are " + word_cache.size() + " entries." );
		}

	/**
	 * Looks up the id of a search token in the in-memory cache.
	 *
	 * @param token the token text.
	 * @return the token id, or null if the token isn't in the cache.
	 * @throws IllegalArgumentException if the cache hasn't been loaded yet.
	 */
	public Integer getTokenId( String token )
		{
		Hashtable ids_by_token = reverse_word_cache;
		if ( ids_by_token != null )
			return (Integer)ids_by_token.get( token );

		throw new IllegalArgumentException( "The cache of search tokens isn't initialised." );
		}

	/**
	 * Changes the name of the table that holds XML objects.
	 *
	 * @param table the table name.
	 */
	public void setObjectTable( String table )
		{
		this.o_table = table;
		}
	
	/**
	 * Changes the name of the table that holds XML elements.
	 *
	 * @param table the table name.
	 */
	public void setElementTable( String table )
		{
		this.e_table = table;
		}
	
	/**
	 * Changes the name of the table that holds element attributes.
	 *
	 * @param table the table name.
	 */
	public void setAttributeTable( String table )
		{
		this.a_table = table;
		}
	
	/**
	 * Changes the name of the table that holds character data.
	 *
	 * @param table the table name.
	 */
	public void setCharacterTable( String table )
		{
		this.c_table = table;
		}
	
	
	/**
	 * Selects how character data is read back from the database.
	 *
	 * @param b true to read it with getCharacterStream(), false to read a
	 *        binary stream and decode it with the configured encoding.
	 */
	public void useCharacterStream( boolean b )
		{
		this.use_character_stream = b;
		}
		
	/**
	 * Sets the encoding used to decode character data when it is read from
	 * the database as a binary stream.
	 *
	 * @param s the name of the character encoding.
	 */
	public void setDBCharacterEncoding( String s )
		{
		this.db_character_encoding = s;
		}

	
	/**
	 * Creates a new query object bound to this repository.
	 *
	 * @return a new XMLQuery constructed with this repository.
	 */
	public XMLQuery getQueryInstance()
		{
		XMLQuery query = new XMLQuery( this );
		return query;
		}
	
	/**
	 * Starts a deposit: reads the highest id currently used in each table,
	 * prepares the statements used while depositing and inserts the row for
	 * the new object in the "depositing" state.  After this call
	 * max_xml_object_id is the id of the new object.
	 *
	 * @param path value stored in the path column.
	 * @param file value stored in the file_name column.
	 * @param reference value stored in the reference column.
	 * @param title value stored in the title column.
	 * @throws SQLException if any of the database operations fail.
	 */
	private synchronized void newObjectId( String path, String file, int reference, String title )
		throws SQLException, IOException
		{
		Statement st = con.createStatement();
		try
			{
			max_xml_object_id    = selectMaxId( st, "xml_object_id",    o_table );
			max_xml_element_id   = selectMaxId( st, "xml_element_id",   e_table );
			max_xml_attribute_id = selectMaxId( st, "xml_attribute_id", a_table );
			max_xml_cdata_id     = selectMaxId( st, "xml_cdata_id",     c_table );
			max_xml_token_id     = selectMaxId( st, "xml_token_id",     t_table );
			max_xml_word_id      = selectMaxId( st, "xml_word_id",      w_table );
			}
		finally
			{
			// the statement is only needed for the id queries
			st.close();
			}
		
		insert_xml_object = con.prepareStatement( "INSERT INTO " + o_table + " (xml_object_id,state,path,file_name,reference,title) VALUES (?, ?, ?, ?, ?, ?)" );
		update_xml_object = con.prepareStatement( "UPDATE " + o_table + " SET state = ? WHERE xml_object_id = ?" );
		insert_xml_element = con.prepareStatement( "INSERT INTO " + e_table + " (xml_element_id,xml_object_id,left_index,right_index,element_name,xml_parent_id) VALUES (?, ?, ?, ?, ?, ?)" );
		insert_xml_attribute = con.prepareStatement( "INSERT INTO " + a_table + " (xml_attribute_id,xml_element_id,name,value) VALUES (?, ?, ?, ?)" );
		insert_xml_cdata = con.prepareStatement( "INSERT INTO " + c_table + " (xml_cdata_id,xml_element_id,cdata) VALUES (?, ?, ?)" );
		update_xml_element = con.prepareStatement( "UPDATE " + e_table + " SET right_index = ? WHERE xml_element_id = ?" );
		find_xml_token = con.prepareStatement( "SELECT xml_token_id, token FROM " + t_table + " WHERE token = ?" );
		insert_xml_token = con.prepareStatement( "INSERT INTO " + t_table + " (xml_token_id,token) VALUES (?, ?)" );
		insert_xml_word = con.prepareStatement( "INSERT INTO " + w_table + " (xml_word_id,xml_cdata_id,xml_token_id,xml_element_id) VALUES (?, ?, ?, ?)" );

		insert_xml_object.clearParameters();
		insert_xml_object.setInt( 1, ++max_xml_object_id );
		insert_xml_object.setInt( 2, XML_OBJECT_DEPOSITING );
		insert_xml_object.setString( 3, path );
		insert_xml_object.setString( 4, file );
		insert_xml_object.setInt( 5, reference );
		insert_xml_object.setString( 6, title );
		insert_xml_object.executeUpdate();
		insert_xml_object.clearParameters();
		}

	/**
	 * Returns the largest value in an id column, or zero if the table is
	 * empty (the maximum is then SQL NULL).
	 */
	private int selectMaxId( Statement st, String column, String table )
		throws SQLException
		{
		ResultSet results = st.executeQuery( "SELECT max(" + column + ") FROM " + table );
		try
			{
			if ( !results.next() )
				return 0;
			int max = results.getInt( 1 );
			return results.wasNull() ? 0 : max;
			}
		finally
			{
			results.close();
			}
		}

	/**
	 * Closes all of the statements that are prepared at the start of a
	 * deposit.  Every statement is closed even if closing an earlier one
	 * fails, and statements that were never prepared are skipped.
	 *
	 * @throws SQLException the first failure met while closing, raised
	 *         after all of the statements have been attempted.
	 */
	private synchronized void cleanUpStatements()
		throws SQLException
		{
		PreparedStatement[] statements =
			{
			insert_xml_object, update_xml_object, insert_xml_element,
			insert_xml_attribute, insert_xml_cdata, update_xml_element,
			find_xml_token, insert_xml_token, insert_xml_word
			};

		SQLException first_failure = null;
		for ( int i=0; i<statements.length; i++ )
			{
			if ( statements[i] == null )
				continue;
			try
				{
				statements[i].close();
				}
			catch ( SQLException ex )
				{
				// keep going so the remaining statements are released
				if ( first_failure == null )
					first_failure = ex;
				}
			}

		if ( first_failure != null )
			throw first_failure;
		}

		
	/**
	 * Parses an XML file and stores its elements, attributes, character
	 * data and search words in the database.
	 *
	 * The input file and the statements prepared for the deposit are
	 * released whether or not the deposit succeeds.  If it fails, the
	 * object row is left in the "depositing" state.
	 *
	 * @param con connection to deposit through.
	 * @param xmlfile the XML file to parse.
	 * @param reference value stored in the object's reference column.
	 * @param title value stored in the object's title column.
	 * @return the id of the new object.
	 * @throws SQLException if a database operation fails outside parsing.
	 * @throws IOException if the file can't be read.
	 * @throws SAXException if parsing fails, including database errors
	 *         raised while handling parse events.
	 */
	public synchronized int depositXMLObject(  Connection con, File xmlfile, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		this.con = con;

		this.initWordCache( con );

		newObjectId( xmlfile.getPath(), xmlfile.getName(), reference, title );
		
		boolean deposited = false;
		FileInputStream in = null;
		try
			{
			// The parser is given raw bytes rather than a Reader.
			in = new FileInputStream( xmlfile );
			xml_reader.parse( new InputSource( in ) );
			
			update_xml_object.clearParameters();
			update_xml_object.setInt( 1, XML_OBJECT_DEPOSITED );
			update_xml_object.setInt( 2, max_xml_object_id );
			update_xml_object.executeUpdate();
			update_xml_object.clearParameters();
			deposited = true;
			}
		finally
			{
			if ( in != null )
				{
				try
					{
					in.close();
					}
				catch ( IOException ioex )
					{
					Logger.getLogger( "org.bodington" ).log( Level.WARNING, "Unable to close XML input file.", ioex );
					}
				}
			
			if ( deposited )
				cleanUpStatements();
			else
				{
				// don't let a clean up problem hide the reason for the failure
				try
					{
					cleanUpStatements();
					}
				catch ( SQLException sqlex )
					{
					Logger.getLogger( "org.bodington" ).log( Level.WARNING, "Unable to close statements after failed deposit.", sqlex );
					}
				}
			}
		
		return max_xml_object_id;
		}

	/**
	 * Walks a DOM node and its descendants, replaying them as the SAX
	 * events (startElement, characters, endElement) that this handler
	 * already knows how to store.  Text and CDATA nodes are trimmed and
	 * skipped when nothing is left; other node types are ignored.
	 *
	 * @param node the node to deposit.
	 * @throws SAXException if one of the event handlers fails.
	 */
	private void depositElement( Node node )
		throws SAXException
		{
		short node_type = node.getNodeType();

		if ( node_type == Node.ELEMENT_NODE )
			{
			Element element = (Element)node;

			// copy the DOM attributes into the form startElement expects
			NamedNodeMap dom_attributes = element.getAttributes();
			AttributesImpl sax_attributes = new AttributesImpl();
			for ( int a=0; a<dom_attributes.getLength(); a++ )
				{
				Attr attribute = (Attr)dom_attributes.item( a );
				sax_attributes.addAttribute( null, null, attribute.getName(), "", attribute.getValue() );
				}

			startElement( null, null, element.getTagName(), sax_attributes );

			NodeList children = element.getChildNodes();
			if ( children != null )
				{
				for ( int c=0; c<children.getLength(); c++ )
					depositElement( children.item( c ) );
				}

			endElement( null, null, element.getTagName() );
			return;
			}

		if ( node_type == Node.TEXT_NODE || node_type == Node.CDATA_SECTION_NODE )
			{
			String data = ((org.w3c.dom.CharacterData)node).getData();
			if ( data == null )
				return;

			String trimmed = data.trim();
			if ( trimmed.length() == 0 )
				return;

			characters( trimmed.toCharArray(), 0, trimmed.length() );
			}
		}

	/**
	 * Stores an in-memory DOM document in the database by walking the tree
	 * from its document element.  The object's path and file name are
	 * recorded as "domsource" and "dom".
	 *
	 * The statements prepared for the deposit are released whether or not
	 * the deposit succeeds.  If it fails, the object row is left in the
	 * "depositing" state.
	 *
	 * @param con connection to deposit through.
	 * @param doc the document to store.
	 * @param reference value stored in the object's reference column.
	 * @param title value stored in the object's title column.
	 * @return the id of the new object.
	 * @throws SQLException if a database operation fails outside the walk.
	 * @throws SAXException if storing any node fails.
	 */
	public synchronized int depositXMLObject(  Connection con, Document doc, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		this.con = con;
		
		this.initWordCache( con );
		
		newObjectId( "domsource", "dom", reference, title );
		
		boolean deposited = false;
		try
			{
			startDocument();
			
			depositElement( doc.getDocumentElement() );
			
			endDocument();
			
			update_xml_object.clearParameters();
			update_xml_object.setInt( 1, XML_OBJECT_DEPOSITED );
			update_xml_object.setInt( 2, max_xml_object_id );
			update_xml_object.executeUpdate();
			update_xml_object.clearParameters();
			deposited = true;
			}
		finally
			{
			if ( deposited )
				cleanUpStatements();
			else
				{
				// don't let a clean up problem hide the reason for the failure
				try
					{
					cleanUpStatements();
					}
				catch ( SQLException sqlex )
					{
					Logger.getLogger( "org.bodington" ).log( Level.WARNING, "Unable to close statements after failed deposit.", sqlex );
					}
				}
			}

		return max_xml_object_id;
		}
		

/*
	public synchronized int depositXMLObject(  Connection con, Document doc, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		File xmlfile = File.createTempFile( "metadata", ".xml", tempdir );
		FileOutputStream out = new FileOutputStream( xmlfile );
		
		//tran.transform( new DOMSource( doc ), new StreamResult( xmlfile ) );
		XmlDocument xdoc = (XmlDocument)doc;
		xdoc.write( out );
		
		out.close();
		int id = depositXMLObject( con, xmlfile, reference, title );
		xmlfile.delete();
		return id;
		}
*/	
	/**
	 * Rebuilds a stored object as a DOM document.  The object is written
	 * to a temporary file which is then parsed; the file is deleted again
	 * whether or not that succeeds.
	 *
	 * @param con connection to read the object through.
	 * @param id id of the object to fetch.
	 * @return the parsed document.
	 * @throws SQLException if reading from the database fails.
	 * @throws IOException if the temporary file can't be written or read.
	 * @throws SAXException if the regenerated XML can't be parsed.
	 * @throws ParserConfigurationException if no DOM parser can be created.
	 */
	public Document getXMLObject( Connection con, int id )
		throws SQLException, IOException, SAXException, SAXParseException, ParserConfigurationException
		{
		File xmlfile = File.createTempFile( "deposit", ".xml", tempdir );
		try
			{
			Logger.getLogger( "org.bodington" ).fine( "Temp XML File = " + xmlfile.getAbsoluteFile() );
			FileOutputStream out = new FileOutputStream( xmlfile );
			try
				{
				outputXMLObject( con, out, id );
				}
			finally
				{
				out.close();
				}
			Logger.getLogger( "org.bodington" ).fine( "Closed " + xmlfile.getAbsoluteFile() );
			
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			DocumentBuilder builder = factory.newDocumentBuilder();
			return builder.parse( xmlfile );
			}
		finally
			{
			// never leave the temporary copy behind
			xmlfile.delete();
			}
		}
		
	/**
	 * Removes an object and everything that belongs to it.  The object is
	 * first marked as deleted, then its words, character data, attributes
	 * and elements are removed, and finally the object row itself.  Rows in
	 * the token table are not touched.
	 *
	 * @param con connection to delete through.
	 * @param id id of the object to delete.
	 * @throws SQLException if any of the statements fail.
	 */
	public void deleteXMLObject( Connection con, int id )
		throws SQLException
		{
		Statement st =con.createStatement();
		try
			{
			// mark object as deleted before anything else happens
			st.executeUpdate( "UPDATE " + o_table + " SET state = " + XML_OBJECT_DELETED + " WHERE xml_object_id = " + id );
			st.executeUpdate( "DELETE FROM " + w_table + 
									" WHERE EXISTS (SELECT * FROM " + c_table + 
									" WHERE " +
									w_table + ".xml_cdata_id = " + c_table + ".xml_cdata_id AND " +
									" EXISTS (SELECT * FROM " + e_table + 
									" WHERE " + 
									e_table + ".xml_element_id = " +
									c_table + ".xml_element_id AND " +
									e_table + ".xml_object_id = " + id + ") )" );
			st.executeUpdate( "DELETE FROM " + c_table + 
									" WHERE EXISTS (SELECT * FROM " + e_table + 
									" WHERE " + 
									e_table + ".xml_element_id = " +
									c_table + ".xml_element_id AND " +
									e_table + ".xml_object_id = " + id + ")" );
									
			st.executeUpdate( "DELETE FROM " + a_table + 
									" WHERE EXISTS (SELECT * FROM " + e_table + 
									" WHERE " +
									e_table + ".xml_element_id = " +
									a_table + ".xml_element_id AND " +
									e_table + ".xml_object_id = " + id + ")" );
			// elements go last because the deletes above find their rows through them
			st.executeUpdate( "DELETE FROM " + e_table + " WHERE xml_object_id = " + id );
			st.executeUpdate( "DELETE FROM " + o_table + " WHERE xml_object_id = " + id );
			}
		finally
			{
			st.close();
			}
		}
		
	/**
	 * Writes a whole stored object to the stream as XML.
	 *
	 * @param con connection to read the object through.
	 * @param output stream that receives the XML.
	 * @param id id of the object to write.
	 * @throws SQLException if reading from the database fails.
	 * @throws IOException if writing to the stream fails.
	 */
	public void outputXMLObject( Connection con, OutputStream output, int id )
		throws SQLException, IOException
		{
		// no element selected means the entire object is written
		Integer no_selection = null;
		outputXMLObject( con, output, id, no_selection );
		}
		
	/**
	 * Writes one element of a stored object, together with its content, to
	 * the stream as XML.
	 *
	 * @param con connection to read the object through.
	 * @param output stream that receives the XML.
	 * @param id id of the object.
	 * @param eid id of the element to write.
	 * @throws SQLException if reading from the database fails.
	 * @throws IOException if writing to the stream fails.
	 */
	public void outputXMLObject( Connection con, OutputStream output, int id, int eid )
		throws SQLException, IOException
		{
		Integer selected_element = new Integer( eid );
		outputXMLObject( con, output, id, selected_element );
		}
		
	/**
	 * Writes a stored object, or one element of it, to the stream as UTF-8
	 * encoded XML.
	 *
	 * Elements are visited in order of their left/right indexes: index i is
	 * either the left index of an element (its start tag or text is
	 * written) or the right index of one (its end tag is written).  The
	 * walk stops at the first index that belongs to no element.
	 *
	 * Attribute values are escaped so that values containing quotes,
	 * ampersands or angle brackets still produce well formed XML.  A null
	 * attribute value is written as an empty string.
	 *
	 * @param con connection to read the object through.
	 * @param output stream that receives the XML.
	 * @param id id of the object.
	 * @param eid id of the element to write, or null for the whole object.
	 * @throws SQLException if reading from the database fails.
	 * @throws IOException if reading character data or writing fails.
	 */
	private void outputXMLObject( Connection con, OutputStream output, int id, Integer eid )
		throws SQLException, IOException
		{
		PrintWriter out = new PrintWriter( new OutputStreamWriter( output, "utf-8" ) );
		out.println( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" );
		
		// each statement is closed even when writing fails part way through
		PreparedStatement e_st = con.prepareStatement( "SELECT * FROM " + e_table + " WHERE xml_object_id = ? AND (left_index = ? OR right_index = ?)" );
		try
			{
			PreparedStatement a_st = con.prepareStatement( "SELECT * FROM " + a_table + " WHERE xml_element_id = ? ORDER BY xml_attribute_id" );
			try
				{
				PreparedStatement c_st = con.prepareStatement( "SELECT * FROM " + c_table + " WHERE xml_element_id = ? ORDER BY xml_cdata_id" );
				try
					{
					writeObjectElements( out, e_st, a_st, c_st, id, eid );
					out.flush();
					}
				finally
					{
					c_st.close();
					}
				}
			finally
				{
				a_st.close();
				}
			}
		finally
			{
			e_st.close();
			}
		}

	/**
	 * Walks the element indexes of an object in order and writes start
	 * tags, text and end tags for the part that is selected.
	 */
	private void writeObjectElements( PrintWriter out, PreparedStatement e_st, PreparedStatement a_st,
										PreparedStatement c_st, int id, Integer eid )
		throws SQLException, IOException
		{
		int i, left, right, element_id;
		String name;
		ResultSet results;

		// output is on from the start unless a single element was asked for
		boolean switched_on= (eid == null);
		int selected_right=0;

		for ( i=0; true; i++ )
			{
			e_st.clearParameters();
			e_st.setInt( 1, id );
			e_st.setInt( 2, i );
			e_st.setInt( 3, i );
			results = e_st.executeQuery();
			if ( !results.next() )
				{
				results.close();
				break;
				}
				
			element_id = results.getInt( 1 );
			left = results.getInt( 3 );
			right = results.getInt( 4 );
			name = results.getString( 5 );
			results.close();
			
			if ( left == i )
				{
				// entering an element
				if ( eid != null && element_id == eid.intValue() )
					{
					switched_on = true;
					selected_right = right;
					}
					
				if ( switched_on )
					{
					if ( name.equals( "xmlrepository:pcdata" ) )
						writeCharacterData( out, c_st, element_id );
					else
						writeStartTag( out, a_st, name, element_id );
					}
				}
			else
				{
				// leaving an element
				if ( switched_on && !name.equals( "xmlrepository:pcdata" ) )
					{
					out.print( "</" );
					out.print( name );
					out.println( ">" );
					}
				if ( eid!=null && right == selected_right )
					switched_on = false;
				}
			}
		}

	/**
	 * Writes the character data stored for a text pseudo-element.
	 */
	private void writeCharacterData( PrintWriter out, PreparedStatement c_st, int element_id )
		throws SQLException, IOException
		{
		int b;
		Reader text=null;

		c_st.clearParameters();
		c_st.setInt( 1, element_id );
		ResultSet results = c_st.executeQuery();
		if ( results.next() )
			{
			// intended to trap use of older JDBC drivers that lack
			// the getCharacterStream() method.  If calling the method
			// causes a linkage error then the repository will fall
			// back to opening a binary stream and will make assumptions
			// about the character encoding.
			if ( use_character_stream )
				{
				try
					{
					text = results.getCharacterStream( 3 );
					}
				catch ( LinkageError lerr )
					{
					// getCharacterStream won't be called again.
					use_character_stream = false;
					}
				}

			if ( !use_character_stream )
				{
				InputStream bytes = results.getBinaryStream( 3 );
				if ( bytes != null )
					text = new InputStreamReader( bytes, db_character_encoding );
				}
			
			// a null stream means there is nothing to write
			if ( text != null )
				{
				while ( (b = text.read()) >= 0 )
					writeEscapedChar( out, b, false );
				}
			}
		results.close();
		}

	/**
	 * Writes the start tag of an element including all of its attributes.
	 */
	private void writeStartTag( PrintWriter out, PreparedStatement a_st, String name, int element_id )
		throws SQLException
		{
		out.print( "<" );
		out.print( name );
		
		a_st.clearParameters();
		a_st.setInt( 1, element_id );
		ResultSet results = a_st.executeQuery();
		while ( results.next() )
			{
			String att_name = results.getString( 3 );
			String att_value = results.getString( 4 );
			
			out.print( " " );
			out.print( att_name );
			out.print( "=\"" );
			if ( att_value != null )
				{
				for ( int n=0; n<att_value.length(); n++ )
					writeEscapedChar( out, att_value.charAt( n ), true );
				}
			out.print( "\"" );
			}
		out.print( ">" );
		results.close();
		}

	/**
	 * Writes one character, replacing markup characters with entity
	 * references and characters above 127 with numeric references.  Double
	 * quotes are only escaped inside attribute values.
	 */
	private void writeEscapedChar( PrintWriter out, int c, boolean in_attribute )
		{
		switch ( c )
			{
			case '<':
				out.print( "&lt;" );
				break;
			case '>':
				out.print( "&gt;" );
				break;
			case '&':
				out.print( "&amp;" );
				break;
			case '"':
				if ( in_attribute )
					out.print( "&quot;" );
				else
					out.print( (char)c );
				break;
			default:
				// NOTE(review): each UTF-16 unit is referenced separately, so
				// characters outside the basic plane are not handled here.
				if ( c>127 )
					out.print( "&#" + c + ";" );
				else
					out.print( (char)c );
			}
		}
		
	/**
	 * Remembers the locator supplied by the SAX parser.
	 *
	 * @param l the parser's locator.
	 */
	public void setDocumentLocator( Locator l )
		{
		this.loc = l;
		}
		
	/**
	 * Resets the per-document state before any elements arrive: no text is
	 * pending, no elements are open and the index counter is back at zero.
	 */
	public void startDocument ()
		throws SAXException
		{
		in_text = false;
		cdata_buffer = new StringBuffer();
		visitation = 0;
		stack = new Stack();
		}

	/**
	 * Checks that every element that was started has also been ended.
	 *
	 * @throws SAXException if elements are still open.
	 */
	public void endDocument ()
		throws SAXException
		{
		if ( stack.isEmpty() )
			return;

		throw new SAXException( "Unmatched start/end element tags." );
		}

	/**
	 * Stores a new element row and one row for each of its attributes.
	 * The element gets the next index as its left index and -1 as a
	 * placeholder right index, and its id is pushed on the stack of open
	 * elements.  Any text that was being collected is stored first.
	 *
	 * @param uri not used.
	 * @param localName not used.
	 * @param qName stored as the element name.
	 * @param attributes the element's attributes.
	 * @throws SAXException wrapping any failure, which is also logged.
	 */
	public void startElement( String uri, String localName, String qName, Attributes attributes ) 
      throws SAXException
		{
		try
			{
			// a start tag ends any run of text that was in progress
			if ( in_text )
				exitText();

			Integer parent_id = stack.empty() ? null : (Integer)stack.peek();

			insert_xml_element.clearParameters();
			int new_element_id = ++max_xml_element_id;
			insert_xml_element.setInt( 1, new_element_id );
			insert_xml_element.setInt( 2, max_xml_object_id );
			insert_xml_element.setInt( 3, visitation++ );
			insert_xml_element.setInt( 4, -1 );
			insert_xml_element.setString( 5, qName );
			if ( parent_id != null )
				insert_xml_element.setInt( 6, parent_id.intValue() );
			else
				insert_xml_element.setNull( 6, Types.INTEGER );
			insert_xml_element.executeUpdate();
			insert_xml_element.clearParameters();
			
			stack.push( new Integer( new_element_id ) );
			
			int attribute_count = attributes.getLength();
			for ( int n=0; n<attribute_count; n++ )
				{
				insert_xml_attribute.clearParameters();
				insert_xml_attribute.setInt( 1, ++max_xml_attribute_id );
				insert_xml_attribute.setInt( 2, new_element_id );
				insert_xml_attribute.setString( 3, attributes.getQName( n ) );
				insert_xml_attribute.setString( 4, attributes.getValue( n ) );
				insert_xml_attribute.executeUpdate();
				}
			insert_xml_attribute.clearParameters();
			}
		catch ( Exception ex )
			{
			Logger log = Logger.getLogger( "org.bodington" );
			log.logp( Level.SEVERE, "XMLRepository", "startElement", ex.getMessage(), ex );
			throw new SAXException( ex );
			}
		}

	/**
	 * Closes the most recently opened element by giving it the next index
	 * as its right index.  Any text that was being collected is stored
	 * first.
	 *
	 * @param uri not used.
	 * @param localName not used.
	 * @param qName not used.
	 * @throws SAXException wrapping any failure.
	 */
	public void endElement( String uri, String localName, String qName ) 
		throws SAXException
		{
		try
			{
			// an end tag ends any run of text that was in progress
			if ( in_text )
				exitText();

			int closing_id = ((Integer)stack.pop()).intValue();
	        
			update_xml_element.clearParameters();
			update_xml_element.setInt( 1, visitation++ );
			update_xml_element.setInt( 2, closing_id );
			update_xml_element.executeUpdate();
			update_xml_element.clearParameters();
			}
		catch ( Exception ex )
			{
			throw new SAXException( ex );
			}
		}

	/**
	 * Starts a run of text by storing a pretend element, named
	 * "xmlrepository:pcdata", as a child of the element that is currently
	 * open.  The pretend element takes the next two indexes as its left
	 * and right index.
	 *
	 * @throws SQLException if the element row can't be inserted.
	 */
	public void enterText()
		throws SQLException, IOException
		{
		Integer parent_id = stack.empty() ? null : (Integer)stack.peek();

		insert_xml_element.clearParameters();
		insert_xml_element.setInt( 1, ++max_xml_element_id );
		insert_xml_element.setInt( 2, max_xml_object_id );
		// left and right index are consecutive: text has no children
		insert_xml_element.setInt( 3, visitation++ );
		insert_xml_element.setInt( 4, visitation++ );
		insert_xml_element.setString( 5, "xmlrepository:pcdata" );
		if ( parent_id != null )
			insert_xml_element.setInt( 6, parent_id.intValue() );
		else
			insert_xml_element.setNull( 6, Types.INTEGER );
		insert_xml_element.executeUpdate();
		insert_xml_element.clearParameters();
		
		in_text = true;
		}
		
	/**
	 * Ends a run of text: stores the collected characters against the most
	 * recently inserted element, indexes the words in them and empties the
	 * buffer.
	 *
	 * @throws SQLException if storing the text or its words fails.
	 */
	public void exitText()
		throws SQLException, IOException
		{
		String collected = cdata_buffer.toString();

		insert_xml_cdata.clearParameters();
		insert_xml_cdata.setInt( 1, ++max_xml_cdata_id );
		insert_xml_cdata.setInt( 2, max_xml_element_id );
		insert_xml_cdata.setString( 3, collected );
		insert_xml_cdata.executeUpdate();
		
		saveTokens( max_xml_element_id, max_xml_cdata_id, cdata_buffer );
		
		// ready for the next run of text
		in_text = false;
		cdata_buffer.setLength( 0 );
		}

    /**
     * Stores one occurrence of a word.  The word's token id is taken from
     * the in-memory cache when possible, otherwise from the token table,
     * and a new token row is created if the word has never been seen.  A
     * row linking the token to the character data and element is then
     * added to the word table.
     *
     * @param xml_element_id id of the element that contains the word.
     * @param xml_cdata_id id of the character data that contains the word.
     * @param cdata buffer holding the text.
     * @param start index of the first character of the word.
     * @param end index just past the last character of the word.
     * @param continuation not currently recorded.
     * @throws SQLException if a database operation fails.
     */
    private void saveToken( int xml_element_id, int xml_cdata_id, StringBuffer cdata, int start, int end, boolean continuation )
		throws SQLException
        {
        Logger log = Logger.getLogger( "org.bodington" );
        int n_id=-1;
        String word = cdata.substring( start, end );
        
        log.fine( "Storing word [" + word + "]" );
        
        // the lookup is by token text so it has to use the token -> id map
        Integer id = (Integer)reverse_word_cache.get( word );
        if ( id != null )
            n_id = id.intValue();
        else
            {
            find_xml_token.setString( 1, word );
            ResultSet results = find_xml_token.executeQuery();
            try
                {
                while ( results.next() )
                    {
                    // the database comparison may be looser than equals()
                    if ( word.equals( results.getString( 2 ) ) )
                        {
                        n_id = results.getInt( 1 );
                        break;
                        }
                    }
                }
            finally
                {
                results.close();
                }
        
            // this word isn't in the tokens table
            // so it needs to be put there
            if ( n_id == -1 )
                {
                log.fine( "Storing new token [" + word + "]" );
                n_id = ++max_xml_token_id;
                insert_xml_token.setInt( 1, n_id );
                insert_xml_token.setString( 2, word );
                insert_xml_token.executeUpdate();
                insert_xml_token.clearParameters();
                }

            // remember the token so the next occurrence needs no query
            id = new Integer( n_id );
            word_cache.put( id, word );
            reverse_word_cache.put( word, id );
            }
        
        // finally the word can be stored
        insert_xml_word.setInt( 1, ++max_xml_word_id );
        insert_xml_word.setInt( 2, xml_cdata_id );
        insert_xml_word.setInt( 3, n_id );
        insert_xml_word.setInt( 4, xml_element_id );
        insert_xml_word.executeUpdate();
        insert_xml_word.clearParameters();
        log.fine( "Word stored [" + word + "]" );
        }
        

    /**
     * Splits text into alternating runs of letters and non-letters and
     * stores each run as a word.  A run is also cut when it reaches 33
     * characters, in which case the rest of the run is stored as further
     * words.
     *
     * @param element_id id of the element that contains the text.
     * @param cdata_id id of the character data row that holds the text.
     * @param cdata the text to split.
     * @throws SQLException if storing a word fails.
     */
    private void saveTokens( int element_id, int  cdata_id, StringBuffer cdata )
		throws SQLException
        {
        final int length = cdata.length();
        if ( length == 0 )
            return;
            
        // true while the current run is made of letters
        boolean in_letters = Character.isLetter( cdata.charAt( 0 ) );
        int run_start = 0;
        
        for ( int pos=0; pos<length; pos++ )
            {
            boolean at_end = (pos+1) >= length;
            // zero stands for "no more characters"
            char following = at_end ? (char)0 : cdata.charAt( pos+1 );
            boolean run_is_full = (pos-run_start) == 32;
            
            if ( in_letters )
                {
                if ( following==0 || !Character.isLetter( following ) )
                    {
                    // last letter of the word
                    saveToken( element_id, cdata_id, cdata, run_start, pos+1, false );
                    run_start = pos+1;
                    in_letters = false;
                    }
                else if ( run_is_full )
                    {
                    // word is too long, the rest carries on as another word
                    saveToken( element_id, cdata_id, cdata, run_start, pos+1, true );
                    run_start = pos+1;
                    }
                }
            else
                {
                if ( at_end || Character.isLetter( following ) )
                    {
                    // last character before a word or the end of the text
                    saveToken( element_id, cdata_id, cdata, run_start, pos+1, false );
                    run_start = pos+1;
                    in_letters = true;
                    }
                else if ( run_is_full )
                    {
                    saveToken( element_id, cdata_id, cdata, run_start, pos+1, false );
                    run_start = pos+1;
                    }
                }
            }
        }
		
	/**
	 * Collects character data reported by the parser.  Collection only
	 * starts at a chunk that contains something other than white space;
	 * from then on every chunk is appended whole until the text is stored.
	 *
	 * @param ch the characters.
	 * @param start index of the first character to use.
	 * @param length number of characters to use.
	 * @throws SAXException if the text would pass 16k, or wrapping any
	 *         other failure.
	 */
	public void characters(char[] ch, int start, int length )
   	throws SAXException
   	{
   	//for debugging
   	Logger.getLogger( "org.bodington" ).fine( "XMLRepository.characters(char[] ch, int start, int length )" );
   	
   	try
   		{
   		if ( !in_text )
   			{
   			// look for the first character that isn't white space
   			int limit = start+length;
   			int pos = start;
   			while ( pos<limit && Character.isWhitespace( ch[pos] ) )
   				pos++;
   			if ( pos<limit )
   				enterText();
   			}
   		
   		// chunks of nothing but white space between elements are dropped
   		if ( !in_text )
   			return;
   		
   		if ( (cdata_buffer.length() + length ) > (16*1024) )
   			throw new SAXException( "Unable to support more than 16k in CDATA." );
   		
   		cdata_buffer.append( ch, start, length );
   		}
   	catch ( Exception ex )
   		{
   		throw new SAXException( ex );
   		}
   	}
   	

	
	}
