/* ======================================================================
The Bodington System Software License, Version 1.0
  
Copyright (c) 2001 The University of Leeds.  All rights reserved.
  
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1.  Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2.  Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3.  The end-user documentation included with the redistribution, if any,
must include the following acknowledgement:  "This product includes
software developed by the University of Leeds
(http://www.bodington.org/)."  Alternately, this acknowledgement may
appear in the software itself, if and wherever such third-party
acknowledgements normally appear.

4.  The names "Bodington", "Nathan Bodington", "Bodington System",
"Bodington Open Source Project", and "The University of Leeds" must not be
used to endorse or promote products derived from this software without
prior written permission. For written permission, please contact
d.gardner@leeds.ac.uk.

5.  The name "Bodington" may not appear in the name of products derived
from this software without prior written permission of the University of
Leeds.

THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  TITLE,  THE IMPLIED WARRANTIES 
OF QUALITY  AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO 
EVENT SHALL THE UNIVERSITY OF LEEDS OR ITS CONTRIBUTORS BE LIABLE FOR 
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE.
=========================================================

This software was originally created by the University of Leeds and may contain voluntary 
contributions from others.  For more information on the Bodington Open Source Project, please 
see http://bodington.org/

====================================================================== */

package org.bodington.xml;

import java.sql.*;
import java.io.*;
import java.text.*;

import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;

import org.apache.log4j.Logger;

import org.xml.sax.*;
import org.xml.sax.helpers.*;
import org.w3c.dom.*;

import javax.xml.parsers.*;  
import java.util.Stack;
import java.util.Vector;
import java.util.Hashtable;
import java.util.Locale;

public class XMLRepository extends DefaultHandler
	{
    
    // Class-wide log4j logger.
    private static Logger log = Logger.getLogger(XMLRepository.class);
    
	// Values written to the "state" column of the objects table.
	// DEPOSITING is set when the object row is first inserted and
	// DEPOSITED once the whole document has been stored.
	public final int XML_OBJECT_DEPOSITING		= 1;
	public final int XML_OBJECT_DEPOSITED		= 2;
	public final int XML_OBJECT_UPDATING		= 3;
	public final int XML_OBJECT_DELETED			= 4;
	
	// Names of the database tables: objects, elements, attributes,
	// character data, tokens and words.
	public String o_table, e_table, a_table, c_table, t_table, w_table;


	// Connection used by the deposit currently in progress.
	private Connection con;
	// SAX reader that feeds parse events back into this handler.
	private XMLReader xml_reader;
        
        // Splits character data into words for indexing.
        private BreakIterator boundary;
        // In-memory cache of the tokens table.
        private WordRepository word_repository;

	// these statements are recreated every time an object is deposited
	PreparedStatement insert_xml_object, update_xml_object, insert_xml_element, 
	            insert_xml_attribute, insert_xml_cdata, update_xml_element,
	            insert_xml_token, insert_xml_word;
	            
	// Highest primary key handed out so far for each table; incremented
	// before each insert.
	private int max_xml_object_id, max_xml_element_id, max_xml_attribute_id, 
	            max_xml_cdata_id, max_xml_token_id, max_xml_word_id;

	// IDs (as Integer) of the elements that are currently open.
	private Stack stack;
	// Locator supplied by the SAX parser, if any.
	private Locator loc;
	// Counter that supplies the left_index/right_index values of elements.
	private int visitation;
	
	// True while character data is being accumulated in cdata_buffer.
	private boolean in_text;
	private StringBuffer cdata_buffer;

	//private Hashtable word_cache;
	//private Hashtable reverse_word_cache;

	// When true, character data is read back with getCharacterStream();
	// otherwise a binary stream is decoded using db_character_encoding.
	private boolean use_character_stream;
	private String db_character_encoding;

	// Directory for temporary files; null is passed straight through to
	// File.createTempFile().
	private File tempdir=null;
	
    /**
     * Creates a repository that stores XML in the six named tables and
     * sets up a non-validating SAX reader that reports to this object.
     * @param driver_class_name Class name of the SAX driver to load, or
     *        null to use whatever XMLReaderFactory creates by default.
     * @param o_table Name of the XML Objects table.
     * @param e_table Name of the XML Elements table.
     * @param a_table Name of the XML Attributes table.
     * @param c_table Name of the XML CDATA table.
     * @param t_table Name of the XML Tokens table.
     * @param w_table Name of the XML Words table.
     * @throws SAXException Thrown if the parser can't be set up.
     */
	public XMLRepository( String driver_class_name, 
                         String o_table, 
                         String e_table, 
                         String a_table, 
                         String c_table,
                         String t_table,
                         String w_table
                         )
		throws SAXException
	{
		// Remember where everything is stored.
		this.o_table = o_table;
		this.e_table = e_table;
		this.a_table = a_table;
		this.c_table = c_table;
		this.t_table = t_table;
		this.w_table = w_table;

		// Defaults for reading character data back out of the database.
		this.use_character_stream = true;
		this.db_character_encoding = "UTF-16LE";

		// This object handles the content events of its own reader.
		this.xml_reader = ( driver_class_name == null )
			? XMLReaderFactory.createXMLReader()
			: XMLReaderFactory.createXMLReader( driver_class_name );
		this.xml_reader.setContentHandler( this );
		this.xml_reader.setFeature( "http://xml.org/sax/features/validation", false );
		this.loc = null;

		// Word indexing uses English rules.
		this.boundary = BreakIterator.getWordInstance( Locale.ENGLISH );
		this.word_repository = new WordRepository( Locale.ENGLISH );
	}

	/**
	 * Sets the directory in which temporary files are created.
	 * @param base An existing directory, or null.
	 * @throws IOException If base is non-null and either doesn't exist or
	 *         isn't a directory.
	 */
	public void setTempDirectory( File base )
	    throws IOException
	{
		boolean supplied = ( base != null );

		if ( supplied && !base.exists() )
			throw new IOException( "Temporary file directory doesn't exist." );

		if ( supplied && !base.isDirectory() )
			throw new IOException( "Temporary file directory isn't a directory." );

		this.tempdir = base;
	}
		
	/**
	 * Loads every row of the tokens table into the word repository, unless
	 * the repository reports that it is already loaded.  Before loading,
	 * the tokens and words tables are checked for the expected number of
	 * columns (5 and 6 respectively).
	 * @param con Connection used to read the tables.
	 * @throws SQLException If a table has an unexpected column count or a
	 *         query fails.
	 */
	void initWordCache( Connection con )
		throws SQLException
		{
		if ( word_repository.isLoaded() )
			return;

		Statement st = con.createStatement();
		ResultSet results = null;
		try
			{
			// silly query to get table fields
			results = st.executeQuery( "SELECT * FROM " + t_table + " WHERE xml_token_id<0" );
			if ( results.getMetaData().getColumnCount() != 5 )
				throw new SQLException( "Expected 5 columns in table " + t_table + ". Consult Bodington documentation." );
			results.close();

			results = st.executeQuery( "SELECT * FROM " + w_table + " WHERE xml_word_id<0" );
			if ( results.getMetaData().getColumnCount() != 6 )
				throw new SQLException( "Expected 6 columns in table " + w_table + ". Consult Bodington documentation." );
			results.close();

			results = st.executeQuery( "SELECT xml_token_id, tertiary_id, secondary_id, primary_id, token FROM " + t_table );

			log.debug( "Loading cache of search tokens." );
			while ( results.next() )
				{
				int idi = results.getInt( 1 );
				int id3 = results.getInt( 2 );
				int id2 = results.getInt( 3 );
				int id1 = results.getInt( 4 );
				String source = results.getString( 5 );

				word_repository.loadWord( new Word( source, id1, id2, id3, idi, null, null, null ) );
				}
			results.close();
			results = null;
			}
		finally
			{
			// Release the JDBC resources on every exit path; a failure to
			// close must not hide the exception that brought us here.
			if ( results != null )
				{
				try
					{
					results.close();
					}
				catch ( SQLException ex )
					{
					log.warn( "Unable to close result set.", ex );
					}
				}
			try
				{
				st.close();
				}
			catch ( SQLException ex )
				{
				log.warn( "Unable to close statement.", ex );
				}
			}

		word_repository.completeLoading();
		log.debug( "Loaded cache of search tokens." );
		}

	
	/**
	 * Chooses how stored character data is read back from the database.
	 * @param b true to read it with ResultSet.getCharacterStream(); false
	 *        to read a binary stream and decode it using the configured
	 *        database character encoding.
	 */
	public void useCharacterStream( boolean b )
	{
		this.use_character_stream = b;
	}
		
	/**
	 * Sets the encoding used to decode character data when it is read
	 * from the database as a binary stream.
	 * @param s Name of the character encoding.
	 */
	public void setDBCharacterEncoding( String s )
	{
		this.db_character_encoding = s;
	}

	
	/**
	 * Creates a new query object bound to this repository.
	 * @return A new XMLQuery constructed with this repository.
	 */
	public XMLQuery getQueryInstance()
	{
		XMLQuery query = new XMLQuery( this );
		return query;
	}
	
	/**
	 * Prepares for a deposit: reads the current highest ID from each of the
	 * six tables, creates the prepared statements used while depositing and
	 * inserts the new object row in the XML_OBJECT_DEPOSITING state.  The
	 * new object's ID is left in max_xml_object_id.
	 * @param path Value stored in the object's path column.
	 * @param file Value stored in the object's file_name column.
	 * @param reference Value stored in the object's reference column.
	 * @param title Value stored in the object's title column.
	 * @throws SQLException If any database operation fails.
	 */
	private synchronized void newObjectId( String path, String file, int reference, String title )
		throws SQLException, IOException
		{
		Statement st = con.createStatement();
		try
			{
			max_xml_object_id    = selectMaxId( st, "xml_object_id",    o_table );
			max_xml_element_id   = selectMaxId( st, "xml_element_id",   e_table );
			max_xml_attribute_id = selectMaxId( st, "xml_attribute_id", a_table );
			max_xml_cdata_id     = selectMaxId( st, "xml_cdata_id",     c_table );
			max_xml_token_id     = selectMaxId( st, "xml_token_id",     t_table );
			max_xml_word_id      = selectMaxId( st, "xml_word_id",      w_table );
			}
		finally
			{
			// the statement is only needed for the max() queries
			st.close();
			}
		
		insert_xml_object = con.prepareStatement( "INSERT INTO " + o_table + " (xml_object_id,state,path,file_name,reference,title) VALUES (?, ?, ?, ?, ?, ?)" );
		update_xml_object = con.prepareStatement( "UPDATE " + o_table + " SET state = ? WHERE xml_object_id = ?" );
		insert_xml_element = con.prepareStatement( "INSERT INTO " + e_table + " (xml_element_id,xml_object_id,left_index,right_index,element_name,xml_parent_id) VALUES (?, ?, ?, ?, ?, ?)" );
		insert_xml_attribute = con.prepareStatement( "INSERT INTO " + a_table + " (xml_attribute_id,xml_element_id,name,value) VALUES (?, ?, ?, ?)" );
		insert_xml_cdata = con.prepareStatement( "INSERT INTO " + c_table + " (xml_cdata_id,xml_element_id,cdata) VALUES (?, ?, ?)" );
		update_xml_element = con.prepareStatement( "UPDATE " + e_table + " SET right_index = ? WHERE xml_element_id = ?" );
		insert_xml_token = con.prepareStatement( "INSERT INTO " + t_table + " (xml_token_id,tertiary_id,secondary_id,primary_id,token) VALUES (?, ?, ?, ?, ?)" );
		insert_xml_word = con.prepareStatement( "INSERT INTO " + w_table + " (xml_word_id,xml_cdata_id,xml_element_id,xml_token_id,pos,flags) VALUES (?, ?, ?, ?, ?, ?)" );

		insert_xml_object.clearParameters();
		insert_xml_object.setInt( 1, ++max_xml_object_id );
		insert_xml_object.setInt( 2, XML_OBJECT_DEPOSITING );
		insert_xml_object.setString( 3, path );
		insert_xml_object.setString( 4, file );
		insert_xml_object.setInt( 5, reference );
		insert_xml_object.setString( 6, title );
		insert_xml_object.executeUpdate();
		insert_xml_object.clearParameters();
		}

	/**
	 * Returns the highest value of an ID column, or 0 if the table holds
	 * no rows (max() is then SQL NULL).
	 */
	private int selectMaxId( Statement st, String column, String table )
		throws SQLException
		{
		ResultSet results = st.executeQuery( "SELECT max(" + column + ") FROM " + table );
		try
			{
			results.next();
			int max = results.getInt( 1 );
			return results.wasNull() ? 0 : max;
			}
		finally
			{
			results.close();
			}
		}

	/**
	 * Closes the prepared statements created for a deposit.  Every
	 * statement is attempted even if an earlier close fails, and statements
	 * that were never created are skipped.
	 * @throws SQLException The first failure encountered while closing,
	 *         rethrown after all statements have been attempted.
	 */
	private synchronized void cleanUpStatements()
		throws SQLException
		{
		PreparedStatement[] statements =
			{
			insert_xml_object,
			update_xml_object,
			insert_xml_element,
			insert_xml_attribute,
			insert_xml_cdata,
			update_xml_element,
			insert_xml_token,
			insert_xml_word
			};

		SQLException first_failure = null;
		for ( int i=0; i<statements.length; i++ )
			{
			if ( statements[i] == null )
				continue;
			try
				{
				statements[i].close();
				}
			catch ( SQLException ex )
				{
				// keep going so the remaining statements are not leaked
				if ( first_failure == null )
					first_failure = ex;
				}
			}

		if ( first_failure != null )
			throw first_failure;
		}

		
	/**
	 * Parses an XML file and stores its content in the repository tables.
	 * The object row is marked XML_OBJECT_DEPOSITED once parsing has
	 * completed.
	 * @param con Connection used for all database work.
	 * @param xmlfile The XML file to parse.
	 * @param reference Value stored in the object's reference column.
	 * @param title Value stored in the object's title column.
	 * @return The ID of the new object.
	 * @throws SQLException If a database operation fails.
	 * @throws IOException If the file can't be read.
	 * @throws SAXException If the file can't be parsed or stored.
	 */
	public synchronized int depositXMLObject(  Connection con, File xmlfile, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		this.con = con;

		this.initWordCache( con );

		newObjectId( xmlfile.getPath(), xmlfile.getName(), reference, title );
		
		boolean completed = false;
		try
			{
			FileInputStream in = new FileInputStream( xmlfile );
			try
				{
				xml_reader.parse( new InputSource( in ) );
				}
			finally
				{
				// the stream is ours, so release it whether or not parsing worked
				in.close();
				}
			
			update_xml_object.clearParameters();
			update_xml_object.setInt( 1, XML_OBJECT_DEPOSITED );
			update_xml_object.setInt( 2, max_xml_object_id );
			update_xml_object.executeUpdate();
			update_xml_object.clearParameters();
			completed = true;
			}
		finally
			{
			if ( completed )
				cleanUpStatements();
			else
				{
				// Already failing: still release the statements, but don't
				// let a close failure replace the original exception.
				try
					{
					cleanUpStatements();
					}
				catch ( SQLException ex )
					{
					log.warn( "Unable to close statements after failed deposit.", ex );
					}
				}
			}
		
		return max_xml_object_id;
		}

	/**
	 * Walks a DOM subtree and replays it through this object's SAX
	 * callbacks.  Elements produce startElement/endElement calls around
	 * their children; text and CDATA nodes are trimmed and, if anything is
	 * left, passed to characters().  Other node types are ignored.
	 * @param node Root of the subtree to deposit.
	 * @throws SAXException If one of the callbacks fails.
	 */
	private void depositElement( Node node )
		throws SAXException
	{
		short type = node.getNodeType();

		if ( type == Node.ELEMENT_NODE )
		{
			Element element = (Element)node;

			// Copy the DOM attributes into a SAX attribute list.
			NamedNodeMap dom_atts = element.getAttributes();
			AttributesImpl sax_atts = new AttributesImpl();
			for ( int a=0; a<dom_atts.getLength(); a++ )
			{
				Attr attribute = (Attr)dom_atts.item( a );
				sax_atts.addAttribute( null, null, attribute.getName(), "", attribute.getValue() );
			}

			startElement( null, null, element.getTagName(), sax_atts );

			NodeList children = element.getChildNodes();
			if ( children != null )
			{
				for ( int c=0; c<children.getLength(); c++ )
					depositElement( children.item( c ) );
			}

			endElement( null, null, element.getTagName() );
			return;
		}

		if ( type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE )
		{
			String data = ((org.w3c.dom.CharacterData)node).getData();
			if ( data == null )
				return;

			String trimmed = data.trim();
			if ( trimmed.length() > 0 )
				characters( trimmed.toCharArray(), 0, trimmed.length() );
		}
	}

	/**
	 * Stores the content of a DOM document in the repository tables by
	 * replaying it through the SAX callbacks of this object.  The object
	 * row is recorded with path "domsource" and file name "dom".
	 * @param con Connection used for all database work.
	 * @param doc The document to deposit.
	 * @param reference Value stored in the object's reference column.
	 * @param title Value stored in the object's title column.
	 * @return The ID of the new object.
	 * @throws SQLException If a database operation fails.
	 * @throws SAXException If the document can't be stored.
	 */
	public synchronized int depositXMLObject(  Connection con, Document doc, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		this.con = con;
		
		this.initWordCache( con );
		
		newObjectId( "domsource", "dom", reference, title );
		
		boolean completed = false;
		try
			{
			startDocument();
			depositElement( doc.getDocumentElement() );
			endDocument();
			
			update_xml_object.clearParameters();
			update_xml_object.setInt( 1, XML_OBJECT_DEPOSITED );
			update_xml_object.setInt( 2, max_xml_object_id );
			update_xml_object.executeUpdate();
			update_xml_object.clearParameters();
			completed = true;
			}
		finally
			{
			if ( completed )
				cleanUpStatements();
			else
				{
				// Already failing: still release the statements, but don't
				// let a close failure replace the original exception.
				try
					{
					cleanUpStatements();
					}
				catch ( SQLException ex )
					{
					log.warn( "Unable to close statements after failed deposit.", ex );
					}
				}
			}

		return max_xml_object_id;
		}
		

/*
	public synchronized int depositXMLObject(  Connection con, Document doc, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		File xmlfile = File.createTempFile( "metadata", ".xml", tempdir );
		FileOutputStream out = new FileOutputStream( xmlfile );
		
		//tran.transform( new DOMSource( doc ), new StreamResult( xmlfile ) );
		XmlDocument xdoc = (XmlDocument)doc;
		xdoc.write( out );
		
		out.close();
		int id = depositXMLObject( con, xmlfile, reference, title );
		xmlfile.delete();
		return id;
		}
*/	
	/**
	 * Rebuilds a stored object as a DOM document.  The object is written
	 * to a temporary file in the configured temporary directory, parsed
	 * from there, and the file is deleted again whether or not that
	 * succeeds.
	 * @param con Connection used to read the object.
	 * @param id ID of the object to fetch.
	 * @return The parsed document.
	 * @throws SQLException If reading from the database fails.
	 * @throws IOException If the temporary file can't be written or read.
	 * @throws SAXException If the regenerated XML can't be parsed.
	 * @throws ParserConfigurationException If no DOM parser can be created.
	 */
	public Document getXMLObject( Connection con, int id )
		throws SQLException, IOException, SAXException, SAXParseException, ParserConfigurationException
		{
		File xmlfile = File.createTempFile( "deposit", ".xml", tempdir );
		log.debug( "Temp XML File = " + xmlfile.getAbsoluteFile() );
		try
			{
			FileOutputStream out = new FileOutputStream( xmlfile );
			try
				{
				outputXMLObject( con, out, id );
				}
			finally
				{
				out.close();
				}
			log.debug( "Closed " + xmlfile.getAbsoluteFile() );
			
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			DocumentBuilder builder = factory.newDocumentBuilder();
			return builder.parse( xmlfile );
			}
		finally
			{
			// never leave the temporary copy behind, even on failure
			if ( !xmlfile.delete() )
				log.warn( "Unable to delete temporary file " + xmlfile.getAbsoluteFile() );
			}
		}
		
	/**
	 * Removes an object and everything stored for it.  The object row is
	 * first marked XML_OBJECT_DELETED, then its words, character data,
	 * attributes and elements are deleted, and finally the object row
	 * itself.  Rows in the tokens table are not touched.
	 * @param con Connection used for the deletes.
	 * @param id ID of the object to delete.
	 * @throws SQLException If any statement fails.
	 */
	public void deleteXMLObject( Connection con, int id )
		throws SQLException
		{
		// sub-select shared by the deletes that go via the elements table
		String elements_of_object = 
				"(SELECT xml_element_id FROM " + e_table + 
				" WHERE xml_object_id = " + id + ")";

		Statement st = con.createStatement();
		try
			{
			// mark object as deleted before anything else happens
			st.executeUpdate( "UPDATE " + o_table + " SET state = " + XML_OBJECT_DELETED + 
									" WHERE xml_object_id = " + id );
			st.executeUpdate( "DELETE FROM " + w_table + 
									" WHERE xml_cdata_id IN (SELECT xml_cdata_id FROM " + c_table +
									" WHERE xml_element_id IN " + elements_of_object + ")" );
			st.executeUpdate( "DELETE FROM " + c_table + 
									" WHERE xml_element_id IN " + elements_of_object );
			st.executeUpdate( "DELETE FROM " + a_table + 
									" WHERE xml_element_id IN " + elements_of_object );
			st.executeUpdate( "DELETE FROM " + e_table + " WHERE xml_object_id = " + id );
			st.executeUpdate( "DELETE FROM " + o_table + " WHERE xml_object_id = " + id );
			}
		finally
			{
			st.close();
			}
		}
		
	/**
	 * Writes a complete stored object to a stream as XML.
	 * @param con Connection used to read the object.
	 * @param output Stream that receives the XML.
	 * @param id ID of the object to write.
	 * @throws SQLException If reading from the database fails.
	 * @throws IOException If the output can't be produced.
	 */
	public void outputXMLObject( Connection con, OutputStream output, int id )
		throws SQLException, IOException
	{
		// no element selected means the whole object is written
		Integer no_selection = null;
		outputXMLObject( con, output, id, no_selection );
	}
		
	/**
	 * Writes one element of a stored object, together with its content,
	 * to a stream as XML.
	 * @param con Connection used to read the object.
	 * @param output Stream that receives the XML.
	 * @param id ID of the object that holds the element.
	 * @param eid ID of the element to write.
	 * @throws SQLException If reading from the database fails.
	 * @throws IOException If the output can't be produced.
	 */
	public void outputXMLObject( Connection con, OutputStream output, int id, int eid )
		throws SQLException, IOException
	{
		Integer selected_element = new Integer( eid );
		outputXMLObject( con, output, id, selected_element );
	}
		
	/**
	 * Writes a stored object, or one element of it, to a stream as UTF-8
	 * encoded XML.
	 * <p>
	 * Elements are stored with a left_index taken from a counter at their
	 * start tag and a right_index taken from the same counter at their end
	 * tag, so stepping an index up from zero and fetching the element that
	 * has that value as either index visits the tags in document order.
	 * Output stops at the first index that no element uses.
	 * @param con Connection used to read the object.
	 * @param output Stream that receives the XML.
	 * @param id ID of the object to write.
	 * @param eid ID of the only element to write, or null to write the
	 *        whole object.
	 * @throws SQLException If reading from the database fails.
	 * @throws IOException If character data can't be read or decoded.
	 */
	private void outputXMLObject( Connection con, OutputStream output, int id, Integer eid )
		throws SQLException, IOException
		{
		int i, left, right, element_id;
		String name;
		ResultSet results;

		// output is "switched on" from the start unless a single element
		// was selected, in which case it is on only inside that element
		boolean switched_on= (eid == null);
		int selected_right=0;
		
		PrintWriter out = new PrintWriter( new OutputStreamWriter( output, "utf-8" ) );
		out.println( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" );
		
		PreparedStatement e_st = null;
		PreparedStatement a_st = null;
		PreparedStatement c_st = null;
		try
			{
			e_st = con.prepareStatement( "SELECT * FROM " + e_table + " WHERE xml_object_id = ? AND (left_index = ? OR right_index = ?)" );
			a_st = con.prepareStatement( "SELECT * FROM " + a_table + " WHERE xml_element_id = ? ORDER BY xml_attribute_id" );
			c_st = con.prepareStatement( "SELECT * FROM " + c_table + " WHERE xml_element_id = ? ORDER BY xml_cdata_id" );
			
			for ( i=0; true; i++ )
				{
				e_st.clearParameters();
				e_st.setInt( 1, id );
				e_st.setInt( 2, i );
				e_st.setInt( 3, i );
				results = e_st.executeQuery();
				try
					{
					if ( !results.next() )
						break;
					element_id = results.getInt( 1 );
					left = results.getInt( 3 );
					right = results.getInt( 4 );
					name = results.getString( 5 );
					}
				finally
					{
					results.close();
					}
				
				if ( left == i )
					{
					// start tag (or a run of character data)
					if ( eid != null && element_id == eid.intValue() )
						{
						switched_on = true;
						selected_right = right;
						}
						
					if ( switched_on )
						{
						if ( name.equals( "xmlrepository:pcdata" ) )
							outputCData( c_st, element_id, out );
						else
							outputStartTag( a_st, element_id, name, out );
						}
					}
				else
					{
					// end tag; character data has no end tag of its own
					if ( switched_on && !name.equals( "xmlrepository:pcdata" ) )
						{
						out.print( "</" );
						out.print( name );
						out.println( ">" );
						}
					if ( eid!=null && right == selected_right )
						switched_on = false;
					}
				}
			}
		finally
			{
			out.flush();
			closeOutputStatement( e_st );
			closeOutputStatement( a_st );
			closeOutputStatement( c_st );
			}
		}

	/**
	 * Writes the character data stored for a pcdata pseudo-element, if any.
	 */
	private void outputCData( PreparedStatement c_st, int element_id, PrintWriter out )
		throws SQLException, IOException
		{
		c_st.clearParameters();
		c_st.setInt( 1, element_id );
		ResultSet results = c_st.executeQuery();
		try
			{
			if ( !results.next() )
				return;

			// intended to trap use of older JDBC drivers that lack
			// the getCharacterStream() method.  If calling the method
			// causes a linkage error then the repository will fall
			// back to opening a binary stream and will make assumptions
			// about the character encoding.
			Reader text = null;
			if ( use_character_stream )
				{
				try
					{
					text = results.getCharacterStream( 3 );
					}
				catch ( LinkageError lerr )
					{
					// getCharacterStream won't be called again.
					use_character_stream = false;
					log.warn("Database Driver (JDBC) doesn't have getCharacterStream(), using "+ db_character_encoding);
					}
				}

			if ( !use_character_stream )
				{
				InputStream raw = results.getBinaryStream( 3 );
				if ( raw != null )
					text = new InputStreamReader( raw, db_character_encoding );
				}

			// a SQL NULL column yields no stream; nothing to write then
			if ( text != null )
				printXMLEscaped( out, text, false );
			}
		finally
			{
			results.close();
			}
		}

	/**
	 * Writes a start tag, including the attributes stored for the element.
	 */
	private void outputStartTag( PreparedStatement a_st, int element_id, String name, PrintWriter out )
		throws SQLException, IOException
		{
		out.print( "<" );
		out.print( name );
		
		a_st.clearParameters();
		a_st.setInt( 1, element_id );
		ResultSet results = a_st.executeQuery();
		try
			{
			while ( results.next() )
				{
				String value = results.getString( 4 );
				out.print( " " );
				out.print( results.getString( 3 ) );
				out.print( "=\"" );
				// values are stored as the parser reported them, i.e.
				// unescaped, so they must be escaped again on the way out
				if ( value != null )
					printXMLEscaped( out, new StringReader( value ), true );
				out.print( "\"" );
				}
			}
		finally
			{
			results.close();
			}
		out.print( ">" );
		}

	/**
	 * Copies text to the output with XML escaping: markup characters become
	 * entity references, anything above 127 becomes a numeric character
	 * reference, and in attribute values the double quote is escaped too.
	 * A surrogate pair is written as a single reference to the character
	 * it encodes; an unpaired surrogate is written as U+FFFD because it
	 * has no legal XML representation.
	 */
	private static void printXMLEscaped( PrintWriter out, Reader text, boolean in_attribute )
		throws IOException
		{
		int c = text.read();
		while ( c >= 0 )
			{
			int next = text.read();
			boolean high = ( c >= 0xD800 && c <= 0xDBFF );
			boolean low  = ( c >= 0xDC00 && c <= 0xDFFF );

			if ( high && next >= 0xDC00 && next <= 0xDFFF )
				{
				int code_point = 0x10000 + ((c - 0xD800) << 10) + (next - 0xDC00);
				out.print( "&#" + code_point + ";" );
				c = text.read();
				continue;
				}

			if ( high || low )
				out.print( "&#65533;" );
			else if ( c == '<' )
				out.print( "&lt;" );
			else if ( c == '>' )
				out.print( "&gt;" );
			else if ( c == '&' )
				out.print( "&amp;" );
			else if ( c == '"' && in_attribute )
				out.print( "&quot;" );
			else if ( c > 127 )
				out.print( "&#" + c + ";" );
			else
				out.print( (char)c );

			c = next;
			}
		}

	/**
	 * Closes a statement used for output, logging rather than throwing so
	 * that a close failure can't hide an earlier exception.
	 */
	private void closeOutputStatement( PreparedStatement statement )
		{
		if ( statement == null )
			return;
		try
			{
			statement.close();
			}
		catch ( SQLException ex )
			{
			log.warn( "Unable to close statement.", ex );
			}
		}
		
	/**
	 * SAX callback: remembers the locator offered by the parser.
	 * @param l The parser's locator.
	 */
	public void setDocumentLocator( Locator l )
	{
		this.loc = l;
	}
		
	/**
	 * SAX callback: resets the per-document state (open element stack,
	 * index counter and character data buffer).
	 */
	public void startDocument ()
		throws SAXException
	{
		// A missing locator is tolerated; an earlier version rejected
		// parsers that don't report line and column numbers.
		this.cdata_buffer = new StringBuffer();
		this.in_text = false;
		this.visitation = 0;
		this.stack = new Stack();
	}

	/**
	 * SAX callback: checks that every element that was opened has also
	 * been closed.
	 * @throws SAXException If elements are still open.
	 */
	public void endDocument ()
		throws SAXException
	{
		if ( stack.isEmpty() )
			return;

		throw new SAXException( "Unmatched start/end element tags." );
	}

	/**
	 * SAX callback: stores a new element row and its attribute rows.  Any
	 * pending character data is stored first.  The element is inserted
	 * with the next index as its left_index and -1 as a placeholder
	 * right_index, and its ID is pushed on the stack of open elements.
	 * @param uri Ignored.
	 * @param localName Ignored.
	 * @param qName Stored as the element name.
	 * @param attributes Attributes to store; their qualified names are used.
	 * @throws SAXException Wraps any exception raised while storing.
	 */
	public void startElement( String uri, String localName, String qName, Attributes attributes ) 
      throws SAXException
	{
		try
		{
			// flush text that precedes this tag
			if ( in_text )
				exitText();

			// the enclosing element, if there is one, is on top of the stack
			Integer parent_id = stack.empty() ? null : (Integer)stack.peek();

			insert_xml_element.clearParameters();
			insert_xml_element.setInt( 1, ++max_xml_element_id );
			insert_xml_element.setInt( 2, max_xml_object_id );
			insert_xml_element.setInt( 3, visitation++ );
			insert_xml_element.setInt( 4, -1 );
			insert_xml_element.setString( 5, qName );
			if ( parent_id != null )
				insert_xml_element.setInt( 6, parent_id.intValue() );
			else
				insert_xml_element.setNull( 6, Types.INTEGER );
			insert_xml_element.executeUpdate();
			insert_xml_element.clearParameters();

			stack.push( new Integer( max_xml_element_id ) );

			int attribute_count = attributes.getLength();
			int n = 0;
			while ( n < attribute_count )
			{
				insert_xml_attribute.clearParameters();
				insert_xml_attribute.setInt( 1, ++max_xml_attribute_id );
				insert_xml_attribute.setInt( 2, max_xml_element_id );
				insert_xml_attribute.setString( 3, attributes.getQName( n ) );
				insert_xml_attribute.setString( 4, attributes.getValue( n ) );
				insert_xml_attribute.executeUpdate();
				n++;
			}
			insert_xml_attribute.clearParameters();
		}
		catch ( Exception ex )
		{
			log.error( ex.getMessage(), ex );
			throw new SAXException( ex );
		}
	}

	/**
	 * SAX callback: closes the element on top of the stack by storing the
	 * next index as its right_index.  Any pending character data is stored
	 * first.
	 * @param uri Ignored.
	 * @param localName Ignored.
	 * @param qName Ignored; the element to close is taken from the stack.
	 * @throws SAXException Wraps any exception raised while storing,
	 *         including an empty stack.
	 */
	public void endElement( String uri, String localName, String qName ) 
		throws SAXException
		{

		try
			{
			if ( in_text )
				exitText();

			Integer element_id = (Integer)stack.pop();
	        
			update_xml_element.clearParameters();
			update_xml_element.setInt( 1, visitation++ );
			update_xml_element.setInt( 2, element_id.intValue() );
			update_xml_element.executeUpdate();
			update_xml_element.clearParameters();
			}
		catch ( Exception ex )
			{
			// report through the logger, as startElement does
			log.error( ex.getMessage(), ex );
			throw new SAXException( ex );
			}
		}

	/**
	 * Starts a run of character data by storing a pretend element named
	 * "xmlrepository:pcdata".  Both of its indexes are assigned at once,
	 * so the run takes two consecutive index values.
	 * @throws SQLException If the element row can't be inserted.
	 */
	public void enterText()
		throws SQLException, IOException
	{
		// the enclosing element, if there is one, is on top of the stack
		Integer parent_id = stack.empty() ? null : (Integer)stack.peek();

		insert_xml_element.clearParameters();
		insert_xml_element.setInt( 1, ++max_xml_element_id );
		insert_xml_element.setInt( 2, max_xml_object_id );
		insert_xml_element.setInt( 3, visitation++ );	// left index
		insert_xml_element.setInt( 4, visitation++ );	// right index
		insert_xml_element.setString( 5, "xmlrepository:pcdata" );
		if ( parent_id != null )
			insert_xml_element.setInt( 6, parent_id.intValue() );
		else
			insert_xml_element.setNull( 6, Types.INTEGER );
		insert_xml_element.executeUpdate();
		insert_xml_element.clearParameters();

		this.in_text = true;
	}
		
	/**
	 * Ends a run of character data: stores the buffered text against the
	 * most recently created element, indexes its words and empties the
	 * buffer.
	 * @throws SQLException If the text or its index entries can't be stored.
	 */
	public void exitText()
		throws SQLException, IOException
	{
		String buffered_text = cdata_buffer.toString();

		insert_xml_cdata.clearParameters();
		insert_xml_cdata.setInt( 1, ++max_xml_cdata_id );
		insert_xml_cdata.setInt( 2, max_xml_element_id );
		insert_xml_cdata.setString( 3, buffered_text );
		insert_xml_cdata.executeUpdate();

		saveTokens( max_xml_element_id, max_xml_cdata_id, cdata_buffer );

		this.cdata_buffer.setLength( 0 );
		this.in_text = false;
	}

    /**
     * Inserts a row in the tokens table for a word that isn't there yet.
     * The row holds the word's IDs at the four collation strengths,
     * strongest first, followed by its source text.
     * @param xml_element_id Not used.
     * @param xml_cdata_id Not used.
     * @param indexed_word The word to store.
     * @throws SQLException If the insert fails.
     */
    private void saveToken( int xml_element_id, int xml_cdata_id, Word indexed_word )
		throws SQLException
    {
        log.debug( "Storing new token [" + indexed_word.getSource() + "]" );

        // column order: xml_token_id, tertiary_id, secondary_id, primary_id
        int[] strengths = { Collator.IDENTICAL, Collator.TERTIARY, Collator.SECONDARY, Collator.PRIMARY };
        for ( int column=0; column<strengths.length; column++ )
            insert_xml_token.setInt( column+1, indexed_word.getID( strengths[column] ) );

        insert_xml_token.setString( 5, indexed_word.getSource() );
        insert_xml_token.executeUpdate();
        insert_xml_token.clearParameters();
    }

    /**
     * Converts a java.text.Collator strength to the value used in the
     * flags column of the words table.
     * @param strength One of Collator.IDENTICAL, TERTIARY, SECONDARY or
     *        PRIMARY.
     * @return 0 for IDENTICAL, 1 for TERTIARY, 2 for SECONDARY and 3 for
     *         PRIMARY.
     * @throws IllegalArgumentException For any other value.
     */
    public int strengthToWordFlag( int strength )
    {
        if ( strength == Collator.IDENTICAL )
            return 0;
        if ( strength == Collator.TERTIARY )
            return 1;
        if ( strength == Collator.SECONDARY )
            return 2;
        if ( strength == Collator.PRIMARY )
            return 3;

        throw new IllegalArgumentException( "Invalid collation strength." );
    }
    
    /**
     * Records one occurrence of a word in the words table.  The word's ID
     * is looked up at each collation strength from IDENTICAL down to
     * PRIMARY, and a row is written whenever the ID differs from the one
     * at the previous strength (the comparison starts from zero).  The
     * flags column holds 0 to 3 for the strength the row belongs to.
     * @param xml_element_id Element that holds the character data.
     * @param xml_cdata_id Character data row that contains the word.
     * @param indexed_word The word that was found.
     * @param pos Offset of the word within the character data.
     * @throws SQLException If an insert fails.
     */
    private void saveWord( int xml_element_id, int xml_cdata_id, Word indexed_word, int pos )
		throws SQLException
    {
        // the array position doubles as the value of the flags column
        int[] strengths = { Collator.IDENTICAL, Collator.TERTIARY, Collator.SECONDARY, Collator.PRIMARY };
        int previous_id = 0;

        for ( int flag=0; flag<strengths.length; flag++ )
        {
            int token_id = indexed_word.getID( strengths[flag] );

            // don't store two records the same - only where different
            // strengths of matching give different IDs.
            if ( token_id != previous_id )
            {
                int xml_word_id = ++max_xml_word_id;

                insert_xml_word.setInt( 1, xml_word_id );
                insert_xml_word.setInt( 2, xml_cdata_id );
                insert_xml_word.setInt( 3, xml_element_id );
                insert_xml_word.setInt( 4, token_id );
                insert_xml_word.setInt( 5, pos );
                insert_xml_word.setInt( 6, flag );
                insert_xml_word.executeUpdate();
                insert_xml_word.clearParameters();
                log.debug( "Word token ref stored [" + token_id + "]" );
                previous_id = token_id;
            }
        }
    }
        

    /**
     * Splits character data at word boundaries and indexes every piece.
     * A piece that the word repository hasn't seen before is also stored
     * in the tokens table.
     * @param element_id Element that holds the character data.
     * @param cdata_id ID of the character data row.
     * @param cdata The text to index.
     * @throws SQLException If a token or word row can't be stored.
     */
    private void saveTokens( int element_id, int  cdata_id, StringBuffer cdata )
		throws SQLException
    {
        boundary.setText( cdata.toString() );

        int start = boundary.first();
        int end = boundary.next();
        while ( end != BreakIterator.DONE )
        {
            String piece = cdata.substring( start, end );
            // NOTE(review): pieces longer than 64 characters are cut to
            // 63, while one of exactly 64 is kept whole - confirm intended.
            if ( piece.length() > 64 )
                piece = piece.substring( 0, 63 );

            // addWord() gives back the word only when it is new
            Word indexed_word = word_repository.addWord( piece );
            if ( indexed_word == null )
                indexed_word = word_repository.getWord( piece );
            else
                saveToken( element_id, cdata_id, indexed_word );

            saveWord( element_id, cdata_id, indexed_word, start );

            start = end;
            end = boundary.next();
        }
    }
		
	/**
	 * SAX callback: accumulates character data.  A run of text starts at
	 * the first chunk that contains a non-whitespace character; from then
	 * on every chunk is appended whole until the run is ended elsewhere.
	 * @param ch Buffer holding the characters.
	 * @param start Offset of the first character.
	 * @param length Number of characters.
	 * @throws SAXException Wraps any failure, including a run that would
	 *         grow beyond 16k characters.
	 */
	public void characters(char[] ch, int start, int length )
   	throws SAXException
	{
		//for debugging
		log.debug( "XMLRepository.characters(char[] ch, int start, int length )" );

		try
		{
			if ( !in_text )
			{
				// skip leading whitespace to see whether real text follows
				int limit = start + length;
				int scan = start;
				while ( scan < limit && Character.isWhitespace( ch[scan] ) )
					scan++;
				if ( scan < limit )
					enterText();
			}

			if ( in_text )
			{
				int combined_length = cdata_buffer.length() + length;
				if ( combined_length > (16*1024) )
					throw new SAXException( "Unable to support more than 16k in CDATA." );

				cdata_buffer.append( ch, start, length );
			}
		}
		catch ( Exception ex )
		{
			throw new SAXException( ex );
		}
	}
   	
        /**
         * Looks up the ID the word repository holds for a word at a given
         * collation strength.
         * @param source The word to look up.
         * @param strength The collation strength.
         * @return The ID reported by the word repository, as an Integer.
         */
        public Integer getTokenId( String source, int strength )
        {
            int word_id = word_repository.getWordId( source, strength );
            return new Integer( word_id );
        }
	
 
        /**
         * Rebuilds the tokens and words tables without reporting progress.
         * @param con Connection used for all database work.
         * @throws SQLException If a database operation fails.
         * @throws IOException If stored character data can't be read.
         */
        public synchronized void regenerateTokens( Connection con )
        throws SQLException, IOException
        {
            XMLRepositoryListener no_listener = null;
            regenerateTokens( con, no_listener );
        }
        
        /**
         * Rebuilds the tokens and words tables from the stored character
         * data.  Both tables are emptied and every character data row is
         * indexed again.  Nothing is changed if there is no character data.
         * @param con Connection used for all database work.
         * @param listener Told about progress at every hundredth character
         *        data ID that exists; may be null.
         * @throws SQLException If a database operation fails.
         * @throws IOException If stored character data can't be read.
         */
        public synchronized void regenerateTokens( Connection con, XMLRepositoryListener listener )
        throws SQLException, IOException
        {
            int i, c, xml_cdata_id, xml_element_id;
            ResultSet results;
            Reader text;
            StringBuffer cdata;

            this.con = con;
		
            this.initWordCache( con );
            
            Statement st = con.createStatement();
            try
            {
                results = st.executeQuery( "SELECT max(xml_cdata_id) FROM " + c_table );
                results.next();
                max_xml_cdata_id = results.getInt( 1 );
                if ( results.wasNull() ) max_xml_cdata_id =0;
                results.close();

                // If there isn't any character data then there's
                // nothing to do.
                if ( max_xml_cdata_id == 0 )
                    return;

                // Must empty the two tables to avoid duplicates
                st.executeUpdate( "delete from " + w_table );
                st.executeUpdate( "delete from " + t_table );
                max_xml_word_id=0;
                max_xml_token_id=0;

                // The cache must be emptied along with the tokens table:
                // saveTokens() only writes a token row for words the cache
                // doesn't already hold.  Loading from the now empty table
                // leaves the fresh cache ready for use.
                word_repository = new WordRepository( Locale.ENGLISH );
                this.initWordCache( con );

                insert_xml_token = con.prepareStatement( "INSERT INTO " + t_table + " (xml_token_id,tertiary_id,secondary_id,primary_id,token) VALUES (?, ?, ?, ?, ?)" );
                try
                {
                    insert_xml_word = con.prepareStatement( "INSERT INTO " + w_table + " (xml_word_id,xml_cdata_id,xml_element_id,xml_token_id,pos,flags) VALUES (?, ?, ?, ?, ?, ?)" );
                    try
                    {
                        // inclusive upper bound: the row with the highest ID
                        // needs indexing too
                        for ( i=0; i<=max_xml_cdata_id; i++ )
                        {
                            results = st.executeQuery( 
                                "select xml_cdata_id, xml_element_id, cdata from " + 
                                c_table + " where xml_cdata_id = " + i );
                            if ( !results.next() )
                            {
                                results.close();
                                continue;
                            }

                            xml_cdata_id = results.getInt( 1 );
                            xml_element_id = results.getInt( 2 );
                            text=null;
                            if ( use_character_stream )
                            {
                                try
                                {
                                    text = results.getCharacterStream( 3 );
                                }
                                catch ( LinkageError lerr )
                                {
                                    // getCharacterStream won't be called again.
                                    use_character_stream = false;
                                    log.warn("Database Driver (JDBC) doesn't have getCharacterStream(), using "+ db_character_encoding);
                                }
                            }

                            if ( !use_character_stream )
                            {
                                InputStream raw = results.getBinaryStream( 3 );
                                if ( raw != null )
                                    text = new InputStreamReader( raw, db_character_encoding );
                            }

                            // a SQL NULL column yields no stream and is
                            // treated as empty text
                            cdata = new StringBuffer();
                            if ( text != null )
                            {
                                while ( (c = text.read()) >=0 )
                                    cdata.append( (char)c );
                                text.close();
                            }
                            results.close();

                            this.saveTokens( xml_element_id, xml_cdata_id, cdata );

                            if ( listener!=null && (i%100) == 0 )
                            {
                                listener.xmlTokensRegenerated( i, max_xml_cdata_id );
                            }
                        }
                    }
                    finally
                    {
                        insert_xml_word.close();
                    }
                }
                finally
                {
                    insert_xml_token.close();
                }
            }
            finally
            {
                try
                {
                    st.close();
                }
                finally
                {
                    // clear word repository so it can be reloaded next time
                    // it's needed, whether or not regeneration succeeded.
                    word_repository = new WordRepository( Locale.ENGLISH );
                }
            }
        }
        
}
