/* ======================================================================
   Parts Copyright 2006 University of Leeds, Oxford University, University of the Highlands and Islands.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

====================================================================== */

package org.bodington.xml;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.text.BreakIterator;
import java.text.Collator;
import java.util.Locale;
import java.util.Stack;
import java.util.Vector;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.apache.log4j.Logger;
import org.bodington.server.BuildingContext;
import org.bodington.sqldatabase.SqlDatabase;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class XMLRepository extends DefaultHandler
	{
    
    /** Logger shared by every instance of this class. */
    private static Logger log = Logger.getLogger(XMLRepository.class);
    
	// Values for the "state" column of the XML objects table.  DEPOSITING is
	// written when an object row is first inserted and DEPOSITED once the
	// whole document has been stored.  deleteXMLObject() writes the literal 4,
	// which equals XML_OBJECT_DELETED.
	public final int XML_OBJECT_DEPOSITING		= 1;
	public final int XML_OBJECT_DEPOSITED		= 2;
	public final int XML_OBJECT_UPDATING		= 3;
	public final int XML_OBJECT_DELETED			= 4;
	
	// Names of the objects, elements, attributes, cdata, tokens and words
	// tables, as passed to the constructor.
	public String o_table, e_table, a_table, c_table, t_table, w_table;
	// SAX parser that calls back into this object as its content handler.
	private XMLReader xml_reader;
        
        // Splits character data into words (English word instance).
        private BreakIterator boundary;
        // In-memory cache of the tokens table, filled by initWordCache().
        private WordRepository word_repository;

	// these statements are recreated every time an object is deposited
	PreparedStatement insert_xml_object, update_xml_object, insert_xml_element, 
	            insert_xml_attribute, insert_xml_cdata, update_xml_element,
	            insert_xml_token, insert_xml_word;
	            
	// Highest primary key handed out so far for each table; new rows use
	// the pre-incremented value.  Initialised from the database by init().
	private int max_xml_object_id, max_xml_element_id, max_xml_attribute_id, 
	            max_xml_cdata_id, max_xml_token_id, max_xml_word_id;

	// Ids (as Integer) of the elements that are currently open during a deposit.
	private Stack stack;
	// Running counter that supplies the left_index/right_index values.
	private int visitation;
	
	// True while character data is being accumulated for a pcdata pseudo-element.
	private boolean in_text;
	// Character data collected since enterText() was last called.
	private StringBuffer cdata_buffer;

	// Whether ResultSet.getCharacterStream() may be used to read cdata;
	// switched off automatically if the JDBC driver lacks the method.
	private boolean use_character_stream;
	// Encoding assumed when cdata has to be read as a binary stream instead.
	private String db_character_encoding;

	// Directory for temporary files; null is passed through to
	// File.createTempFile() as the directory argument.
	private File tempdir=null;
	
    /**
     * Create a new XMLRespository object.
     * @param driver_class_name
     * @param o_table Name of the XML Objects table.
     * @param e_table Name of the XML Entities table.
     * @param a_table Name of the XML Attributes table.
     * @param c_table Name of XML CDATA table.
     * @param t_table Name of XML Tokens table.
     * @param w_table Name of XML Words table.
     * @throws SAXException Thrown if we can't setup our parser.
     */
	public XMLRepository( String driver_class_name, 
                         String o_table, 
                         String e_table, 
                         String a_table, 
                         String c_table,
                         String t_table,
                         String w_table
                         )
		throws SAXException
		{
	    this.o_table = o_table;
	    this.e_table = e_table;
	    this.a_table = a_table;
	    this.c_table = c_table;
	    this.t_table = t_table;
	    this.w_table = w_table;

	    use_character_stream = true;
	    db_character_encoding = "UTF-16LE";

	    if ( driver_class_name == null )
	        xml_reader = XMLReaderFactory.createXMLReader();
	    else
	        xml_reader = XMLReaderFactory.createXMLReader( driver_class_name );

	    xml_reader.setContentHandler( this );
	    xml_reader.setFeature( "http://xml.org/sax/features/validation", false );
	    boundary = BreakIterator.getWordInstance( java.util.Locale.ENGLISH );
	    word_repository = new WordRepository( Locale.ENGLISH );
        
		}

    /**
     * Initialize the XML repository.
     * This must be called before the repository can be used.  It reads the
     * current maximum primary key of each of the six tables so that new
     * rows can be numbered from there.
     * @param con The database connection to query.
     * @throws SQLException If any of the maximum-id queries fails.
     */
	public synchronized void init(Connection con) throws SQLException
    {
        Statement st = con.createStatement();
        try
        {
            max_xml_object_id = getMaxId(st, "xml_object_id", o_table);
            max_xml_element_id = getMaxId(st, "xml_element_id", e_table);
            max_xml_attribute_id = getMaxId(st, "xml_attribute_id", a_table);
            max_xml_cdata_id = getMaxId(st, "xml_cdata_id", c_table);
            max_xml_token_id = getMaxId(st, "xml_token_id", t_table);
            max_xml_word_id = getMaxId(st, "xml_word_id", w_table);
        }
        finally
        {
            // The statement is only needed for these queries; release it
            // whether or not they succeeded.
            st.close();
        }
    }
    
    /**
     * Finds the largest value of a column in a table.
     * @param st Statement used to run the query; it is left open.
     * @param field Name of the (integer) column to inspect.
     * @param table Name of the table to query.
     * @return The maximum value, or 0 when the query yields SQL NULL
     *         (for example because the table is empty).
     * @throws SQLException If the query fails.
     */
    private static int getMaxId(Statement st, String field, String table) throws SQLException {
        ResultSet results = st.executeQuery( "SELECT max("+field+" ) FROM " + table );
        int id;
        try
        {
            results.next();
            id = results.getInt( 1 );
            if ( results.wasNull() ) id = 0;
        }
        finally
        {
            // Close the result set even when reading it failed.
            results.close();
        }
        log.debug("Max for: "+ table+ " is "+ id);
        return id;
    }

    /**
     * Set the directory for temporary files.
     * A <code>null</code> argument is accepted without any checks.
     * @param base The directory to use; when not <code>null</code> it must
     *        exist and be a directory.
     * @throws IOException If <code>base</code> doesn't exist or isn't a directory.
     */
    public void setTempDirectory( File base )
	    throws IOException
	    {
	    final boolean given = ( base != null );

	    if ( given && !base.exists() )
		throw new IOException( "Temporary file directory doesn't exist." );

	    if ( given && !base.isDirectory() )
		throw new IOException( "Temporary file directory isn't a directory." );

	    this.tempdir = base;
	    }
		
	/**
	 * Loads every row of the tokens table into the word repository, unless
	 * the repository reports that it is already loaded.  Before loading, the
	 * tokens and words tables are checked for the expected number of columns.
	 * @param con The database connection to read from.
	 * @throws SQLException If a query fails or a table has an unexpected
	 *         number of columns.
	 */
	void initWordCache( Connection con )
		throws SQLException
		{
		if ( word_repository.isLoaded() )
			return;

		Statement st = con.createStatement();
		try
			{
			ResultSet results;

			// silly query to get table fields
			results = st.executeQuery( "SELECT * FROM " + t_table + " WHERE xml_token_id<0" );
			try
				{
				if ( results.getMetaData().getColumnCount() != 5 )
					throw new SQLException( "Expected 5 columns in table " + t_table + ". Consult Bodington documentation." );
				}
			finally
				{
				results.close();
				}

			results = st.executeQuery( "SELECT * FROM " + w_table + " WHERE xml_word_id<0" );
			try
				{
				if ( results.getMetaData().getColumnCount() != 6 )
					throw new SQLException( "Expected 6 columns in table " + w_table + ". Consult Bodington documentation." );
				}
			finally
				{
				results.close();
				}

			results = st.executeQuery( "SELECT xml_token_id, tertiary_id, secondary_id, primary_id, token FROM " + t_table );
			try
				{
				log.debug( "Loading cache of search tokens." );
				while ( results.next() )
					{
					int idi = results.getInt( 1 );
					int id3 = results.getInt( 2 );
					int id2 = results.getInt( 3 );
					int id1 = results.getInt( 4 );
					String source = results.getString( 5 );

					// Word takes the ids in primary, secondary, tertiary,
					// identical order, the reverse of the column order.
					word_repository.loadWord( new Word( source, id1, id2, id3, idi, null, null, null ) );
					}
				}
			finally
				{
				results.close();
				}
			}
		finally
			{
			// Released on every path, including the column-count failures.
			st.close();
			}

		word_repository.completeLoading();
		log.debug( "Loaded cache of search tokens." );
		}

	
	/**
	 * Chooses how stored character data is read back from the database.
	 * @param b <code>true</code> to read it with
	 *        <code>ResultSet.getCharacterStream()</code>, <code>false</code>
	 *        to read a binary stream and decode it with the configured
	 *        database character encoding.
	 */
	public void setCharacterStream( boolean b )
		{
		this.use_character_stream = b;
		}
		
	/**
	 * Sets the character encoding used to decode character data when it is
	 * read from the database as a binary stream.
	 * @param s Name of the encoding; it is not validated here.
	 */
	public void setDBCharacterEncoding( String s )
		{
		this.db_character_encoding = s;
		}

	
	/**
	 * Creates a new query object bound to this repository.
	 * @return A fresh {@link XMLQuery} constructed with this repository.
	 */
	public XMLQuery getQueryInstance()
		{
		final XMLQuery query = new XMLQuery( this );
		return query;
		}
	
	/**
	 * Setup variables for a new object to be deposited.
	 * All of the prepared statements used during a deposit are (re)created
	 * on the supplied connection, {@link #max_xml_object_id} gets incremented
	 * and a row in the "depositing" state is inserted for the new object.
	 * @param con The connection the statements are prepared on.
	 * @param path Value for the path column.
	 * @param file Value for the file_name column.
	 * @param reference Value for the reference column.
	 * @param title Value for the title column.
	 * @throws SQLException If a statement can't be prepared or the insert fails.
	 */
	private synchronized void newObjectId(Connection con, String path, String file, int reference, String title )
		throws SQLException
		{
		// Objects table.
		insert_xml_object = con.prepareStatement(
			"INSERT INTO " + o_table + " (xml_object_id,state,path,file_name,reference,title) VALUES (?, ?, ?, ?, ?, ?)" );
		update_xml_object = con.prepareStatement(
			"UPDATE " + o_table + " SET state = ? WHERE xml_object_id = ?" );

		// Elements, attributes and character data.
		insert_xml_element = con.prepareStatement(
			"INSERT INTO " + e_table + " (xml_element_id,xml_object_id,left_index,right_index,element_name,xml_parent_id) VALUES (?, ?, ?, ?, ?, ?)" );
		insert_xml_attribute = con.prepareStatement(
			"INSERT INTO " + a_table + " (xml_attribute_id,xml_element_id,name,value) VALUES (?, ?, ?, ?)" );
		insert_xml_cdata = con.prepareStatement(
			"INSERT INTO " + c_table + " (xml_cdata_id,xml_element_id,cdata) VALUES (?, ?, ?)" );
		update_xml_element = con.prepareStatement(
			"UPDATE " + e_table + " SET right_index = ? WHERE xml_element_id = ?" );

		// Search index.
		insert_xml_token = con.prepareStatement(
			"INSERT INTO " + t_table + " (xml_token_id,tertiary_id,secondary_id,primary_id,token) VALUES (?, ?, ?, ?, ?)" );
		insert_xml_word = con.prepareStatement(
			"INSERT INTO " + w_table + " (xml_word_id,xml_cdata_id,xml_element_id,xml_token_id,pos,flags) VALUES (?, ?, ?, ?, ?, ?)" );

		// Record the new object; its id is the next one after the current maximum.
		final PreparedStatement insert = insert_xml_object;
		insert.clearParameters();
		max_xml_object_id = max_xml_object_id + 1;
		insert.setInt( 1, max_xml_object_id );
		insert.setInt( 2, XML_OBJECT_DEPOSITING );
		insert.setString( 3, path );
		insert.setString( 4, file );
		insert.setInt( 5, reference );
		insert.setString( 6, title );
		insert.executeUpdate();
		insert.clearParameters();
		}

	/**
	 * Closes all of the prepared statements used while depositing an object.
	 * Every statement is attempted even if an earlier one fails to close, and
	 * statements that were never created are skipped.
	 * @throws SQLException The first failure met while closing, if any;
	 *         later failures are only logged.
	 */
	private synchronized void cleanUpStatements()
		throws SQLException
		{
		PreparedStatement[] statements = 
			{
			insert_xml_object, update_xml_object, insert_xml_element,
			insert_xml_attribute, insert_xml_cdata, update_xml_element,
			insert_xml_token, insert_xml_word
			};
		SQLException first_failure = null;

		for ( int i=0; i<statements.length; i++ )
			{
			if ( statements[i] == null )
				continue;
			try
				{
				statements[i].close();
				}
			catch ( SQLException ex )
				{
				// Keep going so one bad statement doesn't leak the others.
				if ( first_failure == null )
					first_failure = ex;
				else
					log.warn( "Unable to close prepared statement.", ex );
				}
			}

		if ( first_failure != null )
			throw first_failure;
		}

		
	/**
	 * Parses an XML file and stores its content in the repository.
	 * The object row is inserted in the "depositing" state and moved to
	 * "deposited" once the whole file has been parsed successfully.
	 * @param con The database connection to store the object through.
	 * @param xmlfile The XML file to parse.
	 * @param reference Value for the object's reference column.
	 * @param title Value for the object's title column.
	 * @return The id given to the new XML object.
	 * @throws SQLException If the database can't be updated.
	 * @throws IOException If the file can't be read.
	 * @throws SAXException If the file can't be parsed or stored.
	 */
	public synchronized int depositXMLObject(  Connection con, File xmlfile, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		this.initWordCache( con );

		newObjectId(con, xmlfile.getPath(), xmlfile.getName(), reference, title );
		
		boolean completed = false;
		FileInputStream in = null;
		try
			{
			in = new FileInputStream( xmlfile );
			xml_reader.parse( new InputSource( in ) );
			
			update_xml_object.clearParameters();
			update_xml_object.setInt( 1, XML_OBJECT_DEPOSITED );
			update_xml_object.setInt( 2, max_xml_object_id );
			update_xml_object.executeUpdate();
			update_xml_object.clearParameters();
			completed = true;
			}
		finally
			{
			// The parser isn't required to close the stream, so do it here.
			if ( in != null )
				{
				try
					{
					in.close();
					}
				catch ( IOException ioex )
					{
					log.warn( "Unable to close " + xmlfile.getPath(), ioex );
					}
				}

			if ( completed )
				cleanUpStatements();
			else
				{
				// Already failing: release the statements but don't let a
				// problem closing them hide the original exception.
				try
					{
					cleanUpStatements();
					}
				catch ( SQLException sqlex )
					{
					log.warn( "Unable to close statements after failed deposit.", sqlex );
					}
				}
			}
		
		return max_xml_object_id;
		}

	/**
	 * Replays a DOM node, and recursively its children, as the SAX events
	 * that this class handles when parsing a file.  Elements become
	 * startElement/endElement calls; text and CDATA sections are trimmed and,
	 * if anything is left, passed to characters().  Other node types are
	 * ignored.
	 * @param node The DOM node to deposit.
	 * @throws SAXException If one of the event handlers fails.
	 */
	private void depositElement( Node node )
		throws SAXException
		{
		final short type = node.getNodeType();

		if ( type == Node.ELEMENT_NODE )
			{
			Element element = (Element)node;

			// Copy the DOM attributes into a SAX attribute list.
			AttributesImpl sax_attributes = new AttributesImpl();
			NamedNodeMap dom_attributes = element.getAttributes();
			for ( int a = 0; a < dom_attributes.getLength(); a++ )
				{
				Attr attribute = (Attr)dom_attributes.item( a );
				sax_attributes.addAttribute( null, null, attribute.getName(), "", attribute.getValue() );
				}

			startElement( null, null, element.getTagName(), sax_attributes );

			NodeList children = element.getChildNodes();
			if ( children != null )
				{
				for ( int c = 0; c < children.getLength(); c++ )
					depositElement( children.item( c ) );
				}

			endElement( null, null, element.getTagName() );
			return;
			}

		if ( type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE )
			{
			String data = ((org.w3c.dom.CharacterData)node).getData();
			if ( data == null )
				return;

			// Surrounding whitespace is dropped; whitespace-only text is skipped.
			String trimmed = data.trim();
			if ( trimmed.length() > 0 )
				characters( trimmed.toCharArray(), 0, trimmed.length() );
			}
		}

	/**
	 * Stores the content of a DOM document in the repository.
	 * The document is walked from its root element and replayed through the
	 * same handlers used when parsing a file.  The object's path and file
	 * name are recorded as "domsource" and "dom".
	 * @param con The database connection to store the object through.
	 * @param doc The document to deposit.
	 * @param reference Value for the object's reference column.
	 * @param title Value for the object's title column.
	 * @return The id given to the new XML object.
	 * @throws SQLException If the database can't be updated.
	 * @throws SAXException If the document can't be stored.
	 */
	public synchronized int depositXMLObject(  Connection con, Document doc, int reference, String title )
		throws SQLException, SAXException
		{
		this.initWordCache( con );
		
		newObjectId(con, "domsource", "dom", reference, title );
		
		boolean completed = false;
		try
			{
			startDocument();
			depositElement( doc.getDocumentElement() );
			endDocument();
			
			update_xml_object.clearParameters();
			update_xml_object.setInt( 1, XML_OBJECT_DEPOSITED );
			update_xml_object.setInt( 2, max_xml_object_id );
			update_xml_object.executeUpdate();
			update_xml_object.clearParameters();
			completed = true;
			}
		finally
			{
			if ( completed )
				cleanUpStatements();
			else
				{
				// Already failing: release the statements but don't let a
				// problem closing them hide the original exception.
				try
					{
					cleanUpStatements();
					}
				catch ( SQLException sqlex )
					{
					log.warn( "Unable to close statements after failed deposit.", sqlex );
					}
				}
			}

		return max_xml_object_id;
		}
		

/*
	public synchronized int depositXMLObject(  Connection con, Document doc, int reference, String title )
		throws SQLException, IOException, SAXException
		{
		File xmlfile = File.createTempFile( "metadata", ".xml", tempdir );
		FileOutputStream out = new FileOutputStream( xmlfile );
		
		//tran.transform( new DOMSource( doc ), new StreamResult( xmlfile ) );
		XmlDocument xdoc = (XmlDocument)doc;
		xdoc.write( out );
		
		out.close();
		int id = depositXMLObject( con, xmlfile, reference, title );
		xmlfile.delete();
		return id;
		}
*/	
	/**
	 * Rebuilds a stored XML object as a DOM document.
	 * The object is first written to a temporary file (in the configured
	 * temporary directory) and that file is then parsed.  The temporary file
	 * is removed again whether or not this succeeds.
	 * @param con The database connection to read from.
	 * @param id The id of the XML object to fetch.
	 * @return The parsed document.
	 * @throws SQLException If the object can't be read from the database.
	 * @throws IOException If the temporary file can't be written or read.
	 * @throws SAXException If the regenerated XML can't be parsed.
	 * @throws ParserConfigurationException If no DOM parser can be created.
	 */
	public Document getXMLObject( Connection con, int id )
	throws SQLException, IOException, SAXException, SAXParseException, ParserConfigurationException
	{
	    File xmlfile = File.createTempFile( "deposit", ".xml", tempdir );
	    log.debug( "Temp XML File = " + xmlfile.getAbsoluteFile() );
	    try
	    {
	        FileOutputStream out = new FileOutputStream( xmlfile );
	        try
	        {
	            outputXMLObject( con, out, id );
	        }
	        finally
	        {
	            // Must be closed before parsing, and also when output fails.
	            out.close();
	        }
	        log.debug( "Closed " + xmlfile.getAbsoluteFile() );

	        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
	        DocumentBuilder builder = factory.newDocumentBuilder();
	        return builder.parse( xmlfile );
	    }
	    finally
	    {
	        // Don't leave temporary files behind when something goes wrong.
	        if ( !xmlfile.delete() )
	            log.warn( "Unable to delete temporary file " + xmlfile.getAbsoluteFile() );
	    }
	}
		
	/**
	 * Remove an object from the XMLRepository.
	 * The object is first marked as deleted, then its word, character data,
	 * attribute and element rows are removed, and finally the object row
	 * itself.  Rows in the tokens table are left alone.
	 * This doesn't need any synchronisation as we don't cache any of this 
	 * information and there isn't any real harm with multiple copies of this
	 * running at the same time.
	 * @param con The database connection.
	 * @param id The ID of the XML object to delete.
	 * @throws SQLException If there was a problem removing this object.
	 */
	public void deleteXMLObject( Connection con, int id )
		throws SQLException
		{
		Statement st =con.createStatement();
		try
			{
			// mark object as deleted before anything else happens
			int updated = st.executeUpdate( "UPDATE " + o_table + " SET state = " + XML_OBJECT_DELETED + 
									" WHERE state != " + XML_OBJECT_DELETED + " AND xml_object_id = " + id );
			if (updated != 1) {
			    // Warn about multiple deletes running at the same time.
			    // I think we should be able to return.
			    log.warn("Attempting to delete an object that is being deleted.");
			}
			// Children are removed before their parents: words, then
			// character data and attributes, then elements, then the object.
			st.executeUpdate( "DELETE FROM " + w_table + 
									" WHERE  xml_cdata_id in (SELECT xml_cdata_id FROM " + c_table +
									" WHERE " +
									" xml_element_id  IN (SELECT xml_element_id  FROM " + e_table + 
									" WHERE " + 
									"xml_object_id = " + id + ") )" );
			st.executeUpdate( "DELETE FROM " + c_table + 
									" WHERE xml_element_id IN (SELECT xml_element_id FROM " + e_table + 
									" WHERE xml_object_id = " + id + ")" );
			st.executeUpdate( "DELETE FROM " + a_table + 
									" WHERE xml_element_id IN (SELECT xml_element_id FROM " + e_table + 
									" WHERE xml_object_id = " + id + ")" );
			st.executeUpdate( "DELETE FROM " + e_table + " WHERE xml_object_id = " + id );
			st.executeUpdate( "DELETE FROM " + o_table + " WHERE xml_object_id = " + id );
			}
		finally
			{
			// Release the statement even when one of the updates failed.
			st.close();
			}
		}
		
	/**
	 * Writes a complete stored XML object to a stream as UTF-8 XML.
	 * @param con The database connection to read from.
	 * @param output The stream to write to; it is flushed but not closed.
	 * @param id The id of the XML object to write.
	 * @throws SQLException If the object can't be read from the database.
	 * @throws IOException If the output can't be encoded or the stored text can't be read.
	 */
	public void outputXMLObject( Connection con, OutputStream output, int id )
		throws SQLException, IOException
		{
		// No element selected: the whole object is written.
		final Integer no_element = null;
		outputXMLObject( con, output, id, no_element );
		}
		
	/**
	 * Writes a single element of a stored XML object, with its content, to a
	 * stream as UTF-8 XML.
	 * @param con The database connection to read from.
	 * @param output The stream to write to; it is flushed but not closed.
	 * @param id The id of the XML object that contains the element.
	 * @param eid The id of the element to write.
	 * @throws SQLException If the object can't be read from the database.
	 * @throws IOException If the output can't be encoded or the stored text can't be read.
	 */
	public void outputXMLObject( Connection con, OutputStream output, int id, int eid )
		throws SQLException, IOException
		{
		final Integer selected_element = new Integer( eid );
		outputXMLObject( con, output, id, selected_element );
		}
		
	/**
	 * Writes a stored XML object, or one element of it, as UTF-8 XML.
	 * The element rows are visited in order of their left/right indices,
	 * starting at 0 and stopping at the first index no row carries.  A row
	 * found through its left index produces a start tag (or the text of a
	 * pcdata pseudo-element); a row found through its right index produces
	 * an end tag.
	 * @param con The database connection to read from.
	 * @param output The stream to write to; it is flushed but not closed.
	 * @param id The id of the XML object to write.
	 * @param eid The id of the only element to write, or <code>null</code>
	 *        to write the whole object.
	 * @throws SQLException If the object can't be read from the database.
	 * @throws IOException If the output can't be encoded or the stored text can't be read.
	 */
	private void outputXMLObject( Connection con, OutputStream output, int id, Integer eid )
		throws SQLException, IOException
		{
		// When an element is selected, output stays off until its start is
		// reached and goes off again once its right index has been passed.
		boolean switched_on = (eid == null);
		int selected_right = 0;
		
		PrintWriter out = new PrintWriter( new OutputStreamWriter( output, "utf-8" ) );
		out.println( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" );
		
		PreparedStatement e_st = null;
		PreparedStatement a_st = null;
		PreparedStatement c_st = null;
		try
			{
			e_st = con.prepareStatement( "SELECT * FROM " + e_table + " WHERE xml_object_id = ? AND (left_index = ? OR right_index = ?)" );
			a_st = con.prepareStatement( "SELECT * FROM " + a_table + " WHERE xml_element_id = ? ORDER BY xml_attribute_id" );
			c_st = con.prepareStatement( "SELECT * FROM " + c_table + " WHERE xml_element_id = ? ORDER BY xml_cdata_id" );
			
			for ( int i=0; true; i++ )
				{
				int element_id, left, right;
				String name;

				e_st.clearParameters();
				e_st.setInt( 1, id );
				e_st.setInt( 2, i );
				e_st.setInt( 3, i );
				ResultSet results = e_st.executeQuery();
				try
					{
					// no element starts or ends here: the object is complete
					if ( !results.next() )
						break;
					
					element_id = results.getInt( 1 );
					left = results.getInt( 3 );
					right = results.getInt( 4 );
					name = results.getString( 5 );
					}
				finally
					{
					results.close();
					}
				
				if ( left == i )
					{
					// start of an element
					if ( eid != null && element_id == eid.intValue() )
						{
						switched_on = true;
						selected_right = right;
						}
					
					if ( switched_on )
						{
						if ( name.equals( "xmlrepository:pcdata" ) )
							writeCharacterData( out, c_st, element_id );
						else
							writeStartTag( out, a_st, name, element_id );
						}
					}
				else
					{
					// end of an element; pcdata pseudo-elements have no end tag
					if ( switched_on && !name.equals( "xmlrepository:pcdata" ) )
						{
						out.print( "</" );
						out.print( name );
						out.println( ">" );
						}
					if ( eid!=null && right == selected_right )
						switched_on = false;
					}
				}

			out.flush();
			}
		finally
			{
			closeOutputStatement( e_st );
			closeOutputStatement( a_st );
			closeOutputStatement( c_st );
			}
		}

	// Writes "<name attr="value" ...>" for an element, escaping attribute values.
	private void writeStartTag( PrintWriter out, PreparedStatement a_st, String name, int element_id )
		throws SQLException
		{
		out.print( "<" );
		out.print( name );
		
		a_st.clearParameters();
		a_st.setInt( 1, element_id );
		ResultSet results = a_st.executeQuery();
		try
			{
			while ( results.next() )
				{
				out.print( " " );
				out.print( results.getString( 3 ) );
				out.print( "=\"" );
				// Values are stored as the parser reported them, i.e.
				// unescaped, so they have to be escaped again here.
				out.print( escapeAttributeValue( results.getString( 4 ) ) );
				out.print( "\"" );
				}
			}
		finally
			{
			results.close();
			}
		out.print( ">" );
		}

	// Writes the first character data record of a pcdata pseudo-element as escaped XML text.
	private void writeCharacterData( PrintWriter out, PreparedStatement c_st, int element_id )
		throws SQLException, IOException
		{
		c_st.clearParameters();
		c_st.setInt( 1, element_id );
		ResultSet results = c_st.executeQuery();
		try
			{
			if ( !results.next() )
				return;

			// intended to trap use of older JDBC drivers that lack
			// the get CharacterStream() method.  If calling the method
			// causes a linkage error then the repository will fall
			// back to opening a binary stream and will make assumptions
			// about the character encoding.
			Reader text = null;
			if ( use_character_stream )
				{
				try
					{
					text = results.getCharacterStream( 3 );
					}
				catch ( LinkageError lerr )
					{
					// getCharacterStream won't be called again.
					use_character_stream = false;
					log.warn("Database Driver (JDBC) doesn't have getCharacterStream(), using "+ db_character_encoding);
					}
				}

			if ( !use_character_stream )
				{
				java.io.InputStream raw = results.getBinaryStream( 3 );
				if ( raw != null )
					text = new InputStreamReader( raw, db_character_encoding );
				}

			// JDBC returns a null stream for SQL NULL; there is nothing to write.
			if ( text == null )
				return;

			try
				{
				int b;
				while ( (b = text.read()) >= 0 )
					{
					switch ( b )
						{
						case '<':
							out.print( "&lt;" );
							break;
						case '>':
							out.print( "&gt;" );
							break;
						case '&':
							out.print( "&amp;" );
							break;
						default:
							// NOTE(review): this emits one reference per UTF-16
							// code unit, so characters outside the BMP come out
							// as two surrogate references - confirm whether
							// such text can occur.
							if ( b>127 )
								out.print( "&#" + b + ";" );
							else
								out.print( (char)b );
						}
					}
				}
			finally
				{
				text.close();
				}
			}
		finally
			{
			results.close();
			}
		}

	// Escapes a string for use inside a double-quoted XML attribute value; null becomes "".
	private static String escapeAttributeValue( String value )
		{
		if ( value == null )
			return "";

		StringBuffer escaped = new StringBuffer( value.length() + 16 );
		for ( int i=0; i<value.length(); i++ )
			{
			char c = value.charAt( i );
			switch ( c )
				{
				case '&':
					escaped.append( "&amp;" );
					break;
				case '<':
					escaped.append( "&lt;" );
					break;
				case '>':
					escaped.append( "&gt;" );
					break;
				case '"':
					escaped.append( "&quot;" );
					break;
				// literal tabs and line breaks would be normalised to
				// spaces when the attribute is parsed again
				case '\t':
					escaped.append( "&#9;" );
					break;
				case '\n':
					escaped.append( "&#10;" );
					break;
				case '\r':
					escaped.append( "&#13;" );
					break;
				default:
					escaped.append( c );
				}
			}
		return escaped.toString();
		}

	// Closes a statement used for output, logging rather than throwing on failure.
	private static void closeOutputStatement( PreparedStatement st )
		{
		if ( st == null )
			return;
		try
			{
			st.close();
			}
		catch ( SQLException ex )
			{
			log.warn( "Unable to close statement.", ex );
			}
		}
		
	/**
	 * Resets the per-document parsing state: no open elements, visitation
	 * counter at zero, not inside text and an empty character data buffer.
	 * @throws SAXException Declared by the handler interface; not thrown here.
	 */
	public void startDocument ()
		throws SAXException
		{
		this.in_text = false;
		this.visitation = 0;
		this.cdata_buffer = new StringBuffer();
		this.stack = new Stack();
		}

	/**
	 * Checks that every element that was started has also been ended.
	 * @throws SAXException If elements are still open at the end of the document.
	 */
	public void endDocument ()
		throws SAXException
		{
		final boolean all_closed = stack.isEmpty();
		if ( all_closed )
			return;

		throw new SAXException( "Unmatched start/end element tags." );
		}

	/**
	 * Stores the start of an element and all of its attributes.
	 * Any text being collected is stored first.  The element row is inserted
	 * with the next visitation number as its left index and -1 as a
	 * placeholder right index, and the element becomes the current parent.
	 * @param uri Ignored.
	 * @param localName Ignored.
	 * @param qName The name stored for the element.
	 * @param attributes The attributes to store; their qualified names are used.
	 * @throws SAXException Wraps any exception raised while storing.
	 */
	public void startElement( String uri, String localName, String qName, Attributes attributes ) 
      throws SAXException
		{
		try
			{
			// finish off any text that precedes this element
			if ( in_text )
				exitText();

			// the innermost open element, if any, is the parent
			final Integer parent_id = stack.empty() ? null : (Integer)stack.peek();

			insert_xml_element.clearParameters();
			final int element_id = ++max_xml_element_id;
			insert_xml_element.setInt( 1, element_id );
			insert_xml_element.setInt( 2, max_xml_object_id );
			insert_xml_element.setInt( 3, visitation++ );
			// right index isn't known until the element ends
			insert_xml_element.setInt( 4, -1 );
			insert_xml_element.setString( 5, qName );
			if ( parent_id != null )
				insert_xml_element.setInt( 6, parent_id.intValue() );
			else
				insert_xml_element.setNull( 6, Types.INTEGER );
			insert_xml_element.executeUpdate();
			insert_xml_element.clearParameters();
			
			stack.push( new Integer( element_id ) );
			
			int a = 0;
			while ( a < attributes.getLength() )
				{
				insert_xml_attribute.clearParameters();
				insert_xml_attribute.setInt( 1, ++max_xml_attribute_id );
				insert_xml_attribute.setInt( 2, element_id );
				insert_xml_attribute.setString( 3, attributes.getQName( a ) );
				insert_xml_attribute.setString( 4, attributes.getValue( a ) );
				insert_xml_attribute.executeUpdate();
				a++;
				}
			insert_xml_attribute.clearParameters();
			}
		catch ( Exception ex )
			{
			log.error( ex.getMessage(), ex );
			throw new SAXException( ex );
			}
		}

	/**
	 * Stores the end of the innermost open element by filling in its right
	 * index with the next visitation number.  Any text being collected is
	 * stored first.
	 * @param uri Ignored.
	 * @param localName Ignored.
	 * @param qName Ignored; the element is identified by the stack of open elements.
	 * @throws SAXException Wraps any exception raised while storing.
	 */
	public void endElement( String uri, String localName, String qName ) 
		throws SAXException
		{

		try
			{
			if ( in_text )
				exitText();

			Integer element_id = (Integer)stack.pop();
	        
			update_xml_element.clearParameters();
			update_xml_element.setInt( 1, visitation++ );
			update_xml_element.setInt( 2, element_id.intValue() );
			update_xml_element.executeUpdate();
			update_xml_element.clearParameters();
			}
		catch ( Exception ex )
			{
			// Report through the logger, as startElement does, rather than
			// printing the stack trace to standard error.
			log.error( ex.getMessage(), ex );
			throw new SAXException( ex );
			}
		}

	/**
	 * Puts a pretend element named "xmlrepository:pcdata" in the database to
	 * hold a run of text, and marks the repository as being inside text.
	 * The pseudo-element takes two consecutive visitation numbers as its
	 * left and right index and has the innermost open element as parent.
	 * @throws SQLException If the element row can't be inserted.
	 */
	private void enterText()
		throws SQLException
		{
		final Integer parent_id = stack.empty() ? null : (Integer)stack.peek();

		insert_xml_element.clearParameters();
		insert_xml_element.setInt( 1, ++max_xml_element_id );
		insert_xml_element.setInt( 2, max_xml_object_id );

		// left index first, then right index: the order matters
		final int left_index = visitation++;
		insert_xml_element.setInt( 3, left_index );
		final int right_index = visitation++;
		insert_xml_element.setInt( 4, right_index );

		insert_xml_element.setString( 5, "xmlrepository:pcdata" );
		if ( parent_id != null )
			insert_xml_element.setInt( 6, parent_id.intValue() );
		else
			insert_xml_element.setNull( 6, Types.INTEGER );
		insert_xml_element.executeUpdate();
		insert_xml_element.clearParameters();
		
		this.in_text = true;
		}
		
	/**
	 * Stores the text collected so far as a character data record of the most
	 * recently inserted element, indexes its words, then empties the buffer
	 * and leaves text mode.
	 * @throws SQLException If the text or its index entries can't be stored.
	 */
	private void exitText()
		throws SQLException
		{
		// The element inserted last is the one the text belongs to.
		final int element_id = max_xml_element_id;

		insert_xml_cdata.clearParameters();
		final int cdata_id = ++max_xml_cdata_id;
		insert_xml_cdata.setInt( 1, cdata_id );
		insert_xml_cdata.setInt( 2, element_id );
		insert_xml_cdata.setString( 3, cdata_buffer.toString() );
		insert_xml_cdata.executeUpdate();
		
		saveTokens( element_id, cdata_id, cdata_buffer );
		
		this.in_text = false;
		cdata_buffer.setLength( 0 );
		}

    /**
     * Inserts a row in the tokens table for a word that isn't there yet.
     * The row holds the word's ids at identical, tertiary, secondary and
     * primary strength (in that column order) followed by its source text.
     * @param xml_element_id Not used.
     * @param xml_cdata_id Not used.
     * @param indexed_word The word to store.
     * @throws SQLException If the row can't be inserted.
     */
    private void saveToken( int xml_element_id, int xml_cdata_id, Word indexed_word )
		throws SQLException
    {
        log.debug( "Storing new token [" + indexed_word.getSource() + "]" );

        // Column order of the insert: strongest match first.
        final int[] strengths =
            { Collator.IDENTICAL, Collator.TERTIARY, Collator.SECONDARY, Collator.PRIMARY };

        for ( int column = 0; column < strengths.length; column++ )
            insert_xml_token.setInt( column + 1, indexed_word.getID( strengths[column] ) );

        insert_xml_token.setString( strengths.length + 1, indexed_word.getSource() );
        insert_xml_token.executeUpdate();
        insert_xml_token.clearParameters();
    }

    /**
     * Converts a {@link Collator} strength into the flag value used for
     * word records: 0 for identical, 1 for tertiary, 2 for secondary and
     * 3 for primary.
     * @param strength One of the four <code>Collator</code> strength constants.
     * @return The flag value for that strength.
     * @throws IllegalArgumentException If <code>strength</code> is not one
     *         of the four constants.
     */
    public int strengthToWordFlag( int strength )
    {
        if ( strength == Collator.IDENTICAL )
            return 0;
        if ( strength == Collator.TERTIARY )
            return 1;
        if ( strength == Collator.SECONDARY )
            return 2;
        if ( strength == Collator.PRIMARY )
            return 3;

        throw new IllegalArgumentException( "Invalid collation strength." );
    }
    
    /**
     * Stores the index records for one occurrence of a word.
     * The word's ids are taken at identical, tertiary, secondary and primary
     * strength in turn; a record is written for each id that differs from
     * the one written just before it.  The "flags" column receives
     * 0, 1, 2 or 3 according to the strength the id was taken at.
     * @param xml_element_id Id of the element that holds the text.
     * @param xml_cdata_id Id of the character data record that holds the text.
     * @param indexed_word The word that occurs in the text.
     * @param pos Offset of the word within the text.
     * @throws SQLException If a record can't be inserted.
     */
    private void saveWord( int xml_element_id, int xml_cdata_id, Word indexed_word, int pos )
		throws SQLException
    {
        // The position in this array is the flag value stored with the record.
        final int[] strengths =
            { Collator.IDENTICAL, Collator.TERTIARY, Collator.SECONDARY, Collator.PRIMARY };

        int previous_id = 0;
        for ( int flag = 0; flag < strengths.length; flag++ )
        {
            final int token_id = indexed_word.getID( strengths[flag] );

            // don't store two records the same - only where different
            // strengths of matching give different IDs.
            if ( token_id != previous_id )
            {
                final int word_id = ++max_xml_word_id;

                insert_xml_word.setInt( 1, word_id );
                insert_xml_word.setInt( 2, xml_cdata_id );
                insert_xml_word.setInt( 3, xml_element_id );
                insert_xml_word.setInt( 4, token_id );
                insert_xml_word.setInt( 5, pos );
                insert_xml_word.setInt( 6, flag );
                insert_xml_word.executeUpdate();
                insert_xml_word.clearParameters();
                log.debug( "Word token ref stored [" + token_id + "]" );
                previous_id = token_id;
            }
        }
    }
        

    /**
     * Splits a piece of text at word boundaries and indexes every piece.
     * Each piece is offered to the word repository; when the repository
     * hands back a word, a token row is stored for it, otherwise the word is
     * looked up.  A word record is then stored at the piece's offset.
     * @param element_id Id of the element that holds the text.
     * @param cdata_id Id of the character data record that holds the text.
     * @param cdata The text to index.
     * @throws SQLException If a token or word record can't be stored.
     */
    private void saveTokens( int element_id, int  cdata_id, StringBuffer cdata )
		throws SQLException
    {
        final String text = cdata.toString();
        boundary.setText( text );

        int start = boundary.first();
        int end = boundary.next();
        while ( end != BreakIterator.DONE )
        {
            String word = text.substring( start, end );

            // NOTE(review): pieces longer than 64 characters are cut to 63,
            // while a piece of exactly 64 is kept whole - confirm which
            // limit the token column really needs.
            if ( word.length() > 64 )
                word = word.substring( 0, 63 );

            Word indexed_word = word_repository.addWord( word );
            if ( indexed_word == null )
                indexed_word = word_repository.getWord( word );
            else
                saveToken( element_id, cdata_id, indexed_word );

            saveWord( element_id, cdata_id, indexed_word, start );

            start = end;
            end = boundary.next();
        }
    }
		
	/**
	 * Collects character data reported by the parser.
	 * Text is only collected once a non-whitespace character has been seen;
	 * from then on every chunk is appended whole, including any leading
	 * whitespace in the chunk that switched collecting on.
	 * @param ch Array that holds the characters.
	 * @param start Index of the first character to use.
	 * @param length Number of characters to use.
	 * @throws SAXException Wraps any exception raised here, including the
	 *         one raised when the collected text would exceed 16k characters.
	 */
	public void characters(char[] ch, int start, int length )
		throws SAXException
		{
		//for debugging
		log.debug( "XMLRepository.characters(char[] ch, int start, int length )" );

		try
			{
			if ( !in_text )
				{
				// look for the first character that isn't whitespace
				final int limit = start + length;
				int pos = start;
				while ( pos < limit && Character.isWhitespace( ch[pos] ) )
					pos++;
				if ( pos < limit )
					enterText();
				}

			// still nothing but whitespace: ignore the chunk
			if ( !in_text )
				return;

			final int maximum = 16*1024;
			if ( (cdata_buffer.length() + length ) > maximum )
				throw new SAXException( "Unable to support more than 16k in CDATA." );

			cdata_buffer.append( ch, start, length );
			}
		catch ( Exception ex )
			{
			throw new SAXException( ex );
			}
		}
    
    /**
     * Gets the word/token IDs that match the supplied string at the given
     * strength, as reported by the word repository's
     * <code>getMatchingWordIds()</code>.  The result is boxed from int[] to
     * Integer[], which is the form XMLQuery finds useful.
     * @param source The string to match.
     * @param strength The collation strength to match at.
     * @return Integer array with one entry per matching ID.
     */
   	
	public Integer[] getMatchingTokenIds( String source, int strength )
	{
	    final int[] ids = word_repository.getMatchingWordIds( source, strength );
	    final Integer[] boxed = new Integer[ids.length];

	    int index = 0;
	    while ( index < ids.length )
	    {
	        boxed[index] = new Integer( ids[index] );
	        index++;
	    }

	    return boxed;
	}
    
    /**
     * Gets tokens from the database that start with the supplied word,
     * using an SQL wildcard (LIKE) query.
     * Returns a maximum of 10 tokens.  Problems talking to the database are
     * logged and whatever was collected up to that point is returned.
     * (Shouldn't have to worry about case, as CollationKeys check case, but problem
     * occurs if there is no token in the database for the differently cased version)
     * @param word to use with wildcard query
     * @param ignoreCase whether SQL generated should ignore case
     * @return String array of tokens, possibly empty but never null
     */
    
    String[] getMatchingTokensUsingWildcard( String word, boolean ignoreCase )
    {
        Vector tokens = new Vector();
        Statement st = null;
        ResultSet results = null;
        
        try
        {   
            String sql;
            if ( ignoreCase )
            {
                sql = "SELECT token FROM "+ t_table +" WHERE lower(token) LIKE "+
                    SqlDatabase.quotedSQL(word.toLowerCase()+"%");
            }
            else
                sql = "SELECT token FROM "+ t_table +" WHERE token LIKE "+
                    SqlDatabase.quotedSQL(word+"%");
            
            // NOTE(review): the connection is taken from the building
            // context and not released here, as before - confirm that the
            // context owns its lifecycle.
            Connection unsyncConnection = BuildingContext.getContext().getConnection();
            
            st = unsyncConnection.createStatement();
            results = st.executeQuery( sql );
          
            int count = 1;
            while ( results.next() && count <= 10 )
            {
                String token = results.getString( 1 );
                if ( results.wasNull() ) continue;
                tokens.add( token );
                count ++;
            }
        }
        catch ( Exception e )
        {
            log.error("Error retrieving metadata tokens from database.", e );
        }
        finally
        {
            // Release the JDBC resources on every path; a failure to close
            // must not lose the tokens that were collected.
            if ( results != null )
            {
                try
                {
                    results.close();
                }
                catch ( SQLException e )
                {
                    log.warn("Unable to close token query results.", e );
                }
            }
            if ( st != null )
            {
                try
                {
                    st.close();
                }
                catch ( SQLException e )
                {
                    log.warn("Unable to close token query statement.", e );
                }
            }
        }
        return (String[])tokens.toArray( new String[tokens.size()] );
    }
	
 
        /**
         * Rebuilds the tokens and words tables from the stored character
         * data, without reporting progress to anyone.
         * @param con The database connection to work through.
         * @throws SQLException If the database can't be read or updated.
         * @throws IOException If stored character data can't be read.
         */
        public synchronized void regenerateTokens( Connection con )
        throws SQLException, IOException
        {
            final XMLRepositoryListener no_listener = null;
            regenerateTokens( con, no_listener );
        }
        
        /**
         * Rebuilds the tokens and words tables from the stored character data.
         * Both tables are emptied and every character data record, up to and
         * including the one with the highest id, is split into words and
         * indexed again.  Nothing is changed when there is no character data.
         * @param con The database connection to work through.
         * @param listener Told about progress after every record whose id is
         *        a multiple of 100; may be <code>null</code>.
         * @throws SQLException If the database can't be read or updated.
         * @throws IOException If stored character data can't be read.
         */
        public synchronized void regenerateTokens( Connection con, XMLRepositoryListener listener )
        throws SQLException, IOException
        {
            // NOTE(review): the cache is loaded from the tokens table before
            // that table is emptied below.  If addWord() returns null for
            // words that are already cached, their token rows would not be
            // written again - confirm against WordRepository.
            this.initWordCache( con );
            
            Statement st = con.createStatement();
            try
            {
                max_xml_cdata_id = getMaxId( st, "xml_cdata_id", c_table );

                // If there isn't any character data then there's
                // nothing to do.
                if ( max_xml_cdata_id == 0 )
                    return;

                // Must empty the two tables to avoid duplicates
                st.executeUpdate( "delete from " + w_table );
                st.executeUpdate( "delete from " + t_table );
                max_xml_word_id=0;
                max_xml_token_id=0;
                
                PreparedStatement token_st = null;
                PreparedStatement word_st = null;
                try
                {
                    token_st = con.prepareStatement( "INSERT INTO " + t_table + " (xml_token_id,tertiary_id,secondary_id,primary_id,token) VALUES (?, ?, ?, ?, ?)" );
                    insert_xml_token = token_st;
                    word_st = con.prepareStatement( "INSERT INTO " + w_table + " (xml_word_id,xml_cdata_id,xml_element_id,xml_token_id,pos,flags) VALUES (?, ?, ?, ?, ?, ?)" );
                    insert_xml_word = word_st;

                    // Ids are handed out by pre-incrementing the maximum, so
                    // the record with id == max_xml_cdata_id exists and has
                    // to be included.
                    for ( int i=0; i<=max_xml_cdata_id; i++ )
                    {
                        int xml_cdata_id, xml_element_id;
                        StringBuffer cdata = new StringBuffer();

                        ResultSet results = st.executeQuery( 
                            "select xml_cdata_id, xml_element_id, cdata from " + 
                            c_table + " where xml_cdata_id = " + i );
                        try
                        {
                            // ids needn't be contiguous
                            if ( !results.next() )
                                continue;
                            
                            xml_cdata_id = results.getInt( 1 );
                            xml_element_id = results.getInt( 2 );

                            Reader text=null;
                            if ( use_character_stream )
                            {
                                try
                                {
                                    text = results.getCharacterStream( 3 );
                                }
                                catch ( LinkageError lerr )
                                {
                                    // getCharacterStream won't be called again.
                                    use_character_stream = false;
                                    log.warn("Database Driver (JDBC) doesn't have getCharacterStream(), using "+ db_character_encoding);
                                }
                            }

                            if ( !use_character_stream )
                            {
                                java.io.InputStream raw = results.getBinaryStream( 3 );
                                if ( raw != null )
                                    text = new InputStreamReader( raw, db_character_encoding );
                            }
                            
                            // A null stream means SQL NULL: index it as empty text.
                            if ( text != null )
                            {
                                try
                                {
                                    int c;
                                    while ( (c = text.read()) >=0 )
                                        cdata.append( (char)c );
                                }
                                finally
                                {
                                    text.close();
                                }
                            }
                        }
                        finally
                        {
                            results.close();
                        }
                        
                        this.saveTokens( xml_element_id, xml_cdata_id, cdata );
                        
                        if ( listener!=null && (i%100) == 0 )
                        {
                            listener.xmlTokensRegenerated( i, max_xml_cdata_id );
                        }
                    }
                }
                finally
                {
                    if ( token_st != null )
                    {
                        try
                        {
                            token_st.close();
                        }
                        catch ( SQLException ex )
                        {
                            log.warn( "Unable to close token insert statement.", ex );
                        }
                    }
                    if ( word_st != null )
                    {
                        try
                        {
                            word_st.close();
                        }
                        catch ( SQLException ex )
                        {
                            log.warn( "Unable to close word insert statement.", ex );
                        }
                    }

                    // clear word repository so it can be reloaded next time
                    // it's needed.  Done on failure too, because the cache no
                    // longer matches the emptied tables.
                    word_repository = new WordRepository( Locale.ENGLISH );
                }
            }
            finally
            {
                // Covers the early return and every failure path.
                st.close();
            }
        }
        
}
