package org.bodington.util;

import java.io.*;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.*;

/**
 * Percent-encodes strings for use in URLs.
 * <p>
 * Three schemes are available through {@link #encode(String, String)}:
 * <ul>
 * <li>any charset name known to the JVM: every character outside
 *     {@link #dontNeedEncoding} is converted to bytes in that charset and
 *     each byte is written as <code>%XX</code> (upper case hex).  Unlike
 *     <code>java.net.URLEncoder</code> a space becomes <code>%20</code>,
 *     never <code>+</code>, because some web servers fail to turn the plus
 *     back into a space when they decode.</li>
 * <li><code>bodington_underscore</code>: the Bodington 2.1 scheme, see
 *     {@link #encodeToBodURL_2_1(String)}.</li>
 * <li><code>bodington_escape</code>: the Bodington 2.0 scheme, see
 *     {@link #encodeToBodURL_2_0(String)}.</li>
 * </ul>
 * All encoding is done by static methods that keep no mutable state between
 * calls, so the result of one call can never depend on an earlier one.
 */
public class URLEncoder
{
    // this is the same as "special chars" as defined in RFC 1738
    // but without the plus symbol which can cause problems
    // and with the space, double quotes and square brackets
    // which are pretty safe to URL encode
    private static final String safe_chars = "$-_.!*'(), \"[]";

    // digits used for the two hex characters that follow a percent sign
    private static final String HEX_DIGITS = "0123456789ABCDEF";

    // charset used by both Bodington schemes once their own escaping is done
    private static final Charset US_ASCII = Charset.forName( "US-ASCII" );

    // characters that are copied to the output without percent encoding
    static BitSet dontNeedEncoding;
    static
    {
        dontNeedEncoding = new BitSet(256);
        int i;
        for (i = 'a'; i <= 'z'; i++)
        {
            dontNeedEncoding.set(i);
        }
        for (i = 'A'; i <= 'Z'; i++)
        {
            dontNeedEncoding.set(i);
        }
        for (i = '0'; i <= '9'; i++)
        {
            dontNeedEncoding.set(i);
        }
        dontNeedEncoding.set('-');
        dontNeedEncoding.set('_');
        dontNeedEncoding.set('.');
        dontNeedEncoding.set('*');
    }

    /**
     * Tests whether a character may appear in a name without forcing the
     * Bodington 2.1 underscore escaping.
     *
     * @param c the character to test.
     * @return true for a-z, A-Z, 0-9 and the characters in safe_chars.
     */
    private static boolean isSafeChar( char c )
    {
        if ( c>='a' && c<='z' )
            return true;
        if ( c>='A' && c<='Z' )
            return true;
        if ( c>='0' && c<='9' )
            return true;
        return safe_chars.indexOf( c ) >= 0;
    }

    /**
     * Decides whether a name has to use the underscore escaped form of the
     * Bodington 2.1 scheme.
     *
     * @param name the name to examine.
     * @return true if the name starts with an underscore or contains any
     *         character for which isSafeChar is false.
     */
    private static boolean isDodgy( String name )
    {
        // if the name starts with an underscore we have to count it as
        // dodgy so we can tell dodgy URLs from normal ones when we DEcode
        if ( name.length() > 0 && name.charAt( 0 ) == '_' )
            return true;

        for ( int i=0; i<name.length(); i++ )
        {
            if ( !isSafeChar( name.charAt( i ) ) )
                return true;
        }
        return false;
    }

    /**
     * Appends a UTF-16 code unit as exactly four lower case hex digits.
     *
     * @param out the buffer to append to.
     * @param c   the code unit, 0x0000 to 0xffff.
     */
    private static void appendHex4( StringBuffer out, int c )
    {
        String hex = Integer.toHexString( c );
        for ( int j=hex.length(); j<4; j++ )
            out.append( '0' );
        out.append( hex );
    }

    /**
     * Encodes a name with the Bodington 2.1 ("bodington_underscore") scheme.
     * <p>
     * Names are divided into those that even the most unfriendly web server
     * can cope with and those that might cause problems.  A name that does
     * not start with an underscore and only contains safe characters is
     * percent-encoded as US-ASCII in the conventional way, so a normal web
     * browser can work out the original file name.
     * <p>
     * Any other name is prefixed with an underscore so that it can be
     * recognised when decoding.  Within it an underscore is written as a
     * double underscore, safe characters are copied, and every other UTF-16
     * code unit is written as an underscore followed by four hex digits.
     * The result is then percent-encoded as US-ASCII.
     *
     * @param name the name to encode.
     * @return the encoded name.
     */
    private static String encodeToBodURL_2_1( String name )
    {
        if ( !isDodgy( name ) )
            return percentEncode( name, US_ASCII );

        StringBuffer escaped = new StringBuffer( name.length()*2 );
        escaped.append( "_" );
        for ( int i=0; i<name.length(); i++ )
        {
            char c = name.charAt( i );

            // underscore is escaped as double underscore
            if ( c == '_' )
                escaped.append( "__" );
            // safe characters other than underscore go straight through
            else if ( isSafeChar( c ) )
                escaped.append( c );
            else
            {
                // unsafe characters are escaped with an underscore
                // and are output as four character hex
                escaped.append( '_' );
                appendHex4( escaped, c );
            }
        }

        return percentEncode( escaped.toString(), US_ASCII );
    }

    /**
     * Encodes a name with the Bodington 2.0 ("bodington_escape") scheme.
     * <p>
     * Characters with a code of 0x80 or above, semi-colons, pluses and the
     * escape character itself are replaced by the escape character 0x1b
     * followed by four hex digits.  The result is then percent-encoded as
     * US-ASCII, which turns each escape character into <code>%1B</code>.
     *
     * @param name the name to encode.
     * @return the encoded name.
     */
    private static String encodeToBodURL_2_0( String name )
    {
        StringBuffer escaped = new StringBuffer( name.length() );

        for ( int i=0; i<name.length(); i++ )
        {
            char c = name.charAt( i );

            // url encoding can go wrong for unicode characters with
            // bit 7 set and in iPlanet for semi-colons and pluses.
            // Also the escape code needs to be escaped.
            if ( c >= 0x80 || c == 0x3b || c == 0x2b || c == 0x1b )
            {
                escaped.append( '\u001b' );
                appendHex4( escaped, c );
            }
            else
                escaped.append( c );
        }

        return percentEncode( escaped.toString(), US_ASCII );
    }

    /**
     * Translates a string into a percent-encoded form suitable for a URL.
     *
     * @param   s        <code>String</code> to be translated.
     * @param   encoding <code>bodington_underscore</code> or
     *                   <code>bodington_escape</code> (case insensitive) to
     *                   select one of the Bodington schemes, otherwise the
     *                   name of the charset used to convert characters to
     *                   bytes before they are percent-encoded.
     * @return  the translated <code>String</code>.
     * @throws  UnsupportedEncodingException if <code>encoding</code> is not
     *          a Bodington scheme and does not name a supported charset.
     * @throws  NullPointerException if either argument is null.
     */
    public static String encode( String s, String encoding )
    throws UnsupportedEncodingException
    {
        if ( "bodington_underscore".equalsIgnoreCase( encoding ) )
            return encodeToBodURL_2_1( s );
        if ( "bodington_escape".equalsIgnoreCase( encoding ) )
            return encodeToBodURL_2_0( s );
        return encodeNormally( s, encoding );
    }

    /**
     * Percent-encodes a string using a charset given by name.
     *
     * @param s   the string to encode.
     * @param enc the name of the charset.
     * @return the encoded string.
     * @throws UnsupportedEncodingException if the charset is not available.
     */
    private static String encodeNormally( String s, String enc )
    throws UnsupportedEncodingException
    {
        // the charset is resolved first so that a bad name is reported
        // even when the string contains nothing that needs encoding
        return percentEncode( s, lookupCharset( enc ) );
    }

    /**
     * Resolves a charset name.
     *
     * @param enc the name of the charset.
     * @return the charset.
     * @throws UnsupportedEncodingException if the name is illegal or the
     *         charset is not available in this JVM.
     * @throws NullPointerException if the name is null.
     */
    private static Charset lookupCharset( String enc )
    throws UnsupportedEncodingException
    {
        if ( enc == null )
            throw new NullPointerException( "encoding" );
        try
        {
            return Charset.forName( enc );
        }
        catch ( IllegalArgumentException e )
        {
            // both IllegalCharsetNameException and
            // UnsupportedCharsetException are IllegalArgumentExceptions
            UnsupportedEncodingException unsupported =
                new UnsupportedEncodingException( enc );
            unsupported.initCause( e );
            throw unsupported;
        }
    }

    /**
     * Percent-encodes every character that is not in dontNeedEncoding.
     * <p>
     * Each unbroken run of characters that need encoding is converted to
     * bytes in one go, so a surrogate pair is converted as one character.
     * Input the charset cannot represent, including a surrogate that is not
     * part of a pair, is replaced by the charset's replacement bytes at the
     * position where it occurred.  Because every run is converted
     * independently nothing is carried over to the next run or the next call.
     *
     * @param s       the string to encode.
     * @param charset the charset used to convert characters to bytes.
     * @return the encoded string, or <code>s</code> itself if no character
     *         needed encoding.
     */
    private static String percentEncode( String s, Charset charset )
    {
        // only allocated when the first character needing encoding is found
        StringBuffer out = null;
        int length = s.length();
        int i = 0;

        while ( i < length )
        {
            char c = s.charAt( i );
            if ( dontNeedEncoding.get( c ) )
            {
                if ( out != null )
                    out.append( c );
                i++;
                continue;
            }

            // find the end of this run of characters that need encoding
            int run_start = i;
            do
            {
                i++;
            }
            while ( i < length && !dontNeedEncoding.get( s.charAt( i ) ) );

            if ( out == null )
            {
                out = new StringBuffer( length + 16 );
                out.append( s.substring( 0, run_start ) );
            }

            ByteBuffer bytes = charset.encode( s.substring( run_start, i ) );
            while ( bytes.hasRemaining() )
            {
                int b = bytes.get() & 0xFF;
                out.append( '%' );
                out.append( HEX_DIGITS.charAt( b >> 4 ) );
                out.append( HEX_DIGITS.charAt( b & 0xF ) );
            }
        }

        return ( out == null ) ? s : out.toString();
    }

    /**
     * Self test.  Encodes "abc" + ch + "def" as utf-8 for each character ch
     * below 0xffff, skipping 0xd800 to 0xe7ff, and prints both results
     * wherever this class and <code>java.net.URLEncoder</code> disagree.
     *
     * @param params not used.
     */
    public static void main( String[] params )
    {
        try
        {
            String test, enc1, enc2;
            String encoding = "utf-8";

            for ( char ch = 0; ch < 0xffff; ch++ )
            {
                if ( ch == 0xd800 )
                    ch = 0xe800;

                test = "abc" + ch + "def";
                enc1 = encode( test, encoding );
                enc2 = java.net.URLEncoder.encode(  test, encoding );
                if ( !enc1.equals( enc2 ) )
                {
                    System.out.println();
                    System.out.println( enc1 );
                    System.out.println( enc2 );
                }
            }
        }
        catch ( UnsupportedEncodingException e )
        {
            e.printStackTrace();
        }
    }

    // name of the encoding this instance was created for
    private final String encoding;

    /**
     * Creates an instance tied to an encoding name.  This constructor is
     * kept so that existing code which calls it still compiles; instances
     * offer no operations and all encoding is done through the static
     * {@link #encode(String, String)} method.
     *
     * @param encoding the name of the encoding.
     */
    public URLEncoder( String encoding )
    {
        this.encoding = encoding;
    }
}