org.jafer.record
Class MARC8Unicode

java.lang.Object
  extended by org.jafer.record.MARC8Unicode

public class MARC8Unicode
extends java.lang.Object

The toUnicode methods convert MARC-8 encoded text to Unicode (UTF-8): they identify the character set in use and perform the lookup using an XML file derived from the Library of Congress (LOC) character sets, including the multibyte characters for EACC. Character sets are loaded as required. The toMARC8 method converts from Unicode to MARC-8 by performing a sequential lookup using the character sets.

Version:
1.0
Author:
Antony Corfield; Matthew Dovey; Colin Tatham

Field Summary
static java.lang.String ANSEL
           
static int ASCII
           
static java.lang.String ASCII_TYPE_1
           
static java.lang.String ASCII_TYPE_2
           
static java.lang.String BASIC_ARABIC
           
static java.lang.String BASIC_CYRILLIC
           
static java.lang.String BASIC_GREEK
           
static java.lang.String BASIC_HEBREW
           
static int C0_LIMITS_LOWER
           
static int C0_LIMITS_UPPER
           
static int C1_LIMITS_LOWER
           
static int C1_LIMITS_UPPER
           
private static java.util.Hashtable characterSetsMap
           
private static java.lang.String CONTROL
           
private  java.util.Hashtable controlPage
           
private  java.lang.String controlPageId
           
private  java.lang.String currentG0PageId
           
private  java.lang.String currentG1PageId
           
static int DIACRITIC_ANSEL_LOWER
           
static int DIACRITIC_ANSEL_UPPER
           
static int DIACRITIC_BASIC_ARABIC_LOWER
           
static int DIACRITIC_BASIC_ARABIC_UPPER
           
static int DIACRITIC_BASIC_GREEK_LOWER
           
static int DIACRITIC_BASIC_GREEK_UPPER
           
static int DIACRITIC_BASIC_HEBREW_LOWER
           
static int DIACRITIC_BASIC_HEBREW_UPPER
           
static int DIACRITIC_EXTENDED_ARABIC_LOWER
           
static int DIACRITIC_EXTENDED_ARABIC_UPPER
           
static java.lang.String EACC
           
private  boolean esc
           
static int ESC
           
static java.lang.String EXTENDED_ARABIC
           
static java.lang.String EXTENDED_CYRILLIC
           
static int G0_LIMITS_LOWER
           
static int G0_LIMITS_UPPER
           
static int G0_PAGE_CHAR1
           
static int G0_PAGE_CHAR2
           
private  java.util.Hashtable G0Page
           
private  java.lang.String G0PageId
           
static int G1_LIMITS_LOWER
           
static int G1_LIMITS_UPPER
           
static int G1_PAGE_CHAR1
           
static int G1_PAGE_CHAR2
           
private  java.util.Hashtable G1Page
           
private  java.lang.String G1PageId
           
static java.lang.String GREEK_SYMBOL
           
private static java.util.logging.Logger logger
           
private  java.util.Vector lookUpList
           
static char MARC8_UNKNOWN
           
private  char marc8Unknown
           
private static java.util.Hashtable marcKeyCharacterSets
           
static int MULTI_BYTE
           
private  boolean multiByte
           
private  boolean pageG0
           
private  boolean pageG1
           
static java.lang.String SUBSCRIPT
           
static java.lang.String SUPERSCRIPT
           
static char UNICODE_UNKNOWN
           
private static java.util.Hashtable unicodeKeyCharacterSets
           
private  char unicodeUnknown
           
 
Constructor Summary
MARC8Unicode()
           
MARC8Unicode(char marc8Unknown, char unicodeUnknown)
           
 
Method Summary
private  java.lang.StringBuffer appendMultiByte(int c, java.lang.StringBuffer bufferIn, java.lang.StringBuffer bufferOut)
           
private  void buildLookUpList()
           
private  java.lang.StringBuffer emptyBuffer(java.lang.StringBuffer bufferIn, java.lang.StringBuffer bufferOut, int offset)
           
private  java.util.Hashtable getCharacterSet(java.lang.String key, boolean marcKey)
           
private  java.lang.Character getControlCharacter(int c)
           
private  java.util.Hashtable getControlPage()
           
private  java.lang.String getControlPageId()
           
private  java.lang.String getCurrentG0PageId()
           
private  java.lang.String getCurrentG1PageId()
           
private  java.lang.Character getEACCCharacter(java.lang.String hexValue)
           
private  java.util.Hashtable getG0Page()
           
private  java.lang.String getG0PageId()
           
private  java.util.Hashtable getG1Page()
           
private  java.lang.String getG1PageId()
           
private  java.util.Vector getLookUpList()
           
private  char[] getMARC8Chars(char unicodeChar)
           
private  java.util.Hashtable getPage(java.lang.String key, boolean marcKey)
           
private  java.lang.String getPageESCSequence()
           
private  java.lang.String getPageId(int c)
           
private  java.lang.Character getUnicodeCharacter(int c)
           
private  boolean isBufferEmpty(java.lang.StringBuffer buffer)
           
private  boolean isC0Page(int c)
           
private  boolean isC1Page(int c)
           
private  boolean isControlFunction(int c)
           
private  boolean isDiacritic(int c)
           
private  boolean isESC()
           
private  boolean isESCChar(int c)
           
private  boolean isG0Page(int c)
           
private  boolean isG0PageChar(int c)
           
private  boolean isG1Page(int c)
           
private  boolean isG1PageChar(int c)
           
private  boolean isMultiByte()
           
private  boolean isMultiByteChar(int c)
           
private  boolean isNewPage()
           
private  boolean isPageG0()
           
private  boolean isPageG1()
           
private static void loadCharacterSetsMap()
           
private  void moveToBack(java.lang.String pageId, java.util.Vector lookUpList)
           
private  void moveToFront(java.lang.String pageId, java.util.Vector lookUpList)
           
private  void setControlPage(java.lang.String key)
           
private  void setControlPageId(java.lang.String id)
           
private  void setCurrentG0PageId(java.lang.String id)
           
private  void setCurrentG1PageId(java.lang.String id)
           
private  void setESC(boolean state)
           
private  void setG0Page(int c)
           
private  void setG0Page(java.lang.String key)
           
private  void setG0PageId(java.lang.String id)
           
private  void setG1Page(int c)
           
private  void setG1Page(java.lang.String key)
           
private  void setG1PageId(java.lang.String id)
           
private  void setMultiByte(boolean state)
           
private  void setPageG0(boolean state)
           
private  void setPageG1(boolean state)
           
 java.lang.String toMARC8(java.lang.String unicode)
           
private  char[] toMultiByte(java.lang.String multiByteHexValue)
           
 java.lang.String toUnicode(byte[] marc8)
           
 java.lang.String toUnicode(byte[] marc8, int offset, int len)
           
 java.lang.String toUnicode(java.lang.String marc8)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

UNICODE_UNKNOWN

public static final char UNICODE_UNKNOWN

MARC8_UNKNOWN

public static final char MARC8_UNKNOWN

ESC

public static final int ESC

ASCII

public static final int ASCII

MULTI_BYTE

public static final int MULTI_BYTE

G0_PAGE_CHAR1

public static final int G0_PAGE_CHAR1

G0_PAGE_CHAR2

public static final int G0_PAGE_CHAR2

G1_PAGE_CHAR1

public static final int G1_PAGE_CHAR1

G1_PAGE_CHAR2

public static final int G1_PAGE_CHAR2

C0_LIMITS_LOWER

public static final int C0_LIMITS_LOWER

C0_LIMITS_UPPER

public static final int C0_LIMITS_UPPER

C1_LIMITS_LOWER

public static final int C1_LIMITS_LOWER

C1_LIMITS_UPPER

public static final int C1_LIMITS_UPPER

G0_LIMITS_LOWER

public static final int G0_LIMITS_LOWER

G0_LIMITS_UPPER

public static final int G0_LIMITS_UPPER

G1_LIMITS_LOWER

public static final int G1_LIMITS_LOWER

G1_LIMITS_UPPER

public static final int G1_LIMITS_UPPER

DIACRITIC_BASIC_HEBREW_LOWER

public static final int DIACRITIC_BASIC_HEBREW_LOWER

DIACRITIC_BASIC_HEBREW_UPPER

public static final int DIACRITIC_BASIC_HEBREW_UPPER

DIACRITIC_BASIC_ARABIC_LOWER

public static final int DIACRITIC_BASIC_ARABIC_LOWER

DIACRITIC_BASIC_ARABIC_UPPER

public static final int DIACRITIC_BASIC_ARABIC_UPPER

DIACRITIC_EXTENDED_ARABIC_LOWER

public static final int DIACRITIC_EXTENDED_ARABIC_LOWER

DIACRITIC_EXTENDED_ARABIC_UPPER

public static final int DIACRITIC_EXTENDED_ARABIC_UPPER

DIACRITIC_BASIC_GREEK_LOWER

public static final int DIACRITIC_BASIC_GREEK_LOWER

DIACRITIC_BASIC_GREEK_UPPER

public static final int DIACRITIC_BASIC_GREEK_UPPER

DIACRITIC_ANSEL_LOWER

public static final int DIACRITIC_ANSEL_LOWER

DIACRITIC_ANSEL_UPPER

public static final int DIACRITIC_ANSEL_UPPER

SUBSCRIPT

public static final java.lang.String SUBSCRIPT
See Also:
Constant Field Values

GREEK_SYMBOL

public static final java.lang.String GREEK_SYMBOL
See Also:
Constant Field Values

SUPERSCRIPT

public static final java.lang.String SUPERSCRIPT
See Also:
Constant Field Values

ASCII_TYPE_1

public static final java.lang.String ASCII_TYPE_1
See Also:
Constant Field Values

EACC

public static final java.lang.String EACC
See Also:
Constant Field Values

BASIC_HEBREW

public static final java.lang.String BASIC_HEBREW
See Also:
Constant Field Values

BASIC_ARABIC

public static final java.lang.String BASIC_ARABIC
See Also:
Constant Field Values

EXTENDED_ARABIC

public static final java.lang.String EXTENDED_ARABIC
See Also:
Constant Field Values

ASCII_TYPE_2

public static final java.lang.String ASCII_TYPE_2
See Also:
Constant Field Values

BASIC_CYRILLIC

public static final java.lang.String BASIC_CYRILLIC
See Also:
Constant Field Values

EXTENDED_CYRILLIC

public static final java.lang.String EXTENDED_CYRILLIC
See Also:
Constant Field Values

BASIC_GREEK

public static final java.lang.String BASIC_GREEK
See Also:
Constant Field Values

ANSEL

public static final java.lang.String ANSEL
See Also:
Constant Field Values

CONTROL

private static final java.lang.String CONTROL
See Also:
Constant Field Values

esc

private boolean esc

multiByte

private boolean multiByte

pageG0

private boolean pageG0

pageG1

private boolean pageG1

characterSetsMap

private static java.util.Hashtable characterSetsMap

marcKeyCharacterSets

private static java.util.Hashtable marcKeyCharacterSets

unicodeKeyCharacterSets

private static java.util.Hashtable unicodeKeyCharacterSets

logger

private static java.util.logging.Logger logger

marc8Unknown

private char marc8Unknown

unicodeUnknown

private char unicodeUnknown

G0PageId

private java.lang.String G0PageId

G1PageId

private java.lang.String G1PageId

currentG0PageId

private java.lang.String currentG0PageId

currentG1PageId

private java.lang.String currentG1PageId

controlPageId

private java.lang.String controlPageId

G0Page

private java.util.Hashtable G0Page

G1Page

private java.util.Hashtable G1Page

controlPage

private java.util.Hashtable controlPage

lookUpList

private java.util.Vector lookUpList
Constructor Detail

MARC8Unicode

public MARC8Unicode()

MARC8Unicode

public MARC8Unicode(char marc8Unknown,
                    char unicodeUnknown)
Method Detail

toMARC8

public java.lang.String toMARC8(java.lang.String unicode)
                         throws JaferException
Throws:
JaferException

toUnicode

public java.lang.String toUnicode(java.lang.String marc8)
                           throws JaferException
Throws:
JaferException

toUnicode

public java.lang.String toUnicode(byte[] marc8)
                           throws JaferException
Throws:
JaferException

toUnicode

public java.lang.String toUnicode(byte[] marc8,
                                  int offset,
                                  int len)
                           throws JaferException
Throws:
JaferException

loadCharacterSetsMap

private static void loadCharacterSetsMap()
                                  throws JaferException
Throws:
JaferException

getCharacterSet

private java.util.Hashtable getCharacterSet(java.lang.String key,
                                            boolean marcKey)
                                     throws JaferException
Throws:
JaferException

setControlPage

private void setControlPage(java.lang.String key)
                     throws JaferException
Throws:
JaferException

setG0Page

private void setG0Page(java.lang.String key)
                throws JaferException
Throws:
JaferException

setG1Page

private void setG1Page(java.lang.String key)
                throws JaferException
Throws:
JaferException

setG0Page

private void setG0Page(int c)
                throws JaferException
Throws:
JaferException

setG1Page

private void setG1Page(int c)
                throws JaferException
Throws:
JaferException

getPage

private java.util.Hashtable getPage(java.lang.String key,
                                    boolean marcKey)
                             throws JaferException
Throws:
JaferException

getControlPage

private java.util.Hashtable getControlPage()

getG0Page

private java.util.Hashtable getG0Page()

getG1Page

private java.util.Hashtable getG1Page()

setControlPageId

private void setControlPageId(java.lang.String id)

setG0PageId

private void setG0PageId(java.lang.String id)

setCurrentG0PageId

private void setCurrentG0PageId(java.lang.String id)

setG1PageId

private void setG1PageId(java.lang.String id)

setCurrentG1PageId

private void setCurrentG1PageId(java.lang.String id)

getPageESCSequence

private java.lang.String getPageESCSequence()

getControlPageId

private java.lang.String getControlPageId()

getG0PageId

private java.lang.String getG0PageId()

getCurrentG0PageId

private java.lang.String getCurrentG0PageId()

getG1PageId

private java.lang.String getG1PageId()

getCurrentG1PageId

private java.lang.String getCurrentG1PageId()

getPageId

private java.lang.String getPageId(int c)
                            throws JaferException
Throws:
JaferException

setESC

private void setESC(boolean state)

isESC

private boolean isESC()

isESCChar

private boolean isESCChar(int c)

setPageG0

private void setPageG0(boolean state)

isPageG0

private boolean isPageG0()

isG0PageChar

private boolean isG0PageChar(int c)

setPageG1

private void setPageG1(boolean state)

isPageG1

private boolean isPageG1()

isG1PageChar

private boolean isG1PageChar(int c)

isBufferEmpty

private boolean isBufferEmpty(java.lang.StringBuffer buffer)

emptyBuffer

private java.lang.StringBuffer emptyBuffer(java.lang.StringBuffer bufferIn,
                                           java.lang.StringBuffer bufferOut,
                                           int offset)

isNewPage

private boolean isNewPage()

isMultiByteChar

private boolean isMultiByteChar(int c)

setMultiByte

private void setMultiByte(boolean state)

isMultiByte

private boolean isMultiByte()

isControlFunction

private boolean isControlFunction(int c)

isC0Page

private boolean isC0Page(int c)

isC1Page

private boolean isC1Page(int c)

isG0Page

private boolean isG0Page(int c)

isG1Page

private boolean isG1Page(int c)

isDiacritic

private boolean isDiacritic(int c)
                     throws JaferException
Throws:
JaferException

getControlCharacter

private java.lang.Character getControlCharacter(int c)
                                         throws JaferException
Throws:
JaferException

toMultiByte

private char[] toMultiByte(java.lang.String multiByteHexValue)

appendMultiByte

private java.lang.StringBuffer appendMultiByte(int c,
                                               java.lang.StringBuffer bufferIn,
                                               java.lang.StringBuffer bufferOut)
                                        throws JaferException
Throws:
JaferException

getEACCCharacter

private java.lang.Character getEACCCharacter(java.lang.String hexValue)
                                      throws JaferException
Throws:
JaferException

getUnicodeCharacter

private java.lang.Character getUnicodeCharacter(int c)
                                         throws JaferException
Throws:
JaferException

getMARC8Chars

private char[] getMARC8Chars(char unicodeChar)
                      throws JaferException
Throws:
JaferException

buildLookUpList

private void buildLookUpList()

getLookUpList

private java.util.Vector getLookUpList()

moveToFront

private void moveToFront(java.lang.String pageId,
                         java.util.Vector lookUpList)

moveToBack

private void moveToBack(java.lang.String pageId,
                        java.util.Vector lookUpList)