org.jafer.record
Class MARC8Unicode
java.lang.Object
org.jafer.record.MARC8Unicode
public class MARC8Unicode
- extends java.lang.Object
The toUnicode method converts text from the MARC8 character encoding to Unicode (UTF-8): it identifies the character set in use and looks up each character in an XML file derived from the LOC character sets, including the multibyte characters used for EACC.
Character sets are loaded as required. The toMARC8 method converts from Unicode to MARC8 by performing a sequential lookup through the character sets.
- Version:
- 1.0
- Author:
- Antony Corfield; Matthew Dovey; Colin Tatham
Method Summary |
private java.lang.StringBuffer |
appendMultiByte(int c,
java.lang.StringBuffer bufferIn,
java.lang.StringBuffer bufferOut)
|
private void |
buildLookUpList()
|
private java.lang.StringBuffer |
emptyBuffer(java.lang.StringBuffer bufferIn,
java.lang.StringBuffer bufferOut,
int offset)
|
private java.util.Hashtable |
getCharacterSet(java.lang.String key,
boolean marcKey)
|
private java.lang.Character |
getControlCharacter(int c)
|
private java.util.Hashtable |
getControlPage()
|
private java.lang.String |
getControlPageId()
|
private java.lang.String |
getCurrentG0PageId()
|
private java.lang.String |
getCurrentG1PageId()
|
private java.lang.Character |
getEACCCharacter(java.lang.String hexValue)
|
private java.util.Hashtable |
getG0Page()
|
private java.lang.String |
getG0PageId()
|
private java.util.Hashtable |
getG1Page()
|
private java.lang.String |
getG1PageId()
|
private java.util.Vector |
getLookUpList()
|
private char[] |
getMARC8Chars(char unicodeChar)
|
private java.util.Hashtable |
getPage(java.lang.String key,
boolean marcKey)
|
private java.lang.String |
getPageESCSequence()
|
private java.lang.String |
getPageId(int c)
|
private java.lang.Character |
getUnicodeCharacter(int c)
|
private boolean |
isBufferEmpty(java.lang.StringBuffer buffer)
|
private boolean |
isC0Page(int c)
|
private boolean |
isC1Page(int c)
|
private boolean |
isControlFunction(int c)
|
private boolean |
isDiacritic(int c)
|
private boolean |
isESC()
|
private boolean |
isESCChar(int c)
|
private boolean |
isG0Page(int c)
|
private boolean |
isG0PageChar(int c)
|
private boolean |
isG1Page(int c)
|
private boolean |
isG1PageChar(int c)
|
private boolean |
isMultiByte()
|
private boolean |
isMultiByteChar(int c)
|
private boolean |
isNewPage()
|
private boolean |
isPageG0()
|
private boolean |
isPageG1()
|
private static void |
loadCharacterSetsMap()
|
private void |
moveToBack(java.lang.String pageId,
java.util.Vector lookUpList)
|
private void |
moveToFront(java.lang.String pageId,
java.util.Vector lookUpList)
|
private void |
setControlPage(java.lang.String key)
|
private void |
setControlPageId(java.lang.String id)
|
private void |
setCurrentG0PageId(java.lang.String id)
|
private void |
setCurrentG1PageId(java.lang.String id)
|
private void |
setESC(boolean state)
|
private void |
setG0Page(int c)
|
private void |
setG0Page(java.lang.String key)
|
private void |
setG0PageId(java.lang.String id)
|
private void |
setG1Page(int c)
|
private void |
setG1Page(java.lang.String key)
|
private void |
setG1PageId(java.lang.String id)
|
private void |
setMultiByte(boolean state)
|
private void |
setPageG0(boolean state)
|
private void |
setPageG1(boolean state)
|
java.lang.String |
toMARC8(java.lang.String unicode)
|
private char[] |
toMultiByte(java.lang.String multiByteHexValue)
|
java.lang.String |
toUnicode(byte[] marc8)
|
java.lang.String |
toUnicode(byte[] marc8,
int offset,
int len)
|
java.lang.String |
toUnicode(java.lang.String marc8)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
UNICODE_UNKNOWN
public static final char UNICODE_UNKNOWN
MARC8_UNKNOWN
public static final char MARC8_UNKNOWN
ESC
public static final int ESC
ASCII
public static final int ASCII
MULTI_BYTE
public static final int MULTI_BYTE
G0_PAGE_CHAR1
public static final int G0_PAGE_CHAR1
G0_PAGE_CHAR2
public static final int G0_PAGE_CHAR2
G1_PAGE_CHAR1
public static final int G1_PAGE_CHAR1
G1_PAGE_CHAR2
public static final int G1_PAGE_CHAR2
C0_LIMITS_LOWER
public static final int C0_LIMITS_LOWER
C0_LIMITS_UPPER
public static final int C0_LIMITS_UPPER
C1_LIMITS_LOWER
public static final int C1_LIMITS_LOWER
C1_LIMITS_UPPER
public static final int C1_LIMITS_UPPER
G0_LIMITS_LOWER
public static final int G0_LIMITS_LOWER
G0_LIMITS_UPPER
public static final int G0_LIMITS_UPPER
G1_LIMITS_LOWER
public static final int G1_LIMITS_LOWER
G1_LIMITS_UPPER
public static final int G1_LIMITS_UPPER
DIACRITIC_BASIC_HEBREW_LOWER
public static final int DIACRITIC_BASIC_HEBREW_LOWER
DIACRITIC_BASIC_HEBREW_UPPER
public static final int DIACRITIC_BASIC_HEBREW_UPPER
DIACRITIC_BASIC_ARABIC_LOWER
public static final int DIACRITIC_BASIC_ARABIC_LOWER
DIACRITIC_BASIC_ARABIC_UPPER
public static final int DIACRITIC_BASIC_ARABIC_UPPER
DIACRITIC_EXTENDED_ARABIC_LOWER
public static final int DIACRITIC_EXTENDED_ARABIC_LOWER
DIACRITIC_EXTENDED_ARABIC_UPPER
public static final int DIACRITIC_EXTENDED_ARABIC_UPPER
DIACRITIC_BASIC_GREEK_LOWER
public static final int DIACRITIC_BASIC_GREEK_LOWER
DIACRITIC_BASIC_GREEK_UPPER
public static final int DIACRITIC_BASIC_GREEK_UPPER
DIACRITIC_ANSEL_LOWER
public static final int DIACRITIC_ANSEL_LOWER
DIACRITIC_ANSEL_UPPER
public static final int DIACRITIC_ANSEL_UPPER
SUBSCRIPT
public static final java.lang.String SUBSCRIPT
- See Also:
- Constant Field Values
GREEK_SYMBOL
public static final java.lang.String GREEK_SYMBOL
- See Also:
- Constant Field Values
SUPERSCRIPT
public static final java.lang.String SUPERSCRIPT
- See Also:
- Constant Field Values
ASCII_TYPE_1
public static final java.lang.String ASCII_TYPE_1
- See Also:
- Constant Field Values
EACC
public static final java.lang.String EACC
- See Also:
- Constant Field Values
BASIC_HEBREW
public static final java.lang.String BASIC_HEBREW
- See Also:
- Constant Field Values
BASIC_ARABIC
public static final java.lang.String BASIC_ARABIC
- See Also:
- Constant Field Values
EXTENDED_ARABIC
public static final java.lang.String EXTENDED_ARABIC
- See Also:
- Constant Field Values
ASCII_TYPE_2
public static final java.lang.String ASCII_TYPE_2
- See Also:
- Constant Field Values
BASIC_CYRILLIC
public static final java.lang.String BASIC_CYRILLIC
- See Also:
- Constant Field Values
EXTENDED_CYRILLIC
public static final java.lang.String EXTENDED_CYRILLIC
- See Also:
- Constant Field Values
BASIC_GREEK
public static final java.lang.String BASIC_GREEK
- See Also:
- Constant Field Values
ANSEL
public static final java.lang.String ANSEL
- See Also:
- Constant Field Values
CONTROL
private static final java.lang.String CONTROL
- See Also:
- Constant Field Values
esc
private boolean esc
multiByte
private boolean multiByte
pageG0
private boolean pageG0
pageG1
private boolean pageG1
characterSetsMap
private static java.util.Hashtable characterSetsMap
marcKeyCharacterSets
private static java.util.Hashtable marcKeyCharacterSets
unicodeKeyCharacterSets
private static java.util.Hashtable unicodeKeyCharacterSets
logger
private static java.util.logging.Logger logger
marc8Unknown
private char marc8Unknown
unicodeUnknown
private char unicodeUnknown
G0PageId
private java.lang.String G0PageId
G1PageId
private java.lang.String G1PageId
currentG0PageId
private java.lang.String currentG0PageId
currentG1PageId
private java.lang.String currentG1PageId
controlPageId
private java.lang.String controlPageId
G0Page
private java.util.Hashtable G0Page
G1Page
private java.util.Hashtable G1Page
controlPage
private java.util.Hashtable controlPage
lookUpList
private java.util.Vector lookUpList
MARC8Unicode
public MARC8Unicode()
MARC8Unicode
public MARC8Unicode(char marc8Unknown,
char unicodeUnknown)
toMARC8
public java.lang.String toMARC8(java.lang.String unicode)
throws JaferException
- Throws:
JaferException
toUnicode
public java.lang.String toUnicode(java.lang.String marc8)
throws JaferException
- Throws:
JaferException
toUnicode
public java.lang.String toUnicode(byte[] marc8)
throws JaferException
- Throws:
JaferException
toUnicode
public java.lang.String toUnicode(byte[] marc8,
int offset,
int len)
throws JaferException
- Throws:
JaferException
loadCharacterSetsMap
private static void loadCharacterSetsMap()
throws JaferException
- Throws:
JaferException
getCharacterSet
private java.util.Hashtable getCharacterSet(java.lang.String key,
boolean marcKey)
throws JaferException
- Throws:
JaferException
setControlPage
private void setControlPage(java.lang.String key)
throws JaferException
- Throws:
JaferException
setG0Page
private void setG0Page(java.lang.String key)
throws JaferException
- Throws:
JaferException
setG1Page
private void setG1Page(java.lang.String key)
throws JaferException
- Throws:
JaferException
setG0Page
private void setG0Page(int c)
throws JaferException
- Throws:
JaferException
setG1Page
private void setG1Page(int c)
throws JaferException
- Throws:
JaferException
getPage
private java.util.Hashtable getPage(java.lang.String key,
boolean marcKey)
throws JaferException
- Throws:
JaferException
getControlPage
private java.util.Hashtable getControlPage()
getG0Page
private java.util.Hashtable getG0Page()
getG1Page
private java.util.Hashtable getG1Page()
setControlPageId
private void setControlPageId(java.lang.String id)
setG0PageId
private void setG0PageId(java.lang.String id)
setCurrentG0PageId
private void setCurrentG0PageId(java.lang.String id)
setG1PageId
private void setG1PageId(java.lang.String id)
setCurrentG1PageId
private void setCurrentG1PageId(java.lang.String id)
getPageESCSequence
private java.lang.String getPageESCSequence()
getControlPageId
private java.lang.String getControlPageId()
getG0PageId
private java.lang.String getG0PageId()
getCurrentG0PageId
private java.lang.String getCurrentG0PageId()
getG1PageId
private java.lang.String getG1PageId()
getCurrentG1PageId
private java.lang.String getCurrentG1PageId()
getPageId
private java.lang.String getPageId(int c)
throws JaferException
- Throws:
JaferException
setESC
private void setESC(boolean state)
isESC
private boolean isESC()
isESCChar
private boolean isESCChar(int c)
setPageG0
private void setPageG0(boolean state)
isPageG0
private boolean isPageG0()
isG0PageChar
private boolean isG0PageChar(int c)
setPageG1
private void setPageG1(boolean state)
isPageG1
private boolean isPageG1()
isG1PageChar
private boolean isG1PageChar(int c)
isBufferEmpty
private boolean isBufferEmpty(java.lang.StringBuffer buffer)
emptyBuffer
private java.lang.StringBuffer emptyBuffer(java.lang.StringBuffer bufferIn,
java.lang.StringBuffer bufferOut,
int offset)
isNewPage
private boolean isNewPage()
isMultiByteChar
private boolean isMultiByteChar(int c)
setMultiByte
private void setMultiByte(boolean state)
isMultiByte
private boolean isMultiByte()
isControlFunction
private boolean isControlFunction(int c)
isC0Page
private boolean isC0Page(int c)
isC1Page
private boolean isC1Page(int c)
isG0Page
private boolean isG0Page(int c)
isG1Page
private boolean isG1Page(int c)
isDiacritic
private boolean isDiacritic(int c)
throws JaferException
- Throws:
JaferException
getControlCharacter
private java.lang.Character getControlCharacter(int c)
throws JaferException
- Throws:
JaferException
toMultiByte
private char[] toMultiByte(java.lang.String multiByteHexValue)
appendMultiByte
private java.lang.StringBuffer appendMultiByte(int c,
java.lang.StringBuffer bufferIn,
java.lang.StringBuffer bufferOut)
throws JaferException
- Throws:
JaferException
getEACCCharacter
private java.lang.Character getEACCCharacter(java.lang.String hexValue)
throws JaferException
- Throws:
JaferException
getUnicodeCharacter
private java.lang.Character getUnicodeCharacter(int c)
throws JaferException
- Throws:
JaferException
getMARC8Chars
private char[] getMARC8Chars(char unicodeChar)
throws JaferException
- Throws:
JaferException
buildLookUpList
private void buildLookUpList()
getLookUpList
private java.util.Vector getLookUpList()
moveToFront
private void moveToFront(java.lang.String pageId,
java.util.Vector lookUpList)
moveToBack
private void moveToBack(java.lang.String pageId,
java.util.Vector lookUpList)