diff options
Diffstat (limited to 'modules/mahonia/kuten.go')
-rw-r--r-- | modules/mahonia/kuten.go | 88 |
1 files changed, 88 insertions, 0 deletions
diff --git a/modules/mahonia/kuten.go b/modules/mahonia/kuten.go new file mode 100644 index 00000000..24929666 --- /dev/null +++ b/modules/mahonia/kuten.go @@ -0,0 +1,88 @@ +package mahonia + +import ( + "sync" + "unicode/utf8" +) + +// A kutenTable holds the data for a double-byte character set, arranged by ku +// (区, zone) and ten (点, position). These can be converted to various actual +// encoding schemes. +type kutenTable struct { + // Data[ku][ten] is the unicode value for the character at that zone and + // position. + Data [94][94]uint16 + + // FromUnicode holds the ku and ten for each Unicode code point. + // It is not available until Reverse() has been called. + FromUnicode [][2]byte + + // once is used to synchronize the generation of FromUnicode. + once sync.Once +} + +// Reverse generates FromUnicode. +func (t *kutenTable) Reverse() { + t.once.Do(func() { + t.FromUnicode = make([][2]byte, 65536) + for ku := range t.Data { + for ten, unicode := range t.Data[ku] { + t.FromUnicode[unicode] = [2]byte{byte(ku), byte(ten)} + } + } + }) +} + +// DecodeLow decodes a character from an encoding that does not have the high +// bit set. +func (t *kutenTable) DecodeLow(p []byte) (c rune, size int, status Status) { + if len(p) < 2 { + return 0, 0, NO_ROOM + } + ku := p[0] - 0x21 + ten := p[1] - 0x21 + if ku > 93 || ten > 93 { + return utf8.RuneError, 1, INVALID_CHAR + } + u := t.Data[ku][ten] + if u == 0 { + return utf8.RuneError, 1, INVALID_CHAR + } + return rune(u), 2, SUCCESS +} + +// DecodeHigh decodes a character from an encoding that has the high bit set. +func (t *kutenTable) DecodeHigh(p []byte) (c rune, size int, status Status) { + if len(p) < 2 { + return 0, 0, NO_ROOM + } + ku := p[0] - 0xa1 + ten := p[1] - 0xa1 + if ku > 93 || ten > 93 { + return utf8.RuneError, 1, INVALID_CHAR + } + u := t.Data[ku][ten] + if u == 0 { + return utf8.RuneError, 1, INVALID_CHAR + } + return rune(u), 2, SUCCESS +} + +// EncodeHigh encodes a character in an encoding that has the high bit set. +func (t *kutenTable) EncodeHigh(p []byte, c rune) (size int, status Status) { + if len(p) < 2 { + return 0, NO_ROOM + } + if c > 0xffff { + p[0] = '?' + return 1, INVALID_CHAR + } + kuten := t.FromUnicode[c] + if kuten == [2]byte{0, 0} && c != rune(t.Data[0][0]) { + p[0] = '?' + return 1, INVALID_CHAR + } + p[0] = kuten[0] + 0xa1 + p[1] = kuten[1] + 0xa1 + return 2, SUCCESS +} |