diff options
Diffstat (limited to 'modules/mahonia/gb18030.go')
-rw-r--r-- | modules/mahonia/gb18030.go | 156 |
1 files changed, 0 insertions, 156 deletions
diff --git a/modules/mahonia/gb18030.go b/modules/mahonia/gb18030.go deleted file mode 100644 index d2759250..00000000 --- a/modules/mahonia/gb18030.go +++ /dev/null @@ -1,156 +0,0 @@ -package mahonia - -import ( - "sync" -) - -// Converters for GB18030 encoding. - -func init() { - RegisterCharset(&Charset{ - Name: "GB18030", - NewDecoder: func() Decoder { - gb18030Once.Do(buildGB18030Tables) - return decodeGB18030Rune - }, - NewEncoder: func() Encoder { - gb18030Once.Do(buildGB18030Tables) - return encodeGB18030Rune - }, - }) -} - -func decodeGB18030Rune(p []byte) (r rune, size int, status Status) { - if len(p) == 0 { - status = NO_ROOM - return - } - - b := p[0] - if b < 128 { - return rune(b), 1, SUCCESS - } - - if len(p) < 2 { - status = NO_ROOM - return - } - - if p[0] < 0x81 || p[0] > 0xfe { - return 0xfffd, 1, INVALID_CHAR - } - - if p[1] >= 0x40 { - // 2-byte character - c := uint16(p[0])<<8 + uint16(p[1]) - r = rune(gbkToUnicode[c]) - if r == 0 { - r = gbkToUnicodeExtra[c] - } - - if r != 0 { - return r, 2, SUCCESS - } - } else if p[1] >= 0x30 { - // 4-byte character - if len(p) < 4 { - return 0, 0, NO_ROOM - } - if p[2] < 0x81 || p[2] > 0xfe || p[3] < 0x30 || p[3] > 0x39 { - return 0xfffd, 1, INVALID_CHAR - } - - code := uint32(p[0])<<24 + uint32(p[1])<<16 + uint32(p[2])<<8 + uint32(p[3]) - lin := gb18030Linear(code) - - if lin <= maxGB18030Linear { - r = rune(gb18030LinearToUnicode[lin]) - if r != 0 { - return r, 4, SUCCESS - } - } - - for _, rng := range gb18030Ranges { - if lin >= rng.firstGB && lin <= rng.lastGB { - return rng.firstRune + rune(lin) - rune(rng.firstGB), 4, SUCCESS - } - } - } - - return 0xfffd, 1, INVALID_CHAR -} - -func encodeGB18030Rune(p []byte, r rune) (size int, status Status) { - if len(p) == 0 { - status = NO_ROOM - return - } - - if r < 128 { - p[0] = byte(r) - return 1, SUCCESS - } - - if len(p) < 2 { - status = NO_ROOM - return - } - - var c uint16 - if r < 0x10000 { - c = unicodeToGBK[r] - } else { - c = unicodeToGBKExtra[r] - } - - if c != 0 { - p[0] = byte(c >> 8) - p[1] = byte(c) - return 2, SUCCESS - } - - if len(p) < 4 { - return 0, NO_ROOM - } - - if r < 0x10000 { - f := unicodeToGB18030[r] - if f != 0 { - p[0] = byte(f >> 24) - p[1] = byte(f >> 16) - p[2] = byte(f >> 8) - p[3] = byte(f) - return 4, SUCCESS - } - } - - for _, rng := range gb18030Ranges { - if r >= rng.firstRune && r <= rng.lastRune { - lin := rng.firstGB + uint32(r) - uint32(rng.firstRune) - p[0] = byte(lin/(10*126*10)) + 0x81 - p[1] = byte(lin/(126*10)%10) + 0x30 - p[2] = byte(lin/10%126) + 0x81 - p[3] = byte(lin%10) + 0x30 - return 4, SUCCESS - } - } - - p[0] = 0x1a - return 1, INVALID_CHAR -} - -var gb18030Once sync.Once - -// Mapping from gb18039Linear values to Unicode. -var gb18030LinearToUnicode []uint16 - -var unicodeToGB18030 []uint32 - -func buildGB18030Tables() { - gb18030LinearToUnicode = make([]uint16, maxGB18030Linear+1) - unicodeToGB18030 = make([]uint32, 65536) - for _, data := range gb18030Data { - gb18030LinearToUnicode[gb18030Linear(data.gb18030)] = data.unicode - unicodeToGB18030[data.unicode] = data.gb18030 - } -} |