aboutsummaryrefslogtreecommitdiff
path: root/modules/mahonia/utf16.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/mahonia/utf16.go')
-rw-r--r--modules/mahonia/utf16.go170
1 files changed, 170 insertions, 0 deletions
diff --git a/modules/mahonia/utf16.go b/modules/mahonia/utf16.go
new file mode 100644
index 00000000..3ec7b770
--- /dev/null
+++ b/modules/mahonia/utf16.go
@@ -0,0 +1,170 @@
+package mahonia
+
+import (
+ "unicode/utf16"
+)
+
+// init registers every entry of utf16Charsets via RegisterCharset.
+// The address of the slice element itself (not of a copy) is passed.
+func init() {
+	for idx := range utf16Charsets {
+		RegisterCharset(&utf16Charsets[idx])
+	}
+}
+
+// utf16Charsets lists the UTF-16 variants provided by this file:
+// "UTF-16" (byte order chosen from a leading byte order mark when decoding,
+// big-endian with a leading byte order mark when encoding), "UTF-16BE" and
+// "UTF-16LE" (fixed byte order, no byte order mark handling).
+var utf16Charsets = []Charset{
+	{
+		Name: "UTF-16",
+		NewDecoder: func() Decoder {
+			// selected stays nil until the first two bytes have been inspected.
+			var selected Decoder
+			return func(p []byte) (c rune, size int, status Status) {
+				if selected != nil {
+					return selected(p)
+				}
+
+				// The byte order is still unknown; two bytes are needed to decide.
+				if len(p) < 2 {
+					return 0, 0, NO_ROOM
+				}
+
+				if p[0] == 0xfe && p[1] == 0xff {
+					// Big-endian mark: consume it without producing a rune.
+					selected = decodeUTF16beRune
+					return 0, 2, STATE_ONLY
+				}
+				if p[0] == 0xff && p[1] == 0xfe {
+					// Little-endian mark: consume it without producing a rune.
+					selected = decodeUTF16leRune
+					return 0, 2, STATE_ONLY
+				}
+
+				// No mark present: fall back to big-endian and decode these bytes as data.
+				selected = decodeUTF16beRune
+				return selected(p)
+			}
+		},
+		NewEncoder: func() Encoder {
+			bomPending := true
+			return func(p []byte, c rune) (size int, status Status) {
+				if !bomPending {
+					return encodeUTF16beRune(p, c)
+				}
+
+				if len(p) < 2 {
+					return 0, NO_ROOM
+				}
+
+				// The first successful call emits only the big-endian mark;
+				// c itself is not written by this call.
+				p[0], p[1] = 0xfe, 0xff
+				bomPending = false
+				return 2, STATE_ONLY
+			}
+		},
+	},
+	{
+		Name:       "UTF-16BE",
+		NewDecoder: func() Decoder { return decodeUTF16beRune },
+		NewEncoder: func() Encoder { return encodeUTF16beRune },
+	},
+	{
+		Name:       "UTF-16LE",
+		NewDecoder: func() Decoder { return decodeUTF16leRune },
+		NewEncoder: func() Encoder { return encodeUTF16leRune },
+	},
+}
+
+// decodeUTF16beRune decodes one code point from the big-endian UTF-16 bytes
+// at the start of p.
+//
+// It returns:
+//   - (0, 0, NO_ROOM) when p holds fewer than 2 bytes, or holds a high
+//     surrogate but fewer than the 4 bytes needed for the full pair;
+//   - (0xfffd, 2, INVALID_CHAR) when the first unit is a surrogate that does
+//     not form a valid pair with the following unit;
+//   - (c, 2, SUCCESS) for a non-surrogate unit and (c, 4, SUCCESS) for a
+//     valid surrogate pair.
+func decodeUTF16beRune(p []byte) (r rune, size int, status Status) {
+	if len(p) < 2 {
+		return 0, 0, NO_ROOM
+	}
+
+	c := rune(p[0])<<8 | rune(p[1])
+	if !utf16.IsSurrogate(c) {
+		return c, 2, SUCCESS
+	}
+
+	// A low surrogate (0xdc00-0xdfff) can never start a pair, so it is
+	// invalid whatever follows; reject it now instead of waiting for two
+	// more bytes that cannot change the outcome.
+	if c >= 0xdc00 {
+		return 0xfffd, 2, INVALID_CHAR
+	}
+
+	if len(p) < 4 {
+		return 0, 0, NO_ROOM
+	}
+
+	c2 := rune(p[2])<<8 | rune(p[3])
+	c = utf16.DecodeRune(c, c2)
+	if c == 0xfffd {
+		// Only the first unit is consumed, so the second unit is examined
+		// again on the next call.
+		return c, 2, INVALID_CHAR
+	}
+	return c, 4, SUCCESS
+}
+
+// encodeUTF16beRune writes c to the start of p as big-endian UTF-16.
+//
+// Values below 0x10000 are written as a single 16-bit unit (2 bytes) with no
+// further validation; all other values are written as the two units returned
+// by utf16.EncodeRune (4 bytes). When p is too short for the required output
+// nothing is written and (0, NO_ROOM) is returned.
+func encodeUTF16beRune(p []byte, c rune) (size int, status Status) {
+	if c >= 0x10000 {
+		if len(p) < 4 {
+			return 0, NO_ROOM
+		}
+		hi, lo := utf16.EncodeRune(c)
+		p[0], p[1] = byte(hi>>8), byte(hi)
+		p[2], p[3] = byte(lo>>8), byte(lo)
+		return 4, SUCCESS
+	}
+
+	if len(p) < 2 {
+		return 0, NO_ROOM
+	}
+	p[0], p[1] = byte(c>>8), byte(c)
+	return 2, SUCCESS
+}
+
+// decodeUTF16leRune decodes one code point from the little-endian UTF-16
+// bytes at the start of p.
+//
+// It returns:
+//   - (0, 0, NO_ROOM) when p holds fewer than 2 bytes, or holds a high
+//     surrogate but fewer than the 4 bytes needed for the full pair;
+//   - (0xfffd, 2, INVALID_CHAR) when the first unit is a surrogate that does
+//     not form a valid pair with the following unit;
+//   - (c, 2, SUCCESS) for a non-surrogate unit and (c, 4, SUCCESS) for a
+//     valid surrogate pair.
+func decodeUTF16leRune(p []byte) (r rune, size int, status Status) {
+	if len(p) < 2 {
+		return 0, 0, NO_ROOM
+	}
+
+	c := rune(p[1])<<8 | rune(p[0])
+	if !utf16.IsSurrogate(c) {
+		return c, 2, SUCCESS
+	}
+
+	// A low surrogate (0xdc00-0xdfff) can never start a pair, so it is
+	// invalid whatever follows; reject it now instead of waiting for two
+	// more bytes that cannot change the outcome.
+	if c >= 0xdc00 {
+		return 0xfffd, 2, INVALID_CHAR
+	}
+
+	if len(p) < 4 {
+		return 0, 0, NO_ROOM
+	}
+
+	c2 := rune(p[3])<<8 | rune(p[2])
+	c = utf16.DecodeRune(c, c2)
+	if c == 0xfffd {
+		// Only the first unit is consumed, so the second unit is examined
+		// again on the next call.
+		return c, 2, INVALID_CHAR
+	}
+	return c, 4, SUCCESS
+}
+
+// encodeUTF16leRune writes c to the start of p as little-endian UTF-16.
+//
+// Values below 0x10000 are written as a single 16-bit unit (2 bytes) with no
+// further validation; all other values are written as the two units returned
+// by utf16.EncodeRune (4 bytes). When p is too short for the required output
+// nothing is written and (0, NO_ROOM) is returned.
+func encodeUTF16leRune(p []byte, c rune) (size int, status Status) {
+	if c >= 0x10000 {
+		if len(p) < 4 {
+			return 0, NO_ROOM
+		}
+		hi, lo := utf16.EncodeRune(c)
+		p[0], p[1] = byte(hi), byte(hi>>8)
+		p[2], p[3] = byte(lo), byte(lo>>8)
+		return 4, SUCCESS
+	}
+
+	if len(p) < 2 {
+		return 0, NO_ROOM
+	}
+	p[0], p[1] = byte(c), byte(c>>8)
+	return 2, SUCCESS
+}