diff options
Diffstat (limited to 'vendor/github.com/gogs/chardet/unicode.go')
-rw-r--r-- | vendor/github.com/gogs/chardet/unicode.go | 103 |
1 files changed, 0 insertions, 103 deletions
diff --git a/vendor/github.com/gogs/chardet/unicode.go b/vendor/github.com/gogs/chardet/unicode.go deleted file mode 100644 index 6f9fa9e6..00000000 --- a/vendor/github.com/gogs/chardet/unicode.go +++ /dev/null @@ -1,103 +0,0 @@ -package chardet - -import ( - "bytes" -) - -var ( - utf16beBom = []byte{0xFE, 0xFF} - utf16leBom = []byte{0xFF, 0xFE} - utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF} - utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00} -) - -type recognizerUtf16be struct { -} - -func newRecognizer_utf16be() *recognizerUtf16be { - return &recognizerUtf16be{} -} - -func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: "UTF-16BE", - } - if bytes.HasPrefix(input.raw, utf16beBom) { - output.Confidence = 100 - } - return -} - -type recognizerUtf16le struct { -} - -func newRecognizer_utf16le() *recognizerUtf16le { - return &recognizerUtf16le{} -} - -func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: "UTF-16LE", - } - if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) { - output.Confidence = 100 - } - return -} - -type recognizerUtf32 struct { - name string - bom []byte - decodeChar func(input []byte) uint32 -} - -func decodeUtf32be(input []byte) uint32 { - return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3]) -} - -func decodeUtf32le(input []byte) uint32 { - return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0]) -} - -func newRecognizer_utf32be() *recognizerUtf32 { - return &recognizerUtf32{ - "UTF-32BE", - utf32beBom, - decodeUtf32be, - } -} - -func newRecognizer_utf32le() *recognizerUtf32 { - return &recognizerUtf32{ - "UTF-32LE", - utf32leBom, - decodeUtf32le, - } -} - -func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) { - output = recognizerOutput{ - Charset: r.name, - } - hasBom := bytes.HasPrefix(input.raw, r.bom) - var numValid, numInvalid uint32 - for b := input.raw; len(b) >= 4; b = b[4:] { - if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) { - numInvalid++ - } else { - numValid++ - } - } - if hasBom && numInvalid == 0 { - output.Confidence = 100 - } else if hasBom && numValid > numInvalid*10 { - output.Confidence = 80 - } else if numValid > 3 && numInvalid == 0 { - output.Confidence = 100 - } else if numValid > 0 && numInvalid == 0 { - output.Confidence = 80 - } else if numValid > numInvalid*10 { - output.Confidence = 25 - } - return -} |