diff options
author | Unknwon <u@gogs.io> | 2018-05-27 09:07:15 +0800 |
---|---|---|
committer | Unknwon <u@gogs.io> | 2018-05-27 09:07:15 +0800 |
commit | e33d9e77f43e6829ea967e47964d13f5a8aec5cc (patch) | |
tree | ace1a09414a66fd7e293b3837865633168ba4f6e /vendor/github.com/gogs/chardet/utf8.go | |
parent | aff42082441715559bb2e62e11550af1a946a8c9 (diff) |
vendor: rename "gogits" to "gogs"
Diffstat (limited to 'vendor/github.com/gogs/chardet/utf8.go')
-rw-r--r-- | vendor/github.com/gogs/chardet/utf8.go | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/vendor/github.com/gogs/chardet/utf8.go b/vendor/github.com/gogs/chardet/utf8.go new file mode 100644 index 00000000..ae036ad9 --- /dev/null +++ b/vendor/github.com/gogs/chardet/utf8.go @@ -0,0 +1,71 @@ +package chardet + +import ( + "bytes" +) + +var utf8Bom = []byte{0xEF, 0xBB, 0xBF} + +type recognizerUtf8 struct { +} + +func newRecognizer_utf8() *recognizerUtf8 { + return &recognizerUtf8{} +} + +func (*recognizerUtf8) Match(input *recognizerInput) (output recognizerOutput) { + output = recognizerOutput{ + Charset: "UTF-8", + } + hasBom := bytes.HasPrefix(input.raw, utf8Bom) + inputLen := len(input.raw) + var numValid, numInvalid uint32 + var trailBytes uint8 + for i := 0; i < inputLen; i++ { + c := input.raw[i] + if c&0x80 == 0 { + continue + } + if c&0xE0 == 0xC0 { + trailBytes = 1 + } else if c&0xF0 == 0xE0 { + trailBytes = 2 + } else if c&0xF8 == 0xF0 { + trailBytes = 3 + } else { + numInvalid++ + if numInvalid > 5 { + break + } + trailBytes = 0 + } + + for i++; i < inputLen; i++ { + c = input.raw[i] + if c&0xC0 != 0x80 { + numInvalid++ + break + } + if trailBytes--; trailBytes == 0 { + numValid++ + break + } + } + } + + if hasBom && numInvalid == 0 { + output.Confidence = 100 + } else if hasBom && numValid > numInvalid*10 { + output.Confidence = 80 + } else if numValid > 3 && numInvalid == 0 { + output.Confidence = 100 + } else if numValid > 0 && numInvalid == 0 { + output.Confidence = 80 + } else if numValid == 0 && numInvalid == 0 { + // Plain ASCII + output.Confidence = 10 + } else if numValid > numInvalid*10 { + output.Confidence = 25 + } + return +} |