diff options
Diffstat (limited to 'modules/mahonia/reader.go')
-rw-r--r-- | modules/mahonia/reader.go | 151 |
1 files changed, 151 insertions, 0 deletions
diff --git a/modules/mahonia/reader.go b/modules/mahonia/reader.go new file mode 100644 index 00000000..3514b95b --- /dev/null +++ b/modules/mahonia/reader.go @@ -0,0 +1,151 @@ +package mahonia + +// This file is based on bufio.Reader in the Go standard library, +// which has the following copyright notice: + +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +import ( + "io" + "unicode/utf8" +) + +const ( + defaultBufSize = 4096 +) + +// Reader implements character-set decoding for an io.Reader object. +type Reader struct { + buf []byte + rd io.Reader + decode Decoder + r, w int + err error +} + +// NewReader creates a new Reader that uses the receiver to decode text. +func (d Decoder) NewReader(rd io.Reader) *Reader { + b := new(Reader) + b.buf = make([]byte, defaultBufSize) + b.rd = rd + b.decode = d + return b +} + +// fill reads a new chunk into the buffer. +func (b *Reader) fill() { + // Slide existing data to beginning. + if b.r > 0 { + copy(b.buf, b.buf[b.r:b.w]) + b.w -= b.r + b.r = 0 + } + + // Read new data. + n, e := b.rd.Read(b.buf[b.w:]) + b.w += n + if e != nil { + b.err = e + } +} + +// Read reads data into p. +// It returns the number of bytes read into p. +// It calls Read at most once on the underlying Reader, +// hence n may be less than len(p). +// At EOF, the count will be zero and err will be os.EOF. +func (b *Reader) Read(p []byte) (n int, err error) { + n = len(p) + filled := false + if n == 0 { + return 0, b.err + } + if b.w == b.r { + if b.err != nil { + return 0, b.err + } + if n > len(b.buf) { + // Large read, empty buffer. + // Allocate a larger buffer for efficiency. + b.buf = make([]byte, n) + } + b.fill() + filled = true + if b.w == b.r { + return 0, b.err + } + } + + i := 0 + for i < n { + rune, size, status := b.decode(b.buf[b.r:b.w]) + + if status == STATE_ONLY { + b.r += size + continue + } + + if status == NO_ROOM { + if b.err != nil { + rune = 0xfffd + size = b.w - b.r + if size == 0 { + break + } + status = INVALID_CHAR + } else if filled { + break + } else { + b.fill() + filled = true + continue + } + } + + if i+utf8.RuneLen(rune) > n { + break + } + + b.r += size + if rune < 128 { + p[i] = byte(rune) + i++ + } else { + i += utf8.EncodeRune(p[i:], rune) + } + } + + return i, nil +} + +// ReadRune reads a single Unicode character and returns the +// rune and its size in bytes. +func (b *Reader) ReadRune() (c rune, size int, err error) { +read: + c, size, status := b.decode(b.buf[b.r:b.w]) + + if status == NO_ROOM && b.err == nil { + b.fill() + goto read + } + + if status == STATE_ONLY { + b.r += size + goto read + } + + if b.r == b.w { + return 0, 0, b.err + } + + if status == NO_ROOM { + c = 0xfffd + size = b.w - b.r + status = INVALID_CHAR + } + + b.r += size + return c, size, nil +} |