diff options
Diffstat (limited to 'vendor/github.com/jaytaylor/html2text')
-rw-r--r-- | vendor/github.com/jaytaylor/html2text/LICENSE | 22 | ||||
-rw-r--r-- | vendor/github.com/jaytaylor/html2text/README.md | 112 | ||||
-rw-r--r-- | vendor/github.com/jaytaylor/html2text/html2text.go | 300 |
3 files changed, 0 insertions, 434 deletions
diff --git a/vendor/github.com/jaytaylor/html2text/LICENSE b/vendor/github.com/jaytaylor/html2text/LICENSE deleted file mode 100644 index 24dc4abe..00000000 --- a/vendor/github.com/jaytaylor/html2text/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Jay Taylor - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - diff --git a/vendor/github.com/jaytaylor/html2text/README.md b/vendor/github.com/jaytaylor/html2text/README.md deleted file mode 100644 index 6e67dbcc..00000000 --- a/vendor/github.com/jaytaylor/html2text/README.md +++ /dev/null @@ -1,112 +0,0 @@ -# html2text - -[](https://godoc.org/github.com/jaytaylor/html2text) -[](https://travis-ci.org/jaytaylor/html2text) -[](https://goreportcard.com/report/github.com/jaytaylor/html2text) - -### Converts HTML into text - - -## Introduction - -html2text is a simple golang package for rendering HTML into plaintext. - -There are still lots of improvements to be had, but FWIW this has worked fine for my [basic] HTML-2-text needs. - -It requires go 1.x or newer ;) - - -## Download the package - -```bash -go get github.com/jaytaylor/html2text -``` - -## Example usage - -```go -package main - -import ( - "fmt" - - "github.com/jaytaylor/html2text" -) - -func main() { - inputHtml := ` - <html> - <head> - <title>My Mega Service</title> - <link rel=\"stylesheet\" href=\"main.css\"> - <style type=\"text/css\">body { color: #fff; }</style> - </head> - - <body> - <div class="logo"> - <a href="http://mymegaservice.com/"><img src="/logo-image.jpg" alt="Mega Service"/></a> - </div> - - <h1>Welcome to your new account on my service!</h1> - - <p> - Here is some more information: - - <ul> - <li>Link 1: <a href="https://example.com">Example.com</a></li> - <li>Link 2: <a href="https://example2.com">Example2.com</a></li> - <li>Something else</li> - </ul> - </p> - </body> - </html> - ` - - text, err := html2text.FromString(inputHtml) - if err != nil { - panic(err) - } - fmt.Println(text) -} -``` - -Output: -``` -Mega Service ( http://mymegaservice.com/ ) - -****************************************** -Welcome to your new account on my service! -****************************************** - -Here is some more information: - -* Link 1: Example.com ( https://example.com ) -* Link 2: Example2.com ( https://example2.com ) -* Something else -``` - - -## Unit-tests - -Running the unit-tests is straightforward and standard: - -```bash -go test -``` - - -# License - -Permissive MIT license. - - -## Contact - -You are more than welcome to open issues and send pull requests if you find a bug or want a new feature. - -If you appreciate this library please feel free to drop me a line and tell me! It's always nice to hear from people who have benefitted from my work. - -Email: jay at (my github username).com - -Twitter: [@jtaylor](https://twitter.com/jtaylor) - diff --git a/vendor/github.com/jaytaylor/html2text/html2text.go b/vendor/github.com/jaytaylor/html2text/html2text.go deleted file mode 100644 index 66454cfc..00000000 --- a/vendor/github.com/jaytaylor/html2text/html2text.go +++ /dev/null @@ -1,300 +0,0 @@ -package html2text - -import ( - "bytes" - "io" - "regexp" - "strings" - "unicode" - - "golang.org/x/net/html" - "golang.org/x/net/html/atom" -) - -var ( - spacingRe = regexp.MustCompile(`[ \r\n\t]+`) - newlineRe = regexp.MustCompile(`\n\n+`) -) - -type textifyTraverseCtx struct { - Buf bytes.Buffer - - prefix string - blockquoteLevel int - lineLength int - endsWithSpace bool - endsWithNewline bool - justClosedDiv bool -} - -func (ctx *textifyTraverseCtx) traverse(node *html.Node) error { - switch node.Type { - - default: - return ctx.traverseChildren(node) - - case html.TextNode: - data := strings.Trim(spacingRe.ReplaceAllString(node.Data, " "), " ") - return ctx.emit(data) - - case html.ElementNode: - - ctx.justClosedDiv = false - switch node.DataAtom { - case atom.Br: - return ctx.emit("\n") - - case atom.H1, atom.H2, atom.H3: - subCtx := textifyTraverseCtx{} - if err := subCtx.traverseChildren(node); err != nil { - return err - } - - str := subCtx.Buf.String() - dividerLen := 0 - for _, line := range strings.Split(str, "\n") { - if lineLen := len([]rune(line)); lineLen-1 > dividerLen { - dividerLen = lineLen - 1 - } - } - divider := "" - if node.DataAtom == atom.H1 { - divider = strings.Repeat("*", dividerLen) - } else { - divider = strings.Repeat("-", dividerLen) - } - - if node.DataAtom == atom.H3 { - return ctx.emit("\n\n" + str + "\n" + divider + "\n\n") - } - return ctx.emit("\n\n" + divider + "\n" + str + "\n" + divider + "\n\n") - - case atom.Blockquote: - ctx.blockquoteLevel++ - ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) + " " - if err := ctx.emit("\n"); err != nil { - return err - } - if ctx.blockquoteLevel == 1 { - if err := ctx.emit("\n"); err != nil { - return err - } - } - if err := ctx.traverseChildren(node); err != nil { - return err - } - ctx.blockquoteLevel-- - ctx.prefix = strings.Repeat(">", ctx.blockquoteLevel) - if ctx.blockquoteLevel > 0 { - ctx.prefix += " " - } - return ctx.emit("\n\n") - - case atom.Div: - if ctx.lineLength > 0 { - if err := ctx.emit("\n"); err != nil { - return err - } - } - if err := ctx.traverseChildren(node); err != nil { - return err - } - var err error - if ctx.justClosedDiv == false { - err = ctx.emit("\n") - } - ctx.justClosedDiv = true - return err - - case atom.Li: - if err := ctx.emit("* "); err != nil { - return err - } - - if err := ctx.traverseChildren(node); err != nil { - return err - } - - return ctx.emit("\n") - - case atom.B, atom.Strong: - subCtx := textifyTraverseCtx{} - subCtx.endsWithSpace = true - if err := subCtx.traverseChildren(node); err != nil { - return err - } - str := subCtx.Buf.String() - return ctx.emit("*" + str + "*") - - case atom.A: - // If image is the only child, take its alt text as the link text - if img := node.FirstChild; img != nil && node.LastChild == img && img.DataAtom == atom.Img { - if altText := getAttrVal(img, "alt"); altText != "" { - ctx.emit(altText) - } - } else if err := ctx.traverseChildren(node); err != nil { - return err - } - - hrefLink := "" - if attrVal := getAttrVal(node, "href"); attrVal != "" { - attrVal = ctx.normalizeHrefLink(attrVal) - if attrVal != "" { - hrefLink = "( " + attrVal + " )" - } - } - - return ctx.emit(hrefLink) - - case atom.P, atom.Ul, atom.Table: - if err := ctx.emit("\n\n"); err != nil { - return err - } - - if err := ctx.traverseChildren(node); err != nil { - return err - } - - return ctx.emit("\n\n") - - case atom.Tr: - if err := ctx.traverseChildren(node); err != nil { - return err - } - - return ctx.emit("\n") - - case atom.Style, atom.Script, atom.Head: - // Ignore the subtree - return nil - - default: - return ctx.traverseChildren(node) - } - } -} - -func (ctx *textifyTraverseCtx) traverseChildren(node *html.Node) error { - for c := node.FirstChild; c != nil; c = c.NextSibling { - if err := ctx.traverse(c); err != nil { - return err - } - } - - return nil -} - -func (ctx *textifyTraverseCtx) emit(data string) error { - if len(data) == 0 { - return nil - } - lines := ctx.breakLongLines(data) - var err error - for _, line := range lines { - runes := []rune(line) - startsWithSpace := unicode.IsSpace(runes[0]) - if !startsWithSpace && !ctx.endsWithSpace { - ctx.Buf.WriteByte(' ') - ctx.lineLength++ - } - ctx.endsWithSpace = unicode.IsSpace(runes[len(runes)-1]) - for _, c := range line { - _, err = ctx.Buf.WriteString(string(c)) - if err != nil { - return err - } - ctx.lineLength++ - if c == '\n' { - ctx.lineLength = 0 - if ctx.prefix != "" { - _, err = ctx.Buf.WriteString(ctx.prefix) - if err != nil { - return err - } - } - } - } - } - return nil -} - -func (ctx *textifyTraverseCtx) breakLongLines(data string) []string { - // only break lines when we are in blockquotes - if ctx.blockquoteLevel == 0 { - return []string{data} - } - var ret []string - runes := []rune(data) - l := len(runes) - existing := ctx.lineLength - if existing >= 74 { - ret = append(ret, "\n") - existing = 0 - } - for l+existing > 74 { - i := 74 - existing - for i >= 0 && !unicode.IsSpace(runes[i]) { - i-- - } - if i == -1 { - // no spaces, so go the other way - i = 74 - existing - for i < l && !unicode.IsSpace(runes[i]) { - i++ - } - } - ret = append(ret, string(runes[:i])+"\n") - for i < l && unicode.IsSpace(runes[i]) { - i++ - } - runes = runes[i:] - l = len(runes) - existing = 0 - } - if len(runes) > 0 { - ret = append(ret, string(runes)) - } - return ret -} - -func (ctx *textifyTraverseCtx) normalizeHrefLink(link string) string { - link = strings.TrimSpace(link) - link = strings.TrimPrefix(link, "mailto:") - return link -} - -func getAttrVal(node *html.Node, attrName string) string { - for _, attr := range node.Attr { - if attr.Key == attrName { - return attr.Val - } - } - - return "" -} - -func FromReader(reader io.Reader) (string, error) { - doc, err := html.Parse(reader) - if err != nil { - return "", err - } - - ctx := textifyTraverseCtx{ - Buf: bytes.Buffer{}, - } - if err = ctx.traverse(doc); err != nil { - return "", err - } - - text := strings.TrimSpace(newlineRe.ReplaceAllString( - strings.Replace(ctx.Buf.String(), "\n ", "\n", -1), "\n\n")) - return text, nil -} - -func FromString(input string) (string, error) { - text, err := FromReader(strings.NewReader(input)) - if err != nil { - return "", err - } - return text, nil -} |