diff options
author | Unknwon <u@gogs.io> | 2017-04-04 19:29:59 -0400 |
---|---|---|
committer | Unknwon <u@gogs.io> | 2017-04-04 19:29:59 -0400 |
commit | d05395fe906dad7741201faa69a54fef538deda9 (patch) | |
tree | 11dae6c5c9b40b8ce85c7294bd0309c03cb1199e /pkg/markup | |
parent | 37b10666dea98cebf75d0c6f11ee87211ef94703 (diff) |
Refactoring: rename modules -> pkg
Reasons to change:
1. Shorter than 'modules'
2. More generally used by other Go projects
3. Corresponds to the naming of '$GOPATH/pkg' directory
Diffstat (limited to 'pkg/markup')
-rw-r--r-- | pkg/markup/markdown.go | 167 | ||||
-rw-r--r-- | pkg/markup/markdown_test.go | 111 | ||||
-rw-r--r-- | pkg/markup/markup.go | 335 | ||||
-rw-r--r-- | pkg/markup/markup_test.go | 284 | ||||
-rw-r--r-- | pkg/markup/sanitizer.go | 51 | ||||
-rw-r--r-- | pkg/markup/sanitizer_test.go | 38 |
6 files changed, 986 insertions, 0 deletions
diff --git a/pkg/markup/markdown.go b/pkg/markup/markdown.go new file mode 100644 index 00000000..3414a58c --- /dev/null +++ b/pkg/markup/markdown.go @@ -0,0 +1,167 @@ +// Copyright 2014 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "bytes" + "fmt" + "path" + "path/filepath" + "regexp" + "strings" + + "github.com/russross/blackfriday" + + "github.com/gogits/gogs/pkg/base" + "github.com/gogits/gogs/pkg/setting" +) + +// IsMarkdownFile reports whether name looks like a Markdown file based on its extension. +func IsMarkdownFile(name string) bool { + extension := strings.ToLower(filepath.Ext(name)) + for _, ext := range setting.Markdown.FileExtensions { + if strings.ToLower(ext) == extension { + return true + } + } + return false +} + +// MarkdownRenderer is a extended version of underlying Markdown render object. +type MarkdownRenderer struct { + blackfriday.Renderer + urlPrefix string +} + +var validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://|^mailto:`) + +// isLink reports whether link fits valid format. +func isLink(link []byte) bool { + return validLinksPattern.Match(link) +} + +// Link defines how formal links should be processed to produce corresponding HTML elements. +func (r *MarkdownRenderer) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) { + if len(link) > 0 && !isLink(link) { + if link[0] != '#' { + link = []byte(path.Join(r.urlPrefix, string(link))) + } + } + + r.Renderer.Link(out, link, title, content) +} + +// AutoLink defines how auto-detected links should be processed to produce corresponding HTML elements. +// Reference for kind: https://github.com/russross/blackfriday/blob/master/markdown.go#L69-L76 +func (r *MarkdownRenderer) AutoLink(out *bytes.Buffer, link []byte, kind int) { + if kind != blackfriday.LINK_TYPE_NORMAL { + r.Renderer.AutoLink(out, link, kind) + return + } + + // Since this method could only possibly serve one link at a time, + // we do not need to find all. + if bytes.HasPrefix(link, []byte(setting.AppUrl)) { + m := CommitPattern.Find(link) + if m != nil { + m = bytes.TrimSpace(m) + i := strings.Index(string(m), "commit/") + j := strings.Index(string(m), "#") + if j == -1 { + j = len(m) + } + out.WriteString(fmt.Sprintf(` <code><a href="%s">%s</a></code>`, m, base.ShortSha(string(m[i+7:j])))) + return + } + + m = IssueFullPattern.Find(link) + if m != nil { + m = bytes.TrimSpace(m) + i := strings.Index(string(m), "issues/") + j := strings.Index(string(m), "#") + if j == -1 { + j = len(m) + } + + index := string(m[i+7 : j]) + fullRepoURL := setting.AppUrl + strings.TrimPrefix(r.urlPrefix, "/") + var link string + if strings.HasPrefix(string(m), fullRepoURL) { + // Use a short issue reference if the URL refers to this repository + link = fmt.Sprintf(`<a href="%s">#%s</a>`, m, index) + } else { + // Use a cross-repository issue reference if the URL refers to a different repository + repo := string(m[len(setting.AppUrl) : i-1]) + link = fmt.Sprintf(`<a href="%s">%s#%s</a>`, m, repo, index) + } + out.WriteString(link) + return + } + } + + r.Renderer.AutoLink(out, link, kind) +} + +// ListItem defines how list items should be processed to produce corresponding HTML elements. +func (options *MarkdownRenderer) ListItem(out *bytes.Buffer, text []byte, flags int) { + // Detect procedures to draw checkboxes. + switch { + case bytes.HasPrefix(text, []byte("[ ] ")): + text = append([]byte(`<input type="checkbox" disabled="" />`), text[3:]...) + case bytes.HasPrefix(text, []byte("[x] ")): + text = append([]byte(`<input type="checkbox" disabled="" checked="" />`), text[3:]...) + } + options.Renderer.ListItem(out, text, flags) +} + +// RawMarkdown renders Markdown to HTML without handling special links. +func RawMarkdown(body []byte, urlPrefix string) []byte { + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_SKIP_STYLE + htmlFlags |= blackfriday.HTML_OMIT_CONTENTS + + if setting.Smartypants.Enabled { + htmlFlags |= blackfriday.HTML_USE_SMARTYPANTS + if setting.Smartypants.Fractions { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_FRACTIONS + } + if setting.Smartypants.Dashes { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_DASHES + } + if setting.Smartypants.LatexDashes { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_LATEX_DASHES + } + if setting.Smartypants.AngledQuotes { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_ANGLED_QUOTES + } + } + + renderer := &MarkdownRenderer{ + Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""), + urlPrefix: urlPrefix, + } + + // set up the parser + extensions := 0 + extensions |= blackfriday.EXTENSION_NO_INTRA_EMPHASIS + extensions |= blackfriday.EXTENSION_TABLES + extensions |= blackfriday.EXTENSION_FENCED_CODE + extensions |= blackfriday.EXTENSION_AUTOLINK + extensions |= blackfriday.EXTENSION_STRIKETHROUGH + extensions |= blackfriday.EXTENSION_SPACE_HEADERS + extensions |= blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK + + if setting.Markdown.EnableHardLineBreak { + extensions |= blackfriday.EXTENSION_HARD_LINE_BREAK + } + + body = blackfriday.Markdown(body, renderer, extensions) + return body +} + +// Markdown takes a string or []byte and renders to HTML in Markdown syntax with special links. +func Markdown(input interface{}, urlPrefix string, metas map[string]string) []byte { + return Render(MARKDOWN, input, urlPrefix, metas) +} diff --git a/pkg/markup/markdown_test.go b/pkg/markup/markdown_test.go new file mode 100644 index 00000000..a4bf074f --- /dev/null +++ b/pkg/markup/markdown_test.go @@ -0,0 +1,111 @@ +// Copyright 2016 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/russross/blackfriday" + . "github.com/smartystreets/goconvey/convey" + + . "github.com/gogits/gogs/pkg/markup" + "github.com/gogits/gogs/pkg/setting" +) + +func Test_IsMarkdownFile(t *testing.T) { + setting.Markdown.FileExtensions = strings.Split(".md,.markdown,.mdown,.mkd", ",") + Convey("Detect Markdown file extension", t, func() { + testCases := []struct { + ext string + match bool + }{ + {".md", true}, + {".markdown", true}, + {".mdown", true}, + {".mkd", true}, + {".org", false}, + {".rst", false}, + {".asciidoc", false}, + } + + for _, tc := range testCases { + So(IsMarkdownFile(tc.ext), ShouldEqual, tc.match) + } + }) +} + +func Test_Markdown(t *testing.T) { + Convey("Rendering an issue URL", t, func() { + setting.AppUrl = "http://localhost:3000/" + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_SKIP_STYLE + htmlFlags |= blackfriday.HTML_OMIT_CONTENTS + renderer := &MarkdownRenderer{ + Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""), + } + buffer := new(bytes.Buffer) + Convey("To the internal issue tracker", func() { + Convey("It should render valid issue URLs", func() { + testCases := []string{ + "http://localhost:3000/user/repo/issues/3333", "<a href=\"http://localhost:3000/user/repo/issues/3333\">#3333</a>", + } + + for i := 0; i < len(testCases); i += 2 { + renderer.AutoLink(buffer, []byte(testCases[i]), blackfriday.LINK_TYPE_NORMAL) + + line, _ := buffer.ReadString(0) + So(line, ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render but not change non-issue URLs", func() { + testCases := []string{ + "http://1111/2222/ssss-issues/3333?param=blah&blahh=333", "<a href=\"http://1111/2222/ssss-issues/3333?param=blah&blahh=333\">http://1111/2222/ssss-issues/3333?param=blah&blahh=333</a>", + "http://test.com/issues/33333", "<a href=\"http://test.com/issues/33333\">http://test.com/issues/33333</a>", + "http://test.com/issues/3", "<a href=\"http://test.com/issues/3\">http://test.com/issues/3</a>", + "http://issues/333", "<a href=\"http://issues/333\">http://issues/333</a>", + "https://issues/333", "<a href=\"https://issues/333\">https://issues/333</a>", + "http://tissues/0", "<a href=\"http://tissues/0\">http://tissues/0</a>", + } + + for i := 0; i < len(testCases); i += 2 { + renderer.AutoLink(buffer, []byte(testCases[i]), blackfriday.LINK_TYPE_NORMAL) + + line, _ := buffer.ReadString(0) + So(line, ShouldEqual, testCases[i+1]) + } + }) + }) + }) + + Convey("Rendering a commit URL", t, func() { + setting.AppUrl = "http://localhost:3000/" + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_SKIP_STYLE + htmlFlags |= blackfriday.HTML_OMIT_CONTENTS + renderer := &MarkdownRenderer{ + Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""), + } + buffer := new(bytes.Buffer) + Convey("To the internal issue tracker", func() { + Convey("It should correctly convert URLs", func() { + testCases := []string{ + "http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae", " <code><a href=\"http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae\">d8a994ef24</a></code>", + "http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2", " <code><a href=\"http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2\">d8a994ef24</a></code>", + "https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2", "<a href=\"https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2\">https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2</a>", + "https://commit/d8a994ef243349f321568f9e36d5c3f444b99cae", "<a href=\"https://commit/d8a994ef243349f321568f9e36d5c3f444b99cae\">https://commit/d8a994ef243349f321568f9e36d5c3f444b99cae</a>", + } + + for i := 0; i < len(testCases); i += 2 { + renderer.AutoLink(buffer, []byte(testCases[i]), blackfriday.LINK_TYPE_NORMAL) + + line, _ := buffer.ReadString(0) + So(line, ShouldEqual, testCases[i+1]) + } + }) + }) + }) +} diff --git a/pkg/markup/markup.go b/pkg/markup/markup.go new file mode 100644 index 00000000..5c92b74a --- /dev/null +++ b/pkg/markup/markup.go @@ -0,0 +1,335 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "bytes" + "fmt" + "io" + "regexp" + "strings" + + "github.com/Unknwon/com" + "golang.org/x/net/html" + + "github.com/gogits/gogs/pkg/base" + "github.com/gogits/gogs/pkg/setting" +) + +// IsReadmeFile reports whether name looks like a README file based on its extension. +func IsReadmeFile(name string) bool { + return strings.HasPrefix(strings.ToLower(name), "readme") +} + +const ( + ISSUE_NAME_STYLE_NUMERIC = "numeric" + ISSUE_NAME_STYLE_ALPHANUMERIC = "alphanumeric" +) + +var ( + // MentionPattern matches string that mentions someone, e.g. @Unknwon + MentionPattern = regexp.MustCompile(`(\s|^|\W)@[0-9a-zA-Z-_\.]+`) + + // CommitPattern matches link to certain commit with or without trailing hash, + // e.g. https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2 + CommitPattern = regexp.MustCompile(`(\s|^)https?.*commit/[0-9a-zA-Z]+(#+[0-9a-zA-Z-]*)?`) + + // IssueFullPattern matches link to an issue with or without trailing hash, + // e.g. https://try.gogs.io/gogs/gogs/issues/4#issue-685 + IssueFullPattern = regexp.MustCompile(`(\s|^)https?.*issues/[0-9]+(#+[0-9a-zA-Z-]*)?`) + // IssueNumericPattern matches string that references to a numeric issue, e.g. #1287 + IssueNumericPattern = regexp.MustCompile(`( |^|\()#[0-9]+\b`) + // IssueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 + IssueAlphanumericPattern = regexp.MustCompile(`( |^|\()[A-Z]{1,10}-[1-9][0-9]*\b`) + // CrossReferenceIssueNumericPattern matches string that references a numeric issue in a difference repository + // e.g. gogits/gogs#12345 + CrossReferenceIssueNumericPattern = regexp.MustCompile(`( |^)[0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+\b`) + + // Sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae + // FIXME: this pattern matches pure numbers as well, right now we do a hack to check in RenderSha1CurrentPattern + // by converting string to a number. + Sha1CurrentPattern = regexp.MustCompile(`\b[0-9a-f]{40}\b`) +) + +// FindAllMentions matches mention patterns in given content +// and returns a list of found user names without @ prefix. +func FindAllMentions(content string) []string { + mentions := MentionPattern.FindAllString(content, -1) + for i := range mentions { + mentions[i] = mentions[i][strings.Index(mentions[i], "@")+1:] // Strip @ character + } + return mentions +} + +// cutoutVerbosePrefix cutouts URL prefix including sub-path to +// return a clean unified string of request URL path. +func cutoutVerbosePrefix(prefix string) string { + if len(prefix) == 0 || prefix[0] != '/' { + return prefix + } + count := 0 + for i := 0; i < len(prefix); i++ { + if prefix[i] == '/' { + count++ + } + if count >= 3+setting.AppSubUrlDepth { + return prefix[:i] + } + } + return prefix +} + +// RenderIssueIndexPattern renders issue indexes to corresponding links. +func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + urlPrefix = cutoutVerbosePrefix(urlPrefix) + + pattern := IssueNumericPattern + if metas["style"] == ISSUE_NAME_STYLE_ALPHANUMERIC { + pattern = IssueAlphanumericPattern + } + + ms := pattern.FindAll(rawBytes, -1) + for _, m := range ms { + if m[0] == ' ' || m[0] == '(' { + m = m[1:] // ignore leading space or opening parentheses + } + var link string + if metas == nil { + link = fmt.Sprintf(`<a href="%s/issues/%s">%s</a>`, urlPrefix, m[1:], m) + } else { + // Support for external issue tracker + if metas["style"] == ISSUE_NAME_STYLE_ALPHANUMERIC { + metas["index"] = string(m) + } else { + metas["index"] = string(m[1:]) + } + link = fmt.Sprintf(`<a href="%s">%s</a>`, com.Expand(metas["format"], metas), m) + } + rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) + } + return rawBytes +} + +// Note: this section is for purpose of increase performance and +// reduce memory allocation at runtime since they are constant literals. +var pound = []byte("#") + +// RenderCrossReferenceIssueIndexPattern renders issue indexes from other repositories to corresponding links. +func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + ms := CrossReferenceIssueNumericPattern.FindAll(rawBytes, -1) + for _, m := range ms { + if m[0] == ' ' || m[0] == '(' { + m = m[1:] // ignore leading space or opening parentheses + } + + delimIdx := bytes.Index(m, pound) + repo := string(m[:delimIdx]) + index := string(m[delimIdx+1:]) + + link := fmt.Sprintf(`<a href="%s%s/issues/%s">%s</a>`, setting.AppUrl, repo, index, m) + rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) + } + return rawBytes +} + +// RenderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository. +func RenderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte { + return []byte(Sha1CurrentPattern.ReplaceAllStringFunc(string(rawBytes[:]), func(m string) string { + if com.StrTo(m).MustInt() > 0 { + return m + } + return fmt.Sprintf(`<a href="%s/commit/%s"><code>%s</code></a>`, urlPrefix, m, base.ShortSha(string(m))) + })) +} + +// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links. +func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + ms := MentionPattern.FindAll(rawBytes, -1) + for _, m := range ms { + m = m[bytes.Index(m, []byte("@")):] + rawBytes = bytes.Replace(rawBytes, m, + []byte(fmt.Sprintf(`<a href="%s/%s">%s</a>`, setting.AppSubUrl, m[1:], m)), -1) + } + + rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas) + rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas) + rawBytes = RenderSha1CurrentPattern(rawBytes, urlPrefix) + return rawBytes +} + +var ( + leftAngleBracket = []byte("</") + rightAngleBracket = []byte(">") +) + +var noEndTags = []string{"input", "br", "hr", "img"} + +// wrapImgWithLink warps link to standalone <img> tags. +func wrapImgWithLink(urlPrefix string, buf *bytes.Buffer, token html.Token) { + // Extract "src" and "alt" attributes + var src, alt string + for i := range token.Attr { + switch token.Attr[i].Key { + case "src": + src = token.Attr[i].Val + case "alt": + alt = token.Attr[i].Val + } + } + + // Skip in case the "src" is empty + if len(src) == 0 { + buf.WriteString(token.String()) + return + } + + // Prepend repository base URL for internal links + needPrepend := !isLink([]byte(src)) + if needPrepend { + urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) + if src[0] != '/' { + urlPrefix += "/" + } + } + + buf.WriteString(`<a href="`) + if needPrepend { + buf.WriteString(urlPrefix) + buf.WriteString(src) + } else { + buf.WriteString(src) + } + buf.WriteString(`">`) + + if needPrepend { + src = strings.Replace(urlPrefix+string(src), " ", "%20", -1) + buf.WriteString(`<img src="`) + buf.WriteString(src) + buf.WriteString(`"`) + + if len(alt) > 0 { + buf.WriteString(` alt="`) + buf.WriteString(alt) + buf.WriteString(`"`) + } + + buf.WriteString(`>`) + + } else { + buf.WriteString(token.String()) + } + + buf.WriteString(`</a>`) +} + +// postProcessHTML treats different types of HTML differently, +// and only renders special links for plain text blocks. +func postProcessHTML(rawHTML []byte, urlPrefix string, metas map[string]string) []byte { + startTags := make([]string, 0, 5) + buf := bytes.NewBuffer(nil) + tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML)) + +OUTER_LOOP: + for html.ErrorToken != tokenizer.Next() { + token := tokenizer.Token() + switch token.Type { + case html.TextToken: + buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas)) + + case html.StartTagToken: + tagName := token.Data + + if tagName == "img" { + wrapImgWithLink(urlPrefix, buf, token) + continue OUTER_LOOP + } + + buf.WriteString(token.String()) + // If this is an excluded tag, we skip processing all output until a close tag is encountered. + if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) { + stackNum := 1 + for html.ErrorToken != tokenizer.Next() { + token = tokenizer.Token() + + // Copy the token to the output verbatim + buf.WriteString(token.String()) + + // Stack number doesn't increate for tags without end tags. + if token.Type == html.StartTagToken && !com.IsSliceContainsStr(noEndTags, token.Data) { + stackNum++ + } + + // If this is the close tag to the outer-most, we are done + if token.Type == html.EndTagToken { + stackNum-- + if stackNum <= 0 && strings.EqualFold(tagName, token.Data) { + break + } + } + } + continue OUTER_LOOP + } + + if !com.IsSliceContainsStr(noEndTags, tagName) { + startTags = append(startTags, tagName) + } + + case html.EndTagToken: + if len(startTags) == 0 { + buf.WriteString(token.String()) + break + } + + buf.Write(leftAngleBracket) + buf.WriteString(startTags[len(startTags)-1]) + buf.Write(rightAngleBracket) + startTags = startTags[:len(startTags)-1] + default: + buf.WriteString(token.String()) + } + } + + if io.EOF == tokenizer.Err() { + return buf.Bytes() + } + + // If we are not at the end of the input, then some other parsing error has occurred, + // so return the input verbatim. + return rawHTML +} + +type Type string + +const ( + UNRECOGNIZED Type = "unrecognized" + MARKDOWN Type = "markdown" + ORG_MODE Type = "orgmode" +) + +// Render takes a string or []byte and renders to HTML in given type of syntax with special links. +func Render(typ Type, input interface{}, urlPrefix string, metas map[string]string) []byte { + var rawBytes []byte + switch v := input.(type) { + case []byte: + rawBytes = v + case string: + rawBytes = []byte(v) + default: + panic(fmt.Sprintf("unrecognized input content type: %T", input)) + } + + urlPrefix = strings.Replace(urlPrefix, " ", "%20", -1) + var rawHTML []byte + switch typ { + case MARKDOWN: + rawHTML = RawMarkdown(rawBytes, urlPrefix) + case ORG_MODE: + default: + return rawBytes // Do nothing if syntax type is not recognized + } + + rawHTML = postProcessHTML(rawHTML, urlPrefix, metas) + return SanitizeBytes(rawHTML) +} diff --git a/pkg/markup/markup_test.go b/pkg/markup/markup_test.go new file mode 100644 index 00000000..d3d72091 --- /dev/null +++ b/pkg/markup/markup_test.go @@ -0,0 +1,284 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "strings" + "testing" + + . "github.com/smartystreets/goconvey/convey" + + . "github.com/gogits/gogs/pkg/markup" + "github.com/gogits/gogs/pkg/setting" +) + +func Test_IsReadmeFile(t *testing.T) { + Convey("Detect README file extension", t, func() { + testCases := []struct { + ext string + match bool + }{ + {"readme", true}, + {"README", true}, + {"readme.md", true}, + {"readme.markdown", true}, + {"readme.mdown", true}, + {"readme.mkd", true}, + {"readme.org", true}, + {"readme.rst", true}, + {"readme.asciidoc", true}, + {"readme_ZH", true}, + } + + for _, tc := range testCases { + So(IsReadmeFile(tc.ext), ShouldEqual, tc.match) + } + }) +} + +func Test_FindAllMentions(t *testing.T) { + Convey("Find all mention patterns", t, func() { + testCases := []struct { + content string + matches string + }{ + {"@Unknwon, what do you think?", "Unknwon"}, + {"@Unknwon what do you think?", "Unknwon"}, + {"Hi @Unknwon, sounds good to me", "Unknwon"}, + {"cc/ @Unknwon @User", "Unknwon,User"}, + } + + for _, tc := range testCases { + So(strings.Join(FindAllMentions(tc.content), ","), ShouldEqual, tc.matches) + } + }) +} + +func Test_RenderIssueIndexPattern(t *testing.T) { + Convey("Rendering an issue reference", t, func() { + var ( + urlPrefix = "/prefix" + metas map[string]string = nil + ) + setting.AppSubUrlDepth = 0 + + Convey("To the internal issue tracker", func() { + Convey("It should not render anything when there are no mentions", func() { + testCases := []string{ + "", + "this is a test", + "test 123 123 1234", + "#", + "# # #", + "# 123", + "#abcd", + "##1234", + "test#1234", + "#1234test", + " test #1234test", + } + + for i := 0; i < len(testCases); i++ { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i]) + } + }) + Convey("It should render freestanding mentions", func() { + testCases := []string{ + "#1234 test", "<a href=\"/prefix/issues/1234\">#1234</a> test", + "test #1234 issue", "test <a href=\"/prefix/issues/1234\">#1234</a> issue", + "test issue #1234", "test issue <a href=\"/prefix/issues/1234\">#1234</a>", + "#5 test", "<a href=\"/prefix/issues/5\">#5</a> test", + "test #5 issue", "test <a href=\"/prefix/issues/5\">#5</a> issue", + "test issue #5", "test issue <a href=\"/prefix/issues/5\">#5</a>", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should not render issue mention without leading space", func() { + input := []byte("test#54321 issue") + expected := "test#54321 issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should not render issue mention without trailing space", func() { + input := []byte("test #54321issue") + expected := "test #54321issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should render issue mention in parentheses", func() { + testCases := []string{ + "(#54321 issue)", "(<a href=\"/prefix/issues/54321\">#54321</a> issue)", + "test (#54321) issue", "test (<a href=\"/prefix/issues/54321\">#54321</a>) issue", + "test (#54321 extra) issue", "test (<a href=\"/prefix/issues/54321\">#54321</a> extra) issue", + "test (#54321 issue)", "test (<a href=\"/prefix/issues/54321\">#54321</a> issue)", + "test (#54321)", "test (<a href=\"/prefix/issues/54321\">#54321</a>)", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render multiple issue mentions in the same line", func() { + testCases := []string{ + "#54321 #1243", "<a href=\"/prefix/issues/54321\">#54321</a> <a href=\"/prefix/issues/1243\">#1243</a>", + "test #54321 #1243", "test <a href=\"/prefix/issues/54321\">#54321</a> <a href=\"/prefix/issues/1243\">#1243</a>", + "(#54321 #1243)", "(<a href=\"/prefix/issues/54321\">#54321</a> <a href=\"/prefix/issues/1243\">#1243</a>)", + "(#54321)(#1243)", "(<a href=\"/prefix/issues/54321\">#54321</a>)(<a href=\"/prefix/issues/1243\">#1243</a>)", + "text #54321 test #1243 issue", "text <a href=\"/prefix/issues/54321\">#54321</a> test <a href=\"/prefix/issues/1243\">#1243</a> issue", + "#1 (#4321) test", "<a href=\"/prefix/issues/1\">#1</a> (<a href=\"/prefix/issues/4321\">#4321</a>) test", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + }) + Convey("To an external issue tracker with numeric style", func() { + metas = make(map[string]string) + metas["format"] = "https://someurl.com/{user}/{repo}/{index}" + metas["user"] = "someuser" + metas["repo"] = "somerepo" + metas["style"] = ISSUE_NAME_STYLE_NUMERIC + + Convey("should not render anything when there are no mentions", func() { + testCases := []string{ + "this is a test", + "test 123 123 1234", + "#", + "# # #", + "# 123", + "#abcd", + } + + for i := 0; i < len(testCases); i++ { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i]) + } + }) + Convey("It should render freestanding issue mentions", func() { + testCases := []string{ + "#1234 test", "<a href=\"https://someurl.com/someuser/somerepo/1234\">#1234</a> test", + "test #1234 issue", "test <a href=\"https://someurl.com/someuser/somerepo/1234\">#1234</a> issue", + "test issue #1234", "test issue <a href=\"https://someurl.com/someuser/somerepo/1234\">#1234</a>", + "#5 test", "<a href=\"https://someurl.com/someuser/somerepo/5\">#5</a> test", + "test #5 issue", "test <a href=\"https://someurl.com/someuser/somerepo/5\">#5</a> issue", + "test issue #5", "test issue <a href=\"https://someurl.com/someuser/somerepo/5\">#5</a>", + } + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should not render issue mention without leading space", func() { + input := []byte("test#54321 issue") + expected := "test#54321 issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should not render issue mention without trailing space", func() { + input := []byte("test #54321issue") + expected := "test #54321issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should render issue mention in parentheses", func() { + testCases := []string{ + "(#54321 issue)", "(<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> issue)", + "test (#54321) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a>) issue", + "test (#54321 extra) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> extra) issue", + "test (#54321 issue)", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> issue)", + "test (#54321)", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a>)", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render multiple issue mentions in the same line", func() { + testCases := []string{ + "#54321 #1243", "<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>", + "test #54321 #1243", "test <a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>", + "(#54321 #1243)", "(<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>)", + "(#54321)(#1243)", "(<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a>)(<a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>)", + "text #54321 test #1243 issue", "text <a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> test <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a> issue", + "#1 (#4321) test", "<a href=\"https://someurl.com/someuser/somerepo/1\">#1</a> (<a href=\"https://someurl.com/someuser/somerepo/4321\">#4321</a>) test", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + }) + Convey("To an external issue tracker with alphanumeric style", func() { + metas = make(map[string]string) + metas["format"] = "https://someurl.com/{user}/{repo}/?b={index}" + metas["user"] = "someuser" + metas["repo"] = "somerepo" + metas["style"] = ISSUE_NAME_STYLE_ALPHANUMERIC + Convey("It should not render anything when there are no mentions", func() { + testCases := []string{ + "", + "this is a test", + "test 123 123 1234", + "#", + "##1234", + "# 123", + "#abcd", + "test #123", + "abc-1234", // issue prefix must be capital + "ABc-1234", // issue prefix must be _all_ capital + "ABCDEFGHIJK-1234", // the limit is 10 characters in the prefix + "ABC1234", // dash is required + "test ABC- test", // number is required + "test -1234 test", // prefix is required + "testABC-123 test", // leading space is required + "test ABC-123test", // trailing space is required + "ABC-0123", // no leading zero + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i]) + } + }) + Convey("It should render freestanding issue mention", func() { + testCases := []string{ + "OTT-1234 test", "<a href=\"https://someurl.com/someuser/somerepo/?b=OTT-1234\">OTT-1234</a> test", + "test T-12 issue", "test <a href=\"https://someurl.com/someuser/somerepo/?b=T-12\">T-12</a> issue", + "test issue ABCDEFGHIJ-1234567890", "test issue <a href=\"https://someurl.com/someuser/somerepo/?b=ABCDEFGHIJ-1234567890\">ABCDEFGHIJ-1234567890</a>", + "A-1 test", "<a href=\"https://someurl.com/someuser/somerepo/?b=A-1\">A-1</a> test", + "test ZED-1 issue", "test <a href=\"https://someurl.com/someuser/somerepo/?b=ZED-1\">ZED-1</a> issue", + "test issue DEED-7154", "test issue <a href=\"https://someurl.com/someuser/somerepo/?b=DEED-7154\">DEED-7154</a>", + } + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render issue mention in parentheses", func() { + testCases := []string{ + "(ABG-124 issue)", "(<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> issue)", + "test (ABG-124) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>) issue", + "test (ABG-124 extra) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> extra) issue", + "test (ABG-124 issue)", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> issue)", + "test (ABG-124)", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>)", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render multiple issue mentions in the same line", func() { + testCases := []string{ + "ABG-124 OTT-4321", "<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>", + "test ABG-124 OTT-4321", "test <a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>", + "(ABG-124 OTT-4321)", "(<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>)", + "(ABG-124)(OTT-4321)", "(<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>)(<a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>)", + "text ABG-124 test OTT-4321 issue", "text <a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> test <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a> issue", + "A-1 (RRE-345) test", "<a href=\"https://someurl.com/someuser/somerepo/?b=A-1\">A-1</a> (<a href=\"https://someurl.com/someuser/somerepo/?b=RRE-345\">RRE-345</a>) test", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + }) + }) +} diff --git a/pkg/markup/sanitizer.go b/pkg/markup/sanitizer.go new file mode 100644 index 00000000..63ead6df --- /dev/null +++ b/pkg/markup/sanitizer.go @@ -0,0 +1,51 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "regexp" + "sync" + + "github.com/microcosm-cc/bluemonday" + + "github.com/gogits/gogs/pkg/setting" +) + +// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow +// any modification to the underlying policies once it's been created. +type Sanitizer struct { + policy *bluemonday.Policy + init sync.Once +} + +var sanitizer = &Sanitizer{} + +// NewSanitizer initializes sanitizer with allowed attributes based on settings. +// Multiple calls to this function will only create one instance of Sanitizer during +// entire application lifecycle. +func NewSanitizer() { + sanitizer.init.Do(func() { + sanitizer.policy = bluemonday.UGCPolicy() + // We only want to allow HighlightJS specific classes for code blocks + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") + + // Checkboxes + sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") + sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") + + // Custom URL-Schemes + sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) + }) +} + +// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. +func Sanitize(s string) string { + return sanitizer.policy.Sanitize(s) +} + +// SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist. +func SanitizeBytes(b []byte) []byte { + return sanitizer.policy.SanitizeBytes(b) +} diff --git a/pkg/markup/sanitizer_test.go b/pkg/markup/sanitizer_test.go new file mode 100644 index 00000000..ae341acc --- /dev/null +++ b/pkg/markup/sanitizer_test.go @@ -0,0 +1,38 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "testing" + + . "github.com/smartystreets/goconvey/convey" + + . "github.com/gogits/gogs/pkg/markup" +) + +func Test_Sanitizer(t *testing.T) { + NewSanitizer() + Convey("Sanitize HTML string and bytes", t, func() { + testCases := []string{ + // Regular + `<a onblur="alert(secret)" href="http://www.google.com">Google</a>`, `<a href="http://www.google.com" rel="nofollow">Google</a>`, + + // Code highlighting class + `<code class="random string"></code>`, `<code></code>`, + `<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`, + `<code class="language-go"></code>`, `<code class="language-go"></code>`, + + // Input checkbox + `<input type="hidden">`, ``, + `<input type="checkbox">`, `<input type="checkbox">`, + `<input checked disabled autofocus>`, `<input checked="" disabled="">`, + } + + for i := 0; i < len(testCases); i += 2 { + So(Sanitize(testCases[i]), ShouldEqual, testCases[i+1]) + So(string(SanitizeBytes([]byte(testCases[i]))), ShouldEqual, testCases[i+1]) + } + }) +} |