diff options
author | Unknwon <u@gogs.io> | 2019-10-24 01:51:46 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-10-24 01:51:46 -0700 |
commit | 01c8df01ec0608f1f25b2f1444adabb98fa5ee8a (patch) | |
tree | f8a7e5dd8d2a8c51e1ce2cabb9d33571a93314dd /internal/markup | |
parent | 613139e7bef81d3573e7988a47eb6765f3de347a (diff) |
internal: move packages under this directory (#5836)
* Rename pkg -> internal
* Rename routes -> route
* Move route -> internal/route
* Rename models -> db
* Move db -> internal/db
* Fix route2 -> route
* Move cmd -> internal/cmd
* Bump version
Diffstat (limited to 'internal/markup')
-rw-r--r-- | internal/markup/markdown.go | 167 | ||||
-rw-r--r-- | internal/markup/markdown_test.go | 111 | ||||
-rw-r--r-- | internal/markup/markup.go | 362 | ||||
-rw-r--r-- | internal/markup/markup_test.go | 310 | ||||
-rw-r--r-- | internal/markup/orgmode.go | 40 | ||||
-rw-r--r-- | internal/markup/sanitizer.go | 55 | ||||
-rw-r--r-- | internal/markup/sanitizer_test.go | 38 |
7 files changed, 1083 insertions, 0 deletions
diff --git a/internal/markup/markdown.go b/internal/markup/markdown.go new file mode 100644 index 00000000..db581a71 --- /dev/null +++ b/internal/markup/markdown.go @@ -0,0 +1,167 @@ +// Copyright 2014 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "bytes" + "fmt" + "path" + "path/filepath" + "regexp" + "strings" + + "github.com/russross/blackfriday" + + "gogs.io/gogs/internal/setting" + "gogs.io/gogs/internal/tool" +) + +// IsMarkdownFile reports whether name looks like a Markdown file based on its extension. +func IsMarkdownFile(name string) bool { + extension := strings.ToLower(filepath.Ext(name)) + for _, ext := range setting.Markdown.FileExtensions { + if strings.ToLower(ext) == extension { + return true + } + } + return false +} + +// MarkdownRenderer is a extended version of underlying Markdown render object. +type MarkdownRenderer struct { + blackfriday.Renderer + urlPrefix string +} + +var validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://|^mailto:`) + +// isLink reports whether link fits valid format. +func isLink(link []byte) bool { + return validLinksPattern.Match(link) +} + +// Link defines how formal links should be processed to produce corresponding HTML elements. +func (r *MarkdownRenderer) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) { + if len(link) > 0 && !isLink(link) { + if link[0] != '#' { + link = []byte(path.Join(r.urlPrefix, string(link))) + } + } + + r.Renderer.Link(out, link, title, content) +} + +// AutoLink defines how auto-detected links should be processed to produce corresponding HTML elements. +// Reference for kind: https://github.com/russross/blackfriday/blob/master/markdown.go#L69-L76 +func (r *MarkdownRenderer) AutoLink(out *bytes.Buffer, link []byte, kind int) { + if kind != blackfriday.LINK_TYPE_NORMAL { + r.Renderer.AutoLink(out, link, kind) + return + } + + // Since this method could only possibly serve one link at a time, + // we do not need to find all. + if bytes.HasPrefix(link, []byte(setting.AppURL)) { + m := CommitPattern.Find(link) + if m != nil { + m = bytes.TrimSpace(m) + i := strings.Index(string(m), "commit/") + j := strings.Index(string(m), "#") + if j == -1 { + j = len(m) + } + out.WriteString(fmt.Sprintf(` <code><a href="%s">%s</a></code>`, m, tool.ShortSHA1(string(m[i+7:j])))) + return + } + + m = IssueFullPattern.Find(link) + if m != nil { + m = bytes.TrimSpace(m) + i := strings.Index(string(m), "issues/") + j := strings.Index(string(m), "#") + if j == -1 { + j = len(m) + } + + index := string(m[i+7 : j]) + fullRepoURL := setting.AppURL + strings.TrimPrefix(r.urlPrefix, "/") + var link string + if strings.HasPrefix(string(m), fullRepoURL) { + // Use a short issue reference if the URL refers to this repository + link = fmt.Sprintf(`<a href="%s">#%s</a>`, m, index) + } else { + // Use a cross-repository issue reference if the URL refers to a different repository + repo := string(m[len(setting.AppURL) : i-1]) + link = fmt.Sprintf(`<a href="%s">%s#%s</a>`, m, repo, index) + } + out.WriteString(link) + return + } + } + + r.Renderer.AutoLink(out, link, kind) +} + +// ListItem defines how list items should be processed to produce corresponding HTML elements. +func (options *MarkdownRenderer) ListItem(out *bytes.Buffer, text []byte, flags int) { + // Detect procedures to draw checkboxes. + switch { + case bytes.HasPrefix(text, []byte("[ ] ")): + text = append([]byte(`<input type="checkbox" disabled="" />`), text[3:]...) + case bytes.HasPrefix(text, []byte("[x] ")): + text = append([]byte(`<input type="checkbox" disabled="" checked="" />`), text[3:]...) + } + options.Renderer.ListItem(out, text, flags) +} + +// RawMarkdown renders content in Markdown syntax to HTML without handling special links. +func RawMarkdown(body []byte, urlPrefix string) []byte { + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_SKIP_STYLE + htmlFlags |= blackfriday.HTML_OMIT_CONTENTS + + if setting.Smartypants.Enabled { + htmlFlags |= blackfriday.HTML_USE_SMARTYPANTS + if setting.Smartypants.Fractions { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_FRACTIONS + } + if setting.Smartypants.Dashes { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_DASHES + } + if setting.Smartypants.LatexDashes { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_LATEX_DASHES + } + if setting.Smartypants.AngledQuotes { + htmlFlags |= blackfriday.HTML_SMARTYPANTS_ANGLED_QUOTES + } + } + + renderer := &MarkdownRenderer{ + Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""), + urlPrefix: urlPrefix, + } + + // set up the parser + extensions := 0 + extensions |= blackfriday.EXTENSION_NO_INTRA_EMPHASIS + extensions |= blackfriday.EXTENSION_TABLES + extensions |= blackfriday.EXTENSION_FENCED_CODE + extensions |= blackfriday.EXTENSION_AUTOLINK + extensions |= blackfriday.EXTENSION_STRIKETHROUGH + extensions |= blackfriday.EXTENSION_SPACE_HEADERS + extensions |= blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK + + if setting.Markdown.EnableHardLineBreak { + extensions |= blackfriday.EXTENSION_HARD_LINE_BREAK + } + + body = blackfriday.Markdown(body, renderer, extensions) + return body +} + +// Markdown takes a string or []byte and renders to HTML in Markdown syntax with special links. +func Markdown(input interface{}, urlPrefix string, metas map[string]string) []byte { + return Render(MARKDOWN, input, urlPrefix, metas) +} diff --git a/internal/markup/markdown_test.go b/internal/markup/markdown_test.go new file mode 100644 index 00000000..e748adc7 --- /dev/null +++ b/internal/markup/markdown_test.go @@ -0,0 +1,111 @@ +// Copyright 2016 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/russross/blackfriday" + . "github.com/smartystreets/goconvey/convey" + + . "gogs.io/gogs/internal/markup" + "gogs.io/gogs/internal/setting" +) + +func Test_IsMarkdownFile(t *testing.T) { + setting.Markdown.FileExtensions = strings.Split(".md,.markdown,.mdown,.mkd", ",") + Convey("Detect Markdown file extension", t, func() { + testCases := []struct { + ext string + match bool + }{ + {".md", true}, + {".markdown", true}, + {".mdown", true}, + {".mkd", true}, + {".org", false}, + {".rst", false}, + {".asciidoc", false}, + } + + for _, tc := range testCases { + So(IsMarkdownFile(tc.ext), ShouldEqual, tc.match) + } + }) +} + +func Test_Markdown(t *testing.T) { + Convey("Rendering an issue URL", t, func() { + setting.AppURL = "http://localhost:3000/" + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_SKIP_STYLE + htmlFlags |= blackfriday.HTML_OMIT_CONTENTS + renderer := &MarkdownRenderer{ + Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""), + } + buffer := new(bytes.Buffer) + Convey("To the internal issue tracker", func() { + Convey("It should render valid issue URLs", func() { + testCases := []string{ + "http://localhost:3000/user/repo/issues/3333", "<a href=\"http://localhost:3000/user/repo/issues/3333\">#3333</a>", + } + + for i := 0; i < len(testCases); i += 2 { + renderer.AutoLink(buffer, []byte(testCases[i]), blackfriday.LINK_TYPE_NORMAL) + + line, _ := buffer.ReadString(0) + So(line, ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render but not change non-issue URLs", func() { + testCases := []string{ + "http://1111/2222/ssss-issues/3333?param=blah&blahh=333", "<a href=\"http://1111/2222/ssss-issues/3333?param=blah&blahh=333\">http://1111/2222/ssss-issues/3333?param=blah&blahh=333</a>", + "http://test.com/issues/33333", "<a href=\"http://test.com/issues/33333\">http://test.com/issues/33333</a>", + "http://test.com/issues/3", "<a href=\"http://test.com/issues/3\">http://test.com/issues/3</a>", + "http://issues/333", "<a href=\"http://issues/333\">http://issues/333</a>", + "https://issues/333", "<a href=\"https://issues/333\">https://issues/333</a>", + "http://tissues/0", "<a href=\"http://tissues/0\">http://tissues/0</a>", + } + + for i := 0; i < len(testCases); i += 2 { + renderer.AutoLink(buffer, []byte(testCases[i]), blackfriday.LINK_TYPE_NORMAL) + + line, _ := buffer.ReadString(0) + So(line, ShouldEqual, testCases[i+1]) + } + }) + }) + }) + + Convey("Rendering a commit URL", t, func() { + setting.AppURL = "http://localhost:3000/" + htmlFlags := 0 + htmlFlags |= blackfriday.HTML_SKIP_STYLE + htmlFlags |= blackfriday.HTML_OMIT_CONTENTS + renderer := &MarkdownRenderer{ + Renderer: blackfriday.HtmlRenderer(htmlFlags, "", ""), + } + buffer := new(bytes.Buffer) + Convey("To the internal issue tracker", func() { + Convey("It should correctly convert URLs", func() { + testCases := []string{ + "http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae", " <code><a href=\"http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae\">d8a994ef24</a></code>", + "http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2", " <code><a href=\"http://localhost:3000/user/project/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2\">d8a994ef24</a></code>", + "https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2", "<a href=\"https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2\">https://external-link.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2</a>", + "https://commit/d8a994ef243349f321568f9e36d5c3f444b99cae", "<a href=\"https://commit/d8a994ef243349f321568f9e36d5c3f444b99cae\">https://commit/d8a994ef243349f321568f9e36d5c3f444b99cae</a>", + } + + for i := 0; i < len(testCases); i += 2 { + renderer.AutoLink(buffer, []byte(testCases[i]), blackfriday.LINK_TYPE_NORMAL) + + line, _ := buffer.ReadString(0) + So(line, ShouldEqual, testCases[i+1]) + } + }) + }) + }) +} diff --git a/internal/markup/markup.go b/internal/markup/markup.go new file mode 100644 index 00000000..e09a0ba6 --- /dev/null +++ b/internal/markup/markup.go @@ -0,0 +1,362 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "bytes" + "fmt" + "io" + "regexp" + "strings" + + "github.com/unknwon/com" + "golang.org/x/net/html" + + "gogs.io/gogs/internal/setting" + "gogs.io/gogs/internal/tool" +) + +// IsReadmeFile reports whether name looks like a README file based on its extension. +func IsReadmeFile(name string) bool { + return strings.HasPrefix(strings.ToLower(name), "readme") +} + +// IsIPythonNotebook reports whether name looks like a IPython notebook based on its extension. +func IsIPythonNotebook(name string) bool { + return strings.HasSuffix(name, ".ipynb") +} + +const ( + ISSUE_NAME_STYLE_NUMERIC = "numeric" + ISSUE_NAME_STYLE_ALPHANUMERIC = "alphanumeric" +) + +var ( + // MentionPattern matches string that mentions someone, e.g. @Unknwon + MentionPattern = regexp.MustCompile(`(\s|^|\W)@[0-9a-zA-Z-_\.]+`) + + // CommitPattern matches link to certain commit with or without trailing hash, + // e.g. https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2 + CommitPattern = regexp.MustCompile(`(\s|^)https?.*commit/[0-9a-zA-Z]+(#+[0-9a-zA-Z-]*)?`) + + // IssueFullPattern matches link to an issue with or without trailing hash, + // e.g. https://try.gogs.io/gogs/gogs/issues/4#issue-685 + IssueFullPattern = regexp.MustCompile(`(\s|^)https?.*issues/[0-9]+(#+[0-9a-zA-Z-]*)?`) + // IssueNumericPattern matches string that references to a numeric issue, e.g. #1287 + IssueNumericPattern = regexp.MustCompile(`( |^|\(|\[)#[0-9]+\b`) + // IssueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 + IssueAlphanumericPattern = regexp.MustCompile(`( |^|\(|\[)[A-Z]{1,10}-[1-9][0-9]*\b`) + // CrossReferenceIssueNumericPattern matches string that references a numeric issue in a difference repository + // e.g. gogs/gogs#12345 + CrossReferenceIssueNumericPattern = regexp.MustCompile(`( |^)[0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+\b`) + + // Sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae + // FIXME: this pattern matches pure numbers as well, right now we do a hack to check in RenderSha1CurrentPattern by converting string to a number. + Sha1CurrentPattern = regexp.MustCompile(`\b[0-9a-f]{7,40}\b`) +) + +// FindAllMentions matches mention patterns in given content +// and returns a list of found user names without @ prefix. +func FindAllMentions(content string) []string { + mentions := MentionPattern.FindAllString(content, -1) + for i := range mentions { + mentions[i] = mentions[i][strings.Index(mentions[i], "@")+1:] // Strip @ character + } + return mentions +} + +// cutoutVerbosePrefix cutouts URL prefix including sub-path to +// return a clean unified string of request URL path. +func cutoutVerbosePrefix(prefix string) string { + if len(prefix) == 0 || prefix[0] != '/' { + return prefix + } + count := 0 + for i := 0; i < len(prefix); i++ { + if prefix[i] == '/' { + count++ + } + if count >= 3+setting.AppSubURLDepth { + return prefix[:i] + } + } + return prefix +} + +// RenderIssueIndexPattern renders issue indexes to corresponding links. +func RenderIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + urlPrefix = cutoutVerbosePrefix(urlPrefix) + + pattern := IssueNumericPattern + if metas["style"] == ISSUE_NAME_STYLE_ALPHANUMERIC { + pattern = IssueAlphanumericPattern + } + + ms := pattern.FindAll(rawBytes, -1) + for _, m := range ms { + if m[0] == ' ' || m[0] == '(' || m[0] == '[' { + // ignore leading space, opening parentheses, or opening square brackets + m = m[1:] + } + var link string + if metas == nil { + link = fmt.Sprintf(`<a href="%s/issues/%s">%s</a>`, urlPrefix, m[1:], m) + } else { + // Support for external issue tracker + if metas["style"] == ISSUE_NAME_STYLE_ALPHANUMERIC { + metas["index"] = string(m) + } else { + metas["index"] = string(m[1:]) + } + link = fmt.Sprintf(`<a href="%s">%s</a>`, com.Expand(metas["format"], metas), m) + } + rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) + } + return rawBytes +} + +// Note: this section is for purpose of increase performance and +// reduce memory allocation at runtime since they are constant literals. +var pound = []byte("#") + +// RenderCrossReferenceIssueIndexPattern renders issue indexes from other repositories to corresponding links. +func RenderCrossReferenceIssueIndexPattern(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + ms := CrossReferenceIssueNumericPattern.FindAll(rawBytes, -1) + for _, m := range ms { + if m[0] == ' ' || m[0] == '(' { + m = m[1:] // ignore leading space or opening parentheses + } + + delimIdx := bytes.Index(m, pound) + repo := string(m[:delimIdx]) + index := string(m[delimIdx+1:]) + + link := fmt.Sprintf(`<a href="%s%s/issues/%s">%s</a>`, setting.AppURL, repo, index, m) + rawBytes = bytes.Replace(rawBytes, m, []byte(link), 1) + } + return rawBytes +} + +// RenderSha1CurrentPattern renders SHA1 strings to corresponding links that assumes in the same repository. +func RenderSha1CurrentPattern(rawBytes []byte, urlPrefix string) []byte { + return []byte(Sha1CurrentPattern.ReplaceAllStringFunc(string(rawBytes[:]), func(m string) string { + if com.StrTo(m).MustInt() > 0 { + return m + } + return fmt.Sprintf(`<a href="%s/commit/%s"><code>%s</code></a>`, urlPrefix, m, tool.ShortSHA1(string(m))) + })) +} + +// RenderSpecialLink renders mentions, indexes and SHA1 strings to corresponding links. +func RenderSpecialLink(rawBytes []byte, urlPrefix string, metas map[string]string) []byte { + ms := MentionPattern.FindAll(rawBytes, -1) + for _, m := range ms { + m = m[bytes.Index(m, []byte("@")):] + rawBytes = bytes.Replace(rawBytes, m, + []byte(fmt.Sprintf(`<a href="%s/%s">%s</a>`, setting.AppSubURL, m[1:], m)), -1) + } + + rawBytes = RenderIssueIndexPattern(rawBytes, urlPrefix, metas) + rawBytes = RenderCrossReferenceIssueIndexPattern(rawBytes, urlPrefix, metas) + rawBytes = RenderSha1CurrentPattern(rawBytes, urlPrefix) + return rawBytes +} + +var ( + leftAngleBracket = []byte("</") + rightAngleBracket = []byte(">") +) + +var noEndTags = []string{"input", "br", "hr", "img"} + +// wrapImgWithLink warps link to standalone <img> tags. +func wrapImgWithLink(urlPrefix string, buf *bytes.Buffer, token html.Token) { + // Extract "src" and "alt" attributes + var src, alt string + for i := range token.Attr { + switch token.Attr[i].Key { + case "src": + src = token.Attr[i].Val + case "alt": + alt = token.Attr[i].Val + } + } + + // Skip in case the "src" is empty + if len(src) == 0 { + buf.WriteString(token.String()) + return + } + + // Skip in case the "src" is data url + if strings.HasPrefix(src, "data:") { + buf.WriteString(token.String()) + return + } + + // Prepend repository base URL for internal links + needPrepend := !isLink([]byte(src)) + if needPrepend { + urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1) + if src[0] != '/' { + urlPrefix += "/" + } + } + + buf.WriteString(`<a href="`) + if needPrepend { + buf.WriteString(urlPrefix) + buf.WriteString(src) + } else { + buf.WriteString(src) + } + buf.WriteString(`">`) + + if needPrepend { + src = strings.Replace(urlPrefix+string(src), " ", "%20", -1) + buf.WriteString(`<img src="`) + buf.WriteString(src) + buf.WriteString(`"`) + + if len(alt) > 0 { + buf.WriteString(` alt="`) + buf.WriteString(alt) + buf.WriteString(`"`) + } + + buf.WriteString(`>`) + + } else { + buf.WriteString(token.String()) + } + + buf.WriteString(`</a>`) +} + +// postProcessHTML treats different types of HTML differently, +// and only renders special links for plain text blocks. +func postProcessHTML(rawHTML []byte, urlPrefix string, metas map[string]string) []byte { + startTags := make([]string, 0, 5) + buf := bytes.NewBuffer(nil) + tokenizer := html.NewTokenizer(bytes.NewReader(rawHTML)) + +OUTER_LOOP: + for html.ErrorToken != tokenizer.Next() { + token := tokenizer.Token() + switch token.Type { + case html.TextToken: + buf.Write(RenderSpecialLink([]byte(token.String()), urlPrefix, metas)) + + case html.StartTagToken: + tagName := token.Data + + if tagName == "img" { + wrapImgWithLink(urlPrefix, buf, token) + continue OUTER_LOOP + } + + buf.WriteString(token.String()) + // If this is an excluded tag, we skip processing all output until a close tag is encountered. + if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) { + stackNum := 1 + for html.ErrorToken != tokenizer.Next() { + token = tokenizer.Token() + + // Copy the token to the output verbatim + buf.WriteString(token.String()) + + // Stack number doesn't increate for tags without end tags. + if token.Type == html.StartTagToken && !com.IsSliceContainsStr(noEndTags, token.Data) { + stackNum++ + } + + // If this is the close tag to the outer-most, we are done + if token.Type == html.EndTagToken { + stackNum-- + if stackNum <= 0 && strings.EqualFold(tagName, token.Data) { + break + } + } + } + continue OUTER_LOOP + } + + if !com.IsSliceContainsStr(noEndTags, tagName) { + startTags = append(startTags, tagName) + } + + case html.EndTagToken: + if len(startTags) == 0 { + buf.WriteString(token.String()) + break + } + + buf.Write(leftAngleBracket) + buf.WriteString(startTags[len(startTags)-1]) + buf.Write(rightAngleBracket) + startTags = startTags[:len(startTags)-1] + default: + buf.WriteString(token.String()) + } + } + + if io.EOF == tokenizer.Err() { + return buf.Bytes() + } + + // If we are not at the end of the input, then some other parsing error has occurred, + // so return the input verbatim. + return rawHTML +} + +type Type string + +const ( + UNRECOGNIZED Type = "unrecognized" + MARKDOWN Type = "markdown" + ORG_MODE Type = "orgmode" + IPYTHON_NOTEBOOK Type = "ipynb" +) + +// Detect returns best guess of a markup type based on file name. +func Detect(filename string) Type { + switch { + case IsMarkdownFile(filename): + return MARKDOWN + case IsOrgModeFile(filename): + return ORG_MODE + case IsIPythonNotebook(filename): + return IPYTHON_NOTEBOOK + default: + return UNRECOGNIZED + } +} + +// Render takes a string or []byte and renders to HTML in given type of syntax with special links. +func Render(typ Type, input interface{}, urlPrefix string, metas map[string]string) []byte { + var rawBytes []byte + switch v := input.(type) { + case []byte: + rawBytes = v + case string: + rawBytes = []byte(v) + default: + panic(fmt.Sprintf("unrecognized input content type: %T", input)) + } + + urlPrefix = strings.TrimRight(strings.Replace(urlPrefix, " ", "%20", -1), "/") + var rawHTML []byte + switch typ { + case MARKDOWN: + rawHTML = RawMarkdown(rawBytes, urlPrefix) + case ORG_MODE: + rawHTML = RawOrgMode(rawBytes, urlPrefix) + default: + return rawBytes // Do nothing if syntax type is not recognized + } + + rawHTML = postProcessHTML(rawHTML, urlPrefix, metas) + return SanitizeBytes(rawHTML) +} diff --git a/internal/markup/markup_test.go b/internal/markup/markup_test.go new file mode 100644 index 00000000..0e3beb76 --- /dev/null +++ b/internal/markup/markup_test.go @@ -0,0 +1,310 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "strings" + "testing" + + . "github.com/smartystreets/goconvey/convey" + + . "gogs.io/gogs/internal/markup" + "gogs.io/gogs/internal/setting" +) + +func Test_IsReadmeFile(t *testing.T) { + Convey("Detect README file extension", t, func() { + testCases := []struct { + ext string + match bool + }{ + {"readme", true}, + {"README", true}, + {"readme.md", true}, + {"readme.markdown", true}, + {"readme.mdown", true}, + {"readme.mkd", true}, + {"readme.org", true}, + {"readme.rst", true}, + {"readme.asciidoc", true}, + {"readme_ZH", true}, + } + + for _, tc := range testCases { + So(IsReadmeFile(tc.ext), ShouldEqual, tc.match) + } + }) +} + +func Test_FindAllMentions(t *testing.T) { + Convey("Find all mention patterns", t, func() { + testCases := []struct { + content string + matches string + }{ + {"@Unknwon, what do you think?", "Unknwon"}, + {"@Unknwon what do you think?", "Unknwon"}, + {"Hi @Unknwon, sounds good to me", "Unknwon"}, + {"cc/ @Unknwon @User", "Unknwon,User"}, + } + + for _, tc := range testCases { + So(strings.Join(FindAllMentions(tc.content), ","), ShouldEqual, tc.matches) + } + }) +} + +func Test_RenderIssueIndexPattern(t *testing.T) { + Convey("Rendering an issue reference", t, func() { + var ( + urlPrefix = "/prefix" + metas map[string]string = nil + ) + setting.AppSubURLDepth = 0 + + Convey("To the internal issue tracker", func() { + Convey("It should not render anything when there are no mentions", func() { + testCases := []string{ + "", + "this is a test", + "test 123 123 1234", + "#", + "# # #", + "# 123", + "#abcd", + "##1234", + "test#1234", + "#1234test", + " test #1234test", + } + + for i := 0; i < len(testCases); i++ { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i]) + } + }) + Convey("It should render freestanding mentions", func() { + testCases := []string{ + "#1234 test", "<a href=\"/prefix/issues/1234\">#1234</a> test", + "test #1234 issue", "test <a href=\"/prefix/issues/1234\">#1234</a> issue", + "test issue #1234", "test issue <a href=\"/prefix/issues/1234\">#1234</a>", + "#5 test", "<a href=\"/prefix/issues/5\">#5</a> test", + "test #5 issue", "test <a href=\"/prefix/issues/5\">#5</a> issue", + "test issue #5", "test issue <a href=\"/prefix/issues/5\">#5</a>", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should not render issue mention without leading space", func() { + input := []byte("test#54321 issue") + expected := "test#54321 issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should not render issue mention without trailing space", func() { + input := []byte("test #54321issue") + expected := "test #54321issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should render issue mention in parentheses", func() { + testCases := []string{ + "(#54321 issue)", "(<a href=\"/prefix/issues/54321\">#54321</a> issue)", + "test (#54321) issue", "test (<a href=\"/prefix/issues/54321\">#54321</a>) issue", + "test (#54321 extra) issue", "test (<a href=\"/prefix/issues/54321\">#54321</a> extra) issue", + "test (#54321 issue)", "test (<a href=\"/prefix/issues/54321\">#54321</a> issue)", + "test (#54321)", "test (<a href=\"/prefix/issues/54321\">#54321</a>)", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render issue mention in square brackets", func() { + testCases := []string{ + "[#54321 issue]", "[<a href=\"/prefix/issues/54321\">#54321</a> issue]", + "test [#54321] issue", "test [<a href=\"/prefix/issues/54321\">#54321</a>] issue", + "test [#54321 extra] issue", "test [<a href=\"/prefix/issues/54321\">#54321</a> extra] issue", + "test [#54321 issue]", "test [<a href=\"/prefix/issues/54321\">#54321</a> issue]", + "test [#54321]", "test [<a href=\"/prefix/issues/54321\">#54321</a>]", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render multiple issue mentions in the same line", func() { + testCases := []string{ + "#54321 #1243", "<a href=\"/prefix/issues/54321\">#54321</a> <a href=\"/prefix/issues/1243\">#1243</a>", + "test #54321 #1243", "test <a href=\"/prefix/issues/54321\">#54321</a> <a href=\"/prefix/issues/1243\">#1243</a>", + "(#54321 #1243)", "(<a href=\"/prefix/issues/54321\">#54321</a> <a href=\"/prefix/issues/1243\">#1243</a>)", + "(#54321)(#1243)", "(<a href=\"/prefix/issues/54321\">#54321</a>)(<a href=\"/prefix/issues/1243\">#1243</a>)", + "text #54321 test #1243 issue", "text <a href=\"/prefix/issues/54321\">#54321</a> test <a href=\"/prefix/issues/1243\">#1243</a> issue", + "#1 (#4321) test", "<a href=\"/prefix/issues/1\">#1</a> (<a href=\"/prefix/issues/4321\">#4321</a>) test", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + }) + Convey("To an external issue tracker with numeric style", func() { + metas = make(map[string]string) + metas["format"] = "https://someurl.com/{user}/{repo}/{index}" + metas["user"] = "someuser" + metas["repo"] = "somerepo" + metas["style"] = ISSUE_NAME_STYLE_NUMERIC + + Convey("should not render anything when there are no mentions", func() { + testCases := []string{ + "this is a test", + "test 123 123 1234", + "#", + "# # #", + "# 123", + "#abcd", + } + + for i := 0; i < len(testCases); i++ { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i]) + } + }) + Convey("It should render freestanding issue mentions", func() { + testCases := []string{ + "#1234 test", "<a href=\"https://someurl.com/someuser/somerepo/1234\">#1234</a> test", + "test #1234 issue", "test <a href=\"https://someurl.com/someuser/somerepo/1234\">#1234</a> issue", + "test issue #1234", "test issue <a href=\"https://someurl.com/someuser/somerepo/1234\">#1234</a>", + "#5 test", "<a href=\"https://someurl.com/someuser/somerepo/5\">#5</a> test", + "test #5 issue", "test <a href=\"https://someurl.com/someuser/somerepo/5\">#5</a> issue", + "test issue #5", "test issue <a href=\"https://someurl.com/someuser/somerepo/5\">#5</a>", + } + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should not render issue mention without leading space", func() { + input := []byte("test#54321 issue") + expected := "test#54321 issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should not render issue mention without trailing space", func() { + input := []byte("test #54321issue") + expected := "test #54321issue" + So(string(RenderIssueIndexPattern(input, urlPrefix, metas)), ShouldEqual, expected) + }) + Convey("It should render issue mention in parentheses", func() { + testCases := []string{ + "(#54321 issue)", "(<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> issue)", + "test (#54321) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a>) issue", + "test (#54321 extra) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> extra) issue", + "test (#54321 issue)", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> issue)", + "test (#54321)", "test (<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a>)", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render multiple issue mentions in the same line", func() { + testCases := []string{ + "#54321 #1243", "<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>", + "test #54321 #1243", "test <a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>", + "(#54321 #1243)", "(<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>)", + "(#54321)(#1243)", "(<a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a>)(<a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a>)", + "text #54321 test #1243 issue", "text <a href=\"https://someurl.com/someuser/somerepo/54321\">#54321</a> test <a href=\"https://someurl.com/someuser/somerepo/1243\">#1243</a> issue", + "#1 (#4321) test", "<a href=\"https://someurl.com/someuser/somerepo/1\">#1</a> (<a href=\"https://someurl.com/someuser/somerepo/4321\">#4321</a>) test", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + }) + Convey("To an external issue tracker with alphanumeric style", func() { + metas = make(map[string]string) + metas["format"] = "https://someurl.com/{user}/{repo}/?b={index}" + metas["user"] = "someuser" + metas["repo"] = "somerepo" + metas["style"] = ISSUE_NAME_STYLE_ALPHANUMERIC + Convey("It should not render anything when there are no mentions", func() { + testCases := []string{ + "", + "this is a test", + "test 123 123 1234", + "#", + "##1234", + "# 123", + "#abcd", + "test #123", + "abc-1234", // issue prefix must be capital + "ABc-1234", // issue prefix must be _all_ capital + "ABCDEFGHIJK-1234", // the limit is 10 characters in the prefix + "ABC1234", // dash is required + "test ABC- test", // number is required + "test -1234 test", // prefix is required + "testABC-123 test", // leading space is required + "test ABC-123test", // trailing space is required + "ABC-0123", // no leading zero + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i]) + } + }) + Convey("It should render freestanding issue mention", func() { + testCases := []string{ + "OTT-1234 test", "<a href=\"https://someurl.com/someuser/somerepo/?b=OTT-1234\">OTT-1234</a> test", + "test T-12 issue", "test <a href=\"https://someurl.com/someuser/somerepo/?b=T-12\">T-12</a> issue", + "test issue ABCDEFGHIJ-1234567890", "test issue <a href=\"https://someurl.com/someuser/somerepo/?b=ABCDEFGHIJ-1234567890\">ABCDEFGHIJ-1234567890</a>", + "A-1 test", "<a href=\"https://someurl.com/someuser/somerepo/?b=A-1\">A-1</a> test", + "test ZED-1 issue", "test <a href=\"https://someurl.com/someuser/somerepo/?b=ZED-1\">ZED-1</a> issue", + "test issue DEED-7154", "test issue <a href=\"https://someurl.com/someuser/somerepo/?b=DEED-7154\">DEED-7154</a>", + } + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render issue mention in parentheses", func() { + testCases := []string{ + "(ABG-124 issue)", "(<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> issue)", + "test (ABG-124) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>) issue", + "test (ABG-124 extra) issue", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> extra) issue", + "test (ABG-124 issue)", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> issue)", + "test (ABG-124)", "test (<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>)", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render issue mention in square brackets", func() { + testCases := []string{ + "[ABG-124] issue", "[<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>] issue", + "test [ABG-124] issue", "test [<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>] issue", + "test [ABG-124 extra] issue", "test [<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> extra] issue", + "test [ABG-124 issue]", "test [<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> issue]", + "test [ABG-124]", "test [<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>]", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + Convey("It should render multiple issue mentions in the same line", func() { + testCases := []string{ + "ABG-124 OTT-4321", "<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>", + "test ABG-124 OTT-4321", "test <a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>", + "(ABG-124 OTT-4321)", "(<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>)", + "(ABG-124)(OTT-4321)", "(<a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a>)(<a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a>)", + "text ABG-124 test OTT-4321 issue", "text <a href=\"https://someurl.com/someuser/somerepo/?b=ABG-124\">ABG-124</a> test <a href=\"https://someurl.com/someuser/somerepo/?b=OTT-4321\">OTT-4321</a> issue", + "A-1 (RRE-345) test", "<a href=\"https://someurl.com/someuser/somerepo/?b=A-1\">A-1</a> (<a href=\"https://someurl.com/someuser/somerepo/?b=RRE-345\">RRE-345</a>) test", + } + + for i := 0; i < len(testCases); i += 2 { + So(string(RenderIssueIndexPattern([]byte(testCases[i]), urlPrefix, metas)), ShouldEqual, testCases[i+1]) + } + }) + }) + }) +} diff --git a/internal/markup/orgmode.go b/internal/markup/orgmode.go new file mode 100644 index 00000000..6fe1240a --- /dev/null +++ b/internal/markup/orgmode.go @@ -0,0 +1,40 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "bytes" + "path/filepath" + "strings" + + "github.com/niklasfasching/go-org/org" +) + +var orgModeExtensions = []string{".org"} + +// IsOrgModeFile reports whether name looks like a Org-mode file based on its extension. +func IsOrgModeFile(name string) bool { + extension := strings.ToLower(filepath.Ext(name)) + for _, ext := range orgModeExtensions { + if strings.ToLower(ext) == extension { + return true + } + } + return false +} + +// RawOrgMode renders content in Org-mode syntax to HTML without handling special links. +func RawOrgMode(body []byte, urlPrefix string) (result []byte) { + html, err := org.New().Silent().Parse(bytes.NewReader(body), urlPrefix).Write(org.NewHTMLWriter()) + if err != nil { + return []byte(err.Error()) + } + return []byte(html) +} + +// OrgMode takes a string or []byte and renders to HTML in Org-mode syntax with special links. +func OrgMode(input interface{}, urlPrefix string, metas map[string]string) []byte { + return Render(ORG_MODE, input, urlPrefix, metas) +} diff --git a/internal/markup/sanitizer.go b/internal/markup/sanitizer.go new file mode 100644 index 00000000..e8d76b23 --- /dev/null +++ b/internal/markup/sanitizer.go @@ -0,0 +1,55 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup + +import ( + "regexp" + "sync" + + "github.com/microcosm-cc/bluemonday" + + "gogs.io/gogs/internal/setting" +) + +// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow +// any modification to the underlying policies once it's been created. +type Sanitizer struct { + policy *bluemonday.Policy + init sync.Once +} + +var sanitizer = &Sanitizer{ + policy: bluemonday.UGCPolicy(), +} + +// NewSanitizer initializes sanitizer with allowed attributes based on settings. +// Multiple calls to this function will only create one instance of Sanitizer during +// entire application lifecycle. +func NewSanitizer() { + sanitizer.init.Do(func() { + // We only want to allow HighlightJS specific classes for code blocks + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") + + // Checkboxes + sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") + sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") + + // Data URLs + sanitizer.policy.AllowURLSchemes("data") + + // Custom URL-Schemes + sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) + }) +} + +// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. +func Sanitize(s string) string { + return sanitizer.policy.Sanitize(s) +} + +// SanitizeBytes takes a []byte slice that contains a HTML fragment or document and applies policy whitelist. +func SanitizeBytes(b []byte) []byte { + return sanitizer.policy.SanitizeBytes(b) +} diff --git a/internal/markup/sanitizer_test.go b/internal/markup/sanitizer_test.go new file mode 100644 index 00000000..06b10822 --- /dev/null +++ b/internal/markup/sanitizer_test.go @@ -0,0 +1,38 @@ +// Copyright 2017 The Gogs Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markup_test + +import ( + "testing" + + . "github.com/smartystreets/goconvey/convey" + + . "gogs.io/gogs/internal/markup" +) + +func Test_Sanitizer(t *testing.T) { + NewSanitizer() + Convey("Sanitize HTML string and bytes", t, func() { + testCases := []string{ + // Regular + `<a onblur="alert(secret)" href="http://www.google.com">Google</a>`, `<a href="http://www.google.com" rel="nofollow">Google</a>`, + + // Code highlighting class + `<code class="random string"></code>`, `<code></code>`, + `<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`, + `<code class="language-go"></code>`, `<code class="language-go"></code>`, + + // Input checkbox + `<input type="hidden">`, ``, + `<input type="checkbox">`, `<input type="checkbox">`, + `<input checked disabled autofocus>`, `<input checked="" disabled="">`, + } + + for i := 0; i < len(testCases); i += 2 { + So(Sanitize(testCases[i]), ShouldEqual, testCases[i+1]) + So(string(SanitizeBytes([]byte(testCases[i]))), ShouldEqual, testCases[i+1]) + } + }) +} |