Fix corrupted external render content (#35946)

Fix #35944
This commit is contained in:
wxiaoguang 2025-11-14 08:31:11 +08:00 committed by GitHub
parent b95fd7e13e
commit 1f3558b65c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 60 additions and 88 deletions

View File

@ -92,8 +92,6 @@ func handleFileViewRenderMarkup(ctx *context.Context, filename string, sniffedTy
ctx.ServerError("Render", err) ctx.ServerError("Render", err)
return true return true
} }
// to prevent iframe from loading third-party url
ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'")
return true return true
} }
@ -241,14 +239,17 @@ func prepareFileView(ctx *context.Context, entry *git.TreeEntry) {
// * IsRenderableXxx: some files are rendered by backend "markup" engine, some are rendered by frontend (pdf, 3d) // * IsRenderableXxx: some files are rendered by backend "markup" engine, some are rendered by frontend (pdf, 3d)
// * DefaultViewMode: when there is no "display" query parameter, which view mode should be used by default, source or rendered // * DefaultViewMode: when there is no "display" query parameter, which view mode should be used by default, source or rendered
utf8Reader := charset.ToUTF8WithFallbackReader(io.MultiReader(bytes.NewReader(buf), dataRc), charset.ConvertOpts{}) contentReader := io.MultiReader(bytes.NewReader(buf), dataRc)
if fInfo.st.IsRepresentableAsText() {
contentReader = charset.ToUTF8WithFallbackReader(contentReader, charset.ConvertOpts{})
}
switch { switch {
case fInfo.blobOrLfsSize >= setting.UI.MaxDisplayFileSize: case fInfo.blobOrLfsSize >= setting.UI.MaxDisplayFileSize:
ctx.Data["IsFileTooLarge"] = true ctx.Data["IsFileTooLarge"] = true
case handleFileViewRenderMarkup(ctx, entry.Name(), fInfo.st, buf, utf8Reader): case handleFileViewRenderMarkup(ctx, entry.Name(), fInfo.st, buf, contentReader):
// it also sets ctx.Data["FileContent"] and more // it also sets ctx.Data["FileContent"] and more
ctx.Data["IsMarkup"] = true ctx.Data["IsMarkup"] = true
case handleFileViewRenderSource(ctx, entry.Name(), attrs, fInfo, utf8Reader): case handleFileViewRenderSource(ctx, entry.Name(), attrs, fInfo, contentReader):
// it also sets ctx.Data["FileContent"] and more // it also sets ctx.Data["FileContent"] and more
ctx.Data["IsDisplayingSource"] = true ctx.Data["IsDisplayingSource"] = true
case handleFileViewRenderImage(ctx, fInfo, buf): case handleFileViewRenderImage(ctx, fInfo, buf):

View File

@ -1 +0,0 @@
ref: refs/heads/master

View File

@ -1,6 +0,0 @@
[core]
repositoryformatversion = 0
filemode = true
bare = true
ignorecase = true
precomposeunicode = true

View File

@ -1 +0,0 @@
The repository will be used to test third-party renderer in TestExternalMarkupRenderer

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
data=$(cat)
exitcodes=""
hookname=$(basename $0)
GIT_DIR=${GIT_DIR:-$(dirname $0)}
for hook in ${GIT_DIR}/hooks/${hookname}.d/*; do
test -x "${hook}" && test -f "${hook}" || continue
echo "${data}" | "${hook}"
exitcodes="${exitcodes} $?"
done
for i in ${exitcodes}; do
[ ${i} -eq 0 ] || exit ${i}
done

View File

@ -1,2 +0,0 @@
#!/usr/bin/env bash
"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" post-receive

View File

@ -1,15 +0,0 @@
#!/usr/bin/env bash
data=$(cat)
exitcodes=""
hookname=$(basename $0)
GIT_DIR=${GIT_DIR:-$(dirname $0)}
for hook in ${GIT_DIR}/hooks/${hookname}.d/*; do
test -x "${hook}" && test -f "${hook}" || continue
echo "${data}" | "${hook}"
exitcodes="${exitcodes} $?"
done
for i in ${exitcodes}; do
[ ${i} -eq 0 ] || exit ${i}
done

View File

@ -1,2 +0,0 @@
#!/usr/bin/env bash
"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" pre-receive

View File

@ -1,14 +0,0 @@
#!/usr/bin/env bash
exitcodes=""
hookname=$(basename $0)
GIT_DIR=${GIT_DIR:-$(dirname $0)}
for hook in ${GIT_DIR}/hooks/${hookname}.d/*; do
test -x "${hook}" && test -f "${hook}" || continue
"${hook}" $1 $2 $3
exitcodes="${exitcodes} $?"
done
for i in ${exitcodes}; do
[ ${i} -eq 0 ] || exit ${i}
done

View File

@ -1,2 +0,0 @@
#!/usr/bin/env bash
"$GITEA_ROOT/gitea" hook --config="$GITEA_ROOT/$GITEA_CONF" update $1 $2 $3

View File

@ -1,2 +0,0 @@
# pack-refs with: peeled fully-peeled sorted
c961cc4d1ba6b7ee1ba228a9a02b00b7746d8033 refs/heads/master

View File

@ -12,6 +12,7 @@ import (
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user" user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/markup/external" "code.gitea.io/gitea/modules/markup/external"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
@ -25,29 +26,45 @@ import (
func TestExternalMarkupRenderer(t *testing.T) { func TestExternalMarkupRenderer(t *testing.T) {
defer tests.PrepareTestEnv(t)() defer tests.PrepareTestEnv(t)()
if !setting.Database.Type.IsSQLite3() { if !setting.Database.Type.IsSQLite3() {
t.Skip() t.Skip("only SQLite3 test config supports external markup renderer")
return return
} }
const binaryContentPrefix = "any prefix text."
const binaryContent = binaryContentPrefix + "\xfe\xfe\xfe\x00\xff\xff"
detectedEncoding, _ := charset.DetectEncoding([]byte(binaryContent))
assert.NotEqual(t, binaryContent, strings.ToValidUTF8(binaryContent, "?"))
assert.Equal(t, "ISO-8859-2", detectedEncoding) // even if the binary content can be detected as text encoding, it shouldn't affect the raw rendering
onGiteaRun(t, func(t *testing.T, _ *url.URL) { onGiteaRun(t, func(t *testing.T, _ *url.URL) {
user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2}) user2 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
repo1 := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1}) repo1 := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})
_, err := createFile(user2, repo1, "file.no-sanitizer", "master", `any content`) _, err := createFileInBranch(user2, repo1, createFileInBranchOptions{}, map[string]string{
"test.html": `<div><any attr="val"><script></script></div>`,
"html.no-sanitizer": `<script>foo("raw")</script>`,
"bin.no-sanitizer": binaryContent,
})
require.NoError(t, err) require.NoError(t, err)
t.Run("RenderNoSanitizer", func(t *testing.T) { t.Run("RenderNoSanitizer", func(t *testing.T) {
req := NewRequest(t, "GET", "/user2/repo1/src/branch/master/file.no-sanitizer") req := NewRequest(t, "GET", "/user2/repo1/src/branch/master/html.no-sanitizer")
resp := MakeRequest(t, req, http.StatusOK) resp := MakeRequest(t, req, http.StatusOK)
doc := NewHTMLParser(t, resp.Body) div := NewHTMLParser(t, resp.Body).Find("div.file-view")
div := doc.Find("div.file-view")
data, err := div.Html() data, err := div.Html()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, `<script>window.alert("hi")</script>`, strings.TrimSpace(data)) assert.Equal(t, `<script>foo("raw")</script>`, strings.TrimSpace(data))
req = NewRequest(t, "GET", "/user2/repo1/src/branch/master/bin.no-sanitizer")
resp = MakeRequest(t, req, http.StatusOK)
div = NewHTMLParser(t, resp.Body).Find("div.file-view")
data, err = div.Html()
assert.NoError(t, err)
assert.Equal(t, strings.ReplaceAll(binaryContent, "\x00", ""), strings.TrimSpace(data)) // HTML template engine removes the null bytes
}) })
}) })
t.Run("RenderContentDirectly", func(t *testing.T) { t.Run("RenderContentDirectly", func(t *testing.T) {
req := NewRequest(t, "GET", "/user30/renderer/src/branch/master/README.html") req := NewRequest(t, "GET", "/user2/repo1/src/branch/master/test.html")
resp := MakeRequest(t, req, http.StatusOK) resp := MakeRequest(t, req, http.StatusOK)
assert.Equal(t, "text/html; charset=utf-8", resp.Header().Get("Content-Type")) assert.Equal(t, "text/html; charset=utf-8", resp.Header().Get("Content-Type"))
@ -55,18 +72,21 @@ func TestExternalMarkupRenderer(t *testing.T) {
div := doc.Find("div.file-view") div := doc.Find("div.file-view")
data, err := div.Html() data, err := div.Html()
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, "<div>\n\ttest external renderer\n</div>", strings.TrimSpace(data)) // the content is fully sanitized
assert.Equal(t, `<div>&lt;script&gt;&lt;/script&gt;</div>`, strings.TrimSpace(data))
}) })
// above tested "no-sanitizer" mode, then we test iframe mode below // above tested in-page rendering (no iframe), then we test iframe mode below
r := markup.GetRendererByFileName("any-file.html").(*external.Renderer) r := markup.GetRendererByFileName("any-file.html").(*external.Renderer)
defer test.MockVariableValue(&r.RenderContentMode, setting.RenderContentModeIframe)() defer test.MockVariableValue(&r.RenderContentMode, setting.RenderContentModeIframe)()
assert.True(t, r.NeedPostProcess())
r = markup.GetRendererByFileName("any-file.no-sanitizer").(*external.Renderer) r = markup.GetRendererByFileName("any-file.no-sanitizer").(*external.Renderer)
defer test.MockVariableValue(&r.RenderContentMode, setting.RenderContentModeIframe)() defer test.MockVariableValue(&r.RenderContentMode, setting.RenderContentModeIframe)()
assert.False(t, r.NeedPostProcess())
t.Run("RenderContentInIFrame", func(t *testing.T) { t.Run("RenderContentInIFrame", func(t *testing.T) {
t.Run("DefaultSandbox", func(t *testing.T) { t.Run("DefaultSandbox", func(t *testing.T) {
req := NewRequest(t, "GET", "/user30/renderer/src/branch/master/README.html") req := NewRequest(t, "GET", "/user2/repo1/src/branch/master/test.html")
t.Run("ParentPage", func(t *testing.T) { t.Run("ParentPage", func(t *testing.T) {
respParent := MakeRequest(t, req, http.StatusOK) respParent := MakeRequest(t, req, http.StatusOK)
@ -77,31 +97,42 @@ func TestExternalMarkupRenderer(t *testing.T) {
// default sandbox on parent page // default sandbox on parent page
assert.Equal(t, "allow-scripts allow-popups", iframe.AttrOr("sandbox", "")) assert.Equal(t, "allow-scripts allow-popups", iframe.AttrOr("sandbox", ""))
assert.Equal(t, "/user30/renderer/render/branch/master/README.html", iframe.AttrOr("data-src", "")) assert.Equal(t, "/user2/repo1/render/branch/master/test.html", iframe.AttrOr("data-src", ""))
}) })
t.Run("SubPage", func(t *testing.T) { t.Run("SubPage", func(t *testing.T) {
req = NewRequest(t, "GET", "/user30/renderer/render/branch/master/README.html") req = NewRequest(t, "GET", "/user2/repo1/render/branch/master/test.html")
respSub := MakeRequest(t, req, http.StatusOK) respSub := MakeRequest(t, req, http.StatusOK)
assert.Equal(t, "text/html; charset=utf-8", respSub.Header().Get("Content-Type")) assert.Equal(t, "text/html; charset=utf-8", respSub.Header().Get("Content-Type"))
// default sandbox in sub page response // default sandbox in sub page response
assert.Equal(t, "frame-src 'self'; sandbox allow-scripts allow-popups", respSub.Header().Get("Content-Security-Policy")) assert.Equal(t, "frame-src 'self'; sandbox allow-scripts allow-popups", respSub.Header().Get("Content-Security-Policy"))
assert.Equal(t, "<script src=\"/assets/js/external-render-iframe.js\"></script><link rel=\"stylesheet\" href=\"/assets/css/external-render-iframe.css\"><div>\n\ttest external renderer\n</div>\n", respSub.Body.String()) // FIXME: actually here is a bug (legacy design problem), the "PostProcess" will escape "<script>" tag, but it indeed is the sanitizer's job
assert.Equal(t, `<script src="/assets/js/external-render-iframe.js"></script><link rel="stylesheet" href="/assets/css/external-render-iframe.css"><div><any attr="val">&lt;script&gt;&lt;/script&gt;</any></div>`, respSub.Body.String())
}) })
}) })
t.Run("NoSanitizerNoSandbox", func(t *testing.T) { t.Run("NoSanitizerNoSandbox", func(t *testing.T) {
req := NewRequest(t, "GET", "/user2/repo1/src/branch/master/file.no-sanitizer") t.Run("BinaryContent", func(t *testing.T) {
req := NewRequest(t, "GET", "/user2/repo1/src/branch/master/bin.no-sanitizer")
respParent := MakeRequest(t, req, http.StatusOK) respParent := MakeRequest(t, req, http.StatusOK)
iframe := NewHTMLParser(t, respParent.Body).Find("iframe.external-render-iframe") iframe := NewHTMLParser(t, respParent.Body).Find("iframe.external-render-iframe")
assert.Equal(t, "/user2/repo1/render/branch/master/file.no-sanitizer", iframe.AttrOr("data-src", "")) assert.Equal(t, "/user2/repo1/render/branch/master/bin.no-sanitizer", iframe.AttrOr("data-src", ""))
req = NewRequest(t, "GET", "/user2/repo1/render/branch/master/file.no-sanitizer") req = NewRequest(t, "GET", "/user2/repo1/render/branch/master/bin.no-sanitizer")
respSub := MakeRequest(t, req, http.StatusOK) respSub := MakeRequest(t, req, http.StatusOK)
assert.Equal(t, binaryContent, respSub.Body.String()) // raw content should keep the raw bytes (including invalid UTF-8 bytes), and no "external-render-iframe" helpers
// no sandbox (disabled by RENDER_CONTENT_SANDBOX) // no sandbox (disabled by RENDER_CONTENT_SANDBOX)
assert.Empty(t, iframe.AttrOr("sandbox", "")) assert.Empty(t, iframe.AttrOr("sandbox", ""))
assert.Equal(t, "frame-src 'self'", respSub.Header().Get("Content-Security-Policy")) assert.Equal(t, "frame-src 'self'", respSub.Header().Get("Content-Security-Policy"))
}) })
t.Run("HTMLContentWithExternalRenderIframeHelper", func(t *testing.T) {
req := NewRequest(t, "GET", "/user2/repo1/render/branch/master/html.no-sanitizer")
respSub := MakeRequest(t, req, http.StatusOK)
assert.Equal(t, `<script src="/assets/js/external-render-iframe.js"></script><link rel="stylesheet" href="/assets/css/external-render-iframe.css"><script>foo("raw")</script>`, respSub.Body.String())
assert.Equal(t, "frame-src 'self'", respSub.Header().Get("Content-Security-Policy"))
})
})
}) })
} }

View File

@ -122,7 +122,7 @@ RENDER_CONTENT_MODE = sanitized
[markup.no-sanitizer] [markup.no-sanitizer]
ENABLED = true ENABLED = true
FILE_EXTENSIONS = .no-sanitizer FILE_EXTENSIONS = .no-sanitizer
RENDER_COMMAND = echo '<script>window.alert("hi")</script>' RENDER_COMMAND = go run tools/test-echo.go
; This test case is reused, at first it is used to test "no-sanitizer" (sandbox doesn't take effect here) ; This test case is reused, at first it is used to test "no-sanitizer" (sandbox doesn't take effect here)
; Then it will be updated and used to test "iframe + sandbox-disabled" ; Then it will be updated and used to test "iframe + sandbox-disabled"
RENDER_CONTENT_MODE = no-sanitizer RENDER_CONTENT_MODE = no-sanitizer