Add support for 3D/CAD file formats preview (#34794)
Fix #34775 --------- Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
@@ -6,18 +6,14 @@ package typesniffer
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Use at most this many bytes to determine Content Type.
|
||||
const sniffLen = 1024
|
||||
const SniffContentSize = 1024
|
||||
|
||||
const (
|
||||
MimeTypeImageSvg = "image/svg+xml"
|
||||
@@ -26,22 +22,30 @@ const (
|
||||
MimeTypeApplicationOctetStream = "application/octet-stream"
|
||||
)
|
||||
|
||||
var (
|
||||
svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
|
||||
svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
|
||||
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
|
||||
)
|
||||
var globalVars = sync.OnceValue(func() (ret struct {
|
||||
svgComment, svgTagRegex, svgTagInXMLRegex *regexp.Regexp
|
||||
},
|
||||
) {
|
||||
ret.svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
|
||||
ret.svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
|
||||
ret.svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
|
||||
return ret
|
||||
})
|
||||
|
||||
// SniffedType contains information about a blobs type.
|
||||
// SniffedType contains information about a blob's type.
|
||||
type SniffedType struct {
|
||||
contentType string
|
||||
}
|
||||
|
||||
// IsText etects if content format is plain text.
|
||||
// IsText detects if the content format belongs to the text family, including text/plain, text/html, text/css, etc.
|
||||
func (ct SniffedType) IsText() bool {
|
||||
return strings.Contains(ct.contentType, "text/")
|
||||
}
|
||||
|
||||
func (ct SniffedType) IsTextPlain() bool {
|
||||
return strings.Contains(ct.contentType, "text/plain")
|
||||
}
|
||||
|
||||
// IsImage detects if data is an image format
|
||||
func (ct SniffedType) IsImage() bool {
|
||||
return strings.Contains(ct.contentType, "image/")
|
||||
@@ -57,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
|
||||
return strings.Contains(ct.contentType, "application/pdf")
|
||||
}
|
||||
|
||||
// IsVideo detects if data is an video format
|
||||
// IsVideo detects if data is a video format
|
||||
func (ct SniffedType) IsVideo() bool {
|
||||
return strings.Contains(ct.contentType, "video/")
|
||||
}
|
||||
|
||||
// IsAudio detects if data is an video format
|
||||
// IsAudio detects if data is an audio format
|
||||
func (ct SniffedType) IsAudio() bool {
|
||||
return strings.Contains(ct.contentType, "audio/")
|
||||
}
|
||||
@@ -103,33 +107,34 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
|
||||
return brands, true
|
||||
}
|
||||
|
||||
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
|
||||
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
|
||||
func DetectContentType(data []byte) SniffedType {
|
||||
if len(data) == 0 {
|
||||
return SniffedType{"text/unknown"}
|
||||
return SniffedType{"text/plain"}
|
||||
}
|
||||
|
||||
ct := http.DetectContentType(data)
|
||||
|
||||
if len(data) > sniffLen {
|
||||
data = data[:sniffLen]
|
||||
if len(data) > SniffContentSize {
|
||||
data = data[:SniffContentSize]
|
||||
}
|
||||
|
||||
vars := globalVars()
|
||||
// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
|
||||
detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
|
||||
detectByXML := strings.Contains(ct, "text/xml")
|
||||
if detectByHTML || detectByXML {
|
||||
dataProcessed := svgComment.ReplaceAll(data, nil)
|
||||
dataProcessed := vars.svgComment.ReplaceAll(data, nil)
|
||||
dataProcessed = bytes.TrimSpace(dataProcessed)
|
||||
if detectByHTML && svgTagRegex.Match(dataProcessed) ||
|
||||
detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
|
||||
if detectByHTML && vars.svgTagRegex.Match(dataProcessed) ||
|
||||
detectByXML && vars.svgTagInXMLRegex.Match(dataProcessed) {
|
||||
ct = MimeTypeImageSvg
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
|
||||
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
|
||||
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
|
||||
// So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
|
||||
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
|
||||
ct2 := http.DetectContentType(data[3:])
|
||||
if strings.HasPrefix(ct2, "text/") {
|
||||
@@ -155,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
|
||||
}
|
||||
return SniffedType{ct}
|
||||
}
|
||||
|
||||
// DetectContentTypeFromReader guesses the content type contained in the reader.
|
||||
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
|
||||
buf := make([]byte, sniffLen)
|
||||
n, err := util.ReadAtMost(r, buf)
|
||||
if err != nil {
|
||||
return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
|
||||
}
|
||||
buf = buf[:n]
|
||||
|
||||
return DetectContentType(buf), nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user