Add binary files support (#503)

This commit is contained in:
Thomas Miceli
2025-09-16 01:35:54 +02:00
committed by GitHub
parent 905276f24b
commit 594d876ba8
25 changed files with 426 additions and 194 deletions

View File

@@ -418,12 +418,20 @@ func (gist *Gist) Files(revision string, truncate bool) ([]*git.File, error) {
var files []*git.File
for _, fileCat := range filesCat {
var shortContent string
if len(fileCat.Content) > 512 {
shortContent = fileCat.Content[:512]
} else {
shortContent = fileCat.Content
}
files = append(files, &git.File{
Filename: fileCat.Name,
Size: fileCat.Size,
HumanSize: humanize.IBytes(fileCat.Size),
Content: fileCat.Content,
Truncated: fileCat.Truncated,
MimeType: git.DetectMimeType([]byte(shortContent)),
})
}
return files, err
@@ -444,12 +452,20 @@ func (gist *Gist) File(revision string, filename string, truncate bool) (*git.Fi
return nil, err
}
var shortContent string
if len(content) > 512 {
shortContent = content[:512]
} else {
shortContent = content
}
return &git.File{
Filename: filename,
Size: size,
HumanSize: humanize.IBytes(size),
Content: content,
Truncated: truncated,
MimeType: git.DetectMimeType([]byte(shortContent)),
}, err
}
@@ -660,10 +676,15 @@ func (gist *Gist) ToDTO() (*GistDTO, error) {
fileDTOs := make([]FileDTO, 0, len(files))
for _, file := range files {
fileDTOs = append(fileDTOs, FileDTO{
f := FileDTO{
Filename: file.Filename,
Content: file.Content,
})
}
if file.MimeType.CanBeEdited() {
f.Content = file.Content
} else {
f.Binary = true
}
fileDTOs = append(fileDTOs, f)
}
return &GistDTO{
@@ -702,6 +723,7 @@ type VisibilityDTO struct {
// FileDTO carries a single gist file (name and content) together with its
// validation rules.
type FileDTO struct {
// Filename may not contain '/' (\x2f) or '\' (\x5c); the max rule caps it at 255.
Filename string `validate:"excludes=\x2f,excludes=\x5c,max=255"`
// Content is the file body. ToDTO only fills it in when the file's MIME type
// can be edited.
Content string `validate:"required"`
// Binary is set by ToDTO for files whose MIME type cannot be edited; Content
// is left empty in that case.
Binary bool
}
func (dto *GistDTO) ToGist() *Gist {

89
internal/git/mime.go Normal file
View File

@@ -0,0 +1,89 @@
package git
import (
"fmt"
"strings"
"github.com/gabriel-vasile/mimetype"
)
// MimeType wraps a MIME content type string and exposes predicates used to
// decide how a file is displayed and whether it can be edited.
type MimeType struct {
	// ContentType is the raw content type, for example "image/png" or
	// "text/plain; charset=utf-8".
	ContentType string
}

// IsText reports whether the content type contains "text/".
func (mt MimeType) IsText() bool {
	return strings.Contains(mt.ContentType, "text/")
}

// IsCSV reports whether the content type contains "text/csv".
func (mt MimeType) IsCSV() bool {
	return strings.Contains(mt.ContentType, "text/csv")
}

// IsImage reports whether the content type contains "image/".
func (mt MimeType) IsImage() bool {
	return strings.Contains(mt.ContentType, "image/")
}

// IsSVG reports whether the content type contains "image/svg+xml".
func (mt MimeType) IsSVG() bool {
	return strings.Contains(mt.ContentType, "image/svg+xml")
}

// IsPDF reports whether the content type contains "application/pdf".
func (mt MimeType) IsPDF() bool {
	return strings.Contains(mt.ContentType, "application/pdf")
}

// IsAudio reports whether the content type contains "audio/".
func (mt MimeType) IsAudio() bool {
	return strings.Contains(mt.ContentType, "audio/")
}

// IsVideo reports whether the content type contains "video/".
func (mt MimeType) IsVideo() bool {
	return strings.Contains(mt.ContentType, "video/")
}

// CanBeHighlighted reports whether the content is text other than CSV.
func (mt MimeType) CanBeHighlighted() bool {
	return mt.IsText() && !mt.IsCSV()
}

// CanBeEmbedded reports whether the content is an image, a PDF, audio or video.
func (mt MimeType) CanBeEmbedded() bool {
	return mt.IsImage() || mt.IsPDF() || mt.IsAudio() || mt.IsVideo()
}

// CanBeRendered reports whether the content is text or one of the embeddable
// kinds. SVG is listed explicitly although IsImage already matches it.
func (mt MimeType) CanBeRendered() bool {
	return mt.IsText() || mt.IsImage() || mt.IsSVG() || mt.IsPDF() || mt.IsAudio() || mt.IsVideo()
}

// CanBeEdited reports whether the content is text or SVG.
func (mt MimeType) CanBeEdited() bool {
	return mt.IsText() || mt.IsSVG()
}

// RenderType returns a short, user-friendly label for the content type:
// "Image (PNG)", "Audio (MPEG)", "Video (MP4)", "PDF", "CSV", "Text" or
// "Binary". For image, audio and video the upper-cased subtype is appended in
// parentheses when one is present.
func (mt MimeType) RenderType() string {
	// Strip MIME parameters ("; charset=utf-8", "; codecs=...") so they never
	// leak into the label.
	mediaType := strings.TrimSpace(strings.SplitN(mt.ContentType, ";", 2)[0])

	// label appends "(SUBTYPE)" to kind, or returns kind alone when the media
	// type has no usable subtype (avoids "Image ()" or a trailing space).
	label := func(kind string) string {
		parts := strings.Split(mediaType, "/")
		if len(parts) != 2 || parts[1] == "" {
			return kind
		}
		return fmt.Sprintf("%s (%s)", kind, strings.ToUpper(parts[1]))
	}

	switch {
	case mt.IsImage() || mt.IsSVG():
		return label("Image")
	case mt.IsAudio():
		return label("Audio")
	case mt.IsVideo():
		return label("Video")
	case mt.IsPDF():
		return "PDF"
	case mt.IsCSV():
		return "CSV"
	case mt.IsText():
		return "Text"
	default:
		return "Binary"
	}
}
// DetectMimeType runs the mimetype library's detection on data and wraps the
// resulting content type string in a MimeType.
func DetectMimeType(data []byte) MimeType {
	detected := mimetype.Detect(data)
	return MimeType{ContentType: detected.String()}
}

View File

@@ -3,27 +3,23 @@ package git
import (
"bufio"
"bytes"
"encoding/csv"
"fmt"
"io"
"regexp"
"strings"
)
type File struct {
Filename string `json:"filename"`
Size uint64 `json:"size"`
HumanSize string `json:"human_size"`
OldFilename string `json:"-"`
Content string `json:"content"`
Truncated bool `json:"truncated"`
IsCreated bool `json:"-"`
IsDeleted bool `json:"-"`
}
type CsvFile struct {
File
Header []string
Rows [][]string
Filename string `json:"filename"`
Size uint64 `json:"size"`
HumanSize string `json:"human_size"`
OldFilename string `json:"-"`
Content string `json:"content"`
Truncated bool `json:"truncated"`
IsCreated bool `json:"-"`
IsDeleted bool `json:"-"`
IsBinary bool `json:"-"`
MimeType MimeType `json:"-"`
}
type Commit struct {
@@ -62,6 +58,8 @@ func truncateCommandOutput(out io.Reader, maxBytes int64) (string, bool, error)
return string(buf), truncated, nil
}
// reLogBinaryNames matches a "Binary files <old> and <new> differ" line and
// captures the two names as submatches 1 and 2.
var reLogBinaryNames = regexp.MustCompile(`Binary files (.+) and (.+) differ`)
// inspired by https://github.com/go-gitea/gitea/blob/main/services/gitdiff/gitdiff.go
func parseLog(out io.Reader, maxFiles int, maxBytes int) ([]*Commit, error) {
var commits []*Commit
@@ -206,6 +204,20 @@ loopLog:
currentFile.IsCreated = true
case strings.HasPrefix(line, "deleted file"):
currentFile.IsDeleted = true
case strings.HasPrefix(line, "Binary files"):
currentFile.IsBinary = true
names := reLogBinaryNames.FindStringSubmatch(line)
if names[1][2:] != names[2][2:] {
if currentFile.IsCreated {
currentFile.Filename = convertOctalToUTF8(names[2])[2:]
}
if currentFile.IsDeleted {
currentFile.Filename = convertOctalToUTF8(names[1])[2:]
}
} else {
currentFile.OldFilename = convertOctalToUTF8(names[1])[2:]
currentFile.Filename = convertOctalToUTF8(names[2])[2:]
}
case strings.HasPrefix(line, "--- "):
name := convertOctalToUTF8(line[4 : len(line)-1])
if parseRename && currentFile.IsDeleted {
@@ -344,27 +356,3 @@ func skipToNextCommit(input *bufio.Reader) (line string, err error) {
}
return line, err
}
// ParseCsv parses file.Content as CSV and returns the file together with its
// header (first record) and the remaining rows.
//
// It returns an error when the content cannot be read as CSV, when it contains
// no records at all, or when a row does not have the same number of columns as
// the header.
func ParseCsv(file *File) (*CsvFile, error) {
	reader := csv.NewReader(strings.NewReader(file.Content))
	records, err := reader.ReadAll()
	if err != nil {
		return nil, err
	}

	// ReadAll yields zero records (and no error) for empty or blank-line-only
	// input; indexing records[0] would then panic.
	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}

	header := records[0]
	numColumns := len(header)

	for i := 1; i < len(records); i++ {
		if len(records[i]) != numColumns {
			return nil, fmt.Errorf("CSV file has invalid row at index %d", i)
		}
	}

	return &CsvFile{
		File:   *file,
		Header: header,
		Rows:   records[1:],
	}, nil
}

View File

@@ -23,6 +23,8 @@ gist.header.download-zip: Download ZIP
gist.raw: Raw
gist.file-truncated: This file has been truncated.
gist.file-raw: This file can't be rendered.
gist.file-binary-edit: This file is binary.
gist.watch-full-file: View the full file.
gist.file-not-valid: This file is not a valid CSV file.
gist.no-content: No files found
@@ -115,6 +117,7 @@ gist.revision.file-renamed: renamed to
gist.revision.diff-truncated: Diff is too large to be shown
gist.revision.file-renamed-no-changes: File renamed without changes
gist.revision.empty-file: Empty file
gist.revision.binary-file-changes: Binary file changes are not shown
gist.revision.no-changes: No changes
gist.revision.no-revisions: No revisions to show
gist.revision-of: Revision of %s

44
internal/render/csv.go Normal file
View File

@@ -0,0 +1,44 @@
package render
import (
"encoding/csv"
"fmt"
"strings"
"github.com/thomiceli/opengist/internal/git"
)
// CSVFile is the rendered form of a CSV file: the underlying git.File plus the
// parsed header and rows. Header and Rows are excluded from JSON output.
type CSVFile struct {
*git.File
// Type is the render type label; renderCsvFile sets it to "CSV".
Type string `json:"type"`
// Header is the first CSV record.
Header []string `json:"-"`
// Rows holds every record after the header.
Rows [][]string `json:"-"`
}
// getFile returns the embedded git.File.
func (r CSVFile) getFile() *git.File {
return r.File
}
// renderCsvFile parses file.Content as CSV and returns a CSVFile holding the
// header (first record) and the remaining rows.
//
// It returns an error when the content cannot be read as CSV, when it contains
// no records at all, or when a row does not have the same number of columns as
// the header.
func renderCsvFile(file *git.File) (*CSVFile, error) {
	reader := csv.NewReader(strings.NewReader(file.Content))
	records, err := reader.ReadAll()
	if err != nil {
		return nil, err
	}

	// ReadAll yields zero records (and no error) for empty or blank-line-only
	// input; indexing records[0] would then panic.
	if len(records) == 0 {
		return nil, fmt.Errorf("CSV file is empty")
	}

	header := records[0]
	numColumns := len(header)

	for i := 1; i < len(records); i++ {
		if len(records[i]) != numColumns {
			return nil, fmt.Errorf("CSV file has invalid row at index %d", i)
		}
	}

	return &CSVFile{
		File:   file,
		Type:   "CSV",
		Header: header,
		Rows:   records[1:],
	}, nil
}

View File

@@ -5,47 +5,44 @@ import (
"bytes"
"encoding/base64"
"fmt"
"github.com/alecthomas/chroma/v2"
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/alecthomas/chroma/v2/styles"
"github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/git"
"path"
"sync"
)
type RenderedFile struct {
type HighlightedFile struct {
*git.File
Type string `json:"type"`
Lines []string `json:"-"`
HTML string `json:"-"`
}
// getFile returns the embedded git.File.
func (r HighlightedFile) getFile() *git.File {
return r.File
}
// RenderedGist embeds a *db.Gist alongside rendered output (Lines and HTML).
// It is the value returned by HighlightGistPreview and MarkdownGistPreview.
type RenderedGist struct {
*db.Gist
Lines []string
HTML string
}
func HighlightFile(file *git.File) (RenderedFile, error) {
func highlightFile(file *git.File) (HighlightedFile, error) {
rendered := HighlightedFile{
File: file,
}
if !file.MimeType.IsText() {
return rendered, nil
}
style := newStyle()
lexer := newLexer(file.Filename)
if lexer.Config().Name == "markdown" {
return MarkdownFile(file)
}
if lexer.Config().Name == "XML" && path.Ext(file.Filename) == ".svg" {
return RenderSvgFile(file), nil
}
formatter := html.New(html.WithClasses(true), html.PreventSurroundingPre(true))
rendered := RenderedFile{
File: file,
}
iterator, err := lexer.Tokenise(nil, file.Content+"\n")
if err != nil {
return rendered, err
@@ -74,38 +71,6 @@ func HighlightFile(file *git.File) (RenderedFile, error) {
return rendered, err
}
// HighlightFiles highlights every file with a fixed pool of worker goroutines
// and returns the results in the same order as files. A failure on one file is
// logged and that file's (possibly partial) result is still stored.
func HighlightFiles(files []*git.File) []RenderedFile {
	const numWorkers = 10

	var (
		wg      sync.WaitGroup
		results = make([]RenderedFile, len(files))
		queue   = make(chan int, numWorkers)
	)

	wg.Add(numWorkers)
	for w := 0; w < numWorkers; w++ {
		go func() {
			defer wg.Done()
			// Each index is handed to exactly one worker, so every slot of
			// results is written by a single goroutine.
			for i := range queue {
				out, err := HighlightFile(files[i])
				if err != nil {
					log.Error().Err(err).Msg("Error rendering gist preview for " + files[i].Filename)
				}
				results[i] = out
			}
		}()
	}

	for i := range files {
		queue <- i
	}
	close(queue)

	wg.Wait()
	return results
}
func HighlightGistPreview(gist *db.Gist) (RenderedGist, error) {
rendered := RenderedGist{
Gist: gist,
@@ -146,18 +111,12 @@ func HighlightGistPreview(gist *db.Gist) (RenderedGist, error) {
return rendered, err
}
func RenderSvgFile(file *git.File) RenderedFile {
rendered := RenderedFile{
func renderSvgFile(file *git.File) HighlightedFile {
return HighlightedFile{
File: file,
HTML: `<img src="data:image/svg+xml;base64,` + base64.StdEncoding.EncodeToString([]byte(file.Content)) + `" />`,
Type: "SVG",
}
encoded := base64.StdEncoding.EncodeToString([]byte(file.Content))
content := `<img src="data:image/svg+xml;base64,` + encoded + `" />`
rendered.HTML = content
rendered.Type = "SVG"
return rendered
}
func parseFileTypeName(config chroma.Config) string {

View File

@@ -2,6 +2,8 @@ package render
import (
"bytes"
"regexp"
"github.com/alecthomas/chroma/v2/formatters/html"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/git"
@@ -12,7 +14,6 @@ import (
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util"
"go.abhg.dev/goldmark/mermaid"
"regexp"
)
func MarkdownGistPreview(gist *db.Gist) (RenderedGist, error) {
@@ -27,11 +28,11 @@ func MarkdownGistPreview(gist *db.Gist) (RenderedGist, error) {
}, err
}
func MarkdownFile(file *git.File) (RenderedFile, error) {
func renderMarkdownFile(file *git.File) (HighlightedFile, error) {
var buf bytes.Buffer
err := newMarkdownWithSvgExtension().Convert([]byte(file.Content), &buf)
return RenderedFile{
return HighlightedFile{
File: file,
HTML: buf.String(),
Type: "Markdown",

84
internal/render/render.go Normal file
View File

@@ -0,0 +1,84 @@
package render
import (
"path/filepath"
"sync"
"github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/git"
)
// RenderedFile is implemented by every rendered file representation in this
// package; it gives access to the underlying git.File.
type RenderedFile interface {
getFile() *git.File
}
// NonHighlightedFile is a rendered file that carries no highlighted output,
// only the underlying git.File and a render type label.
type NonHighlightedFile struct {
*git.File
// Type is the render type label; processFile fills it from MimeType.RenderType.
Type string `json:"type"`
}
// getFile returns the embedded git.File.
func (r NonHighlightedFile) getFile() *git.File {
return r.File
}
// RenderFiles runs processFile on every file with a fixed pool of worker
// goroutines and returns the results in the same order as files.
func RenderFiles(files []*git.File) []RenderedFile {
	const numWorkers = 10

	var (
		wg      sync.WaitGroup
		results = make([]RenderedFile, len(files))
		queue   = make(chan int, numWorkers)
	)

	wg.Add(numWorkers)
	for w := 0; w < numWorkers; w++ {
		go func() {
			defer wg.Done()
			// Each index is handed to exactly one worker, so every slot of
			// results is written by a single goroutine.
			for i := range queue {
				results[i] = processFile(files[i])
			}
		}()
	}

	for i := range files {
		queue <- i
	}
	close(queue)

	wg.Wait()
	return results
}
// processFile picks a renderer for file from its MIME type:
//   - CSV content is parsed into a CSVFile;
//   - text files with a ".md" extension are rendered as Markdown;
//   - SVG is rendered through renderSvgFile;
//   - other renderable, non-embeddable content is syntax-highlighted;
//   - everything else (embeddable media and unrenderable content) becomes a
//     NonHighlightedFile and the file's Content is cleared.
//
// Renderer errors are logged and the renderer's result is returned as is.
func processFile(file *git.File) RenderedFile {
	mimeType := file.MimeType

	switch {
	case mimeType.IsCSV():
		// NOTE: on failure renderCsvFile returns a nil *CSVFile, which is
		// still returned here wrapped in the interface.
		csvFile, err := renderCsvFile(file)
		if err != nil {
			log.Error().Err(err).Msg("Error parsing CSV file for " + file.Filename)
		}
		return csvFile

	case mimeType.IsText() && filepath.Ext(file.Filename) == ".md":
		markdown, err := renderMarkdownFile(file)
		if err != nil {
			log.Error().Err(err).Msg("Error rendering markdown file for " + file.Filename)
		}
		return markdown

	case mimeType.IsSVG():
		return renderSvgFile(file)

	case !mimeType.CanBeEmbedded() && mimeType.CanBeRendered():
		highlighted, err := highlightFile(file)
		if err != nil {
			log.Error().Err(err).Msg("Error rendering gist preview for " + file.Filename)
		}
		return highlighted

	default:
		// Embeddable media and unrenderable content: keep only the metadata.
		placeholder := NonHighlightedFile{File: file, Type: mimeType.RenderType()}
		file.Content = ""
		return placeholder
	}
}

View File

@@ -1,14 +1,15 @@
package gist
import (
"net/url"
"strconv"
"strings"
"github.com/google/uuid"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/i18n"
"github.com/thomiceli/opengist/internal/validator"
"github.com/thomiceli/opengist/internal/web/context"
"net/url"
"strconv"
"strings"
)
func Create(ctx *context.Context) error {

View File

@@ -7,7 +7,6 @@ import (
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/web/context"
"github.com/thomiceli/opengist/internal/web/handlers"
)
func RawFile(ctx *context.Context) error {
@@ -20,10 +19,8 @@ func RawFile(ctx *context.Context) error {
if file == nil {
return ctx.NotFound("File not found")
}
contentType := handlers.GetContentTypeFromFilename(file.Filename)
ContentDisposition := handlers.GetContentDisposition(file.Filename)
ctx.Response().Header().Set("Content-Type", contentType)
ctx.Response().Header().Set("Content-Disposition", ContentDisposition)
ctx.Response().Header().Set("Content-Type", file.MimeType.ContentType)
ctx.Response().Header().Set("Content-Disposition", "inline; filename=\""+file.Filename+"\"")
return ctx.PlainText(200, file.Content)
}
@@ -38,7 +35,7 @@ func DownloadFile(ctx *context.Context) error {
return ctx.NotFound("File not found")
}
ctx.Response().Header().Set("Content-Type", "text/plain")
ctx.Response().Header().Set("Content-Type", file.MimeType.ContentType)
ctx.Response().Header().Set("Content-Disposition", "attachment; filename="+file.Filename)
ctx.Response().Header().Set("Content-Length", strconv.Itoa(len(file.Content)))
_, err = ctx.Response().Write([]byte(file.Content))

View File

@@ -5,12 +5,13 @@ import (
"bytes"
gojson "encoding/json"
"fmt"
"net/url"
"time"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/git"
"github.com/thomiceli/opengist/internal/render"
"github.com/thomiceli/opengist/internal/web/context"
"net/url"
"time"
)
func GistIndex(ctx *context.Context) error {
@@ -34,7 +35,7 @@ func GistIndex(ctx *context.Context) error {
return ctx.ErrorRes(500, "Error fetching files", err)
}
renderedFiles := render.HighlightFiles(files)
renderedFiles := render.RenderFiles(files)
ctx.SetData("page", "code")
ctx.SetData("commit", revision)
@@ -51,7 +52,7 @@ func GistJson(ctx *context.Context) error {
return ctx.ErrorRes(500, "Error fetching files", err)
}
renderedFiles := render.HighlightFiles(files)
renderedFiles := render.RenderFiles(files)
ctx.SetData("files", renderedFiles)
topics, err := gist.GetTopics()
@@ -106,7 +107,7 @@ func GistJs(ctx *context.Context) error {
return ctx.ErrorRes(500, "Error fetching files", err)
}
renderedFiles := render.HighlightFiles(files)
renderedFiles := render.RenderFiles(files)
ctx.SetData("files", renderedFiles)
htmlbuf := bytes.Buffer{}

View File

@@ -4,7 +4,6 @@ import (
"errors"
"html/template"
"net/url"
"path/filepath"
"strconv"
"strings"
@@ -141,22 +140,3 @@ func ParseSearchQueryStr(query string) (string, map[string]string) {
content := strings.TrimSpace(contentBuilder.String())
return content, metadata
}
// GetContentTypeFromFilename returns the Content-Type header value for
// filename: "text/css" for a ".css" extension (case-insensitive), "text/plain"
// for anything else, always followed by "; charset=utf-8".
func GetContentTypeFromFilename(filename string) (ret string) {
	mediaType := "text/plain"
	if strings.ToLower(filepath.Ext(filename)) == ".css" {
		mediaType = "text/css"
	}

	// The charset is always appended; without it non-ASCII text is displayed
	// incorrectly.
	return mediaType + "; charset=utf-8"
}
// GetContentDisposition returns an inline Content-Disposition header value
// with filename placed between double quotes. The filename is inserted as is,
// without any escaping.
func GetContentDisposition(filename string) string {
	var b strings.Builder
	b.WriteString("inline; filename=\"")
	b.WriteString(filename)
	b.WriteString("\"")
	return b.String()
}

View File

@@ -3,6 +3,13 @@ package server
import (
"errors"
"fmt"
"html/template"
"net/http"
"path/filepath"
"regexp"
"strings"
"time"
"github.com/labstack/echo-contrib/echoprometheus"
"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
@@ -15,12 +22,6 @@ import (
"github.com/thomiceli/opengist/internal/web/handlers"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"html/template"
"net/http"
"path/filepath"
"regexp"
"strings"
"time"
)
func (s *Server) useCustomContext() {
@@ -54,7 +55,7 @@ func (s *Server) registerMiddlewares() {
return nil
},
}))
//s.echo.Use(middleware.Recover())
s.echo.Use(middleware.Recover())
s.echo.Use(middleware.Secure())
s.echo.Use(Middleware(sessionInit).toEcho())

View File

@@ -4,16 +4,6 @@ import (
gojson "encoding/json"
"errors"
"fmt"
"github.com/labstack/echo/v4"
"github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/config"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/git"
"github.com/thomiceli/opengist/internal/index"
"github.com/thomiceli/opengist/internal/web/context"
"github.com/thomiceli/opengist/internal/web/handlers"
"github.com/thomiceli/opengist/public"
"github.com/thomiceli/opengist/templates"
htmlpkg "html"
"html/template"
"io"
@@ -24,6 +14,16 @@ import (
"strconv"
"strings"
"time"
"github.com/labstack/echo/v4"
"github.com/rs/zerolog/log"
"github.com/thomiceli/opengist/internal/config"
"github.com/thomiceli/opengist/internal/db"
"github.com/thomiceli/opengist/internal/index"
"github.com/thomiceli/opengist/internal/web/context"
"github.com/thomiceli/opengist/internal/web/handlers"
"github.com/thomiceli/opengist/public"
"github.com/thomiceli/opengist/templates"
)
type Template struct {
@@ -58,24 +58,6 @@ func (s *Server) setFuncMap() {
"isMarkdown": func(i string) bool {
return strings.ToLower(filepath.Ext(i)) == ".md"
},
"isCsv": func(i string) bool {
return strings.ToLower(filepath.Ext(i)) == ".csv"
},
"isSvg": func(i string) bool {
return strings.ToLower(filepath.Ext(i)) == ".svg"
},
"csvFile": func(file *git.File) *git.CsvFile {
if strings.ToLower(filepath.Ext(file.Filename)) != ".csv" {
return nil
}
csvFile, err := git.ParseCsv(file)
if err != nil {
return nil
}
return csvFile
},
"httpStatusText": http.StatusText,
"loadedTime": func(startTime time.Time) string {
return fmt.Sprint(time.Since(startTime).Nanoseconds()/1e6) + "ms"