diff --git a/models/git_diff.go b/models/git_diff.go index 4bbe3c0e..7e91626f 100644 --- a/models/git_diff.go +++ b/models/git_diff.go @@ -14,12 +14,14 @@ import ( "strings" "time" + "golang.org/x/net/html/charset" + "golang.org/x/text/transform" + "github.com/Unknwon/com" "github.com/gogits/gogs/modules/base" "github.com/gogits/gogs/modules/git" "github.com/gogits/gogs/modules/log" - "github.com/gogits/gogs/modules/mahonia" "github.com/gogits/gogs/modules/process" ) @@ -192,14 +194,18 @@ func ParsePatch(pid int64, maxlines int, cmd *exec.Cmd, reader io.Reader) (*Diff } // FIXME: use first 30 lines to detect file encoding. - charset, err := base.DetectEncoding(buf.Bytes()) - if charset != "utf8" && err == nil { - decoder := mahonia.NewDecoder(charset) - if decoder != nil { + charsetLabel, err := base.DetectEncoding(buf.Bytes()) + if charsetLabel != "utf8" && err == nil { + encoding, _ := charset.Lookup(charsetLabel) + + if encoding != nil { + d := encoding.NewDecoder() for _, f := range diff.Files { for _, sec := range f.Sections { for _, l := range sec.Lines { - l.Content = decoder.ConvertString(l.Content) + if c, _, err := transform.String(d, l.Content); err == nil { + l.Content = c + } } } } diff --git a/modules/base/template.go b/modules/base/template.go index 462269aa..9107f3e1 100644 --- a/modules/base/template.go +++ b/modules/base/template.go @@ -7,14 +7,15 @@ package base import ( "container/list" "encoding/json" - "errors" "fmt" "html/template" "runtime" "strings" "time" - "github.com/gogits/gogs/modules/mahonia" + "golang.org/x/net/html/charset" + "golang.org/x/text/transform" + "github.com/gogits/gogs/modules/setting" "github.com/saintfish/chardet" ) @@ -54,20 +55,30 @@ func DetectEncoding(content []byte) (string, error) { } func ToUtf8WithErr(content []byte) (error, string) { - charset, err := DetectEncoding(content) + charsetLabel, err := DetectEncoding(content) if err != nil { return err, "" } - if charset == "utf8" { + if charsetLabel == "utf8" { 
return nil, string(content) } - decoder := mahonia.NewDecoder(charset) - if decoder != nil { - return nil, decoder.ConvertString(string(content)) + encoding, _ := charset.Lookup(charsetLabel) + + if encoding == nil { + return fmt.Errorf("unknow char decoder %s", charsetLabel), string(content) } - return errors.New("unknow char decoder"), string(content) + + result, n, err := transform.String(encoding.NewDecoder(), string(content)) + + // If there is an error, we concatenate the nicely decoded part and the + // original left over. This way we won't lose data. + if err != nil { + result = result + string(content[n:]) + } + + return err, result } func ToUtf8(content string) string {