From f44f1fdd33d23a5971661d51fce0748314c06683 Mon Sep 17 00:00:00 2001 From: 1379 <1379@1379.com> Date: Sun, 13 Nov 2022 21:19:55 +0800 Subject: [PATCH] feat: import from hexo jekyll hugo style: format code --- consts/enum.go | 7 +- dal/dal.go | 1 + event/listener/comment.go | 2 + event/listener/post_update.go | 1 - go.mod | 8 +- go.sum | 4 + handler/admin/backup.go | 5 + handler/admin/email.go | 1 + handler/content/api/journal.go | 1 - handler/content/archive.go | 1 + .../authentication/post_authentication.go | 1 - handler/content/category.go | 1 + handler/content/journal.go | 1 - handler/content/model/category.go | 1 - handler/content/photo.go | 1 - handler/content/search.go | 1 + handler/content/view.go | 1 + handler/route.go | 3 +- model/entity/hooks.go | 1 - model/param/post.go | 2 + model/property/attachment.go | 2 + service/category.go | 1 + service/export_import.go | 13 + .../file_storage_impl/file_descriptor.go | 1 + .../file_storage_impl/url_file_descriptor.go | 4 + service/impl/attachment.go | 1 - service/impl/backup.go | 23 +- service/impl/category.go | 16 +- service/impl/comment_base.go | 2 +- service/impl/export_import.go | 519 +++++++++++++++ service/impl/init.go | 1 + service/impl/option.go | 1 - service/impl/post.go | 11 +- service/impl/tag.go | 6 + service/tag.go | 1 + util/conv.go | 5 + util/gin.go | 1 + util/pageparser/item.go | 221 +++++++ util/pageparser/itemtype_string.go | 43 ++ util/pageparser/metadecoders/decoder.go | 261 ++++++++ util/pageparser/metadecoders/format.go | 104 +++ util/pageparser/pagelexer.go | 590 ++++++++++++++++++ util/pageparser/pagelexer_intro.go | 186 ++++++ util/pageparser/pagelexer_shortcode.go | 362 +++++++++++ util/pageparser/pageparser.go | 235 +++++++ 45 files changed, 2614 insertions(+), 40 deletions(-) create mode 100644 service/export_import.go create mode 100644 service/impl/export_import.go create mode 100644 util/pageparser/item.go create mode 100644 util/pageparser/itemtype_string.go create mode 100644 util/pageparser/metadecoders/decoder.go create mode 100644 util/pageparser/metadecoders/format.go create mode 100644 util/pageparser/pagelexer.go create mode 100644 util/pageparser/pagelexer_intro.go create mode 100644 util/pageparser/pagelexer_shortcode.go create mode 100644 util/pageparser/pageparser.go diff --git a/consts/enum.go b/consts/enum.go index 8dfd1843..26433574 100644 --- a/consts/enum.go +++ b/consts/enum.go @@ -387,6 +387,7 @@ func (c *CommentStatus) Scan(src interface{}) error { } return nil } + func CommentStatusFromString(str string) (CommentStatus, error) { if str == "PUBLISHED" { return CommentStatusPublished, nil @@ -421,10 +422,14 @@ const ( type EditorType int32 const ( - EditorTypeMarkdown = iota + EditorTypeMarkdown EditorType = iota EditorTypeRichText ) +func (e EditorType) Ptr() *EditorType { + return &e +} + func (e *EditorType) Scan(src interface{}) error { if src == nil { return xerr.BadParam.New("").WithMsg("field nil") diff --git a/dal/dal.go b/dal/dal.go index 621554b2..9c128c5d 100644 --- a/dal/dal.go +++ b/dal/dal.go @@ -130,6 +130,7 @@ func Transaction(ctx context.Context, fn func(txCtx context.Context) error) erro return fn(txCtx) }) } + func GetDB() *gorm.DB { return DB } diff --git a/event/listener/comment.go b/event/listener/comment.go index 4b725e52..095fce69 100644 --- a/event/listener/comment.go +++ b/event/listener/comment.go @@ -54,6 +54,7 @@ func NewCommentListener( bus.Subscribe(event.CommentNewEventName, c.HandleCommentNew) bus.Subscribe(event.CommentReplyEventName, 
c.HandleCommentReply) } + func (c *CommentListener) HandleCommentNew(ctx context.Context, ce event.Event) error { newCommentNotice, err := c.OptionService.GetOrByDefaultWithErr(ctx, property.CommentNewNotice, property.CommentNewNotice.DefaultValue) if err != nil { @@ -142,6 +143,7 @@ func (c *CommentListener) HandleCommentNew(ctx context.Context, ce event.Event) } return c.EmailService.SendTemplateEmail(ctx, users[0].Email, subject, content.String()) } + func (c *CommentListener) HandleCommentReply(ctx context.Context, ce event.Event) error { commentReplyNotice, err := c.OptionService.GetOrByDefaultWithErr(ctx, property.CommentReplyNotice, property.CommentNewNotice.DefaultValue) if err != nil { diff --git a/event/listener/post_update.go b/event/listener/post_update.go index 374d2519..70eaf793 100644 --- a/event/listener/post_update.go +++ b/event/listener/post_update.go @@ -29,7 +29,6 @@ func NewPostUpdateListener(bus event.Bus, } func (p *PostUpdateListener) HandlePostUpdateEvent(ctx context.Context, postUpdateEvent event.Event) error { - postID := postUpdateEvent.(*event.PostUpdateEvent).PostID categories, err := p.PostCategoryService.ListCategoryByPostID(ctx, postID) diff --git a/go.mod b/go.mod index f705d5de..d0ff24f2 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.19 require ( github.com/Masterminds/sprig/v3 v3.2.2 github.com/aliyun/aliyun-oss-go-sdk v2.2.6+incompatible + github.com/clbanning/mxj/v2 v2.5.7 github.com/disintegration/imaging v1.6.2 github.com/fsnotify/fsnotify v1.6.0 github.com/gin-contrib/cors v1.4.0 @@ -18,10 +19,14 @@ require ( github.com/minio/minio-go/v7 v7.0.45 github.com/natefinch/lumberjack v2.0.0+incompatible github.com/patrickmn/go-cache v2.1.0+incompatible + github.com/pelletier/go-toml/v2 v2.0.5 github.com/pkg/errors v0.9.1 github.com/pquerna/otp v1.4.0 + github.com/spf13/afero v1.9.2 + github.com/spf13/cast v1.5.0 github.com/spf13/viper v1.14.0 github.com/yeqown/go-qrcode v1.5.10 + github.com/yuin/goldmark v1.5.3 go.uber.org/dig v1.15.0 go.uber.org/fx v1.18.2 go.uber.org/zap v1.24.0 @@ -64,12 +69,9 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pelletier/go-toml v1.9.5 // indirect - github.com/pelletier/go-toml/v2 v2.0.5 // indirect github.com/rs/xid v1.4.0 // indirect github.com/shopspring/decimal v1.2.0 // indirect github.com/sirupsen/logrus v1.9.0 // indirect - github.com/spf13/afero v1.9.2 // indirect - github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.4.1 // indirect diff --git a/go.sum b/go.sum index 8a511f61..a16b7c5f 100644 --- a/go.sum +++ b/go.sum @@ -57,6 +57,8 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/clbanning/mxj/v2 v2.5.7 h1:7q5lvUpaPF/WOkqgIDiwjBJaznaLCCBd78pi8ZyAnE0= +github.com/clbanning/mxj/v2 v2.5.7/go.mod h1:hNiWqW14h+kc+MdF9C6/YoRfjEJoR3ou6tn/Qo+ve2s= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod 
h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
@@ -392,6 +394,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
 github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+github.com/yuin/goldmark v1.5.3 h1:3HUJmBFbQW9fhQOzMgseU134xfi6hU+mjWywx5Ty+/M=
+github.com/yuin/goldmark v1.5.3/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
 github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
 go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
diff --git a/handler/admin/backup.go b/handler/admin/backup.go
index 7a795bcf..e3660830 100644
--- a/handler/admin/backup.go
+++ b/handler/admin/backup.go
@@ -2,6 +2,7 @@ package admin
 
 import (
 	"net/http"
+	"path"
 	"path/filepath"
 
 	"github.com/gin-gonic/gin"
@@ -116,6 +117,10 @@ func (b *BackupHandler) ImportMarkdown(ctx *gin.Context) (interface{}, error) {
 	if err != nil {
 		return nil, xerr.WithMsg(err, "上传文件错误").WithStatus(xerr.StatusBadRequest)
 	}
+	filenameExt := path.Ext(fileHeader.Filename)
+	if filenameExt != ".md" && filenameExt != ".markdown" && filenameExt != ".mdown" {
+		return nil, xerr.WithMsg(nil, "Unsupported format").WithStatus(xerr.StatusBadRequest)
+	}
 	return nil, b.BackupService.ImportMarkdown(ctx, fileHeader)
 }
 
diff --git a/handler/admin/email.go b/handler/admin/email.go
index 1463ab51..ea0b798e 100644
--- a/handler/admin/email.go
+++ b/handler/admin/email.go
@@ -17,6 +17,7 @@ func NewEmailHandler(emailService service.EmailService) *EmailHandler {
 		EmailService: emailService,
 	}
 }
+
 func (e *EmailHandler) Test(ctx *gin.Context) (interface{}, error) {
 	p := &param.TestEmail{}
 	if err := ctx.ShouldBindJSON(p); err != nil {
diff --git a/handler/content/api/journal.go b/handler/content/api/journal.go
index 46cacac9..ce5df863 100644
--- a/handler/content/api/journal.go
+++ b/handler/content/api/journal.go
@@ -79,7 +79,6 @@ func (j *JournalHandler) GetJournal(ctx *gin.Context) (interface{}, error) {
 }
 
 func (j *JournalHandler) ListTopComment(ctx *gin.Context) (interface{}, error) {
-
 	journalID, err := util.ParamInt32(ctx, "journalID")
 	if err != nil {
 		return nil, err
diff --git a/handler/content/archive.go b/handler/content/archive.go
index 9fcb2a66..ef3ddd80 100644
--- a/handler/content/archive.go
+++ b/handler/content/archive.go
@@ -44,6 +44,7 @@ func NewArchiveHandler(
 func (a *ArchiveHandler) Archives(ctx *gin.Context, model template.Model) (string, error) {
 	return a.PostModel.Archives(ctx, 0, model)
 }
+
 func (a *ArchiveHandler) ArchivesPage(ctx *gin.Context, model template.Model) (string, error) {
 	page, err := util.ParamInt32(ctx, "page")
 	if err != nil {
diff --git a/handler/content/authentication/post_authentication.go b/handler/content/authentication/post_authentication.go
index 9728997d..589a8a3c 100644
--- a/handler/content/authentication/post_authentication.go
+++ b/handler/content/authentication/post_authentication.go
@@ -85,7 +85,6 @@ func (p *PostAuthentication) Authenticate(ctx context.Context, token string, id
 		}
 	}
 	return "", xerr.WithMsg(nil, "密码不正确").WithStatus(http.StatusUnauthorized)
-
 }
 
 func (p *PostAuthentication) IsAuthenticated(ctx 
context.Context, tokenStr string, id int32) (bool, error) { diff --git a/handler/content/category.go b/handler/content/category.go index 54784ec7..46fcf57d 100644 --- a/handler/content/category.go +++ b/handler/content/category.go @@ -43,6 +43,7 @@ func NewCategoryHandler( func (c *CategoryHandler) Categories(ctx *gin.Context, model template.Model) (string, error) { return c.CategoryModel.ListCategories(ctx, model) } + func (c *CategoryHandler) CategoryDetail(ctx *gin.Context, model template.Model) (string, error) { slug, err := util.ParamString(ctx, "slug") if err != nil { diff --git a/handler/content/journal.go b/handler/content/journal.go index d82afe4c..3d8e8756 100644 --- a/handler/content/journal.go +++ b/handler/content/journal.go @@ -19,7 +19,6 @@ func NewJournalHandler( optionService service.OptionService, journalService service.JournalService, journalModel *model.JournalModel, - ) *JournalHandler { return &JournalHandler{ OptionService: optionService, diff --git a/handler/content/model/category.go b/handler/content/model/category.go index 50a78242..efac9641 100644 --- a/handler/content/model/category.go +++ b/handler/content/model/category.go @@ -52,7 +52,6 @@ type CategoryModel struct { } func (c *CategoryModel) ListCategories(ctx context.Context, model template.Model) (string, error) { - seoKeyWords := c.OptionService.GetOrByDefault(ctx, property.SeoKeywords) seoDescription := c.OptionService.GetOrByDefault(ctx, property.SeoDescription) diff --git a/handler/content/photo.go b/handler/content/photo.go index e1de1a84..dddee23b 100644 --- a/handler/content/photo.go +++ b/handler/content/photo.go @@ -19,7 +19,6 @@ func NewPhotoHandler( optionService service.OptionService, photoService service.PhotoService, photoModel *model.PhotoModel, - ) *PhotoHandler { return &PhotoHandler{ OptionService: optionService, diff --git a/handler/content/search.go b/handler/content/search.go index 37f68b7b..91cdf703 100644 --- a/handler/content/search.go +++ b/handler/content/search.go @@ -37,6 +37,7 @@ func NewSearchHandler( ThemeService: themeService, } } + func (s *SearchHandler) Search(ctx *gin.Context, model template.Model) (string, error) { return s.search(ctx, 0, model) } diff --git a/handler/content/view.go b/handler/content/view.go index 94f4b994..ed4c0c46 100644 --- a/handler/content/view.go +++ b/handler/content/view.go @@ -133,6 +133,7 @@ func (v *ViewHandler) authenticateCategory(ctx *gin.Context, slug, password, tok ctx.Redirect(http.StatusFound, categoryDTO.FullPath) return token, nil } + func (v *ViewHandler) authenticatePost(ctx *gin.Context, slug, password, token string) (string, error) { post, err := v.PostService.GetBySlug(ctx, slug) if err != nil { diff --git a/handler/route.go b/handler/route.go index 4c7302bb..601a5330 100644 --- a/handler/route.go +++ b/handler/route.go @@ -81,7 +81,7 @@ func (s *Server) RegisterRouters() { backupRouter.DELETE("/data", s.wrapHandler(s.BackupHandler.DeleteDataFile)) backupRouter.GET("/data/*path", s.BackupHandler.HandleData) backupRouter.POST("/markdown/export", s.wrapHandler(s.BackupHandler.ExportMarkdown)) - backupRouter.POST("/mark-down/import", s.wrapHandler(s.BackupHandler.ImportMarkdown)) + backupRouter.POST("/markdown/import", s.wrapHandler(s.BackupHandler.ImportMarkdown)) backupRouter.GET("/markdown/fetch", s.wrapHandler(s.BackupHandler.GetMarkDownBackup)) backupRouter.GET("/markdown/export", s.wrapHandler(s.BackupHandler.ListMarkdowns)) backupRouter.DELETE("/markdown/export", s.wrapHandler(s.BackupHandler.DeleteMarkdowns)) @@ -345,7 
+345,6 @@ func (s *Server) RegisterRouters() { contentAPIRouter.GET("/options/comment", s.wrapHandler(s.ContentAPIOptionHander.Comment)) } } - } func (s *Server) registerDynamicRouters(contentRouter *gin.RouterGroup) error { diff --git a/model/entity/hooks.go b/model/entity/hooks.go index 530666bf..dc2d5848 100644 --- a/model/entity/hooks.go +++ b/model/entity/hooks.go @@ -132,7 +132,6 @@ func (m *Post) BeforeCreate(tx *gorm.DB) (err error) { if m.CreateTime == (time.Time{}) { m.CreateTime = time.Now() } - m.CreateTime = time.Now() return nil } diff --git a/model/param/post.go b/model/param/post.go index 2f4eae31..f53f934d 100644 --- a/model/param/post.go +++ b/model/param/post.go @@ -21,6 +21,8 @@ type Post struct { CategoryIDs []int32 `json:"categoryIds" form:"categoryIds"` MetaParam []Meta `json:"metas" form:"metas"` Content string `json:"content" form:"content"` + EditTime *int64 `json:"editTime" form:"editTime"` + UpdateTime *int64 `json:"updateTime" form:"updateTime"` } type PostContent struct { diff --git a/model/property/attachment.go b/model/property/attachment.go index f9b2a07c..d2c58d60 100644 --- a/model/property/attachment.go +++ b/model/property/attachment.go @@ -107,11 +107,13 @@ var AliOssSource = Property{ KeyValue: "oss_ali_source", Kind: reflect.String, } + var AliOssThumbnailStyleRule = Property{ DefaultValue: "", KeyValue: "oss_ali_thumbnail_style_rule", Kind: reflect.String, } + var AliOssStyleRule = Property{ DefaultValue: "", KeyValue: "oss_ali_style_rule", diff --git a/service/category.go b/service/category.go index 53b8deeb..d91a204d 100644 --- a/service/category.go +++ b/service/category.go @@ -12,6 +12,7 @@ import ( type CategoryService interface { GetByID(ctx context.Context, id int32) (*entity.Category, error) GetBySlug(ctx context.Context, slug string) (*entity.Category, error) + GetByName(ctx context.Context, name string) (*entity.Category, error) ListCategoryWithPostCountDTO(ctx context.Context, sort *param.Sort) ([]*dto.CategoryWithPostCount, error) ListAll(ctx context.Context, sort *param.Sort) ([]*entity.Category, error) ConvertToCategoryDTO(ctx context.Context, e *entity.Category) (*dto.CategoryDTO, error) diff --git a/service/export_import.go b/service/export_import.go new file mode 100644 index 00000000..18217359 --- /dev/null +++ b/service/export_import.go @@ -0,0 +1,13 @@ +package service + +import ( + "context" + "io" + + "github.com/go-sonic/sonic/model/entity" +) + +type ExportImport interface { + CreateByMarkdown(ctx context.Context, filename string, reader io.Reader) (*entity.Post, error) + ExportMarkdown(ctx context.Context, needFrontMatter bool) (string, error) +} diff --git a/service/file_storage/file_storage_impl/file_descriptor.go b/service/file_storage/file_storage_impl/file_descriptor.go index b0328431..26e6e2a4 100644 --- a/service/file_storage/file_storage_impl/file_descriptor.go +++ b/service/file_storage/file_storage_impl/file_descriptor.go @@ -103,6 +103,7 @@ func (f *localFileDescriptor) getExtensionName() string { func (f *localFileDescriptor) getFileName() string { return f.Name } + func (f *localFileDescriptor) getShouldRename() shouldRename { return f.ShouldRename } diff --git a/service/file_storage/file_storage_impl/url_file_descriptor.go b/service/file_storage/file_storage_impl/url_file_descriptor.go index 6e1bd8be..318c6c79 100644 --- a/service/file_storage/file_storage_impl/url_file_descriptor.go +++ b/service/file_storage/file_storage_impl/url_file_descriptor.go @@ -85,6 +85,7 @@ func (f *urlFileDescriptor) getFileName() 
string { func (f *urlFileDescriptor) setFileName(name string) { f.Name = name } + func (f *urlFileDescriptor) getShouldRename() shouldRename { return f.ShouldRename } @@ -96,16 +97,19 @@ func withBaseURL(baseURL string) urlOption { f.BasePath = baseURL } } + func withSubURLPath(subURL string) urlOption { return func(f *urlFileDescriptor) { f.SubPath = subURL } } + func withOriginalNameURLOption(originalName string) urlOption { return func(f *urlFileDescriptor) { f.OriginalName = originalName } } + func withShouldRenameURLOption(fn func(relativePath string) (bool, error)) urlOption { return func(f *urlFileDescriptor) { f.ShouldRename = fn diff --git a/service/impl/attachment.go b/service/impl/attachment.go index b3e8291e..19046bac 100644 --- a/service/impl/attachment.go +++ b/service/impl/attachment.go @@ -130,7 +130,6 @@ func (a *attachmentServiceImpl) GetAttachment(ctx context.Context, attachmentID } func (a *attachmentServiceImpl) Upload(ctx context.Context, fileHeader *multipart.FileHeader) (attachmentDTO *dto.AttachmentDTO, err error) { - attachmentType := a.OptionService.GetAttachmentType(ctx) fileStorage := a.FileStorageComposite.GetFileStorage(attachmentType) diff --git a/service/impl/backup.go b/service/impl/backup.go index 58cabdc5..b6ba10f3 100644 --- a/service/impl/backup.go +++ b/service/impl/backup.go @@ -3,7 +3,6 @@ package impl import ( "context" "encoding/json" - "io" "io/fs" "mime/multipart" "os" @@ -15,7 +14,6 @@ import ( "github.com/go-sonic/sonic/config" "github.com/go-sonic/sonic/consts" "github.com/go-sonic/sonic/dal" - "github.com/go-sonic/sonic/log" "github.com/go-sonic/sonic/model/dto" "github.com/go-sonic/sonic/service" "github.com/go-sonic/sonic/util" @@ -26,13 +24,15 @@ type backupServiceImpl struct { Config *config.Config OptionService service.OptionService OneTimeTokenService service.OneTimeTokenService + ExportImportService service.ExportImport } -func NewBackUpService(config *config.Config, optionService service.OptionService, oneTimeTokenService service.OneTimeTokenService) service.BackupService { +func NewBackUpService(config *config.Config, optionService service.OptionService, oneTimeTokenService service.OneTimeTokenService, exportImportService service.ExportImport) service.BackupService { return &backupServiceImpl{ Config: config, OptionService: optionService, OneTimeTokenService: oneTimeTokenService, + ExportImportService: exportImportService, } } @@ -144,14 +144,8 @@ func (b *backupServiceImpl) ImportMarkdown(ctx context.Context, fileHeader *mult if err != nil { return xerr.NoType.Wrap(err).WithMsg("upload file error") } - bContent, err := io.ReadAll(file) - if err != nil { - return xerr.NoType.Wrap(err).WithMsg("read file error") - } - content := string(bContent) - log.Info(content) - // TODO 导入markdown - return nil + _, err = b.ExportImportService.CreateByMarkdown(ctx, fileHeader.Filename, file) + return err } func (b *backupServiceImpl) ExportData(ctx context.Context) (*dto.BackupDTO, error) { @@ -208,8 +202,11 @@ func (b *backupServiceImpl) ExportData(ctx context.Context) (*dto.BackupDTO, err } func (b *backupServiceImpl) ExportMarkdown(ctx context.Context, needFrontMatter bool) (*dto.BackupDTO, error) { - // TODO - return nil, nil + fileName, err := b.ExportImportService.ExportMarkdown(ctx, needFrontMatter) + if err != nil { + return nil, err + } + return b.buildBackupDTO(ctx, string(service.Markdown), fileName) } func (b *backupServiceImpl) buildBackupDTO(ctx context.Context, baseBackupURL string, backupFilePath string) (*dto.BackupDTO, error) { 
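The two hunks above route BackupHandler.ImportMarkdown through the new ExportImport service instead of the old read-and-log stub. For orientation, here is a minimal sketch of driving that import path directly, assuming an already-wired service.ExportImport instance; the importOne helper and the sample front matter are illustrative only, not part of this patch:

package main

import (
	"bytes"
	"context"
	"fmt"

	"github.com/go-sonic/sonic/service"
)

// importOne feeds one Hexo/Jekyll-style markdown document into the importer.
func importOne(ctx context.Context, svc service.ExportImport) error {
	doc := []byte(`---
title: Hello Sonic
slug: hello-sonic
tags:
  - golang
categories:
  - Development
draft: false
---
First post imported from a static-site backup.`)

	// A Jekyll-style "YYYY-MM-DD-name.md" filename triggers the
	// convertJekyll* normalization before the front matter is parsed.
	post, err := svc.CreateByMarkdown(ctx, "2022-11-13-hello-sonic.md", bytes.NewReader(doc))
	if err != nil {
		return err
	}
	fmt.Printf("imported post id=%d title=%q\n", post.ID, post.Title)
	return nil
}
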
diff --git a/service/impl/category.go b/service/impl/category.go index e77cf23f..9d375174 100644 --- a/service/impl/category.go +++ b/service/impl/category.go @@ -41,6 +41,12 @@ func (c categoryServiceImpl) GetBySlug(ctx context.Context, slug string) (*entit return category, WrapDBErr(err) } +func (c categoryServiceImpl) GetByName(ctx context.Context, name string) (*entity.Category, error) { + categoryDAL := dal.GetQueryByCtx(ctx).Category + category, err := categoryDAL.WithContext(ctx).Where(categoryDAL.Name.Eq(name)).Take() + return category, WrapDBErr(err) +} + func (c categoryServiceImpl) ListCategoryWithPostCountDTO(ctx context.Context, sort *param.Sort) ([]*dto.CategoryWithPostCount, error) { categoryDAL := dal.GetQueryByCtx(ctx).Category categoryDO := categoryDAL.WithContext(ctx) @@ -273,7 +279,6 @@ func (c categoryServiceImpl) Create(ctx context.Context, categoryParam *param.Ca } func (c *categoryServiceImpl) Update(ctx context.Context, categoryParam *param.Category) (*entity.Category, error) { - executor := newCategoryUpdateExecutor(ctx) if err := executor.Update(ctx, categoryParam); err != nil { return nil, err @@ -285,7 +290,6 @@ func (c *categoryServiceImpl) Update(ctx context.Context, categoryParam *param.C } func (c categoryServiceImpl) UpdateBatch(ctx context.Context, categoryParams []*param.Category) ([]*entity.Category, error) { - executor := newCategoryUpdateExecutor(ctx) if err := executor.UpdateBatch(ctx, categoryParams); err != nil { return nil, err @@ -360,7 +364,6 @@ func (c *categoryServiceImpl) GetChildCategory(ctx context.Context, parentCatego } } return childs, nil - } type categoryUpdateExecutor struct { @@ -417,7 +420,6 @@ func (c *categoryUpdateExecutor) Update(ctx context.Context, categoryParam *para } func (c *categoryUpdateExecutor) UpdateBatch(ctx context.Context, categoryParams []*param.Category) error { - categories := make([]*entity.Category, 0) for _, categoryParam := range categoryParams { categories = append(categories, c.convertParam(categoryParam)) @@ -459,7 +461,6 @@ func (c *categoryUpdateExecutor) Delete(ctx context.Context, categoryID int32) e parent, ok := c.AllCategory[curCategory.ParentID] err := dal.Transaction(ctx, func(txCtx context.Context) error { - if ok && parent != nil { if parent.Type == consts.CategoryTypeNormal && curCategory.Type == consts.CategoryTypeIntimate { if err := c.refreshChildsType(txCtx, categoryID, consts.CategoryTypeNormal); err != nil { @@ -550,7 +551,6 @@ func (c *categoryUpdateExecutor) prepare(ctx context.Context, categoryParams []* } func (c *categoryUpdateExecutor) getChildCategory(parentCategoryID int32) []*entity.Category { - parentIDToChild := make(map[int32][]*entity.Category) for _, category := range c.AllCategory { parentIDToChild[category.ParentID] = append(parentIDToChild[category.ParentID], category) @@ -570,7 +570,6 @@ func (c *categoryUpdateExecutor) getChildCategory(parentCategoryID int32) []*ent } } return childs - } func (c *categoryUpdateExecutor) convertParam(categoryParam *param.Category) *entity.Category { @@ -689,7 +688,6 @@ func (c *categoryUpdateExecutor) refreshAllType(ctx context.Context) error { } func (c *categoryUpdateExecutor) refreshPostStatus(ctx context.Context) error { - needEncryptPostID := make([]int32, 0) needDecryptPostID := make([]int32, 0) for id, post := range c.PostMap { @@ -752,14 +750,12 @@ func (c *categoryUpdateExecutor) removePostCategory(ctx context.Context, categor } postCategoryDAL := dal.GetQueryByCtx(ctx).PostCategory err := 
postCategoryDAL.WithContext(ctx).Create(postCategory...) - if err != nil { return WrapDBErr(err) } _, err = postCategoryDAL.WithContext(ctx).Where(postCategoryDAL.CategoryID.Eq(categoryID)).Delete() return WrapDBErr(err) - } func (c *categoryUpdateExecutor) removeCategory(ctx context.Context, categoryID int32) error { diff --git a/service/impl/comment_base.go b/service/impl/comment_base.go index 3fd90ae3..bbed0078 100644 --- a/service/impl/comment_base.go +++ b/service/impl/comment_base.go @@ -2,6 +2,7 @@ package impl import ( "context" + "gorm.io/gen/field" "github.com/go-sonic/sonic/consts" @@ -218,7 +219,6 @@ func (b baseCommentServiceImpl) Create(ctx context.Context, comment *entity.Comm Comment: comment, }) }() - } else { go func() { b.Event.Publish(context.TODO(), &event.CommentNewEvent{ diff --git a/service/impl/export_import.go b/service/impl/export_import.go new file mode 100644 index 00000000..50dceaf6 --- /dev/null +++ b/service/impl/export_import.go @@ -0,0 +1,519 @@ +package impl + +import ( + "bytes" + "context" + "io" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" + "unicode" + + "github.com/spf13/cast" + "github.com/yuin/goldmark" + "gopkg.in/yaml.v2" + + "github.com/go-sonic/sonic/config" + "github.com/go-sonic/sonic/consts" + "github.com/go-sonic/sonic/log" + "github.com/go-sonic/sonic/model/entity" + "github.com/go-sonic/sonic/model/param" + "github.com/go-sonic/sonic/service" + "github.com/go-sonic/sonic/util" + "github.com/go-sonic/sonic/util/pageparser" + "github.com/go-sonic/sonic/util/xerr" +) + +type exportImport struct { + CategoryService service.CategoryService + PostService service.PostService + TagService service.TagService + PostTagService service.PostTagService + PostCategoryService service.PostCategoryService +} + +func NewExportImport(categoryService service.CategoryService, + postService service.PostService, + tagService service.TagService, + postTagService service.PostTagService, + postCategoryService service.PostCategoryService, +) service.ExportImport { + return &exportImport{ + CategoryService: categoryService, + PostService: postService, + TagService: tagService, + PostTagService: postTagService, + PostCategoryService: postCategoryService, + } +} + +func (e *exportImport) CreateByMarkdown(ctx context.Context, filename string, reader io.Reader) (*entity.Post, error) { + contentFrontMatter, err := pageparser.ParseFrontMatterAndContent(reader) + if err != nil { + return nil, xerr.WithMsg(err, "parse markdown failed").WithStatus(xerr.StatusInternalServerError) + } + + content, frontmatter := string(contentFrontMatter.Content), contentFrontMatter.FrontMatter + + postDate, postName, err := parseJekyllFilename(filename) + if err == nil { + content = convertJekyllContent(frontmatter, content) + frontmatter = convertJekyllMetaData(frontmatter, postName, postDate) + } + + var buf bytes.Buffer + if err := goldmark.Convert([]byte(content), &buf); err != nil { + return nil, xerr.BadParam.Wrapf(err, "convert markdown err").WithStatus(xerr.StatusBadRequest) + } + + post := param.Post{ + Status: consts.PostStatusPublished, + EditorType: consts.EditorTypeMarkdown.Ptr(), + OriginalContent: content, + Content: buf.String(), + } + + for key, value := range frontmatter { + switch key { + case "title": + post.Title = value.(string) + case "permalink": + post.Slug = value.(string) + case "slug": + post.Slug = value.(string) + case "date": + if s, ok := value.(string); ok { + date, err := cast.StringToDate(s) + if err != nil { + log.CtxWarnf(ctx, 
"CreateByMarkdown convert date time err=%v", err) + } else { + post.CreateTime = util.Int64Ptr(date.UnixMilli()) + } + } + case "summary": + post.Summary = value.(string) + case "draft": + if s, ok := value.(string); ok && s == "true" { + post.Status = consts.PostStatusDraft + } + if b, ok := value.(bool); ok && b { + post.Status = consts.PostStatusDraft + } + case "updated": + if s, ok := value.(string); ok { + date, err := cast.StringToDate(s) + if err != nil { + log.CtxWarnf(ctx, "CreateByMarkdown convert lastmod time err=%v", err) + } else { + post.UpdateTime = util.Int64Ptr(date.UnixMilli()) + } + } + case "lastmod": + if s, ok := value.(string); ok { + date, err := cast.StringToDate(s) + if err == nil { + post.EditTime = util.Int64Ptr(date.UnixMilli()) + } else { + log.CtxWarnf(ctx, "CreateByMarkdown convert lastmod time err=%v", err) + } + } + case "keywords": + post.MetaKeywords = value.(string) + case "comments": + if s, ok := value.(string); ok && s == "true" { + post.DisallowComment = false + } + if b, ok := value.(bool); ok && b { + post.DisallowComment = !b + } + switch s := value.(type) { + case string: + comments, err := strconv.ParseBool(s) + if err != nil { + log.CtxWarnf(ctx, "CreateByMarkdown parse comments err=%v", err) + } else { + post.DisallowComment = !comments + } + case bool: + post.DisallowComment = !s + } + case "tags": + if _, ok := value.([]any); !ok { + continue + } + + for _, v := range value.([]any) { + if _, ok := v.(string); !ok { + continue + } + tag, err := e.TagService.GetByName(ctx, v.(string)) + if err != nil && xerr.GetType(err) == xerr.NoRecord { + tag, err := e.TagService.Create(ctx, ¶m.Tag{ + Name: v.(string), + Slug: util.Slug(v.(string)), + }) + if err != nil { + post.TagIDs = append(post.TagIDs, tag.ID) + } + } else if err == nil { + post.TagIDs = append(post.TagIDs, tag.ID) + } + } + + case "categories", "category": + + switch s := value.(type) { + case string: + // example: + // --- + // categories: life + // --- + name := strings.TrimSpace(s) + category, err := e.CategoryService.GetByName(ctx, name) + switch { + case xerr.GetType(err) == xerr.NoRecord: + categoryParam := ¶m.Category{ + Name: name, + Slug: util.Slug(name), + } + category, err = e.CategoryService.Create(ctx, categoryParam) + if err != nil { + return nil, err + } + post.CategoryIDs = append(post.CategoryIDs, category.ID) + case err != nil: + return nil, err + case err == nil: + post.CategoryIDs = append(post.CategoryIDs, category.ID) + } + case []any: + // example: + // --- + // categories: + // - Development + // - VIM + // --- + // VIM is sub category of Development + var parentCategoryID int32 + for _, v := range s { + if _, ok := v.(string); !ok { + continue + } + category, err := e.CategoryService.GetByName(ctx, v.(string)) + switch { + case xerr.GetType(err) == xerr.NoRecord: + categoryParam := ¶m.Category{ + Name: v.(string), + Slug: util.Slug(v.(string)), + ParentID: parentCategoryID, + } + category, err = e.CategoryService.Create(ctx, categoryParam) + if err != nil { + return nil, err + } + post.CategoryIDs = append(post.CategoryIDs, category.ID) + parentCategoryID = category.ID + case err != nil: + return nil, err + case err == nil: + post.CategoryIDs = append(post.CategoryIDs, category.ID) + } + } + } + } + } + return e.PostService.Create(ctx, &post) +} + +func (e *exportImport) ExportMarkdown(ctx context.Context, needFrontMatter bool) (string, error) { + posts, _, err := e.PostService.Page(ctx, param.PostQuery{ + Page: param.Page{ + PageNum: 0, + PageSize: 999999, 
+		},
+		Statuses: []*consts.PostStatus{consts.PostStatusDraft.Ptr(), consts.PostStatusIntimate.Ptr(), consts.PostStatusPublished.Ptr()},
+	})
+	if err != nil {
+		return "", err
+	}
+
+	backupFilename := consts.SonicBackupMarkdownPrefix + time.Now().Format("2006-01-02-15-04-05") + util.GenUUIDWithOutDash() + ".zip"
+	backupFilePath := config.BackupMarkdownDir
+
+	if _, err := os.Stat(backupFilePath); os.IsNotExist(err) {
+		err = os.MkdirAll(backupFilePath, os.ModePerm)
+		if err != nil {
+			return "", xerr.NoType.Wrap(err).WithMsg("create dir err")
+		}
+	} else if err != nil {
+		return "", xerr.NoType.Wrap(err).WithMsg("get fileInfo")
+	}
+
+	toBackupPaths := []string{}
+	for _, post := range posts {
+		var markdown strings.Builder
+		if needFrontMatter {
+			frontMatter, err := e.getFrontMatterYaml(ctx, post)
+			if err == nil {
+				markdown.WriteString("---\n")
+				markdown.WriteString(frontMatter)
+				markdown.WriteString("---\n")
+			}
+		}
+		markdown.WriteString(post.OriginalContent)
+
+		fileName := post.CreateTime.Format("2006-01-02") + "-" + post.Slug + ".md"
+		file, err := os.OpenFile(filepath.Join(backupFilePath, fileName), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o666)
+		if err != nil {
+			return "", xerr.WithStatus(err, xerr.StatusInternalServerError).WithMsg("create file err")
+		}
+		_, err = file.WriteString(markdown.String())
+		file.Close()
+		if err != nil {
+			return "", xerr.WithStatus(err, xerr.StatusInternalServerError).WithMsg("write file err")
+		}
+		toBackupPaths = append(toBackupPaths, filepath.Join(backupFilePath, fileName))
+	}
+
+	backupFile := filepath.Join(backupFilePath, backupFilename)
+
+	err = util.ZipFile(backupFile, toBackupPaths...)
+	if err != nil {
+		return "", err
+	}
+	return backupFile, nil
+}
+
+func (e *exportImport) getFrontMatterYaml(ctx context.Context, post *entity.Post) (string, error) {
+	tags, err := e.PostTagService.ListTagByPostID(ctx, post.ID)
+	if err != nil {
+		return "", err
+	}
+	categories, err := e.PostCategoryService.ListCategoryByPostID(ctx, post.ID)
+	if err != nil {
+		return "", err
+	}
+	tagsStr := make([]string, 0)
+	for _, tag := range tags {
+		tagsStr = append(tagsStr, tag.Name)
+	}
+	categoriesStr := make([]string, 0)
+	for _, category := range categories {
+		categoriesStr = append(categoriesStr, category.Name)
+	}
+	frontMatter := make(map[string]any)
+	frontMatter["title"] = post.Title
+	frontMatter["draft"] = post.Status == consts.PostStatusDraft
+	frontMatter["date"] = post.CreateTime.Format("2006-01-02 15:04")
+	frontMatter["comments"] = !post.DisallowComment
+	frontMatter["slug"] = post.Slug
+	if post.EditTime != nil {
+		frontMatter["lastmod"] = post.EditTime.Format("2006-01-02 15:04")
+	}
+
+	if post.UpdateTime != nil && *post.UpdateTime != (time.Time{}) {
+		frontMatter["updated"] = post.UpdateTime.Format("2006-01-02 15:04")
+	}
+	if post.Summary != "" {
+		frontMatter["summary"] = post.Summary
+	}
+	if len(tagsStr) > 0 {
+		frontMatter["tags"] = tagsStr
+	}
+	if len(categoriesStr) > 0 {
+		frontMatter["categories"] = categoriesStr
+	}
+	out, err := yaml.Marshal(frontMatter)
+	if err != nil {
+		return "", xerr.WithStatus(err, xerr.StatusInternalServerError)
+	}
+	return string(out), nil
+}
+
+func convertJekyllMetaData(metadata map[string]any, postName string, postDate time.Time) map[string]any {
+	for key, value := range metadata {
+		lowerKey := strings.ToLower(key)
+
+		switch lowerKey {
+		case "layout":
+			delete(metadata, key)
+		case "permalink":
+			if str, ok := value.(string); ok {
+				metadata["url"] = str
+			}
+		case "category":
+			if str, ok := value.(string); ok {
metadata["categories"] = []string{str} + } + delete(metadata, key) + case "excerpt_separator": + if key != lowerKey { + delete(metadata, key) + metadata[lowerKey] = value + } + case "date": + date, err := cast.StringToDate(value.(string)) + if err == nil { + log.Errorf("convertJekyllMetaData date parse err date=%v err=%v", value.(string), err) + } else { + postDate = date + } + case "title": + postName = value.(string) + case "published": + published, err := strconv.ParseBool(value.(string)) + if err != nil { + log.Errorf("convertJekyllMetaData parse published err published=%v err=%v", value.(string), err) + } else { + delete(metadata, key) + metadata["draft"] = strconv.FormatBool(published) + } + } + } + + metadata["date"] = postDate.Format(time.RFC3339) + metadata["title"] = postName + return metadata +} + +func convertJekyllContent(metadata map[string]any, content string) string { + lines := strings.Split(content, "\n") + var resultLines []string + for _, line := range lines { + resultLines = append(resultLines, strings.Trim(line, "\r\n")) + } + + content = strings.Join(resultLines, "\n") + + excerptSep := "" + if value, ok := metadata["excerpt_separator"]; ok { + if str, strOk := value.(string); strOk { + content = strings.ReplaceAll(content, strings.TrimSpace(str), excerptSep) + } + } + + replaceList := []struct { + re *regexp.Regexp + replace string + }{ + {regexp.MustCompile("(?i)"), ""}, + {regexp.MustCompile(`\{%\s*raw\s*%\}\s*(.*?)\s*\{%\s*endraw\s*%\}`), "$1"}, + {regexp.MustCompile(`{%\s*endhighlight\s*%}`), "{{< / highlight >}}"}, + } + + for _, replace := range replaceList { + content = replace.re.ReplaceAllString(content, replace.replace) + } + + replaceListFunc := []struct { + re *regexp.Regexp + replace func(string) string + }{ + // Octopress image tag: http://octopress.org/docs/plugins/image-tag/ + {regexp.MustCompile(`{%\s+img\s*(.*?)\s*%}`), replaceImageTag}, + {regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`), replaceHighlightTag}, + } + + for _, replace := range replaceListFunc { + content = replace.re.ReplaceAllStringFunc(content, replace.replace) + } + + return content +} + +func replaceHighlightTag(match string) string { + r := regexp.MustCompile(`{%\s*highlight\s*(.*?)\s*%}`) + parts := r.FindStringSubmatch(match) + lastQuote := rune(0) + f := func(c rune) bool { + switch { + case c == lastQuote: + lastQuote = rune(0) + return false + case lastQuote != rune(0): + return false + case unicode.In(c, unicode.Quotation_Mark): + lastQuote = c + return false + default: + return unicode.IsSpace(c) + } + } + // splitting string by space but considering quoted section + items := strings.FieldsFunc(parts[1], f) + + result := bytes.NewBufferString("{{< highlight ") + result.WriteString(items[0]) // language + options := items[1:] + for i, opt := range options { + opt = strings.ReplaceAll(opt, "\"", "") + if opt == "linenos" { + opt = "linenos=table" + } + if i == 0 { + opt = " \"" + opt + } + if i < len(options)-1 { + opt += "," + } else if i == len(options)-1 { + opt += "\"" + } + result.WriteString(opt) + } + + result.WriteString(" >}}") + return result.String() +} + +func replaceImageTag(match string) string { + r := regexp.MustCompile(`{%\s+img\s*(\p{L}*)\s+([\S]*/[\S]+)\s+(\d*)\s*(\d*)\s*(.*?)\s*%}`) + result := bytes.NewBufferString("{{< figure ") + parts := r.FindStringSubmatch(match) + // Index 0 is the entire string, ignore + replaceOptionalPart(result, "class", parts[1]) + replaceOptionalPart(result, "src", parts[2]) + replaceOptionalPart(result, "width", parts[3]) 
+ replaceOptionalPart(result, "height", parts[4]) + // title + alt + part := parts[5] + if len(part) > 0 { + splits := strings.Split(part, "'") + lenSplits := len(splits) + switch lenSplits { + case 1: + replaceOptionalPart(result, "title", splits[0]) + case 3: + replaceOptionalPart(result, "title", splits[1]) + case 5: + replaceOptionalPart(result, "title", splits[1]) + replaceOptionalPart(result, "alt", splits[3]) + } + } + result.WriteString(">}}") + return result.String() +} + +func replaceOptionalPart(buffer *bytes.Buffer, partName string, part string) { + if len(part) > 0 { + buffer.WriteString(partName + "=\"" + part + "\" ") + } +} + +func parseJekyllFilename(filename string) (time.Time, string, error) { + re := regexp.MustCompile(`(\d+-\d+-\d+)-(.+)\..*`) + r := re.FindAllStringSubmatch(filename, -1) + if len(r) == 0 { + return time.Now(), "", xerr.NoType.New("filename not match") + } + + postDate, err := time.Parse("2006-1-2", r[0][1]) + if err != nil { + return time.Now(), "", err + } + + postName := r[0][2] + + return postDate, postName, nil +} diff --git a/service/impl/init.go b/service/impl/init.go index b6daa11f..7048baa3 100644 --- a/service/impl/init.go +++ b/service/impl/init.go @@ -38,6 +38,7 @@ func init() { NewTagService, NewThemeService, NewUserService, + NewExportImport, file_storage.NewFileStorageComposite, ) } diff --git a/service/impl/option.go b/service/impl/option.go index 093db766..7b1ac89c 100644 --- a/service/impl/option.go +++ b/service/impl/option.go @@ -468,5 +468,4 @@ func (o *optionServiceImpl) GetAttachmentType(ctx context.Context) consts.Attach default: return consts.AttachmentTypeLocal } - } diff --git a/service/impl/post.go b/service/impl/post.go index 2cbedda5..6366607b 100644 --- a/service/impl/post.go +++ b/service/impl/post.go @@ -174,6 +174,12 @@ func (p postServiceImpl) ConvertParam(ctx context.Context, postParam *param.Post } else { post.EditorType = consts.EditorTypeMarkdown } + if postParam.EditTime != nil { + post.EditTime = util.TimePtr(time.UnixMilli(*postParam.EditTime)) + } + if postParam.UpdateTime != nil { + post.UpdateTime = util.TimePtr(time.UnixMilli(*postParam.UpdateTime)) + } post.WordCount = util.HtmlFormatWordCount(post.FormatContent) if postParam.Slug == "" { @@ -182,7 +188,9 @@ func (p postServiceImpl) ConvertParam(ctx context.Context, postParam *param.Post post.Slug = util.Slug(postParam.Slug) } if postParam.CreateTime != nil { - time.UnixMilli(*postParam.CreateTime) + post.CreateTime = time.UnixMilli(*postParam.CreateTime) + } else { + post.CreateTime = time.Now() } return post, nil } @@ -279,6 +287,7 @@ func (p postServiceImpl) GetPrevPosts(ctx context.Context, post *entity.Post, si } return posts, nil } + func (p postServiceImpl) GetNextPosts(ctx context.Context, post *entity.Post, size int) ([]*entity.Post, error) { postSort := p.OptionService.GetOrByDefault(ctx, property.IndexSort) postDAL := dal.GetQueryByCtx(ctx).Post diff --git a/service/impl/tag.go b/service/impl/tag.go index 43111be8..b32051c5 100644 --- a/service/impl/tag.go +++ b/service/impl/tag.go @@ -220,3 +220,9 @@ func (t tagServiceImpl) CountAllTag(ctx context.Context) (int64, error) { } return count, nil } + +func (t tagServiceImpl) GetByName(ctx context.Context, name string) (*entity.Tag, error) { + tagDAL := dal.GetQueryByCtx(ctx).Tag + tag, err := tagDAL.WithContext(ctx).Where(tagDAL.Name.Eq(name)).First() + return tag, WrapDBErr(err) +} diff --git a/service/tag.go b/service/tag.go index b3845f32..0a2800bf 100644 --- a/service/tag.go +++ 
b/service/tag.go @@ -13,6 +13,7 @@ type TagService interface { ListByIDs(ctx context.Context, tagIDs []int32) ([]*entity.Tag, error) GetByID(ctx context.Context, id int32) (*entity.Tag, error) GetBySlug(ctx context.Context, slug string) (*entity.Tag, error) + GetByName(ctx context.Context, name string) (*entity.Tag, error) ConvertToDTO(ctx context.Context, tag *entity.Tag) (*dto.Tag, error) ConvertToDTOs(ctx context.Context, tags []*entity.Tag) ([]*dto.Tag, error) Create(ctx context.Context, tagParam *param.Tag) (*entity.Tag, error) diff --git a/util/conv.go b/util/conv.go index f43348f9..9fc8887c 100644 --- a/util/conv.go +++ b/util/conv.go @@ -29,10 +29,15 @@ func TimePtr(t time.Time) *time.Time { func BoolPtr(b bool) *bool { return &b } + func Int32Ptr(i int32) *int32 { return &i } +func Int64Ptr(i int64) *int64 { + return &i +} + func StringPtr(s string) *string { return &s } diff --git a/util/gin.go b/util/gin.go index 7054ab3f..0fa52ec1 100644 --- a/util/gin.go +++ b/util/gin.go @@ -57,6 +57,7 @@ func MustGetQueryInt64(ctx *gin.Context, key string) (int64, error) { } return value, nil } + func MustGetQueryInt(ctx *gin.Context, key string) (int, error) { str, ok := ctx.GetQuery(key) if !ok { diff --git a/util/pageparser/item.go b/util/pageparser/item.go new file mode 100644 index 00000000..2083be70 --- /dev/null +++ b/util/pageparser/item.go @@ -0,0 +1,221 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pageparser + +import ( + "bytes" + "fmt" + "regexp" + "strconv" + + "github.com/yuin/goldmark/util" +) + +type lowHigh struct { + Low int + High int +} + +type Item struct { + Type ItemType + Err error + + // The common case is a single segment. + low int + high int + + // This is the uncommon case. + segments []lowHigh + + // Used for validation. + firstByte byte + + isString bool +} + +type Items []Item + +func (i Item) Pos() int { + if len(i.segments) > 0 { + return i.segments[0].Low + } + return i.low +} + +func (i Item) Val(source []byte) []byte { + if len(i.segments) == 0 { + return source[i.low:i.high] + } + + if len(i.segments) == 1 { + return source[i.segments[0].Low:i.segments[0].High] + } + + var b bytes.Buffer + for _, s := range i.segments { + b.Write(source[s.Low:s.High]) + } + return b.Bytes() +} + +func (i Item) ValStr(source []byte) string { + return string(i.Val(source)) +} + +func (i Item) ValTyped(source []byte) any { + str := i.ValStr(source) + if i.isString { + // A quoted value that is a string even if it looks like a number etc. 
+		return str
+	}
+
+	if boolRe.MatchString(str) {
+		return str == "true"
+	}
+
+	if intRe.MatchString(str) {
+		num, err := strconv.Atoi(str)
+		if err != nil {
+			return str
+		}
+		return num
+	}
+
+	if floatRe.MatchString(str) {
+		num, err := strconv.ParseFloat(str, 64)
+		if err != nil {
+			return str
+		}
+		return num
+	}
+
+	return str
+}
+
+func (i Item) IsText() bool {
+	return i.Type == tText || i.Type == tIndentation
+}
+
+func (i Item) IsIndentation() bool {
+	return i.Type == tIndentation
+}
+
+func (i Item) IsNonWhitespace(source []byte) bool {
+	return len(bytes.TrimSpace(i.Val(source))) > 0
+}
+
+func (i Item) IsShortcodeName() bool {
+	return i.Type == tScName
+}
+
+func (i Item) IsInlineShortcodeName() bool {
+	return i.Type == tScNameInline
+}
+
+func (i Item) IsLeftShortcodeDelim() bool {
+	return i.Type == tLeftDelimScWithMarkup || i.Type == tLeftDelimScNoMarkup
+}
+
+func (i Item) IsRightShortcodeDelim() bool {
+	return i.Type == tRightDelimScWithMarkup || i.Type == tRightDelimScNoMarkup
+}
+
+func (i Item) IsShortcodeClose() bool {
+	return i.Type == tScClose
+}
+
+func (i Item) IsShortcodeParam() bool {
+	return i.Type == tScParam
+}
+
+func (i Item) IsShortcodeParamVal() bool {
+	return i.Type == tScParamVal
+}
+
+func (i Item) IsShortcodeMarkupDelimiter() bool {
+	return i.Type == tLeftDelimScWithMarkup || i.Type == tRightDelimScWithMarkup
+}
+
+func (i Item) IsFrontMatter() bool {
+	return i.Type >= TypeFrontMatterYAML && i.Type <= TypeFrontMatterORG
+}
+
+func (i Item) IsDone() bool {
+	return i.Type == tError || i.Type == tEOF
+}
+
+func (i Item) IsEOF() bool {
+	return i.Type == tEOF
+}
+
+func (i Item) IsError() bool {
+	return i.Type == tError
+}
+
+func (i Item) ToString(source []byte) string {
+	val := i.Val(source)
+	switch {
+	case i.Type == tEOF:
+		return "EOF"
+	case i.Type == tError:
+		return string(val)
+	case i.Type == tIndentation:
+		return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(val))
+	case i.Type > tKeywordMarker:
+		return fmt.Sprintf("<%s>", val)
+	case len(val) > 50:
+		return fmt.Sprintf("%v:%.20q...", i.Type, val)
+	}
+	return fmt.Sprintf("%v:[%s]", i.Type, val)
+}
+
+type ItemType int
+
+const (
+	tError ItemType = iota
+	tEOF
+
+	// page items
+	TypeLeadSummaryDivider // <!--more-->, # more
+	TypeFrontMatterYAML
+	TypeFrontMatterTOML
+	TypeFrontMatterJSON
+	TypeFrontMatterORG
+	TypeEmoji
+	TypeIgnore // The BOM Unicode byte order marker and possibly others
+
+	// shortcode items
+	tLeftDelimScNoMarkup
+	tRightDelimScNoMarkup
+	tLeftDelimScWithMarkup
+	tRightDelimScWithMarkup
+	tScClose
+	tScName
+	tScNameInline
+	tScParam
+	tScParamVal
+
+	tIndentation
+
+	tText // plain text
+
+	// preserved for later - keywords come after this
+	tKeywordMarker
+)
+
+var (
+	boolRe  = regexp.MustCompile(`^(true$)|(false$)`)
+	intRe   = regexp.MustCompile(`^[-+]?\d+$`)
+	floatRe = regexp.MustCompile(`^[-+]?\d*\.\d+$`)
+)
diff --git a/util/pageparser/itemtype_string.go b/util/pageparser/itemtype_string.go
new file mode 100644
index 00000000..b0b849ad
--- /dev/null
+++ b/util/pageparser/itemtype_string.go
@@ -0,0 +1,43 @@
+// Code generated by "stringer -type ItemType"; DO NOT EDIT.
+
+package pageparser
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[tError-0]
+	_ = x[tEOF-1]
+	_ = x[TypeLeadSummaryDivider-2]
+	_ = x[TypeFrontMatterYAML-3]
+	_ = x[TypeFrontMatterTOML-4]
+	_ = x[TypeFrontMatterJSON-5]
+	_ = x[TypeFrontMatterORG-6]
+	_ = x[TypeEmoji-7]
+	_ = x[TypeIgnore-8]
+	_ = x[tLeftDelimScNoMarkup-9]
+	_ = x[tRightDelimScNoMarkup-10]
+	_ = x[tLeftDelimScWithMarkup-11]
+	_ = x[tRightDelimScWithMarkup-12]
+	_ = x[tScClose-13]
+	_ = x[tScName-14]
+	_ = x[tScNameInline-15]
+	_ = x[tScParam-16]
+	_ = x[tScParamVal-17]
+	_ = x[tIndentation-18]
+	_ = x[tText-19]
+	_ = x[tKeywordMarker-20]
+}
+
+const _ItemType_name = "tErrortEOFTypeLeadSummaryDividerTypeFrontMatterYAMLTypeFrontMatterTOMLTypeFrontMatterJSONTypeFrontMatterORGTypeEmojiTypeIgnoretLeftDelimScNoMarkuptRightDelimScNoMarkuptLeftDelimScWithMarkuptRightDelimScWithMarkuptScClosetScNametScNameInlinetScParamtScParamValtIndentationtTexttKeywordMarker"
+
+var _ItemType_index = [...]uint16{0, 6, 10, 32, 51, 70, 89, 107, 116, 126, 146, 167, 189, 212, 220, 227, 240, 248, 259, 271, 276, 290}
+
+func (i ItemType) String() string {
+	if i < 0 || i >= ItemType(len(_ItemType_index)-1) {
+		return "ItemType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _ItemType_name[_ItemType_index[i]:_ItemType_index[i+1]]
+}
diff --git a/util/pageparser/metadecoders/decoder.go b/util/pageparser/metadecoders/decoder.go
new file mode 100644
index 00000000..525f2e89
--- /dev/null
+++ b/util/pageparser/metadecoders/decoder.go
@@ -0,0 +1,261 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metadecoders
+
+import (
+	"bytes"
+	"encoding/csv"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	xml "github.com/clbanning/mxj/v2"
+	toml "github.com/pelletier/go-toml/v2"
+	"github.com/spf13/afero"
+	"github.com/spf13/cast"
+	yaml "gopkg.in/yaml.v2"
+)
+
+// Decoder provides some configuration options for the decoders.
+type Decoder struct {
+	// Delimiter is the field delimiter used in the CSV decoder. It defaults to ','.
+	Delimiter rune
+
+	// Comment, if not 0, is the comment character used in the CSV decoder. Lines beginning with the
+	// Comment character without preceding whitespace are ignored.
+	Comment rune
+}
+
+// OptionsKey is used in cache keys.
+func (d Decoder) OptionsKey() string {
+	var sb strings.Builder
+	sb.WriteRune(d.Delimiter)
+	sb.WriteRune(d.Comment)
+	return sb.String()
+}
+
+// Default is a Decoder in its default configuration.
+var Default = Decoder{
+	Delimiter: ',',
+}
+
+// UnmarshalToMap will unmarshal data in format f into a new map. This is
+// what's needed for Hugo's front matter decoding.
+func (d Decoder) UnmarshalToMap(data []byte, f Format) (map[string]any, error) {
+	m := make(map[string]any)
+	if data == nil {
+		return m, nil
+	}
+
+	err := d.UnmarshalTo(data, f, &m)
+
+	return m, err
+}
+
+// UnmarshalFileToMap is the same as UnmarshalToMap, but reads the data from
+// the given filename.
+func (d Decoder) UnmarshalFileToMap(fs afero.Fs, filename string) (map[string]any, error) {
+	format := FormatFromString(filename)
+	if format == "" {
+		return nil, fmt.Errorf("%q is not a valid configuration format", filename)
+	}
+
+	data, err := afero.ReadFile(fs, filename)
+	if err != nil {
+		return nil, err
+	}
+	return d.UnmarshalToMap(data, format)
+}
+
+// UnmarshalStringTo tries to unmarshal data to a new instance of type typ.
+func (d Decoder) UnmarshalStringTo(data string, typ any) (any, error) {
+	data = strings.TrimSpace(data)
+	// We only check for the possible types in YAML, JSON and TOML.
+	switch typ.(type) {
+	case string:
+		return data, nil
+	case map[string]any:
+		format := d.FormatFromContentString(data)
+		return d.UnmarshalToMap([]byte(data), format)
+	case []any:
+		// A standalone slice. Let YAML handle it.
+		return d.Unmarshal([]byte(data), YAML)
+	case bool:
+		return cast.ToBoolE(data)
+	case int:
+		return cast.ToIntE(data)
+	case int64:
+		return cast.ToInt64E(data)
+	case float64:
+		return cast.ToFloat64E(data)
+	default:
+		return nil, fmt.Errorf("unmarshal: %T not supported", typ)
+	}
+}
+
+// Unmarshal will unmarshal data in format f into an interface{}.
+// This is what's needed for Hugo's /data handling.
+func (d Decoder) Unmarshal(data []byte, f Format) (any, error) {
+	if data == nil {
+		switch f {
+		case CSV:
+			return make([][]string, 0), nil
+		default:
+			return make(map[string]any), nil
+		}
+	}
+	var v any
+	err := d.UnmarshalTo(data, f, &v)
+
+	return v, err
+}
+
+// UnmarshalTo unmarshals data in format f into v.
+func (d Decoder) UnmarshalTo(data []byte, f Format, v any) error {
+	var err error
+
+	switch f {
+	case JSON:
+		err = json.Unmarshal(data, v)
+	case XML:
+		var xmlRoot xml.Map
+		xmlRoot, err = xml.NewMapXml(data)
+
+		var xmlValue map[string]any
+		if err == nil {
+			xmlRootName, err := xmlRoot.Root()
+			if err != nil {
+				return err
+			}
+			xmlValue = xmlRoot[xmlRootName].(map[string]any)
+		}
+
+		switch v := v.(type) {
+		case *map[string]any:
+			*v = xmlValue
+		case *any:
+			*v = xmlValue
+		}
+	case TOML:
+		err = toml.Unmarshal(data, v)
+	case YAML:
+		err = yaml.Unmarshal(data, v)
+		if err != nil {
+			return err
+		}
+
+		// To support boolean keys, the YAML package unmarshals maps to
+		// map[interface{}]interface{}. Here we recurse through the result
+		// and change all maps to map[string]interface{} like we would've
+		// gotten from `json`.
+		var ptr any
+		switch i := v.(type) {
+		case *map[string]any:
+			ptr = *i
+		case *any:
+			ptr = *i
+		default:
+			// Not a map.
+		}
+
+		if ptr != nil {
+			if mm, changed := stringifyMapKeys(ptr); changed {
+				switch i := v.(type) {
+				case *map[string]any:
+					*i = mm.(map[string]any)
+				case *any:
+					*i = mm
+				}
+			}
+		}
+	case CSV:
+		return d.unmarshalCSV(data, v)
+
+	default:
+		return fmt.Errorf("unmarshal of format %q is not supported", f)
+	}
+
+	if err == nil {
+		return nil
+	}
+
+	return fmt.Errorf("unmarshal failed: %w", err)
+}
+
+func (d Decoder) unmarshalCSV(data []byte, v any) error {
+	r := csv.NewReader(bytes.NewReader(data))
+	r.Comma = d.Delimiter
+	r.Comment = d.Comment
+
+	records, err := r.ReadAll()
+	if err != nil {
+		return err
+	}
+
+	switch i := v.(type) {
+	case *any:
+		*i = records
+	default:
+		return fmt.Errorf("CSV cannot be unmarshaled into %T", v)
+	}
+
+	return nil
+}
+
+// stringifyMapKeys recurses into in and changes all instances of
+// map[interface{}]interface{} to map[string]interface{}.
+// This is useful to work around the impedance mismatch between JSON and YAML
+// unmarshaling that's described here: https://github.com/go-yaml/yaml/issues/139
+//
+// Inspired by https://github.com/stripe/stripe-mock, MIT licensed
+func stringifyMapKeys(in any) (any, bool) {
+	switch in := in.(type) {
+	case []any:
+		for i, v := range in {
+			if vv, replaced := stringifyMapKeys(v); replaced {
+				in[i] = vv
+			}
+		}
+	case map[string]any:
+		for k, v := range in {
+			if vv, changed := stringifyMapKeys(v); changed {
+				in[k] = vv
+			}
+		}
+	case map[any]any:
+		res := make(map[string]any)
+		var (
+			ok  bool
+			err error
+		)
+		for k, v := range in {
+			var ks string
+
+			if ks, ok = k.(string); !ok {
+				ks, err = cast.ToStringE(k)
+				if err != nil {
+					ks = fmt.Sprintf("%v", k)
+				}
+			}
+			if vv, replaced := stringifyMapKeys(v); replaced {
+				res[ks] = vv
+			} else {
+				res[ks] = v
+			}
+		}
+		return res, true
+	}
+
+	return nil, false
+}
diff --git a/util/pageparser/metadecoders/format.go b/util/pageparser/metadecoders/format.go
new file mode 100644
index 00000000..c28d54d8
--- /dev/null
+++ b/util/pageparser/metadecoders/format.go
@@ -0,0 +1,104 @@
+// Copyright 2018 The Hugo Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metadecoders
+
+import (
+	"path/filepath"
+	"strings"
+)
+
+type Format string
+
+const (
+	// These are the supported metadata formats in Hugo. Most of these are also
+	// supported as /data formats.
+	ORG  Format = "org"
+	JSON Format = "json"
+	TOML Format = "toml"
+	YAML Format = "yaml"
+	CSV  Format = "csv"
+	XML  Format = "xml"
+)
+
+// FormatFromString turns formatStr, typically a file extension without any ".",
+// into a Format. It returns an empty string for unknown formats.
+func FormatFromString(formatStr string) Format {
+	formatStr = strings.ToLower(formatStr)
+	if strings.Contains(formatStr, ".") {
+		// Assume a filename
+		formatStr = strings.TrimPrefix(filepath.Ext(formatStr), ".")
+	}
+	switch formatStr {
+	case "yaml", "yml":
+		return YAML
+	case "json":
+		return JSON
+	case "toml":
+		return TOML
+	case "org":
+		return ORG
+	case "csv":
+		return CSV
+	case "xml":
+		return XML
+	}
+
+	return ""
+}
+
+// FormatFromContentString tries to detect the format (JSON, YAML, TOML or XML)
+// in the given string.
+// It returns an empty string if no format could be detected.
+func (d Decoder) FormatFromContentString(data string) Format { + csvIdx := strings.IndexRune(data, d.Delimiter) + jsonIdx := strings.Index(data, "{") + yamlIdx := strings.Index(data, ":") + xmlIdx := strings.Index(data, "<") + tomlIdx := strings.Index(data, "=") + + if isLowerIndexThan(csvIdx, jsonIdx, yamlIdx, xmlIdx, tomlIdx) { + return CSV + } + + if isLowerIndexThan(jsonIdx, yamlIdx, xmlIdx, tomlIdx) { + return JSON + } + + if isLowerIndexThan(yamlIdx, xmlIdx, tomlIdx) { + return YAML + } + + if isLowerIndexThan(xmlIdx, tomlIdx) { + return XML + } + + if tomlIdx != -1 { + return TOML + } + + return "" +} + +func isLowerIndexThan(first int, others ...int) bool { + if first == -1 { + return false + } + for _, other := range others { + if other != -1 && other < first { + return false + } + } + + return true +} diff --git a/util/pageparser/pagelexer.go b/util/pageparser/pagelexer.go new file mode 100644 index 00000000..d55bcef5 --- /dev/null +++ b/util/pageparser/pagelexer.go @@ -0,0 +1,590 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pageparser + +import ( + "bytes" + "fmt" + "unicode" + "unicode/utf8" +) + +const eof = -1 + +// returns the next state in scanner. +type stateFunc func(*pageLexer) stateFunc + +type pageLexer struct { + input []byte + stateStart stateFunc + state stateFunc + pos int // input position + start int // item start position + width int // width of last element + + // Contains lexers for shortcodes and other main section + // elements. + sectionHandlers *sectionHandlers + + cfg Config + + // The summary divider to look for. + summaryDivider []byte + // Set when we have parsed any summary divider + summaryDividerChecked bool + // Whether we're in a HTML comment. 
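+	// (e.g. front matter wrapped in <!-- ... --> by an exporter; see lexIntroSection)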
+	isInHTMLComment bool
+
+	lexerShortcodeState
+
+	// items delivered to client
+	items Items
+}
+
+// Implement the Result interface
+func (l *pageLexer) Iterator() *Iterator {
+	return NewIterator(l.items)
+}
+
+func (l *pageLexer) Input() []byte {
+	return l.input
+}
+
+type Config struct {
+	EnableEmoji bool
+}
+
+// note: the input position here is normally 0 (start), but
+// can be set if position of first shortcode is known
+func newPageLexer(input []byte, stateStart stateFunc, cfg Config) *pageLexer {
+	lexer := &pageLexer{
+		input:      input,
+		stateStart: stateStart,
+		cfg:        cfg,
+		lexerShortcodeState: lexerShortcodeState{
+			currLeftDelimItem:  tLeftDelimScNoMarkup,
+			currRightDelimItem: tRightDelimScNoMarkup,
+			openShortcodes:     make(map[string]bool),
+		},
+		items: make([]Item, 0, 5),
+	}
+
+	lexer.sectionHandlers = createSectionHandlers(lexer)
+
+	return lexer
+}
+
+// main loop
+func (l *pageLexer) run() *pageLexer {
+	for l.state = l.stateStart; l.state != nil; {
+		l.state = l.state(l)
+	}
+	return l
+}
+
+// Page syntax
+var (
+	byteOrderMark     = '\ufeff'
+	summaryDivider    = []byte("<!--more-->")
+	summaryDividerOrg = []byte("# more")
+	delimTOML         = []byte("+++")
+	delimYAML         = []byte("---")
+	delimOrg          = []byte("#+")
+	htmlCommentStart  = []byte("<!--")
+	htmlCommentEnd    = []byte("-->")
+
+	emojiDelim = byte(':')
+)
+
+func (l *pageLexer) next() rune {
+	if l.pos >= len(l.input) {
+		l.width = 0
+		return eof
+	}
+
+	runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
+	l.width = runeWidth
+	l.pos += l.width
+
+	return runeValue
+}
+
+// peek returns the next rune, but does not consume it.
+func (l *pageLexer) peek() rune {
+	r := l.next()
+	l.backup()
+	return r
+}
+
+// steps back one
+func (l *pageLexer) backup() {
+	l.pos -= l.width
+}
+
+func (l *pageLexer) append(item Item) {
+	if item.Pos() < len(l.input) {
+		item.firstByte = l.input[item.Pos()]
+	}
+	l.items = append(l.items, item)
+}
+
+// sends an item back to the client.
+func (l *pageLexer) emit(t ItemType) {
+	defer func() {
+		l.start = l.pos
+	}()
+
+	if t == tText {
+		// Identify any trailing whitespace/indentation.
+		// We currently only care about the last one.
+	Loop:
+		for i := l.pos - 1; i >= l.start; i-- {
+			b := l.input[i]
+			if b != ' ' && b != '\t' && b != '\r' && b != '\n' {
+				break
+			}
+			switch {
+			case i == l.start && b != '\n':
+				l.append(Item{Type: tIndentation, low: l.start, high: l.pos})
+				return
+			case b == '\n' && i < l.pos-1:
+				l.append(Item{Type: t, low: l.start, high: i + 1})
+				l.append(Item{Type: tIndentation, low: i + 1, high: l.pos})
+				return
+			case b == '\n' && i == l.pos-1:
+				break Loop
+			}
+		}
+	}
+
+	l.append(Item{Type: t, low: l.start, high: l.pos})
+}
+
+// sends a string item back to the client.
+func (l *pageLexer) emitString(t ItemType) {
+	l.append(Item{Type: t, low: l.start, high: l.pos, isString: true})
+	l.start = l.pos
+}
+
+func (l *pageLexer) isEOF() bool {
+	return l.pos >= len(l.input)
+}
+
+// special case, do not send '\\' back to client
+func (l *pageLexer) ignoreEscapesAndEmit(t ItemType) {
+	i := l.start
+	k := i
+
+	var segments []lowHigh
+
+	for i < l.pos {
+		r, w := utf8.DecodeRune(l.input[i:l.pos])
+		if r == '\\' {
+			if i > k {
+				segments = append(segments, lowHigh{k, i})
+			}
+			// See issue #10236.
+			// We don't send the backslash back to the client,
+			// which makes the end parsing simpler.
+			// This means that we cannot render the AST back to be
+			// exactly the same as the input,
+			// but that was also the situation before we introduced the issue in #10236.
+			k = i + w
+		}
+		i += w
+	}
+
+	if k < l.pos {
+		segments = append(segments, lowHigh{k, l.pos})
+	}
+
+	if len(segments) > 0 {
+		l.append(Item{Type: t, segments: segments})
+	}
+
+	l.start = l.pos
+}
+
+// gets the current value (for debugging and error handling)
+func (l *pageLexer) current() []byte {
+	return l.input[l.start:l.pos]
+}
+
+// ignore current element
+func (l *pageLexer) ignore() {
+	l.start = l.pos
+}
+
+var lf = []byte("\n")
+
+// nil terminates the parser
+func (l *pageLexer) errorf(format string, args ...any) stateFunc {
+	l.append(Item{Type: tError, Err: fmt.Errorf(format, args...)})
+	return nil
+}
+
+func (l *pageLexer) consumeCRLF() bool {
+	var consumed bool
+	for _, r := range crLf {
+		if l.next() != r {
+			l.backup()
+		} else {
+			consumed = true
+		}
+	}
+	return consumed
+}
+
+func (l *pageLexer) consumeToNextLine() {
+	for {
+		r := l.next()
+		if r == eof || isEndOfLine(r) {
+			return
+		}
+	}
+}
+
+func (l *pageLexer) consumeToSpace() {
+	for {
+		r := l.next()
+		if r == eof || unicode.IsSpace(r) {
+			l.backup()
+			return
+		}
+	}
+}
+
+func (l *pageLexer) consumeSpace() {
+	for {
+		r := l.next()
+		if r == eof || !unicode.IsSpace(r) {
+			l.backup()
+			return
+		}
+	}
+}
+
+// lex a string starting at ":"
+func lexEmoji(l *pageLexer) stateFunc {
+	pos := l.pos + 1
+	valid := false
+
+	for i := pos; i < len(l.input); i++ {
+		if i > pos && l.input[i] == emojiDelim {
+			pos = i + 1
+			valid = true
+			break
+		}
+		r, _ := utf8.DecodeRune(l.input[i:])
+		if !(isAlphaNumericOrHyphen(r) || r == '+') {
+			break
+		}
+	}
+
+	if valid {
+		l.pos = pos
+		l.emit(TypeEmoji)
+	} else {
+		l.pos++
+		l.emit(tText)
+	}
+
+	return lexMainSection
+}
+
+type sectionHandlers struct {
+	l *pageLexer
+
+	// Set when none of the sections are found so we
+	// can safely stop looking and skip to the end.
+	skipAll bool
+
+	handlers    []*sectionHandler
+	skipIndexes []int
+}
+
+func (s *sectionHandlers) skip() int {
+	if s.skipAll {
+		return -1
+	}
+
+	s.skipIndexes = s.skipIndexes[:0]
+	var shouldSkip bool
+	for _, skipper := range s.handlers {
+		idx := skipper.skip()
+		if idx != -1 {
+			shouldSkip = true
+			s.skipIndexes = append(s.skipIndexes, idx)
+		}
+	}
+
+	if !shouldSkip {
+		s.skipAll = true
+		return -1
+	}
+
+	return minIndex(s.skipIndexes...)
+}
+
+func createSectionHandlers(l *pageLexer) *sectionHandlers {
+	shortCodeHandler := &sectionHandler{
+		l: l,
+		skipFunc: func(l *pageLexer) int {
+			return l.index(leftDelimSc)
+		},
+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+			if !l.isShortCodeStart() {
+				return origin, false
+			}
+
+			if l.isInline {
+				// If we're inside an inline shortcode, the only valid shortcode markup is
+				// the markup which closes it.
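+				// e.g. while {{< foo.inline >}} is open, the only shortcode
+				// accepted here is its closing {{< /foo.inline >}}.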
+				b := l.input[l.pos+3:]
+				end := indexNonWhiteSpace(b, '/')
+				if end != len(l.input)-1 {
+					b = bytes.TrimSpace(b[end+1:])
+					if end == -1 || !bytes.HasPrefix(b, []byte(l.currShortcodeName+" ")) {
+						return l.errorf("inline shortcodes do not support nesting"), true
+					}
+				}
+			}
+
+			if l.hasPrefix(leftDelimScWithMarkup) {
+				l.currLeftDelimItem = tLeftDelimScWithMarkup
+				l.currRightDelimItem = tRightDelimScWithMarkup
+			} else {
+				l.currLeftDelimItem = tLeftDelimScNoMarkup
+				l.currRightDelimItem = tRightDelimScNoMarkup
+			}
+
+			return lexShortcodeLeftDelim, true
+		},
+	}
+
+	summaryDividerHandler := &sectionHandler{
+		l: l,
+		skipFunc: func(l *pageLexer) int {
+			if l.summaryDividerChecked || l.summaryDivider == nil {
+				return -1
+			}
+			return l.index(l.summaryDivider)
+		},
+		lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+			if !l.hasPrefix(l.summaryDivider) {
+				return origin, false
+			}
+
+			l.summaryDividerChecked = true
+			l.pos += len(l.summaryDivider)
+			// This makes it a little easier to reason about later.
+			l.consumeSpace()
+			l.emit(TypeLeadSummaryDivider)
+
+			return origin, true
+		},
+	}
+
+	handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}
+
+	if l.cfg.EnableEmoji {
+		emojiHandler := &sectionHandler{
+			l: l,
+			skipFunc: func(l *pageLexer) int {
+				return l.indexByte(emojiDelim)
+			},
+			lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
+				return lexEmoji, true
+			},
+		}
+
+		handlers = append(handlers, emojiHandler)
+	}
+
+	return &sectionHandlers{
+		l:           l,
+		handlers:    handlers,
+		skipIndexes: make([]int, len(handlers)),
+	}
+}
+
+func (s *sectionHandlers) lex(origin stateFunc) stateFunc {
+	if s.skipAll {
+		return nil
+	}
+
+	if s.l.pos > s.l.start {
+		s.l.emit(tText)
+	}
+
+	for _, handler := range s.handlers {
+		if handler.skipAll {
+			continue
+		}
+
+		next, handled := handler.lexFunc(origin, handler.l)
+		if next == nil || handled {
+			return next
+		}
+	}
+
+	// Not handled by the above.
+	s.l.pos++
+
+	return origin
+}
+
+type sectionHandler struct {
+	l *pageLexer
+
+	// No more sections of this type.
+	skipAll bool
+
+	// Returns the index of the next match, -1 if none found.
+	skipFunc func(l *pageLexer) int
+
+	// Lex lexes the current section and returns the next state func and
+	// a bool telling if this section was handled.
+	// Note that returning nil as the next state will terminate the
+	// lexer.
+	lexFunc func(origin stateFunc, l *pageLexer) (stateFunc, bool)
+}
+
+func (s *sectionHandler) skip() int {
+	if s.skipAll {
+		return -1
+	}
+
+	idx := s.skipFunc(s.l)
+	if idx == -1 {
+		s.skipAll = true
+	}
+	return idx
+}
+
+func lexMainSection(l *pageLexer) stateFunc {
+	if l.isEOF() {
+		return lexDone
+	}
+
+	if l.isInHTMLComment {
+		return lexEndFrontMatterHTMLComment
+	}
+
+	// Fast forward as far as possible.
+	skip := l.sectionHandlers.skip()
+
+	if skip == -1 {
+		l.pos = len(l.input)
+		return lexDone
+	} else if skip > 0 {
+		l.pos += skip
+	}
+
+	next := l.sectionHandlers.lex(lexMainSection)
+	if next != nil {
+		return next
+	}
+
+	l.pos = len(l.input)
+	return lexDone
+}
+
+func lexDone(l *pageLexer) stateFunc {
+	// Done!
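+	// Emit any remaining text, then the final EOF item.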
+ if l.pos > l.start { + l.emit(tText) + } + l.emit(tEOF) + return nil +} + +// state helpers + +func (l *pageLexer) index(sep []byte) int { + return bytes.Index(l.input[l.pos:], sep) +} + +func (l *pageLexer) indexByte(sep byte) int { + return bytes.IndexByte(l.input[l.pos:], sep) +} + +func (l *pageLexer) hasPrefix(prefix []byte) bool { + return bytes.HasPrefix(l.input[l.pos:], prefix) +} + +// helper functions + +// returns the min index >= 0 +func minIndex(indices ...int) int { + min := -1 + + for _, j := range indices { + if j < 0 { + continue + } + if min == -1 { + min = j + } else if j < min { + min = j + } + } + return min +} + +func indexNonWhiteSpace(s []byte, in rune) int { + idx := bytes.IndexFunc(s, func(r rune) bool { + return !unicode.IsSpace(r) + }) + + if idx == -1 { + return -1 + } + + r, _ := utf8.DecodeRune(s[idx:]) + if r == in { + return idx + } + return -1 +} + +func isSpace(r rune) bool { + return r == ' ' || r == '\t' +} + +func isAlphaNumericOrHyphen(r rune) bool { + // let unquoted YouTube ids as positional params slip through (they contain hyphens) + return isAlphaNumeric(r) || r == '-' +} + +var crLf = []rune{'\r', '\n'} + +func isEndOfLine(r rune) bool { + return r == '\r' || r == '\n' +} + +func isAlphaNumeric(r rune) bool { + return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) +} diff --git a/util/pageparser/pagelexer_intro.go b/util/pageparser/pagelexer_intro.go new file mode 100644 index 00000000..6ce04723 --- /dev/null +++ b/util/pageparser/pagelexer_intro.go @@ -0,0 +1,186 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pageparser + +func lexIntroSection(l *pageLexer) stateFunc { + l.summaryDivider = summaryDivider + +LOOP: + for { + r := l.next() + if r == eof { + break + } + + switch { + case r == '+': + return l.lexFrontMatterSection(TypeFrontMatterTOML, r, "TOML", delimTOML) + case r == '-': + return l.lexFrontMatterSection(TypeFrontMatterYAML, r, "YAML", delimYAML) + case r == '{': + return lexFrontMatterJSON + case r == '#': + return lexFrontMatterOrgMode + case r == byteOrderMark: + l.emit(TypeIgnore) + case !isSpace(r) && !isEndOfLine(r): + if r == '<' { + l.backup() + if l.hasPrefix(htmlCommentStart) { + // This may be commented out front matter, which should + // still be read. + l.consumeToNextLine() + l.isInHTMLComment = true + l.emit(TypeIgnore) + continue LOOP + } else { + return l.errorf("plain HTML documents not supported") + } + } + break LOOP + } + } + + // Now move on to the shortcodes. + return lexMainSection +} + +func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc { + l.isInHTMLComment = false + right := l.index(htmlCommentEnd) + if right == -1 { + return l.errorf("starting HTML comment with no end") + } + l.pos += right + len(htmlCommentEnd) + l.emit(TypeIgnore) + + // Now move on to the shortcodes. 
+ return lexMainSection +} + +func lexFrontMatterJSON(l *pageLexer) stateFunc { + // Include the left delimiter + l.backup() + + var ( + inQuote bool + level int + ) + + for { + r := l.next() + + switch { + case r == eof: + return l.errorf("unexpected EOF parsing JSON front matter") + case r == '{': + if !inQuote { + level++ + } + case r == '}': + if !inQuote { + level-- + } + case r == '"': + inQuote = !inQuote + case r == '\\': + // This may be an escaped quote. Make sure it's not marked as a + // real one. + l.next() + } + + if level == 0 { + break + } + } + + l.consumeCRLF() + l.emit(TypeFrontMatterJSON) + + return lexMainSection +} + +func lexFrontMatterOrgMode(l *pageLexer) stateFunc { + /* + #+TITLE: Test File For chaseadamsio/goorgeous + #+AUTHOR: Chase Adams + #+DESCRIPTION: Just another golang parser for org content! + */ + + l.summaryDivider = summaryDividerOrg + + l.backup() + + if !l.hasPrefix(delimOrg) { + return lexMainSection + } + + // Read lines until we no longer see a #+ prefix +LOOP: + for { + r := l.next() + + switch { + case r == '\n': + if !l.hasPrefix(delimOrg) { + break LOOP + } + case r == eof: + break LOOP + } + } + + l.emit(TypeFrontMatterORG) + + return lexMainSection +} + +// Handle YAML or TOML front matter. +func (l *pageLexer) lexFrontMatterSection(tp ItemType, delimr rune, name string, delim []byte) stateFunc { + for i := 0; i < 2; i++ { + if r := l.next(); r != delimr { + return l.errorf("invalid %s delimiter", name) + } + } + + // Let front matter start at line 1 + wasEndOfLine := l.consumeCRLF() + // We don't care about the delimiters. + l.ignore() + + var r rune + + for { + if !wasEndOfLine { + r = l.next() + if r == eof { + return l.errorf("EOF looking for end %s front matter delimiter", name) + } + } + + if wasEndOfLine || isEndOfLine(r) { + if l.hasPrefix(delim) { + l.emit(tp) + l.pos += 3 + l.consumeCRLF() + l.ignore() + break + } + } + + wasEndOfLine = false + } + + return lexMainSection +} diff --git a/util/pageparser/pagelexer_shortcode.go b/util/pageparser/pagelexer_shortcode.go new file mode 100644 index 00000000..7e973e20 --- /dev/null +++ b/util/pageparser/pagelexer_shortcode.go @@ -0,0 +1,362 @@ +// Copyright 2018 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
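+
+// This file lexes shortcode markup: the no-markup form {{< name params >}}
+// and the markup form {{% name params %}}, including named and positional
+// parameters, quoted and raw-string values, and inline shortcodes.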
+
+package pageparser
+
+type lexerShortcodeState struct {
+	currLeftDelimItem  ItemType
+	currRightDelimItem ItemType
+	isInline           bool
+	currShortcodeName  string          // is only set when a shortcode is in opened state
+	closingState       int             // > 0 = on its way to be closed
+	elementStepNum     int             // step number in element
+	paramElements      int             // number of elements (name + value = 2) found first
+	openShortcodes     map[string]bool // set of shortcodes in open state
+}
+
+// Shortcode syntax
+var (
+	leftDelimSc            = []byte("{{")
+	leftDelimScNoMarkup    = []byte("{{<")
+	rightDelimScNoMarkup   = []byte(">}}")
+	leftDelimScWithMarkup  = []byte("{{%")
+	rightDelimScWithMarkup = []byte("%}}")
+	leftComment            = []byte("/*") // comments in this context are used to mark shortcodes as "not really a shortcode"
+	rightComment           = []byte("*/")
+)
+
+func (l *pageLexer) isShortCodeStart() bool {
+	return l.hasPrefix(leftDelimScWithMarkup) || l.hasPrefix(leftDelimScNoMarkup)
+}
+
+func lexShortcodeLeftDelim(l *pageLexer) stateFunc {
+	l.pos += len(l.currentLeftShortcodeDelim())
+	if l.hasPrefix(leftComment) {
+		return lexShortcodeComment
+	}
+	l.emit(l.currentLeftShortcodeDelimItem())
+	l.elementStepNum = 0
+	l.paramElements = 0
+	return lexInsideShortcode
+}
+
+func lexShortcodeComment(l *pageLexer) stateFunc {
+	posRightComment := l.index(append(rightComment, l.currentRightShortcodeDelim()...))
+	if posRightComment <= 1 {
+		return l.errorf("comment must be closed")
+	}
+	// we emit all as text, except the comment markers
+	l.emit(tText)
+	l.pos += len(leftComment)
+	l.ignore()
+	l.pos += posRightComment - len(leftComment)
+	l.emit(tText)
+	l.pos += len(rightComment)
+	l.ignore()
+	l.pos += len(l.currentRightShortcodeDelim())
+	l.emit(tText)
+	return lexMainSection
+}
+
+func lexShortcodeRightDelim(l *pageLexer) stateFunc {
+	l.closingState = 0
+	l.pos += len(l.currentRightShortcodeDelim())
+	l.emit(l.currentRightShortcodeDelimItem())
+	return lexMainSection
+}
+
+// either:
+// 1. param
+// 2. "param" or "param\"
+// 3. param="123" or param="123\"
+// 4. param="Some \"escaped\" text"
+// 5. `param`
+// 6. param=`123`
+func lexShortcodeParam(l *pageLexer, escapedQuoteStart bool) stateFunc {
+	first := true
+	nextEq := false
+
+	var r rune
+
+	for {
+		r = l.next()
+		if first {
+			if r == '"' || (r == '`' && !escapedQuoteStart) {
+				// a positional param with quotes
+				if l.paramElements == 2 {
+					return l.errorf("got quoted positional parameter. Cannot mix named and positional parameters")
+				}
+				l.paramElements = 1
+				l.backup()
+				if r == '"' {
+					return lexShortcodeQuotedParamVal(l, !escapedQuoteStart, tScParam)
+				}
+				return lexShortCodeParamRawStringVal(l, tScParam)
+			} else if r == '`' && escapedQuoteStart {
+				return l.errorf("unrecognized escape character")
+			}
+			first = false
+		} else if r == '=' {
+			// a named param
+			l.backup()
+			nextEq = true
+			break
+		}
+
+		if !isAlphaNumericOrHyphen(r) && r != '.' { // Floats have period
+			l.backup()
+			break
+		}
+	}
+
+	if l.paramElements == 0 {
+		l.paramElements++
+
+		if nextEq {
+			l.paramElements++
+		}
+	} else {
+		if nextEq && l.paramElements == 1 {
+			return l.errorf("got named parameter '%s'. Cannot mix named and positional parameters", l.current())
+		} else if !nextEq && l.paramElements == 2 {
+			return l.errorf("got positional parameter '%s'. 
Cannot mix named and positional parameters", l.current())
+		}
+	}
+
+	l.emit(tScParam)
+	return lexInsideShortcode
+}
+
+func lexShortcodeParamVal(l *pageLexer) stateFunc {
+	l.consumeToSpace()
+	l.emit(tScParamVal)
+	return lexInsideShortcode
+}
+
+func lexShortCodeParamRawStringVal(l *pageLexer, typ ItemType) stateFunc {
+	openBacktickFound := false
+
+Loop:
+	for {
+		switch r := l.next(); {
+		case r == '`':
+			if openBacktickFound {
+				l.backup()
+				break Loop
+			} else {
+				openBacktickFound = true
+				l.ignore()
+			}
+		case r == eof:
+			return l.errorf("unterminated raw string in shortcode parameter-argument: '%s'", l.current())
+		}
+	}
+
+	l.emitString(typ)
+	l.next()
+	l.ignore()
+
+	return lexInsideShortcode
+}
+
+func lexShortcodeQuotedParamVal(l *pageLexer, escapedQuotedValuesAllowed bool, typ ItemType) stateFunc {
+	openQuoteFound := false
+	escapedInnerQuoteFound := false
+	escapedQuoteState := 0
+
+Loop:
+	for {
+		switch r := l.next(); {
+		case r == '\\':
+			if l.peek() == '"' {
+				if openQuoteFound && !escapedQuotedValuesAllowed {
+					l.backup()
+					break Loop
+				} else if openQuoteFound {
+					// the coming quote is inside
+					escapedInnerQuoteFound = true
+					escapedQuoteState = 1
+				}
+			} else if l.peek() == '`' {
+				return l.errorf("unrecognized escape character")
+			}
+		case r == eof, r == '\n':
+			return l.errorf("unterminated quoted string in shortcode parameter-argument: '%s'", l.current())
+		case r == '"':
+			if escapedQuoteState == 0 {
+				if openQuoteFound {
+					l.backup()
+					break Loop
+				} else {
+					openQuoteFound = true
+					l.ignore()
+				}
+			} else {
+				escapedQuoteState = 0
+			}
+		}
+	}
+
+	if escapedInnerQuoteFound {
+		l.ignoreEscapesAndEmit(typ)
+	} else {
+		l.emitString(typ)
+	}
+
+	r := l.next()
+
+	switch r {
+	case '\\':
+		if l.peek() == '"' {
+			// ignore the escaped closing quote
+			l.ignore()
+			l.next()
+			l.ignore()
+		}
+	case '"':
+		// ignore closing quote
+		l.ignore()
+	default:
+		// handled by next state
+		l.backup()
+	}
+
+	return lexInsideShortcode
+}
+
+// Inline shortcodes have the form {{< myshortcode.inline >}}
+var inlineIdentifier = []byte("inline ")
+
+// scans an alphanumeric inside shortcode
+func lexIdentifierInShortcode(l *pageLexer) stateFunc {
+	lookForEnd := false
+Loop:
+	for {
+		switch r := l.next(); {
+		case isAlphaNumericOrHyphen(r):
+		// Allow forward slash inside names to make it possible to create namespaces.
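+		// e.g. {{< mylib/video 123 >}} lexes "mylib/video" as a single name.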
+ case r == '/': + case r == '.': + l.isInline = l.hasPrefix(inlineIdentifier) + if !l.isInline { + return l.errorf("period in shortcode name only allowed for inline identifiers") + } + default: + l.backup() + word := string(l.input[l.start:l.pos]) + if l.closingState > 0 && !l.openShortcodes[word] { + return l.errorf("closing tag for shortcode '%s' does not match start tag", word) + } else if l.closingState > 0 { + l.openShortcodes[word] = false + lookForEnd = true + } + + l.closingState = 0 + l.currShortcodeName = word + l.openShortcodes[word] = true + l.elementStepNum++ + if l.isInline { + l.emit(tScNameInline) + } else { + l.emit(tScName) + } + break Loop + } + } + + if lookForEnd { + return lexEndOfShortcode + } + return lexInsideShortcode +} + +func lexEndOfShortcode(l *pageLexer) stateFunc { + l.isInline = false + if l.hasPrefix(l.currentRightShortcodeDelim()) { + return lexShortcodeRightDelim + } + switch r := l.next(); { + case isSpace(r): + l.ignore() + default: + return l.errorf("unclosed shortcode") + } + return lexEndOfShortcode +} + +// scans the elements inside shortcode tags +func lexInsideShortcode(l *pageLexer) stateFunc { + if l.hasPrefix(l.currentRightShortcodeDelim()) { + return lexShortcodeRightDelim + } + switch r := l.next(); { + case r == eof: + // eol is allowed inside shortcodes; this may go to end of document before it fails + return l.errorf("unclosed shortcode action") + case isSpace(r), isEndOfLine(r): + l.ignore() + case r == '=': + l.consumeSpace() + l.ignore() + peek := l.peek() + if peek == '"' || peek == '\\' { + return lexShortcodeQuotedParamVal(l, peek != '\\', tScParamVal) + } else if peek == '`' { + return lexShortCodeParamRawStringVal(l, tScParamVal) + } + return lexShortcodeParamVal + case r == '/': + if l.currShortcodeName == "" { + return l.errorf("got closing shortcode, but none is open") + } + l.closingState++ + l.isInline = false + l.emit(tScClose) + case r == '\\': + l.ignore() + if l.peek() == '"' || l.peek() == '`' { + return lexShortcodeParam(l, true) + } + case l.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r == '"' || r == '`'): // positional params can have quotes + l.backup() + return lexShortcodeParam(l, false) + case isAlphaNumeric(r): + l.backup() + return lexIdentifierInShortcode + default: + return l.errorf("unrecognized character in shortcode action: %#U. Note: Parameters with non-alphanumeric args must be quoted", r) + } + return lexInsideShortcode +} + +func (l *pageLexer) currentLeftShortcodeDelimItem() ItemType { + return l.currLeftDelimItem +} + +func (l *pageLexer) currentRightShortcodeDelimItem() ItemType { + return l.currRightDelimItem +} + +func (l *pageLexer) currentLeftShortcodeDelim() []byte { + if l.currLeftDelimItem == tLeftDelimScWithMarkup { + return leftDelimScWithMarkup + } + return leftDelimScNoMarkup +} + +func (l *pageLexer) currentRightShortcodeDelim() []byte { + if l.currRightDelimItem == tRightDelimScWithMarkup { + return rightDelimScWithMarkup + } + return rightDelimScNoMarkup +} diff --git a/util/pageparser/pageparser.go b/util/pageparser/pageparser.go new file mode 100644 index 00000000..7fcf4db5 --- /dev/null +++ b/util/pageparser/pageparser.go @@ -0,0 +1,235 @@ +// Copyright 2019 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pageparser + +import ( + "bytes" + "errors" + "fmt" + "io" + + "github.com/go-sonic/sonic/util/pageparser/metadecoders" +) + +// Result holds the parse result. +type Result interface { + // Iterator returns a new Iterator positioned at the beginning of the parse tree. + Iterator() *Iterator + // Input returns the input to Parse. + Input() []byte +} + +var _ Result = (*pageLexer)(nil) + +// Parse parses the page in the given reader according to the given Config. +func Parse(r io.Reader, cfg Config) (Result, error) { + return parseSection(r, cfg, lexIntroSection) +} + +type ContentFrontMatter struct { + Content []byte + FrontMatter map[string]any + FrontMatterFormat metadecoders.Format +} + +// ParseFrontMatterAndContent is a convenience method to extract front matter +// and content from a content page. +func ParseFrontMatterAndContent(r io.Reader) (ContentFrontMatter, error) { + var cf ContentFrontMatter + + psr, err := Parse(r, Config{}) + if err != nil { + return cf, err + } + + var frontMatterSource []byte + + iter := psr.Iterator() + + walkFn := func(item Item) bool { + if frontMatterSource != nil { + // The rest is content. + cf.Content = psr.Input()[item.low:] + // Done + return false + } else if item.IsFrontMatter() { + cf.FrontMatterFormat = FormatFromFrontMatterType(item.Type) + frontMatterSource = item.Val(psr.Input()) + } + return true + } + + iter.PeekWalk(walkFn) + + cf.FrontMatter, err = metadecoders.Default.UnmarshalToMap(frontMatterSource, cf.FrontMatterFormat) + return cf, err +} + +func FormatFromFrontMatterType(typ ItemType) metadecoders.Format { + switch typ { + case TypeFrontMatterJSON: + return metadecoders.JSON + case TypeFrontMatterORG: + return metadecoders.ORG + case TypeFrontMatterTOML: + return metadecoders.TOML + case TypeFrontMatterYAML: + return metadecoders.YAML + default: + return "" + } +} + +// ParseMain parses starting with the main section. Used in tests. +func ParseMain(r io.Reader, cfg Config) (Result, error) { + return parseSection(r, cfg, lexMainSection) +} + +func parseSection(r io.Reader, cfg Config, start stateFunc) (Result, error) { + b, err := io.ReadAll(r) + if err != nil { + return nil, fmt.Errorf("failed to read page content: %w", err) + } + return parseBytes(b, cfg, start) +} + +func parseBytes(b []byte, cfg Config, start stateFunc) (Result, error) { + lexer := newPageLexer(b, start, cfg) + lexer.run() + return lexer, nil +} + +// NewIterator creates a new Iterator. +func NewIterator(items Items) *Iterator { + return &Iterator{items: items, lastPos: -1} +} + +// An Iterator has methods to iterate a parsed page with support going back +// if needed. +type Iterator struct { + items Items + lastPos int // position of the last item returned by nextItem +} + +// consumes and returns the next item +func (t *Iterator) Next() Item { + t.lastPos++ + return t.Current() +} + +var errIndexOutOfBounds = Item{Type: tError, Err: errors.New("no more tokens")} + +// Current will repeatably return the current item. 
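+// It does not advance the iterator.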
+func (t *Iterator) Current() Item {
+	if t.lastPos >= len(t.items) {
+		return errIndexOutOfBounds
+	}
+	return t.items[t.lastPos]
+}
+
+// backs up one token.
+func (t *Iterator) Backup() {
+	if t.lastPos < 0 {
+		panic("need to go forward before going back")
+	}
+	t.lastPos--
+}
+
+// Pos returns the current position in the input.
+func (t *Iterator) Pos() int {
+	return t.lastPos
+}
+
+// check for non-error and non-EOF types coming next
+func (t *Iterator) IsValueNext() bool {
+	i := t.Peek()
+	return i.Type != tError && i.Type != tEOF
+}
+
+// look at, but do not consume, the next item
+// repeated, sequential calls will return the same item
+func (t *Iterator) Peek() Item {
+	return t.items[t.lastPos+1]
+}
+
+// PeekWalk will feed the next items in the iterator to walkFn
+// until it returns false.
+func (t *Iterator) PeekWalk(walkFn func(item Item) bool) {
+	for i := t.lastPos + 1; i < len(t.items); i++ {
+		item := t.items[i]
+		if !walkFn(item) {
+			break
+		}
+	}
+}
+
+// Consume is a convenience method to consume the next n tokens,
+// but back off Errors and EOF.
+func (t *Iterator) Consume(cnt int) {
+	for i := 0; i < cnt; i++ {
+		token := t.Next()
+		if token.Type == tError || token.Type == tEOF {
+			t.Backup()
+			break
+		}
+	}
+}
+
+// LineNumber returns the current line number. Used for logging.
+func (t *Iterator) LineNumber(source []byte) int {
+	return bytes.Count(source[:t.Current().low], lf) + 1
+}
+
+// IsProbablySourceOfItems returns true if the given source looks like the
+// original source of the items.
+// There may be some false positives, but that is highly unlikely and good enough
+// for the planned purpose.
+// It will also return false if the last item is not EOF (error situations) and
+// false if both source and items are empty.
+func IsProbablySourceOfItems(source []byte, items Items) bool {
+	if len(source) == 0 && len(items) == 0 {
+		return false
+	}
+	if len(items) == 0 {
+		return false
+	}
+
+	last := items[len(items)-1]
+	if last.Type != tEOF {
+		return false
+	}
+
+	if last.Pos() != len(source) {
+		return false
+	}
+
+	for _, item := range items {
+		if item.Type == tError {
+			return false
+		}
+		if item.Type == tEOF {
+			return true
+		}
+
+		if item.Pos() >= len(source) {
+			return false
+		}
+
+		if item.firstByte != source[item.Pos()] {
+			return false
+		}
+	}
+
+	return true
+}
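
A minimal usage sketch of the parser added above (illustrative only; it uses
just the exported API from util/pageparser):

	package main

	import (
		"fmt"
		"strings"

		"github.com/go-sonic/sonic/util/pageparser"
	)

	func main() {
		src := "---\ntitle: Hello\ntags: [a, b]\n---\nContent here.\n"

		// Split the page into front matter and content.
		cf, err := pageparser.ParseFrontMatterAndContent(strings.NewReader(src))
		if err != nil {
			panic(err)
		}

		fmt.Println(cf.FrontMatterFormat)    // yaml
		fmt.Println(cf.FrontMatter["title"]) // Hello
		fmt.Printf("%s", cf.Content)         // Content here.
	}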