Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,20 @@ go get github.com/tiendc/go-linkpreview
// }
```

## Security Guidelines and Best Practices
## Added by VMHUNG
When using this library, it is important to follow security best practices to ensure the safety and integrity of your application. Here are some guidelines:

1. **Input Validation and Sanitization**: Always validate and sanitize input URLs before processing them. This library includes basic validation, but you should implement additional checks as needed for your specific use case.

2. **Security Headers**: The library sets some security headers for HTTP requests, such as `X-Content-Type-Options`, `X-Frame-Options`, and `X-XSS-Protection`. Ensure that these headers are appropriate for your application and consider adding more if necessary.

3. **Error Handling**: Properly handle errors to avoid exposing sensitive information. This library includes basic error handling, but you should implement additional error handling as needed.

4. **Dependencies**: Keep your dependencies up to date to avoid known vulnerabilities. Regularly check for updates and apply them promptly.

5. **Configuration**: Review and configure the library options to suit your security requirements. Disable features that you do not need to minimize the attack surface.

## Contributing

- You are welcome to make pull requests for new functions and bug fixes.
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ require github.com/PuerkitoBio/goquery v1.9.2

require (
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/mvdan/xurls v1.1.0 // indirect
golang.org/x/net v0.24.0 // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ github.com/PuerkitoBio/goquery v1.9.2 h1:4/wZksC3KgkQw7SQgkKotmKljk0M6V8TUvA8Wb4
github.com/PuerkitoBio/goquery v1.9.2/go.mod h1:GHPCaP0ODyyxqcNoFGYlAprUFH81NuRPd0GX3Zu2Mvk=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/mvdan/xurls v1.1.0 h1:OpuDelGQ1R1ueQ6sSryzi6P+1RtBpfQHM8fJwlE45ww=
github.com/mvdan/xurls v1.1.0/go.mod h1:tQlNn3BED8bE/15hnSL2HLkDeLWpNPAwtw7wkEq44oU=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
Expand Down
25 changes: 25 additions & 0 deletions linkpreview.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,38 @@
package linkpreview

import (
"errors"
"io"
"net/http"
"net/url"
"strings"

"github.com/PuerkitoBio/goquery"
)

func validateURL(link string) error {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move this to utils.go

parsedURL, err := url.ParseRequestURI(link)
if err != nil || !strings.HasPrefix(parsedURL.Scheme, "http") {
return errors.New("invalid URL")
}
return nil
}

func Parse(link string, options ...ConfigOption) (ParserResult, error) {
if err := validateURL(link); err != nil {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this is not necessary as http.NewRequest() and http.DefaultClient.Do() should handle the invalid URL correctly.

return ParserResult{}, err
}

request, err := http.NewRequest("GET", link, nil)
if err != nil {
return ParserResult{}, err
}

// Add security headers
request.Header.Set("X-Content-Type-Options", "nosniff")
request.Header.Set("X-Frame-Options", "DENY")
request.Header.Set("X-XSS-Protection", "1; mode=block")
Comment on lines +32 to +34
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is incorrect usage of secure headers. They are for response headers only.
Please see: https://cheatsheetseries.owasp.org/cheatsheets/HTTP_Headers_Cheat_Sheet.html


response, err := http.DefaultClient.Do(request)
if err != nil {
return ParserResult{}, err
Expand All @@ -22,6 +43,10 @@ func Parse(link string, options ...ConfigOption) (ParserResult, error) {
}

func ParseFromReader(link string, data io.Reader, options ...ConfigOption) (ParserResult, error) {
if err := validateURL(link); err != nil {
return ParserResult{}, err
}

doc, err := goquery.NewDocumentFromReader(data)
if err != nil {
return ParserResult{}, err
Expand Down
30 changes: 30 additions & 0 deletions parser.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
package linkpreview

import (
"errors"
)

func (ctx *ParserContext) Parse() error {
err := ctx.readAllTags()
if err != nil {
Expand Down Expand Up @@ -52,6 +56,9 @@ func (ctx *ParserContext) readAllTags() error {
metaTag.OtherAttrs[attr.Key] = attr.Val
}
}
if err := validateMetaTag(metaTag); err != nil {
return err
}
ctx.MetaTags = append(ctx.MetaTags, metaTag)
}

Expand All @@ -76,6 +83,9 @@ func (ctx *ParserContext) readAllTags() error {
linkTag.OtherAttrs[attr.Key] = attr.Val
}
}
if err := validateLinkTag(linkTag); err != nil {
return err
}
ctx.LinkTags = append(ctx.LinkTags, linkTag)
}

Expand All @@ -95,3 +105,23 @@ func (ctx *ParserContext) parseBasicTags() {
}
}
}

func validateMetaTag(tag *MetaTag) error {
Copy link
Owner

@tiendc tiendc Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HTML content may be not correct 100%. The use of this function can cause error to be returned due to a single incorrect piece of data. I suggest adding a config item to ParserConfig, says SkipInvalidTags, with default value false (to be back-ward compatible). When it is turned on, every invalid tags will be ignored instead of returning error to client.

if tag.Name == "" && tag.Property == "" {
return errors.New("meta tag must have either a name or property attribute")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is recommended not to use dynamic error. You should place this error in top of the file like:

var (
     errTagInvalid = errors.New("tag is invalid")
)

// In here
return fmt.Errorf("%w: meta tag must have either a name or property attribute", errTagInvalid)

Another option is to make this function just return bool as we only need to know whether we should keep the tag or not.

}
if tag.Content == "" && tag.Value == "" {
return errors.New("meta tag must have either a content or value attribute")
}
return nil
}

func validateLinkTag(tag *LinkTag) error {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The same comment for this function.

if tag.Rel == "" {
return errors.New("link tag must have a rel attribute")
}
if tag.Href == "" {
return errors.New("link tag must have an href attribute")
}
return nil
}
14 changes: 9 additions & 5 deletions parser_favicons.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,22 @@ func (ctx *ParserContext) parseMaybeFavicon(tag *LinkTag) *Image {
return nil
}

if err := validateURL(tag.Href); err != nil {
return nil
}

image := &Image{
URL: parseURL(tag.Href, ctx.Link),
Type: strOr(tag.Type, tag.Rel),
}

if tag.Sizes != "" {
wh := strings.Split(tag.Sizes, "x")
if len(wh) == 2 {
image.Width = parseInt(wh[0])
image.Height = parseInt(wh[1])
wh := strings.Split(tag.Sizes, "x")
if len(wh) == 2 {
image.Width = parseInt(wh[0])
image.Height = parseInt(wh[1])
}
Comment on lines +32 to +36
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please restore indentation of this block.

}
}

return image
}
9 changes: 9 additions & 0 deletions parser_og.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ func (ctx *ParserContext) parseOGMeta() {
continue
}
tagContent := tag.Content
if tagContent == "" {
continue
}
parsed := true

switch tagProp {
Expand Down Expand Up @@ -60,6 +63,9 @@ func (ctx *ParserContext) parseOGMeta() {
func (ctx *ParserContext) parseOGImages(ogMeta *OGMeta, tags []*MetaTag) {
var currImage *OGImage
for _, tag := range tags {
if tag.Content == "" {
continue
}
switch tag.Property {
case "og:image":
if currImage != nil {
Expand Down Expand Up @@ -96,6 +102,9 @@ func (ctx *ParserContext) parseOGImages(ogMeta *OGMeta, tags []*MetaTag) {
func (ctx *ParserContext) parseOGVideos(ogMeta *OGMeta, tags []*MetaTag) {
var currVideo *OGVideo
for _, tag := range tags {
if tag.Content == "" {
continue
}
switch tag.Property {
case "og:video":
if currVideo != nil {
Expand Down
4 changes: 4 additions & 0 deletions parser_twitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ func (ctx *ParserContext) parseTwitterMeta() {
tagName = tagName[len("twitter:"):]
tagContent := strOr(tag.Content, tag.Value)

if tagContent == "" {
continue
}

switch tagName {
case "url":
twitterMeta.URL = tagContent
Expand Down