Skip to content

Releases: huantt/plaintext-extractor

v1.1.0

21 Aug 09:19
c8f302a
Compare
Choose a tag to compare

Version 1.1.0

Commits

  • [c8f302a] docs: add some simple benchmark for markdown (#3)

V1.0.1 - Update how to truncate new line

07 Aug 07:18
Compare
Choose a tag to compare
v1.0.1

fix(html): update how to truncate new line

V1.0.0 - Markdown & Html Extractors

27 Jul 07:51
Compare
Choose a tag to compare

Usage

Markdown extractor

markdownContent := "# H1 \n*italic* **bold** `code` `not code [link](https://example.com) ![image](https://image.com/image.png) ~~strikethrough~~"
extractor := NewMarkdownExtractor()
output, err := extractor.PlainText(markdownContent)
if err != nil {
    panic(err)
}
fmt.Println(output)
// Output: H1 \nitalic bold code `not code link image strikethrough

Custom Markdown Tag

markdownContent := "This is {color:#0A84FF}red{color}"

customTag := markdown.Tag{
    Name:       "color-custom-tag",
    FullRegex:  regexp.MustCompile("{color:[a-zA-Z0-9#]+}(.*?){color}"),
    StartRegex: regexp.MustCompile("{color:[a-zA-Z0-9#]+}"),
    EndRegex:   regexp.MustCompile("{color}"),
}

markdownExtractor := NewMarkdownExtractor(customTag)
plaintextExtractor := plaintext.NewExtractor(markdownExtractor.PlainText)
output, err := plaintextExtractor.PlainText(markdownContent)
if err != nil {
    panic(err)
}
fmt.Println(output)
// Output: This is red

HTML Extractor

html := `<div>This is a <a href="https://example.com">link</a></div>`
extractor := NewHtmlExtractor()
output, err := extractor.PlainText(html)
if err != nil {
    panic(err)
}
fmt.Println(output)
// Output: This is a link

Multiple extractors

input := `<div> html </div> *markdown*`
markdownExtractor := markdown.NewExtractor()
htmlExtractor := html.NewExtractor()
extractor := NewExtractor(markdownExtractor.PlainText, htmlExtractor.PlainText)
output, err := extractor.PlainText(input)
if err != nil {
    panic(err)
}
fmt.Println(output)
// Output: html markdown