-
Notifications
You must be signed in to change notification settings - Fork 2
/
kindleParser.js
94 lines (78 loc) · 2.42 KB
/
kindleParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
const moment = require('moment')
const { getQuoteByChapter } = require('./utils')
// Delimiter string on which parse() splits the raw input into individual clippings.
const SEPARATOR = '=========='
// moment format string used by getDate() to read a date (parsed with the 'es' locale).
// Square brackets in a moment format mark literal text.
// NOTE(review): the `[escaped]` literals look like placeholder text rather than
// real date words — confirm against an actual clippings file.
const INPUT_DATE_FORMAT = 'D [escaped] MMMM [escaped] YYYY H:mm:ss'
// moment format string used by getDate() for the dates it returns.
const OUTPUT_DATE_FORMAT = 'YYYY-MM-DD'
// Text clean-up helper applied to book titles and quote text in this file.
const parseSymbols = require('./commonTools')
/**
 * Parses the raw text of a clippings export into a list of books with quotes.
 *
 * The input is split on SEPARATOR. Each clipping is expected to have the shape
 *   line 1: book title followed by the author in parentheses
 *   line 2: "- Your Highlight|Note|Bookmark on page N | Location A-B | Added on <weekday>, <date> <time>"
 *   line 3: (blank)
 *   line 4: the quote text
 * Clippings without a metadata line, without quote text, or whose metadata line
 * does not match that pattern are skipped. Both CRLF and LF line endings are
 * accepted.
 *
 * Quotes of the same book that share a page are merged into one entry whose
 * text is the individual quotes joined by a blank line.
 *
 * @param {string} input - Full contents of the clippings file.
 * @returns {Promise<Array<{title: string, author: string, date: string,
 *   quotes: Array<{date: string, chapter: string, quote: string}>}>>}
 *   One entry per distinct book title, in order of first appearance.
 */
async function parse(input) {
  // Lines are split on /\r?\n/ below, so the pattern does not need to care
  // about a trailing carriage return.
  const metadataPattern =
    /^- Your (Highlight|Note|Bookmark) on page ([0-9]+) \| Location ([0-9]+-[0-9]+) \| Added on \w+, (.+) (\d+:\d+:\d+ [A-Z]{2})$/i

  // Drop every whitespace-only segment (e.g. whatever follows the last
  // separator), whichever line ending the file uses.
  const rawClippings = input
    .split(SEPARATOR)
    .filter((clipping) => clipping.trim() !== '')

  const books = []

  rawClippings.forEach((clipping) => {
    const [bookData, data, , quote] = clipping.trim().split(/\r?\n/)

    // A clipping with fewer lines than expected (title only, or a bookmark
    // without text) carries nothing to record.
    if (data == null || quote == null || quote.trim() === '') return

    const dataMatch = data.match(metadataPattern)
    if (!dataMatch) return

    // Capture groups: 1 = kind, 2 = page, 3 = location, 4 = date, 5 = time.
    const [, , page, , addedOn] = dataMatch
    const datedQuote = {
      date: addedOn,
      chapter: `Page ${page}`,
      quote: `${parseSymbols(quote)} | ${addedOn}`,
    }

    const currentBookTitle = getBookTitle(bookData)
    const existingBook = getExistingBook(books, currentBookTitle)

    if (!existingBook) {
      books.push({
        title: currentBookTitle,
        author: getAuthor(bookData),
        quotes: [datedQuote],
      })
      return
    }

    const prevQuote = getQuoteByChapter(existingBook.quotes, datedQuote.chapter)
    if (prevQuote) {
      // Same page as an earlier quote: append to it instead of adding an entry.
      prevQuote.quote = `${prevQuote.quote}\n\n${datedQuote.quote}`
    } else {
      existingBook.quotes.push(datedQuote)
    }
  })

  for (const book of books) {
    book.date = getOldestQuoteDate(book)
  }

  return books
}
/**
 * Extracts the date that follows the last comma of a metadata line and
 * reformats it.
 *
 * The text after the last comma is read with INPUT_DATE_FORMAT under the 'es'
 * locale and written out with OUTPUT_DATE_FORMAT.
 *
 * @param {string} data - Metadata line of a clipping.
 * @returns {string} The reformatted date, or '' when `data` is falsy.
 */
function getDate(data) {
  if (!data) return ''

  // Skip the comma itself and the character right after it.
  const dateStart = data.lastIndexOf(',') + 2
  const spanishDate = data.substring(dateStart, data.length).trim()
  const parsedDate = moment(spanishDate, INPUT_DATE_FORMAT, 'es')
  return parsedDate.format(OUTPUT_DATE_FORMAT)
}
/**
 * Extracts the book title from the first line of a clipping.
 *
 * The title is everything before the last opening parenthesis (which starts
 * the author), trimmed and passed through parseSymbols. When the line contains
 * no parenthesis at all, the whole line is taken as the title instead of
 * producing an empty string.
 *
 * @param {string} bookData - First line of a clipping.
 * @returns {string} The cleaned-up title.
 */
function getBookTitle(bookData) {
  const authorStart = bookData.lastIndexOf('(')
  // lastIndexOf returns -1 when there is no author part; substring(0, -1)
  // would then yield '' and lose the title.
  const rawTitle =
    authorStart === -1 ? bookData : bookData.substring(0, authorStart)
  return parseSymbols(rawTitle.trim())
}
/**
 * Looks up an already-collected book by exact title.
 *
 * @param {Array<{title: string}>} books - Books collected so far.
 * @param {string} currentBookTitle - Title to look for (compared with ===).
 * @returns {object|null} The first book with that title, or null if none.
 */
function getExistingBook(books, currentBookTitle) {
  // The default only kicks in when the filtered list is empty.
  const [firstMatch = null] = books.filter(
    ({ title }) => title === currentBookTitle
  )
  return firstMatch
}
/**
 * Extracts the author from the first line of a clipping.
 *
 * @param {string} bookData - First line of a clipping.
 * @returns {string} The text between the last '(' and the last ')'.
 */
function getAuthor(bookData) {
  const openParen = bookData.lastIndexOf('(')
  const closeParen = bookData.lastIndexOf(')')
  return bookData.substring(openParen + 1, closeParen)
}
/**
 * Picks one date out of a book's quotes.
 *
 * The quote whose `date` compares greatest under `>` wins; on ties the earlier
 * quote is kept. Throws when `book.quotes` is empty, because the reduction has
 * no initial value.
 *
 * NOTE(review): the name says "oldest" but the comparison selects the greatest
 * value — confirm which one is intended. If the dates are strings, `>`
 * compares them lexicographically, not chronologically.
 *
 * @param {{quotes: Array<{date: *}>}} book - Book whose quotes are inspected.
 * @returns {*} The `date` of the selected quote.
 */
function getOldestQuoteDate(book) {
  const keepGreater = (best, candidate) => {
    if (candidate.date > best.date) return candidate
    return best
  }
  const selected = book.quotes.reduce(keepGreater)
  return selected.date
}
// The module's sole export is the parse function.
module.exports = parse