feat: Ombyggd parsning av nyhetsbrev (#65)
* feat: 🎸 Ombyggd parsning av nyhetsbrev
This commit is contained in:
parent
905b893ca7
commit
a5dfb704f4
|
@ -1,6 +1,6 @@
|
|||
module.exports = {
|
||||
preset: 'ts-jest',
|
||||
testEnvironment: 'node',
|
||||
testEnvironment: 'jsdom',
|
||||
transform: {
|
||||
'.(ts|tsx)': 'ts-jest'
|
||||
},
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,37 +1,57 @@
|
|||
import * as h2m from 'h2m'
|
||||
import { htmlDecode } from 'js-htmlencode'
|
||||
import { decode } from 'he'
|
||||
import {
|
||||
parse, HTMLElement, TextNode,
|
||||
} from 'node-html-parser'
|
||||
|
||||
export const trim = (html: string = ''): string => {
|
||||
const trimmed = html
|
||||
.replace(/ /g, ' ')
|
||||
.split('>')
|
||||
.map((token) => token.trim())
|
||||
.join('>')
|
||||
.split('</')
|
||||
.map((token) => token.trim())
|
||||
.join('</')
|
||||
|
||||
let result = trimmed
|
||||
const rxSpaces = /href="(.*)"/g
|
||||
let matches = trimmed.match(rxSpaces)
|
||||
if (matches) {
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
for (const match of matches) {
|
||||
result = result.replace(match, match.replace(/ /g, '%20'))
|
||||
}
|
||||
}
|
||||
|
||||
const rxSpacesSing = /href='(.*)'/g
|
||||
matches = trimmed.match(rxSpacesSing)
|
||||
if (matches) {
|
||||
// eslint-disable-next-line no-restricted-syntax
|
||||
for (const match of matches) {
|
||||
result = result.replace(match, match.replace(/ /g, '%20'))
|
||||
}
|
||||
}
|
||||
return result
|
||||
/**
 * Lower-case tag names whose direct text content has its surrounding
 * whitespace stripped by `cleanText`.
 */
const trimNodes = [
  // inline emphasis
  'strong', 'b', 'em', 'i', 'u', 's',
  // headings
  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
  // list items and links
  'li', 'a',
]

/**
 * Builds a new text node from `node`, trimming its raw text when the parent
 * tag is one of `trimNodes`.
 *
 * @param node - Text node whose `rawText` is copied.
 * @param parentType - Tag name of the enclosing element; compared
 *   case-insensitively. A falsy value leaves the text untouched.
 * @returns A fresh `TextNode`; the input node is not modified.
 */
const cleanText = (node: TextNode, parentType: string): TextNode => {
  const raw = node.rawText
  if (!parentType) {
    return new TextNode(raw)
  }
  const shouldTrim = trimNodes.includes(parentType.toLowerCase())
  return new TextNode(shouldTrim ? raw.trim() : raw)
}
|
||||
|
||||
/**
 * Recursively rebuilds `node` as a new element tree.
 *
 * - Attributes are re-serialized as `key="value"` pairs joined by spaces;
 *   spaces inside a non-empty `href` value are replaced with `%20`.
 * - Child elements are cleaned recursively, child text nodes go through
 *   `cleanText` with this node's tag name.
 * - Children that are neither `HTMLElement` nor `TextNode` are left out of
 *   the copy.
 *
 * @param node - Element to copy; it is read but not modified.
 * @returns The rebuilt element, created with the same tag name and the
 *   original node's parent.
 */
const deepClean = (node: HTMLElement): HTMLElement => {
  const rawAttrs = Object.entries(node.attributes)
    .map(([name, value]) => {
      // Only links need their spaces percent-encoded.
      const encoded = (name === 'href' && value)
        ? value.replace(/ /g, '%20')
        : value
      return `${name}="${encoded}"`
    })
    .join(' ')

  const copy = new HTMLElement(node.tagName, {}, rawAttrs, node.parentNode)

  node.childNodes.forEach((child) => {
    if (child instanceof HTMLElement) {
      copy.childNodes.push(deepClean(child))
      return
    }
    if (child instanceof TextNode) {
      copy.childNodes.push(cleanText(child, node.tagName))
    }
  })

  return copy
}
|
||||
|
||||
/**
 * Normalizes an HTML string: decodes it with `he`'s `decode`, parses the
 * result, runs the tree through `deepClean` and serializes it again.
 *
 * NOTE(review): decoding happens before parsing, so entity-encoded markup in
 * the input presumably ends up parsed as real tags. Confirm this is intended.
 *
 * @param html - HTML source; defaults to an empty string.
 * @returns The `outerHTML` of the cleaned tree.
 */
export const clean = (html: string = ''): string => {
  const decoded = decode(html)
  const root = parse(decoded)
  return deepClean(root).outerHTML
}
|
||||
|
||||
interface Node {
|
||||
name: string
|
||||
attrs: { [key: string]: string }
|
||||
|
@ -44,7 +64,7 @@ const overides = {
|
|||
}
|
||||
|
||||
export const toMarkdown = (html: string): string => {
|
||||
const trimmed = trim(html)
|
||||
const trimmed = clean(html)
|
||||
const markdown = h2m(trimmed, { overides, converter })
|
||||
const decoded = htmlDecode(markdown)
|
||||
return decoded
|
||||
|
|
|
@ -36,10 +36,13 @@
|
|||
"typescript": "^4.1.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"@types/he": "^1.1.1",
|
||||
"camelcase-keys": "^6.2.2",
|
||||
"events": "^3.2.0",
|
||||
"h2m": "^0.7.0",
|
||||
"he": "^1.2.0",
|
||||
"js-htmlencode": "^0.3.0",
|
||||
"luxon": "^1.25.0"
|
||||
"luxon": "^1.25.0",
|
||||
"node-html-parser": "^2.1.0"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -5,9 +5,14 @@
|
|||
"declaration": true,
|
||||
"outDir": "./dist",
|
||||
"strict": true,
|
||||
"moduleResolution": "node",
|
||||
"allowSyntheticDefaultImports": true
|
||||
},
|
||||
"include": ["lib"],
|
||||
"exclude": ["node_modules", "**/__tests__/*", "**/*.test.ts"]
|
||||
}
|
||||
"include": [
|
||||
"lib"
|
||||
],
|
||||
"exclude": [
|
||||
"node_modules",
|
||||
"**/__tests__/*",
|
||||
"**/*.test.ts"
|
||||
]
|
||||
}
|
17
yarn.lock
17
yarn.lock
|
@ -556,6 +556,11 @@
|
|||
dependencies:
|
||||
"@types/node" "*"
|
||||
|
||||
"@types/he@^1.1.1":
|
||||
version "1.1.1"
|
||||
resolved "https://registry.yarnpkg.com/@types/he/-/he-1.1.1.tgz#19e14033c4ee8f1a702c74dcc6182664839ac2b7"
|
||||
integrity sha512-jpzrsR1ns0n3kyWt92QfOUQhIuJGQ9+QGa7M62rO6toe98woQjnsnzjdMtsQXCdvjjmqjS2ZBCC7xKw0cdzU+Q==
|
||||
|
||||
"@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0", "@types/istanbul-lib-coverage@^2.0.1":
|
||||
version "2.0.3"
|
||||
resolved "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.3.tgz"
|
||||
|
@ -2259,6 +2264,11 @@ has@^1.0.3:
|
|||
dependencies:
|
||||
function-bind "^1.1.1"
|
||||
|
||||
he@1.2.0, he@^1.2.0:
|
||||
version "1.2.0"
|
||||
resolved "https://registry.yarnpkg.com/he/-/he-1.2.0.tgz#84ae65fa7eafb165fddb61566ae14baf05664f0f"
|
||||
integrity sha512-F/1DnUGPopORZi0ni+CvrCgHQ5FyEAHRLSApuYWMmrbSwoN2Mn/7k+Gl38gJnR7yyDZk6WLXwiGod1JOWNDKGw==
|
||||
|
||||
hosted-git-info@^2.1.4:
|
||||
version "2.8.8"
|
||||
resolved "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.8.tgz"
|
||||
|
@ -3379,6 +3389,13 @@ node-fetch@^2.6.1:
|
|||
resolved "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.1.tgz"
|
||||
integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==
|
||||
|
||||
node-html-parser@^2.1.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/node-html-parser/-/node-html-parser-2.1.0.tgz#36345804d743a5a1f672d4821a53f6b0e60629a9"
|
||||
integrity sha512-kbCNfqjrwHAbG+mevL8aqjwVtF0Qv66XurWHoGLOc5G9rPR1L3k602jfeczAUUBldLNnCrdsDmO5G5nqAoMW+g==
|
||||
dependencies:
|
||||
he "1.2.0"
|
||||
|
||||
node-int64@^0.4.0:
|
||||
version "0.4.0"
|
||||
resolved "https://registry.npmjs.org/node-int64/-/node-int64-0.4.0.tgz"
|
||||
|
|
Loading…
Reference in New Issue