14 lines
344 B
Python
14 lines
344 B
Python
#
# Takes an HTML string from gnulinux.ch and returns the plain text as a string.
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
# SPDX-License-Identifier: GPL-3.0-or-later
#
from trafilatura import extract
|
|
|
|
|
|
class HmltToText:
    """Callable converter that turns an HTML string into plain text.

    The conversion itself is delegated to ``extract`` (imported from
    ``trafilatura`` at the top of this file); the class only wraps that call
    in a callable object and keeps no state of its own.
    """

    def __init__(self):
        """Create a converter; there is nothing to configure."""

    def __call__(self, htmlstr):
        """Return the text that ``extract`` produces for *htmlstr*.

        Args:
            htmlstr: The HTML document as a string.

        Returns:
            Whatever ``extract(htmlstr)`` returns, passed through unchanged.
            NOTE(review): trafilatura documents that ``extract`` may return
            ``None`` when no content is found -- confirm callers handle it.
        """
        return extract(htmlstr)
|