15 lines
344 B
Python
15 lines
344 B
Python
|
#
|
||
|
# Takes an HTML string from gnulinux.ch and returns the plain text as a string.
|
||
|
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
|
||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||
|
#
|
||
|
from trafilatura import extract
|
||
|
|
||
|
|
||
|
class HmltToText:
    """Callable converter from an HTML string to plain text.

    Instances carry no state. Calling an instance hands the HTML string to
    trafilatura's ``extract`` and returns that call's result unchanged.
    """

    def __init__(self):
        """Create the converter; there is nothing to configure."""

    def __call__(self, htmlstr):
        """Return the text that ``extract`` produces for ``htmlstr``.

        Args:
            htmlstr: The HTML document as a string.

        Returns:
            Whatever ``extract(htmlstr)`` returns, passed through as-is.
        """
        # NOTE(review): presumably ``extract`` can yield None when no text is
        # found -- confirm against the trafilatura docs before relying on str.
        plain_text = extract(htmlstr)
        return plain_text
|