gnu-keywords/hmlttotext.py

15 lines
344 B
Python
Raw Permalink Normal View History

2022-05-18 18:51:42 +02:00
#
# Takes an HTML string from gnulinux.ch and returns the plain text as a string.
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
# SPDX-License-Identifier: GPL-3.0-or-later
#
from trafilatura import extract
class HmltToText:
    """Callable object that converts an HTML string to text.

    The conversion is delegated entirely to the module-level ``extract``
    function (imported from ``trafilatura``); instances carry no state.
    """

    def __init__(self):
        """Create the converter; there is nothing to configure."""

    def __call__(self, htmlstr):
        """Pass *htmlstr* to ``extract`` and return its result unchanged.

        Args:
            htmlstr: The HTML document to convert.

        Returns:
            Whatever ``extract`` returns for the given input.
            NOTE(review): presumably the extracted plain text, and possibly
            ``None`` when nothing can be extracted -- confirm against the
            trafilatura documentation.
        """
        plain_text = extract(htmlstr)
        return plain_text