gnu-keywords/gnukeywords.py

20 lines
446 B
Python
Raw Normal View History

2022-05-18 18:51:42 +02:00
#
# Find keywords in articles and return them.
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
# SPDX-License-Identifier: GPL-3.0-or-later
#
import spacy
class GnuKeywords:
    """Callable keyword finder for articles.

    NOTE(review): keyword extraction is not implemented yet. Calling an
    instance returns a fixed placeholder list regardless of its input.
    """

    # Maps a short language code to a model name. Presumably these are spaCy
    # pipeline names to be loaded later; nothing in this class loads them yet.
    modules = {
        'de': 'de_core_news_sm',
        'en': 'en_core_web_lg'
    }

    def __init__(self, lang):
        """Remember the language this instance is meant for.

        Args:
            lang: Stored unchanged on ``self.lang``; it is not validated.
                Presumably a key of ``modules`` ('de' or 'en'), although the
                original inline note mentioned 'de_core_news_sm' -- TODO
                confirm which form callers pass.
        """
        self.lang = lang

    def __call__(self, htmlstr):
        """Return the keywords for ``htmlstr``.

        Args:
            htmlstr: Currently ignored. Presumably the article as an HTML
                string -- TODO confirm against the caller.

        Returns:
            A new list of placeholder keywords; the same five strings are
            returned on every call.
        """
        return ['gnu', 'linux', 'for', 'ever', 'love']