19 lines
446 B
Python
19 lines
446 B
Python
#
|
|
# Finds keywords in articles and returns them
|
|
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
#
|
|
import spacy
|
|
|
|
|
|
class GnuKeywords:
    """Callable that returns a list of keywords for an article.

    As written this is a stub: calling an instance ignores its argument
    and returns a fixed placeholder list.
    """

    # Maps a short language code to a model name.  The values look like
    # spaCy pipeline package names; nothing in this class reads the
    # mapping yet.
    modules = {
        'de': 'de_core_news_sm',
        'en': 'en_core_web_lg'
    }

    def __init__(self, lang):
        """Store the language selector on the instance.

        Args:
            lang: Kept unchanged as ``self.lang``; it is not validated
                against ``modules``.
        """
        # NOTE(review): the original trailing comment was cut off after
        # "de_core_news_sm or", so it is unclear whether ``lang`` is meant
        # to be a key of ``modules`` ('de', 'en') or a model name itself.
        # Confirm against callers.
        self.lang = lang

    def __call__(self, htmlstr):
        """Return the keywords for *htmlstr*.

        Args:
            htmlstr: Currently unused by the implementation.

        Returns:
            A freshly built list of five placeholder strings; the result
            does not depend on ``htmlstr`` or ``self.lang``.
        """
        return ['gnu', 'linux', 'for', 'ever', 'love']
|