Update preprocesamiento_articulos.py
Browse files
preprocesamiento_articulos.py
CHANGED
@@ -5,7 +5,7 @@ from nltk.tokenize import word_tokenize, RegexpTokenizer
|
|
5 |
from nltk.corpus import stopwords
|
6 |
from nltk.stem import SnowballStemmer
|
7 |
import textacy
|
8 |
-
import
|
9 |
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
11 |
import csv
|
@@ -102,5 +102,5 @@ def limpieza_articulos(df):
|
|
102 |
def obtener_kpes(df):
|
103 |
df_titulos=pd.DataFrame(df['titulo'], columns=['titulo'])
|
104 |
all_text = ' '. join(df_titulos['titulo'])
|
105 |
-
titulos=textacy.make_spacy_doc(all_text, lang='
|
106 |
return textacy.extract.keyterms.textrank(titulos,topn=10)
|
|
|
5 |
from nltk.corpus import stopwords
|
6 |
from nltk.stem import SnowballStemmer
|
7 |
import textacy
|
8 |
+
import es_core_news_sm
|
9 |
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
11 |
import csv
|
|
|
def obtener_kpes(df):
    """Extract the top key phrases from the article titles via TextRank.

    Concatenates every title into a single text, builds a spaCy doc with
    the Spanish pipeline, and runs textacy's TextRank key-term extractor.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'titulo' column of article-title strings.

    Returns
    -------
    List of (keyterm, score) tuples — the 10 highest-ranked key phrases.
    """
    # Join the column directly: the original built an intermediate
    # pd.DataFrame(df['titulo'], columns=['titulo']) only to re-read the
    # same column, which is a redundant full copy.
    all_text = ' '.join(df['titulo'])
    # 'es_core_news_sm' must be installed as a spaCy pipeline; textacy
    # loads it by name, so the separate `import es_core_news_sm` the diff
    # adds at the top of the file is not actually needed by this call.
    titulos = textacy.make_spacy_doc(all_text, lang='es_core_news_sm')
    return textacy.extract.keyterms.textrank(titulos, topn=10)