Lisibonny committed on
Commit
5038d1a
1 Parent(s): e29f75c

Update preprocesamiento_articulos.py

Browse files
Files changed (1) hide show
  1. preprocesamiento_articulos.py +4 -0
preprocesamiento_articulos.py CHANGED
@@ -30,6 +30,10 @@ def remove_html_markup(s):
30
  out = out + c
31
 
32
  return out
 
 
 
 
33
 
34
  def eliminar_puntuacion(articulo):
35
  deletetion_symbols = ['!','(',')',"'",'-','[',']','{','}',';',':','"','“','’','”',"'",'`','‘','``','\\' ,'/','|',',','|','<','>','.','..','...','?','@',"#",'$','^','&','*','_','~','+','%','=','¿','¡',"''"]
 
30
  out = out + c
31
 
32
  return out
33
+
34
+ def remove_URL(s):
35
+ """Remove URLs from a sample string"""
36
+ return re.sub(r"http\S+", "", s)
37
 
38
  def eliminar_puntuacion(articulo):
39
  deletetion_symbols = ['!','(',')',"'",'-','[',']','{','}',';',':','"','“','’','”',"'",'`','‘','``','\\' ,'/','|',',','|','<','>','.','..','...','?','@',"#",'$','^','&','*','_','~','+','%','=','¿','¡',"''"]