Thursday, 19 December 2019

Parse a text file and write the generated tokens to a file in Python

Hi All

Please find the program below.

------------------------------------
import re
def ngram_gen(s, n):
    """Return the word n-grams of *s* as space-joined strings.

    The text is lower-cased and every character that is not an ASCII
    letter, a digit, or whitespace is replaced by a space. The result is
    split on the single space character only, so newlines and tabs are
    not separators and stay inside the tokens they touch.

    Args:
        s: Text to tokenize.
        n: Number of consecutive tokens per n-gram.

    Returns:
        A list of n-grams in order of appearance, each one the ``n``
        tokens joined by a single space. The list is empty when ``s``
        has fewer than ``n`` tokens or when ``n`` is less than 1.
    """
    s = s.lower()
    # Punctuation becomes a space so it separates words instead of
    # gluing them together.
    s = re.sub(r'[^a-zA-Z0-9\s]', ' ', s)
    # Consecutive spaces produce empty strings from split(" "); drop them.
    tokens = [token for token in s.split(" ") if token != ""]
    # Zipping the token list against itself shifted by 0..n-1 positions
    # yields every window of n consecutive tokens.
    ngrams = zip(*[tokens[i:] for i in range(n)])
    return [" ".join(ngram) for ngram in ngrams]


# Read the whole input file; the context manager closes the handle even
# if reading fails.
with open("aaa.php", "r") as fil:
    cou = fil.read()

# n=1 yields single-word tokens (unigrams).
am = ngram_gen(cou, n=1)

# Echo each token as produced, then strip any newline or tab characters
# it may still contain before it goes into the output line.
cleaned = []
for i in am:
    print(i)
    cleaned.append(i.replace("\n", "").replace("\t", ""))

# Every token is preceded by one space, so the result starts with a
# space when there is at least one token and is empty otherwise.
str1 = "".join(" " + token for token in cleaned)
print(str1)

# Write the space-separated tokens out; the file is closed on exit from
# the block.
with open("hup.csv", "w") as f:
    f.write(str1)

No comments:

Post a Comment