- import csv
- file_path = r'F:\SMSSpamCollectionjs.txt'
- sms = open(file_path,'r',encoding = 'utf-8')
- sms_data = []
- sms_label = []
- csv_reader = csv.reader(sms,delimiter = '\t') # use csv to read the message data (tab-separated)
- for line in csv_reader:
-     sms_label.append(line[0])
-     sms_data.append(line[1])
- #sms_data.append(preprocessing(line[1]))
- sms.close()
- print(len(sms_label))
- sms_label
- file_path = r'F:\SMSSpamCollectionjs.txt'
- fo = open(file_path,'r',encoding = 'utf-8')
- text = fo.read()
- text
- import nltk
- nltk.download()
- text = "'Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat..'"
- import nltk
- from nltk.corpus import stopwords
- from nltk.stem import WordNetLemmatizer
- def preprocessing(text):
- #text
来源: http://www.bubuko.com/infodetail-2865400.html