深圳办公室装修公司,百度推广关键词怎么优化,网站建设构建方案,主题 外贸网站 模板

我正在尝试使用 scikit-learn 来学习文本的多标签分类，我正在尝试调整 scikit-learn 附带的一个初始示例教程，用于使用 wikipedia 文章作为训练数据对语言进行分类。我试图在下面实现这一点，但代码仍然为每个句子只返回一个标签，我希望最后一个预测返回…

我正在尝试使用 scikit-learn 来学习文本的多标签分类，我正在尝试调整 scikit-learn 附带的一个初始示例教程，用于使用 wikipedia 文章作为训练数据对语言进行分类。我试图在下面实现这一点，但代码仍然为每个句子只返回一个标签，我希望最后一个预测返回 fr 和 en。有谁能建议正确的方法来启用多标签分类？

import sys
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.datasets import make_multilabel_classification
from sklearn.preprocessing import LabelBinarizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.datasets import load_files
from sklearn.cross_validation import train_test_split
from sklearn import metrics
from sklearn.multiclass import OneVsRestClassifier
#change model_selection to cross_validation

# The training data folder must be passed as first argument - This uses the example wiki language data files
languages_data_folder = sys.argv[1]
dataset = load_files(languages_data_folder)

# Split the dataset in training and test set:
docs_train, docs_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.5)

#pipeline
clf = Pipeline([
    ('vectorizer', CountVectorizer(ngram_range=(1,2))),
    ('tfidf', TfidfTransformer()),
    ('clf', OneVsRestClassifier(LinearSVC())),
])

target_names = dataset.target_names

# TASK: Fit the pipeline on the training set
clf.fit(docs_train, y_train)

# TASK: Predict the outcome on the testing set in a variable named y_predicted
y_predicted = clf.predict(docs_test)

print target_names

# Predict the result on some short new sentences:
sentences = [
    u'This is a language detection test.',
    u'Ceci est un test de d\xe9tection de la langue.',
    u'Dies ist ein Test, um die Sprache zu erkennen.',
    u'Bonjour Mon ami. This is a language detection test.',
]
predicted = clf.predict(sentences)

for s, p in zip(sentences, predicted):
    print(u'The language of "%s" is "%s"' % (s, target_names[p]))

返回：

The language of "This is a language detection test." is "en"
The language of "Ceci est un test de détection de la langue." is "fr"
The language of "Dies ist ein Test, um die Sprache zu erkennen." is "de"
The language of "Bonjour Mon ami. This is a language detection test." is …