当前位置: 首页 > news >正文

手机网站宽度多少合适杭州房产信息网官网

手机网站宽度多少合适,杭州房产信息网官网,深圳网站开发外包哪家好,网页游戏排行榜前十名2021

采用Nagao算法统计各个子字符串的频次，然后基于这些频次统计每个字符串的词频、左右邻个数、左右熵、交互信息(内部凝聚度)。名词解释：Nagao算法：一种快速的统计文本里所有子字符串频次的算法。详细算法可见http://www.doc88.com/p-664123446…

采用Nagao算法统计各个子字符串的频次，然后基于这些频次统计每个字符串的词频、左右邻个数、左右熵、交互信息(内部凝聚度)。

名词解释：

Nagao算法：一种快速的统计文本里所有子字符串频次的算法。详细算法可见 http://www.doc88.com/p-664123446503.html

词频：该字符串在文档中出现的次数。出现次数越多越重要。

左右邻个数：文档中该字符串的左边和右边出现的不同的字的个数。左右邻越多，说明字符串成词概率越高。

左右熵：文档中该字符串的左边和右边出现的不同的字的数量分布的熵。类似上面的指标，有一定区别。

交互信息：每次将某字符串分成两部分，左半部分字符串和右半部分字符串，计算其同时出现的概率除以其各自独立出现的概率，最后取所有的划分里面概率最小值。这个值越大，说明字符串内部凝聚度越高，越可能成词。

算法具体流程：

1.  将输入文件逐行读入，按照非汉字([^\u4E00-\u9FA5]+)以及停词“的很了么呢是嘛个都也比还这于不与才上用就好在和对挺去后没说”，分成一个个字符串，代码如下：

    String[] phrases = line.split("[^\u4E00-\u9FA5]+|["+stopwords+"]");

    停用词可以修改。

2.  获取所有切分后的字符串的左子串和右子串，分别加入左、右PTable。

3.  对PTable排序，并计算LTable。LTable记录的是，排序后的PTable中，下一个子串同上一个子串具有相同字符的数量。

4.  遍历PTable和LTable，即可得到所有子字符串的词频、左右邻。

5.  根据所有子字符串的词频、左右邻结果，输出字符串的词频、左右邻个数、左右熵、交互信息。

1.  NagaoAlgorithm.java

```java
package com.algo.word;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class NagaoAlgorithm {

    private int N;

    private List<String> leftPTable;
    private int[] leftLTable;
    private List<String> rightPTable;
    private int[] rightLTable;
    private double wordNumber;

    private Map<String, TFNeighbor> wordTFNeighbor;

    private final static String stopwords = "的很了么呢是嘛个都也比还这于不与才上用就好在和对挺去后没说";

    private NagaoAlgorithm(){
        //default N = 5
        N = 5;
        leftPTable = new ArrayList<String>();
        rightPTable = new ArrayList<String>();
        wordTFNeighbor = new HashMap<String, TFNeighbor>();
    }

    //reverse phrase
    private String reverse(String phrase) {
        StringBuilder reversePhrase = new StringBuilder();
        for (int i = phrase.length() - 1; i >= 0; i--)
            reversePhrase.append(phrase.charAt(i));
        return reversePhrase.toString();
    }

    //co-prefix length of s1 and s2
    private int coPrefixLength(String s1, String s2){
        int coPrefixLength = 0;
        for(int i = 0; i < Math.min(s1.length(), s2.length()); i++){
            if(s1.charAt(i) == s2.charAt(i)) coPrefixLength++;
            else break;
        }
        return coPrefixLength;
    }

    //add substring of line to pTable
    private void addToPTable(String line){
        //split line according to consecutive none Chinese character
        String[] phrases = line.split("[^\u4E00-\u9FA5]+|["+stopwords+"]");
        for(String phrase : phrases){
            for(int i = 0; i < phrase.length(); i++)
                rightPTable.add(phrase.substring(i));
            String reversePhrase = reverse(phrase);
            for(int i = 0; i < reversePhrase.length(); i++)
                leftPTable.add(reversePhrase.substring(i));
            wordNumber += phrase.length();
        }
    }

    //count lTable
    private void countLTable(){
        Collections.sort(rightPTable);
        rightLTable = new int[rightPTable.size()];
        for(int i = 1; i < rightPTable.size(); i++)
            rightLTable[i] = coPrefixLength(rightPTable.get(i-1), rightPTable.get(i));

        Collections.sort(leftPTable);
        leftLTable = new int[leftPTable.size()];
        for(int i = 1; i < leftPTable.size(); i++)
            leftLTable[i] = coPrefixLength(leftPTable.get(i-1), leftPTable.get(i));

        System.out.println("Info: [Nagao Algorithm Step 2]: having sorted PTable and counted left and right LTable");
    }

    //according to pTable and lTable, count statistical result: TF, neighbor distribution
    private void countTFNeighbor(){
        //get TF and right neighbor
        for(int pIndex = 0; pIndex < rightPTable.size(); pIndex++){
            String phrase = rightPTable.get(pIndex);
            for(int length = 1 + rightLTable[pIndex]; length <= N && length <= phrase.length(); length++){
                String word = phrase.substring(0, length);
                TFNeighbor tfNeighbor = new TFNeighbor();
                tfNeighbor.incrementTF();
                if(phrase.length() > length)
                    tfNeighbor.addToRightNeighbor(phrase.charAt(length));
                for(int lIndex = pIndex+1; lIndex < rightLTable.length; lIndex++){
                    if(rightLTable[lIndex] >= length){
                        tfNeighbor.incrementTF();
                        String coPhrase = rightPTable.get(lIndex);
                        if(coPhrase.length() > length)
                            tfNeighbor.addToRightNeighbor(coPhrase.charAt(length));
                    }
                    else break;
                }
                wordTFNeighbor.put(word, tfNeighbor);
            }
        }
        //get left neighbor
        for(int pIndex = 0; pIndex < leftPTable.size(); pIndex++){
            String phrase = leftPTable.get(pIndex);
            for(int length = 1 + leftLTable[pIndex]; length <= N && length <= phrase.length(); length++){
                String word = reverse(phrase.substring(0, length));
                TFNeighbor tfNeighbor = wordTFNeighbor.get(word);
                if(phrase.length() > length)
                    tfNeighbor.addToLeftNeighbor(phrase.charAt(length));
                for(int lIndex = pIndex + 1; lIndex < leftLTable.length; lIndex++){
                    if(leftLTable[lIndex] >= length){
                        String coPhrase = leftPTable.get(lIndex);
                        if(coPhrase.length() > length)
                            tfNeighbor.addToLeftNeighbor(coPhrase.charAt(length));
                    }
                    else break;
                }
            }
        }
        System.out.println("Info: [Nagao Algorithm Step 3]: having counted TF and Neighbor");
    }

    //according to wordTFNeighbor, count MI of word
    private double countMI(String word){
        if(word.length() <= 1) return 0;
        double coProbability = wordTFNeighbor.get(word).getTF()/wordNumber;
        List<Double> mi = new ArrayList<Double>(word.length());
        for(int pos = 1; pos < word.length(); pos++){
            String leftPart = word.substring(0, pos);
            String rightPart = word.substring(pos);
            double leftProbability = wordTFNeighbor.get(leftPart).getTF()/wordNumber;
            double rightProbability = wordTFNeighbor.get(rightPart).getTF()/wordNumber;
            mi.add(coProbability/(leftProbability*rightProbability));
        }
        return Collections.min(mi);
    }

    //save TF, (left and right) neighbor number, neighbor entropy, mutual information
    private void saveTFNeighborInfoMI(String out, String stopList, String[] threshold){
        try {
            //read stop words file
            Set<String> stopWords = new HashSet<String>();
            BufferedReader br = new BufferedReader(new FileReader(stopList));
            String line;
            while((line = br.readLine()) != null){
                if(line.length() > 1)
                    stopWords.add(line);
            }
            br.close();
            //output words TF, neighbor info, MI
            BufferedWriter bw = new BufferedWriter(new FileWriter(out));
            for(Map.Entry<String, TFNeighbor> entry : wordTFNeighbor.entrySet()){
                if( entry.getKey().length() <= 1 || stopWords.contains(entry.getKey()) ) continue;
                TFNeighbor tfNeighbor = entry.getValue();

                int tf, leftNeighborNumber, rightNeighborNumber;
                double mi;
                tf = tfNeighbor.getTF();
                leftNeighborNumber = tfNeighbor.getLeftNeighborNumber();
                rightNeighborNumber = tfNeighbor.getRightNeighborNumber();
                mi = countMI(entry.getKey());
                if(tf > Integer.parseInt(threshold[0]) && leftNeighborNumber > Integer.parseInt(threshold[1])
                        && rightNeighborNumber > Integer.parseInt(threshold[2]) && mi > Integer.parseInt(threshold[3]) ){
                    StringBuilder sb = new StringBuilder();
                    sb.append(entry.getKey());
                    sb.append(",").append(tf);
                    sb.append(",").append(leftNeighborNumber);
                    sb.append(",").append(rightNeighborNumber);
                    sb.append(",").append(tfNeighbor.getLeftNeighborEntropy());
                    sb.append(",").append(tfNeighbor.getRightNeighborEntropy());
                    sb.append(",").append(mi).append("\n");
                    bw.write(sb.toString());
                }
            }
            bw.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        System.out.println("Info: [Nagao Algorithm Step 4]: having saved to file");
    }

    //apply nagao algorithm to input file
    public static void applyNagao(String[] inputs, String out, String stopList){
        NagaoAlgorithm nagao = new NagaoAlgorithm();
        //step 1: add phrases to PTable
        String line;
        for(String in : inputs){
            try {
                BufferedReader br = new BufferedReader(new FileReader(in));
                while((line = br.readLine()) != null){
                    nagao.addToPTable(line);
                }
                br.close();
            } catch (IOException e) {
                throw new RuntimeException();
            }
        }
        System.out.println("Info: [Nagao Algorithm Step 1]: having added all left and right substrings to PTable");
        //step 2: sort PTable and count LTable
        nagao.countLTable();
        //step3: count TF and Neighbor
        nagao.countTFNeighbor();
        //step4: save TF NeighborInfo and MI
        nagao.saveTFNeighborInfoMI(out, stopList, "20,3,3,5".split(","));
    }

    public static void applyNagao(String[] inputs, String out, String stopList, int n, String filter){
        NagaoAlgorithm nagao = new NagaoAlgorithm();
        nagao.setN(n);
        String[] threshold = filter.split(",");
        if(threshold.length != 4){
            System.out.println("ERROR: filter must have 4 numbers, seperated with ',' ");
            return;
        }
        //step 1: add phrases to PTable
        String line;
        for(String in : inputs){
            try {
                BufferedReader br = new BufferedReader(new FileReader(in));
                while((line = br.readLine()) != null){
                    nagao.addToPTable(line);
                }
                br.close();
            } catch (IOException e) {
                throw new RuntimeException();
            }
        }
        System.out.println("Info: [Nagao Algorithm Step 1]: having added all left and right substrings to PTable");
        //step 2: sort PTable and count LTable
        nagao.countLTable();
        //step3: count TF and Neighbor
        nagao.countTFNeighbor();
        //step4: save TF NeighborInfo and MI
        nagao.saveTFNeighborInfoMI(out, stopList, threshold);
    }

    private void setN(int n){
        N = n;
    }

    public static void main(String[] args) {
        String[] ins = {"E://test//ganfen.txt"};
        applyNagao(ins, "E://test//out.txt", "E://test//stoplist.txt");
    }
}
```

2. TFNeighbor.java

```java
package com.algo.word;

import java.util.HashMap;
import java.util.Map;

public class TFNeighbor {

    private int tf;
    private Map<Character, Integer> leftNeighbor;
    private Map<Character, Integer> rightNeighbor;

    TFNeighbor(){
        leftNeighbor = new HashMap<Character, Integer>();
        rightNeighbor = new HashMap<Character, Integer>();
    }

    //add word to leftNeighbor
    public void addToLeftNeighbor(char word){
        //leftNeighbor.put(word, 1 + leftNeighbor.getOrDefault(word, 0));
        Integer number = leftNeighbor.get(word);
        leftNeighbor.put(word, number == null? 1: 1+number);
    }

    //add word to rightNeighbor
    public void addToRightNeighbor(char word){
        //rightNeighbor.put(word, 1 + rightNeighbor.getOrDefault(word, 0));
        Integer number = rightNeighbor.get(word);
        rightNeighbor.put(word, number == null? 1: 1+number);
    }

    //increment tf
    public void incrementTF(){
        tf++;
    }

    public int getLeftNeighborNumber(){
        return leftNeighbor.size();
    }

    public int getRightNeighborNumber(){
        return rightNeighbor.size();
    }

    public double getLeftNeighborEntropy(){
        double entropy = 0;
        int sum = 0;
        for(int number : leftNeighbor.values()){
            entropy += number*Math.log(number);
            sum += number;
        }
        if(sum == 0) return 0;
        return Math.log(sum) - entropy/sum;
    }

    public double getRightNeighborEntropy(){
        double entropy = 0;
        int sum = 0;
        for(int number : rightNeighbor.values()){
            entropy += number*Math.log(number);
            sum += number;
        }
        if(sum == 0) return 0;
        return Math.log(sum) - entropy/sum;
    }

    public int getTF(){
        return tf;
    }
}
```

3. Main.java

```java
package com.algo.word;

public class Main {

    public static void main(String[] args) {
        //if 3 arguments, first argument is input files splitting with ','
        //second argument is output file
        //output 7 columns split with ',' , like below:
        //word, term frequency, left neighbor number, right neighbor number, left neighbor entropy, right neighbor entropy, mutual information
        //third argument is stop words list
        if(args.length == 3)
            NagaoAlgorithm.applyNagao(args[0].split(","), args[1], args[2]);

        //if 4 arguments, forth argument is the NGram parameter N
        //5th argument is threshold of output words, default is "20,3,3,5"
        //output TF > 20 && (left | right) neighbor number > 3 && MI > 5
        else if(args.length == 5)
            NagaoAlgorithm.applyNagao(args[0].split(","), args[1], args[2], Integer.parseInt(args[3]), args[4]);
    }
}
```

以上所述就是本文的全部内容了，希望大家能够喜欢。
http://www.pierceye.com/news/313306/

相关文章:

  • 用别人家网站做跳转做商品抬价是什么兼职网站
  • 合肥市建设信息中心网站怎么做网站的签约编辑
  • 6入空间网站免费观看网站标题怎么修改
  • iis服务器的默认网站wordpress多级分销插件
  • jquery 网站后台模板 仿2021好心人给个开车的网站
  • 济宁定制网站建设推广关于协会网站建设的意见
  • 门户网站建站要求滨州网站seo服务
  • 国外 视频上传网站源码怎么看网站的访问量
  • 网站的建设框架3网站建设
  • 购物网站设计的目的wordpress 游戏主题下载失败
  • 兰州最好的网站开发公司单页网站是什么样子的
  • 在线购物商城网站江苏营销型网站
  • 如何在百度上为企业做网站wordpress轮播图设置
  • qt 网站开发怎样开发一个app软件
  • html5博客网站模板泉州市住房与城乡建设网站
  • 没网站怎么做app创意设计图片
  • 重庆涪陵网站设计公司推荐外贸优化网站制作
  • 网页怎么制作四页石家庄seo排名外包
  • 高校信息化建设 网站东阳网络科技有限公司
  • 网站 如何做 中英文切换网站怎么提升关键词排名
  • 企业只有建立自己的网站平台金坛建设局网站
  • 网站建设及维护费用网站开发质量控制计划书
  • dede免费网站模板内蒙古住房与建设官方网站
  • 网络设计开发网站电子商务网站建设财务分析
  • 牛商做网站的理念国外便宜域名注册商
  • 深圳网站制作880网页设计与制作教程 个人简历代码
  • 皇岗网站建设济南市住房和城乡建设局
  • 产品创新设计方案seo 网站案例
  • 做名片最好的网站上海网站开发制作公司
  • 网站如何做三端适配wordpress get_the_author_meta