mirror of
https://github.com/tcsenpai/pensieve.git
synced 2025-06-06 19:25:24 +00:00
chore: remove unused dict folder
This commit is contained in:
parent
9d1b155f63
commit
f575077e55
@ -1,31 +0,0 @@
|
||||
# CppJieba字典
|
||||
|
||||
文件后缀名代表的是词典的编码方式。
|
||||
比如filename.utf8 是 utf8编码,filename.gbk 是 gbk编码方式。
|
||||
|
||||
|
||||
## 分词
|
||||
|
||||
### jieba.dict.utf8/gbk
|
||||
|
||||
作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
|
||||
|
||||
### hmm_model.utf8/gbk
|
||||
|
||||
作为隐式马尔科夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
|
||||
|
||||
__对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
|
||||
|
||||
|
||||
## 关键词抽取
|
||||
|
||||
### idf.utf8
|
||||
|
||||
IDF(Inverse Document Frequency)
|
||||
在KeywordExtractor中,使用的是经典的TF-IDF算法,所以需要这么一个词典提供IDF信息。
|
||||
|
||||
### stop_words.utf8
|
||||
|
||||
停用词词典
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
258826
dict/idf.utf8
258826
dict/idf.utf8
File diff suppressed because it is too large
Load Diff
348982
dict/jieba.dict.utf8
348982
dict/jieba.dict.utf8
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
@ -1,259 +0,0 @@
|
||||
#初始状态的概率
|
||||
#格式
|
||||
#状态:概率
|
||||
B,a:-4.7623052146
|
||||
B,ad:-6.68006603678
|
||||
B,ag:-3.14e+100
|
||||
B,an:-8.69708322302
|
||||
B,b:-5.01837436211
|
||||
B,bg:-3.14e+100
|
||||
B,c:-3.42388018495
|
||||
B,d:-3.97504752976
|
||||
B,df:-8.88897423083
|
||||
B,dg:-3.14e+100
|
||||
B,e:-8.56355183039
|
||||
B,en:-3.14e+100
|
||||
B,f:-5.49163041848
|
||||
B,g:-3.14e+100
|
||||
B,h:-13.53336513
|
||||
B,i:-6.11578472756
|
||||
B,in:-3.14e+100
|
||||
B,j:-5.05761912847
|
||||
B,jn:-3.14e+100
|
||||
B,k:-3.14e+100
|
||||
B,l:-4.90588358466
|
||||
B,ln:-3.14e+100
|
||||
B,m:-3.6524299819
|
||||
B,mg:-3.14e+100
|
||||
B,mq:-6.7869530014
|
||||
B,n:-1.69662577975
|
||||
B,ng:-3.14e+100
|
||||
B,nr:-2.23104959138
|
||||
B,nrfg:-5.87372217541
|
||||
B,nrt:-4.98564273352
|
||||
B,ns:-2.8228438315
|
||||
B,nt:-4.84609166818
|
||||
B,nz:-3.94698846058
|
||||
B,o:-8.43349870215
|
||||
B,p:-4.20098413209
|
||||
B,q:-6.99812385896
|
||||
B,qe:-3.14e+100
|
||||
B,qg:-3.14e+100
|
||||
B,r:-3.40981877908
|
||||
B,rg:-3.14e+100
|
||||
B,rr:-12.4347528413
|
||||
B,rz:-7.94611647157
|
||||
B,s:-5.52267359084
|
||||
B,t:-3.36474790945
|
||||
B,tg:-3.14e+100
|
||||
B,u:-9.1639172775
|
||||
B,ud:-3.14e+100
|
||||
B,ug:-3.14e+100
|
||||
B,uj:-3.14e+100
|
||||
B,ul:-3.14e+100
|
||||
B,uv:-3.14e+100
|
||||
B,uz:-3.14e+100
|
||||
B,v:-2.67405848743
|
||||
B,vd:-9.04472876024
|
||||
B,vg:-3.14e+100
|
||||
B,vi:-12.4347528413
|
||||
B,vn:-4.33156108902
|
||||
B,vq:-12.1470707689
|
||||
B,w:-3.14e+100
|
||||
B,x:-3.14e+100
|
||||
B,y:-9.84448567586
|
||||
B,yg:-3.14e+100
|
||||
B,z:-7.04568111149
|
||||
B,zg:-3.14e+100
|
||||
E,a:-3.14e+100
|
||||
E,ad:-3.14e+100
|
||||
E,ag:-3.14e+100
|
||||
E,an:-3.14e+100
|
||||
E,b:-3.14e+100
|
||||
E,bg:-3.14e+100
|
||||
E,c:-3.14e+100
|
||||
E,d:-3.14e+100
|
||||
E,df:-3.14e+100
|
||||
E,dg:-3.14e+100
|
||||
E,e:-3.14e+100
|
||||
E,en:-3.14e+100
|
||||
E,f:-3.14e+100
|
||||
E,g:-3.14e+100
|
||||
E,h:-3.14e+100
|
||||
E,i:-3.14e+100
|
||||
E,in:-3.14e+100
|
||||
E,j:-3.14e+100
|
||||
E,jn:-3.14e+100
|
||||
E,k:-3.14e+100
|
||||
E,l:-3.14e+100
|
||||
E,ln:-3.14e+100
|
||||
E,m:-3.14e+100
|
||||
E,mg:-3.14e+100
|
||||
E,mq:-3.14e+100
|
||||
E,n:-3.14e+100
|
||||
E,ng:-3.14e+100
|
||||
E,nr:-3.14e+100
|
||||
E,nrfg:-3.14e+100
|
||||
E,nrt:-3.14e+100
|
||||
E,ns:-3.14e+100
|
||||
E,nt:-3.14e+100
|
||||
E,nz:-3.14e+100
|
||||
E,o:-3.14e+100
|
||||
E,p:-3.14e+100
|
||||
E,q:-3.14e+100
|
||||
E,qe:-3.14e+100
|
||||
E,qg:-3.14e+100
|
||||
E,r:-3.14e+100
|
||||
E,rg:-3.14e+100
|
||||
E,rr:-3.14e+100
|
||||
E,rz:-3.14e+100
|
||||
E,s:-3.14e+100
|
||||
E,t:-3.14e+100
|
||||
E,tg:-3.14e+100
|
||||
E,u:-3.14e+100
|
||||
E,ud:-3.14e+100
|
||||
E,ug:-3.14e+100
|
||||
E,uj:-3.14e+100
|
||||
E,ul:-3.14e+100
|
||||
E,uv:-3.14e+100
|
||||
E,uz:-3.14e+100
|
||||
E,v:-3.14e+100
|
||||
E,vd:-3.14e+100
|
||||
E,vg:-3.14e+100
|
||||
E,vi:-3.14e+100
|
||||
E,vn:-3.14e+100
|
||||
E,vq:-3.14e+100
|
||||
E,w:-3.14e+100
|
||||
E,x:-3.14e+100
|
||||
E,y:-3.14e+100
|
||||
E,yg:-3.14e+100
|
||||
E,z:-3.14e+100
|
||||
E,zg:-3.14e+100
|
||||
M,a:-3.14e+100
|
||||
M,ad:-3.14e+100
|
||||
M,ag:-3.14e+100
|
||||
M,an:-3.14e+100
|
||||
M,b:-3.14e+100
|
||||
M,bg:-3.14e+100
|
||||
M,c:-3.14e+100
|
||||
M,d:-3.14e+100
|
||||
M,df:-3.14e+100
|
||||
M,dg:-3.14e+100
|
||||
M,e:-3.14e+100
|
||||
M,en:-3.14e+100
|
||||
M,f:-3.14e+100
|
||||
M,g:-3.14e+100
|
||||
M,h:-3.14e+100
|
||||
M,i:-3.14e+100
|
||||
M,in:-3.14e+100
|
||||
M,j:-3.14e+100
|
||||
M,jn:-3.14e+100
|
||||
M,k:-3.14e+100
|
||||
M,l:-3.14e+100
|
||||
M,ln:-3.14e+100
|
||||
M,m:-3.14e+100
|
||||
M,mg:-3.14e+100
|
||||
M,mq:-3.14e+100
|
||||
M,n:-3.14e+100
|
||||
M,ng:-3.14e+100
|
||||
M,nr:-3.14e+100
|
||||
M,nrfg:-3.14e+100
|
||||
M,nrt:-3.14e+100
|
||||
M,ns:-3.14e+100
|
||||
M,nt:-3.14e+100
|
||||
M,nz:-3.14e+100
|
||||
M,o:-3.14e+100
|
||||
M,p:-3.14e+100
|
||||
M,q:-3.14e+100
|
||||
M,qe:-3.14e+100
|
||||
M,qg:-3.14e+100
|
||||
M,r:-3.14e+100
|
||||
M,rg:-3.14e+100
|
||||
M,rr:-3.14e+100
|
||||
M,rz:-3.14e+100
|
||||
M,s:-3.14e+100
|
||||
M,t:-3.14e+100
|
||||
M,tg:-3.14e+100
|
||||
M,u:-3.14e+100
|
||||
M,ud:-3.14e+100
|
||||
M,ug:-3.14e+100
|
||||
M,uj:-3.14e+100
|
||||
M,ul:-3.14e+100
|
||||
M,uv:-3.14e+100
|
||||
M,uz:-3.14e+100
|
||||
M,v:-3.14e+100
|
||||
M,vd:-3.14e+100
|
||||
M,vg:-3.14e+100
|
||||
M,vi:-3.14e+100
|
||||
M,vn:-3.14e+100
|
||||
M,vq:-3.14e+100
|
||||
M,w:-3.14e+100
|
||||
M,x:-3.14e+100
|
||||
M,y:-3.14e+100
|
||||
M,yg:-3.14e+100
|
||||
M,z:-3.14e+100
|
||||
M,zg:-3.14e+100
|
||||
S,a:-3.90253968313
|
||||
S,ad:-11.0484584802
|
||||
S,ag:-6.95411391796
|
||||
S,an:-12.8402179494
|
||||
S,b:-6.47288876397
|
||||
S,bg:-3.14e+100
|
||||
S,c:-4.78696679586
|
||||
S,d:-3.90391976418
|
||||
S,df:-3.14e+100
|
||||
S,dg:-8.9483976513
|
||||
S,e:-5.94251300628
|
||||
S,en:-3.14e+100
|
||||
S,f:-5.19482024998
|
||||
S,g:-6.50782681533
|
||||
S,h:-8.65056320738
|
||||
S,i:-3.14e+100
|
||||
S,in:-3.14e+100
|
||||
S,j:-4.91199211964
|
||||
S,jn:-3.14e+100
|
||||
S,k:-6.94032059583
|
||||
S,l:-3.14e+100
|
||||
S,ln:-3.14e+100
|
||||
S,m:-3.26920065212
|
||||
S,mg:-10.8253149289
|
||||
S,mq:-3.14e+100
|
||||
S,n:-3.85514838976
|
||||
S,ng:-4.9134348611
|
||||
S,nr:-4.48366310396
|
||||
S,nrfg:-3.14e+100
|
||||
S,nrt:-3.14e+100
|
||||
S,ns:-3.14e+100
|
||||
S,nt:-12.1470707689
|
||||
S,nz:-3.14e+100
|
||||
S,o:-8.46446092775
|
||||
S,p:-2.98684018136
|
||||
S,q:-4.88865861826
|
||||
S,qe:-3.14e+100
|
||||
S,qg:-3.14e+100
|
||||
S,r:-2.76353367841
|
||||
S,rg:-10.2752685919
|
||||
S,rr:-3.14e+100
|
||||
S,rz:-3.14e+100
|
||||
S,s:-3.14e+100
|
||||
S,t:-3.14e+100
|
||||
S,tg:-6.27284253188
|
||||
S,u:-6.94032059583
|
||||
S,ud:-7.72823016105
|
||||
S,ug:-7.53940370266
|
||||
S,uj:-6.85251045118
|
||||
S,ul:-8.41537131755
|
||||
S,uv:-8.15808672229
|
||||
S,uz:-9.29925862537
|
||||
S,v:-3.05329230341
|
||||
S,vd:-3.14e+100
|
||||
S,vg:-5.94301818437
|
||||
S,vi:-3.14e+100
|
||||
S,vn:-11.4539235883
|
||||
S,vq:-3.14e+100
|
||||
S,w:-3.14e+100
|
||||
S,x:-8.42741965607
|
||||
S,y:-6.19707946995
|
||||
S,yg:-13.53336513
|
||||
S,z:-3.14e+100
|
||||
S,zg:-3.14e+100
|
File diff suppressed because it is too large
Load Diff
1534
dict/stop_words.utf8
1534
dict/stop_words.utf8
File diff suppressed because it is too large
Load Diff
@ -1,4 +0,0 @@
|
||||
云计算
|
||||
韩玉鉴赏
|
||||
蓝翔 nz
|
||||
区块链 10 nz
|
Loading…
x
Reference in New Issue
Block a user