-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path20.py
More file actions
118 lines (94 loc) · 3.2 KB
/
Copy path20.py
File metadata and controls
118 lines (94 loc) · 3.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#! /usr/bin/python2.7
# -*- coding: utf-8 -*-
from time import time
from konlpy import tag
from konlpy.corpus import kolaw
from konlpy.utils import pprint
def tagging(tagger, text):
    """Run one tagger from ``konlpy.tag`` over *text*.

    Args:
        tagger: Name of an attribute of the ``tag`` module; it is looked up
            with ``getattr``, called with no arguments, and the resulting
            object's ``pos`` method is applied to *text*.
        text: Input handed unchanged to ``pos`` (and echoed to stdout first).

    Returns:
        Whatever ``pos`` returns, or an empty list if anything in the
        lookup / construction / tagging chain raised.
    """
    r = []
    try:
        print(text)
        r = getattr(tag, tagger)().pos(text)
    except Exception as e:
        # Deliberately broad: a tagger whose backend is missing or that
        # rejects the input must not abort the whole comparison run.
        # A single pre-formatted string prints the same on Python 2 and 3
        # (a two-argument print() call shows a tuple repr on Python 2).
        print("Uhoh, %s" % (e,))
    return r
def measure_time(taggers, mult=6):
    """Time every tagger on inputs of 1, 10, ..., 10**(mult-1) characters.

    Args:
        taggers: List of tagger names, each passed to ``tagging``.
        mult: Number of input sizes to measure; size ``i`` is ``10**i``
            characters.

    Returns:
        A table as a list of rows. The first row is ``['n'] + taggers``;
        every following row is ``[doclen, seconds_for_tagger_0, ...]``.
    """
    sentences = [
        u'아버지가방에들어가신다',  # spacing (no word boundaries)
        u'나는 밥을 먹는다', u'하늘을 나는 자동차',  # disambiguation
        u'아이폰 기다리다 지쳐 애플공홈에서 언락폰질러버렸다 6+ 128기가실버ㅋ',  # slang
    ]
    # The taggers' pos() is given a slice of this value, and the sizes are
    # character counts, so it has to be one string (slicing the sentence
    # list would hand pos() a list). Repeat it until the largest slice fits.
    base = u' '.join(sentences)
    longest = 10 ** max(mult - 1, 0)
    doc = base * (longest // len(base) + 1)

    data = [['n'] + taggers]
    for i in range(mult):
        doclen = 10 ** i
        diffs = [doclen]
        for tagger in taggers:
            started = time()
            tagging(tagger, doc[:doclen])
            diffs.append(time() - started)
        data.append(diffs)
        # Blank separator line between sizes; print('') behaves the same on
        # Python 2 and 3, whereas a bare `print` is a no-op on Python 3.
        print('')
    return data
def measure_accuracy(taggers, text):
    """Tag *text* with every tagger and collect the results side by side.

    Args:
        taggers: List of tagger names, each passed to ``tagging``.
        text: The input to tag.

    Returns:
        One row per tagger: the tagger name followed by each item of its
        tagging result joined with ``' / '``.
    """
    result = []
    for tagger in taggers:
        print(tagger)
        r = tagging(tagger, text)
        pprint(r)
        result.append([tagger] + [' / '.join(s) for s in r])
    return result
def plot(result):
    """Plot each series in *result* against its first row and save the figure.

    Args:
        result: Sequence whose first row holds the x values and whose
            remaining rows are one y series each. When it is ``None`` or
            empty, the table is loaded from ``morph.csv`` instead (header
            row skipped, then transposed so columns become rows).

    The figure is written to ``images/time.png`` and then shown.
    """
    import os

    import matplotlib
    matplotlib.use('TkAgg')
    from matplotlib import pylab as pl
    import numpy as np

    # Explicit emptiness test: `not result` raises for multi-element arrays.
    if result is None or len(result) == 0:
        # numpy.loadtxt replaces the deprecated scipy.loadtxt alias;
        # ndmin=2 keeps a single-row or single-column file two-dimensional
        # so the row indexing below still works.
        result = np.loadtxt('morph.csv', delimiter=',', skiprows=1,
                            ndmin=2).T
    x, y = result[0], result[1:]
    for series in y:
        pl.plot(x, series)
    pl.xlabel('Number of characters')
    pl.ylabel('Time (sec)')
    pl.xscale('log')
    pl.grid(True)
    # savefig does not create missing directories.
    if not os.path.isdir('images'):
        os.makedirs('images')
    pl.savefig("images/time.png")
    pl.show()
def _write_csv(rows, path):
    """Write *rows* to *path* as UTF-8 text, one comma-joined line per row."""
    import io

    text_type = type(u'')
    with io.open(path, 'w', encoding='utf-8') as f:
        for row in rows:
            cells = [v if isinstance(v, text_type) else text_type(v)
                     for v in row]
            f.write(u','.join(cells) + u'\n')


if __name__ == '__main__':
    try:
        from itertools import zip_longest
    except ImportError:  # Python 2 spells it izip_longest
        from itertools import izip_longest as zip_longest

    PLOT = True
    MULT = 6
    examples = [
        u'아버지가방에들어가신다',  # spacing (no word boundaries)
        u'나는 밥을 먹는다', u'하늘을 나는 자동차',  # disambiguation
        u'아이폰 기다리다 지쳐 애플공홈에서 언락폰질러버렸다 6+ 128기가실버ㅋ',  # slang
    ]
    # Every capitalised name exported by konlpy.tag is treated as a tagger.
    taggers = [t for t in dir(tag) if t[0].isupper()]
    print ("THIS IS TAGGERS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print ("LOOK AT THIS!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print (taggers)

    # Time
    data = measure_time(taggers, mult=MULT)
    _write_csv(data, 'morph.csv')

    # Accuracy
    for i, example in enumerate(examples):
        result = measure_accuracy(taggers, example)
        print (result)
        # Transpose so each tagger becomes a column. Taggers emit different
        # numbers of tokens, so pad short columns with '' rather than
        # truncating to the shortest one (which Python 3's map() would do).
        result = [[cell or '' for cell in row]
                  for row in zip_longest(*result)]
        _write_csv(result, 'morph-%s.csv' % i)

    # Plot
    if PLOT:
        print ("---------------------------------")
        print (result)  # debug output: the last accuracy table
        print ("---------------------------------")
        # Passing None makes plot() load the timing table from morph.csv
        # (written above); the accuracy table holds strings, not timings.
        plot(None)
        print ("Plotting is done!")