hylee719 committed on
Commit
4f1669b
1 Parent(s): 4c48e8e

add math term constants to utils

Browse files
Files changed (1) hide show
  1. utils.py +282 -0
utils.py CHANGED
@@ -13,6 +13,288 @@ punct_chars.sort()
13
  punctuation = ''.join(punct_chars)
14
  replace = re.compile('[%s]' % re.escape(punctuation))
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  def get_num_words(text):
17
  if not isinstance(text, str):
18
  print("%s is not a string" % text)
 
13
  punctuation = ''.join(punct_chars)
14
  replace = re.compile('[%s]' % re.escape(punctuation))
15
 
16
# Short math vocabulary terms, kept as a separate constant from MATH_WORDS.
# Every entry here also appears verbatim in MATH_WORDS.
# NOTE(review): the name suggests these are matched as word prefixes (e.g.
# "sum" -> "sums"), but the consuming code is not visible here -- confirm
# against the caller before changing the order or the contents.
MATH_PREFIXES = [
    "sum",
    "arc",
    "mass",
    "digit",
    "graph",
    "liter",
    "gram",
    "add",
    "angle",
    "scale",
    "data",
    "array",
    "ruler",
    "meter",
    "total",
    "unit",
    "prism",
    "median",
    "ratio",
    "area",
]
38
+
39
# Lower-case math vocabulary: single words, multi-word phrases, and a few
# truncated stems ("simplif", "tessellat", "multipl", "geometr", "measur",
# "estimat", "rectang").
# NOTE(review): the stems and overlapping entries (e.g. "estimate"/"estimat",
# "hundredth"/"hundredths") suggest substring or prefix matching, but the
# consuming code is not visible here -- confirm before de-duplicating.
# Order and contents are preserved exactly as committed.
MATH_WORDS = [
    "absolute value",
    "area",
    "average",
    "base of",
    "box plot",
    "categorical",
    "coefficient",
    "common factor",
    "common multiple",
    "compose",
    "coordinate",
    "cubed",
    "decompose",
    "dependent variable",
    "distribution",
    "dot plot",
    "double number line diagram",
    "equivalent",
    "equivalent expression",
    "ratio",
    "exponent",
    "frequency",
    "greatest common factor",
    "gcd",
    "height of",
    "histogram",
    "independent variable",
    "interquartile range",
    "iqr",
    "least common multiple",
    "long division",
    "mean absolute deviation",
    "median",
    "negative number",
    "opposite vertex",
    "parallelogram",
    "percent",
    "polygon",
    "polyhedron",
    "positive number",
    "prism",
    "pyramid",
    "quadrant",
    "quadrilateral",
    "quartile",
    "rational number",
    "reciprocal",
    "equality",
    "inequality",
    "squared",
    "statistic",
    "surface area",
    "identity property",
    "addend",
    "unit",
    "number sentence",
    "make ten",
    "take from ten",
    "number bond",
    "total",
    "estimate",
    "hashmark",
    "meter",
    "number line",
    "ruler",
    "centimeter",
    "base ten",
    "expanded form",
    "hundred",
    "thousand",
    "place value",
    "number disk",
    "standard form",
    "unit form",
    "word form",
    "tens place",
    "algorithm",
    "equation",
    "simplif",
    "addition",
    "subtract",
    "array",
    "even number",
    "odd number",
    "repeated addition",
    "tessellat",
    "whole number",
    "number path",
    "rectangle",
    "square",
    "bar graph",
    "data",
    "degree",
    "line plot",
    "picture graph",
    "scale",
    "survey",
    "thermometer",
    "estimat",
    "tape diagram",
    "value",
    "analog",
    "angle",
    "parallel",
    "partition",
    "pentagon",
    "right angle",
    "cube",
    "digital",
    "quarter of",
    "tangram",
    "circle",
    "hexagon",
    "half circle",
    "half-circle",
    "quarter circle",
    "quarter-circle",
    "semicircle",
    "semi-circle",
    "rectang",
    "rhombus",
    "trapezoid",
    "triangle",
    "commutative",
    "equal group",
    "distributive",
    "divide",
    "division",
    "multipl",
    "parentheses",
    "quotient",
    "rotate",
    "unknown",
    "add",
    "capacity",
    "continuous",
    "endpoint",
    "gram",
    "interval",
    "kilogram",
    "volume",
    "liter",
    "milliliter",
    "approximate",
    "area model",
    "square unit",
    "unit square",
    "geometr",
    "equivalent fraction",
    "fraction form",
    "fractional unit",
    "unit fraction",
    "unit interval",
    "measur",
    "graph",
    "scaled graph",
    "diagonal",
    "perimeter",
    "regular polygon",
    "tessellate",
    "tetromino",
    "heptagon",
    "octagon",
    "digit",
    "expression",
    "sum",
    "kilometer",
    "mass",
    "mixed unit",
    "length",
    "measure",
    "simplify",
    "associative",
    "composite",
    "divisible",
    "divisor",
    "partial product",
    "prime number",
    "remainder",
    "acute",
    "arc",
    "collinear",
    "equilateral",
    "intersect",
    "isosceles",
    "symmetry",
    "line segment",
    "line",
    "obtuse",
    "perpendicular",
    "protractor",
    "scalene",
    "straight angle",
    "supplementary angle",
    "vertex",
    "common denominator",
    "denominator",
    "fraction",
    "mixed number",
    "numerator",
    "whole",
    "decimal expanded form",
    "decimal",
    "hundredth",
    "tenth",
    "customary system of measurement",
    "customary unit",
    "gallon",
    "metric",
    "metric unit",
    "ounce",
    "pint",
    "quart",
    "convert",
    "distance",
    "millimeter",
    "thousandth",
    "hundredths",
    "conversion factor",
    "decimal fraction",
    "multiplier",
    "equivalence",
    "multiple",
    "product",
    "benchmark fraction",
    "cup",
    "pound",
    "yard",
    "whole unit",
    "decimal divisor",
    "factors",
    "bisect",
    "cubic units",
    "hierarchy",
    "unit cube",
    "attribute",
    "kite",
    "bisector",
    "solid figure",
    "square units",
    "dimension",
    "axis",
    "ordered pair",
    "angle measure",
    "horizontal",
    "vertical",
    "categorical data",
    "lcm",
    "measure of center",
    "meters per second",
    "numerical",
    "solution",
    "unit price",
    "unit rate",
    "variability",
    "variable",
]
297
+
298
  def get_num_words(text):
299
  if not isinstance(text, str):
300
  print("%s is not a string" % text)