File size: 2,596 Bytes
1d5604f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import sys;

#
# _fix_me_
# maybe use Unicode character classes instead, even if it likely would mean
# many calls to match one-character regular expressions?
#
# PUNCTUATION is the default set of characters that explode() trims from both
# ends of an anchored span: ASCII punctuation and brackets, the typographic
# quotes U+201C, U+201D, U+2018, and U+2019, and whitespace.  The curly quotes
# are spelled as escapes so that the set does not depend on how this file is
# encoded or decoded; a mis-decoded literal would otherwise silently turn
# them into unrelated characters.
#
PUNCTUATION = frozenset(".?!;,:\u201c\"\u201d\u2018'\u2019()[]{} \t\n\f")
# whitespace characters; explode() never reports these as anchored positions
SPACE = frozenset(" \t\n\f")

def intersect(golds, systems, quiet = False):
    """Pair gold graphs with system graphs, yielding (gold, system) tuples.

    Graphs are matched on the key (language(), framework, id).  A system
    graph whose language() is None and whose key matches no gold graph is
    retried with the language "eng".

    The generator first walks `systems` in order:
      - a system graph whose key was already seen is ignored as a duplicate;
      - a system graph without a matching gold graph is ignored;
      - otherwise the pair (gold, system) is yielded.
    Afterwards, every gold graph that no system graph claimed is yielded
    together with a freshly constructed empty Graph carrying the gold id,
    flavor, and framework.  These trailing pairs follow the order in which
    the gold graphs were supplied, so output is reproducible across runs.

    Unless `quiet` is true, each ignored or missing graph is reported on
    standard error.
    """
    gold_by_key = {(graph.language(), graph.framework, graph.id): graph
                   for graph in golds}
    seen = set()
    for graph in systems:
        language = graph.language()
        key = (language, graph.framework, graph.id)
        if language is None and key not in gold_by_key:
            # no language on the system graph: fall back to "eng"
            language = "eng"
            key = (language, graph.framework, graph.id)
        if key in seen:
            if not quiet:
                print("score.intersect(): ignoring duplicate {} {} graph #{}"
                      .format(language, graph.framework, graph.id),
                      file=sys.stderr)
            continue
        seen.add(key)
        gold = gold_by_key.get(key)
        if gold is None:
            if not quiet:
                print("score.intersect(): ignoring {} {} graph #{} with no gold graph"
                      .format(graph.language(), graph.framework, graph.id),
                      file=sys.stderr)
            continue
        yield gold, graph

    #
    # iterate the dictionary itself (insertion order) rather than a set
    # difference, whose iteration order is arbitrary and varies with string
    # hash randomization
    #
    for key, gold in gold_by_key.items():
        if key in seen:
            continue
        if not quiet:
            print("score.intersect(): missing system {} {} graph #{}"
                  .format(gold.language(), gold.framework, gold.id),
                  file=sys.stderr)
        #
        # manufacture an empty graph as the system graph; the import stays
        # local so that it only happens when a gold graph is actually missing
        #
        from graph import Graph
        yield gold, Graph(gold.id, flavor = gold.flavor,
                          framework = gold.framework)

def anchor(node):
    """Return the anchors of `node` as a list of (from, to) tuples.

    Spans that lack either the "from" or the "to" key are skipped; a node
    whose `anchors` attribute is None yields an empty list.
    """
    spans = node.anchors
    if spans is None:
        return []
    return [(span["from"], span["to"])
            for span in spans
            if "from" in span and "to" in span]

def explode(string, anchors, trim = PUNCTUATION):
    """Expand anchors into the frozenset of character positions they cover.

    Each anchor is either a (start, end) tuple or a mapping with "from" and
    "to" keys; mappings missing either key, and anchors whose start or end
    is None, contribute nothing.  Every span is first shrunk from both ends
    while its boundary character in `string` is a member of `trim`; the
    remaining positions are collected, except those holding a character
    from SPACE.
    """
    positions = set()
    for item in anchors:
        if isinstance(item, tuple):
            left, right = item
        elif "from" in item and "to" in item:
            left, right = item["from"], item["to"]
        else:
            continue
        if left is None or right is None:
            continue
        # strip trimmable characters from the left edge, then the right edge
        while left < right and string[left] in trim:
            left += 1
        while right > left and string[right - 1] in trim:
            right -= 1
        positions.update(i for i in range(left, right)
                         if string[i] not in SPACE)
    return frozenset(positions)

def fscore(gold, system, correct):
    """Compute precision, recall, and F1 from three counts.

    `gold` and `system` are the totals on either side and `correct` is the
    number of matches.  A zero (falsy) denominator gives 0.0 for the
    respective ratio, and F1 is 0.0 when precision and recall sum to zero.
    Returns the tuple (precision, recall, f1).
    """
    precision = 0.0
    if system:
        precision = correct / system
    recall = 0.0
    if gold:
        recall = correct / gold
    total = precision + recall
    if total != 0:
        f1 = 2 * precision * recall / total
    else:
        f1 = 0.0
    return precision, recall, f1