# Source code for english

"""An English grammar for chartparse.

This grammar was originally written by Steve Isard at the
University of Sussex. The vocabulary is designed to amuse
undergraduate Experimental Psychology students, hence the
references to pigeons and cages.

The grammar is almost entirely Steve's original. The only changes
are a few words, proper names, and the production:

    NP -> det Nn PP

which was changed to

    NP -> NP PP

The intent is to demonstrate ambiguous grouping of modifiers.

As in the original LIB CHART [1]_, features on the categories
are ignored. Three features are used: `case`, `num` and
`tr`. They could reasonably be handled in this file, via
compilation to a plain CFG, since their purpose is only
to enforce agreement.

References
----------

The original LIB CHART [1]_

.. [1] http://www.poplog.org/gospl/packages/pop11/lib/chart.p

>>> import chart
>>> chart.parse(["the","director",'is','clint', 'eastwood'])
['the', 'director', 'is', 'clint', 'eastwood']
Parse 1:
S
 Np
  det the
  Nn
   n director
 Vp
  cop is
  Pn
   n clint
   Pn
    n eastwood
1 parses

>>> import chart
>>> chart.parse(["show", "me","a","movie","where", "the","director",'is','clint', 'eastwood'],topcat='SImp',sep='_')
['show', 'me', 'a', 'movie', 'where', 'the', 'director', 'is', 'clint', 'eastwood']
Parse 1:
SImp
_Vp
__v show
__Np
___pn me
__Np
___Np
____det a
____Nn
_____n movie
___Relp
____rp where
____S
_____Np
______det the
______Nn
_______n director
_____Vp
______cop is
______Pn
_______n clint
_______Pn
________n eastwood
1 parses



"""

##
# Created 10 March 2014
# author: Chris Brew
# author: Stephen Isard
# license: Apache 2.0
##

from collections import namedtuple
import numpy.random as npr


class Rule(namedtuple('Rule', ('lhs', 'rhs'))):
    """A single context-free production, ``lhs -> rhs``.

    Attributes
    ----------
    lhs: string
        The symbol on the left hand side of the rule.
    rhs: list [string]
        The symbols on the right hand side of the rule.

    Examples
    --------
    >>> r = Rule('s',('np','vp'))
    """

    def __repr__(self):
        # A Rule is a 2-tuple, so it unpacks directly into its two fields.
        left, right = self
        template = "Rule(lhs='{lhs}', rhs={rhs})"
        return template.format(lhs=left, rhs=right)

    @property
    def constraints(self):
        # This rule type carries no constraints; the property always
        # evaluates to None.
        return None
class Grammar(object):
    """
    Class for creating grammars from text strings.

    Parameters
    ----------
    grammar: string
        the grammar rules, lines of the form `lhs -> rhs (|rhs)*`
    lexicon: string
        the words, lines of the form `word category (|category)*`
    state: optional
        object stored unchanged as the `state` attribute; when omitted,
        a `numpy.random.RandomState(42)` is created instead.

    Attributes
    ----------
    state:
        the random state given to (or created by) the constructor.
    grammar: list [Rule]
        the rules read from `grammar`, followed by one rule
        `category -> [word]` per lexicon alternative.

    Notes
    -----
    Parenthesised feature annotations such as `(num,case:subj)` are
    removed from both strings before they are parsed. Blank lines are
    skipped.

    Examples
    --------
    >>> g = Grammar(RULES, WORDS)
    >>> g.grammar[0]
    Rule(lhs='S', rhs=['Np', 'Vp'])
    """

    def __init__(self, grammar, lexicon, state=None):
        """
        Create a grammar from strings.
        """
        self.state = (npr.RandomState(42) if state is None else state)
        self.grammar = self.__rulify(grammar) + self.__lexicalize(lexicon)

    def make_rule(self, lhs, rhs=None):
        """Build a `Rule` for `lhs`.

        Parameters
        ----------
        lhs: string
            left hand side of the rule.
        rhs: list [string], optional
            right hand side of the rule; an empty list when omitted.
        """
        # The original body referenced an undefined name `rhs`; it is now
        # an explicit, optional parameter so one-argument calls still work.
        return Rule(lhs=lhs, rhs=[] if rhs is None else rhs)

    def __remove_balanced_brackets(self, string):
        """Return `string` without parentheses and the text inside them.

        Nesting depth is not tracked: any ")" resumes copying, and text
        after an unclosed "(" is dropped.
        """
        kept = []
        collecting = True
        for ch in string:
            if ch == "(":
                collecting = False
            elif ch == ")":
                collecting = True
            elif collecting:
                kept.append(ch)
        return "".join(kept)

    def __rulify(self, s):
        """Parse rule text into a list of `Rule` objects.

        Each non-blank line must contain exactly one `->`; alternatives on
        the right hand side are separated by `|` and produce one rule each.

        Raises
        ------
        ValueError
            if a non-blank line does not contain exactly one `->`.
        """
        rules = []
        for line in self.__remove_balanced_brackets(s).splitlines():
            if not line.strip():
                # Tolerate blank lines (and an empty rule string).
                continue
            lhs, rhs = line.split('->')
            lhs = lhs.split()[0]
            rules += [Rule(lhs=lhs, rhs=alternative.split())
                      for alternative in rhs.split('|')]
        return rules

    def __lexicalize(self, string):
        """Parse lexicon text into a list of `Rule` objects.

        Each non-blank line is a word followed by one or more categories
        separated by `|`; every category yields `Rule(category, [word])`.
        """
        rules = []
        for line in self.__remove_balanced_brackets(string).splitlines():
            tokens = line.split()
            if not tokens:
                # Tolerate blank lines (and an empty lexicon string).
                continue
            word = tokens[0]
            # Joining without spaces makes whitespace around "|" irrelevant.
            categories = "".join(tokens[1:]).split('|')
            for category in categories:
                # An empty alternative (e.g. a word with no category)
                # raises IndexError here, exactly as it did before.
                rules.append(Rule(lhs=category.split()[0], rhs=[word]))
        return rules
# Phrase-structure rules, one `lhs -> rhs (| rhs)*` entry per line.
# Grammar splits this text on newlines and expects exactly one "->" per
# line, so the line breaks are significant. The parenthesised features
# are stripped by Grammar before the rules are read.
RULES = """S(num) -> Np(num,case:subj) Vp(num) | S conj S
S(num) -> Np(num,case:subj) cop(num) ppart
S(num) -> Np(num,case:subj) cop(num) ppart passmarker Np(case:obj)
SImp -> Vp
Relp -> rp S
Np(num,case) -> det(num) Nn(num) | Np(num,case) Pp | pn(num,case) | Np(num,case) Relp | Np(case) conj Np(case)
Nn(num) -> n(num) | adj n(num)
Vp(num) -> v(num,tr:trans) Np(case:obj) | v(num,tr:intrans) | cop(num) adj | cop(num) Pn | v(num,tr:ditrans) Np Np
Vp(num) -> Vp(num) Pp
Pn -> n | n Pn
Pp -> prep Np(case:obj)"""

# Lexicon, one `word category (| category)*` entry per line. As with
# RULES, the line breaks separate entries and the parenthesised features
# are stripped by Grammar.
WORDS = """a det(num:sing)
and conj
are cop(num:pl)
ball n(num:sing)
big adj
bitten ppart
blue adj
boy n(num:sing)
boys n(num:pl)
by passmarker | prep
cage n(num:sing) | v(num:pl,tr:trans)
caged v(tr:trans) | ppart
cages n(num:pl) | v(num:sing,tr:trans)
chris n(num:sing)
clint n(num:sing)
computer n(num:sing)
computers n(num:pl)
director n(num:sing)
directors n(num:pl)
eastwood n(num:sing)
enormous adj
fifty det(num:pl)
four det(num:pl)
girl n(num:sing)
girls n(num:pl)
green adj
he pn(num:sing,case:subj)
her pn(num:sing,case:obj)
him pn(num:sing,case:obj)
hit v(tr:trans) | ppart
hits v(tr:trans,num:sing)
house n(num:sing)
in prep
is cop(num:sing)
little adj
me pn(num:sing)
mic pn(num:sing)
micro n(num:sing)
micros n(num:pl)
movie n(num:sing)
movies n(num:pl)
on prep
one n(num:sing) | pn(num:sing) | det(num:sing)
ones n(num:pl)
pdp11 n(num:sing)
pdp11s n(num:pl)
pigeon n(num:sing)
pigeons n(num:pl)
program n(num:sing) | v(num:pl,tr:trans)
programmed v(tr:trans) | ppart
programs n(num:pl) | v(num:sing,tr:trans)
punish v(num:pl,tr:trans)
punished v(tr:trans)|ppart
punishes v(num:sing,tr:trans)
ran v(tr:intrans)
rat n(num:sing)
rats n(num:pl)
red adj
reinforce v(num:pl,tr:trans)
reinforced v(tr:trans) | ppart
reinforces v(num:s,tr:trans)
room n(num:sing)
rooms n(num:pl)
run v(tr:intrans,num:pl)
runs v(tr:intrans,num:sing)
scientists n(num:pl)
she pn(num:sing,case:subj)
sheep n
show v(tr:ditrans)
steve pn(num:sing)
stuart pn(num:sing)
suffer v(num:pl,tr:intrans)
suffered v(tr:intrans)
suffers v(num:sing,tr:intrans)
that det(num:sing)
the det
them pn(num:pl,case:obj)
these det(num:pl)
they pn(num:pl,case:subj)
those det(num:pl)
three det(num:pl)
two det(num:pl)
undergraduates n(num:pl)
universities n(num:pl)
university n(num:sing)
was cop(num:sing)
were cop(num:pl)
when rp(rptype:tmp)
where rp(rptype:loc)
direct v(tr:trans)
wood n(num:sing)
would md
dye v(tr:trans)
or conj
rector n(num:sing)
east adj"""

# Module-level grammar built from the rules and lexicon above.
GRAMMAR = Grammar(RULES, WORDS)