--# -path=.:../abstract:../common:../../prelude

--1 Russian auxiliary operations.

-- This module contains operations that are needed to make the
-- resource syntax work. To define everything that is needed to
-- implement $Test$, it moreover contains regular lexical
-- patterns needed for $Lex$.

resource ResRus = ParamX ** open Prelude in {

flags
  coding=utf8 ;
  optimize=all ;

--2 Enumerated parameter types
--
-- These types are the ones found in school grammars.
-- Their parameter values are atomic.
-- Some parameters, such as $Number$, are inherited from $ParamX$.

param
  Gender
    = Masc
    | Fem
    | Neut ;

  -- The prepositional case carries a PrepKind argument.
  Case
    = Nom
    | Gen
    | Dat
    | Acc
    | Inst
    | Prepos PrepKind ;

  -- NOTE(review): PrepVNa presumably marks the prepositions "в"/"на";
  -- confirm against the modules that use it.
  PrepKind
    = PrepOther
    | PrepVNa ;

  Animacy
    = Animate
    | Inanimate ;

  Voice
    = Act
    | Pass ;

  Aspect
    = Imperfective
    | Perfective ;

  RusTense
    = Present
    | PastRus
    | Future ;

  -- Degree = Pos | Comp | Super ;

  AfterPrep
    = Yes
    | No ;

  Possessive
    = NonPoss
    | Poss GenNum ;

  -- Anteriority = Simul | Anter ;

  ClForm
    = ClIndic RusTense Anteriority
    | ClCondit
    | ClInfinit
    | ClImper ;
    -- "naked infinitive" clauses

-- A number of Russian nouns have common gender. They can
-- denote both males and females: "умница" (a clever person), "инженер" (an engineer).
-- We overlook this phenomenon for now.

-- The AfterPrep parameter is introduced in order to describe
-- the variations of the third person personal pronoun forms
-- depending on whether they come after a preposition or not.

-- Declension forms depend on Case, Animacy, Gender:
-- "большие дома" - "больших домов" (big houses - big houses'),
-- Animacy plays a role only in the Accusative case (Masc Sg and Plural forms):
-- Accusative Animate = Genitive, Accusative Inanimate = Nominative
-- "я люблю большие дома" - "я люблю больших мужчин"
-- (I love big houses - I love big men);
-- and on Number: "большой дом" - "большие дома"
-- (a big house - big houses).
-- The plural never makes a gender distinction.
-- Compound gender/number type: gender is only recorded in the singular.
param
  GenNum
    = GSg Gender
    | GPl ;

-- Coercions between the compound gen-num type and gender and number:

oper
  -- Builds a GenNum from a gender and a number; the gender is dropped in the plural.
  gennum : Gender -> Number -> GenNum = \gender, number ->
    case number of {
      Pl => GPl ;
      Sg => GSg gender
    } ;

  -- Projects the number out of a GenNum.
  numGenNum : GenNum -> Number = \gennumber ->
    case gennumber of {
      GPl   => Pl ;
      GSg _ => Sg
    } ;

-- The Possessive parameter is introduced in order to describe
-- the possessives of personal pronouns, which are used in the
-- Genitive constructions like "моя мама" (my mother) instead of
-- "мама моя" (the mother of mine).

--2 For $Noun$

-- Nouns decline according to number and case.
-- For the sake of shorter description these parameters are
-- combined in the type NForm.

param
  NForm = NF Number Case ;

-- Real parameter types (i.e. ones on which words and phrases depend)
-- are mostly hierarchical. The alternative would be cross-products of
-- simple parameters, but this would usually overgenerate.

-- However, we use the cross-products in complex cases
-- (for example, aspect and tense parameter in the verb description)
-- where the relationship between the parameters is non-trivial,
-- even though we are aware that some combinations do not exist
-- (for example, present perfective does not exist, but removing
-- this combination would lead to having different descriptions
-- for perfective and imperfective verbs, which we do not want for the
-- sake of uniformity).

param
  PronForm = PF Case AfterPrep Possessive ;

-- A pronoun: an inflection table over PronForm plus inherent number,
-- person, (possibly undetermined) gender and a Boolean `pron` flag.
oper
  Pronoun = {
    s    : PronForm => Str ;
    n    : Number ;
    p    : Person ;
    g    : PronGen ;
    pron : Bool
  } ;

-- Gender is not morphologically determined for first
-- and second person pronouns.
param
  PronGen
    = PGen Gender
    | PNoGen ;

-- The following coercion is useful:

oper
  -- Collapses a pronoun gender to a plain Gender; an undetermined
  -- gender (PNoGen) defaults to Masc.
  pgen2gen : PronGen -> Gender = \pg ->
    case pg of {
      PNoGen => Masc ; ---- variants {Masc ; Fem} --- the best we can do for ya, tu
      PGen g => g
    } ;

oper
  -- Extracts the case component of a pronoun form.
  extCase : PronForm -> Case = \form ->
    case form of {
      PF cas _ _ => cas
    } ;

  -- Function-style wrapper around the PF constructor.
  mkPronForm : Case -> AfterPrep -> Possessive -> PronForm = \cas, after, poss ->
    PF cas after poss ;

  -- Common noun: inflection table with inherent gender and animacy.
  CommNoun = {
    s    : NForm => Str ;
    g    : Gender ;
    anim : Animacy
  } ;

  -- Noun phrase: same fields as Pronoun plus animacy.
  NounPhrase : Type = {
    s    : PronForm => Str ;
    n    : Number ;
    p    : Person ;
    g    : PronGen ;
    anim : Animacy ;
    pron : Bool
  } ;

  -- Turns a common noun into a third-person, non-pronominal noun phrase
  -- with the given number; only the case part of the PronForm is used.
  mkNP : Number -> CommNoun -> NounPhrase = \num, noun -> {
    s    = \\form => noun.s ! NF num (extCase form) ;
    n    = num ;
    p    = P3 ;
    g    = PGen noun.g ;
    anim = noun.anim ;
    pron = False
  } ;

  -- Uses the inanimate neuter singular forms of an adjective-like
  -- determiner as a noun phrase of its own.
  det2NounPhrase : Adjective -> NounPhrase = \det -> {
    s    = \\form => det.s ! (AF (extCase form) Inanimate (GSg Neut)) ;
    n    = Sg ;
    p    = P3 ;
    g    = PGen Neut ;
    anim = Inanimate ;
    pron = False
  } ;

  -- Lifts a pronoun to a noun phrase, taking animacy and number from the
  -- arguments (the pronoun's own number is not used).
  pron2NounPhraseNum : Pronoun -> Animacy -> Number -> NounPhrase = \pronoun, animacy, num -> {
    s    = pronoun.s ;
    n    = num ;
    p    = pronoun.p ;
    g    = pronoun.g ;
    anim = animacy ;
    pron = pronoun.pron
  } ;

-- Agreement of $NP$ is a record. We'll add $Gender$ later.

-- oper Agr = {n : Number ; p : Person} ;

----2 For $Verb$

-- Mood is the main verb classification parameter.
-- The verb mood can be infinitive, subjunctive, imperative, and indicative.

-- Note: subjunctive mood is analytical, i.e. formed from the past form of the
-- indicative mood plus the particle "бы". That is why they have the same GenNum
-- parameter. We choose to keep the "redundant" form in order to indicate
-- the presence of the subjunctive mood in Russian verbs.

-- Aspect and Voice parameters are present in every mood, so Voice is put
-- before the mood parameter in the hierarchy of the verb form description.
-- Moreover Aspect is regarded as an inherent parameter of a verb entry.
-- The primary reason for that is that one imperfective form can have several
-- perfective forms: "ломать" - "сломать" - "поломать" (to break).
-- Besides, the perfective form could be formed from imperfective
-- by prefixation, but also by taking a completely different stem:
-- "говорить"-"сказать" (to say). In the latter case it is even natural to
-- regard them as different verb entries.
-- Another reason is that looking at the Aspect as an inherent verb parameter
-- seems to be customary in other similar projects:
-- http://starling.rinet.ru/morph.htm

-- Note: Of course, the whole inflection table has many redundancies
-- in a sense that many verbs do not have all grammatically possible
-- forms. For example, passive does not exist for the verb
-- "любить" (to love), but exists for the verb "ломаться" (to break).
-- In present tense verbs do not conjugate according to gender,
-- so parameter GenNum instead of Number is used for the sake of
-- using for example as adjective in predication.

-- Depending on the tense verbs conjugate according to combinations
-- of gender, person and number of the verb objects.
-- Participles (Present and PastRus) and Gerund forms are not included in the
-- current description.
-- This is the verb type used in the lexicon:

oper
  Verbum : Type = {
    s   : VerbForm => Str ;
    asp : Aspect
  } ;

param
  VerbForm = VFORM Voice VerbConj ;

  VerbConj
    = VIND GenNum VTense
    | VIMP Number Person
    | VINF
    | VSUB GenNum ;

  VTense
    = VPresent Person
    | VPast
    | VFuture Person ;

oper
  -- Maps a tense and a person to the VTense value; the person is
  -- dropped in the past tense.
  getVTense : RusTense -> Person -> VTense = \t, p ->
    case t of {
      Present => VPresent p ;
      PastRus => VPast ;
      Future  => VFuture p
    } ;

  -- Maps the shared Tense parameter to RusTense. Everything other than
  -- Pres and Fut becomes PastRus. Each "--# notpresent" pragma has to
  -- stay at the end of its own line.
  getTense : Tense -> RusTense = \t ->
    case t of {
      Pres => Present
      ; Fut => Future   --# notpresent
      ; _ => PastRus    --# notpresent
    } ;

  -- Projects the voice out of a verb form.
  getVoice : VerbForm -> Voice = \vf ->
    case vf of {
      VFORM v _ => v
    } ;

-- Case forms of the reflexive "себя"; there is no nominative form.
-- The prepositional branch matches the Prepos constructor explicitly:
-- a bare identifier that is not a constructor of Case would be read as
-- a variable pattern and silently match every remaining case.
oper
  sebya : Case => Str = table {
    Nom      => "" ;
    Gen      => "себя" ;
    Dat      => "себе" ;
    Acc      => "себя" ;
    Inst     => "собой" ;
    Prepos _ => "себе"
  } ;

  -- Syntactic verb: forms indexed by clause form, gender/number and
  -- person, with inherent aspect and voice.
  Verb : Type = {
    s   : ClForm => GenNum => Person => Str ;
    asp : Aspect ;
    w   : Voice
  } ;

-- Verb phrases are discontinuous: the parts of a verb phrase are
-- (s) an inflected verb, (s2) verb adverbials (not negation though), and
-- (s3) complement. This discontinuity is needed in sentence formation
-- to account for word order variations.

  VerbPhrase : Type = Verb ** {
    s2        : Str ;
    s3        : Gender => Number => Str ;
    negBefore : Bool
  } ;

-- This is one instance of Gazdar's *slash categories*, corresponding to his
-- $S/NP$.
-- We cannot have - nor would we want to have - a productive slash-category former.
-- Perhaps a handful more will be needed.
--
-- Notice that the slash category has the same relation to sentences as
-- transitive verbs have to verbs: it's like a *sentence taking a complement*.

  SlashNounPhrase = Clause ** {c2 : Complement} ;

  Clause = {s : Polarity => ClForm => Str} ;

-- This is the traditional $S -> NP VP$ rule.
-- The subject is taken in the nominative; the verb and the complement
-- agree with the subject's gender (via pgen2gen), number and person.
-- The polarity argument is not consulted here. The result carries an
-- empty nominative complement slot.

  predVerbPhrase : NounPhrase -> VerbPhrase -> SlashNounPhrase = \Ya, tebyaNeVizhu -> {
    s = \\b, clf =>
      let {
        ya       = Ya.s ! (mkPronForm Nom No NonPoss) ;
        khorosho = tebyaNeVizhu.s2 ;
        vizhu    = tebyaNeVizhu.s ! clf ! (gennum (pgen2gen Ya.g) Ya.n) ! Ya.p ;
        tebya    = tebyaNeVizhu.s3 ! (pgen2gen Ya.g) ! Ya.n
      } in
      ya ++ khorosho ++ vizhu ++ tebya ;
    c2 = {s = "" ; c = Nom}
  } ;

-- Questions are either direct ("Ты счастлив?")
-- or indirect ("Потом он спросил счастлив ли ты").

param
  QuestForm = DirQ | IndirQ ;

---- The order of sentence is needed already in $VP$.
--
-- Order = ODir | OQuest ;

oper
  -- Active-voice lexicon form for a clause form and agreement features.
  -- The anteriority inside ClIndic is ignored.
  getActVerbForm : ClForm -> Gender -> Number -> Person -> VerbForm = \clf, g, n, p ->
    case clf of {
      ClIndic t _ => VFORM Act (VIND (gennum g n) (getVTense t p)) ;
      ClCondit    => VFORM Act (VSUB (gennum g n)) ;
      ClInfinit   => VFORM Act VINF ;
      ClImper     => VFORM Act (VIMP n p)
    } ;

  -- Passive-voice counterpart of getActVerbForm.
  getPassVerbForm : ClForm -> Gender -> Number -> Person -> VerbForm = \clf, g, n, p ->
    case clf of {
      ClIndic t _ => VFORM Pass (VIND (gennum g n) (getVTense t p)) ;
      ClCondit    => VFORM Pass (VSUB (gennum g n)) ;
      ClInfinit   => VFORM Pass VINF ;
      ClImper     => VFORM Pass (VIMP n p)
    } ;

--2 For $Adjective$

-- The short form is only inflected in gender and number.
-- Fixing this would require changing the Degree type.
-- Adjective forms: full (case, animacy, gender/number), short
-- (gender/number only) and adverbial.
param
  AdjForm
    = AF Case Animacy GenNum
    | AFShort GenNum
    | AdvF ;

-- A complement marker: a string together with the case it governs.
oper
  Complement = {
    s : Str ;
    c : Case
  } ;

oper
  Refl = {s : Case => Str} ;

-- Case forms of "сам"/"себя".
oper
  sam : Refl = {
    s = table {
      Nom      => "сам" ;
      Gen      => "себя" ;
      Dat      => "себе" ;
      Acc      => "себя" ;
      Inst     => "собой" ;
      Prepos _ => "себе"
    }
  } ;

  -- GenNum of a pronoun: GSg with the coerced gender in the singular,
  -- GPl in the plural. An undetermined pronoun gender comes out as Masc
  -- (i.e. assuming pronoun "I" is a male).
  -- A version with  variants {GSg Masc ; GSg Fem}  causes a
  -- "no term variants" error during linearization.
  pgNum : PronGen -> Number -> GenNum = \pg, num ->
    gennum (pgen2gen pg) num ;

-- Gender of a GenNum; the plural is reported as Neut.
oper
  genGNum : GenNum -> Gender = \gennumber ->
    case gennumber of {
      GSg gender => gender ;
      GPl        => Neut
    } ;

-- Number of an adjective form; the adverbial form counts as Sg.
oper
  numAF : AdjForm -> Number = \form ->
    case form of {
      AF _ _ gennumber  => numGenNum gennumber ;
      AFShort gennumber => numGenNum gennumber ;
      AdvF              => Sg
    } ;

-- Gender of an adjective form; the adverbial form counts as Neut.
oper
  genAF : AdjForm -> Gender = \form ->
    case form of {
      AF _ _ gennumber  => genGNum gennumber ;
      AFShort gennumber => genGNum gennumber ;
      AdvF              => Neut
    } ;

-- Case of an adjective form; short and adverbial forms count as Nom.
oper
  caseAF : AdjForm -> Case = \form ->
    case form of {
      AF cas _ _ => cas ;
      _          => Nom
    } ;

-- The Degree parameter should also be more complex, since most Russian
-- adjectives have two comparative forms:
-- attributive (syntactic (compound), declinable) -
-- "более высокий" (corresponds to "more high")
-- and predicative (indeclinable) - "выше" (higher), and more than one
-- superlative form: "самый высокий" (corresponds to "the most high") -
-- "высочайший" (the highest).

-- Even one more parameter independent of the degree can be added,
-- since Russian adjectives in the positive degree also have two forms:
-- long (attributive and predicative) - "высокий" (high) and short (predicative) - "высок"
-- although this parameter will not be exactly orthogonal to the
-- degree parameter.
-- Short form has no case declension, so in principle
-- it can be considered as an additional case.

-- Note: although the predicative usage of the long
-- form is perfectly grammatical, it can have a slightly different meaning
-- compared to the short form.
-- For example: "он - больной" (long, predicative) vs.
-- "он - болен" (short, predicative).

--3 Adjective phrases
--
-- An adjective phrase may contain a complement, e.g. "моложе Риты".
-- Then it is used as postfix in modification, e.g. "человек, моложе Риты".

oper
  IsPostfixAdj = Bool ;

-- Simple adjectives are not postfix:

-- Adjective type includes both non-degree adjective classes:
-- possessive ("мамин" [mother's], "лисий" [fox's])
-- and relative ("русский" [Russian]) adjectives.

  Adjective : Type = {
    s : AdjForm => Str
  } ;

-- A special type of adjectives just having positive forms
-- (for semantic reasons) is useful, e.g. "финский".

  AdjPhrase = Adjective ** {p : IsPostfixAdj} ;

  -- Extends an adjective with the postfix flag.
  mkAdjPhrase : Adjective -> IsPostfixAdj -> AdjPhrase = \adj, postfix ->
    adj ** {p = postfix} ;

----2 For $Relative$
--
-- RAgr = RNoAg | RAg {n : Number ; p : Person} ;
-- RCase = RPrep | RC Case ;
--
--2 For $Numeral$

param
  DForm = unit | teen | ten | hund ;

param
  Place = attr | indep ;

param
  Size = nom | sgg | plg ;

--param Gend = masc | fem | neut ;

-- Forms of "тысяча" (thousand) selected by Size.
oper
  mille : Size => Str = table {
    nom => "тысяча" ;
    sgg => "тысячи" ;
    plg => "тысяч"
  } ;

-- Constant table: the same string for every gender.
oper
  gg : Str -> Gender => Str = \str ->
    \\_ => str ;

-- CardOrd = NCard | NOrd ;

}