
Commit

Hämäläinen, Mika K committed Dec 15, 2017
2 parents 599ed85 + 6d5b39d commit 6c37430
Showing 21 changed files with 113,495 additions and 1,038,635 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -1,2 +1,3 @@
*.*~
*.pyc
.idea/*
1 change: 0 additions & 1 deletion .idea/.name

This file was deleted.

14 changes: 0 additions & 14 deletions .idea/misc.xml

This file was deleted.

8 changes: 0 additions & 8 deletions .idea/modules.xml

This file was deleted.

8 changes: 0 additions & 8 deletions .idea/syntaxmaker.iml

This file was deleted.

6 changes: 0 additions & 6 deletions .idea/vcs.xml

This file was deleted.

507 changes: 0 additions & 507 deletions .idea/workspace.xml

This file was deleted.

2 changes: 1 addition & 1 deletion LICENSE.txt
@@ -1,4 +1,4 @@
Copyright 2015 Mika Hämäläinen, University of Helsinki
Copyright 2015-2017 Mika Hämäläinen

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
22 changes: 19 additions & 3 deletions README.md
@@ -1,4 +1,20 @@
# Syntaxmaker
A python NLG tool for Finnish
Syntax maker
=======
The NLG tool for Finnish by [Mika Hämäläinen](https://mikakalevi.com)

For readme, see the [Wiki](https://github.com/DiscoveryGroup/syntaxmaker/wiki)
Syntax maker is a natural language generation tool for producing syntactically correct Finnish sentences automatically. It is especially useful because Finnish has such a high diversity in its morphosyntax: all you need to know are the lemmas and their parts of speech, and syntax maker will take care of the rest.

For instance, just throw in words `rantaleijona`, `uneksia`, `korkea` and `aalto` and you will get `rantaleijonat uneksivat korkeista aalloista`. So you will get the morphology right automatically! Don't believe me? [Just take a look at this tutorial to find out how.](https://github.com/mikahama/syntaxmaker/wiki/Creating-a-sentence,-the-basics)
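
To give a flavour of the API, here is a minimal sketch of building a tiny sentence. `create_verb_pharse` (spelled like that in the source), `create_phrase` and `to_string` appear in `syntax_maker.py`; the import path, the morphology argument and the `add_subject_to_vp` helper are assumptions about the packaged library, so double-check the exact names in the tutorial above.

```python
# Minimal sketch, not a verbatim tutorial example.
# Assumptions: the pip package exposes the module as syntaxmaker.syntax_maker,
# create_phrase accepts a morphology dict, and add_subject_to_vp exists.
from syntaxmaker import syntax_maker

vp = syntax_maker.create_verb_pharse("uneksia")  # function name is spelled this way in syntax_maker.py
subject = syntax_maker.create_phrase("NP", "rantaleijona", {"NUM": "PL"})  # plural NP (assumed argument)

syntax_maker.add_subject_to_vp(vp, subject)  # assumed helper name
print(vp.to_string())  # expected: something like "rantaleijonat uneksivat"
```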


# Requirements
1. This tool requires Omorfi; you can download the correct binary version from [http://mikakalevi.com/omorfi](http://mikakalevi.com/omorfi)
2. HFST: `pip install hfst`. For more instructions, see my post about [HFST and Python](https://mikalikes.men/using-hfst-on-python/). A quick sanity check of the setup is sketched below.
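
If you want to verify the requirements first, a minimal sketch like the one below can help. The directory matches the default path used in `inflector.py` on Linux/macOS; the transducer file name is a placeholder, since the exact names depend on the Omorfi package you downloaded.

```python
# Environment check only -- not part of syntaxmaker.
# The transducer file name is hypothetical; use whatever the Omorfi download provides.
import os
import hfst  # fails here if "pip install hfst" did not work

omorfi_dir = "/usr/local/share/hfst/fi/"  # default directory expected by inflector.py on Linux/macOS
transducer_file = os.path.join(omorfi_dir, "generation.omor.hfst")  # placeholder file name

print("hfst imported OK")
print("Omorfi transducer present:", os.path.isfile(transducer_file))
if os.path.isfile(transducer_file):
    generator = hfst.HfstInputStream(transducer_file).read()  # standard way to load a binary transducer
    print("Loaded a transducer of type:", type(generator))
```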

# Installing
Run `pip install syntaxmaker` to install this library.
After installing it, go to [Creating a sentence, the basics](https://github.com/DiscoveryGroup/syntaxmaker/wiki/Creating-a-sentence,-the-basics) for a quick start guide.

# More information?

Just go ahead and [take a look at the wiki](https://github.com/mikahama/syntaxmaker/wiki) or my [blog post about Syntax maker](https://mikalikes.men/create-finnish-sentences-computationally-in-python-nlg/).
2 changes: 1 addition & 1 deletion adposition_tool.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
__author__ = 'mikahama'
__author__ = 'Mika Hämäläinen'
import csv
import random
import os
22 changes: 17 additions & 5 deletions backup_of_build_scripts/DESCRIPTION.rst
@@ -1,14 +1,26 @@
An NLG tool for Finnish
=======================
Syntax Maker
=============
The NLG tool for Finnish by `Mika Hämäläinen <https://mikakalevi.com>`_

syntax_maker is a script that tries to create syntactically correct Finnish sentences based on a hand-written grammar and automatically learned information about the language.

Syntax maker is a natural language generation tool for producing syntactically correct Finnish sentences automatically. It is especially useful because Finnish has such a high diversity in its morphosyntax: all you need to know are the lemmas and their parts of speech, and syntax maker will take care of the rest.

For instance, just throw in words rantaleijona, uneksia, korkea and aalto and you will get rantaleijonat uneksivat korkeista aalloista. So you will get the morphology right automatically! Don't believe me? `Just take a look at this tutorial to find out how. <https://github.com/mikahama/syntaxmaker/wiki/Creating-a-sentence,-the-basics>`_

**Update:** Python 2 and Python 3 are both now supported!

============
Installation
============

**NOTE 1:** This tool requires Omorfi; you can download the correct binary version from http://mikakalevi.com/omorfi

**NOTE 2:** You need to install libhfst and hfst from https://kitwiki.csc.fi/twiki/bin/view/KitWiki/HfstCommandLineTools
**NOTE 2:** If you have any issues with installing HFST, see `this HFST tutorial
<https://mikalikes.men/using-hfst-on-python/>`_.

===========================
How to use
===========================

Start of by following this tutorial: https://github.com/DiscoveryGroup/syntaxmaker/wiki/Creating-a-sentence,-the-basics
Start off by following this tutorial: https://github.com/mikahama/syntaxmaker/wiki/Creating-a-sentence,-the-basics . Or you can go ahead and `take a look at the wiki <https://github.com/mikahama/syntaxmaker/wiki>`_
or my `blog post about Syntax maker <https://mikalikes.men/create-finnish-sentences-computationally-in-python-nlg/>`_
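
As a quick illustration, here is a minimal sketch of the ``create_adposition_phrase`` helper defined in ``syntax_maker.py`` (the import path is an assumption about the packaged library)::

    from syntaxmaker import syntax_maker

    np = syntax_maker.create_phrase("NP", "kissa")
    pp = syntax_maker.create_adposition_phrase("ilman", np)
    print(pp.to_string())  # expected: something like "ilman kissaa"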
4 changes: 2 additions & 2 deletions backup_of_build_scripts/MANIFEST.in
@@ -5,6 +5,6 @@ include DESCRIPTION.rst

# If using Python 2.6 or less, then have to include package data, even though
# it's already declared in setup.py
include verb_valences_new.bin
include verb_valences_new.json
include data/postpositions.csv
include data/prepositions.csv
include data/prepositions.csv
14 changes: 6 additions & 8 deletions backup_of_build_scripts/setup.py
Expand Up @@ -23,16 +23,16 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='1.0.2',
version='1.1.0',

description='An NLG tool for Finnish',
long_description=long_description,

# The project's main homepage.
url='https://github.com/DiscoveryGroup/syntaxmaker/',
url='https://mikakalevi.com/nlp/syntax-maker/',

# Author details
author='Mika Hämäläinen, University of Helsinki',
author='Mika Hämäläinen, Dept. of Modern Languages, University of Helsinki',
author_email='mika.hamalainen@cs.helsinki.fi',

# Choose your license
@@ -51,12 +51,10 @@
'Topic :: Text Processing',
"Natural Language :: Finnish",

# Pick your license as you wish (should match "license" above)
'License :: OSI Approved :: Apache Software License',

# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',

@@ -74,7 +72,7 @@
# your project is installed. For an analysis of "install_requires" vs pip's
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=[],
install_requires=["hfst"],

# List additional groups of dependencies here (e.g. development
# dependencies). You can install these using the following syntax,
@@ -86,7 +84,7 @@
# installed, specify them here. If using Python 2.6 or less, then these
# have to be included in MANIFEST.in as well.
package_data={
'syntaxmaker': ['verb_valences_new.bin', 'data/*.csv', '*.json'],
'syntaxmaker': ['verb_valences_new.json', 'data/*.csv', '*.json'],
},

# Although 'package_data' is the preferred approach, in some case you may
2 changes: 2 additions & 0 deletions head.py
@@ -1,3 +1,5 @@
#encoding: utf-8
__author__ = 'Mika Hämäläinen'
import inflector

class Head:
16 changes: 12 additions & 4 deletions inflector.py
@@ -1,9 +1,18 @@
# -*- coding: utf-8 -*-
__author__ = 'Mika Hämäläinen'
import hfst
import os
import pronoun_tool
from itertools import ifilterfalse as ffilter
import sys

if (sys.version_info > (3, 0)):
# Python 3
new_python = True
from itertools import filterfalse as ffilter
else:
# Python 2
new_python = False
from itertools import ifilterfalse as ffilter

datadir = "/usr/local/share/hfst/fi/"
if os.name == 'nt':
@@ -25,10 +34,10 @@

def inflect(word, pos, args):
for el in args:
if type(args[el]) is unicode:
if not new_python and type(args[el]) is unicode:
args[el] = args[el].encode('utf-8')

if type(word) is unicode:
if not new_python and type(word) is unicode:
word = word.encode('utf-8')
word = word.replace("|", "")
if len(args) == 0:
@@ -190,7 +199,6 @@ def standard_nominal_inflection(noun, case, number):
return noun

def new_generator(analysis):
print analysis
results = synthetiser.lookup(analysis)
if len(results) != 0:
word = results[0][0]
13 changes: 10 additions & 3 deletions phrase.py
@@ -1,10 +1,17 @@
__author__ = 'mikahama'
#encoding: utf-8
__author__ = 'Mika Hämäläinen'
from head import Head
import copy
import re
import re, sys

class Phrase:
def __init__(self, head, structure, morphology={}):
if (sys.version_info > (3, 0)):
# Python 3
self.new_python = True
else:
# Python 2
self.new_python = False
self.parent = None
self.head = Head(head, structure["head"])
self.components = copy.deepcopy(structure["components"])
@@ -46,7 +53,7 @@ def to_string(self, received_governance = {}):
string_representation = string_representation + " " + head_word
else:
phrase = self.components[item]
if type(phrase) is str or type(phrase) is unicode:
if type(phrase) is str or (not self.new_python and type(phrase) is unicode):
#Data not set
pass
else:
5 changes: 1 addition & 4 deletions pronoun_tool.py
@@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
__author__ = 'mikahama'
import pickle
import random
import os
__author__ = 'mika hämäläinen'


pronouns = {"SG1" : "minä", "SG2" : "sinä", "SG3" : "se", "PL1" : "me", "PL2": "te", "PL3": "ne"}
15 changes: 9 additions & 6 deletions syntax_maker.py
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
__author__ = 'Mika Hämäläinen'
import verb_valence
from phrase import Phrase
import json
@@ -246,14 +247,14 @@ def create_adposition_phrase(adposition, np):
set_vp_mood_and_tense(vp, mood="POTN")
turn_vp_into_question(vp)
print vp.to_string()
print(vp.to_string())
np = create_phrase("NP", "kissa")
pp = create_adposition_phrase("ilman", np)
print pp.to_string()
"""
print(pp.to_string())
"""
np1 = create_phrase("NP", "mies")
relp = create_verb_pharse("katsoa")
ppp = create_phrase("NP", "orava")
@@ -272,5 +273,7 @@
add_advlp_to_vp(vep, pp)
print vep
"""
print(vep)
"""
7 changes: 4 additions & 3 deletions verb_valence.py
@@ -1,8 +1,9 @@
# -*- coding: utf-8 -*-
import pickle
__author__ = 'Mika Hämäläinen'
import os
import random
import json
import codecs

valences = {}
direct_cases = {"Gen", "Par", "Ela", "Ill"}
@@ -14,8 +15,8 @@

def load_valences_from_bin():
global valences
valence_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'verb_valences_new.bin')
valences = pickle.load(open(valence_path, "rb"))
valence_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'verb_valences_new.json')
valences = json.load(codecs.open(valence_path, "r", encoding="utf-8"))


load_valences_from_bin()
