diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..48226e55 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,7 @@ +[run] +branch = True + +[report] +show_missing = True +omit = + textile/tests/* \ No newline at end of file diff --git a/.gitignore b/.gitignore index 09c1722b..d4599882 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,9 @@ *~ *.pyo *.egg-info +.cache/ .coverage +.eggs/ .noseids* docs/build docs/coverage @@ -12,6 +14,7 @@ build bin dist eggs +htmlcov parts develop-eggs .DS_Store diff --git a/.travis.yml b/.travis.yml index dec1c730..7f40ce9b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,25 @@ language: python +env: + - REQUIREMENTS=true + - REQUIREMENTS=false python: - "2.6" - "2.7" - "3.2" - "3.3" - "3.4" + - "3.5" - "pypy" # command to install dependencies install: - - pip install -r requirements.txt - - python setup.py -q install - - if [[ ! $TRAVIS_PYTHON_VERSION == pypy ]] ; then pip install regex; fi + - if [[ $REQUIREMENTS == true ]] ; then pip install -r requirements.txt ; fi + - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]] ; then pip install coverage==3.7.1; fi + - pip install coveralls pytest pytest-cov coverage codecov + - pip install -e . + - if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy' ]] ; then pip install regex; fi # command to run tests -script: nosetests +script: py.test +sudo: false +after_success: + - coveralls + - codecov diff --git a/CHANGELOG.textile b/CHANGELOG.textile index a8cafbea..c9466fce 100644 --- a/CHANGELOG.textile +++ b/CHANGELOG.textile @@ -1,19 +1,38 @@ h1. Textile Changelog -h1. Version 2.2.1 +h2. Version 2.3.1 +* Regression bugfix: empty string input returns empty string again. + +h2. 
Version 2.3.0 + +* Bugfixes: +** Support data URIs in img tags +** Fix autolink urls with image references ("#17":https://github.com/textile/python-textile/issues/17) +** Fix textile links containing parentheses ("#20":https://github.com/textile/python-textile/issues/20) +** Fix double-encoding of code blocks ("#21":https://github.com/textile/python-textile/issues/21) +** Fix handling of scheme in self-linked URLs ("#16":https://github.com/textile/python-textile/issues/16) +** Fix Markup not parsed if followed by certain characters ("#22":Markup not parsed if followed by certain characters) +* Convert testing over to "py.test":http://pytest.org/, improving unicode testing +* Update functionality for tables, notelists, and footnotes. This involved a major reworking of parts of the code, but it should now match php-textile and txstyle.org precisely. Please file an issue for any bugs you come across. + +h2. Version 2.2.2 + +* bugfix: "regex":https://pypi.python.org/pypi/regex is now an optional dependency + +h2. Version 2.2.1 * drop textilefactory support for html. * Various development-related bugfixes. * Added this changelog. -h1. Version 2.2.0 +h2. Version 2.2.0 * Started refactoring the code to be less repetitive. @textile.Textile().parse()@ is a little more friendly than @textile.Textile().textile()@ There may be more work to be done on this front to make the flow a little smoother. * We now support versions 2.6 - 3.4 (including 3.2) using the same codebase. Many thanks to Radek Czajka for this. * Drop support for html4. We now only output xhtml or html5. * Various development-related bugfixes. -h1. Version 2.1.8 +h2. Version 2.1.8 * Add support for html5 output. 
* Lots of new functionality added bringing us in line with the official Textile 2.4 diff --git a/README.textile b/README.textile index b2cc5372..e645e341 100644 --- a/README.textile +++ b/README.textile @@ -1,14 +1,16 @@ -!https://travis-ci.org/textile/python-textile.svg?branch=develop!:https://travis-ci.org/textile/python-textile +!https://travis-ci.org/textile/python-textile.svg!:https://travis-ci.org/textile/python-textile !https://coveralls.io/repos/github/textile/python-textile/badge.svg!:https://coveralls.io/github/textile/python-textile?branch=master !https://codecov.io/github/textile/python-textile/coverage.svg!:https://codecov.io/github/textile/python-textile h1. python-textile -python-textile is a Python port of Textile, Dean Allen's humane web text generator. +python-textile is a Python port of "Textile":http://txstyle.org/, Dean Allen's humane web text generator. h2. Installation -Install the 'textile' folder on your python path, or @pip install textile@. -Optional dependencies include PIL/Pillow (for checking images size) -and regex (for faster unicode-aware string matching). +@pip install textile@ + +Optional dependencies include: +* "PIL/Pillow":http://python-pillow.github.io/ (for checking images size) +* "regex":https://pypi.python.org/pypi/regex (for faster unicode-aware string matching). h2. 
Usage diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..882527b9 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +addopts = --cov=textile --cov-report=html --cov-append --cov-report=term-missing \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 42633e26..5cfb4428 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,2 @@ -nose==1.3.4 -coverage==3.7.1 html5lib==0.999 -Pillow==2.6.0 +Pillow==3.0.0 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 5544feea..3a3405b0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,4 @@ -[nosetests] -detailed-errors=1 -with-coverage=1 -cover-package=textile -cover-erase=1 -with-doctest=1 -with-id = 1 +[aliases] +test=pytest [bdist_wheel] universal=1 diff --git a/setup.py b/setup.py index 386c7744..897f07dd 100644 --- a/setup.py +++ b/setup.py @@ -2,14 +2,6 @@ import os import sys -install_requires = [] - - -if 'develop' in sys.argv: - install_requires.extend([ - 'tox', - ]) - def get_version(): basedir = os.path.dirname(__file__) with open(os.path.join(basedir, 'textile/version.py')) as f: @@ -32,14 +24,24 @@ def get_version(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Topic :: Software Development :: Libraries :: Python Modules', + 'Programming Language :: Python', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.6', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', ], - keywords='textile,text', - install_requires=install_requires, + keywords='textile,text,html markup', + install_requires=['six',], extras_require={ ':python_version=="2.6"': ['ordereddict>=1.1'], + 'develop': ['regex', 'pytest', 'pytest-cov'], }, - test_suite='nose.collector', - 
tests_require=['nose'], + setup_requires=['pytest-runner'], + tests_require=['pytest', 'pytest-cov'], include_package_data=True, zip_safe=False, ) diff --git a/tests/test_attributes.py b/tests/test_attributes.py new file mode 100644 index 00000000..70da8422 --- /dev/null +++ b/tests/test_attributes.py @@ -0,0 +1,15 @@ +from textile.utils import parse_attributes +import re + +def test_parse_attributes(): + assert parse_attributes('\\1', element='td') == {'colspan': '1'} + assert parse_attributes('/1', element='td') == {'rowspan': '1'} + assert parse_attributes('^', element='td') == {'style': 'vertical-align:top;'} + assert parse_attributes('{color: blue}') == {'style': 'color: blue;'} + assert parse_attributes('[en]') == {'lang': 'en'} + assert parse_attributes('(cssclass)') == {'class': 'cssclass'} + assert parse_attributes('(') == {'style': 'padding-left:1em;'} + assert parse_attributes(')') == {'style': 'padding-right:1em;'} + assert parse_attributes('<') == {'style': 'text-align:left;'} + assert parse_attributes('(c#i)') == {'class': 'c', 'id': 'i'} + assert parse_attributes('\\2 100', element='col') == {'span': '2', 'width': '100'} diff --git a/tests/test_block.py b/tests/test_block.py new file mode 100644 index 00000000..16873006 --- /dev/null +++ b/tests/test_block.py @@ -0,0 +1,49 @@ +from __future__ import unicode_literals + +from textile import Textile +from textile.objects import Block + +try: + from collections import OrderedDict +except ImportError: + from ordereddict import OrderedDict + +def test_block(): + t = Textile() + result = t.block('h1. foobar baby') + expect = '\t

foobar baby

' + assert result == expect + + b = Block(t, "bq", "", None, "", "Hello BlockQuote") + expect = ('blockquote', OrderedDict(), 'p', OrderedDict(), + 'Hello BlockQuote') + result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) + assert result == expect + + b = Block(t, "bq", "", None, "http://google.com", "Hello BlockQuote") + citation = '{0}1:url'.format(t.uid) + expect = ('blockquote', OrderedDict([('cite', + '{0.uid}{0.refIndex}:url'.format(t))]), 'p', OrderedDict(), + 'Hello BlockQuote') + result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) + assert result == expect + + b = Block(t, "bc", "", None, "", 'printf "Hello, World";') + # the content of text will be turned shelved, so we'll asert only the + # deterministic portions of the expected values, below + expect = ('pre', OrderedDict(), 'code', OrderedDict()) + result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts) + assert result == expect + + b = Block(t, "h1", "", None, "", "foobar") + expect = ('h1', OrderedDict(), '', OrderedDict(), 'foobar') + result = (b.outer_tag, b.outer_atts, b.inner_tag, b.inner_atts, b.content) + assert result == expect + +def test_block_tags_false(): + t = Textile(block_tags=False) + assert t.block_tags is False + + result = t.parse('test') + expect = 'test' + assert result == expect diff --git a/tests/test_footnoteRef.py b/tests/test_footnoteRef.py new file mode 100644 index 00000000..b773ad2f --- /dev/null +++ b/tests/test_footnoteRef.py @@ -0,0 +1,8 @@ +from textile import Textile +import re + +def test_footnoteRef(): + t = Textile() + result = t.footnoteRef('foo[1]') + expect = 'foo1'.format(t.linkPrefix) + assert expect == result diff --git a/tests/test_getRefs.py b/tests/test_getRefs.py new file mode 100644 index 00000000..f6e0ae4f --- /dev/null +++ b/tests/test_getRefs.py @@ -0,0 +1,11 @@ +from textile import Textile + +def test_getRefs(): + t = Textile() + result = t.getRefs("some text [Google]http://www.google.com") + 
expect = 'some text ' + assert result == expect + + result = t.urlrefs + expect = {'Google': 'http://www.google.com'} + assert result == expect diff --git a/tests/test_getimagesize.py b/tests/test_getimagesize.py new file mode 100644 index 00000000..43f85e3a --- /dev/null +++ b/tests/test_getimagesize.py @@ -0,0 +1,9 @@ +from textile.tools.imagesize import getimagesize +import pytest + +PIL = pytest.importorskip('PIL') + +def test_imagesize(): + assert getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif") == (276, 110) + assert getimagesize("http://bad.domain/") == '' + assert getimagesize("http://www.google.com/robots.txt") is None diff --git a/tests/test_github_issues.py b/tests/test_github_issues.py new file mode 100644 index 00000000..29fc1b04 --- /dev/null +++ b/tests/test_github_issues.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import textile + +def test_github_issue_16(): + result = textile.textile('"$":http://google.com "$":https://google.com "$":mailto:blackhole@sun.comet') + expect = '\t

google.com google.com blackhole@sun.comet

' + assert result == expect + +def test_github_issue_17(): + result = textile.textile('!http://www.ox.ac.uk/favicon.ico!') + expect = '\t

' + assert result == expect + +def test_github_issue_20(): + text = 'This is a link to a ["Wikipedia article about Textile":http://en.wikipedia.org/wiki/Textile_(markup_language)].' + result = textile.textile(text) + expect = '\t

This is a link to a Wikipedia article about Textile.

' + assert result == expect + +def test_github_issue_21(): + text = '''h1. xml example + +bc. + + bar +''' + result = textile.textile(text) + expect = '\t

xml example

\n\n
\n<foo>\n  bar\n</foo>\n
' + assert result == expect + +def test_github_issue_22(): + text = '''_(artist-name)Ty Segall_’s''' + result = textile.textile(text) + expect = '\t

Ty Segall’s

' + assert result == expect + +def test_github_issue_26(): + text = '' + result = textile.textile(text) + expect = '' + assert result == expect diff --git a/tests/test_glyphs.py b/tests/test_glyphs.py new file mode 100644 index 00000000..fcf2636d --- /dev/null +++ b/tests/test_glyphs.py @@ -0,0 +1,32 @@ +from textile import Textile + +def test_glyphs(): + t = Textile() + + result = t.glyphs("apostrophe's") + expect = 'apostrophe’s' + assert result == expect + + result = t.glyphs("back in '88") + expect = 'back in ’88' + assert result == expect + + result = t.glyphs('foo ...') + expect = 'foo …' + assert result == expect + + result = t.glyphs('--') + expect = '—' + assert result == expect + + result = t.glyphs('FooBar[tm]') + expect = 'FooBar™' + assert result == expect + + result = t.glyphs("

Cat's Cradle by Vonnegut

") + expect = '

Cat’s Cradle by Vonnegut

' + assert result == expect + + result = t.glyphs('test"') + expect = 'test” ' + assert result == expect diff --git a/tests/test_image.py b/tests/test_image.py new file mode 100644 index 00000000..aad39e29 --- /dev/null +++ b/tests/test_image.py @@ -0,0 +1,21 @@ +from textile import Textile + +def test_image(): + t = Textile() + result = t.image('!/imgs/myphoto.jpg!:http://jsamsa.com') + expect = (''.format( + t.uid)) + assert result == expect + assert t.refCache[1] == 'http://jsamsa.com' + assert t.refCache[2] == '/imgs/myphoto.jpg' + + result = t.image('!'.format(t.uid)) + assert result == expect diff --git a/tests/test_imagesize.py b/tests/test_imagesize.py new file mode 100644 index 00000000..112989e1 --- /dev/null +++ b/tests/test_imagesize.py @@ -0,0 +1,13 @@ +import textile + +def test_imagesize(): + imgurl = 'http://www.google.com/intl/en_ALL/images/srpr/logo1w.png' + result = textile.tools.imagesize.getimagesize(imgurl) + try: + import PIL + + expect = (275, 95) + assert result == expect + except ImportError: + expect = '' + assert result == expect diff --git a/tests/test_lists.py b/tests/test_lists.py new file mode 100644 index 00000000..4e85f4c8 --- /dev/null +++ b/tests/test_lists.py @@ -0,0 +1,7 @@ +from textile import Textile + +def test_lists(): + t = Textile() + result = t.textileLists("* one\n* two\n* three") + expect = '\t' + assert result == expect diff --git a/tests/test_retrieve.py b/tests/test_retrieve.py new file mode 100644 index 00000000..10bd1733 --- /dev/null +++ b/tests/test_retrieve.py @@ -0,0 +1,6 @@ +from textile import Textile + +def test_retrieve(): + t = Textile() + id = t.shelve("foobar") + assert t.retrieve(id) == 'foobar' diff --git a/tests/test_span.py b/tests/test_span.py new file mode 100644 index 00000000..d83530dd --- /dev/null +++ b/tests/test_span.py @@ -0,0 +1,19 @@ +from textile import Textile + +def test_span(): + t = Textile() + result = t.span("hello %(bob)span *strong* and **bold**% goodbye") + expect = ('hello span 
strong and ' + 'bold goodbye') + assert result == expect + + result = t.span('%:http://domain.tld test%') + expect = 'test' + assert result == expect + + t = Textile() + # cover the partial branch where we exceed the max_span_depth. + t.max_span_depth = 2 + result = t.span('_-*test*-_') + expect = '*test*' + assert result == expect diff --git a/tests/test_subclassing.py b/tests/test_subclassing.py new file mode 100644 index 00000000..9235e032 --- /dev/null +++ b/tests/test_subclassing.py @@ -0,0 +1,17 @@ +import textile + +def test_change_glyphs(): + class TextilePL(textile.Textile): + glyph_definitions = dict(textile.Textile.glyph_definitions, + quote_double_open = '„' + ) + + test = 'Test "quotes".' + expect = '\t

Test „quotes”.

' + result = TextilePL().parse(test) + assert expect == result + + # Base Textile is unchanged. + expect = '\t

Test “quotes”.

' + result = textile.textile(test) + assert expect == result diff --git a/tests/test_table.py b/tests/test_table.py new file mode 100644 index 00000000..0a3cb0d6 --- /dev/null +++ b/tests/test_table.py @@ -0,0 +1,12 @@ +from textile import Textile + +def test_table(): + t = Textile() + result = t.table('(rowclass). |one|two|three|\n|a|b|c|') + expect = '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
\n\n' + assert result == expect + + t = Textile(lite=True) + result = t.table('(lite). |one|two|three|\n|a|b|c|\n| * test\n* test|1|2|') + expect = '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
* test\n* test12
\n\n' + assert result == expect diff --git a/tests/test_textile.py b/tests/test_textile.py new file mode 100644 index 00000000..dd069fb8 --- /dev/null +++ b/tests/test_textile.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +import pytest +import re +import textile + +def test_FootnoteReference(): + html = textile.textile('YACC[1]') + assert re.search(r'^\t

YACC1

', html) is not None + +def test_Footnote(): + html = textile.textile('This is covered elsewhere[1].\n\nfn1. Down here, in fact.\n\nfn2. Here is another footnote.') + assert re.search(r'^\t

This is covered elsewhere1.

\n\n\t

1 Down here, in fact.

\n\n\t

2 Here is another footnote.

$', html) is not None + + html = textile.textile('''See[1] for details -- or perhaps[100] at a push.\n\nfn1. Here are the details.\n\nfn100(footy#otherid). A totally unrelated footnote.''') + assert re.search(r'^\t

See1 for details — or perhaps100 at a push.

\n\n\t

1 Here are the details.

\n\n\t

100 A totally unrelated footnote.

$', html) is not None + + html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''') + assert re.search(r'^\t

See2 for details, and later, reference it again2.

\n\n\t

2 Here are the details.

$', html) is not None + + html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''') + assert re.search(r'^\t

See3 for details.

\n\n\t

3 Here are the details.

$', html) is not None + + html = textile.textile('''See[4!] for details.\n\nfn4^. Here are the details.''') + assert re.search(r'^\t

See4 for details.

\n\n\t

4 Here are the details.

$', html) is not None + +def test_issue_35(): + result = textile.textile('"z"') + expect = '\t

“z”

' + assert result == expect + + result = textile.textile('" z"') + expect = '\t

“ z”

' + assert result == expect + +def test_restricted(): + #Note that the HTML is escaped, thus rendering the " + result = textile.textile_restricted(test) + expect = "\t

Here is some text.
\n<script>alert(‘hello world’)</script>

" + + assert result == expect + + test = "Here's some text." + result = textile.textile_restricted(test) + expect = "\t

Here’s some <!— commented out —> text.

" + + assert result == expect + + test = "p[fr]. Partir, c'est toujours mourir un peu." + result = textile.textile_restricted(test) + expect = '\t

Partir, c’est toujours mourir un peu.

' + + assert result == expect + +def test_unicode_footnote(): + html = textile.textile('текст[1]') + assert re.compile(r'^\t

текст1

$', re.U).search(html) is not None + +def test_autolinking(): + test = """some text "test":http://www.google.com http://www.google.com "$":http://www.google.com""" + result = """\t

some text test http://www.google.com www.google.com

""" + expect = textile.textile(test) + + assert result == expect + +def test_sanitize(): + test = "a paragraph of benign text" + result = "\t

a paragraph of benign text

" + try: + expect = textile.Textile().parse(test, sanitize=True) + assert result == expect + + test = """

a paragraph of evil text

""" + result = '

a paragraph of evil text

' + expect = textile.Textile().parse(test, sanitize=True) + assert result == expect + + test = """

a paragraph of benign text
and more text

""" + result = '

a paragraph of benign text
\nand more text

' + expect = textile.Textile(html_type='html5').parse(test, sanitize=True) + assert result == expect + except Exception as e: + message = '{0}'.format(e) + assert "html5lib not available" in message + +def test_imagesize(): + PIL = pytest.importorskip('PIL') + + test = "!http://www.google.com/intl/en_ALL/images/srpr/logo1w.png!" + result = '\t

' + expect = textile.Textile(get_sizes=True).parse(test) + assert result == expect + +def test_endnotes_simple(): + test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.\n\nnote#my_first_label Over the past billion years, about a quarter of the moon's 4.5 billion-year lifespan, it has shrunk about 200 meters (700 feet) in diameter.""" + html = textile.textile(test) + result_pattern = r"""\t

Scientists say the moon is slowly shrinking1.

\n\n\t
    \n\t\t
  1. Over the past billion years, about a quarter of the moon’s 4.5 billion-year lifespan, it has shrunk about 200 meters \(700 feet\) in diameter.
  2. \n\t
$""" + result_re = re.compile(result_pattern) + assert result_re.search(html) is not None + +def test_endnotes_complex(): + test = """Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality[#netneutral] and has expressed the view that ISPs should supply "connectivity with no strings attached"[#netneutral!] [#tbl_quote]\n\nBerners-Lee admitted that the forward slashes ("//") in a web address were actually unnecessary. He told the newspaper that he could easily have designed URLs not to have the forward slashes. "... it seemed like a good idea at the time,"[#slashes]\n\nnote#netneutral. "Web creator rejects net tracking":http://news.bbc.co.uk/2/hi/technology/7613201.stm. BBC. 15 September 2008\n\nnote#tbl_quote. "Web inventor's warning on spy software":http://www.telegraph.co.uk/news/uknews/1581938/Web-inventor%27s-warning-on-spy-software.html. The Daily Telegraph (London). 25 May 2008\n\nnote#slashes. "Berners-Lee 'sorry' for slashes":http://news.bbc.co.uk/1/hi/technology/8306631.stm. BBC. 14 October 2009\n\nnotelist.""" + html = textile.textile(test) + result_pattern = r"""\t

Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality1 and has expressed the view that ISPs should supply “connectivity with no strings attached”1 2

\n\n\t

Berners-Lee admitted that the forward slashes \(“//”\) in a web address were actually unnecessary. He told the newspaper that he could easily have designed URLs not to have the forward slashes. “… it seemed like a good idea at the time,”3

\n\n\t
    \n\t\t
  1. a b Web creator rejects net tracking. BBC. 15 September 2008
  2. \n\t\t
  3. a Web inventor’s warning on spy software. The Daily Telegraph \(London\). 25 May 2008
  4. \n\t\t
  5. a Berners-Lee ‘sorry’ for slashes. BBC. 14 October 2009
  6. \n\t
$""" + result_re = re.compile(result_pattern) + assert result_re.search(html) is not None + +def test_endnotes_unreferenced_note(): + test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#lavader(noteclass). "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman(#noteid). "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13. After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:§^.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:‡""" + html = textile.textile(test) + result_pattern = r"""\t

Scientists say1 the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese2. If this proves true I suspect we are in for troubled times3 as people argue over their “share” of the moon’s cheese. In the end, its limited size1 may prove problematic.

\n\n\t
    \n\t\t
  1. a b Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. a Proof of a cheese moon
  4. \n\t\t
  5. a After all, things do go wrong.
  6. \n\t
\n\n\t
    \n\t\t
  1. § Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. § Proof of a cheese moon
  4. \n\t\t
  5. § After all, things do go wrong.
  6. \n\t
\n\n\t
    \n\t\t
  1. Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. Proof of a cheese moon
  4. \n\t\t
  5. After all, things do go wrong.
  6. \n\t
""" + result_re = re.compile(result_pattern, re.U) + assert result_re.search(html) is not None + +def test_endnotes_malformed(): + test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13!] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#unused An unreferenced note.\n\nnote#lavader^ "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman^ "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13^ After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:α!+""" + html = textile.textile(test) + result_pattern = r"""^\t

Scientists say1 the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese2. If this proves true I suspect we are in for troubled times3 as people argue over their “share” of the moon’s cheese. In the end, its limited size1 may prove problematic.

\n\n\t
    \n\t\t
  1. α Proof of the small moon hypothesis. Copyright© Laurent Laveder
  2. \n\t\t
  3. α Proof of a cheese moon
  4. \n\t\t
  5. α After all, things do go wrong.
  6. \n\t\t
  7. An unreferenced note.
  8. \n\t
$""" + result_re = re.compile(result_pattern, re.U) + assert result_re.search(html) is not None + +def test_endnotes_undefined_note(): + test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.""" + html = textile.textile(test) + result_pattern = r"""\t

Scientists say the moon is slowly shrinking1.

\n\n\t
    \n\t\t
  1. Undefined Note \[#my_first_label\].
  2. \n\t
$""" + result_re = re.compile(result_pattern) + assert result_re.search(html) is not None + +def test_encode_url(): + # I tried adding these as doctests, but the unicode tests weren't + # returning the correct results. + t = textile.Textile() + + url = 'http://www.example.local' + result = 'http://www.example.local' + eurl = t.encode_url(url) + assert eurl == result + + url = 'http://user@www.example.local' + result = 'http://user@www.example.local' + eurl = t.encode_url(url) + assert eurl == result + + url = 'http://user:password@www.example.local' + result = 'http://user:password@www.example.local' + eurl = t.encode_url(url) + assert eurl == result + + url = 'http://user:password@www.example.local/Ubermensch' + result = 'http://user:password@www.example.local/Ubermensch' + eurl = t.encode_url(url) + assert eurl == result + + url = "http://user:password@www.example.local/Übermensch" + result = "http://user:password@www.example.local/%C3%9Cbermensch" + eurl = t.encode_url(url) + assert eurl == result + + url = 'http://user:password@www.example.local:8080/Übermensch' + result = 'http://user:password@www.example.local:8080/%C3%9Cbermensch' + eurl = t.encode_url(url) + assert eurl == result + +def test_footnote_crosslink(): + html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''') + searchstring = r'\t

See2 for details, and later, reference it again2.

\n\n\t

2 Here are the details.

$' + assert re.compile(searchstring).search(html) is not None + +def test_footnote_without_reflink(): + html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''') + searchstring = r'^\t

See3 for details.

\n\n\t

3 Here are the details.

$' + assert re.compile(searchstring).search(html) is not None + +def testSquareBrackets(): + html = textile.textile("""1[^st^], 2[^nd^], 3[^rd^]. 2 log[~n~]\n\nA close[!http://textpattern.com/favicon.ico!]image.\nA tight["text":http://textpattern.com/]link.\nA ["footnoted link":http://textpattern.com/][182].""") + searchstring = r'^\t

1st, 2nd, 3rd. 2 logn

\n\n\t

A closeimage.
\nA tighttextlink.
\nA footnoted link182.

' + assert re.compile(searchstring).search(html) is not None + +def test_html5(): + """docstring for testHTML5""" + + test = 'We use CSS(Cascading Style Sheets).' + result = '\t

We use CSS.

' + expect = textile.textile(test, html_type="html5") + assert result == expect + +def test_relURL(): + t = textile.Textile() + t.restricted = True + assert t.relURL("gopher://gopher.com/") == '#' diff --git a/tests/test_textilefactory.py b/tests/test_textilefactory.py new file mode 100644 index 00000000..846b9275 --- /dev/null +++ b/tests/test_textilefactory.py @@ -0,0 +1,28 @@ +from textile import textilefactory +import pytest + +def test_TextileFactory(): + f = textilefactory.TextileFactory() + result = f.process("some text here") + expect = '\t

some text here

' + assert result == expect + + f = textilefactory.TextileFactory(restricted=True) + result = f.process("more text here") + expect = '\t

more text here

' + assert result == expect + + f = textilefactory.TextileFactory(noimage=True) + result = f.process("this covers a partial branch.") + expect = '\t

this covers a partial branch.

' + assert result == expect + + # Certain parameter values are not permitted because they are illogical: + + with pytest.raises(ValueError) as ve: + f = textilefactory.TextileFactory(lite=True) + assert 'lite can only be enabled in restricted mode' in str(ve.value) + + with pytest.raises(ValueError) as ve: + f = textilefactory.TextileFactory(html_type='invalid') + assert "html_type must be 'xhtml' or 'html5'" in str(ve.value) diff --git a/tests/test_urls.py b/tests/test_urls.py new file mode 100644 index 00000000..69bae4f5 --- /dev/null +++ b/tests/test_urls.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +from textile import Textile +import re + +def test_urls(): + t = Textile() + assert t.relURL("http://www.google.com/") == 'http://www.google.com/' + + result = t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') + expect = 'fooobar {0}2:shelve and hello world {0}4:shelve '.format(t.uid) + assert result == expect + + result = t.links('""Open the door, HAL!"":https://xkcd.com/375/') + expect = '{0}6:shelve'.format(t.uid) + assert result == expect + + result = t.links('"$":http://domain.tld/test_[brackets]') + expect = '{0}8:shelve'.format(t.uid) + assert result == expect + + result = t.links('"$":http://domain.tld/test_') + expect = '{0}10:shelve'.format(t.uid) + assert result == expect + + expect = '"":test' + result = t.links(expect) + assert result == expect + + expect = '"$":htt://domain.tld' + result = t.links(expect) + assert result == expect + + result = t.shelveURL('') + expect = '' + assert result == expect + + result = t.retrieveURLs('{0}2:url'.format(t.uid)) + expect = '' + assert result == expect + + result = t.encode_url('http://domain.tld/übermensch') + expect = 'http://domain.tld/%C3%BCbermensch' + assert result == expect + + result = t.parse('A link that starts with an h is "handled":/test/ incorrectly.') + expect = '\t

A link that starts with an h is handled incorrectly.

' + assert result == expect + +def test_rel_attribute(): + t = Textile(rel='nofollow') + result = t.parse('"$":http://domain.tld') + expect = '\t

domain.tld

' + assert result == expect diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 00000000..9acb3dc8 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from textile import utils + +def test_encode_html(): + result = utils.encode_html('''this is a "test" of text that's safe to ''' + 'put in an attribute.') + expect = ('this is a "test" of text that's safe to put in an ' + '<html> attribute.') + assert result == expect + +def test_has_raw_text(): + assert utils.has_raw_text('

foo bar biz baz

') is False + assert utils.has_raw_text(' why yes, yes it does') is True + +def test_is_rel_url(): + assert utils.is_rel_url("http://www.google.com/") is False + assert utils.is_rel_url("/foo") is True + +def test_generate_tag(): + result = utils.generate_tag('span', 'inner text', {'class': 'test'}) + expect = 'inner text' + assert result == expect + + text = 'Übermensch' + attributes = {'href': 'http://de.wikipedia.org/wiki/%C3%C9bermensch'} + expect = 'Übermensch' + result = utils.generate_tag('a', text, attributes) + assert result == expect diff --git a/tests/test_values.py b/tests/test_values.py new file mode 100644 index 00000000..4a6fe606 --- /dev/null +++ b/tests/test_values.py @@ -0,0 +1,322 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals +import textile +import pytest + +xhtml_known_values = ( + ('hello, world', '\t

hello, world

'), + + ('A single paragraph.\n\nFollowed by another.', + '\t

A single paragraph.

\n\n\t

Followed by another.

'), + + ('I am very serious.\n\n
\nI am very serious.\n
', + '\t

I am very serious.

\n\n
\nI am <b>very</b> serious.\n
'), + + ('I spoke.\nAnd none replied.', '\t

I spoke.
\nAnd none replied.

'), + + ('"Observe!"', '\t

“Observe!”

'), + + ('Observe -- very nice!', '\t

Observe — very nice!

'), + + ('Observe - tiny and brief.', '\t

Observe – tiny and brief.

'), + + ('Observe...', '\t

Observe…

'), + + ('Observe ...', '\t

Observe …

'), + + ('Observe: 2 x 2.', '\t

Observe: 2 × 2.

'), + + ('one(TM), two(R), three(C).', '\t

one™, two®, three©.

'), + + ('h1. Header 1', '\t

Header 1

'), + + ('h2. Header 2', '\t

Header 2

'), + + ('h3. Header 3', '\t

Header 3

'), + + ('An old text\n\nbq. A block quotation.\n\nAny old text''', + '\t

An old text

\n\n\t
\n\t\t

A block quotation.

\n\t
\n\n\t

Any old text

'), + + ('I _believe_ every word.', '\t

I believe every word.

'), + + ('And then? She *fell*!', '\t

And then? She fell!

'), + + ('I __know__.\nI **really** __know__.', '\t

I know.
\nI really know.

'), + + ("??Cat's Cradle?? by Vonnegut", '\t

Cat’s Cradle by Vonnegut

'), + + ('Convert with @str(foo)@', '\t

Convert with str(foo)

'), + + ('I\'m -sure- not sure.', '\t

I’m sure not sure.

'), + + ('You are a +pleasant+ child.', '\t

You are a pleasant child.

'), + + ('a ^2^ + b ^2^ = c ^2^', '\t

a 2 + b 2 = c 2

'), + + ('log ~2~ x', '\t

log 2 x

'), + + ('I\'m %unaware% of most soft drinks.', '\t

I’m unaware of most soft drinks.

'), + + ("I'm %{color:red}unaware%\nof most soft drinks.", '\t

I’m unaware
\nof most soft drinks.

'), + + ('p(example1). An example', '\t

An example

'), + + ('p(#big-red). Red here', '\t

Red here

'), + + ('p(example1#big-red2). Red here', '\t

Red here

'), + + ('p{color:blue;margin:30px}. Spacey blue', '\t

Spacey blue

'), + + ('p[fr]. rouge', '\t

rouge

'), + + ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.', + '\t

I seriously blushed
\nwhen I sprouted' + ' that
\ncorn stalk from my
\ncabeza.

'), + + ('p<. align left', '\t

align left

'), + + ('p>. align right', '\t

align right

'), + + ('p=. centered', '\t

centered

'), + + ('p<>. justified', '\t

justified

'), + + ('p(. left ident 1em', '\t

left ident 1em

'), + + ('p((. left ident 2em', '\t

left ident 2em

'), + + ('p))). right ident 3em', '\t

right ident 3em

'), + + ('h2()>. Bingo.', '\t

Bingo.

'), + + ('h3()>[no]{color:red}. Bingo', '\t

Bingo

'), + + ('
\n\na.gsub!( /\n
', + '
\n\na.gsub!( /</, "" )\n\n
'), + + ('
\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n
\n\n' + 'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.', + '\t

\n\n\t

Sidebar

\n\n\t

Hobix
\n' + 'Ruby

\n\n\t

\n\n\t

The main text of the
\n' + 'page goes here and will
\nstay to the left of the
\nsidebar.

'), + + ('# A first item\n# A second item\n# A third', + '\t
    \n\t\t
  1. A first item
  2. \n\t\t
  3. A second item
  4. \n\t\t
  5. A third
  6. \n\t
'), + + ('# Fuel could be:\n## Coal\n## Gasoline\n## Electricity\n# Humans need only:\n## Water\n## Protein', + '\t
    \n\t\t
  1. Fuel could be:\n\t\t
      \n\t\t\t
    1. Coal
    2. \n\t\t\t
    3. Gasoline
    4. \n\t\t\t
    5. Electricity
    6. \n\t\t
  2. \n\t\t
  3. Humans need only:\n\t\t
      \n\t\t\t
    1. Water
    2. \n\t\t\t
    3. Protein
    4. \n\t\t
  4. \n\t\t
'), + + ('* A first item\n* A second item\n* A third', + '\t'), + + ('* Fuel could be:\n** Coal\n** Gasoline\n** Electricity\n* Humans need only:\n** Water\n** Protein', + '\t'), + + ('I searched "Google":http://google.com.', '\t

I searched Google.

'), + + ('I searched "a search engine (Google)":http://google.com.', '\t

I searched a search engine.

'), + + ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com', + '\t

I am crazy about Hobix
\nand it’s ' + 'all I ever
\nlink to!

'), + + ('!http://hobix.com/sample.jpg!', '\t

'), + + ('!openwindow1.gif(Bunny.)!', '\t

Bunny.

'), + + ('!openwindow1.gif!:http://hobix.com/', '\t

'), + + ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.', + '\t

\n\n\t' + '

And others sat all round the small
\nmachine and paid it to sing to them.

'), + + ('We use CSS(Cascading Style Sheets).', '\t

We use CSS.

'), + + ('|one|two|three|\n|a|b|c|', + '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
'), + + ('| name | age | sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |', + '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
name age sex
joan 24 f
archie 29 m
bella 45 f
'), + + ('|_. name |_. age |_. sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |', + '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t' + '\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
name age sex
joan 24 f
archie 29 m
bella 45 f
'), + + ('', '\t

'), + + ('pre.. Hello\n\nHello Again\n\np. normal text', '
Hello\n\nHello Again\n
\n\n\t

normal text

'), + + ('
this is in a pre tag
', '
this is in a pre tag
'), + + ('"test1":http://foo.com/bar--baz\n\n"test2":http://foo.com/bar---baz\n\n"test3":http://foo.com/bar-17-18-baz', + '\t

test1

\n\n\t' + '

test2

\n\n\t' + '

test3

'), + + ('"foo ==(bar)==":#foobar', '\t

foo (bar)

'), + + ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!', + '\t

'), + + ('* Point one\n* Point two\n## Step 1\n## Step 2\n## Step 3\n* Point three\n** Sub point 1\n** Sub point 2', + '\t'), + + ('@array[4] = 8@', '\t

array[4] = 8

'), + + ('#{color:blue} one\n# two\n# three', + '\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
'), + + ('Links (like "this":http://foo.com), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.', + '\t

Links (like this), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.

'), + + ('@monospaced text@, followed by text', + '\t

monospaced text, followed by text

'), + + ('h2. A header\n\n\n\n\n\nsome text', '\t

A header

\n\n\t

some text

'), + + ('pre.. foo bar baz\nquux', '
foo bar baz\nquux\n
'), + + ('line of text\n\n leading spaces', + '\t

line of text

\n\n leading spaces'), + + ('"some text":http://www.example.com/?q=foo%20bar and more text', + '\t

some text and more text

'), + + ('(??some text??)', '\t

(some text)

'), + + ('(*bold text*)', '\t

(bold text)

'), + + ('H[~2~]O', '\t

H2O

'), + + ("p=. Où est l'école, l'église s'il vous plaît?", + """\t

Où est l’école, l’église s’il vous plaît?

"""), + + ("p=. *_The_* _*Prisoner*_", + """\t

The Prisoner

"""), + + ("""p=. "An emphasised _word._" & "*A spanned phrase.*" """, + """\t

“An emphasised word.” & “A spanned phrase.

"""), + + ("""p=. "*Here*'s a word!" """, + """\t

Here’s a word!”

"""), + + ("""p=. "Please visit our "Textile Test Page":http://textile.sitemonks.com" """, + """\t

“Please visit our Textile Test Page

"""), + ("""| Foreign EXPÓŅÉNTIAL |""", + """\t\n\t\t\n\t\t\t\n\t\t\n\t
Foreign EXPÓŅÉNTIAL
"""), + ("""Piękne ŹDŹBŁO""", + """\t

Piękne ŹDŹBŁO

"""), + + ("""p=. Tell me, what is AJAX(Asynchronous Javascript and XML), please?""", + """\t

Tell me, what is AJAX, please?

"""), + ('p{font-size:0.8em}. *TxStyle* is a documentation project of Textile 2.4 for "Textpattern CMS":http://texpattern.com.', + '\t

TxStyle is a documentation project of Textile 2.4 for Textpattern CMS.

'), + (""""Übermensch":http://de.wikipedia.org/wiki/Übermensch""", """\t

Übermensch

"""), + ("""Here is some text with a block.\n\n\n\n\n\nbc. """, + """\t

Here is some text with a block.

\n\n\t

\n\n\t

\n\n
<!-- Here is a comment block in a code block. -->\n
"""), + (""""Textile(c)" is a registered(r) 'trademark' of Textpattern(tm) -- or TXP(That's textpattern!) -- at least it was - back in '88 when 2x4 was (+/-)5(o)C ... QED!\n\np{font-size: 200%;}. 2(1/4) 3(1/2) 4(3/4)""", + """\t

“Textile©” is a registered® ‘trademark’ of Textpattern™ — or TXP — at least it was – back in ’88 when 2×4 was ±5°C … QED!

\n\n\t

2¼ 3½ 4¾

"""), + ("""|=. Testing colgroup and col syntax\n|:\\5. 80\n|a|b|c|d|e|\n\n|=. Testing colgroup and col syntax|\n|:\\5. 80|\n|a|b|c|d|e|""", """\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Testing colgroup and col syntax
abcde
\n\n\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Testing colgroup and col syntax
abcde
"""), + ("""table(#dvds){border-collapse:collapse}. Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups\n|={font-size:140%;margin-bottom:15px}. DVDs with two Textiled tbody elements\n|:\\3. 100 |{background:#ddd}|250||50|300|\n|^(header).\n|_. Title |_. Starring |_. Director |_. Writer |_. Notes |\n|~(footer).\n|\\5=. This is the tfoot, centred |\n|-(toplist){background:#c5f7f6}.\n| _The Usual Suspects_ | Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey | Bryan Singer | Chris McQaurrie | One of the finest films ever made |\n| _Se7en_ | Morgan Freeman, Brad Pitt, Kevin Spacey | David Fincher | Andrew Kevin Walker | Great psychological thriller |\n| _Primer_ | David Sullivan, Shane Carruth | Shane Carruth | Shane Carruth | Amazing insight into trust and human psychology
rather than science fiction. Terrific! |\n| _District 9_ | Sharlto Copley, Jason Cope | Neill Blomkamp | Neill Blomkamp, Terri Tatchell | Social commentary layered on thick,\nbut boy is it done well |\n|-(medlist){background:#e7e895;}.\n| _Arlington Road_ | Tim Robbins, Jeff Bridges | Mark Pellington | Ehren Kruger | Awesome study in neighbourly relations |\n| _Phone Booth_ | Colin Farrell, Kiefer Sutherland, Forest Whitaker | Joel Schumacher | Larry Cohen | Edge-of-the-seat stuff in this\nshort but brilliantly executed thriller |""", + """\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\n\t
DVDs with two Textiled tbody elements
Title Starring Director Writer Notes
This is the tfoot, centred
The Usual Suspects Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey Bryan Singer Chris McQaurrie One of the finest films ever made
Se7en Morgan Freeman, Brad Pitt, Kevin Spacey David Fincher Andrew Kevin Walker Great psychological thriller
Primer David Sullivan, Shane Carruth Shane Carruth Shane Carruth Amazing insight into trust and human psychology
\nrather than science fiction. Terrific!
District 9 Sharlto Copley, Jason Cope Neill Blomkamp Neill Blomkamp, Terri Tatchell Social commentary layered on thick,
\nbut boy is it done well
Arlington Road Tim Robbins, Jeff Bridges Mark Pellington Ehren Kruger Awesome study in neighbourly relations
Phone Booth Colin Farrell, Kiefer Sutherland, Forest Whitaker Joel Schumacher Larry Cohen Edge-of-the-seat stuff in this
\nshort but brilliantly executed thriller
"""), + ("""-(hot) *coffee* := Hot _and_ black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk := Nourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n\n-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:""", + """
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t
Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.
\n
\n\n
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t

Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.

\n
"""), + (""";(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3""", + """\t
\n\t\t
Term 1
\n\t\t
Def 1
\n\t\t
Def 2
\n\t\t
Def 3
\n\t
"""), + ("""*Here is a comment*\n\nHere is *(class)a comment*\n\n*(class)Here is a class* that is a little extended and is\n*followed* by a strong word!\n\nbc. ; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n\n*123 test*\n\n*test 123*\n\n**123 test**\n\n**test 123**""", + """\t

Here is a comment

\n\n\t

Here is a comment

\n\n\t

Here is a class that is a little extended and is
\nfollowed by a strong word!

\n\n
; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n
\n\n\t

123 test

\n\n\t

test 123

\n\n\t

123 test

\n\n\t

test 123

"""), + ("""#_(first#list) one\n# two\n# three\n\ntest\n\n#(ordered#list2).\n# one\n# two\n# three\n\ntest\n\n#_(class_4).\n# four\n# five\n# six\n\ntest\n\n#_ seven\n# eight\n# nine\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n#22 22\n# 23\n# 24""", + """\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. four
  2. \n\t\t
  3. five
  4. \n\t\t
  5. six
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. seven
  2. \n\t\t
  3. eight
  4. \n\t\t
  5. nine
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. one
  2. \n\t\t
  3. two
  4. \n\t\t
  5. three
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. 22
  2. \n\t\t
  3. 23
  4. \n\t\t
  5. 24
  6. \n\t
"""), + ("""# one\n##3 one.three\n## one.four\n## one.five\n# two\n\ntest\n\n#_(continuation#section2).\n# three\n# four\n##_ four.six\n## four.seven\n# five\n\ntest\n\n#21 twenty-one\n# twenty-two""", + """\t
    \n\t\t
  1. one\n\t\t
      \n\t\t\t
    1. one.three
    2. \n\t\t\t
    3. one.four
    4. \n\t\t\t
    5. one.five
    6. \n\t\t
  2. \n\t\t
  3. two
  4. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. three
  2. \n\t\t
  3. four\n\t\t
      \n\t\t\t
    1. four.six
    2. \n\t\t\t
    3. four.seven
    4. \n\t\t
  4. \n\t\t
  5. five
  6. \n\t
\n\n\t

test

\n\n\t
    \n\t\t
  1. twenty-one
  2. \n\t\t
  3. twenty-two
  4. \n\t
"""), + ("""|* Foo[^2^]\n* _bar_\n* ~baz~ |\n|#4 *Four*\n# __Five__ |\n|-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n|""", + """\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t
\t
    \n\t\t
  • Foo2
  • \n\t\t
  • bar
  • \n\t\t
  • baz
  • \n\t
\t
    \n\t\t
  1. Four
  2. \n\t\t
  3. Five
  4. \n\t
\n\t
coffee
\n\t
Hot and black
\n\t
tea
\n\t
Also hot, but a little less black
\n\t
milk
\n\t

Nourishing beverage for baby cows.
\nCold drink that goes great with cookies.


\n
"""), + ("""h4. A more complicated table\n\ntable(tableclass#tableid){color:blue}.\n|_. table |_. more |_. badass |\n|\\3. Horizontal span of 3|\n(firstrow). |first|HAL(open the pod bay doors)|1|\n|some|{color:green}. styled|content|\n|/2. spans 2 rows|this is|quite a|\n| deep test | don't you think?|\n(lastrow). |fifth|I'm a lumberjack|5|\n|sixth| _*bold italics*_ |6|""", + """\t

A more complicated table

\n\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
table more badass
Horizontal span of 3
firstHAL1
somestyledcontent
spans 2 rowsthis isquite a
deep test don’t you think?
fifthI’m a lumberjack5
sixth bold italics 6
"""), + ("""| *strong* |\n\n| _em_ |\n\n| Inter-word -dashes- | ZIP-codes are 5- or 9-digit codes |""", + """\t\n\t\t\n\t\t\t\n\t\t\n\t
strong
\n\n\t\n\t\t\n\t\t\t\n\t\t\n\t
em
\n\n\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
Inter-word dashes ZIP-codes are 5- or 9-digit codes
"""), + ("""|_. attribute list |\n|<. align left |\n|>. align right|\n|=. center |\n|<>. justify me|\n|^. valign top |\n|~. bottom |""", + """\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\n\t
attribute list
align left
align right
center
justify me
valign top
bottom
"""), + ("""h2. A definition list\n\n;(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3\n;; Center\n;; NATO(Why Em Cee Ayy)\n:: Subdef 1\n:: Subdef 2\n;;; SubSub Term\n::: SubSub Def 1\n::: SubSub Def 2\n::: Subsub Def 3\nWith newline\n::: Subsub Def 4\n:: Subdef 3\n: DEF 4\n; Term 2\n: Another def\n: And another\n: One more\n:: A def without a term\n:: More defness\n; Third term for good measure\n: My definition of a boombastic jazz""", + """\t

A definition list

\n\n\t
\n\t\t
Term 1
\n\t\t
Def 1
\n\t\t
Def 2
\n\t\t
Def 3\n\t\t
\n\t\t\t
Center
\n\t\t\t
NATO
\n\t\t\t
Subdef 1
\n\t\t\t
Subdef 2\n\t\t\t
\n\t\t\t\t
SubSub Term
\n\t\t\t\t
SubSub Def 1
\n\t\t\t\t
SubSub Def 2
\n\t\t\t\t
Subsub Def 3
\nWith newline
\n\t\t\t\t
Subsub Def 4
\n\t\t\t
\n\t\t\t
Subdef 3
\n\t\t
\n\t\t
DEF 4
\n\t\t
Term 2
\n\t\t
Another def
\n\t\t
And another
\n\t\t
One more\n\t\t
\n\t\t\t
A def without a term
\n\t\t\t
More defness
\n\t\t
\n\t\t
Third term for good measure
\n\t\t
My definition of a boombastic jazz
\n\t
"""), + ("""###. Here's a comment.\n\nh3. Hello\n\n###. And\nanother\none.\n\nGoodbye.""", """\t

Hello

\n\n\t

Goodbye.

"""), + ("""h2. A Definition list which covers the instance where a new definition list is created with a term without a definition\n\n- term :=\n- term2 := def""", """\t

A Definition list which covers the instance where a new definition list is created with a term without a definition

\n\n
\n\t
term2
\n\t
def
\n
"""), + ('!{height:20px;width:20px;}https://1.gravatar.com/avatar/!', + '\t

'), + ('& test', '\t

& test

'), +) + +# A few extra cases for HTML4 +html_known_values = ( + ('I spoke.\nAnd none replied.', '\t

I spoke.
\nAnd none replied.

'), + ('I __know__.\nI **really** __know__.', '\t

I know.
\nI really know.

'), + ("I'm %{color:red}unaware%\nof most soft drinks.", '\t

I’m unaware
\nof most soft drinks.

'), + ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.', + '\t

I seriously blushed
\nwhen I sprouted' + ' that
\ncorn stalk from my
\ncabeza.

'), + ('
\n\na.gsub!( /\n
', + '
\n\na.gsub!( /</, "" )\n\n
'), + ('
\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n
\n\n' + 'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.', + '\t

\n\n\t

Sidebar

\n\n\t

Hobix
\n' + 'Ruby

\n\n\t

\n\n\t

The main text of the
\n' + 'page goes here and will
\nstay to the left of the
\nsidebar.

'), + ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com', + '\t

I am crazy about Hobix
\nand it’s ' + 'all I ever
\nlink to!

'), + ('!http://hobix.com/sample.jpg!', '\t

'), + ('!openwindow1.gif(Bunny.)!', '\t

Bunny.

'), + ('!openwindow1.gif!:http://hobix.com/', '\t

'), + ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.', + '\t

\n\n\t' + '

And others sat all round the small
\nmachine and paid it to sing to them.

'), + ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!', + '\t

'), + ('notextile. foo bar baz\n\np. quux\n', + ' foo bar baz\n\n\t

quux

'), + ('"foo":http://google.com/one--two', '\t

foo

'), + # issue 24 colspan + ('|\\2. spans two cols |\n| col 1 | col 2 |', '\t\n\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
spans two cols
col 1 col 2
'), + # issue 2 escaping + ('"foo ==(bar)==":#foobar', '\t

foo (bar)

'), + # issue 14 newlines in extended pre blocks + ("pre.. Hello\n\nAgain\n\np. normal text", '
Hello\n\nAgain\n
\n\n\t

normal text

'), + # url with parentheses + ('"python":http://en.wikipedia.org/wiki/Python_(programming_language)', '\t

python

'), + # table with hyphen styles + ('table(linkblog-thumbnail).\n|(linkblog-thumbnail-cell). apple|bear|', '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
applebear
'), + # issue 32 empty table cells + ("|thing|||otherthing|", "\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
thingotherthing
"), + # issue 36 link reference names http and https + ('"signup":signup\n[signup]http://myservice.com/signup', '\t

signup

'), + ('"signup":signup\n[signup]https://myservice.com/signup', '\t

signup

'), + # nested formatting + ("*_test text_*", "\t

test text

"), + ("_*test text*_", "\t

test text

"), + # quotes in code block + ("'quoted string'", "\t

'quoted string'

"), + ("
some preformatted text
other text", "\t

some preformatted text
other text

"), + # at sign and notextile in table + ("|@@|@@ @@|\n|*B1*|*B2* *B3*|", "\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
<A1><A2> <A3>
*B1**B2* *B3*
"), + # cite attribute + ('bq.:http://textism.com/ Text...', '\t
\n\t\t

Text…

\n\t
'), + ('Hello ["(Mum) & dad"]', '\t

Hello [“(Mum) & dad”]

'), +) + +@pytest.mark.parametrize("input, expected_output", xhtml_known_values) +def test_KnownValuesXHTML(input, expected_output): + # XHTML + output = textile.textile(input, html_type='xhtml') + assert output == expected_output + +@pytest.mark.parametrize("input, expected_output", html_known_values) +def test_KnownValuesHTML(input, expected_output): + # HTML5 + output = textile.textile(input, html_type='html5') + assert output == expected_output diff --git a/textile/core.py b/textile/core.py index d580bc6f..dc5b13ac 100644 --- a/textile/core.py +++ b/textile/core.py @@ -20,87 +20,41 @@ """ import uuid +import six from textile.tools import sanitizer, imagesize +from textile.regex_strings import (align_re_s, cls_re_s, halign_re_s, + pnct_re_s, regex_snippets, syms_re_s, table_span_re_s, valign_re_s) +from textile.utils import (decode_high, encode_high, encode_html, generate_tag, + has_raw_text, is_rel_url, is_valid_url, list_type, normalize_newlines, + parse_attributes, pba) +from textile.objects import Block, Table -# We're going to use the Python 2.7+ OrderedDict data type. Import it if it's -# available, otherwise, use the included tool. try: from collections import OrderedDict except ImportError: from ordereddict import OrderedDict +from six.moves import urllib +urlparse, urlsplit, urlunsplit, quote, unquote = (urllib.parse.urlparse, + urllib.parse.urlsplit, urllib.parse.urlunsplit, urllib.parse.quote, + urllib.parse.unquote) try: - # Python 3 - from urllib.parse import urlparse, urlsplit, urlunsplit, quote, unquote - from html.parser import HTMLParser - xrange = range - unichr = chr - unicode = str -except (ImportError): - # Python 2 - from urllib import quote, unquote - from urlparse import urlparse, urlsplit, urlunsplit - from HTMLParser import HTMLParser - - -try: - # Use regex module for matching uppercase characters if installed, - # otherwise fall back to finding all the uppercase chars in a loop. 
import regex as re - upper_re_s = r'\p{Lu}' except ImportError: import re - from sys import maxunicode - upper_re_s = "".join([unichr(c) for c in - xrange(maxunicode) if unichr(c).isupper()]) - - -def _normalize_newlines(string): - out = string.strip() - out = re.sub(r'\r\n', '\n', out) - out = re.sub(r'\n{3,}', '\n\n', out) - out = re.sub(r'\n\s*\n', '\n\n', out) - out = re.sub(r'"$', '" ', out) - return out class Textile(object): - halign_re_s = r'(?:\<(?!>)|(?|\<\>|\=|[()]+(?! ))' - valign_re_s = r'[\-^~]' - class_re_s = r'(?:\([^)\n]+\))' # Don't allow classes/ids, - language_re_s = r'(?:\[[^\]\n]+\])' # languages, - style_re_s = r'(?:\{[^}\n]+\})' # or styles to span across newlines - colspan_re_s = r'(?:\\\d+)' - rowspan_re_s = r'(?:\/\d+)' - align_re_s = r'(?:%s|%s)*' % (halign_re_s, valign_re_s) - table_span_re_s = r'(?:%s|%s)*' % (colspan_re_s, rowspan_re_s) - # regex string to match class, style, language and horizontal alignment - # attributes - cslh_re_s = r'(?:%s)*' % '|'.join([class_re_s, style_re_s, language_re_s, - halign_re_s]) - # regex string to match class, style and language attributes - csl_re_s = r'(?:%s)*' % '|'.join([class_re_s, style_re_s, language_re_s]) - - pnct_re_s = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]' - urlchar_re_s = r'[\w"$\-_.+!*\'(),";\/?:@=&%#{}|\\^~\[\]`]' - syms_re_s = '¤§µ¶†‡•∗∴◊♠♣♥♦' - restricted_url_schemes = ('http', 'https', 'ftp', 'mailto') unrestricted_url_schemes = restricted_url_schemes + ('file', 'tel', - 'callto', 'sftp') + 'callto', 'sftp', 'data') btag = ('bq', 'bc', 'notextile', 'pre', 'h[1-6]', 'fn\d+', 'p', '###') btag_lite = ('bq', 'bc', 'p') - iAlign = {'<': 'float: left;', - '>': 'float: right;', - '=': 'display: block; margin: 0 auto;'} - vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'} - hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'} - note_index = 1 doctype_whitelist = ['xhtml', 'html5'] @@ -126,26 +80,35 @@ class Textile(object): 'threequarters': '¾', 'degrees': '°', 
'plusminus': '±', - 'fn_ref_pattern': '%(marker)s', - 'fn_foot_pattern': '%(marker)s', - 'nl_ref_pattern': '%(marker)s', } def __init__(self, restricted=False, lite=False, noimage=False, - auto_link=False, get_sizes=False, html_type='xhtml'): + get_sizes=False, html_type='xhtml', rel='', block_tags=True): """Textile properties that are common to regular textile and textile_restricted""" self.restricted = restricted self.lite = lite self.noimage = noimage self.get_sizes = get_sizes - self.auto_link = auto_link self.fn = {} self.urlrefs = {} self.shelf = {} - self.rel = '' + self.rel = rel self.html_type = html_type self.max_span_depth = 5 + self.span_depth = 0 + uid = uuid.uuid4().hex + self.uid = 'textileRef:{0}:'.format(uid) + self.linkPrefix = '{0}-'.format(uid) + self.linkIndex = 0 + self.refCache = {} + self.refIndex = 0 + self.block_tags = block_tags + + cur = r'' + if regex_snippets['cur']: # pragma: no branch + cur = r'(?:[{0}]{1}*)?'.format(regex_snippets['cur'], + regex_snippets['space']) # We'll be searching for characters that need to be HTML-encoded to # produce properly valid html. These are the defaults that work in @@ -153,457 +116,189 @@ def __init__(self, restricted=False, lite=False, noimage=False, # to make it work for characters at the beginning of the string. self.glyph_search = [ # apostrophe's - re.compile(r"(^|\w)'(\w)", re.U), + re.compile(r"(^|{0}|\))'({0})".format(regex_snippets['wrd']), + flags=re.U), # back in '88 - re.compile(r"(\s)'(\d+\w?)\b(?!')", re.U), + re.compile(r"({0})'(\d+{1}?)\b(?![.]?[{1}]*?')".format( + regex_snippets['space'], regex_snippets['wrd']), + flags=re.U), + # single opening following an open bracket. + re.compile(r"([([{])'(?=\S)", flags=re.U), # single closing - re.compile(r"(^|\S)'(?=\s|%s|$)" % self.pnct_re_s, re.U), + re.compile(r"(^|\S)'(?={0}|{1}|<|$)".format( + regex_snippets['space'], pnct_re_s), flags=re.U), # single opening re.compile(r"'", re.U), + # double opening following an open bracket. 
Allows things like + # Hello ["(Mum) & dad"] + re.compile(r'([([{])"(?=\S)', flags=re.U), # double closing - re.compile(r'(^|\S)"(?=\s|%s|$)' % self.pnct_re_s, re.U), + re.compile(r'(^|\S)"(?={0}|{1}|<|$)'.format( + regex_snippets['space'], pnct_re_s), re.U), # double opening re.compile(r'"'), # ellipsis - re.compile(r'([^.]?)\.{3}', re.U), + re.compile(r'([^.]?)\.{3}'), # ampersand - re.compile(r'(\s)&(\s)', re.U), + re.compile(r'(\s?)&(\s)', re.U), # em dash - re.compile(r'(\s?)--(\s?)', re.U), + re.compile(r'(\s?)--(\s?)'), # en dash - re.compile(r'\s-(?:\s|$)', re.U), + re.compile(r' - '), # dimension sign - re.compile(r'(\d+)( ?)x( ?)(?=\d+)', re.U), + re.compile(r'([0-9]+[\])]?[\'"]? ?)[x]( ?[\[(]?)' + r'(?=[+-]?{0}[0-9]*\.?[0-9]+)'.format(cur), flags=re.I | re.U), # trademark - re.compile(r'\b ?[([]TM[])]', re.I | re.U), + re.compile(r'(\b ?|{0}|^)[([]TM[])]'.format(regex_snippets['space'] + ), flags=re.I | re.U), # registered - re.compile(r'\b ?[([]R[])]', re.I | re.U), + re.compile(r'(\b ?|{0}|^)[([]R[])]'.format(regex_snippets['space'] + ), flags=re.I | re.U), # copyright - re.compile(r'\b ?[([]C[])]', re.I | re.U), + re.compile(r'(\b ?|{0}|^)[([]C[])]'.format(regex_snippets['space'] + ), flags=re.I | re.U), # 1/2 - re.compile(r'[([]1\/2[])]', re.I | re.U), + re.compile(r'[([]1\/2[])]'), # 1/4 - re.compile(r'[([]1\/4[])]', re.I | re.U), + re.compile(r'[([]1\/4[])]'), # 3/4 - re.compile(r'[([]3\/4[])]', re.I | re.U), + re.compile(r'[([]3\/4[])]'), # degrees - re.compile(r'[([]o[])]', re.I | re.U), + re.compile(r'[([]o[])]'), # plus/minus - re.compile(r'[([]\+\/-[])]', re.I | re.U), + re.compile(r'[([]\+\/-[])]'), # 3+ uppercase acronym - re.compile(r'\b([%s][%s0-9]{2,})\b(?:[(]([^)]*)[)])' % (upper_re_s, upper_re_s)), + re.compile(r'\b([{0}][{1}]{{2,}})\b(?:[(]([^)]*)[)])'.format( + regex_snippets['abr'], regex_snippets['acr']), flags=re.U), # 3+ uppercase - re.compile(r"""(?:(?<=^)|(?<=\s)|(?<=[>\(;-]))([%s]{3,})(\w*)(?=\s|%s|$)(?=[^">]*?(<|$))""" % - 
(upper_re_s, self.pnct_re_s)), + re.compile(r'({space}|^|[>(;-])([{abr}]{{3,}})([{nab}]*)' + '(?={space}|{pnct}|<|$)(?=[^">]*?(<|$))'.format(**{ 'space': + regex_snippets['space'], 'abr': regex_snippets['abr'], + 'nab': regex_snippets['nab'], 'pnct': pnct_re_s}), re.U), ] # These are the changes that need to be made for characters that occur # at the beginning of the string. self.glyph_search_initial = list(self.glyph_search) # apostrophe's - self.glyph_search_initial[0] = re.compile(r"(\w)'(\w)", re.U) + self.glyph_search_initial[0] = re.compile(r"({0}|\))'({0})".format( + regex_snippets['wrd']), flags=re.U) # single closing - self.glyph_search_initial[2] = re.compile(r"(\S)'(?=\s|%s|$)" % - self.pnct_re_s, re.U) + self.glyph_search_initial[3] = re.compile(r"(\S)'(?={0}|{1}|$)".format( + regex_snippets['space'], pnct_re_s), re.U) # double closing - self.glyph_search_initial[4] = re.compile(r'(\S)"(?=\s|%s|$)' % - self.pnct_re_s, re.U) - - self.glyph_replace = [x % self.glyph_definitions for x in ( - r'\1%(apostrophe)s\2', # apostrophe's - r'\1%(apostrophe)s\2', # back in '88 - r'\1%(quote_single_close)s', # single closing - r'%(quote_single_open)s', # single opening - r'\1%(quote_double_close)s', # double closing - r'%(quote_double_open)s', # double opening - r'\1%(ellipsis)s', # ellipsis - r'\1%(ampersand)s\2', # ampersand - r'\1%(emdash)s\2', # em dash - r' %(endash)s ', # en dash - r'\1\2%(dimension)s\3', # dimension sign - r'%(trademark)s', # trademark - r'%(registered)s', # registered - r'%(copyright)s', # copyright - r'%(half)s', # 1/2 - r'%(quarter)s', # 1/4 - r'%(threequarters)s', # 3/4 - r'%(degrees)s', # degrees - r'%(plusminus)s', # plus/minus + self.glyph_search_initial[6] = re.compile(r'(\S)"(?={0}|{1}|<|$)'.format( + regex_snippets['space'], pnct_re_s), re.U) + + self.glyph_replace = [x.format(**self.glyph_definitions) for x in ( + r'\1{apostrophe}\2', # apostrophe's + r'\1{apostrophe}\2', # back in '88 + r'\1{quote_single_open}', # single opening 
after bracket + r'\1{quote_single_close}', # single closing + r'{quote_single_open}', # single opening + r'\1{quote_double_open}', # double opening after bracket + r'\1{quote_double_close}', # double closing + r'{quote_double_open}', # double opening + r'\1{ellipsis}', # ellipsis + r'\1{ampersand}\2', # ampersand + r'\1{emdash}\2', # em dash + r' {endash} ', # en dash + r'\1{dimension}\2', # dimension sign + r'\1{trademark}', # trademark + r'\1{registered}', # registered + r'\1{copyright}', # copyright + r'{half}', # 1/2 + r'{quarter}', # 1/4 + r'{threequarters}', # 3/4 + r'{degrees}', # degrees + r'{plusminus}', # plus/minus r'\1', # 3+ uppercase acronym - r'\1\2', # 3+ uppercase + r'\1{0}:glyph:\2' # 3+ uppercase + r'\3'.format(self.uid), )] if self.html_type == 'html5': - self.glyph_replace[19] = r'\1' + self.glyph_replace[21] = r'\1' if self.restricted is True: self.url_schemes = self.restricted_url_schemes else: self.url_schemes = self.unrestricted_url_schemes - - def parse(self, text, rel=None, head_offset=0, sanitize=False): - """ - >>> import textile - >>> Py3 << textile.textile('some textile') - '\\t

some textile

' - """ + def parse(self, text, rel=None, sanitize=False): + """Parse the input text as textile and return html output.""" self.notes = OrderedDict() self.unreferencedNotes = OrderedDict() self.notelist_cache = OrderedDict() - text = _normalize_newlines(text) + if text == '': + return text if self.restricted: - text = self.encode_html(text, quotes=False) + text = encode_html(text, quotes=False) + + text = normalize_newlines(text) + text = text.replace(self.uid, '') + + if self.block_tags: + if self.lite: + self.blocktag_whitelist = ['bq', 'p'] + text = self.block(text) + else: + self.blocktag_whitelist = [ 'bq', 'p', 'bc', 'notextile', + 'pre', 'h[1-6]', + 'fn{0}+'.format(regex_snippets['digit']), '###'] + text = self.block(text) + text = self.placeNoteLists(text) + else: + # Inline markup (em, strong, sup, sub, del etc). + text = self.span(text) + + # Glyph level substitutions (mainly typographic -- " & ' => curly + # quotes, -- => em-dash etc. + text = self.glyphs(text) if rel: - self.rel = ' rel="%s"' % rel + self.rel = ' rel="{0}"'.format(rel) text = self.getRefs(text) - # The original php puts the below within an if not self.lite, but our - # block function handles self.lite itself. - text = self.block(text, int(head_offset)) - if not self.lite: text = self.placeNoteLists(text) text = self.retrieve(text) + text = text.replace('{0}:glyph:'.format(self.uid), '') if sanitize: text = sanitizer.sanitize(text) + text = self.retrieveURLs(text) + # if the text contains a break tag (
or
) not followed by # a newline, replace it with a new style break tag and a newline. text = re.sub(r'(?!\n)', '
\n', text) return text - def pba(self, block_attributes, element=None): - """ - Parse block attributes. - - >>> t = Textile() - >>> Py3 << t.pba(r'\3') - '' - >>> Py3 << t.pba(r'\\3', element='td') - ' colspan="3"' - >>> Py3 << t.pba(r'/4', element='td') - ' rowspan="4"' - >>> Py3 << t.pba(r'\\3/4', element='td') - ' colspan="3" rowspan="4"' - - >>> Py3 << t.pba('^', element='td') - ' style="vertical-align:top;"' - - >>> Py3 << t.pba('{line-height:18px}') - ' style="line-height:18px;"' - - >>> Py3 << t.pba('(foo-bar)') - ' class="foo-bar"' - - >>> Py3 << t.pba('(#myid)') - ' id="myid"' - - >>> Py3 << t.pba('(foo-bar#myid)') - ' class="foo-bar" id="myid"' - - >>> Py3 << t.pba('((((') - ' style="padding-left:4em;"' - - >>> Py3 << t.pba(')))') - ' style="padding-right:3em;"' - - >>> Py3 << t.pba('[fr]') - ' lang="fr"' - - >>> Py3 << t.pba(r'\\5 80', 'col') - ' span="5" width="80"' - - >>> rt = Textile() - >>> rt.restricted = True - >>> Py3 << rt.pba('[en]') - ' lang="en"' - - >>> Py3 << rt.pba('(#id)') - '' - - """ - style = [] - aclass = '' - lang = '' - colspan = '' - rowspan = '' - block_id = '' - span = '' - width = '' - - if not block_attributes: - return '' - - matched = block_attributes - if element == 'td': - m = re.search(r'\\(\d+)', matched) - if m: - colspan = m.group(1) - - m = re.search(r'/(\d+)', matched) - if m: - rowspan = m.group(1) - - if element == 'td' or element == 'tr': - m = re.search(r'(%s)' % self.valign_re_s, matched) - if m: - style.append("vertical-align:%s" % self.vAlign[m.group(1)]) - - m = re.search(r'\{([^}]*)\}', matched) - if m: - style += m.group(1).rstrip(';').split(';') - matched = matched.replace(m.group(0), '') - - m = re.search(r'\[([^\]]+)\]', matched, re.U) - if m: - lang = m.group(1) - matched = matched.replace(m.group(0), '') - - m = re.search(r'\(([^()]+)\)', matched, re.U) - if m: - aclass = m.group(1) - matched = matched.replace(m.group(0), '') - - m = re.search(r'([(]+)', matched) - if m: - 
style.append("padding-left:%sem" % len(m.group(1))) - matched = matched.replace(m.group(0), '') - - m = re.search(r'([)]+)', matched) - if m: - style.append("padding-right:%sem" % len(m.group(1))) - matched = matched.replace(m.group(0), '') - - m = re.search(r'(%s)' % self.halign_re_s, matched) - if m: - style.append("text-align:%s" % self.hAlign[m.group(1)]) - - m = re.search(r'^(.*)#(.*)$', aclass) - if m: - block_id = m.group(2) - aclass = m.group(1) - - if element == 'col': - pattern = r'(?:\\(\d+))?\s*(\d+)?' - csp = re.match(pattern, matched) - span, width = csp.groups() - - if self.restricted: - if lang: - return ' lang="%s"' % lang - else: - return '' - - result = [] - if style: - # Previous splits that created style may have introduced extra - # whitespace into the list elements. Clean it up. - style = [x.strip() for x in style] - result.append(' style="%s;"' % "; ".join(style)) - if aclass: - result.append(' class="%s"' % aclass) - if block_id: - result.append(' id="%s"' % block_id) - if lang: - result.append(' lang="%s"' % lang) - if colspan: - result.append(' colspan="%s"' % colspan) - if rowspan: - result.append(' rowspan="%s"' % rowspan) - if span: - result.append(' span="%s"' % span) - if width: - result.append(' width="%s"' % width) - return ''.join(result) - - def hasRawText(self, text): - """ - checks whether the text has text not already enclosed by a block tag - - >>> t = Textile() - >>> t.hasRawText('

foo bar biz baz

') - False - - >>> t.hasRawText(' why yes, yes it does') - True - - """ - r = re.compile(r'<(p|blockquote|div|form|table|ul|ol|dl|pre|h\d)[^>]*?>.*', - re.S).sub('', text.strip()).strip() - r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r) - return '' != r - def table(self, text): - r""" - >>> t = Textile() - >>> Py3 << t.table('(rowclass). |one|two|three|\n|a|b|c|') - '\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t\n\t\t\n\t
onetwothree
abc
\n\n' - """ - text = text + "\n\n" - pattern = re.compile(r'^(?:table(_?%(s)s%(a)s%(c)s)\.(.*?)\n)?^(%(a)s%(c)s\.? ?\|.*\|)[\s]*\n\n' % - {'s': self.table_span_re_s, 'a': self.align_re_s, 'c': - self.cslh_re_s}, re.S | re.M | re.U) - return pattern.sub(self.fTable, text) - - def fTable(self, match): - tatts = self.pba(match.group(1), 'table') - - summary = (' summary="%s"' % match.group(2).strip() if match.group(2) - else '') - cap = '' - colgrp, last_rgrp = '', '' - c_row = 1 - rows = [] - try: - split = re.split(r'\|\s*?$', match.group(3), flags=re.M) - except TypeError: - split = re.compile(r'\|\s*?$', re.M).split(match.group(3)) - for row in [x for x in split if x]: - row = row.lstrip() - - # Caption -- only occurs on row 1, otherwise treat '|=. foo |...' - # as a normal center-aligned cell. - captionpattern = r"^\|\=(%(s)s%(a)s%(c)s)\. ([^\n]*)(.*)" % {'s': - self.table_span_re_s, 'a': self.align_re_s, 'c': - self.cslh_re_s} - caption_re = re.compile(captionpattern, re.S) - cmtch = caption_re.match(row) - if c_row == 1 and cmtch: - capatts = self.pba(cmtch.group(1)) - cap = "\t%s\n" % (capatts, - cmtch.group(2).strip()) - row = cmtch.group(3).lstrip() - if row == '': - continue - - c_row += 1 - - # Colgroup - grppattern = r"^\|:(%(s)s%(a)s%(c)s\. 
.*)" % {'s': - self.table_span_re_s, 'a': self.align_re_s, 'c': - self.cslh_re_s} - grp_re = re.compile(grppattern, re.M) - gmtch = grp_re.match(row.lstrip()) - if gmtch: - has_newline = "\n" in row - idx = 0 - for col in gmtch.group(1).replace('.', '').split("|"): - gatts = self.pba(col.strip(), 'col') - if idx == 0: - gatts = "group%s>" % gatts - else: - gatts = gatts + " />" - colgrp = colgrp + "\t 0: - ctag = "t%s" % ctyp - cline = ("\t\t\t<%(ctag)s%(catts)s>%(cell)s" % - {'ctag': ctag, 'catts': catts, 'cell': cell}) - cells.append(self.doTagBr(ctag, cline)) - - cellctr += 1 - - if rgrp and last_rgrp: - grp = "\t\n" % last_rgrp - else: - grp = '' - - if rgrp: - grp += "\t\n" % (rgrp, rgrpatts) - - last_rgrp = rgrp if rgrp else last_rgrp - - rows.append("%s\t\t\n%s%s\t\t" % (grp, ratts, - '\n'.join(cells), '\n' if cells else '')) - cells = [] - catts = None - - if last_rgrp: - last_rgrp = '\t\n' % last_rgrp - tbl = ("\t\n%(cap)s%(colgrp)s%(rows)s\n%(last_rgrp)s\t\n\n" - % {'tatts': tatts, 'summary': summary, 'cap': cap, 'colgrp': - colgrp, 'last_rgrp': last_rgrp, 'rows': '\n'.join(rows)}) - return tbl - - def lists(self, text): - """ - >>> t = Textile() - >>> Py3 << t.lists("* one\\n* two\\n* three") - '\\t
    \\n\\t\\t
  • one
  • \\n\\t\\t
  • two
  • \\n\\t\\t
  • three
  • \\n\\t
' - """ - - #Replace line-initial bullets with asterisks - bullet_pattern = re.compile('^•', re.U | re.M) + text = "{0}\n\n".format(text) + pattern = re.compile(r'^(?:table(?P_?{s}{a}{c})\.' + r'(?P
.*?)\n)?^(?P{a}{c}\.? ?\|.*\|)' + r'[\s]*\n\n'.format(**{'s': table_span_re_s, 'a': align_re_s, + 'c': cls_re_s}), flags=re.S | re.M | re.U) + match = pattern.search(text) + if match: + table = Table(self, **match.groupdict()) + return table.process() + return text - pattern = re.compile(r'^((?:[*;:]+|[*;:#]*#(?:_|\d+)?)%s[ .].*)$(?![^#*;:])' - % self.csl_re_s, re.U | re.M | re.S) - return pattern.sub(self.fList, bullet_pattern.sub('*', text)) + def textileLists(self, text): + pattern = re.compile(r'^((?:[*;:]+|[*;:#]*#(?:_|\d+)?){0}[ .].*)$' + r'(?![^#*;:])'.format(cls_re_s), re.U | re.M | re.S) + return pattern.sub(self.fTextileList, text) - def fList(self, match): - try: - text = re.split(r'\n(?=[*#;:])', match.group(), flags=re.M) - except TypeError: - text = re.compile(r'\n(?=[*#;:])', re.M).split(match.group()) + def fTextileList(self, match): + text = re.split(r'\n(?=[*#;:])', match.group(), flags=re.M) pt = '' result = [] ls = OrderedDict() @@ -613,107 +308,97 @@ def fList(self, match): except IndexError: nextline = '' - m = re.search(r"^([#*;:]+)(_|\d+)?(%s)[ .](.*)$" % self.csl_re_s, - line, re.S) - if m: - tl, start, atts, content = m.groups() - content = content.strip() - nl = '' - ltype = self.listType(tl) - if ';' in tl: - litem = 'dt' - elif ':' in tl: - litem = 'dd' - else: - litem = 'li' - - showitem = len(content) > 0 - - # handle list continuation/start attribute on ordered lists - if ltype == 'o': - if not hasattr(self, 'olstarts'): - self.olstarts = {tl: 1} - - # does the first line of this ol have a start attribute - if len(tl) > len(pt): - # no, set it to 1 - if start is None: - self.olstarts[tl] = 1 - # yes, set it to the given number - elif start != '_': - self.olstarts[tl] = int(start) - # we won't need to handle the '_' case, we'll just - # print out the number when it's needed - - # put together the start attribute if needed - if len(tl) > len(pt) and start is not None: - start = ' start="%s"' % self.olstarts[tl] - - # This will only 
increment the count for list items, not - # definition items - if showitem: - self.olstarts[tl] += 1 - - nm = re.match("^([#\*;:]+)(_|[\d]+)?%s[ .].*" % self.csl_re_s, - nextline) - if nm: - nl = nm.group(1) - - # We need to handle nested definition lists differently. If - # the next tag is a dt (';') of a lower nested level than the - # current dd (':'), - if ';' in pt and ':' in tl: - ls[tl] = 2 - - atts = self.pba(atts) - # If start is still None, set it to '', else leave the value - # that we've already formatted. - start = start or '' - - # if this item tag isn't in the list, create a new list and - # item, else just create the item - if tl not in ls: - ls[tl] = 1 - itemtag = ("\n\t\t<%s>%s" % (litem, content) if - showitem else '') - line = "\t<%sl%s%s>%s" % (ltype, atts, start, itemtag) - else: - line = ("\t\t<%s%s>%s" % (litem, atts, content) if showitem - else '') - - if len(nl) <= len(tl): - line = line + ("" % litem if showitem else '') - # work backward through the list closing nested lists/items - for k, v in reversed(list(ls.items())): - if len(k) > len(nl): - if v != 2: - line = line + "\n\t" % self.listType(k) - if len(k) > 1 and v != 2: - line = line + "" % litem - del ls[k] - - # Remember the current Textile tag - pt = tl + m = re.search(r"^(?P[#*;:]+)(?P_|\d+)?(?P{0})[ .]" + "(?P.*)$".format(cls_re_s), line, re.S) + tl, start, atts, content = m.groups() + content = content.strip() + nl = '' + ltype = list_type(tl) + tl_tags = {';': 'dt', ':': 'dd'} + litem = tl_tags.get(tl[0], 'li') + + showitem = len(content) > 0 + + # handle list continuation/start attribute on ordered lists + if ltype == 'o': + if not hasattr(self, 'olstarts'): + self.olstarts = {tl: 1} + + # does the first line of this ol have a start attribute + if len(tl) > len(pt): + # no, set it to 1 + if start is None: + self.olstarts[tl] = 1 + # yes, set it to the given number + elif start != '_': + self.olstarts[tl] = int(start) + # we won't need to handle the '_' case, we'll just + # 
print out the number when it's needed + + # put together the start attribute if needed + if len(tl) > len(pt) and start is not None: + start = ' start="{0}"'.format(self.olstarts[tl]) + + # This will only increment the count for list items, not + # definition items + if showitem: + self.olstarts[tl] = self.olstarts[tl] + 1 + + nm = re.match("^(?P[#\*;:]+)(_|[\d]+)?{0}" + "[ .].*".format(cls_re_s), nextline) + if nm: + nl = nm.group('nextlistitem') + + # We need to handle nested definition lists differently. If + # the next tag is a dt (';') of a lower nested level than the + # current dd (':'), + if ';' in pt and ':' in tl: + ls[tl] = 2 + + atts = pba(atts) + tabs = '\t' * len(tl) + # If start is still None, set it to '', else leave the value + # that we've already formatted. + start = start or '' + + # if this item tag isn't in the list, create a new list and + # item, else just create the item + if tl not in ls: + ls[tl] = 1 + itemtag = ("\n{0}\t<{1}>{2}".format(tabs, litem, content) if + showitem else '') + line = "<{0}l{1}{2}>{3}".format(ltype, atts, start, itemtag) + else: + line = ("\t<{0}{1}>{2}".format(litem, atts, content) if + showitem else '') + line = '{0}{1}'.format(tabs, line) + + if len(nl) <= len(tl): + if showitem: + line = "{0}".format(line, litem) + # work backward through the list closing nested lists/items + for k, v in reversed(list(ls.items())): + if len(k) > len(nl): + if v != 2: + line = "{0}\n{1}".format(line, tabs, + list_type(k)) + if len(k) > 1 and v != 2: + line = "{0}".format(line, litem) + del ls[k] + + # Remember the current Textile tag + pt = tl # This else exists in the original php version. I'm not sure how # to come up with a case where the line would not match. I think # it may have been necessary due to the way php returns matches. 
#else: - #line = line + "\n" + #line = "{0}\n".format(line) result.append(line) return self.doTagBr(litem, "\n".join(result)) - def listType(self, list_string): - listtypes = { - list_string.startswith('*'): 'u', - list_string.startswith('#'): 'o', - (not list_string.startswith('*') and not - list_string.startswith('#')): 'd' - } - return listtypes[True] - def doTagBr(self, tag, input): - return re.compile(r'<(%s)([^>]*?)>(.*)()' % re.escape(tag), + return re.compile(r'<({0})([^>]*?)>(.*)()'.format(re.escape(tag)), re.S).sub(self.doBr, input) def doPBr(self, in_): @@ -723,15 +408,10 @@ def doPBr(self, in_): def doBr(self, match): content = re.sub(r'(.+)(?:(?)|(?))\n(?![#*;:\s|])', r'\1
', match.group(3)) - return '<%s%s>%s%s' % (match.group(1), match.group(2), content, - match.group(4)) + return '<{0}{1}>{2}{3}'.format(match.group(1), match.group(2), content, + match.group(4)) - def block(self, text, head_offset=0): - """ - >>> t = Textile() - >>> Py3 << t.block('h1. foobar baby') - '\\t

foobar baby

' - """ + def block(self, text): if not self.lite: tre = '|'.join(self.btag) else: @@ -740,55 +420,54 @@ def block(self, text, head_offset=0): tag = 'p' atts = cite = graf = ext = '' - c1 = '' out = [] - anon = False for line in text: - pattern = r'^(%s)(%s%s)\.(\.?)(?::(\S+))? (.*)$' % ( - tre, self.align_re_s, self.cslh_re_s - ) - match = re.search(pattern, line, re.S) + pattern = (r'^(?P{0})(?P{1}{2})\.(?P\.?)' + r'(?::(?P\S+))? (?P.*)$'.format(tre, + align_re_s, cls_re_s)) + match = re.search(pattern, line, flags=re.S | re.U) + # tag specified on this line. if match: + tag, atts, ext, cite, content = match.groups() + block = Block(self, **match.groupdict()) + inner_block = generate_tag(block.inner_tag, block.content, + block.inner_atts) + # code tags and raw text won't be indented inside outer_tag. + if block.inner_tag != 'code' and not has_raw_text(inner_block): + inner_block = "\n\t\t{0}\n\t".format(inner_block) if ext: - out.append(out.pop() + c1) - - tag, atts, ext, cite, graf = match.groups() - h_match = re.search(r'h([1-6])', tag) - if h_match: - head_level, = h_match.groups() - tag = 'h%i' % max(1, min(int(head_level) + head_offset, 6)) - o1, o2, content, c2, c1, eat = self.fBlock(tag, atts, ext, - cite, graf) - # leave off c1 if this block is extended, - # we'll close it at the start of the next block - - if ext: - line = "%s%s%s%s" % (o1, o2, content, c2) + line = block.content else: - line = "%s%s%s%s%s" % (o1, o2, content, c2, c1) - + line = generate_tag(block.outer_tag, inner_block, + block.outer_atts) + # pre tags and raw text won't be indented. 
+ if block.outer_tag != 'pre' and not has_raw_text(line): + line = "\t{0}".format(line) + # no tag specified else: - anon = True - if ext or not re.search(r'^\s', line): - o1, o2, content, c2, c1, eat = self.fBlock(tag, atts, ext, - cite, line) - # skip $o1/$c1 because this is part of a continuing - # extended block - if tag == 'p' and not self.hasRawText(content): - line = content + # if we're inside an extended block, add the text from the + # previous extension to the front + if ext: + line = '{0}\n{1}'.format(out.pop(), line) + whitespace = ' \t\n\r\f\v' + if ext or not line[0] in whitespace: + block = Block(self, tag, atts, ext, cite, line) + if block.tag == 'p' and not has_raw_text(block.content): + line = block.content else: - line = "%s%s%s" % (o2, content, c2) + line = generate_tag(block.outer_tag, block.content, + block.outer_atts) + if block.outer_tag != 'pre' and not has_raw_text(line): + line = "\t{0}".format(line) else: line = self.graf(line) line = self.doPBr(line) - line = re.sub(r'
', '
', line) + line = line.replace('
', '
') - if ext and anon: - out.append(out.pop() + "\n" + line) - elif not eat: + if line.strip(): out.append(line) if not ext: @@ -798,151 +477,30 @@ def block(self, text, head_offset=0): graf = '' if ext: - out.append(out.pop() + c1) + out.append(generate_tag(block.outer_tag, out.pop(), + block.outer_atts)) return '\n\n'.join(out) - def fBlock(self, tag, atts, ext, cite, content): - """ - >>> t = Textile() - >>> Py3 << t.fBlock("bq", "", None, "", "Hello BlockQuote") - ('\\t
\\n', '\\t\\t

', 'Hello BlockQuote', '

', '\\n\\t
', False) - - >>> Py3 << t.fBlock("bq", "", None, "http://google.com", "Hello BlockQuote") - ('\\t
\\n', '\\t\\t

', 'Hello BlockQuote', '

', '\\n\\t
', False) - - >>> Py3 << t.fBlock("bc", "", None, "", 'printf "Hello, World";') # doctest: +ELLIPSIS - ('
', '', ..., '', '
', False) - - >>> Py3 << t.fBlock("h1", "", None, "", "foobar") - ('', '\\t

', 'foobar', '

', '', False) - """ - att = atts - atts = self.pba(atts) - o1 = o2 = c2 = c1 = '' - eat = False - - if tag == 'p': - # is this an anonymous block with a note definition? - notedef_re = re.compile(r""" - ^note\# # start of note def marker - ([^%%<*!@#^([{ \s.]+) # !label - ([*!^]?) # !link - (%s) # !att - \.? # optional period. - [\s]+ # whitespace ends def marker - (.*)$ # !content""" % (self.cslh_re_s), re.X) - notedef = notedef_re.sub(self.fParseNoteDefs, content) - - # It will be empty if the regex matched and ate it. - if '' == notedef: - return o1, o2, notedef, c2, c1, True - - m = re.search(r'fn(\d+)', tag) - if m: - tag = 'p' - if m.group(1) in self.fn: - fnid = self.fn[m.group(1)] - else: - fnid = m.group(1) - - # If there is an author-specified ID goes on the wrapper & the - # auto-id gets pushed to the - supp_id = '' - - # if class has not been previously specified, set it to "footnote" - if atts.find('class=') < 0: - atts = atts + ' class="footnote"' - - # if there's no specified id, use the generated one. - if atts.find('id=') < 0: - atts = atts + ' id="fn%s"' % fnid - else: - supp_id = ' id="fn%s"' % fnid - - if att.find('^') < 0: - sup = self.formatFootnote(m.group(1), supp_id) - else: - fnrev = '%s' % (fnid, m.group(1)) - sup = self.formatFootnote(fnrev, supp_id) - - content = sup + ' ' + content - - if tag == 'bq': - cite = self.checkRefs(cite) - if cite: - cite = ' cite="%s"' % cite - else: - cite = '' - o1 = "\t\n" % (cite, atts) - o2 = "\t\t" % atts - c2 = "

" - c1 = "\n\t" - - elif tag == 'bc': - o1 = "" % atts - o2 = "" % atts - c2 = "
" - c1 = "" - content = self.shelve(self.encode_html(content.rstrip("\n") + - "\n")) - - elif tag == 'notextile': - content = self.shelve(content) - o1 = o2 = '' - c1 = c2 = '' - - elif tag == 'pre': - content = self.shelve(self.encode_html(content.rstrip("\n") + - "\n")) - o1 = "" % atts - o2 = c2 = '' - c1 = '' - - elif tag == '###': - eat = True - - else: - o2 = "\t<%s%s>" % (tag, atts) - c2 = "" % tag - - if not eat: - content = self.graf(content) - else: - content = '' - return o1, o2, content, c2, c1, eat - - def formatFootnote(self, marker, atts='', anchor=True): - if anchor: - pattern = self.glyph_definitions['fn_foot_pattern'] - else: - pattern = self.glyph_definitions['fn_ref_pattern'] - return pattern % {'atts': atts, 'marker': marker} - def footnoteRef(self, text): - """ - >>> t = Textile() - >>> Py3 << t.footnoteRef('foo[1] ') # doctest: +ELLIPSIS - 'foo1 ' - """ - return re.compile(r'(?<=\S)\[(\d+)(!?)\](\s)?', re.U).sub( - self.footnoteID, text - ) - - def footnoteID(self, match): - footnoteNum, nolink, space = match.groups() - if not space: - space = '' - backref = ' class="footnote"' - if footnoteNum not in self.fn: - a = uuid.uuid4().hex - self.fn[footnoteNum] = a - backref = '%s id="fnrev%s"' % (backref, a) - footnoteID = self.fn[footnoteNum] - footref = '!' == nolink and footnoteNum or '%s' % ( - footnoteID, footnoteNum - ) - footref = self.formatFootnote(footref, backref, False) - return footref + space + # somehow php-textile gets away with not capturing the space. 
+ return re.compile(r'(?<=\S)\[(?P{0}+)(?P!?)\]' + r'(?P{1}?)'.format(regex_snippets['digit'], + regex_snippets['space']), re.U).sub(self.footnoteID, text) + + def footnoteID(self, m): + fn_att = OrderedDict({'class': 'footnote'}) + if m.group('id') not in self.fn: + self.fn[m.group('id')] = '{0}{1}'.format(self.linkPrefix, + self._increment_link_index()) + fnid = self.fn[m.group('id')] + fn_att['id'] = 'fnrev{0}'.format(fnid) + fnid = self.fn[m.group('id')] + footref = generate_tag('a', m.group('id'), {'href': '#fn{0}'.format( + fnid)}) + if '!' == m.group('nolink'): + footref = m.group('id') + footref = generate_tag('sup', footref, fn_att) + return '{0}{1}'.format(footref, m.group('space')) def glyphs(self, text): """ @@ -961,35 +519,17 @@ def glyphs(self, text): A similar situation occurs for double quotes as well. So, for the first pass, we use the glyph_search_initial set of regexes. For all remaining passes, we use glyph_search - - >>> t = Textile() - - >>> Py3 << t.glyphs("apostrophe's") - 'apostrophe’s' - - >>> Py3 << t.glyphs("back in '88") - 'back in ’88' - - >>> Py3 << t.glyphs('foo ...') - 'foo …' - - >>> Py3 << t.glyphs('--') - '—' - - >>> Py3 << t.glyphs('FooBar[tm]') - 'FooBar™' - - >>> Py3 << t.glyphs("

Cat's Cradle by Vonnegut

") - '

Cat’s Cradle by Vonnegut

' - """ # fix: hackish - text = re.sub(r'"\Z', r'" ', text) + if text.endswith('"'): + text = '{0} '.format(text) + text = text.rstrip('\n') result = [] searchlist = self.glyph_search_initial - for i, line in enumerate(re.compile(r'(<[\w\/!?].*?>)', - re.U).split(text)): + # split the text by any angle-bracketed tags + for i, line in enumerate(re.compile(r'(<[\w\/!?].*?>)', re.U).split( + text)): if not i % 2: for s, r in zip(searchlist, self.glyph_replace): line = s.sub(r, line) @@ -999,16 +539,7 @@ def glyphs(self, text): return ''.join(result) def getRefs(self, text): - """ - Capture and store URL references in self.urlrefs. - - >>> t = Textile() - >>> Py3 << t.getRefs("some text [Google]http://www.google.com") - 'some text ' - >>> Py3 << t.urlrefs - {'Google': 'http://www.google.com'} - - """ + """Capture and store URL references in self.urlrefs.""" pattern = re.compile(r'(?:(?<=^)|(?<=\s))\[(.+)\]((?:http(?:s?):\/\/|\/)\S+)(?=\s|$)', re.U) text = pattern.sub(self.refs, text) @@ -1019,50 +550,19 @@ def refs(self, match): self.urlrefs[flag] = url return '' - def checkRefs(self, url): - return self.urlrefs.get(url, url) - - def isRelURL(self, url): - """ - Identify relative urls. 
- - >>> t = Textile() - >>> t.isRelURL("http://www.google.com/") - False - >>> t.isRelURL("/foo") - True - - """ - (scheme, netloc) = urlparse(url)[0:2] - return not scheme and not netloc - def relURL(self, url): - """ - >>> t = Textile() - >>> Py3 << t.relURL("http://www.google.com/") - 'http://www.google.com/' - >>> t.restricted = True - >>> Py3 << t.relURL("gopher://gopher.com/") - '#' - - """ scheme = urlparse(url)[0] if scheme and scheme not in self.url_schemes: return '#' return url def shelve(self, text): - itemID = uuid.uuid4().hex + self.refIndex = self.refIndex + 1 + itemID = '{0}{1}:shelve'.format(self.uid, self.refIndex) self.shelf[itemID] = text return itemID def retrieve(self, text): - """ - >>> t = Textile() - >>> id = t.shelve("foobar") - >>> Py3 << t.retrieve(id) - 'foobar' - """ while True: old = text for k, v in self.shelf.items(): @@ -1071,25 +571,6 @@ def retrieve(self, text): break return text - def encode_html(self, text, quotes=True): - """Return text that's safe for an HTML attribute. - >>> t = Textile() - >>> Py3 << t.encode_html('this is a "test" of text that\\\'s safe to put in an attribute.') - 'this is a "test" of text that's safe to put in an <html> attribute.' 
- """ - a = ( - ('&', '&'), - ('<', '<'), - ('>', '>')) - - if quotes: - a = a + (("'", '''), - ('"', '"')) - - for k, v in a: - text = text.replace(k, v) - return text - def graf(self, text): if not self.lite: text = self.noTextile(text) @@ -1099,9 +580,6 @@ def graf(self, text): text = self.getRefs(text) text = self.links(text) - if self.auto_link: - text = self.autoLink(text) - text = self.links(text) if not self.noimage: text = self.image(text) @@ -1109,7 +587,7 @@ def graf(self, text): if not self.lite: text = self.table(text) text = self.redcloth_list(text) - text = self.lists(text) + text = self.textileLists(text) text = self.span(text) text = self.footnoteRef(text) @@ -1118,89 +596,288 @@ def graf(self, text): return text.rstrip('\n') - def autoLink(self, text): - """ - >>> t = Textile() - >>> Py3 << t.autoLink("http://www.ya.ru") - '"$":http://www.ya.ru' - """ - - pattern = re.compile(r"""\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))""", - re.U | re.I) - return pattern.sub(r'"$":\1', text) - def links(self, text): - """ - >>> t = Textile() - >>> Py3 << t.links('fooobar "Google":http://google.com/foobar/ and hello world "flickr":http://flickr.com/photos/jsamsa/ ') # doctest: +ELLIPSIS - 'fooobar ... and hello world ...' - """ + """For some reason, the part of the regex below that matches the url + does not match a trailing parenthesis. It gets caught by tail, and + we check later to see if it should be included as part of the url.""" + text = self.markStartOfLinks(text) + + return self.replaceLinks(text) + + def markStartOfLinks(self, text): + """Finds and marks the start of well formed links in the input text.""" + # Slice text on '":' boundaries. These always occur in + # inline links between the link text and the url part and are much more + # infrequent than '"' characters so we have less possible links to + # process. 
+ slices = text.split('":') + output = [] + + if len(slices) > 1: + # There are never any start of links in the last slice, so pop it + # off (we'll glue it back later). + last_slice = slices.pop() + + for s in slices: + # Cut this slice into possible starting points wherever we find + # a '"' character. Any of these parts could represent the start + # of the link text - we have to find which one. + possible_start_quotes = s.split('"') + + # Start our search for the start of the link with the closest + # prior quote mark. + possibility = possible_start_quotes.pop() + + # Init the balanced count. If this is still zero at the end of + # our do loop we'll mark the " that caused it to balance as the + # start of the link and move on to the next slice. + balanced = 0 + linkparts = [] + i = 0 + + while balanced is not 0 or i is 0: # pragma: no branch + # Starting at the end, pop off the previous part of the + # slice's fragments. + + # Add this part to those parts that make up the link text. + linkparts.append(possibility) + + if len(possibility) > 0: + # did this part inc or dec the balanced count? + if re.search(r'^\S|=$', possibility, flags=re.U): # pragma: no branch + balanced = balanced - 1 + if re.search(r'\S$', possibility, flags=re.U): # pragma: no branch + balanced = balanced + 1 + possibility = possible_start_quotes.pop() + else: + # If quotes occur next to each other, we get zero + # length strings. eg. ...""Open the door, + # HAL!"":url... In this case we count a zero length in + # the last position as a closing quote and others as + # opening quotes. + if i is 0: + balanced = balanced + 1 + else: + balanced = balanced - 1 + i = i + 1 + + try: + possibility = possible_start_quotes.pop() + except IndexError: + # If out of possible starting segments we back the + # last one from the linkparts array + linkparts.pop() + break + # If the next possibility is empty or ends in a space + # we have a closing ". 
+ if (possibility is '' or possibility.endswith(' ')): + # force search exit + balanced = 0; + + if balanced <= 0: + possible_start_quotes.append(possibility) + break + + # Rebuild the link's text by reversing the parts and sticking + # them back together with quotes. + linkparts.reverse() + link_content = '"'.join(linkparts) + # Rebuild the remaining stuff that goes before the link but + # that's already in order. + pre_link = '"'.join(possible_start_quotes) + # Re-assemble the link starts with a specific marker for the + # next regex. + o = '{0}{1}linkStartMarker:"{2}'.format(pre_link, self.uid, + link_content) + output.append(o) + + # Add the last part back + output.append(last_slice) + # Re-assemble the full text with the start and end markers + text = '":'.join(output) - # For some reason, the part of the regex below that matches the url - # does not match a trailing parenthesis. It gets caught by tail, and - # we check later to see if it should be included as part of the url. - pattern = r''' - (?P
^|(?<=[\s>.\(\|])|[{[])?    # leading text
-            "                                   # opening quote
-            (?P%s)                        # block attributes
-            (?P[^"]+?)                    # link text
-            \s?                                 # optional space
-            (?:\((?P[^)]+?)\)(?="))?     # optional title
-            ":                                  # closing quote, colon
-            (?P<url>%s+?)                       # URL
-            (?P<slash>\/)?                      # slash
-            (?P<post>[^\w\/]*?)                 # trailing text
-            (?P<tail>[\]})]|(?=\s|$|\|))        # tail
-        ''' % (self.cslh_re_s, self.urlchar_re_s)
-
-        text = re.compile(pattern, re.X | re.U).sub(self.fLink, text)
+        return text
 
+    def replaceLinks(self, text):
+        """Replaces links with tokens and stores them on the shelf."""
+        stopchars = r"\s|^'\"*"
+        pattern = r"""
+            (?P<pre>\[)?           # Optionally open with a square bracket eg. Look ["here":url]
+            {0}linkStartMarker:"   # marks start of the link
+            (?P<inner>(?:.|\n)*?)  # grab the content of the inner "..." part of the link, can be anything but
+                                   # do not worry about matching class, id, lang or title yet
+            ":                     # literal ": marks end of atts + text + title block
+            (?P<urlx>[^{1}]*)      # url upto a stopchar
+        """.format(self.uid, stopchars)
+        text = re.compile(pattern, flags=re.X | re.U).sub(self.fLink, text)
         return text
 
-    def fLink(self, match):
-        pre, atts, text, title, url, slash, post, tail = match.groups()
+    def fLink(self, m):
+        in_ = m.group()
+        pre, inner, url = m.groups()
+        pre = pre or ''
+
+        if inner == '':
+            return '{0}"{1}":{2}'.format(pre, inner, url)
+
+        m = re.search(r'''^
+            (?P<atts>{0})                # $atts (if any)
+            {1}*                         # any optional spaces
+            (?P<text>                    # $text is...
+                (!.+!)                   #     an image
+            |                            #   else...
+                .+?                      #     link text
+            )                            # end of $text
+            (?:\((?P<title>[^)]+?)\))?   # $title (if any)
+            $'''.format(cls_re_s, regex_snippets['space']), inner,
+                flags=re.X | re.U)
+
+        atts = m.group('atts') or ''
+        text = m.group('text') or '' or inner
+        title = m.group('title') or ''
+
+        pop, tight = '', ''
+        counts = { '[': None, ']': url.count(']'), '(': None, ')': None }
+
+        # Look for footnotes or other square-bracket delimited stuff at the end
+        # of the url...
+        #
+        # eg. "text":url][otherstuff... will have "[otherstuff" popped back
+        # out.
+        #
+        # "text":url?q[]=x][123]    will have "[123]" popped off the back, the
+        # remaining closing square brackets will later be tested for balance
+        if (counts[']']):
+            m = re.search('(?P<url>^.*\])(?P<tight>\[.*?)$', url, flags=re.U)
+            if m:
+                url, tight = m.groups()
+
+        # Split off any trailing text that isn't part of an array assignment.
+        # eg. "text":...?q[]=value1&q[]=value2 ... is ok
+        # "text":...?q[]=value1]following  ... would have "following" popped
+        # back out and the remaining square bracket will later be tested for
+        # balance
+        if (counts[']']):
+            m = re.search(r'(?P<url>^.*\])(?!=)(?P<end>.*?)$', url, flags=re.U)
+            url = m.group('url')
+            tight = '{0}{1}'.format(m.group('end'), tight)
+
+        # Now that we have the array of all the multi-byte chars in the url,
+        # we parse the uri backwards and pop off any chars that don't belong
+        # there (like . or , or unmatched brackets of various kinds).
+        first = True
+        popped = True
+
+        counts[']'] = url.count(']')
+        url_chars = list(url)
+
+        def _endchar(c, pop, popped, url_chars, counts, pre):
+            """A Textile URL shouldn't end in these characters; we pop them
+            off the end and push them out the back of the url again."""
+            pop = '{0}{1}'.format(c, pop)
+            url_chars.pop()
+            popped = True
+            return pop, popped, url_chars, counts, pre
+
+        def _rightanglebracket(c, pop, popped, url_chars, counts, pre):
+            url_chars.pop()
+            urlLeft = ''.join(url_chars)
+
+            m = re.search(r'(?P<url_chars>.*)(?P<tag><\/[a-z]+)$', urlLeft)
+            url_chars = m.group('url_chars')
+            pop = '{0}{1}{2}'.format(m.group('tag'), c, pop)
+            popped = True
+            return pop, popped, url_chars, counts, pre
+
+        def _closingsquarebracket(c, pop, popped, url_chars, counts, pre):
+            """If we find a closing square bracket, check whether it is
+            balanced.  If it is balanced with a matching opening bracket, it
+            is part of the URL; otherwise we spit it back out of the URL."""
+            # If counts['['] is None, count the occurrences of '[' 
+            counts['['] = counts['['] or url.count('[')
+
+            if counts['['] == counts[']']:
+                # It is balanced, so keep it
+                url_chars.append(c)
+            else:
+                # In the case of un-matched closing square brackets we just eat
+                # it
+                popped = True
+                url_chars.pop()
+                counts[']'] = counts[']'] - 1;
+                if first: # pragma: no branch
+                    pre = ''
+            return pop, popped, url_chars, counts, pre
+
+        def _closingparenthesis(c, pop, popped, url_chars, counts, pre):
+            if counts[')'] is None: # pragma: no branch
+                counts['('] = url.count('(')
+                counts[')'] = url.count(')')
+
+            if counts['('] != counts[')']:
+                # Unbalanced so spit it out the back end
+                popped = True
+                pop = '{0}{1}'.format(url_chars.pop(), pop)
+                counts[')'] = counts[')'] - 1
+            return pop, popped, url_chars, counts, pre
+
+        def _casesdefault(c, pop, popped, url_chars, counts, pre):
+            return pop, popped, url_chars, counts, pre
+
+        cases = {
+                '!': _endchar,
+                '?': _endchar,
+                ':': _endchar,
+                ';': _endchar,
+                '.': _endchar,
+                ',': _endchar,
+                '>': _rightanglebracket,
+                ']': _closingsquarebracket,
+                ')': _closingparenthesis,
+                }
+        for c in url_chars[-1::-1]: # pragma: no branch
+            popped = False
+            pop, popped, url_chars, counts, pre = cases.get(c,
+                    _casesdefault)(c, pop, popped, url_chars, counts, pre)
+            first = False
+            if popped is False:
+                break
 
-        if not pre:
-            pre = ''
+        url = ''.join(url_chars)
+        uri_parts = urlsplit(url)
 
-        if not slash:
-            slash = ''
+        scheme_in_list = uri_parts.scheme in self.url_schemes
+        valid_scheme = (uri_parts.scheme and scheme_in_list)
+        if not is_valid_url(url) and not valid_scheme:
+            return in_.replace('{0}linkStartMarker:'.format(self.uid), '')
 
         if text == '$':
-            text = re.sub(r'^\w+://(.+)', r'\1', url)
-
-        # assume ) at the end of the url is not actually part of the url
-        # unless the url also contains a (
-        if tail == ')' and url.find('(') > -1:
-            url = url + tail
-            tail = None
-
-        url = self.checkRefs(url)
-        try:
-            url = self.encode_url(url)
-        except:
-            pass
+            text = url
+            if "://" in text:
+                text = text.split("://")[1]
+            else:
+                text = text.split(":")[1]
 
-        atts = self.pba(atts)
-        if title:
-            atts = atts + ' title="%s"' % self.encode_html(title)
+        text = text.strip()
+        title = encode_html(title)
 
-        if not self.noimage:
+        if not self.noimage: # pragma: no branch
             text = self.image(text)
-
         text = self.span(text)
         text = self.glyphs(text)
+        url = self.shelveURL(self.encode_url(urlunsplit(uri_parts)))
+        attributes = parse_attributes(atts)
+        if title:
+            attributes['title'] = title
+        attributes['href'] = url
+        if self.rel:
+            attributes['rel'] = self.rel
+        a_text = generate_tag('a', text, attributes)
+        a_shelf_id = self.shelve(a_text)
 
-        url = self.relURL(url) + slash
-        out = '<a href="%s"%s%s>%s</a>' % (self.encode_html(url), atts,
-                                           self.rel, text)
-
-        if (pre and not tail) or (tail and not pre):
-            out = ''.join([pre, out, post, tail])
-            post = ''
+        out = '{0}{1}{2}{3}'.format(pre, a_shelf_id, pop, tight)
 
-        out = self.shelve(out)
-        return ''.join([out, post])
+        return out
 
     def encode_url(self, url):
         """
@@ -1211,19 +888,23 @@ def encode_url(self, url):
             http://stackoverflow.com/a/804380/72656
         """
         # turn string into unicode
-        if not isinstance(url, unicode):
+        if not isinstance(url, six.text_type):
             url = url.decode('utf8')
 
         # parse it
         parsed = urlsplit(url)
 
-        # divide the netloc further
-        netloc_pattern = re.compile(r"""
-            (?:(?P<user>[^:@]+)(?::(?P<password>[^:@]+))?@)?
-            (?P<host>[^:]+)
-            (?::(?P<port>[0-9]+))?
-        """, re.X | re.U)
-        netloc_parsed = netloc_pattern.match(parsed.netloc).groupdict()
+        if parsed.netloc:
+            # divide the netloc further
+            netloc_pattern = re.compile(r"""
+                (?:(?P<user>[^:@]+)(?::(?P<password>[^:@]+))?@)?
+                (?P<host>[^:]+)
+                (?::(?P<port>[0-9]+))?
+            """, re.X | re.U)
+            netloc_parsed = netloc_pattern.match(parsed.netloc).groupdict()
+        else:
+            netloc_parsed = {'user': '', 'password': '', 'host': '', 'port':
+                    ''}
 
         # encode each component
         scheme = parsed.scheme
@@ -1242,42 +923,41 @@ def encode_url(self, url):
         # put it back together
         netloc = ''
         if user:
-            netloc += user
+            netloc = '{0}{1}'.format(netloc, user)
             if password:
-                netloc += ':' + password
-            netloc += '@'
-        netloc += host
+                netloc = '{0}:{1}'.format(netloc, password)
+            netloc = '{0}@'.format(netloc)
+        netloc = '{0}{1}'.format(netloc, host)
         if port:
-            netloc += ':'+port
+            netloc = '{0}:{1}'.format(netloc, port)
         return urlunsplit((scheme, netloc, path, query, fragment))
 
     def span(self, text):
-        """
-        >>> t = Textile()
-        >>> Py3 << t.span(r"hello %(bob)span *strong* and **bold**% goodbye")
-        'hello <span class="bob">span <strong>strong</strong> and <b>bold</b></span> goodbye'
-        """
         qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
                  r'_', r'%', r'\+', r'~', r'\^')
-        pnct = ".,\"'?!;:("
-
-        for qtag in qtags:
-            pattern = re.compile(r"""
-                (?:^|(?<=[\s>%(pnct)s])|([\[{]))
-                (%(qtag)s)(?!%(qtag)s)
-                (%(c)s)
-                (?::\(([^)]+?)\))?
-                ([^\s%(qtag)s]+|\S[^%(qtag)s\n]*[^\s%(qtag)s\n])
-                ([%(pnct)s]*)
-                %(qtag)s
-                (?:$|([\]}])|(?=%(selfpnct)s{1,2}|\s))
-            """ % {'qtag': qtag, 'c': self.cslh_re_s, 'pnct': pnct,
-                   'selfpnct': self.pnct_re_s}, re.X)
-            text = pattern.sub(self.fSpan, text)
+        pnct = r""".,"'?!;:‹›«»„“”‚‘’"""
+        self.span_depth = self.span_depth + 1
+
+        if self.span_depth <= self.max_span_depth:
+            for tag in qtags:
+                pattern = re.compile(r"""
+                    (?P<pre>^|(?<=[\s>{pnct}\(])|[{{[])
+                    (?P<tag>{tag})(?!{tag})
+                    (?P<atts>{cls})
+                    (?!{tag})
+                    (?::(?P<cite>\S+[^{tag}]{space}))?
+                    (?P<content>[^{space}{tag}]+|\S.*?[^\s{tag}\n])
+                    (?P<end>[{pnct}]*)
+                    {tag}
+                    (?P<tail>$|[\[\]}}<]|(?=[{pnct}]{{1,2}}[^0-9]|\s|\)))
+                """.format(**{'tag': tag, 'cls': cls_re_s, 'pnct': pnct,
+                    'space': regex_snippets['space']}), flags=re.X | re.U)
+                text = pattern.sub(self.fSpan, text)
+        self.span_depth = self.span_depth - 1
         return text
 
     def fSpan(self, match):
-        _, tag, atts, cite, content, end, _ = match.groups()
+        pre, tag, atts, cite, content, end, tail = match.groups()
 
         qtags = {
             '*':  'strong',
@@ -1293,42 +973,37 @@ def fSpan(self, match):
         }
 
         tag = qtags[tag]
-        atts = self.pba(atts)
+        atts = pba(atts)
         if cite:
-            atts = atts + ' cite="%s"' % cite
+            atts = '{0} cite="{1}"'.format(atts, cite.rstrip())
 
         content = self.span(content)
 
-        out = "<%s%s>%s%s</%s>" % (tag, atts, content, end, tag)
+        out = "<{0}{1}>{2}{3}</{4}>".format(tag, atts, content, end, tag)
+        if pre and not tail or tail and not pre:
+            out = '{0}{1}{2}'.format(pre, out, tail)
         return out
 
     def image(self, text):
-        """
-        >>> t = Textile()
-        >>> Py3 << t.image('!/imgs/myphoto.jpg!:http://jsamsa.com')
-        '<a href="http://jsamsa.com" class="img"><img alt="" src="/imgs/myphoto.jpg" /></a>'
-        >>> Py3 << t.image('!</imgs/myphoto.jpg!')
-        '<img align="left" alt="" src="/imgs/myphoto.jpg" />'
-        """
         pattern = re.compile(r"""
-            (?:[\[{])?         # pre
-            \!                 # opening !
-            (\<|\=|\>)?        # optional alignment atts
-            (%s)               # optional style,class atts
-            (?:\. )?           # optional dot-space
-            ([^\s(!]+)         # presume this is the src
-            \s?                # optional space
-            (?:\(([^\)]+)\))?  # optional title
-            \!                 # closing
-            (?::(\S+))?        # optional href
-            (?:[\]}]|(?=\s|$)) # lookahead: space or end of string
-        """ % self.cslh_re_s, re.U | re.X)
+            (?:[\[{{])?         # pre
+            \!                  # opening !
+            (\<|\=|\>)?         # optional alignment atts
+            ({0})               # optional style,class atts
+            (?:\. )?            # optional dot-space
+            ([^\s(!]+)          # presume this is the src
+            \s?                 # optional space
+            (?:\(([^\)]+)\))?   # optional title
+            \!                  # closing
+            (?::(\S+))?         # optional href
+            (?:[\]}}]|(?=\s|$)) # lookahead: space or end of string
+        """.format(cls_re_s), re.U | re.X)
         return pattern.sub(self.fImage, text)
 
     def fImage(self, match):
         # (None, '', '/imgs/myphoto.jpg', None, None)
-        align, atts, url, title, href = match.groups()
-        atts = self.pba(atts)
+        align, attributes, url, title, href = match.groups()
+        atts = OrderedDict()
         size = None
 
         alignments = {'<': 'left', '=': 'center', '>': 'right'}
@@ -1336,36 +1011,33 @@ def fImage(self, match):
         if not title:
             title = ''
 
-        if not self.isRelURL(url) and self.get_sizes:
+        if not is_rel_url(url) and self.get_sizes:
             size = imagesize.getimagesize(url)
 
         if href:
-            href = self.checkRefs(href)
+            href = self.shelveURL(href)
 
-        url = self.checkRefs(url)
-        url = self.relURL(url)
+        url = self.shelveURL(url)
 
-        out = []
-        if href:
-            out.append('<a href="%s" class="img">' % href)
-        out.append('<img')
         if align:
-            out.append(' align="%s"' % alignments[align])
-        out.append(' alt="%s"' % title)
+            atts.update(align=alignments[align])
+        atts.update(alt=title)
         if size:
-            out.append(' height="%s"' % size[1])
-        out.append(' src="%s"' % url)
-        if atts:
-            out.append(atts)
+            atts.update(height=six.text_type(size[1]))
+        atts.update(src=url)
+        if attributes:
+            atts.update(parse_attributes(attributes))
         if title:
-            out.append(' title="%s"' % title)
+            atts.update(title=title)
         if size:
-            out.append(' width="%s"' % size[0])
-        out.append(' />')
+            atts.update(width=six.text_type(size[0]))
+        img = generate_tag('img', ' /', atts)
         if href:
-            out.append('</a>')
-
-        return ''.join(out)
+            a_atts = OrderedDict(href=href)
+            if self.rel:
+                a_atts.update(rel=self.rel)
+            img = generate_tag('a', img, a_atts)
+        return img
 
     def code(self, text):
         text = self.doSpecial(text, '<code>', '</code>', self.fCode)
@@ -1375,25 +1047,22 @@ def code(self, text):
 
     def fCode(self, match):
         before, text, after = match.groups()
-        if after is None:
-            after = ''
+        after = after or ''
         # text needs to be escaped
-        if not self.restricted:
-            text = self.encode_html(text, quotes=False)
-        return ''.join([before, self.shelve('<code>%s</code>' % text), after])
+        text = encode_html(text, quotes=False)
+        return ''.join([before, self.shelve('<code>{0}</code>'.format(text)), after])
 
     def fPre(self, match):
         before, text, after = match.groups()
         if after is None:
             after = ''
         # text needs to be escaped
-        if not self.restricted:
-            text = self.encode_html(text)
+        text = encode_html(text)
         return ''.join([before, '<pre>', self.shelve(text), '</pre>', after])
 
     def doSpecial(self, text, start, end, method):
-        pattern = re.compile(r'(^|\s|[\[({>|])%s(.*?)%s($|[\])}])?'
-                             % (re.escape(start), re.escape(end)), re.M | re.S)
+        pattern = re.compile(r'(^|\s|[\[({{>|]){0}(.*?){1}($|[\])}}])?'.format(
+            re.escape(start), re.escape(end)), re.M | re.S)
         return pattern.sub(method, text)
 
     def noTextile(self, text):
@@ -1403,7 +1072,7 @@ def noTextile(self, text):
 
     def fTextile(self, match):
         before, notextile, after = match.groups()
-        if after is None:
+        if after is None: # pragma: no branch
             after = ''
         return ''.join([before, self.shelve(notextile), after])
 
@@ -1423,31 +1092,28 @@ def fParseHTMLComments(self, match):
         """
         before, commenttext, after = match.groups()
         commenttext = self.shelve(commenttext)
-        return '<!--%s-->' % commenttext
+        return '{0}<!--{1}-->'.format(before, commenttext)
 
     def redcloth_list(self, text):
         """Parse the text for definition lists and send them to be
         formatted."""
-        pattern = re.compile(r"^([-]+%s[ .].*:=.*)$(?![^-])" % self.csl_re_s,
-                             re.M | re.U | re.S)
+        pattern = re.compile(r"^([-]+{0}[ .].*:=.*)$(?![^-])".format(cls_re_s),
+                re.M | re.U | re.S)
         return pattern.sub(self.fRCList, text)
 
     def fRCList(self, match):
         """Format a definition list."""
         out = []
-        try:
-            text = re.split(r'\n(?=[-])', match.group(), flags=re.M)
-        except TypeError:
-            text = re.compile(r'\n(?=[-])', re.M).split(match.group())
+        text = re.split(r'\n(?=[-])', match.group(), flags=re.M)
         for line in text:
             # parse the attributes and content
-            m = re.match(r'^[-]+(%s)[ .](.*)$' % self.csl_re_s, line, re.M |
-                    re.S)
+            m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line,
+                    flags=re.M | re.S)
 
             atts, content = m.groups()
             # cleanup
             content = content.strip()
-            atts = self.pba(atts)
+            atts = pba(atts)
 
             # split the content into the term and definition
             xm = re.match(r'^(.*?)[\s]*:=(.*?)[\s]*(=:|:=)?[\s]*$', content,
@@ -1460,21 +1126,21 @@ def fRCList(self, match):
             # if this is the first time through, out as a bool is False
             if not out:
                 if definition == '':
-                    dltag = "<dl%s>" % atts
+                    dltag = "<dl{0}>".format(atts)
                 else:
                     dltag = "<dl>"
                 out.append(dltag)
 
             if definition != '' and term != '':
                 if definition.startswith('\n'):
-                    definition = '<p>%s</p>' % definition.lstrip()
+                    definition = '<p>{0}</p>'.format(definition.lstrip())
                 definition = definition.replace('\n', '<br />').strip()
 
                 term = self.graf(term)
                 definition = self.graf(definition)
 
-                out.extend(['\t<dt%s>%s</dt>' % (atts, term), '\t<dd>%s</dd>' %
-                           definition])
+                out.extend(['\t<dt{0}>{1}</dt>'.format(atts, term),
+                    '\t<dd>{0}</dd>'.format(definition)])
 
         out.append('</dl>')
         out = '\n'.join(out)
@@ -1492,12 +1158,12 @@ def placeNoteLists(self, text):
                 else:
                     self.unreferencedNotes[label] = info
 
-            if o:
+            if o: # pragma: no branch
                 # sort o by key
                 o = OrderedDict(sorted(o.items(), key=lambda t: t[0]))
             self.notes = o
-        text_re = re.compile('<p>notelist(%s)(?:\:([\w|%s]))?([\^!]?)(\+?)\.?[\s]*</p>'
-                             % (self.cslh_re_s, self.syms_re_s), re.U)
+        text_re = re.compile('<p>notelist({0})(?:\:([\w|{1}]))?([\^!]?)(\+?)'
+                '\.?[\s]*</p>'.format(cls_re_s, syms_re_s), re.U)
         text = text_re.sub(self.fNoteLists, text)
         return text
 
@@ -1505,12 +1171,12 @@ def fNoteLists(self, match):
         """Given the text that matches as a note, format it into HTML."""
         att, start_char, g_links, extras = match.groups()
         start_char = start_char or 'a'
-        index = '%s%s%s' % (g_links, extras, start_char)
+        index = '{0}{1}{2}'.format(g_links, extras, start_char)
         result = ''
 
-        if index not in self.notelist_cache:
+        if index not in self.notelist_cache: # pragma: no branch
             o = []
-            if self.notes:
+            if self.notes: # pragma: no branch
                 for seq, info in self.notes.items():
                     links = self.makeBackrefLink(info, g_links, start_char)
                     atts = ''
@@ -1518,24 +1184,23 @@ def fNoteLists(self, match):
                         infoid = info['id']
                         atts = info['def']['atts']
                         content = info['def']['content']
-                        li = ("""\t<li%s>%s<span id="note%s"> </span>%s</li>"""
-                              % (atts, links, infoid, content))
+                        li = ('\t\t<li{0}>{1}<span id="note{2}"> '
+                                '</span>{3}</li>').format(atts, links, infoid,
+                                        content)
                     else:
-                        li = ("""\t<li%s>%s Undefined Note [#%s].<li>""" %
-                              (atts, links, info['seq']))
+                        li = ('\t\t<li{0}>{1} Undefined Note [#{2}].<li>'
+                                ).format(atts, links, info['seq'])
                     o.append(li)
             if '+' == extras and self.unreferencedNotes:
                 for seq, info in self.unreferencedNotes.items():
-                    if info['def']:
-                        atts = info['def']['atts']
-                        content = info['def']['content']
-                        li = """\t<li%s>%s</li>""" % (atts, content)
+                    atts = info['def']['atts']
+                    content = info['def']['content']
+                    li = '\t\t<li{0}>{1}</li>'.format(atts, content)
                     o.append(li)
             self.notelist_cache[index] = "\n".join(o)
             result = self.notelist_cache[index]
-        if result:
-            list_atts = self.pba(att)
-            result = """<ol%s>\n%s\n</ol>""" % (list_atts, result)
+        list_atts = pba(att)
+        result = '<ol{0}>\n{1}\n\t</ol>'.format(list_atts, result)
         return result
 
     def makeBackrefLink(self, info, g_links, i):
@@ -1544,50 +1209,54 @@ def makeBackrefLink(self, info, g_links, i):
         if 'def' in info:
             link = info['def']['link']
         backlink_type = link or g_links
-        i_ = self.encode_high(i)
-        allow_inc = i not in self.syms_re_s
+        i_ = encode_high(i)
+        allow_inc = i not in syms_re_s
         i_ = int(i_)
 
         if backlink_type == "!":
             return ''
         elif backlink_type == '^':
-            return """<sup><a href="#noteref%s">%s</a></sup>""" % (
+            return """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
                 info['refids'][0], i)
         else:
             result = []
             for refid in info['refids']:
-                i_entity = self.decode_high(i_)
-                sup = """<sup><a href="#noteref%s">%s</a></sup>""" % (refid,
-                        i_entity)
+                i_entity = decode_high(i_)
+                sup = """<sup><a href="#noteref{0}">{1}</a></sup>""".format(
+                        refid, i_entity)
                 if allow_inc:
-                    i_ += 1
+                    i_ = i_ + 1
                 result.append(sup)
             result = ' '.join(result)
             return result
 
     def fParseNoteDefs(self, m):
         """Parse the note definitions and format them as HTML"""
-        label, link, att, content = m.groups()
+        label = m.group('label')
+        link = m.group('link')
+        att = m.group('att')
+        content = m.group('content')
 
         # Assign an id if the note reference parse hasn't found the label yet.
         if label not in self.notes:
-            self.notes[label] = {'id': uuid.uuid4().hex}
+            self.notes[label] = {'id': '{0}{1}'.format(self.linkPrefix,
+                self._increment_link_index())}
 
         # Ignores subsequent defs using the same label
-        if 'def' not in self.notes[label]:
-            self.notes[label]['def'] = {'atts': self.pba(att), 'content':
-                                        self.graf(content), 'link': link}
+        if 'def' not in self.notes[label]: # pragma: no branch
+            self.notes[label]['def'] = {'atts': pba(att), 'content':
+                    self.graf(content), 'link': link}
         return ''
 
     def noteRef(self, text):
         """Search the text looking for note references."""
         text_re = re.compile(r"""
         \[          # start
-        (%s)        # !atts
+        ({0})       # !atts
         \#
-        ([^\]!]+)  # !label
+        ([^\]!]+)   # !label
         ([!]?)      # !nolink
-        \]""" % self.cslh_re_s, re.X)
+        \]""".format(cls_re_s), re.X)
         text = text_re.sub(self.fParseNoteRefs, text)
         return text
 
@@ -1597,7 +1266,7 @@ def fParseNoteRefs(self, match):
         processed into the notes array. So now we can resolve the link numbers
         in the order we process the refs..."""
         atts, label, nolink = match.groups()
-        atts = self.pba(atts)
+        atts = pba(atts)
         nolink = nolink == '!'
 
         # Assign a sequence number to this reference if there isn't one already
@@ -1608,57 +1277,70 @@ def fParseNoteRefs(self, match):
                 'seq': self.note_index, 'refids': [], 'id': ''
             }
             num = self.note_index
-            self.note_index += 1
+            self.note_index = self.note_index + 1
 
         # Make our anchor point and stash it for possible use in backlinks when
         # the note list is generated later...
-        refid = uuid.uuid4().hex
+        refid = '{0}{1}'.format(self.linkPrefix, self._increment_link_index())
         self.notes[label]['refids'].append(refid)
 
         # If we are referencing a note that hasn't had the definition parsed
         # yet, then assign it an ID...
         if not self.notes[label]['id']:
-            self.notes[label]['id'] = uuid.uuid4().hex
+            self.notes[label]['id'] = '{0}{1}'.format(self.linkPrefix,
+                    self._increment_link_index())
         labelid = self.notes[label]['id']
 
         # Build the link (if any)...
-        result = '<span id="noteref%s">%s</span>' % (refid, num)
+        result = '<span id="noteref{0}">{1}</span>'.format(refid, num)
         if not nolink:
-            result = """<a href="#note%s">%s</a>""" % (labelid, result)
+            result = '<a href="#note{0}">{1}</a>'.format(labelid, result)
 
         # Build the reference...
-        result = '<sup%s>%s</sup>' % (atts, result)
+        result = '<sup{0}>{1}</sup>'.format(atts, result)
         return result
 
-    def encode_high(self, text):
-        """Encode the text so that it is an appropriate HTML entity."""
-        return ord(text)
+    def shelveURL(self, text):
+        if text == '':
+            return ''
+        self.refIndex = self.refIndex + 1
+        self.refCache[self.refIndex] = text
+        output = '{0}{1}{2}'.format(self.uid, self.refIndex, ':url')
+        return output
+
+    def retrieveURLs(self, text):
+        return re.sub(r'{0}(?P<token>[0-9]+):url'.format(self.uid), self.retrieveURL, text)
+
+    def retrieveURL(self, match):
+        url = self.refCache.get(int(match.group('token')), '')
+        if url is '':
+            return url
+
+        if url in self.urlrefs:
+            url = self.urlrefs[url]
+
+        return url
 
-    def decode_high(self, text):
-        """Decode encoded HTML entities."""
-        h = HTMLParser()
-        text = '&#%s;' % text
-        return h.unescape(text)
+    def _increment_link_index(self):
+        """The self.linkIndex property needs to be incremented in various
+        places.  Don't Repeat Yourself."""
+        self.linkIndex = self.linkIndex + 1
+        return self.linkIndex
 
 
-def textile(text, head_offset=0, html_type='xhtml', auto_link=False,
-            encoding=None, output=None):
+def textile(text, html_type='xhtml', encoding=None, output=None):
     """
     Apply Textile to a block of text.
 
     This function takes the following additional parameters:
 
-    auto_link - enable automatic linking of URLs (default: False)
-    head_offset - offset to apply to heading levels (default: 0)
     html_type - 'xhtml' or 'html5' style tags (default: 'xhtml')
 
     """
-    return Textile(auto_link=auto_link, html_type=html_type).parse(text,
-            head_offset=head_offset)
+    return Textile(html_type=html_type).parse(text)
 
 
-def textile_restricted(text, lite=True, noimage=True, html_type='xhtml',
-                       auto_link=False):
+def textile_restricted(text, lite=True, noimage=True, html_type='xhtml'):
     """
     Apply Textile to a block of text, with restrictions designed for weblog
     comments and other untrusted input.  Raw HTML is escaped, style attributes
@@ -1666,22 +1348,11 @@ def textile_restricted(text, lite=True, noimage=True, html_type='xhtml',
 
     This function takes the following additional parameters:
 
-    auto_link - enable automatic linking of URLs (default: False)
     html_type - 'xhtml' or 'html5' style tags (default: 'xhtml')
     lite - restrict block tags to p, bq, and bc, disable tables (default: True)
     noimage - disable image tags (default: True)
 
     """
     return Textile(restricted=True, lite=lite, noimage=noimage,
-            auto_link=auto_link, html_type=html_type).parse( text,
-                    rel='nofollow')
-
-
-def setup_module(mod):
-    """Inject Py3 to builtins for doctests."""
-    try:
-        import builtins
-    except ImportError:
-        import __builtin__ as builtins
-    from textile.tools.doctest_utils import Py3
-    builtins.Py3 = Py3
+            html_type=html_type, rel='nofollow').parse(
+                    text)
diff --git a/textile/objects/__init__.py b/textile/objects/__init__.py
new file mode 100644
index 00000000..b2373809
--- /dev/null
+++ b/textile/objects/__init__.py
@@ -0,0 +1,4 @@
+from .block import Block
+from .table import Table
+
+__all__ = ['Block', 'Table']
diff --git a/textile/objects/block.py b/textile/objects/block.py
new file mode 100644
index 00000000..7b46bc1f
--- /dev/null
+++ b/textile/objects/block.py
@@ -0,0 +1,123 @@
+try:
+    from collections import OrderedDict
+except ImportError:
+    from ordereddict import OrderedDict
+try:
+    import regex as re
+except ImportError:
+    import re
+
+from textile.regex_strings import cls_re_s, regex_snippets
+from textile.utils import encode_html, generate_tag, parse_attributes
+
+
+class Block(object):
+    def __init__(self, textile, tag, atts, ext, cite, content):
+        self.textile = textile
+        self.tag = tag
+        self.atts = atts
+        self.ext = ext
+        self.cite = cite
+        self.content = content
+
+        self.attributes = parse_attributes(atts)
+        self.outer_tag = ''
+        self.inner_tag = ''
+        self.outer_atts = OrderedDict()
+        self.inner_atts = OrderedDict()
+        self.eat = False
+        self.process()
+
+    def process(self):
+        if self.tag == 'p':
+            # is this an anonymous block with a note definition?
+            notedef_re = re.compile(r"""
+            ^note\#                               # start of note def marker
+            (?P<label>[^%<*!@\#^([{{ {space}.]+)  # label
+            (?P<link>[*!^]?)                      # link
+            (?P<att>{cls})                        # att
+            \.?                                   # optional period.
+            [{space}]+                            # whitespace ends def marker
+            (?P<content>.*)$                      # content""".format(
+                space=regex_snippets['space'], cls=cls_re_s),
+            flags=re.X | re.U)
+            notedef = notedef_re.sub(self.textile.fParseNoteDefs, self.content)
+
+            # It will be empty if the regex matched and ate it.
+            if '' == notedef:
+                self.content = notedef
+
+        fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
+                self.tag, flags=re.U)
+        if fns:
+            self.tag = 'p'
+            fnid = self.textile.fn.get(fns.group('fnid'), None)
+            if fnid is None:
+                fnid = '{0}{1}'.format(self.textile.linkPrefix,
+                        self.textile._increment_link_index())
+
+            # If there is an author-specified ID goes on the wrapper & the
+            # auto-id gets pushed to the <sup>
+            supp_id = OrderedDict()
+
+            # if class has not been previously specified, set it to "footnote"
+            if 'class' not in self.attributes:
+                self.attributes.update({'class': 'footnote'})
+
+            # if there's no specified id, use the generated one.
+            if 'id' not in self.attributes:
+                self.attributes.update({'id': 'fn{0}'.format(fnid)})
+            else:
+                supp_id = parse_attributes('(#fn{0})'.format(fnid))
+
+
+            if '^' not in self.atts:
+                sup = generate_tag('sup', fns.group('fnid'), supp_id)
+            else:
+                fnrev = generate_tag('a', fns.group('fnid'), {'href':
+                    '#fnrev{0}'.format(fnid)})
+                sup = generate_tag('sup', fnrev, supp_id)
+
+            self.content = '{0} {1}'.format(sup, self.content)
+
+        if self.tag == 'bq':
+            if self.cite:
+                self.cite = self.textile.shelveURL(self.cite)
+                cite_att = OrderedDict(cite=self.cite)
+                self.cite = ' cite="{0}"'.format(self.cite)
+            else:
+                self.cite = ''
+                cite_att = OrderedDict()
+            cite_att.update(self.attributes)
+            self.outer_tag = 'blockquote'
+            self.outer_atts = cite_att
+            self.inner_tag = 'p'
+            self.inner_atts = self.attributes
+            self.eat = False
+
+        elif self.tag == 'bc' or self.tag == 'pre':
+            i_tag = ''
+            if self.tag == 'bc':
+                i_tag = 'code'
+            self.content = self.textile.shelve(encode_html('{0}\n'.format(
+                self.content.rstrip("\n"))))
+            self.outer_tag = 'pre'
+            self.outer_atts = self.attributes
+            self.inner_tag = i_tag
+            self.inner_atts = self.attributes
+            self.eat = False
+
+        elif self.tag == 'notextile':
+            self.content = self.textile.shelve(self.content)
+
+        elif self.tag == '###':
+            self.eat = True
+
+        else:
+            self.outer_tag = self.tag
+            self.outer_atts = self.attributes
+
+        if not self.eat:
+            self.content = self.textile.graf(self.content)
+        else:
+            self.content = ''
diff --git a/textile/objects/table.py b/textile/objects/table.py
new file mode 100644
index 00000000..f6940985
--- /dev/null
+++ b/textile/objects/table.py
@@ -0,0 +1,227 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import six
+from xml.etree import ElementTree
+
+from textile.regex_strings import (align_re_s, cls_re_s, regex_snippets,
+        table_span_re_s, valign_re_s)
+from textile.utils import encode_html, generate_tag, parse_attributes
+
+try:
+    import regex as re
+except ImportError:
+    import re
+
+
+class Table(object):
+    def __init__(self, textile, tatts, rows, summary):
+        self.textile = textile
+        self.attributes = parse_attributes(tatts, 'table')
+        if summary:
+            self.attributes.update(summary=summary.strip())
+        self.input = rows
+        self.caption = ''
+        self.colgroup = ''
+        self.content = []
+
+    def process(self):
+        rgrp = None
+        groups = []
+        if self.input[-1] == '|': # pragma: no branch
+            self.input = '{0}\n'.format(self.input)
+        split = self.input.split('|\n')
+        for i, row in enumerate([x for x in split if x]):
+            row = row.lstrip()
+
+            # Caption -- only occurs on row 1, otherwise treat '|=. foo |...'
+            # as a normal center-aligned cell.
+            if i == 0 and row[:2] == '|=':
+                captionpattern = (r"^\|\=(?P<capts>{s}{a}{c})\. "
+                        r"(?P<cap>[^\n]*)(?P<row>.*)".format(**{'s':
+                            table_span_re_s, 'a': align_re_s, 'c': cls_re_s}))
+                caption_re = re.compile(captionpattern, re.S)
+                cmtch = caption_re.match(row)
+                caption = Caption(**cmtch.groupdict())
+                self.caption = '\n{0}'.format(caption.caption)
+                row = cmtch.group('row').lstrip()
+                if row == '':
+                    continue
+
+            # Colgroup -- A colgroup row will not necessarily end with a |.
+            # Hence it may include the next row of actual table data.
+            if row[:2] == '|:':
+                if '\n' in row:
+                    colgroup_data, row = row[2:].split('\n')
+                else:
+                    colgroup_data, row = row[2:], ''
+                colgroup_atts, cols = colgroup_data, None
+                if '|' in colgroup_data:
+                    colgroup_atts, cols = colgroup_data.split('|', 1)
+                colgrp = Colgroup(cols, colgroup_atts)
+                self.colgroup = colgrp.process()
+                if row == '':
+                    continue
+
+            # search the row for a table group - thead, tfoot, or tbody
+            grpmatchpattern = (r"(:?^\|(?P<part>{v})(?P<rgrpatts>{s}{a}{c})"
+                    r"\.\s*$\n)?^(?P<row>.*)").format(**{'v': valign_re_s, 's':
+                        table_span_re_s, 'a': align_re_s, 'c': cls_re_s})
+            grpmatch_re = re.compile(grpmatchpattern, re.S | re.M)
+            grpmatch = grpmatch_re.match(row.lstrip())
+
+            grptypes = {'^': Thead, '~': Tfoot, '-': Tbody}
+            if grpmatch.group('part'):
+                # we're about to start a new group, so process the current one
+                # and add it to the output
+                if rgrp:
+                    groups.append('\n\t{0}'.format(rgrp.process()))
+                rgrp = grptypes[grpmatch.group('part')](grpmatch.group(
+                    'rgrpatts'))
+            row = grpmatch.group('row')
+
+            rmtch = re.search(r'^(?P<ratts>{0}{1}\. )(?P<row>.*)'.format(
+                align_re_s, cls_re_s), row.lstrip())
+            if rmtch:
+                row_atts = parse_attributes(rmtch.group('ratts'), 'tr')
+                row = rmtch.group('row')
+            else:
+                row_atts = {}
+
+            # create a row to hold the cells.
+            r = Row(row_atts, row)
+            for cellctr, cell in enumerate(row.split('|')[1:]):
+                ctag = 'td'
+                if cell.startswith('_'):
+                    ctag = 'th'
+
+                cmtch = re.search(r'^(?P<catts>_?{0}{1}{2}\. )'
+                        '(?P<cell>.*)'.format(table_span_re_s, align_re_s,
+                            cls_re_s), cell, flags=re.S)
+                if cmtch:
+                    catts = cmtch.group('catts')
+                    cell_atts = parse_attributes(catts, 'td')
+                    cell = cmtch.group('cell')
+                else:
+                    cell_atts = {}
+
+                if not self.textile.lite:
+                    a_pattern = r'(?P<space>{0}*)(?P<cell>.*)'.format(
+                            regex_snippets['space'])
+                    a = re.search(a_pattern, cell, flags=re.S)
+                    cell = self.textile.redcloth_list(a.group('cell'))
+                    cell = self.textile.textileLists(cell)
+                    cell = '{0}{1}'.format(a.group('space'), cell)
+
+                # create a cell
+                c = Cell(ctag, cell, cell_atts)
+                cline_tag = '\n\t\t\t{0}'.format(c.process())
+                # add the cell to the row
+                r.cells.append(self.textile.doTagBr(ctag, cline_tag))
+
+            # if we're in a group, add it to the group's rows, else add it
+            # directly to the content
+            if rgrp:
+                rgrp.rows.append(r.process())
+            else:
+                self.content.append(r.process())
+
+        # if there's still an rgrp, process it and add it to the output
+        if rgrp:
+            groups.append('\n\t{0}'.format(rgrp.process()))
+
+        content = '{0}{1}{2}{3}\n\t'.format(self.caption, self.colgroup,
+                ''.join(groups), ''.join(self.content))
+        tbl = generate_tag('table', content, self.attributes)
+        return '\t{0}\n\n'.format(tbl)
+
+
+class Caption(object):
+    def __init__(self, capts, cap, row):
+        self.attributes = parse_attributes(capts)
+        self.caption = self.process(cap)
+
+    def process(self, cap):
+        tag = generate_tag('caption', cap, self.attributes)
+        return '\t{0}\n\t'.format(tag)
+
+
+class Colgroup(object):
+    def __init__(self, cols, atts):
+        self.row = ''
+        self.attributes = atts
+        self.cols = cols
+
+    def process(self):
+        enc = 'unicode'
+        if six.PY2: # pragma: no branch
+            enc = 'UTF-8'
+
+        group_atts = parse_attributes(self.attributes, 'col')
+        colgroup = ElementTree.Element('colgroup', attrib=group_atts)
+        colgroup.text = '\n\t'
+        if self.cols is not None:
+            has_newline = "\n" in self.cols
+            match_cols = self.cols.replace('.', '').split('|')
+            # colgroup is the first item in match_cols, the remaining items are
+            # cols.
+            for idx, col in enumerate(match_cols):
+                col_atts = parse_attributes(col.strip(), 'col')
+                ElementTree.SubElement(colgroup, 'col', col_atts)
+        colgrp = ElementTree.tostring(colgroup, encoding=enc)
+        # cleanup the extra xml declaration if it exists, (python versions
+        # differ) and then format the resulting string accordingly: newline and
+        # tab between cols and a newline at the end
+        xml_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n"
+        colgrp = colgrp.replace(xml_declaration, '')
+        return colgrp.replace('><', '>\n\t<')
+
+
+class Row(object):
+    def __init__(self, attributes, row):
+        self.tag = 'tr'
+        self.attributes = attributes
+        self.cells = []
+
+    def process(self):
+        output = []
+        for c in self.cells:
+            output.append(c)
+        cell_data = '{0}\n\t\t'.format(''.join(output))
+        tag = generate_tag('tr', cell_data, self.attributes)
+        return '\n\t\t{0}'.format(tag)
+
+
+class Cell(object):
+    def __init__(self, tag, content, attributes):
+        self.tag = tag
+        self.content = content
+        self.attributes = attributes
+
+    def process(self):
+        return generate_tag(self.tag, self.content, self.attributes)
+
+
+class _TableSection(object):
+    def __init__(self, tag, attributes):
+        self.tag = tag
+        self.attributes = parse_attributes(attributes)
+        self.rows = []
+
+    def process(self):
+        return generate_tag(self.tag, '{0}\n\t'.format(''.join(self.rows)), self.attributes)
+
+
+class Thead(_TableSection):
+    def __init__(self, attributes):
+        super(Thead, self).__init__('thead', attributes)
+
+
+class Tbody(_TableSection):
+    def __init__(self, attributes):
+        super(Tbody, self).__init__('tbody', attributes)
+
+
+class Tfoot(_TableSection):
+    def __init__(self, attributes):
+        super(Tfoot, self).__init__('tfoot', attributes)
diff --git a/textile/regex_strings.py b/textile/regex_strings.py
new file mode 100644
index 00000000..23c15696
--- /dev/null
+++ b/textile/regex_strings.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import six
+
+try:
+    # Use the regex module for matching uppercase characters if installed;
+    # otherwise build a character class of all uppercase chars by scanning
+    import regex as re
+    upper_re_s = r'\p{Lu}'
+    regex_snippets = {
+        'acr': r'\p{Lu}\p{Nd}',
+        'abr': r'\p{Lu}',
+        'nab': r'\p{Ll}',
+        'wrd': r'(?:\p{L}|\p{M}|\p{N}|\p{Pc})',
+        'cur': r'\p{Sc}',
+        'digit': r'\p{N}',
+        'space': r'(?:\p{Zs}|\v)',
+        'char': r'(?:[^\p{Zs}\v])',
+        }
+except ImportError:
+    import re
+    from sys import maxunicode
+    upper_re_s = "".join(
+            [six.unichr(c) for c in six.moves.range(maxunicode) if six.unichr(
+                c).isupper()])
+    regex_snippets = {
+        'acr': r'{0}0-9'.format(upper_re_s),
+        'abr': r'{0}'.format(upper_re_s),
+        'nab': r'a-z',
+        'wrd': r'\w',
+        'cur': r'',
+        'digit': r'\d',
+        'space': r'(?:\s|\v)',
+        'char': r'\S',
+        }
+
+halign_re_s = r'(?:\<(?!>)|(?<!<)\>|\<\>|\=|[()]+(?! ))'
+valign_re_s = r'[\-^~]'
+class_re_s = r'(?:\([^)\n]+\))'       # Don't allow classes/ids,
+language_re_s = r'(?:\[[^\]\n]+\])'   # languages,
+style_re_s = r'(?:\{[^}\n]+\})'       # or styles to span across newlines
+colspan_re_s = r'(?:\\\d+)'
+rowspan_re_s = r'(?:\/\d+)'
+align_re_s = r'(?:{0}|{1})*'.format(halign_re_s, valign_re_s)
+table_span_re_s = r'(?:{0}|{1})*'.format(colspan_re_s, rowspan_re_s)
+# regex string to match class, style and language attributes
+cls_re_s = (r'(?:'
+               r'{c}(?:{l}(?:{s})?|{s}(?:{l})?)?|'
+               r'{l}(?:{c}(?:{s})?|{s}(?:{c})?)?|'
+               r'{s}(?:{c}(?:{l})?|{l}(?:{c})?)?'
+            r')?'
+           ).format(c=class_re_s, s=style_re_s, l=language_re_s)
+pnct_re_s = r'[-!"#$%&()*+,/:;<=>?@\'\[\\\]\.^_`{|}~]'
+syms_re_s = '¤§µ¶†‡•∗∴◊♠♣♥♦'
diff --git a/textile/tests/__init__.py b/textile/tests/__init__.py
deleted file mode 100644
index 9b2d4d95..00000000
--- a/textile/tests/__init__.py
+++ /dev/null
@@ -1,611 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-import textile
-import re
-from nose.tools import eq_, assert_true
-from nose.plugins.skip import SkipTest
-
-"""
-('>>> import textile')
-'<p>>>> import textile</p>'
-
-"""
-
-
-class TestKnownValues():
-    xhtml_known_values = (
-        ('hello, world', '\t<p>hello, world</p>'),
-
-        ('A single paragraph.\n\nFollowed by another.',
-         '\t<p>A single paragraph.</p>\n\n\t<p>Followed by another.</p>'),
-
-        ('I am <b>very</b> serious.\n\n<pre>\nI am <b>very</b> serious.\n</pre>',
-         '\t<p>I am <b>very</b> serious.</p>\n\n<pre>\nI am <b>very</b> serious.\n</pre>'),
-
-        ('I spoke.\nAnd none replied.', '\t<p>I spoke.<br />\nAnd none replied.</p>'),
-
-        ('"Observe!"', '\t<p>“Observe!” </p>'),
-
-        ('Observe -- very nice!', '\t<p>Observe — very nice!</p>'),
-
-        ('Observe - tiny and brief.', '\t<p>Observe – tiny and brief.</p>'),
-
-        ('Observe...', '\t<p>Observe…</p>'),
-
-        ('Observe ...', '\t<p>Observe …</p>'),
-
-        ('Observe: 2 x 2.', '\t<p>Observe: 2 × 2.</p>'),
-
-        ('one(TM), two(R), three(C).', '\t<p>one™, two®, three©.</p>'),
-
-        ('h1. Header 1', '\t<h1>Header 1</h1>'),
-
-        ('h2. Header 2', '\t<h2>Header 2</h2>'),
-
-        ('h3. Header 3', '\t<h3>Header 3</h3>'),
-
-        ('An old text\n\nbq. A block quotation.\n\nAny old text''',
-        '\t<p>An old text</p>\n\n\t<blockquote>\n\t\t<p>A block quotation.</p>\n\t</blockquote>\n\n\t<p>Any old text</p>'),
-
-        ('I _believe_ every word.', '\t<p>I <em>believe</em> every word.</p>'),
-
-        ('And then? She *fell*!', '\t<p>And then? She <strong>fell</strong>!</p>'),
-
-        ('I __know__.\nI **really** __know__.', '\t<p>I <i>know</i>.<br />\nI <b>really</b> <i>know</i>.</p>'),
-
-        ("??Cat's Cradle?? by Vonnegut", '\t<p><cite>Cat’s Cradle</cite> by Vonnegut</p>'),
-
-        ('Convert with @str(foo)@', '\t<p>Convert with <code>str(foo)</code></p>'),
-
-        ('I\'m -sure- not sure.', '\t<p>I’m <del>sure</del> not sure.</p>'),
-
-        ('You are a +pleasant+ child.', '\t<p>You are a <ins>pleasant</ins> child.</p>'),
-
-        ('a ^2^ + b ^2^ = c ^2^', '\t<p>a <sup>2</sup> + b <sup>2</sup> = c <sup>2</sup></p>'),
-
-        ('log ~2~ x', '\t<p>log <sub>2</sub> x</p>'),
-
-        ('I\'m %unaware% of most soft drinks.', '\t<p>I’m <span>unaware</span> of most soft drinks.</p>'),
-
-        ("I'm %{color:red}unaware%\nof most soft drinks.", '\t<p>I’m <span style="color:red;">unaware</span><br />\nof most soft drinks.</p>'),
-
-        ('p(example1). An example', '\t<p class="example1">An example</p>'),
-
-        ('p(#big-red). Red here', '\t<p id="big-red">Red here</p>'),
-
-        ('p(example1#big-red2). Red here', '\t<p class="example1" id="big-red2">Red here</p>'),
-
-        ('p{color:blue;margin:30px}. Spacey blue', '\t<p style="color:blue; margin:30px;">Spacey blue</p>'),
-
-        ('p[fr]. rouge', '\t<p lang="fr">rouge</p>'),
-
-        ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.',
-        '\t<p>I seriously <strong style="color:red;">blushed</strong><br />\nwhen I <em class="big">sprouted</em>'
-        ' that<br />\ncorn stalk from my<br />\n<span lang="es">cabeza</span>.</p>'),
-
-        ('p<. align left', '\t<p style="text-align:left;">align left</p>'),
-
-        ('p>. align right', '\t<p style="text-align:right;">align right</p>'),
-
-        ('p=. centered', '\t<p style="text-align:center;">centered</p>'),
-
-        ('p<>. justified', '\t<p style="text-align:justify;">justified</p>'),
-
-        ('p(. left ident 1em', '\t<p style="padding-left:1em;">left ident 1em</p>'),
-
-        ('p((. left ident 2em', '\t<p style="padding-left:2em;">left ident 2em</p>'),
-
-        ('p))). right ident 3em', '\t<p style="padding-right:3em;">right ident 3em</p>'),
-
-        ('h2()>. Bingo.', '\t<h2 style="padding-left:1em; padding-right:1em; text-align:right;">Bingo.</h2>'),
-
-        ('h3()>[no]{color:red}. Bingo', '\t<h3 style="color:red; padding-left:1em; padding-right:1em; text-align:right;" lang="no">Bingo</h3>'),
-
-        ('<pre>\n<code>\na.gsub!( /</, "" )\n</code>\n</pre>',
-         '<pre>\n<code>\na.gsub!( /</, "" )\n</code>\n</pre>'),
-
-        ('<div style="float:right;">\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n</div>\n\n'
-         'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.',
-         '\t<p><div style="float:right;"></p>\n\n\t<h3>Sidebar</h3>\n\n\t<p><a href="http://hobix.com/">Hobix</a><br />\n'
-         '<a href="http://ruby-lang.org/">Ruby</a></p>\n\n\t<p></div></p>\n\n\t<p>The main text of the<br />\n'
-         'page goes here and will<br />\nstay to the left of the<br />\nsidebar.</p>'),
-
-        ('# A first item\n# A second item\n# A third',
-         '\t<ol>\n\t\t<li>A first item</li>\n\t\t<li>A second item</li>\n\t\t<li>A third</li>\n\t</ol>'),
-
-        ('# Fuel could be:\n## Coal\n## Gasoline\n## Electricity\n# Humans need only:\n## Water\n## Protein',
-         '\t<ol>\n\t\t<li>Fuel could be:\n\t<ol>\n\t\t<li>Coal</li>\n\t\t<li>Gasoline</li>\n\t\t<li>Electricity</li>\n\t</ol></li>\n\t\t'
-         '<li>Humans need only:\n\t<ol>\n\t\t<li>Water</li>\n\t\t<li>Protein</li>\n\t</ol></li>\n\t</ol>'),
-
-        ('* A first item\n* A second item\n* A third',
-         '\t<ul>\n\t\t<li>A first item</li>\n\t\t<li>A second item</li>\n\t\t<li>A third</li>\n\t</ul>'),
-
-        ('• A first item\n• A second item\n• A third',
-         '\t<ul>\n\t\t<li>A first item</li>\n\t\t<li>A second item</li>\n\t\t<li>A third</li>\n\t</ul>'),
-
-        ('* Fuel could be:\n** Coal\n** Gasoline\n** Electricity\n* Humans need only:\n** Water\n** Protein',
-         '\t<ul>\n\t\t<li>Fuel could be:\n\t<ul>\n\t\t<li>Coal</li>\n\t\t<li>Gasoline</li>\n\t\t<li>Electricity</li>\n\t</ul></li>\n\t\t'
-         '<li>Humans need only:\n\t<ul>\n\t\t<li>Water</li>\n\t\t<li>Protein</li>\n\t</ul></li>\n\t</ul>'),
-
-        ('I searched "Google":http://google.com.', '\t<p>I searched <a href="http://google.com">Google</a>.</p>'),
-
-        ('I searched "a search engine (Google)":http://google.com.', '\t<p>I searched <a href="http://google.com" title="Google">a search engine</a>.</p>'),
-
-        ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com',
-         '\t<p>I am crazy about <a href="http://hobix.com">Hobix</a><br />\nand <a href="http://hobix.com">it’s</a> '
-         '<a href="http://hobix.com">all</a> I ever<br />\n<a href="http://hobix.com">link to</a>!</p>'),
-
-        ('!http://hobix.com/sample.jpg!', '\t<p><img alt="" src="http://hobix.com/sample.jpg" /></p>'),
-
-        ('!openwindow1.gif(Bunny.)!', '\t<p><img alt="Bunny." src="openwindow1.gif" title="Bunny." /></p>'),
-
-        ('!openwindow1.gif!:http://hobix.com/', '\t<p><a href="http://hobix.com/" class="img"><img alt="" src="openwindow1.gif" /></a></p>'),
-
-        ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.',
-         '\t<p><img align="right" alt="" src="obake.gif" /></p>\n\n\t'
-         '<p>And others sat all round the small<br />\nmachine and paid it to sing to them.</p>'),
-
-        ('We use CSS(Cascading Style Sheets).', '\t<p>We use <acronym title="Cascading Style Sheets"><span class="caps">CSS</span></acronym>.</p>'),
-
-        ('|one|two|three|\n|a|b|c|',
-         '\t<table>\n\t\t<tr>\n\t\t\t<td>one</td>\n\t\t\t<td>two</td>\n\t\t\t<td>three</td>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t</tr>\n\t</table>'),
-
-        ('| name | age | sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |',
-         '\t<table>\n\t\t<tr>\n\t\t\t<td> name </td>\n\t\t\t<td> age </td>\n\t\t\t<td> sex </td>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td> joan </td>\n\t\t\t<td> 24 </td>\n\t\t\t<td> f </td>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td> archie </td>\n\t\t\t<td> 29 </td>\n\t\t\t<td> m </td>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td> bella </td>\n\t\t\t<td> 45 </td>\n\t\t\t<td> f </td>\n\t\t</tr>\n\t</table>'),
-
-        ('|_. name |_. age |_. sex |\n| joan | 24 | f |\n| archie | 29 | m |\n| bella | 45 | f |',
-         '\t<table>\n\t\t<tr>\n\t\t\t<th>name </th>\n\t\t\t<th>age </th>\n\t\t\t<th>sex </th>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td> joan </td>\n\t\t\t<td> 24 </td>\n\t\t\t<td> f </td>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td> archie </td>\n\t\t\t<td> 29 </td>\n\t\t\t<td> m </td>\n\t\t</tr>'
-         '\n\t\t<tr>\n\t\t\t<td> bella </td>\n\t\t\t<td> 45 </td>\n\t\t\t<td> f </td>\n\t\t</tr>\n\t</table>'),
-
-        ('<script>alert("hello");</script>', '\t<p><script>alert(“hello”);</script></p>'),
-
-        ('pre.. Hello\n\nHello Again\n\np. normal text', '<pre>Hello\n\nHello Again\n</pre>\n\n\t<p>normal text</p>'),
-
-        ('<pre>this is in a pre tag</pre>', '<pre>this is in a pre tag</pre>'),
-
-        ('"test1":http://foo.com/bar--baz\n\n"test2":http://foo.com/bar---baz\n\n"test3":http://foo.com/bar-17-18-baz',
-         '\t<p><a href="http://foo.com/bar--baz">test1</a></p>\n\n\t'
-         '<p><a href="http://foo.com/bar---baz">test2</a></p>\n\n\t'
-         '<p><a href="http://foo.com/bar-17-18-baz">test3</a></p>'),
-
-        ('"foo ==(bar)==":#foobar', '\t<p><a href="#foobar">foo (bar)</a></p>'),
-
-        ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!',
-         '\t<p><img alt="" src="http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29." /></p>'),
-
-        ('* Point one\n* Point two\n## Step 1\n## Step 2\n## Step 3\n* Point three\n** Sub point 1\n** Sub point 2',
-         '\t<ul>\n\t\t<li>Point one</li>\n\t\t<li>Point two\n\t<ol>\n\t\t<li>Step 1</li>\n\t\t<li>Step 2</li>\n\t\t'
-         '<li>Step 3</li>\n\t</ol></li>\n\t\t<li>Point three\n\t<ul>\n\t\t<li>Sub point 1</li>\n\t\t'
-         '<li>Sub point 2</li>\n\t</ul></li>\n\t</ul>'),
-
-        ('@array[4] = 8@', '\t<p><code>array[4] = 8</code></p>'),
-
-        ('#{color:blue} one\n# two\n# three',
-         '\t<ol style="color:blue;">\n\t\t<li>one</li>\n\t\t<li>two</li>\n\t\t<li>three</li>\n\t</ol>'),
-
-        ('Links (like "this":http://foo.com), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.',
-         '\t<p>Links (like <a href="http://foo.com">this</a>), are now mangled in 2.1.0, whereas 2.0 parsed them correctly.</p>'),
-
-        ('@monospaced text@, followed by text',
-         '\t<p><code>monospaced text</code>, followed by text</p>'),
-
-        ('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\t<p>some text</p>'),
-
-        ('*:(foo)foo bar baz*',
-         '\t<p><strong cite="foo">foo bar baz</strong></p>'),
-
-        ('pre.. foo bar baz\nquux', '<pre>foo bar baz\nquux\n</pre>'),
-
-        ('line of text\n\n    leading spaces',
-         '\t<p>line of text</p>\n\n    leading spaces'),
-
-        ('"some text":http://www.example.com/?q=foo%20bar and more text',
-         '\t<p><a href="http://www.example.com/?q=foo%20bar">some text</a> and more text</p>'),
-
-        ('(??some text??)', '\t<p>(<cite>some text</cite>)</p>'),
-
-        ('(*bold text*)', '\t<p>(<strong>bold text</strong>)</p>'),
-
-        ('H[~2~]O', '\t<p>H<sub>2</sub>O</p>'),
-
-        ("p=. Où est l'école, l'église s'il vous plaît?",
-         """\t<p style="text-align:center;">Où est l’école, l’église s’il vous plaît?</p>"""),
-
-        ("p=. *_The_* _*Prisoner*_",
-         """\t<p style="text-align:center;"><strong><em>The</em></strong> <em><strong>Prisoner</strong></em></p>"""),
-
-        ("""p=. "An emphasised _word._" & "*A spanned phrase.*" """,
-         """\t<p style="text-align:center;">“An emphasised <em>word.</em>” & “<strong>A spanned phrase.</strong>” </p>"""),
-
-        ("""p=. "*Here*'s a word!" """,
-         """\t<p style="text-align:center;">“<strong>Here</strong>’s a word!” </p>"""),
-
-        ("""p=. "Please visit our "Textile Test Page":http://textile.sitemonks.com" """,
-         """\t<p style="text-align:center;">“Please visit our <a href="http://textile.sitemonks.com">Textile Test Page</a>” </p>"""),
-        ("""| Foreign EXPÓŅÉNTIAL |""",
-         """\t<table>\n\t\t<tr>\n\t\t\t<td> Foreign <span class="caps">EXPÓŅÉNTIAL</span> </td>\n\t\t</tr>\n\t</table>"""),
-        ("""Piękne ŹDŹBŁO""",
-         """\t<p>Piękne <span class="caps">ŹDŹBŁO</span></p>"""),
-
-        ("""p=. Tell me, what is AJAX(Asynchronous Javascript and XML), please?""",
-         """\t<p style="text-align:center;">Tell me, what is <acronym title="Asynchronous Javascript and XML"><span class="caps">AJAX</span></acronym>, please?</p>"""),
-        ('p{font-size:0.8em}. *TxStyle* is a documentation project of Textile 2.4 for "Textpattern CMS":http://texpattern.com.',
-         '\t<p style="font-size:0.8em;"><strong>TxStyle</strong> is a documentation project of Textile 2.4 for <a href="http://texpattern.com">Textpattern <span class="caps">CMS</span></a>.</p>'),
-        (""""Übermensch":http://de/wikipedia.org/wiki/Übermensch""", """\t<p><a href="http://de/wikipedia.org/wiki/%C3%9Cbermensch">Übermensch</a></p>"""),
-        ("""Here is some text with a <!-- Commented out[1] --> block.\n\n<!-- Here is a single <span>line</span> comment block -->\n\n<!-- Here is a whole\nmultiline\n<span>HTML</span>\nComment\n-->\n\nbc. <!-- Here is a comment block in a code block. -->""",
-         """\t<p>Here is some text with a<!-- Commented out[1] --> block.</p>\n\n\t<p><!-- Here is a single <span>line</span> comment block --></p>\n\n\t<p><!-- Here is a whole\nmultiline\n<span>HTML</span>\nComment\n--></p>\n\n<pre><code><!-- Here is a comment block in a code block. -->\n</code></pre>"""),
-        (""""Textile(c)" is a registered(r) 'trademark' of Textpattern(tm) -- or TXP(That's textpattern!) -- at least it was - back in '88 when 2x4 was (+/-)5(o)C ... QED!\n\np{font-size: 200%;}. 2(1/4) 3(1/2) 4(3/4)""",
-         """\t<p>“Textile©” is a registered® ‘trademark’ of Textpattern™ — or <acronym title="That’s textpattern!"><span class="caps">TXP</span></acronym> — at least it was – back in ’88 when 2×4 was ±5°C … <span class="caps">QED</span>!</p>\n\n\t<p style="font-size: 200%;">2¼ 3½ 4¾</p>"""),
-        ("""|=. Testing colgroup and col syntax\n|:\\5. 80\n|a|b|c|d|e|\n\n|=. Testing colgroup and col syntax|\n|:\\5. 80|\n|a|b|c|d|e|""", """\t<table>\n\t<caption>Testing colgroup and col syntax</caption>\n\t<colgroup span="5" width="80">\n\t</colgroup>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t\t<td>d</td>\n\t\t\t<td>e</td>\n\t\t</tr>\n\t</table>\n\n\t<table>\n\t<caption>Testing colgroup and col syntax</caption>\n\t<colgroup span="5" width="80">\n\t</colgroup>\n\t\t<tr>\n\t\t\t<td>a</td>\n\t\t\t<td>b</td>\n\t\t\t<td>c</td>\n\t\t\t<td>d</td>\n\t\t\t<td>e</td>\n\t\t</tr>\n\t</table>"""),
-        ("""table(#dvds){border-collapse:collapse}. Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups\n|={font-size:140%;margin-bottom:15px}. DVDs with two Textiled tbody elements\n|:\\3. 100 |{background:#ddd}|250||50|300|\n|^(header).\n|_. Title |_. Starring |_. Director |_. Writer |_. Notes |\n|~(footer).\n|\\5=. This is the tfoot, centred |\n|-(toplist){background:#c5f7f6}.\n| _The Usual Suspects_ | Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey | Bryan Singer | Chris McQaurrie | One of the finest films ever made |\n| _Se7en_ | Morgan Freeman, Brad Pitt, Kevin Spacey | David Fincher | Andrew Kevin Walker | Great psychological thriller |\n| _Primer_ | David Sullivan, Shane Carruth | Shane Carruth | Shane Carruth | Amazing insight into trust and human psychology <br />rather than science fiction. Terrific! |\n| _District 9_ | Sharlto Copley, Jason Cope | Neill Blomkamp | Neill Blomkamp, Terri Tatchell | Social commentary layered on thick,\nbut boy is it done well |\n|-(medlist){background:#e7e895;}.\n| _Arlington Road_ | Tim Robbins, Jeff Bridges | Mark Pellington | Ehren Kruger | Awesome study in neighbourly relations |\n| _Phone Booth_ | Colin Farrell, Kiefer Sutherland, Forest Whitaker | Joel Schumacher | Larry Cohen | Edge-of-the-seat stuff in this\nshort but brilliantly executed thriller |""",
-         """\t<table style="border-collapse:collapse;" id="dvds" summary="Great films on DVD employing Textile summary, caption, thead, tfoot, two tbody elements and colgroups">\n\t<caption style="font-size:140%; margin-bottom:15px;"><span class="caps">DVD</span>s with two Textiled tbody elements</caption>\n\t<colgroup span="3" width="100">\n\t<col style="background:#ddd;" />\n\t<col width="250" />\n\t<col />\n\t<col width="50" />\n\t<col width="300" />\n\t</colgroup>\n\t<thead class="header">\n\t\t<tr>\n\t\t\t<th>Title </th>\n\t\t\t<th>Starring </th>\n\t\t\t<th>Director </th>\n\t\t\t<th>Writer </th>\n\t\t\t<th>Notes </th>\n\t\t</tr>\n\t</thead>\n\t<tfoot class="footer">\n\t\t<tr>\n\t\t\t<td style="text-align:center;" colspan="5">This is the tfoot, centred </td>\n\t\t</tr>\n\t</tfoot>\n\t<tbody style="background:#c5f7f6;" class="toplist">\n\t\t<tr>\n\t\t\t<td> <em>The Usual Suspects</em> </td>\n\t\t\t<td> Benicio Del Toro, Gabriel Byrne, Stephen Baldwin, Kevin Spacey </td>\n\t\t\t<td> Bryan Singer </td>\n\t\t\t<td> Chris McQaurrie </td>\n\t\t\t<td> One of the finest films ever made </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>Se7en</em> </td>\n\t\t\t<td> Morgan Freeman, Brad Pitt, Kevin Spacey </td>\n\t\t\t<td> David Fincher </td>\n\t\t\t<td> Andrew Kevin Walker </td>\n\t\t\t<td> Great psychological thriller </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>Primer</em> </td>\n\t\t\t<td> David Sullivan, Shane Carruth </td>\n\t\t\t<td> Shane Carruth </td>\n\t\t\t<td> Shane Carruth </td>\n\t\t\t<td> Amazing insight into trust and human psychology <br />\nrather than science fiction. Terrific! 
</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>District 9</em> </td>\n\t\t\t<td> Sharlto Copley, Jason Cope </td>\n\t\t\t<td> Neill Blomkamp </td>\n\t\t\t<td> Neill Blomkamp, Terri Tatchell </td>\n\t\t\t<td> Social commentary layered on thick,<br />\nbut boy is it done well </td>\n\t\t</tr>\n\t</tbody>\n\t<tbody style="background:#e7e895;" class="medlist">\n\t\t<tr>\n\t\t\t<td> <em>Arlington Road</em> </td>\n\t\t\t<td> Tim Robbins, Jeff Bridges </td>\n\t\t\t<td> Mark Pellington </td>\n\t\t\t<td> Ehren Kruger </td>\n\t\t\t<td> Awesome study in neighbourly relations </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> <em>Phone Booth</em> </td>\n\t\t\t<td> Colin Farrell, Kiefer Sutherland, Forest Whitaker </td>\n\t\t\t<td> Joel Schumacher </td>\n\t\t\t<td> Larry Cohen </td>\n\t\t\t<td> Edge-of-the-seat stuff in this<br />\nshort but brilliantly executed thriller </td>\n\t\t</tr>\n\t</tbody>\n\t</table>"""),
-        ("""-(hot) *coffee* := Hot _and_ black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk := Nourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n\n-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:""",
-        """<dl>\n\t<dt class="hot"><strong>coffee</strong></dt>\n\t<dd>Hot <em>and</em> black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</dd>\n</dl>\n\n<dl>\n\t<dt class="hot">coffee</dt>\n\t<dd>Hot and black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd><p>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</p></dd>\n</dl>"""),
-        (""";(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3""",
-         """\t<dl class="class" id="id">\n\t\t<dt>Term 1</dt>\n\t\t<dd>Def 1</dd>\n\t\t<dd>Def 2</dd>\n\t\t<dd>Def 3</dd>\n\t</dl>"""),
-        ("""*Here is a comment*\n\nHere is *(class)a comment*\n\n*(class)Here is a class* that is a little extended and is\n*followed* by a strong word!\n\nbc. ; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n\n*123 test*\n\n*test 123*\n\n**123 test**\n\n**test 123**""",
-         """\t<p><strong>Here is a comment</strong></p>\n\n\t<p>Here is <strong class="class">a comment</strong></p>\n\n\t<p><strong class="class">Here is a class</strong> that is a little extended and is<br />\n<strong>followed</strong> by a strong word!</p>\n\n<pre><code>; Content-type: text/javascript\n; Cache-Control: no-store, no-cache, must-revalidate, pre-check=0, post-check=0, max-age=0\n; Expires: Sat, 24 Jul 2003 05:00:00 GMT\n; Last-Modified: Wed, 1 Jan 2025 05:00:00 GMT\n; Pragma: no-cache\n</code></pre>\n\n\t<p><strong>123 test</strong></p>\n\n\t<p><strong>test 123</strong></p>\n\n\t<p><b>123 test</b></p>\n\n\t<p><b>test 123</b></p>"""),
-        ("""#_(first#list) one\n# two\n# three\n\ntest\n\n#(ordered#list2).\n# one\n# two\n# three\n\ntest\n\n#_(class_4).\n# four\n# five\n# six\n\ntest\n\n#_ seven\n# eight\n# nine\n\ntest\n\n# one\n# two\n# three\n\ntest\n\n#22 22\n# 23\n# 24""",
-         """\t<ol class="first" id="list" start="1">\n\t\t<li>one</li>\n\t\t<li>two</li>\n\t\t<li>three</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol class="ordered" id="list2">\n\t\t<li>one</li>\n\t\t<li>two</li>\n\t\t<li>three</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol class="class_4" start="4">\n\t\t<li>four</li>\n\t\t<li>five</li>\n\t\t<li>six</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol start="7">\n\t\t<li>seven</li>\n\t\t<li>eight</li>\n\t\t<li>nine</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol>\n\t\t<li>one</li>\n\t\t<li>two</li>\n\t\t<li>three</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol start="22">\n\t\t<li>22</li>\n\t\t<li>23</li>\n\t\t<li>24</li>\n\t</ol>"""),
-        ("""# one\n##3 one.three\n## one.four\n## one.five\n# two\n\ntest\n\n#_(continuation#section2).\n# three\n# four\n##_ four.six\n## four.seven\n# five\n\ntest\n\n#21 twenty-one\n# twenty-two""",
-         """\t<ol>\n\t\t<li>one\n\t<ol start="3">\n\t\t<li>one.three</li>\n\t\t<li>one.four</li>\n\t\t<li>one.five</li>\n\t</ol></li>\n\t\t<li>two</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol class="continuation" id="section2" start="3">\n\t\t<li>three</li>\n\t\t<li>four\n\t<ol start="6">\n\t\t<li>four.six</li>\n\t\t<li>four.seven</li>\n\t</ol></li>\n\t\t<li>five</li>\n\t</ol>\n\n\t<p>test</p>\n\n\t<ol start="21">\n\t\t<li>twenty-one</li>\n\t\t<li>twenty-two</li>\n\t</ol>"""),
-        ("""|* Foo[^2^]\n* _bar_\n* ~baz~ |\n|#4 *Four*\n# __Five__ |\n|-(hot) coffee := Hot and black\n-(hot#tea) tea := Also hot, but a little less black\n-(cold) milk :=\nNourishing beverage for baby cows.\nCold drink that goes great with cookies. =:\n|""",
-         """\t<table>\n\t\t<tr>\n\t\t\t<td>\t<ul>\n\t\t<li>Foo<sup>2</sup></li>\n\t\t<li><em>bar</em></li>\n\t\t<li><sub>baz</sub></li>\n\t</ul></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>\t<ol start="4">\n\t\t<li><strong>Four</strong></li>\n\t\t<li><i>Five</i></li>\n\t</ol></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td><dl>\n\t<dt class="hot">coffee</dt>\n\t<dd>Hot and black</dd>\n\t<dt class="hot" id="tea">tea</dt>\n\t<dd>Also hot, but a little less black</dd>\n\t<dt class="cold">milk</dt>\n\t<dd><p>Nourishing beverage for baby cows.<br />\nCold drink that goes great with cookies.</p></dd><br />\n</dl></td>\n\t\t</tr>\n\t</table>"""),
-        ("""h4. A more complicated table\n\ntable(tableclass#tableid){color:blue}.\n|_. table |_. more |_. badass |\n|\\3. Horizontal span of 3|\n(firstrow). |first|HAL(open the pod bay doors)|1|\n|some|{color:green}. styled|content|\n|/2. spans 2 rows|this is|quite a|\n| deep test | don't you think?|\n(lastrow). |fifth|I'm a lumberjack|5|\n|sixth| _*bold italics*_ |6|""",
-         """\t<h4>A more complicated table</h4>\n\n\t<table style="color:blue;" class="tableclass" id="tableid">\n\t\t<tr>\n\t\t\t<th>table </th>\n\t\t\t<th>more </th>\n\t\t\t<th>badass </th>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td colspan="3">Horizontal span of 3</td>\n\t\t</tr>\n\t\t<tr class="firstrow">\n\t\t\t<td>first</td>\n\t\t\t<td><acronym title="open the pod bay doors"><span class="caps">HAL</span></acronym></td>\n\t\t\t<td>1</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>some</td>\n\t\t\t<td style="color:green;">styled</td>\n\t\t\t<td>content</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td rowspan="2">spans 2 rows</td>\n\t\t\t<td>this is</td>\n\t\t\t<td>quite a</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> deep test </td>\n\t\t\t<td> don’t you think?</td>\n\t\t</tr>\n\t\t<tr class="lastrow">\n\t\t\t<td>fifth</td>\n\t\t\t<td>I’m a lumberjack</td>\n\t\t\t<td>5</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>sixth</td>\n\t\t\t<td> <em><strong>bold italics</strong></em> </td>\n\t\t\t<td>6</td>\n\t\t</tr>\n\t</table>"""),
-        ("""| *strong* |\n\n| _em_ |\n\n| Inter-word -dashes- | ZIP-codes are 5- or 9-digit codes |""",
-         """\t<table>\n\t\t<tr>\n\t\t\t<td> <strong>strong</strong> </td>\n\t\t</tr>\n\t</table>\n\n\t<table>\n\t\t<tr>\n\t\t\t<td> <em>em</em> </td>\n\t\t</tr>\n\t</table>\n\n\t<table>\n\t\t<tr>\n\t\t\t<td> Inter-word <del>dashes</del> </td>\n\t\t\t<td> <span class="caps">ZIP</span>-codes are 5- or 9-digit codes </td>\n\t\t</tr>\n\t</table>"""),
-        ("""|_. attribute list |\n|<. align left |\n|>. align right|\n|=. center |\n|<>. justify me|\n|^. valign top |\n|~. bottom |""",
-         """\t<table>\n\t\t<tr>\n\t\t\t<th>attribute list </th>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td style="text-align:left;">align left </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td style="text-align:right;">align right</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td style="text-align:center;">center </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td style="text-align:justify;">justify me</td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td style="vertical-align:top;">valign top </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td style="vertical-align:bottom;">bottom </td>\n\t\t</tr>\n\t</table>"""),
-        ("""h2. A definition list\n\n;(class#id) Term 1\n: Def 1\n: Def 2\n: Def 3\n;; Center\n;; NATO(Why Em Cee Ayy)\n:: Subdef 1\n:: Subdef 2\n;;; SubSub Term\n::: SubSub Def 1\n::: SubSub Def 2\n::: Subsub Def 3\nWith newline\n::: Subsub Def 4\n:: Subdef 3\n: DEF 4\n; Term 2\n: Another def\n: And another\n: One more\n:: A def without a term\n:: More defness\n; Third term for good measure\n: My definition of a boombastic jazz""",
-         """\t<h2>A definition list</h2>\n\n\t<dl class="class" id="id">\n\t\t<dt>Term 1</dt>\n\t\t<dd>Def 1</dd>\n\t\t<dd>Def 2</dd>\n\t\t<dd>Def 3\n\t<dl>\n\t\t<dt>Center</dt>\n\t\t<dt><acronym title="Why Em Cee Ayy"><span class="caps">NATO</span></acronym></dt>\n\t\t<dd>Subdef 1</dd>\n\t\t<dd>Subdef 2\n\t<dl>\n\t\t<dt>SubSub Term</dt>\n\t\t<dd>SubSub Def 1</dd>\n\t\t<dd>SubSub Def 2</dd>\n\t\t<dd>Subsub Def 3<br />\nWith newline</dd>\n\t\t<dd>Subsub Def 4</dd>\n\t</dl></dd>\n\t\t<dd>Subdef 3</dd>\n\t</dl></dd>\n\t\t<dd><span class="caps">DEF</span> 4</dd>\n\t\t<dt>Term 2</dt>\n\t\t<dd>Another def</dd>\n\t\t<dd>And another</dd>\n\t\t<dd>One more\n\t<dl>\n\t\t<dd>A def without a term</dd>\n\t\t<dd>More defness</dd>\n\t</dl></dd>\n\t\t<dt>Third term for good measure</dt>\n\t\t<dd>My definition of a boombastic jazz</dd>\n\t</dl>"""),
-        ("""###. Here's a comment.\n\nh3. Hello\n\n###. And\nanother\none.\n\nGoodbye.""", """\t<h3>Hello</h3>\n\n\t<p>Goodbye.</p>"""),
-        ("""h2. A Definition list which covers the instance where a new definition list is created with a term without a definition\n\n- term :=\n- term2 := def""", """\t<h2>A Definition list which covers the instance where a new definition list is created with a term without a definition</h2>\n\n<dl>\n\t<dt>term2</dt>\n\t<dd>def</dd>\n</dl>"""),
-        ('!{height:20px;width:20px;}https://1.gravatar.com/avatar/!',
-         '\t<p><img alt="" src="https://1.gravatar.com/avatar/" style="height:20px; width:20px;" /></p>')
-
-    )
-
-    # A few extra cases for HTML4
-    html_known_values = (
-        ('I spoke.\nAnd none replied.', '\t<p>I spoke.<br />\nAnd none replied.</p>'),
-        ('I __know__.\nI **really** __know__.', '\t<p>I <i>know</i>.<br />\nI <b>really</b> <i>know</i>.</p>'),
-        ("I'm %{color:red}unaware%\nof most soft drinks.", '\t<p>I’m <span style="color:red;">unaware</span><br />\nof most soft drinks.</p>'),
-        ('I seriously *{color:red}blushed*\nwhen I _(big)sprouted_ that\ncorn stalk from my\n%[es]cabeza%.',
-        '\t<p>I seriously <strong style="color:red;">blushed</strong><br />\nwhen I <em class="big">sprouted</em>'
-        ' that<br />\ncorn stalk from my<br />\n<span lang="es">cabeza</span>.</p>'),
-        ('<pre>\n<code>\na.gsub!( /</, "" )\n</code>\n</pre>',
-         '<pre>\n<code>\na.gsub!( /</, "" )\n</code>\n</pre>'),
-        ('<div style="float:right;">\n\nh3. Sidebar\n\n"Hobix":http://hobix.com/\n"Ruby":http://ruby-lang.org/\n\n</div>\n\n'
-         'The main text of the\npage goes here and will\nstay to the left of the\nsidebar.',
-         '\t<p><div style="float:right;"></p>\n\n\t<h3>Sidebar</h3>\n\n\t<p><a href="http://hobix.com/">Hobix</a><br />\n'
-         '<a href="http://ruby-lang.org/">Ruby</a></p>\n\n\t<p></div></p>\n\n\t<p>The main text of the<br />\n'
-         'page goes here and will<br />\nstay to the left of the<br />\nsidebar.</p>'),
-        ('I am crazy about "Hobix":hobix\nand "it\'s":hobix "all":hobix I ever\n"link to":hobix!\n\n[hobix]http://hobix.com',
-         '\t<p>I am crazy about <a href="http://hobix.com">Hobix</a><br />\nand <a href="http://hobix.com">it’s</a> '
-         '<a href="http://hobix.com">all</a> I ever<br />\n<a href="http://hobix.com">link to</a>!</p>'),
-        ('!http://hobix.com/sample.jpg!', '\t<p><img alt="" src="http://hobix.com/sample.jpg" /></p>'),
-        ('!openwindow1.gif(Bunny.)!', '\t<p><img alt="Bunny." src="openwindow1.gif" title="Bunny." /></p>'),
-        ('!openwindow1.gif!:http://hobix.com/', '\t<p><a href="http://hobix.com/" class="img"><img alt="" src="openwindow1.gif" /></a></p>'),
-        ('!>obake.gif!\n\nAnd others sat all round the small\nmachine and paid it to sing to them.',
-         '\t<p><img align="right" alt="" src="obake.gif" /></p>\n\n\t'
-         '<p>And others sat all round the small<br />\nmachine and paid it to sing to them.</p>'),
-        ('!http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29.!',
-         '\t<p><img alt="" src="http://render.mathim.com/A%5EtAx%20%3D%20A%5Et%28Ax%29." /></p>'),
-        ('notextile. <b> foo bar baz</b>\n\np. quux\n',
-         '<b> foo bar baz</b>\n\n\t<p>quux</p>')
-    )
-
-    def testKnownValuesXHTML(self):
-        # XHTML
-        for t, h in self.xhtml_known_values:
-            yield self.check_textile, t, h, 'xhtml'
-
-    def testKnownValuesHTML(self):
-        # HTML5
-        for t, h in self.html_known_values:
-            yield self.check_textile, t, h, 'html5'
-
-    def check_textile(self, input, expected_output, html_type):
-        output = textile.textile(input, html_type=html_type)
-        eq_(output, expected_output)
-
-
-class Tests():
-    def testFootnoteReference(self):
-        html = textile.textile('YACC[1]')
-        assert_true(re.search(r'^\t<p><span class="caps">YACC</span><sup class="footnote" id="fnrev[a-f0-9]{32}"><a href="#fn[a-f0-9]{32}">1</a></sup></p>', html))
-
-    def testFootnote(self):
-        html = textile.textile('This is covered elsewhere[1].\n\nfn1. Down here, in fact.\n\nfn2. Here is another footnote.')
-        assert_true(re.search(r'^\t<p>This is covered elsewhere<sup class="footnote" id="fnrev[a-f0-9]{32}"><a href="#fn([a-f0-9]{32})">1</a></sup>.</p>\n\n\t<p class="footnote" id="fn\1"><sup>1</sup> Down here, in fact.</p>\n\n\t<p class="footnote" id="fn2"><sup>2</sup> Here is another footnote.</p>$', html))
-
-        html = textile.textile('''See[1] for details -- or perhaps[100] at a push.\n\nfn1. Here are the details.\n\nfn100(footy#otherid). A totally unrelated footnote.''')
-        assert_true(re.search(r'^\t<p>See<sup class="footnote" id="fnrev[a-f0-9]{32}"><a href="#fn([a-f0-9]{32})">1</a></sup> for details — or perhaps<sup class="footnote" id="fnrev[a-f0-9]{32}"><a href="#fn([a-f0-9]{32})">100</a></sup> at a push.</p>\n\n\t<p class="footnote" id="fn\1"><sup>1</sup> Here are the details.</p>\n\n\t<p class="footy" id="otherid"><sup id="fn\2">100</sup> A totally unrelated footnote.</p>$', html))
-
-        html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''')
-        assert_true(re.search(r'^\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})"><a href="#fn([a-f0-9]{32})">2</a></sup> for details, and later, reference it again<sup class="footnote"><a href="#fn\2">2</a></sup>.</p>\n\n\t<p class="footy" id="otherid" lang="en"><sup id="fn\2"><a href="#fnrev\1">2</a></sup> Here are the details.</p>$', html))
-
-        html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''')
-        assert_true(re.search(r'^\t<p>See<sup class="footnote" id="fnrev[a-f0-9]{32}">3</sup> for details.</p>\n\n\t<p class="footnote" id="fn[a-f0-9]{32}"><sup>3</sup> Here are the details.</p>$', html))
-
-        html = textile.textile('''See[4!] for details.\n\nfn4^. Here are the details.''')
-        assert_true(re.search(r'^\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})">4</sup> for details.</p>\n\n\t<p class="footnote" id="fn[a-f0-9]{32}"><sup><a href="#fnrev\1">4</a></sup> Here are the details.</p>$', html))
-
-    def testURLWithHyphens(self):
-        eq_(textile.textile('"foo":http://google.com/one--two'), '\t<p><a href="http://google.com/one--two">foo</a></p>')
-
-    def testIssue024TableColspan(self):
-        eq_(textile.textile('|\\2. spans two cols |\n| col 1 | col 2 |'),
-            '\t<table>\n\t\t<tr>\n\t\t\t<td colspan="2">spans two cols </td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td> col 1 </td>\n\t\t\t<td> col 2 </td>\n\t\t</tr>\n\t</table>')
-
-    def testPBAColspan(self):
-        eq_(textile.Textile().pba(r'\3', element='td'), ' colspan="3"')
-
-    def testIssue002Escaping(self):
-        foo = '"foo ==(bar)==":#foobar'
-        eq_(textile.textile(foo), '\t<p><a href="#foobar">foo (bar)</a></p>')
-
-    def testIssue014NewlinesInExtendedPreBlocks(self):
-        text = "pre.. Hello\n\nAgain\n\np. normal text"
-        eq_(textile.textile(text), '<pre>Hello\n\nAgain\n</pre>\n\n\t<p>normal text</p>')
-
-    def testURLWithParens(self):
-        text = '"python":http://en.wikipedia.org/wiki/Python_(programming_language)'
-        expect = '\t<p><a href="http://en.wikipedia.org/wiki/Python_%28programming_language%29">python</a></p>'
-        result = textile.textile(text)
-        eq_(result, expect)
-
-    def testTableWithHyphenStyles(self):
-        text = 'table(linkblog-thumbnail).\n|(linkblog-thumbnail-cell). apple|bear|'
-        expect = '\t<table class="linkblog-thumbnail">\n\t\t<tr>\n\t\t\t<td style="vertical-align:middle;" class="linkblog-thumbnail-cell">apple</td>\n\t\t\t<td>bear</td>\n\t\t</tr>\n\t</table>'
-        result = textile.textile(text)
-        eq_(result, expect)
-
-    def testHeadOffset(self):
-        text = 'h2. This is a header'
-        head_offset = 2
-        expect = '\t<h4>This is a header</h4>'
-        result = textile.textile(text, head_offset=head_offset)
-        eq_(result, expect)
-
-    def testIssue035(self):
-        result = textile.textile('"z"')
-        expect = '\t<p>“z” </p>'
-        eq_(result, expect)
-
-        result = textile.textile('" z"')
-        expect = '\t<p>“ z” </p>'
-        eq_(result, expect)
-
-    def testIssue032(self):
-        text = "|thing|||otherthing|"
-        result = textile.textile(text)
-        expect = "\t<table>\n\t\t<tr>\n\t\t\t<td>thing</td>\n\t\t\t<td></td>\n\t\t\t<td></td>\n\t\t\t<td>otherthing</td>\n\t\t</tr>\n\t</table>"
-        eq_(result, expect)
-
-    def testIssue036(self):
-        test = '"signup":signup\n[signup]http://myservice.com/signup'
-        result = textile.textile(test)
-        expect = '\t<p><a href="http://myservice.com/signup">signup</a></p>'
-        eq_(result, expect)
-
-        test = '"signup":signup\n[signup]https://myservice.com/signup'
-        result = textile.textile(test)
-        expect = '\t<p><a href="https://myservice.com/signup">signup</a></p>'
-        eq_(result, expect)
-
-    def testNestedFormatting(self):
-        test = "*_test text_*"
-        result = textile.textile(test)
-        expect = "\t<p><strong><em>test text</em></strong></p>"
-
-        eq_(result, expect)
-
-        test = "_*test text*_"
-        result = textile.textile(test)
-        expect = "\t<p><em><strong>test text</strong></em></p>"
-
-        eq_(result, expect)
-
-    def testRestricted(self):
-        test = "this is \"some\" *bold text*."
-        result = textile.textile_restricted(test)
-        expect = "\t<p>this is “some” <strong>bold text</strong>.</p>"
-
-        eq_(result, expect)
-
-        #Note that the HTML is escaped, thus rendering
-        #the <script> tag harmless.
-        test = "Here is some text.\n<script>alert('hello world')</script>"
-        result = textile.textile_restricted(test)
-        expect = "\t<p>Here is some text.<br />\n<script>alert(‘hello world’)</script></p>"
-
-        eq_(result, expect)
-
-        test = "Here's some <!-- commented *out* --> text."
-        result = textile.textile_restricted(test)
-        expect = "\t<p>Here’s some <!— commented <strong>out</strong> —> text.</p>"
-
-        eq_(result, expect)
-
-    def testQuotesInCode(self):
-        test = "<code>'quoted string'</code>"
-        result = textile.textile(test)
-        expect = "\t<p><code>'quoted string'</code></p>"
-
-        eq_(result, expect)
-
-    def testUnicodeFootnote(self):
-        html = textile.textile('текст[1]')
-        assert_true(re.compile('^\t<p>текст<sup class="footnote" id="fnrev[a-f0-9]{32}"><a href="#fn[a-f0-9]{32}">1</a></sup></p>', re.U).search(html))
-
-    def testAutoLinking(self):
-        test = """some text "test":http://www.google.com http://www.google.com "$":http://www.google.com"""
-        result = """\t<p>some text <a href="http://www.google.com">test</a> <a href="http://www.google.com">www.google.com</a> <a href="http://www.google.com">www.google.com</a></p>"""
-        expect = textile.textile(test, auto_link=True)
-
-        eq_(result, expect)
-
-    def testPre(self):
-        test = "<pre>some preformatted text</pre>other text"
-        result = "\t<p><pre>some preformatted text</pre>other text</p>"
-        expect = textile.textile(test)
-
-        eq_(result, expect)
-
-    def testSanitize(self):
-        try:
-            __import__('html5lib')
-        except ImportError:
-            raise SkipTest()
-
-        test = "a paragraph of benign text"
-        result = "\t<p>a paragraph of benign text</p>"
-        expect = textile.Textile().parse(test, sanitize=True)
-        eq_(result, expect)
-
-        test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
-        result = '<p style="">a paragraph of evil text</p>'
-        expect = textile.Textile().parse(test, sanitize=True)
-        eq_(result, expect)
-
-        test = """<p>a paragraph of benign text<br />and more text</p>"""
-        result = '<p>a paragraph of benign text<br />\nand more text</p>'
-        expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
-        eq_(result, expect)
-
-    def testImageSize(self):
-        try:
-            __import__('PIL')
-        except ImportError:
-            raise SkipTest()
-
-        test = "!http://www.google.com/intl/en_ALL/images/srpr/logo1w.png!"
-        result = '\t<p><img alt="" height="95" src="http://www.google.com/intl/en_ALL/images/srpr/logo1w.png" width="275" /></p>'
-        expect = textile.Textile(get_sizes=True).parse(test)
-        eq_(result, expect)
-
-    def testAtSignAndNotextileInTable(self):
-        test = "|@<A1>@|@<A2>@ @<A3>@|\n|<notextile>*B1*</notextile>|<notextile>*B2*</notextile> <notextile>*B3*</notextile>|"
-        result = "\t<table>\n\t\t<tr>\n\t\t\t<td><code><A1></code></td>\n\t\t\t<td><code><A2></code> <code><A3></code></td>\n\t\t</tr>\n\t\t<tr>\n\t\t\t<td>*B1*</td>\n\t\t\t<td>*B2* *B3*</td>\n\t\t</tr>\n\t</table>"
-        expect = textile.textile(test)
-        eq_(result, expect)
-
-    def testEndnotesSimple(self):
-        test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!.\n\nnote#my_first_label Over the past billion years, about a quarter of the moon's 4.5 billion-year lifespan, it has shrunk about 200 meters (700 feet) in diameter."""
-        html = textile.textile(test)
-        result_pattern = r"""\t<p>Scientists say the moon is slowly shrinking<sup><a href="#(note[a-f0-9]{32})"><span id="noteref[a-f0-9]{32}">1</span></a></sup>.</p>\n\n\t<ol>\n\t<li><span id="\1"> </span>Over the past billion years, about a quarter of the moon’s 4.5 billion-year lifespan, it has shrunk about 200 meters \(700 feet\) in diameter.</li>\n</ol>$"""
-        result_re = re.compile(result_pattern)
-        assert_true(result_re.search(html))
-
-    def testEndnotesComplex(self):
-        test = """Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality[#netneutral] and has expressed the view that ISPs should supply "connectivity with no strings attached"[#netneutral!] [#tbl_quote]\n\nBerners-Lee admitted that the forward slashes ("//") in a web address were actually unnecessary.  He told the newspaper that he could easily have designed URLs not to have the forward slashes.  "... it seemed like a good idea at the time,"[#slashes]\n\nnote#netneutral. "Web creator rejects net tracking":http://news.bbc.co.uk/2/hi/technology/7613201.stm. BBC. 15 September 2008\n\nnote#tbl_quote. "Web inventor's warning on spy software":http://www.telegraph.co.uk/news/uknews/1581938/Web-inventor%27s-warning-on-spy-software.html. The Daily Telegraph (London). 25 May 2008\n\nnote#slashes. "Berners-Lee 'sorry' for slashes":http://news.bbc.co.uk/1/hi/technology/8306631.stm. BBC. 14 October 2009\n\nnotelist."""
-        html = textile.textile(test)
-        result_pattern = r"""\t<p>Tim Berners-Lee is one of the pioneer voices in favour of Net Neutrality<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">1</span></a></sup> and has expressed the view that <span class="caps">ISP</span>s should supply “connectivity with no strings attached”<sup><span id="(noteref[a-f0-9]{32})">1</span></sup> <sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">2</span></a></sup></p>\n\n\t<p>Berners-Lee admitted that the forward slashes \(”//”\) in a web address were actually unnecessary.  He told the newspaper that he could easily have designed <span class="caps">URL</span>s not to have the forward slashes.  “… it seemed like a good idea at the time,”<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">3</span></a></sup></p>\n\n\t<ol>\n\t<li><sup><a href="#\2">a</a></sup> <sup><a href="#\3">b</a></sup><span id="\1"> </span><a href="http://news.bbc.co.uk/2/hi/technology/7613201.stm">Web creator rejects net tracking</a>. <span class="caps">BBC</span>. 15 September 2008</li>\n\t<li><sup><a href="#\5">a</a></sup><span id="\4"> </span><a href="http://www.telegraph.co.uk/news/uknews/1581938/Web-inventor%27s-warning-on-spy-software.html">Web inventor’s warning on spy software</a>. The Daily Telegraph \(London\). 25 May 2008</li>\n\t<li><sup><a href="#\7">a</a></sup><span id="\6"> </span><a href="http://news.bbc.co.uk/1/hi/technology/8306631.stm">Berners-Lee ‘sorry’ for slashes</a>. <span class="caps">BBC</span>. 14 October 2009</li>\n</ol>$"""
-        result_re = re.compile(result_pattern)
-        assert_true(result_re.search(html))
-
-    def testEndnotesUnreferencedNote(self):
-        test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#lavader(noteclass). "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman(#noteid). "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13. After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:§^.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:‡"""
-        html = textile.textile(test)
-        result_pattern = r"""\t<p>Scientists say<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">1</span></a></sup> the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">2</span></a></sup>. If this proves true I suspect we are in for troubled times<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">3</span></a></sup> as people argue over their “share” of the moon’s cheese. In the end, its limited size<sup><a href="#\1"><span id="(noteref[a-f0-9]{32})">1</span></a></sup> may prove problematic.</p>\n\n\t<ol style="padding:1em; margin:1em; border-bottom:1px solid gray;">\n\t<li class="noteclass"><sup><a href="#\2">a</a></sup> <sup><a href="#\7">b</a></sup><span id="\1"> </span><a href="http://antwrp.gsfc.nasa.gov/apod/ap080801.html">Proof of the small moon hypothesis</a>. Copyright© Laurent Laveder</li>\n\t<li id="noteid"><sup><a href="#\4">a</a></sup><span id="\3"> </span><a href="http://www.imdb.com/title/tt0104361">Proof of a cheese moon</a></li>\n\t<li><sup><a href="#\6">a</a></sup><span id="\5"> </span>After all, things do go <a href="http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident">wrong</a>.</li>\n</ol>\n\n\t<ol style="padding:1em; margin:1em; border-bottom:1px solid gray;">\n\t<li class="noteclass"><sup><a href="#\2">\xa7</a></sup><span id="\1"> </span><a href="http://antwrp.gsfc.nasa.gov/apod/ap080801.html">Proof of the small moon hypothesis</a>. 
Copyright© Laurent Laveder</li>\n\t<li id="noteid"><sup><a href="#\4">\xa7</a></sup><span id="\3"> </span><a href="http://www.imdb.com/title/tt0104361">Proof of a cheese moon</a></li>\n\t<li><sup><a href="#\6">\xa7</a></sup><span id="\5"> </span>After all, things do go <a href="http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident">wrong</a>.</li>\n</ol>\n\n\t<ol style="padding:1em; margin:1em; border-bottom:1px solid gray;">\n\t<li class="noteclass"><sup><a href="#\2">‡</a></sup> <sup><a href="#\7">‡</a></sup><span id="\1"> </span><a href="http://antwrp.gsfc.nasa.gov/apod/ap080801.html">Proof of the small moon hypothesis</a>. Copyright© Laurent Laveder</li>\n\t<li id="noteid"><sup><a href="#\4">‡</a></sup><span id="\3"> </span><a href="http://www.imdb.com/title/tt0104361">Proof of a cheese moon</a></li>\n\t<li><sup><a href="#\6">‡</a></sup><span id="\5"> </span>After all, things do go <a href="http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident">wrong</a>.</li>\n</ol>"""
-        result_re = re.compile(result_pattern, re.U)
-        assert_true(result_re.search(html))
-
-    def testEndnotesMalformed(self):
-        test = """Scientists say[#lavader] the moon is quite small. But I, for one, don't believe them. Others claim it to be made of cheese[#aardman]. If this proves true I suspect we are in for troubled times[#apollo13!] as people argue over their "share" of the moon's cheese. In the end, its limited size[#lavader] may prove problematic.\n\nnote#unused An unreferenced note.\n\nnote#lavader^ "Proof of the small moon hypothesis":http://antwrp.gsfc.nasa.gov/apod/ap080801.html. Copyright(c) Laurent Laveder\n\nnote#aardman^ "Proof of a cheese moon":http://www.imdb.com/title/tt0104361\n\nnote#apollo13^ After all, things do go "wrong":http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident.\n\nnotelist{padding:1em; margin:1em; border-bottom:1px solid gray}:α!+"""
-        html = textile.textile(test)
-        result_pattern = r"""^\t<p>Scientists say<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">1</span></a></sup> the moon is quite small. But I, for one, don’t believe them. Others claim it to be made of cheese<sup><a href="#(note[a-f0-9]{32})"><span id="(noteref[a-f0-9]{32})">2</span></a></sup>. If this proves true I suspect we are in for troubled times<sup><span id="(noteref[a-f0-9]{32})">3</span></sup> as people argue over their “share” of the moon’s cheese. In the end, its limited size<sup><a href="#\1"><span id="noteref[a-f0-9]{32}">1</span></a></sup> may prove problematic.</p>\n\n\t<ol style="padding:1em; margin:1em; border-bottom:1px solid gray;">\n\t<li><sup><a href="#\2">α</a></sup><span id="\1"> </span><a href="http://antwrp.gsfc.nasa.gov/apod/ap080801.html">Proof of the small moon hypothesis</a>. Copyright© Laurent Laveder</li>\n\t<li><sup><a href="#\4">α</a></sup><span id="\3"> </span><a href="http://www.imdb.com/title/tt0104361">Proof of a cheese moon</a></li>\n\t<li><sup><a href="#\5">α</a></sup><span id="note[a-f0-9]{32}"> </span>After all, things do go <a href="http://en.wikipedia.org/wiki/Apollo_13#The_oxygen_tank_incident">wrong</a>.</li>\n\t<li>An unreferenced note.</li>\n</ol>$"""
-        result_re = re.compile(result_pattern, re.U)
-        assert_true(result_re.search(html))
-
-    def testEndnotesUndefinedNote(self):
-        test = """Scientists say the moon is slowly shrinking[#my_first_label].\n\nnotelist!."""
-        html = textile.textile(test)
-        result_pattern = r"""\t<p>Scientists say the moon is slowly shrinking<sup><a href="#note[a-f0-9]{32}"><span id="noteref[a-f0-9]{32}">1</span></a></sup>.</p>\n\n\t<ol>\n\t<li> Undefined Note \[#my_first_label\].<li>\n</ol>$"""
-        result_re = re.compile(result_pattern)
-        assert_true(result_re.search(html))
-
-    def testEncodeUrl(self):
-        # I tried adding these as doctests, but the unicode tests weren't
-        # returning the correct results.
-        t = textile.Textile()
-
-        url = 'http://www.example.local'
-        result = 'http://www.example.local'
-        eurl = t.encode_url(url)
-        eq_(eurl, result)
-
-        url = 'http://user@www.example.local'
-        result = 'http://user@www.example.local'
-        eurl = t.encode_url(url)
-        eq_(eurl, result)
-
-        url = 'http://user:password@www.example.local'
-        result = 'http://user:password@www.example.local'
-        eurl = t.encode_url(url)
-        eq_(eurl, result)
-
-        url = 'http://user:password@www.example.local/Ubermensch'
-        result = 'http://user:password@www.example.local/Ubermensch'
-        eurl = t.encode_url(url)
-        eq_(eurl, result)
-
-        url = "http://user:password@www.example.local/Übermensch"
-        result = "http://user:password@www.example.local/%C3%9Cbermensch"
-        eurl = t.encode_url(url)
-        eq_(eurl, result)
-
-        url = 'http://user:password@www.example.local:8080/Übermensch'
-        result = 'http://user:password@www.example.local:8080/%C3%9Cbermensch'
-        eurl = t.encode_url(url)
-        eq_(eurl, result)
-
-    def testFootnoteCrosslink(self):
-        html = textile.textile('''See[2] for details, and later, reference it again[2].\n\nfn2^(footy#otherid)[en]. Here are the details.''')
-        searchstring = r'\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})"><a href="#fn\1">2</a></sup> for details, and later, reference it again<sup class="footnote"><a href="#fn\1">2</a></sup>.</p>\n\n\t<p class="footy" id="otherid" lang="en"><sup id="fn\1"><a href="#fnrev\1">2</a></sup> Here are the details.</p>$'
-        assert_true(re.compile(searchstring).search(html))
-
-    def testFootnoteWithoutReflink(self):
-        html = textile.textile('''See[3!] for details.\n\nfn3. Here are the details.''')
-        searchstring = r'^\t<p>See<sup class="footnote" id="fnrev([a-f0-9]{32})">3</sup> for details.</p>\n\n\t<p class="footnote" id="fn\1"><sup>3</sup> Here are the details.</p>$'
-        assert_true(re.compile(searchstring).search(html))
-
-    def testSquareBrackets(self):
-        html = textile.textile("""1[^st^], 2[^nd^], 3[^rd^]. 2 log[~n~]\n\nA close[!http://textpattern.com/favicon.ico!]image.\nA tight["text":http://textpattern.com/]link.\nA ["footnoted link":http://textpattern.com/][182].""")
-        searchstring = r'^\t<p>1<sup>st</sup>, 2<sup>nd</sup>, 3<sup>rd</sup>. 2 log<sub>n</sub></p>\n\n\t<p>A close<img alt="" src="http://textpattern.com/favicon.ico" />image.<br />\nA tight<a href="http://textpattern.com/">text</a>link.<br />\nA <a href="http://textpattern.com/">footnoted link</a><sup class="footnote" id="fnrev([a-f0-9]{32})"><a href="#fn\1">182</a></sup>.</p>'
-        assert_true(re.compile(searchstring).search(html))
-
-    def testHTML5(self):
-        """docstring for testHTML5"""
-
-        test = 'We use CSS(Cascading Style Sheets).'
-        result = '\t<p>We use <abbr title="Cascading Style Sheets"><span class="caps">CSS</span></abbr>.</p>'
-        expect = textile.textile(test, html_type="html5")
-        eq_(result, expect)
-
-
-class TestSubclassing():
-    """Test Textile subclassing ability."""
-    def testChangeGlyphs(self):
-        class TextilePL(textile.Textile):
-            glyph_definitions = dict(textile.Textile.glyph_definitions,
-                quote_double_open = '„'
-            )
-
-        test = 'Test "quotes".'
-        expect = '\t<p>Test „quotes”.</p>'
-        result = TextilePL().parse(test)
-        eq_(expect, result)
-
-        # Base Textile is unchanged.
-        expect = '\t<p>Test “quotes”.</p>'
-        result = textile.textile(test)
-        eq_(expect, result)
diff --git a/textile/textilefactory.py b/textile/textilefactory.py
index 167f1629..e5e2458e 100644
--- a/textile/textilefactory.py
+++ b/textile/textilefactory.py
@@ -3,42 +3,11 @@
 
 
 class TextileFactory(object):
-    """
-    Use TextileFactory to create a Textile object which can be re-used
-    to process multiple strings with the same settings.
-
-    >>> from .tools.doctest_utils import Py3
-    >>> f = TextileFactory()
-    >>> Py3 << f.process("some text here")
-    '\\t<p>some text here</p>'
-
-    >>> f = TextileFactory(restricted=True)
-    >>> Py3 << f.process("more text here")
-    '\\t<p>more text here</p>'
-
-    Certain parameter values are not permitted because they are illogical:
-
-    >>> f = TextileFactory(lite=True)
-    Traceback (most recent call last):
-    ...
-    ValueError: lite can only be enabled in restricted mode
-
-    >>> f = TextileFactory(head_offset=7)
-    Traceback (most recent call last):
-    ...
-    ValueError: head_offset must be 0-6
-
-    >>> f = TextileFactory(html_type='invalid')
-    Traceback (most recent call last):
-    ...
-    ValueError: html_type must be 'xhtml' or 'html5'
-
-
-    """
+    """ Use TextileFactory to create a Textile object which can be re-used to
+    process multiple strings with the same settings."""
 
     def __init__(self, restricted=False, lite=False, sanitize=False,
-                 noimage=None, auto_link=False, get_sizes=False,
-                 head_offset=0, html_type='xhtml'):
+                 noimage=None, get_sizes=False, html_type='xhtml'):
 
         self.class_parms = {}
         self.method_parms = {}
@@ -59,14 +28,8 @@ def __init__(self, restricted=False, lite=False, sanitize=False,
 
         self.class_parms['noimage'] = noimage
         self.method_parms['sanitize'] = sanitize
-        self.class_parms['auto_link'] = auto_link
         self.class_parms['get_sizes'] = get_sizes
 
-        if int(head_offset) not in range(0, 6):
-            raise ValueError("head_offset must be 0-6")
-        else:
-            self.method_parms['head_offset'] = head_offset
-
         if html_type not in ['xhtml', 'html5']:
             raise ValueError("html_type must be 'xhtml' or 'html5'")
         else:
diff --git a/textile/tools/doctest_utils.py b/textile/tools/doctest_utils.py
deleted file mode 100644
index 30844c3d..00000000
--- a/textile/tools/doctest_utils.py
+++ /dev/null
@@ -1,92 +0,0 @@
-"""
-Utilities needed for making doctests compatible with both Py2 and Py3.
-
-Author: Radek Czajka <radekczajka@nowoczesnapolska.org.pl>
-"""
-
-from __future__ import unicode_literals
-import sys
-
-if sys.version_info[0] < 3:
-    import copy
-
-    class Py3Wrapper(object):
-        '''
-        You can have Python 2 and 3 compatible Unicode-aware code
-        without 'u' prefixes (unsupported in Python 3.2) using
-        __future__.unicode_literals, but any doctests expecting strings
-        will fail in Python 2, because unicode.__repr__ still adds
-        a prefix anyway, and bytes.__repr__ doesn't.
-
-        >>> from doctest import run_docstring_examples
-        >>> def sad_doctest():
-        ...     """
-        ...     >>> (b'tuple', 'of', (3, 'things'))
-        ...     (b'tuple', 'of', (3, 'things'))
-        ...     """
-        ...     pass
-        >>> run_docstring_examples(sad_doctest, globals())  # doctest: +ELLIPSIS
-        ***************...
-        Got:
-            ('tuple', u'of', (3, u'things'))...
-
-        This class provides a workaround for this issue. 'Shifting'
-        an object to Py3 (which is an instance of this class) creates
-        a deep copy of it, with all unicode and bytes objects wrapped
-        in a class providing a Py3-compatbile __repr__.
-
-        >>> Py3 << (b'tuple', 'of', (3, 'things'))
-        (b'tuple', 'of', (3, 'things'))
-
-        '''
-        class Py3WrappingMemo(dict):
-            """
-            The copy.deepcopy function uses one optional argument, which
-            is a `memo` dict, used internally as an object cache.
-            Normally, deepcopy creates this dict for itself, but we're
-            going to use it to modify the behaviour of deepcopy to wrap
-            all the unicode and str objects with our wrapper classes.
-
-            This way, deepcopy still behaves as expected if not
-            explicitly passed an instance of this class.
-
-            """
-            class Py3Unicode(unicode):
-                """Wrapper for unicode objects."""
-                def __repr__(self):
-                    return unicode.__repr__(self)[1:]
-
-            class Py3Str(str):
-                """Wrapper for str objects."""
-                def __repr__(self):
-                    return 'b' + str.__repr__(self)
-
-            # We're meddling with copy.deepcopy internals here.
-            # However, iIf deepcopy isn't explicitly passed an instance
-            # of this class as `memo`, the only thing this meddling
-            # causes is a dict lookup for each unicode and str copied.
-            copy._deepcopy_dispatch[unicode] = lambda x, memo: memo.get(x, x)
-            copy._deepcopy_dispatch[str] = lambda x, memo: memo.get(x, x)
-            copy._deepcopy_dispatch[Py3Unicode] = copy._deepcopy_atomic
-            copy._deepcopy_dispatch[Py3Str] = copy._deepcopy_atomic
-
-            def get(self, item, default=None):
-                """Actual wrapping happens here."""
-                if type(item) is unicode:
-                    return self.Py3Unicode(item)
-                elif type(item) is str:
-                    return self.Py3Str(item)
-                else:
-                    return dict.get(self, item, default)
-
-        def __lshift__(self, obj):
-            return copy.deepcopy(obj, memo=self.Py3WrappingMemo())
-
-else:
-    class Py3Wrapper(object):
-        """Under Python 3, that's a no-op."""
-        def __lshift__(self, obj):
-            return obj
-
-
-Py3 = Py3Wrapper()
diff --git a/textile/tools/imagesize.py b/textile/tools/imagesize.py
index d75167ca..d283ef12 100644
--- a/textile/tools/imagesize.py
+++ b/textile/tools/imagesize.py
@@ -4,10 +4,6 @@ def getimagesize(url):
     (width, height), in pixels or an empty string in case of failure.
     Requires that PIL is installed.
 
-    >>> getimagesize("http://www.google.com/intl/en_ALL/images/logo.gif")
-    (276, 110)
-    >>> getimagesize("http://bad.domain/")
-    ''
     """
 
     try:
@@ -32,11 +28,3 @@ def getimagesize(url):
                 return p.image.size
     except (IOError, ValueError):
         return ''
-
-
-def setup_module(module):
-    from nose.plugins.skip import SkipTest
-    try:
-        __import__('PIL')
-    except ImportError:
-        raise SkipTest()
diff --git a/textile/utils.py b/textile/utils.py
new file mode 100644
index 00000000..55a5de0c
--- /dev/null
+++ b/textile/utils.py
@@ -0,0 +1,217 @@
+from __future__ import unicode_literals
+import six
+
+try:
+    import regex as re
+except ImportError:
+    import re
+
+from six.moves import urllib, html_parser
+urlparse = urllib.parse.urlparse
+HTMLParser = html_parser.HTMLParser
+
+try:
+    from collections import OrderedDict
+except ImportError:
+    from ordereddict import OrderedDict
+
+from xml.etree import ElementTree
+
+from textile.regex_strings import valign_re_s, halign_re_s
+
+
+def decode_high(text):
+    """Decode encoded HTML entities."""
+    h = HTMLParser()
+    text = '&#{0};'.format(text)
+    return h.unescape(text)
+
+def encode_high(text):
+    """Encode the text so that it is an appropriate HTML entity."""
+    return ord(text)
+
+def encode_html(text, quotes=True):
+    """Return text that's safe for an HTML attribute."""
+    a = (
+        ('&', '&amp;'),
+        ('<', '&lt;'),
+        ('>', '&gt;'))
+
+    if quotes:
+        a = a + (("'", '&#39;'),
+                 ('"', '&#34;'))
+
+    for k, v in a:
+        text = text.replace(k, v)
+    return text
+
+def generate_tag(tag, content, attributes=None):
+    """Generate a complete html tag using the ElementTree module.  tag and
+    content are strings, the attributes argument is a dictionary.  As
+    a convenience, if the content is ' /', a self-closing tag is generated."""
+    content = six.text_type(content)
+    element = ElementTree.Element(tag, attrib=attributes)
+    enc = 'unicode'
+    if six.PY2:
+        enc = 'UTF-8'
+    if not tag:
+        return content
+    # FIXME: Kind of an ugly hack.  There *must* be a cleaner way.  I tried
+    # adding text by assigning it to a.text.  That results in non-ascii text
+    # being html-entity encoded.  Not bad, but not entirely matching
+    # php-textile either.
+    try:
+        element_tag = ElementTree.tostringlist(element, encoding=enc,
+                method='html')
+        element_tag.insert(len(element_tag) - 1, content)
+        element_text = ''.join(element_tag)
+    except AttributeError:
+        # Python 2.6 doesn't have the tostringlist method, so we have to treat
+        # it different.
+        element_tag = ElementTree.tostring(element, encoding=enc)
+        element_text = re.sub(r"<\?xml version='1.0' encoding='UTF-8'\?>\n",
+                '', element_tag)
+        if content != six.text_type(' /'):
+            element_text = element_text.rstrip(' />')
+            element_text = six.text_type('{0}>{1}</{2}>').format(six.text_type(
+                element_text), content, tag)
+    return element_text
+
+def has_raw_text(text):
+    """checks whether the text has text not already enclosed by a block tag"""
+    # The php version orders the below list of tags differently.  The
+    # important thing to note here is that the pre must occur before the p or
+    # else the regex module doesn't properly match pre-s. It only matches the
+    # p in pre.
+    r = re.compile(r'<(pre|p|blockquote|div|form|table|ul|ol|dl|h[1-6])[^>]*?>.*</\1>',
+                   re.S).sub('', text.strip()).strip()
+    r = re.compile(r'<(hr|br)[^>]*?/>').sub('', r)
+    return '' != r
+
+def is_rel_url(url):
+    """Identify relative urls."""
+    (scheme, netloc) = urlparse(url)[0:2]
+    return not scheme and not netloc
+
+def is_valid_url(url):
+    parsed = urlparse(url)
+    if parsed.scheme == '':
+        return True
+    return False
+
+def list_type(list_string):
+    listtypes = {
+        list_string.startswith('*'): 'u',
+        list_string.startswith('#'): 'o',
+        (not list_string.startswith('*') and not list_string.startswith('#')):
+        'd'
+    }
+    return listtypes.get(True, False)
+
+def normalize_newlines(string):
+    out = string.strip()
+    out = re.sub(r'\r\n', '\n', out)
+    out = re.sub(r'\n{3,}', '\n\n', out)
+    out = re.sub(r'\n\s*\n', '\n\n', out)
+    out = re.sub(r'"$', '" ', out)
+    return out
+
+def parse_attributes(block_attributes, element=None, include_id=True):
+    vAlign = {'^': 'top', '-': 'middle', '~': 'bottom'}
+    hAlign = {'<': 'left', '=': 'center', '>': 'right', '<>': 'justify'}
+    style = []
+    aclass = ''
+    lang = ''
+    colspan = ''
+    rowspan = ''
+    block_id = ''
+    span = ''
+    width = ''
+    result = OrderedDict()
+
+    if not block_attributes:
+        return result
+
+    matched = block_attributes
+    if element == 'td':
+        m = re.search(r'\\(\d+)', matched)
+        if m:
+            colspan = m.group(1)
+
+        m = re.search(r'/(\d+)', matched)
+        if m:
+            rowspan = m.group(1)
+
+    if element == 'td' or element == 'tr':
+        m = re.search(r'({0})'.format(valign_re_s), matched)
+        if m:
+            style.append("vertical-align:{0}".format(vAlign[m.group(1)]))
+
+    m = re.search(r'\{([^}]*)\}', matched)
+    if m:
+        style.extend(m.group(1).rstrip(';').split(';'))
+        matched = matched.replace(m.group(0), '')
+
+    m = re.search(r'\[([^\]]+)\]', matched, re.U)
+    if m:
+        lang = m.group(1)
+        matched = matched.replace(m.group(0), '')
+
+    m = re.search(r'\(([^()]+)\)', matched, re.U)
+    if m:
+        aclass = m.group(1)
+        matched = matched.replace(m.group(0), '')
+
+    m = re.search(r'([(]+)', matched)
+    if m:
+        style.append("padding-left:{0}em".format(len(m.group(1))))
+        matched = matched.replace(m.group(0), '')
+
+    m = re.search(r'([)]+)', matched)
+    if m:
+        style.append("padding-right:{0}em".format(len(m.group(1))))
+        matched = matched.replace(m.group(0), '')
+
+    m = re.search(r'({0})'.format(halign_re_s), matched)
+    if m:
+        style.append("text-align:{0}".format(hAlign[m.group(1)]))
+
+    m = re.search(r'^(.*)#(.*)$', aclass)
+    if m:
+        block_id = m.group(2)
+        aclass = m.group(1)
+
+    if element == 'col':
+        pattern = r'(?:\\(\d+)\.?)?\s*(\d+)?'
+        csp = re.match(pattern, matched)
+        span, width = csp.groups()
+
+    if colspan:
+        result['colspan'] = colspan
+
+    if style:
+        # Previous splits that created style may have introduced extra
+        # whitespace into the list elements.  Clean it up.
+        style = [x.strip() for x in style]
+        result['style'] = '{0};'.format("; ".join(style))
+    if aclass:
+        result['class'] = aclass
+    if block_id and include_id:
+        result['id'] = block_id
+    if lang:
+        result['lang'] = lang
+    if rowspan:
+        result['rowspan'] = rowspan
+    if span:
+        result['span'] = span
+    if width:
+        result['width'] = width
+    return result
+
+def pba(block_attributes, element=None, include_id=True):
+    """Parse block attributes."""
+    attrs = parse_attributes(block_attributes, element, include_id)
+    if not attrs:
+        return ''
+    result = ' '.join(['{0}="{1}"'.format(k, v) for k, v in attrs.items()])
+    return ' {0}'.format(result)
diff --git a/textile/version.py b/textile/version.py
index 8ec4737c..7e696978 100644
--- a/textile/version.py
+++ b/textile/version.py
@@ -1 +1 @@
-VERSION = '2.2.1'
+VERSION = '2.3.1'
diff --git a/tox.ini b/tox.ini
deleted file mode 100644
index d1884662..00000000
--- a/tox.ini
+++ /dev/null
@@ -1,16 +0,0 @@
-[tox]
-envlist = py26, py27, py32, py33, py34, pypy
-
-[testenv]
-deps = nose
-       coverage
-       html5lib
-       regex
-       Pillow
-commands = nosetests --id-file=.noseids.{envname}
-
-[testenv:pypy]
-deps = nose
-       coverage
-       html5lib
-       Pillow