initial checkin
This commit is contained in:
parent
a0fa804a86
commit
6621575d74
27
.travis.yml
Normal file
27
.travis.yml
Normal file
|
@ -0,0 +1,27 @@
|
|||
# Ref. http://about.travis-ci.org/docs/user/languages/python/
|
||||
language: python
|
||||
python:
|
||||
# Disabled for now until fixes land in dependencies outside of this project, e.g. ordereddict, astroid.
|
||||
# - 2.6
|
||||
- 2.7
|
||||
- 3.4
|
||||
- 3.5
|
||||
- 3.6
|
||||
# .. seealso:: https://github.com/travis-ci/travis-ci/issues/9815
|
||||
matrix:
|
||||
include:
|
||||
- python: 3.7
|
||||
dist: xenial
|
||||
sudo: true
|
||||
|
||||
install:
|
||||
- pip install tox-travis
|
||||
script:
|
||||
- tox
|
||||
after_success:
|
||||
- coveralls
|
||||
notifications:
|
||||
email:
|
||||
recipients:
|
||||
- satoru.satoh+github@gmail.com
|
||||
on_failure: always
|
8
MANIFEST.in
Normal file
8
MANIFEST.in
Normal file
|
@ -0,0 +1,8 @@
|
|||
include LICENSE
|
||||
include MANIFEST.in
|
||||
include README.*
|
||||
include setup.*
|
||||
include tox.ini .gitignore .travis.yml
|
||||
include pkg/*
|
||||
recursive-include smhtml *.py
|
||||
recursive-include tests *.py
|
35
README.rst
Normal file
35
README.rst
Normal file
|
@ -0,0 +1,35 @@
|
|||
=============
|
||||
smhtml
|
||||
=============
|
||||
|
||||
About
|
||||
======
|
||||
|
||||
.. .. image:: https://img.shields.io/pypi/v/smhtml.svg
|
||||
:target: https://pypi.python.org/pypi/smhtml/
|
||||
:alt: [Latest Version]
|
||||
|
||||
.. .. image:: https://img.shields.io/pypi/pyversions/smhtml.svg
|
||||
:target: https://pypi.python.org/pypi/smhtml/
|
||||
:alt: [Python versions]
|
||||
|
||||
.. image:: https://api.travis-ci.org/ssato/python-smhtml.png
|
||||
:target: https://travis-ci.org/ssato/python-smhtml
|
||||
:alt: [Test status]
|
||||
|
||||
.. image:: https://coveralls.io/repos/ssato/python-smhtml/badge.png
|
||||
:target: https://coveralls.io/r/ssato/python-smhtml
|
||||
:alt: [Coverage Status]
|
||||
|
||||
.. .. image:: https://landscape.io/github/ssato/python-smhtml/master/landscape.png
|
||||
:target: https://landscape.io/github/ssato/python-smhtml/master
|
||||
:alt: [Code Health]
|
||||
|
||||
This is a simple and experimental python library to parse and dump MHTML [#]_ data.
|
||||
|
||||
- Author: Satoru SATOH <ssato@redhat.com>
|
||||
- License: MIT
|
||||
|
||||
.. [#] https://ja.wikipedia.org/wiki/MHTML
|
||||
|
||||
.. vim:sw=2:ts=2:et:
|
10
pkg/nose.cfg
Normal file
10
pkg/nose.cfg
Normal file
|
@ -0,0 +1,10 @@
|
|||
[nosetests]
|
||||
verbosity=2
|
||||
with-doctest=1
|
||||
#all-modules=1
|
||||
# Requires that nosetest processes multiple modules' test cases at once.
|
||||
# processes=4
|
||||
|
||||
# coverage:
|
||||
cover-package=smhtml
|
||||
cover-branches=1
|
75
pkg/package.spec.in
Normal file
75
pkg/package.spec.in
Normal file
|
@ -0,0 +1,75 @@
|
|||
%global pkgname smhtml
|
||||
|
||||
%if 0%{?fedora} || 0%{?rhel} > 7 || 0%{?epel} > 7
|
||||
%global with_python3 1
|
||||
%endif
|
||||
|
||||
%global desc \
|
||||
A simple and experimental python library to parse and dump MHTML data.
|
||||
|
||||
Name: python-%{pkgname}
|
||||
Version: @VERSION@
|
||||
Release: 1%{?dist}
|
||||
Summary: Python library to parse and dump MHTML data
|
||||
Group: Development/Tools
|
||||
License: MIT
|
||||
URL: https://github.com/ssato/python-smhtml
|
||||
Source0: %{url}/archive/RELEASE_%{version}.tar.gz
|
||||
BuildArch: noarch
|
||||
%if 0%{?with_python3}
|
||||
BuildRequires: python3-devel
|
||||
BuildRequires: python3-setuptools
|
||||
%else
|
||||
BuildRequires: python2-setuptools
|
||||
BuildRequires: python2-devel
|
||||
%endif
|
||||
|
||||
%description %{desc}
|
||||
|
||||
%if 0%{?with_python3}
|
||||
%package -n python3-%{pkgname}
|
||||
Summary: %{summary}
|
||||
Requires: python3-chardet
|
||||
%{?python_provide:%python_provide python3-%{pkgname}}
|
||||
|
||||
%description -n python3-%{pkgname} %{desc}
|
||||
%else
|
||||
|
||||
%package -n python2-%{pkgname}
|
||||
Summary: %{summary}
|
||||
Requires: python2-chardet
|
||||
%{?python_provide:%python_provide python2-%{pkgname}}
|
||||
|
||||
%description -n python2-%{pkgname} %{desc}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%autosetup -n %{pkgname}-%{version}
|
||||
|
||||
%build
|
||||
%if 0%{?with_python3}
|
||||
%py3_build
|
||||
%else
|
||||
%py2_build
|
||||
%endif
|
||||
|
||||
%install
|
||||
%if 0%{?with_python3}
|
||||
%py3_install
|
||||
%else
|
||||
%py2_install
|
||||
%endif
|
||||
|
||||
%if 0%{?with_python3}
|
||||
%files -n python3-%{pkgname}
|
||||
%{python3_sitelib}/%{pkgname}*
|
||||
%else
|
||||
%files -n python2-%{pkgname}
|
||||
%{python_sitelib}/%{pkgname}*
|
||||
%endif
|
||||
%doc README.rst examples
|
||||
%{_bindir}/*
|
||||
|
||||
%changelog
|
||||
* Sun Feb 24 2019 Satoru SATOH <ssato@redhat.com> - 0.0.1-1
|
||||
- Initial packaging
|
1
pkg/requirements.txt
Normal file
1
pkg/requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
chardet
|
6
pkg/test_requirements.txt
Normal file
6
pkg/test_requirements.txt
Normal file
|
@ -0,0 +1,6 @@
|
|||
-r requirements.txt
|
||||
coveralls
|
||||
flake8 < 3.5.0
|
||||
nose
|
||||
pycodestyle < 2.4.0
|
||||
pylint
|
50
setup.cfg
Normal file
50
setup.cfg
Normal file
|
@ -0,0 +1,50 @@
|
|||
# ref. https://setuptools.readthedocs.io/en/latest/setuptools.html#configuring-setup-using-setup-cfg-files
|
||||
[bdist_wheel]
|
||||
universal = 1
|
||||
|
||||
[metadata]
|
||||
name = smhtml
|
||||
# .. todo::
|
||||
# version = attr: src.VERSION
|
||||
description = A module to parse and dump MHTML data
|
||||
long_description = file: README.rst
|
||||
author = Satoru SATOH
|
||||
author_email = satoru.satoh@gmail.com
|
||||
maintainer = Satoru SATOH
|
||||
maintainer_email = satoru.satoh@gmail.com
|
||||
url = https://github.com/ssato/python-smhtml
|
||||
license = MIT
|
||||
classifiers =
|
||||
Development Status :: 4 - Beta
|
||||
License :: OSI Approved :: MIT License
|
||||
Intended Audience :: Developers
|
||||
Programming Language :: Python
|
||||
Programming Language :: Python :: 2
|
||||
Programming Language :: Python :: 2.7
|
||||
Programming Language :: Python :: 3
|
||||
Programming Language :: Python :: 3.3
|
||||
Programming Language :: Python :: 3.4
|
||||
Programming Language :: Python :: 3.5
|
||||
Programming Language :: Python :: 3.6
|
||||
Programming Language :: Python :: 3.7
|
||||
Environment :: Console
|
||||
Operating System :: OS Independent
|
||||
Topic :: Software Development :: Libraries :: Python Modules
|
||||
Topic :: Text Processing :: Markup
|
||||
Topic :: Utilities
|
||||
|
||||
[options]
|
||||
include_package_data = True
|
||||
# .. note:: This does not seem to work as expected with older setuptools.
|
||||
#package_dir =
|
||||
# = src
|
||||
packages = find:
|
||||
|
||||
[options.packages.find]
|
||||
exclude =
|
||||
tests
|
||||
tests.*
|
||||
|
||||
#[options.entry_points]
|
||||
#console_scripts =
|
||||
# smhtml_cli = smhtml.cli:main
|
46
setup.py
Normal file
46
setup.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
import os.path
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import setuptools
|
||||
import setuptools.command.bdist_rpm
|
||||
|
||||
|
||||
# Read the version string straight out of smhtml/globals.py instead of
# importing the package, so that setup.py works before its dependencies are
# installed. The path is relative, so setup.py must be run from the top of
# the source tree.
_VERSION_RE = re.compile(r'^VERSION = "([^"]+)"')

with open("smhtml/globals.py") as _fobj:
    # Only lines that really are the `VERSION = "..."` assignment are kept;
    # other lines that merely mention VERSION are ignored instead of crashing.
    _VERSIONS = [m.group(1) for m in (_VERSION_RE.search(line) for line in _fobj)
                 if m]

if not _VERSIONS:
    raise RuntimeError('No `VERSION = "..."` line was found in '
                       'smhtml/globals.py')

VERSION = _VERSIONS[0]

# For daily snapshot versioning mode: append the build date (".YYYYMMDD")
# whenever the _SNAPSHOT_BUILD environment variable is set to any value.
if os.environ.get("_SNAPSHOT_BUILD") is not None:
    import datetime
    VERSION = VERSION + datetime.datetime.now().strftime(".%Y%m%d")
|
||||
|
||||
|
||||
class bdist_rpm(setuptools.command.bdist_rpm.bdist_rpm):
    """Override the default content of the RPM SPEC.

    The SPEC is produced from the template file pkg/package.spec.in, with a
    few strings substituted, see :meth:`_replace`.
    """
    # Resolved against the current working directory at class definition
    # time, i.e. setup.py is expected to be run from the top of the tree.
    spec_tmpl = os.path.join(os.path.abspath(os.curdir),
                             "pkg/package.spec.in")

    def _replace(self, line):
        """Replace some strings in the RPM SPEC template

        :param line: A line of the SPEC template, trailing whitespace removed
        :return: The line to emit into the generated SPEC
        """
        if "@VERSION@" in line:
            return line.replace("@VERSION@", VERSION)

        if "Source0:" in line:  # Dirty hack
            # Point at a plain tarball name instead of the URL given in the
            # template.
            return "Source0: %{pkgname}-%{version}.tar.gz"

        return line

    def _make_spec_file(self):
        """
        :return: A list of lines of the SPEC generated from the template
        """
        # Use a context manager so that the template file gets closed.
        with open(self.spec_tmpl) as fobj:
            return [self._replace(line.rstrip()) for line in fobj]
|
||||
|
||||
|
||||
# Only the name, the computed version and the customized bdist_rpm command
# are passed explicitly here.
setuptools.setup(
    name="smhtml",
    version=VERSION,
    cmdclass={"bdist_rpm": bdist_rpm},
)
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
25
smhtml/__init__.py
Normal file
25
smhtml/__init__.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
r"""
|
||||
.. module:: smhtml
|
||||
:platform: Unix, Windows
|
||||
:synopsis: Simple MHTML parsing library
|
||||
|
||||
python-smhtml is a simple and experimental MHTML parsing library for python.
|
||||
|
||||
- Home: https://github.com/ssato/python-smhtml
|
||||
|
||||
"""
|
||||
from .api import (
|
||||
AUTHOR, VERSION,
|
||||
parse, parse_itr
|
||||
)
|
||||
|
||||
__author__ = AUTHOR
|
||||
__version__ = VERSION
|
||||
|
||||
__all__ = ["parse", "parse_itr"]
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
23
smhtml/api.py
Normal file
23
smhtml/api.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=unused-import
|
||||
r"""Public APIs of smhtml module.
|
||||
|
||||
.. versionadded:: 0.0.1
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .globals import (
|
||||
PACKAGE, AUTHOR, VERSION, LOGGER # flake8: noqa
|
||||
)
|
||||
from .parser import parse, parse_itr # flake8: noqa
|
||||
|
||||
|
||||
def version():
    """
    :return:
        Components of VERSION split on '.', as a list of strings (not a
        tuple of integers), e.g. ['0', '0', '1']
    """
    return VERSION.split('.')
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
16
smhtml/globals.py
Normal file
16
smhtml/globals.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
"""Some globals of smhtml module.
|
||||
"""
|
||||
import logging
|
||||
|
||||
|
||||
PACKAGE = "smhtml"
|
||||
AUTHOR = "Satoru SATOH <satoru.satoh@gmail.com>"
|
||||
VERSION = "0.0.1"
|
||||
|
||||
LOGGER = logging.getLogger(PACKAGE)
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
112
smhtml/parser.py
Normal file
112
smhtml/parser.py
Normal file
|
@ -0,0 +1,112 @@
|
|||
#
|
||||
# -*- coding: utf-8; mode: python -*-
|
||||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=unused-import
|
||||
r"""Simple and very experimental MHTML Parser
|
||||
|
||||
.. versionadded:: 0.0.1
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import base64
|
||||
import email
|
||||
import mimetypes
|
||||
import quopri
|
||||
import chardet
|
||||
|
||||
|
||||
def detect_charset(bmsg, default="ascii"):
    r"""
    Guess the character set of given byte data using chardet.

    :param bmsg: A byte data to detect charset
    :param default:
        Charset to return if `bmsg` is empty or chardet could not tell the
        charset of it
    :return: A string represents charset such as 'utf-8', 'iso-2022-jp'

    >>> detect_charset(b"a")
    'ascii'
    >>> detect_charset(b"")
    'ascii'
    >>> detect_charset(u"あ".encode("utf-8"))
    'utf-8'
    """
    if not bmsg:
        return default

    # chardet reports None as the encoding if it cannot make a guess; never
    # hand that to callers because they pass the result to bytes.decode().
    return chardet.detect(bmsg)["encoding"] or default
|
||||
|
||||
|
||||
def decode_part(part):
    """
    Decode the payload of a MIME part and collect some information of it.

    :param part: :class:`email.message.Message` object (MIME part)
    :return:
        A dict with the following items:

        - type: Content type of the part, e.g. 'text/html'
        - encoding: Charset of the text data, or None for non-text data
        - data: Decoded text for 'text/*' parts, else the payload with its
          base64 / quoted-printable transfer encoding removed
        - location: A list of Content-Location header values, or None
    """
    # The header is optional and its value is case-insensitive, so cope with
    # a missing header and with values like 'BASE64'.
    cte_values = part.get_all("Content-Transfer-Encoding") or [""]
    cte = cte_values[0].strip().lower()
    payload = part.get_payload()

    if cte == "base64":
        bdata = base64.b64decode(payload)
    elif cte == "quoted-printable":
        bdata = quopri.decodestring(payload)
    else:  # 7bit, 8bit, binary or no transfer encoding at all.
        bdata = payload

    charset = None
    data = bdata

    if part.get_content_maintype() == "text":
        if isinstance(bdata, bytes):
            # Fall back to 'ascii' if the charset could not be detected.
            charset = detect_charset(bdata) or "ascii"
            data = bdata.decode(charset, "ignore")
        else:
            # The payload was not transfer-encoded and is text already
            # (python 3); there is nothing to decode, so just report the
            # charset declared in its Content-Type header, if any.
            charset = part.get_content_charset()

    return dict(type=part.get_content_type(), encoding=charset, data=data,
                location=part.get_all("Content-Location"))
|
||||
|
||||
|
||||
def get_or_gen_filename(part, idx=0):
    """
    Get the filename of given MIME part, or generate one if it has none.

    :param part: :class:`email.message.Message` object (MIME part)
    :param idx: Index number embedded in the generated filename
    :return:
        The filename the part carries, or 'part-<idx><ext>' where <idx> is
        zero-padded to three digits and <ext> is guessed from the content
        type ('.bin' if it cannot be guessed)
    """
    name = part.get_filename()
    if name:
        return name

    suffix = mimetypes.guess_extension(part.get_content_type()) or ".bin"
    return "part-%03d%s" % (idx, suffix)
|
||||
|
||||
|
||||
def parse_itr(filepath):
    """
    Parse the MHTML file and yield information of its parts one by one.

    :param filepath: Path of the MHTML file, passed to :func:`open` as it is
    :return:
        A generator yields a dict made by :func:`decode_part` with 'index'
        and 'filename' items added, for each non-multipart part parsed from
        `filepath` opened
    :raises ValueError: if the data in `filepath` is not multi-part
    """
    with open(filepath) as stream:
        message = email.message_from_file(stream)

    if not message.is_multipart():
        raise ValueError("Multi-part MIME data was not found in "
                         "'%s'" % filepath)

    # The index counts every node walked, including the multipart containers
    # that are skipped.
    for index, mime_part in enumerate(message.walk()):
        if mime_part.get_content_maintype() != "multipart":
            name = get_or_gen_filename(mime_part, idx=index)
            parsed = decode_part(mime_part)
            parsed["index"] = index
            parsed["filename"] = name

            yield parsed
|
||||
|
||||
|
||||
def parse(filepath):
    """
    Same as :func:`parse_itr` but collect all the results at once.

    :param filepath: Path of the MHTML file, handed to :func:`parse_itr`
    :return: A list of parsed data
    """
    return [info for info in parse_itr(filepath)]
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
46
tests/common.py
Normal file
46
tests/common.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh at gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=missing-docstring
|
||||
from __future__ import absolute_import
|
||||
|
||||
import difflib
|
||||
import os.path
|
||||
import os
|
||||
import tempfile
|
||||
|
||||
|
||||
def selfdir():
    """
    :return: Path of the directory holding this module file
    """
    parent, _name = os.path.split(__file__)
    return parent
|
||||
|
||||
|
||||
def setup_workdir():
    """
    Create a fresh temporary working directory named 'python-tests-*'.

    The directory is made under the default temporary directory of the
    platform rather than a hard-coded '/tmp', which does not exist on every
    platform.

    :return: Path of the created working dir
    """
    return tempfile.mkdtemp(prefix="python-tests-")
|
||||
|
||||
|
||||
def cleanup_workdir(workdir):
    """
    Remove the working dir `workdir` and everything in it, on a best-effort
    basis; nothing is raised if it does not exist or cannot be removed.

    No shell is involved, so paths with spaces or shell meta characters are
    handled safely.

    :param workdir: Path of the working dir to remove
    """
    import shutil  # Imported here as this module does not import it at top.

    if os.path.islink(workdir) or os.path.isfile(workdir):
        # A plain file or a symlink: remove just that entry, never the
        # target a symlink points to.
        try:
            os.remove(workdir)
        except OSError:
            pass  # Best effort, same as 'rm -f'.
    else:
        shutil.rmtree(workdir, ignore_errors=True)
|
||||
|
||||
|
||||
def diff(result, exp):
    """
    Make up a unified diff text of the result and the expected strings.

    Nothing is printed by this function itself; the text is returned, led by
    a newline and enclosed in single quotes.

    :param result: Result string
    :param exp: Expected result string
    :return: A string of the quoted unified diff of `result` and `exp`
    """
    result_lines = result.splitlines()
    expected_lines = exp.splitlines()
    body = "\n".join(difflib.unified_diff(result_lines, expected_lines,
                                          'Result', 'Expected'))
    return "\n'" + body + "'"
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
23
tests/parser.py
Normal file
23
tests/parser.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=missing-docstring, invalid-name
|
||||
from __future__ import absolute_import, with_statement
|
||||
|
||||
import os.path
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import smhtml.parser as TT
|
||||
import tests.common as TC
|
||||
|
||||
|
||||
class Test(unittest.TestCase):

    def test_20_parse(self):
        # Smoke test: parsing the bundled sample MHTML file must give a
        # non-empty result.
        mhtml_path = os.path.join(TC.selfdir(), "res/python-smhtml.mhtml")
        parts = TT.parse(mhtml_path)
        self.assertTrue(parts)
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
21969
tests/res/python-smhtml.mhtml
Normal file
21969
tests/res/python-smhtml.mhtml
Normal file
File diff suppressed because it is too large
Load diff
14
tox.ini
Normal file
14
tox.ini
Normal file
|
@ -0,0 +1,14 @@
|
|||
[tox]
|
||||
envlist = py27, py34, py35, py36, py37
|
||||
|
||||
[flake8]
|
||||
exclude = .git,.tox,dist,*egg,setup.py
|
||||
|
||||
[testenv]
|
||||
deps = -r{toxinidir}/pkg/test_requirements.txt
|
||||
commands =
|
||||
flake8 --doctests smhtml tests
|
||||
- pylint --disable=invalid-name,locally-disabled smhtml
|
||||
python -m nose -v --with-doctest --all-modules --where tests --with-coverage --cover-tests
|
||||
setenv =
|
||||
PYTHONPATH = {toxinidir}/
|
Loading…
Reference in a new issue