start rework
This commit is contained in:
parent
56271cee9a
commit
0358c971aa
2
LICENSE
2
LICENSE
|
@ -1,6 +1,6 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2019 Satoru SATOH
|
||||
Copyright (c) 2021 Zoey Mae
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
|
@ -1,9 +0,0 @@
|
|||
include LICENSE
|
||||
include MANIFEST.in
|
||||
include README.*
|
||||
include setup.*
|
||||
include tox.ini .gitignore .travis.yml
|
||||
include pkg/*
|
||||
include examples/*
|
||||
recursive-include smhtml *.py
|
||||
recursive-include tests *.py
|
7
README.md
Normal file
7
README.md
Normal file
|
@ -0,0 +1,7 @@
|
|||
# PyMHTML
|
||||
|
||||
A simple library to parse MHTML files and compile them into a single HTML string. It can export that string as an HTML file, or export the original HTML file together with all of its supporting files.
|
||||
|
||||
Forked from [SMHTML](https://github.com/ssato/python-smhtml)
|
||||
|
||||
Note: work in progress
|
44
README.rst
44
README.rst
|
@ -1,44 +0,0 @@
|
|||
=================
|
||||
python-smhtml
|
||||
=================
|
||||
|
||||
About
|
||||
======
|
||||
|
||||
.. .. image:: https://img.shields.io/pypi/v/smhtml.svg
|
||||
:target: https://pypi.python.org/pypi/smhtml/
|
||||
:alt: [Latest Version]
|
||||
|
||||
.. .. image:: https://img.shields.io/pypi/pyversions/smhtml.svg
|
||||
:target: https://pypi.python.org/pypi/smhtml/
|
||||
:alt: [Python versions]
|
||||
|
||||
.. image:: https://api.travis-ci.org/ssato/python-smhtml.png
|
||||
:target: https://travis-ci.org/ssato/python-smhtml
|
||||
:alt: [Test status]
|
||||
|
||||
.. image:: https://coveralls.io/repos/ssato/python-smhtml/badge.png
|
||||
:target: https://coveralls.io/r/ssato/python-smhtml
|
||||
:alt: [Coverage Status]
|
||||
|
||||
.. .. image:: https://landscape.io/github/ssato/python-smhtml/master/landscape.png
|
||||
:target: https://landscape.io/github/ssato/python-smhtml/master
|
||||
:alt: [Code Health]
|
||||
|
||||
This is a simple and experimental python library to load MHTML files and
|
||||
extract files from them, and dump (make) MHTML [#]_ data from files.
|
||||
|
||||
- Author: Satoru SATOH <satoru.satoh@gmail.com>
|
||||
- License: MIT
|
||||
|
||||
.. [#] https://en.wikipedia.org/wiki/MHTML
|
||||
|
||||
Misc
|
||||
======
|
||||
|
||||
Here is a demo screenshot to show its CLI frontend can extract files from MHTML
|
||||
data and make (dump) MHTML data from files.
|
||||
|
||||
.. image:: examples/smhtml_cli-screenshot-0.png
|
||||
|
||||
.. vim:sw=2:ts=2:et:
|
Binary file not shown.
Before Width: | Height: | Size: 208 KiB |
10
pkg/nose.cfg
10
pkg/nose.cfg
|
@ -1,10 +0,0 @@
|
|||
[nosetests]
|
||||
verbosity=2
|
||||
with-doctest=1
|
||||
#all-modules=1
|
||||
# Requires that nosetest processes multiple modules' test cases at once.
|
||||
# processes=4
|
||||
|
||||
# coverage:
|
||||
cover-package=smhtml
|
||||
cover-branches=1
|
|
@ -1,93 +0,0 @@
|
|||
%global pkgname smhtml
|
||||
|
||||
%if 0%{?fedora} || 0%{?rhel} > 7 || 0%{?epel} > 7
|
||||
%global with_python3 1
|
||||
%endif
|
||||
|
||||
%global desc \
|
||||
A simple and experimental python library to parse and dump MHTML data.
|
||||
|
||||
Name: python-%{pkgname}
|
||||
Version: @VERSION@
|
||||
Release: 1%{?dist}
|
||||
Summary: Python library to parse and dump MHTML data
|
||||
Group: Development/Tools
|
||||
License: MIT
|
||||
URL: https://github.com/ssato/python-smhtml
|
||||
Source0: %{url}/archive/RELEASE_%{version}.tar.gz
|
||||
BuildArch: noarch
|
||||
%if 0%{?with_python3}
|
||||
BuildRequires: python3-devel
|
||||
BuildRequires: python3-setuptools
|
||||
%else
|
||||
BuildRequires: python2-setuptools
|
||||
BuildRequires: python2-devel
|
||||
%endif
|
||||
|
||||
%description %{desc}
|
||||
|
||||
%if 0%{?with_python3}
|
||||
%package -n python3-%{pkgname}
|
||||
Summary: %{summary}
|
||||
Requires: python3-chardet
|
||||
%{?python_provide:%python_provide python3-%{pkgname}}
|
||||
|
||||
%description -n python3-%{pkgname} %{desc}
|
||||
%else
|
||||
|
||||
%package -n python2-%{pkgname}
|
||||
Summary: %{summary}
|
||||
Requires: python2-chardet
|
||||
%{?python_provide:%python_provide python2-%{pkgname}}
|
||||
|
||||
%description -n python2-%{pkgname} %{desc}
|
||||
%endif
|
||||
|
||||
%prep
|
||||
%autosetup -n %{pkgname}-%{version}
|
||||
|
||||
%build
|
||||
%if 0%{?with_python3}
|
||||
%py3_build
|
||||
%else
|
||||
%py2_build
|
||||
# Dirty hacks.
|
||||
test -d %{buildroot}%{python_sitelib}/%{pkgname} || {
|
||||
cp -a src/%{pkgname} %{buildroot}%{python_sitelib}/
|
||||
}
|
||||
test -d %{buildroot}/usr/bin || {
|
||||
install -d %{buildroot}/usr/bin
|
||||
cat << EOF > %{buildroot}/usr/bin/smhtml_cli
|
||||
#! /usr/bin/python
|
||||
import sys
|
||||
from pkg_resources import load_entry_point
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(
|
||||
load_entry_point('smhtml', 'console_scripts', 'smhtml_cli')()
|
||||
)
|
||||
EOF
|
||||
}
|
||||
chmod +x %{buildroot}/usr/bin/smhtml_cli
|
||||
%endif
|
||||
|
||||
%install
|
||||
%if 0%{?with_python3}
|
||||
%py3_install
|
||||
%else
|
||||
%py2_install
|
||||
%endif
|
||||
|
||||
%if 0%{?with_python3}
|
||||
%files -n python3-%{pkgname}
|
||||
%{python3_sitelib}/%{pkgname}*
|
||||
%else
|
||||
%files -n python2-%{pkgname}
|
||||
%{python_sitelib}/%{pkgname}*
|
||||
%endif
|
||||
%doc README.rst
|
||||
%{_bindir}/*
|
||||
|
||||
%changelog
|
||||
* Mon Feb 25 2019 Satoru SATOH <ssato@redhat.com> - 0.0.1-1
|
||||
- Initial packaging
|
|
@ -1 +0,0 @@
|
|||
chardet
|
|
@ -1,6 +0,0 @@
|
|||
-r requirements.txt
|
||||
coveralls
|
||||
flake8 < 3.5.0
|
||||
nose
|
||||
pycodestyle < 2.4.0
|
||||
pylint
|
14
pymhtml/__init__.py
Normal file
14
pymhtml/__init__.py
Normal file
|
@ -0,0 +1,14 @@
|
|||
AUTHOR = 'Zoey Mae'
|
||||
VERSION = (0, 1, 0)
|
||||
LICENSE = 'MIT'
|
||||
PACKAGE = 'PyMHTML'
|
||||
REPO = 'https://git.barkshark.xyz/izaliamae/pymhtml'
|
||||
|
||||
__version__ = '.'.join(str(v) for v in VERSION)
|
||||
|
||||
from .misc import MhtmlObject, DotDict
|
||||
from .parser import MhtmlParser
|
||||
|
||||
|
||||
def parse(filename):
    """Parse the MHTML file at ``filename``.

    :param filename: Path of the MHTML file, passed unchanged to MhtmlParser
    :return: The MhtmlParser instance built from ``filename``
    """
    parser = MhtmlParser(filename)
    return parser
|
25
pymhtml/__main__.py
Normal file
25
pymhtml/__main__.py
Normal file
|
@ -0,0 +1,25 @@
|
|||
import sys
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .parser import MhtmlParser
|
||||
|
||||
|
||||
# Read the target file from the first command-line argument.
try:
    raw_path = Path(sys.argv[1])

except IndexError:
    # Exit with a short message on stderr instead of dumping a traceback.
    sys.exit('Forgot to specify an MHTML file to parse')

# expanduser() leaves paths without a leading "~" untouched, so no prefix
# check is needed before resolving.
path = raw_path.expanduser().resolve()

test = MhtmlParser(path)
print('MHTML Object:', test)
print('HTML File:', test.html)

for part in test.files.values():
    print('MHTML Part:', part)
|
40
pymhtml/misc.py
Normal file
40
pymhtml/misc.py
Normal file
|
@ -0,0 +1,40 @@
|
|||
import chardet, mimetypes
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class DotDict(dict):
    """Dictionary whose items can also be read, set and deleted as attributes.

    Attribute access is forwarded straight to the item methods of ``dict``,
    so looking up a missing name raises ``KeyError``.
    """

    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__
    # Must be __delitem__: binding __getitem__ here would turn
    # ``del obj.key`` into a silent no-op that leaves the item in place.
    __delattr__ = dict.__delitem__

    def copy(self):
        """Return a shallow copy that is a DotDict rather than a plain dict."""
        return DotDict(self)
|
||||
|
||||
class MhtmlObject(DotDict):
    """A single decoded part of an MHTML (MIME multipart) document.

    Keys set on the instance: ``body``, ``mimetype``, ``base_type``,
    ``location``, ``filename`` and ``encoding``.
    """

    def __init__(self, idx, part):
        """Decode ``part`` and work out a filename for it.

        :param idx: Index of the part, used to build a fallback filename
        :param part: Message part providing ``get_payload``,
            ``get_content_type``, ``get_content_maintype``, ``get_all`` and
            ``get_filename``
        """
        super().__init__()

        self.body = part.get_payload(decode=True)
        self.mimetype = part.get_content_type()
        self.base_type = part.get_content_maintype()

        # get_all() returns None when the header is absent, so guard the index.
        locations = part.get_all('Content-Location')
        self.location = locations[0] if locations else None

        self.filename = part.get_filename()
        self.encoding = None

        if not self.filename:
            if self.location and not self.location.startswith('data:'):
                self.filename = Path(self.location).name.split('?', 1)[0]

            # No location, a "data:" URI, or a location without a usable last
            # path component: fall back to a generated name.
            if not self.filename:
                ext = mimetypes.guess_extension(self.mimetype) or '.bin'
                self.filename = f'part-{idx}{ext}'

        if self.base_type == 'text' and self.body is not None:
            # chardet reports None when it cannot decide; fall back to ASCII.
            self.encoding = chardet.detect(self.body)['encoding']
            self.body = self.body.decode(self.encoding or 'ascii')

    def __str__(self):
        return f'{self.__class__.__name__}(name="{self.filename}", location="{self.location}", mimetype="{self.mimetype}")'
|
54
pymhtml/parser.py
Normal file
54
pymhtml/parser.py
Normal file
|
@ -0,0 +1,54 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=unused-import
|
||||
r"""Load and parse MHTML data.
|
||||
|
||||
.. versionadded:: 0.0.1
|
||||
"""
|
||||
|
||||
import email
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
from .misc import MhtmlObject
|
||||
|
||||
|
||||
class MhtmlParser:
    """Parse an MHTML file into its main HTML part and its supporting files.

    After construction ``html`` holds the first ``text/html`` part (or None
    when there is none) and ``files`` maps filenames to every other part.
    """

    def __init__(self, filepath):
        """Resolve ``filepath`` and parse it immediately.

        :param filepath: Path of the MHTML file to load
        :raises TypeError: If the file is not multipart MIME data
        """
        self.filepath = Path(filepath).resolve()
        self.html = None
        self.files = {}

        self.parse_file()

    def __str__(self):
        # html stays None when no text/html part was found; don't crash on it.
        location = self.html.location if self.html is not None else None
        return f'{self.__class__.__name__}(file="{self.filepath}", page="{location}")'

    def parse_file(self):
        """Read ``self.filepath`` and populate ``self.html`` and ``self.files``.

        :raises TypeError: If the file is not multipart MIME data
        """
        # Parse from bytes so the result does not depend on the locale's
        # default text encoding, and close the handle deterministically.
        with self.filepath.open('rb') as fobj:
            mdata = email.message_from_binary_file(fobj)

        if not mdata.is_multipart():
            raise TypeError('Not an MHTML file or missing MIME data')

        for idx, part in enumerate(mdata.walk()):
            # Multipart containers carry no payload of their own.
            if part.get_content_maintype() == "multipart":
                continue

            data = MhtmlObject(idx, part)

            # The first text/html part is the page; everything else is a resource.
            if data.mimetype == 'text/html' and self.html is None:
                self.html = data

            else:
                self.files[data.filename] = data

    def build_page(self):
        """Placeholder; not implemented yet."""
        pass
|
||||
|
||||
|
||||
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
|
@ -0,0 +1 @@
|
|||
chardet>=4.0.0
|
73
setup.py
Normal file → Executable file
73
setup.py
Normal file → Executable file
|
@ -1,46 +1,33 @@
|
|||
from __future__ import absolute_import
|
||||
|
||||
import os.path
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import setuptools
|
||||
import setuptools.command.bdist_rpm
|
||||
#!/usr/bin/env python3
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
|
||||
# It might throw IndexError and so on.
|
||||
VERSION = [re.search(r'^VERSION = "([^"]+)"', l).groups()[0] for l
|
||||
in open("smhtml/globals.py").readlines()
|
||||
if "VERSION" in l][0]
|
||||
setup(
|
||||
name='PyMHTML',
|
||||
version='0.1.0',
|
||||
packages=find_packages(),
|
||||
# The package uses f-strings, which need Python 3.6 or newer.
python_requires='>=3.6',
|
||||
include_package_data=False,
|
||||
author='Zoey Mae',
|
||||
author_email='admin@barkshark.xyz',
|
||||
description='Simple MHTML parser and exporter',
|
||||
keywords='web html mhtml',
|
||||
url='https://git.barkshark.xyz/izaliamae/pymhtml',
|
||||
project_urls={
|
||||
'Bug Tracker': 'https://git.barkshark.xyz/izaliamae/pymhtml/issues',
|
||||
'Documentation': 'https://git.barkshark.xyz/izaliamae/pymhtml/wiki',
|
||||
'Source Code': 'https://git.barkshark.xyz/izaliamae/pymhtml'
|
||||
},
|
||||
|
||||
# For daily snapshot versioning mode:
|
||||
if os.environ.get("_SNAPSHOT_BUILD", None) is not None:
|
||||
import datetime
|
||||
VERSION = VERSION + datetime.datetime.now().strftime(".%Y%m%d")
|
||||
|
||||
|
||||
class bdist_rpm(setuptools.command.bdist_rpm.bdist_rpm):
    """Override the default content of the RPM SPEC.
    """
    spec_tmpl = os.path.join(os.path.abspath(os.curdir),
                             "pkg/package.spec.in")

    def _replace(self, line):
        """Replace some strings in the RPM SPEC template"""
        if "@VERSION@" in line:
            return line.replace("@VERSION@", VERSION)

        if "Source0:" in line:  # Dirty hack
            return "Source0: %{pkgname}-%{version}.tar.gz"

        return line

    def _make_spec_file(self):
        """Return the SPEC template lines with placeholders substituted."""
        # Use a context manager so the template file handle is closed.
        with open(self.spec_tmpl) as tmpl:
            return [self._replace(line.rstrip()) for line in tmpl]
|
||||
|
||||
|
||||
setuptools.setup(name="smhtml", version=VERSION,
|
||||
cmdclass=dict(bdist_rpm=bdist_rpm))
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
||||
classifiers=[
    # Trove classifiers must match the official list exactly.
    'License :: OSI Approved :: MIT License',
    'Development Status :: 3 - Alpha',
    # The package uses f-strings, so 3.6 is the oldest usable version.
    'Programming Language :: Python :: 3.6',
    'Operating System :: POSIX',
    'Operating System :: MacOS :: MacOS X',
    'Operating System :: Microsoft :: Windows',
    'Topic :: Internet :: WWW/HTTP',
    'Topic :: Software Development :: Libraries',
    'Topic :: Software Development :: Libraries :: Python Modules'
]
|
||||
)
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
r"""
|
||||
.. module:: smhtml
|
||||
:platform: Unix, Windows
|
||||
:synopsis: Simple python library to load, extract and dump MHTML data
|
||||
|
||||
python-smhtml is a simple and experimental python library to load MHTML files and
|
||||
extract files from them, and dump (make) MHTML data from files.
|
||||
|
||||
- Home: https://github.com/ssato/python-smhtml
|
||||
|
||||
About MHTML format, please refer other web pages such like
|
||||
https://en.wikipedia.org/wiki/MHTML.
|
||||
"""
|
||||
from .api import (
|
||||
AUTHOR, VERSION, load, loads, dump, dumps, extract # flake8: noqa
|
||||
)
|
||||
|
||||
__author__ = AUTHOR
|
||||
__version__ = VERSION
|
||||
|
||||
# Every public loader/dumper imported from .api is listed exactly once.
__all__ = ["load", "loads", "extract", "dump", "dumps"]
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
|
@ -1,24 +0,0 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=unused-import
|
||||
r"""Public APIs of smhtml module.
|
||||
|
||||
.. versionadded:: 0.0.1
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
from .globals import (
|
||||
PACKAGE, AUTHOR, VERSION, LOGGER # flake8: noqa
|
||||
)
|
||||
from .loader import load, loads, extract # flake8: noqa
|
||||
from .dumper import dump, dumps # flake8: noqa
|
||||
|
||||
|
||||
def version():
    """Split the package version string on dots.

    :return: List of the version components as strings, e.g. ['0', '0', '1']
    """
    components = VERSION.split('.')
    return components
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
|
@ -1,16 +0,0 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
"""Some globals of smhtml module.
|
||||
"""
|
||||
import logging
|
||||
|
||||
|
||||
PACKAGE = "smhtml"
|
||||
AUTHOR = "Satoru SATOH <satoru.satoh@gmail.com>"
|
||||
VERSION = "0.0.1"
|
||||
|
||||
LOGGER = logging.getLogger(PACKAGE)
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
185
smhtml/loader.py
185
smhtml/loader.py
|
@ -1,185 +0,0 @@
|
|||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=unused-import
|
||||
r"""Load and parse MHTML data.
|
||||
|
||||
.. versionadded:: 0.0.1
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import email
|
||||
import mimetypes
|
||||
import os.path
|
||||
import os
|
||||
|
||||
import smhtml.utils
|
||||
from smhtml.globals import LOGGER
|
||||
|
||||
|
||||
def decode_part(part):
    """
    Decode a part of MIME multi-part data.

    :param part: :class:`email.mime.base.MIMEBase` object
    :return:
        A dict with the keys `type` (content type), `encoding` (detected
        charset, None for non-text parts), `data` (decoded text for text
        parts, raw bytes otherwise), `payload` (raw bytes) and `location`
        (first Content-Location header value, or None)
    """
    bdata = part.get_payload(decode=True)
    ctype = part.get_content_type()
    mtype = part.get_content_maintype()

    if mtype == "text":
        # Detection may yield None; bytes.decode() rejects a None encoding,
        # so fall back to ASCII in that case.
        charset = smhtml.utils.detect_charset(bdata) or "ascii"
        data = bdata.decode(charset, "ignore")
    else:
        charset = None
        data = bdata

    location = part.get_all("Content-Location")
    return dict(type=ctype, encoding=charset, data=data, payload=bdata,
                location=location[0] if location else None)
|
||||
|
||||
|
||||
def get_or_gen_filename(part, idx=0):
    """
    Return the filename carried by the MIME `part`, or build one from `idx`
    and the part's content type when it has none.

    :param part: :class:`email.mime.base.MIMEBase` object
    :param idx: Index used in the generated name, zero-padded to 3 digits
    :return: A filename as a string
    """
    name = part.get_filename()
    if name:
        return name

    # Unknown content types get a generic binary extension.
    suffix = mimetypes.guess_extension(part.get_content_type()) or ".bin"
    return "part-%03d%s" % (idx, suffix)
|
||||
|
||||
|
||||
def parse_itr(mdata):
    """
    Walk given MIME multi-part data and yield an info dict for every part
    that is not itself a multipart container.

    :param mdata: :class:`email.message.Message` object
    :return: A generator yields info of each part in `mdata`
    """
    for index, piece in enumerate(mdata.walk()):
        if piece.get_content_maintype() != "multipart":
            name = get_or_gen_filename(piece, idx=index)
            details = decode_part(piece)
            details["index"] = index
            details["filename"] = name

            LOGGER.debug("part#%d: filename=%s", index, name)

            yield details
|
||||
|
||||
|
||||
def loads_itr(content):
    """
    Parse MHTML data given as a string and yield the info of each part,
    after checking that the data is MIME multi-part.

    :param content: Input MHTML data as a string
    :return: A generator yields info of each part loaded from `content`
    :raises: ValueError
    """
    message = email.message_from_string(content)

    if message.is_multipart():
        for part_info in parse_itr(message):
            yield part_info
        return

    raise ValueError("Multi-part MIME data was not found in "
                     "given string: %s ..." % content[:100])
|
||||
|
||||
|
||||
def load_itr(filepath):
    """
    Parse MHTML data read from a file and yield the info of each part,
    after checking that the data is MIME multi-part.

    :param filepath: :class:`pathlib.Path` object or a string represents path
    :return: A generator yields each part parsed from `filepath` opened
    :raises: ValueError
    """
    with open(filepath) as stream:
        message = email.message_from_file(stream)

    if message.is_multipart():
        for part_info in parse_itr(message):
            yield part_info
        return

    raise ValueError("Multi-part MIME data was not found in "
                     "'%s'" % filepath)
|
||||
|
||||
|
||||
def loads(content):
    """
    Collect the info of every part of MIME multi-part data given as a string.

    :param content: Input MHTML data as a string
    :return: A list of info of each part of MIME multi-part data
    :raises: ValueError
    """
    return [part_info for part_info in loads_itr(content)]
|
||||
|
||||
|
||||
def load(filepath):
    """
    Collect the info of every part of MIME multi-part data stored in a file.

    :param filepath: :class:`pathlib.Path` object or a string represents path
    :return: A list of info of each part of MIME multi-part data
    :raises: ValueError
    """
    return [part_info for part_info in load_itr(filepath)]
|
||||
|
||||
|
||||
def extract(filepath, output, usebasename=False, outputfilenamer=None):
    """
    Load and extract each part of MIME multi-part data as files from given data
    as a file.

    :param filepath: :class:`pathlib.Path` object represents input
    :param output: :class:`pathlib.Path` object represents output dir
    :param usebasename: Use the basename, not full path, when writing files
    :param outputfilenamer: Callback fn takes `inf` and returns a filename
        For example, it could return a filename based on `inf['location']`
    :raises: ValueError if `output` is "-", OSError if `output` is a file
    """
    if output == "-":
        raise ValueError("Output dir must be given to extract")

    if os.path.exists(output) and os.path.isfile(output):
        raise OSError("Output '%s' already exists as a file!" % output)

    # Only an existing *file* is rejected above, so an existing directory
    # must be accepted rather than making os.makedirs() fail.
    if not os.path.isdir(output):
        os.makedirs(output)

    for inf in load_itr(filepath):
        filename = inf["filename"]

        if usebasename:
            filename = os.path.split(filename)[-1]

        if outputfilenamer:
            filename = outputfilenamer(inf)

        # NOTE(review): filename comes from the MHTML data (or the callback)
        # and is joined unchecked, so it may point outside `output`.
        outpath = os.path.join(output, filename)
        outdir = os.path.dirname(outpath)

        LOGGER.debug("Extract %s from %s", filename, filepath)

        if not os.path.exists(outdir):
            os.makedirs(outdir)

        with open(outpath, "wb") as out:
            out.write(inf["payload"])
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
|
@ -1,34 +0,0 @@
|
|||
#
|
||||
# -*- coding: utf-8; mode: python -*-
|
||||
#
|
||||
# Copyright (C) 2019 Satoru SATOH <satoru.satoh@gmail.com>
|
||||
# License: MIT
|
||||
#
|
||||
# pylint: disable=unused-import
|
||||
r"""Utility functions.
|
||||
|
||||
.. versionadded:: 0.0.1
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
|
||||
import chardet
|
||||
|
||||
|
||||
def detect_charset(bmsg, default="ascii"):
    r"""
    Detect the charset of given byte data.

    :param bmsg: A byte data to detect charset
    :param default:
        Charset returned when `bmsg` is empty or no charset could be detected
    :return: A string represents charset such as 'utf-8', 'iso-2022-jp'

    >>> detect_charset(b"a")
    'ascii'
    >>> detect_charset(b"")
    'ascii'
    >>> detect_charset(u"あ".encode("utf-8"))
    'utf-8'
    """
    if not bmsg:
        return default

    # chardet reports an encoding of None when it cannot decide; callers pass
    # the result to bytes.decode(), so never hand back None.
    return chardet.detect(bmsg)["encoding"] or default
|
||||
|
||||
# vim:sw=4:ts=4:et:
|
14
tox.ini
14
tox.ini
|
@ -1,14 +0,0 @@
|
|||
[tox]
|
||||
envlist = py27, py34, py35, py36, py37
|
||||
|
||||
[flake8]
|
||||
exclude = .git,.tox,dist,*egg,setup.py
|
||||
|
||||
[testenv]
|
||||
deps = -r{toxinidir}/pkg/test_requirements.txt
|
||||
commands =
|
||||
flake8 --doctests smhtml tests
|
||||
- pylint --disable=invalid-name,locally-disabled smhtml
|
||||
python -m nose -v --with-doctest --all-modules --where tests --with-coverage --cover-tests --cover-package=smhtml
|
||||
setenv =
|
||||
PYTHONPATH = {toxinidir}/
|
Loading…
Reference in a new issue