Browse Source

Merge pull request #106 from fsfe/fix-shebang

Preserve shebangs and other pre-SPDX headers
tags/v0.6.0
carmenbianca 1 month ago
parent
commit
b3c85783f8
No account linked to committer's email address
7 changed files with 243 additions and 50 deletions
  1. 2
    0
      AUTHORS.rst
  2. 5
    0
      CHANGELOG.md
  3. 5
    4
      docs/usage.rst
  4. 9
    0
      src/reuse/_util.py
  5. 94
    36
      src/reuse/header.py
  6. 1
    1
      tests/conftest.py
  7. 127
    9
      tests/test_header.py

+ 2
- 0
AUTHORS.rst View File

@@ -32,6 +32,8 @@ Contributors

- Stefan Bakker <s.bakker777@gmail.com>

- Kirill Elagin <kirelagin@gmail.com>

Translators
-----------


+ 5
- 0
CHANGELOG.md View File

@@ -31,6 +31,11 @@ The versions follow [semantic versioning](https://semver.org).
- Made the workaround for `MachineReadableFormatError` introduced in 0.5.2 more
generic.

- Improved shebang detection in `addheader`.

- For `addheader`, the SPDX comment block now need not be the first thing in the
file. It will find the SPDX comment block and deal with it in-place.

- Git submodules are now ignored by default.

## 0.5.2 - 2019-10-27

+ 5
- 4
docs/usage.rst View File

@@ -36,10 +36,11 @@ current year is 2019):
You can use as many ``--copyright`` and ``--license`` arguments as you like, so
long as there is at least one such argument.

The REUSE header always starts at the first character in a file. If a different
REUSE header already existed, its tags are copied, and the header is replaced.
If the pre-existing comment header did not contain any copyright and licensing
information, it is moved downwards in the file. A shebang is always preserved.
The REUSE header is placed at the very top of the file. If a different REUSE
header already existed---at the top or elsewhere---its tags are copied, and the
header is replaced in-place.

Shebangs are always preserved at the top of the file.

Comment styles
--------------

+ 9
- 0
src/reuse/_util.py View File

@@ -201,6 +201,7 @@ def extract_spdx_info(text: str) -> None:
"""Extract SPDX information from comments in a string.

:raises ExpressionError: if an SPDX expression could not be parsed
:raises ParseError: if an SPDX expression could not be parsed
"""
expression_matches = set(map(str.strip, _IDENTIFIER_PATTERN.findall(text)))
expressions = set()
@@ -221,6 +222,14 @@ def extract_spdx_info(text: str) -> None:
return SpdxInfo(expressions, copyright_matches)


def contains_spdx_info(text: str) -> bool:
    """Tell whether *text* holds any SPDX information.

    The text is handed to :func:`extract_spdx_info`; the answer is
    :const:`True` when at least one field of the result is non-empty. Text
    whose SPDX expressions cannot be parsed is reported as not containing
    SPDX information instead of raising.
    """
    try:
        spdx_info = extract_spdx_info(text)
    except (ExpressionError, ParseError):
        # Unparseable expressions count as "no usable SPDX info".
        return False
    return any(spdx_info)


def make_copyright_line(statement: str, year: str = None) -> str:
"""Given a statement, prefix it with ``SPDX-FileCopyrightText:`` if it is
not already prefixed with some manner of copyright tag.

+ 94
- 36
src/reuse/header.py View File

@@ -1,5 +1,6 @@
# SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V.
# SPDX-FileCopyrightText: 2019 Stefan Bakker <s.bakker777@gmail.com>
# SPDX-FileCopyrightText: 2019 Kirill Elagin <kirelagin@gmail.com>
#
# SPDX-License-Identifier: GPL-3.0-or-later

@@ -7,11 +8,12 @@

import datetime
import logging
import re
import sys
from gettext import gettext as _
from os import PathLike
from pathlib import Path
from typing import Optional
from typing import NamedTuple, Optional, Sequence

from binaryornot.check import is_binary
from boolean.boolean import ParseError
@@ -31,6 +33,7 @@ from ._comment import (
from ._util import (
PathType,
_determine_license_path,
contains_spdx_info,
extract_spdx_info,
make_copyright_line,
spdx_identifier,
@@ -44,6 +47,16 @@ _ENV = Environment(
)
DEFAULT_TEMPLATE = _ENV.get_template("default_template.jinja2")

_NEWLINE_PATTERN = re.compile(r"\n", re.MULTILINE)


class _TextSections(NamedTuple):
"""Used to split up text in three parts."""

before: str
middle: str
after: str


class MissingSpdxInfo(Exception):
"""Some SPDX information is missing from the result."""
@@ -132,16 +145,54 @@ def create_header(
spdx_info.copyright_lines.union(existing_spdx.copyright_lines),
)

if header.startswith("#!"):
new_header = header.split("\n")[0] + "\n"

new_header += _create_new_header(
spdx_info,
template=template,
template_is_commented=template_is_commented,
style=style,
)
return new_header
return new_header + "\n"


def _indices_of_newlines(text: str) -> Sequence[int]:
indices = [0]
start = 0

while True:
match = _NEWLINE_PATTERN.search(text, start)
if match:
start = match.span()[1]
indices.append(start)
else:
break

return indices


def _find_first_spdx_comment(
    text: str, style: CommentStyle = None
) -> _TextSections:
    """Locate the first comment in *text* that carries SPDX information.

    Each offset returned by :func:`_indices_of_newlines` is tried in order as
    the potential start of a comment in the given *style*
    (:class:`PythonCommentStyle` when *style* is :const:`None`). Offsets at
    which no comment can be parsed, and comments without SPDX information, are
    passed over.

    :return: the text preceding the comment, the comment itself with a newline
        appended, and everything behind the single character that follows the
        comment.
    :raises MissingSpdxInfo: if no SPDX info can be found in any comment
    """
    comment_style = PythonCommentStyle if style is None else style

    for offset in _indices_of_newlines(text):
        remainder = text[offset:]
        try:
            candidate = comment_style.comment_at_first_character(remainder)
        except CommentParseError:
            # No comment starts at this offset; try the next one.
            continue
        if not contains_spdx_info(candidate):
            continue

        # Skip one character past the comment when slicing the remainder.
        resume_at = offset + len(candidate) + 1
        return _TextSections(
            text[:offset], candidate + "\n", text[resume_at:]
        )

    raise MissingSpdxInfo()


def find_and_replace_header(
@@ -151,11 +202,10 @@ def find_and_replace_header(
template_is_commented: bool = False,
style: CommentStyle = None,
) -> str:
"""Find the comment block starting at the first character in *text*. That
comment block is replaced by a new comment block containing *spdx_info*. It
is formatted as according to *template*. The template is normally
uncommented, but if it is already commented, *template_is_commented* should
be :const:`True`.
"""Find the first SPDX comment block in *text*. That comment block is
replaced by a new comment block containing *spdx_info*. It is formatted as
according to *template*. The template is normally uncommented, but if it is
already commented, *template_is_commented* should be :const:`True`.

If both *style* and *template_is_commented* are provided, *style* is only
used to find the header comment.
@@ -171,27 +221,38 @@ def find_and_replace_header(
:raises MissingSpdxInfo: if the generated comment is missing SPDX
information.
"""
if template is None:
template = DEFAULT_TEMPLATE
if style is None:
style = PythonCommentStyle

try:
header = style.comment_at_first_character(text)
except CommentParseError:
# TODO: Log this
header = ""

# TODO: This is a duplicated check that also happens inside of
# create_header.
try:
existing_spdx = extract_spdx_info(header)
except (ExpressionError, ParseError):
# This error is handled in create_header. Just set the value to None
# here to satisfy the linter.
existing_spdx = None

new_header = create_header(
before, header, after = _find_first_spdx_comment(text, style=style)
except MissingSpdxInfo:
before, header, after = "", "", text

# pylint: disable=logging-format-interpolation
_LOGGER.debug("before = {}".format(repr(before)))
_LOGGER.debug("header = {}".format(repr(header)))
_LOGGER.debug("after = {}".format(repr(after)))

# Extract shebang from header and put it in before. It's a bit messy, but
# it ends up working.
if header.startswith("#!") and not before.strip():
before = ""
for line in header.splitlines():
if line.startswith("#!"):
before = before + "\n" + line
header = header.replace(line, "", 1)
else:
break
elif after.startswith("#!") and not any((before, header)):
for line in after.splitlines():
if line.startswith("#!"):
before = before + "\n" + line
after = after.replace(line, "", 1)
else:
break

header = create_header(
spdx_info,
header,
template=template,
@@ -199,15 +260,12 @@ def find_and_replace_header(
style=style,
)

if header and any(existing_spdx):
text = text.replace(header, "", 1)
else:
# Some extra spacing for the new header.
new_header = new_header + "\n"
if not text.startswith("\n"):
new_header = new_header + "\n"

return new_header + text
new_text = header.strip("\n")
if before.strip():
new_text = before.strip("\n") + "\n\n" + new_text
if after.strip():
new_text = new_text + "\n\n" + after.strip("\n")
return new_text


def _verify_paths_supported(paths, parser):

+ 1
- 1
tests/conftest.py View File

@@ -37,7 +37,7 @@ TESTS_DIRECTORY = Path(__file__).parent.resolve()
RESOURCES_DIRECTORY = TESTS_DIRECTORY / "resources"


def pytest_configure(config):
def pytest_configure():
"""Called after command line options have been parsed and all plugins and
initial conftest files been loaded.
"""

+ 127
- 9
tests/test_header.py View File

@@ -34,7 +34,7 @@ def test_create_header_simple():
"""
).replace("spdx", "SPDX")

assert create_header(spdx_info) == expected
assert create_header(spdx_info).strip() == expected


def test_create_header_template_simple(template_simple):
@@ -52,7 +52,9 @@ def test_create_header_template_simple(template_simple):
"""
).replace("spdx", "SPDX")

assert create_header(spdx_info, template=template_simple) == expected
assert (
create_header(spdx_info, template=template_simple).strip() == expected
)


def test_create_header_template_no_spdx(template_no_spdx):
@@ -86,7 +88,7 @@ def test_create_header_template_commented(template_commented):
template=template_commented,
template_is_commented=True,
style=CCommentStyle,
)
).strip()
== expected
)

@@ -113,7 +115,7 @@ def test_create_header_already_contains_spdx():
"""
).replace("spdx", "SPDX")

assert create_header(spdx_info, header=existing) == expected
assert create_header(spdx_info, header=existing).strip() == expected


def test_create_header_existing_is_wrong():
@@ -139,8 +141,6 @@ def test_create_header_old_syntax():
existing = cleandoc(
"""
# Copyright John Doe

pass
"""
)
expected = cleandoc(
@@ -151,7 +151,30 @@ def test_create_header_old_syntax():
"""
).replace("spdx", "SPDX")

assert create_header(spdx_info, header=existing) == expected
assert create_header(spdx_info, header=existing).strip() == expected


def test_create_header_remove_fluff():
    """Any stuff that isn't SPDX info is removed when using create_header.

    The existing header carries a copyright tag plus a free-form comment line
    and some code; the expected result keeps the copyright tag, gains the
    licence tag from *spdx_info*, and drops everything else.
    """
    spdx_info = SpdxInfo(set(["GPL-3.0-or-later"]), set())
    # NOTE(review): the tags are written in lower case and upper-cased at
    # runtime, presumably so that tools scanning this test file do not mistake
    # the fixtures for the file's own copyright/licensing tags -- confirm.
    existing = cleandoc(
        """
        # spdx-FileCopyrightText: John Doe
        #
        # Hello, world!

        pass
        """
    ).replace("spdx", "SPDX")
    expected = cleandoc(
        """
        # spdx-FileCopyrightText: John Doe
        #
        # spdx-License-Identifier: GPL-3.0-or-later
        """
    ).replace("spdx", "SPDX")

    assert create_header(spdx_info, header=existing).strip() == expected


def test_find_and_replace_no_header():
@@ -190,8 +213,8 @@ def test_find_and_replace_verbatim():


def test_find_and_replace_newline_before_header():
"""In a scenario where the header is not the first character in the file,
create a new header. It would be nice if this were handled more elegantly.
"""In a scenario where the header is preceded by whitespace, remove the
preceding whitespace.
"""
spdx_info = SpdxInfo(
set(["GPL-3.0-or-later"]), set(["SPDX" "-FileCopyrightText: Mary Sue"])
@@ -206,11 +229,46 @@ def test_find_and_replace_newline_before_header():
text = "\n" + text
expected = cleandoc(
"""
# spdx-FileCopyrightText: Jane Doe
# spdx-FileCopyrightText: Mary Sue
#
# spdx-License-Identifier: GPL-3.0-or-later

pass
"""
).replace("spdx", "SPDX")

assert find_and_replace_header(text, spdx_info) == expected


def test_find_and_replace_preserve_preceding():
"""When the SPDX header is in the middle of the file, keep it there."""
spdx_info = SpdxInfo(
set(["GPL-3.0-or-later"]), set(["SPDX" "-FileCopyrightText: Mary Sue"])
)
text = cleandoc(
"""
# Hello, world!

def foo(bar):
return bar

# spdx-FileCopyrightText: Jane Doe

pass
"""
).replace("spdx", "SPDX")
expected = cleandoc(
"""
# Hello, world!

def foo(bar):
return bar

# spdx-FileCopyrightText: Jane Doe
# spdx-FileCopyrightText: Mary Sue
#
# spdx-License-Identifier: GPL-3.0-or-later

pass
"""
@@ -229,6 +287,7 @@ def test_find_and_replace_keep_shebang():
text = cleandoc(
"""
#!/usr/bin/env python3

# spdx-FileCopyrightText: Jane Doe

pass
@@ -237,6 +296,7 @@ def test_find_and_replace_keep_shebang():
expected = cleandoc(
"""
#!/usr/bin/env python3

# spdx-FileCopyrightText: Jane Doe
# spdx-FileCopyrightText: Mary Sue
#
@@ -249,6 +309,64 @@ def test_find_and_replace_keep_shebang():
assert find_and_replace_header(text, spdx_info) == expected


def test_find_and_replace_separate_shebang():
    """Shebang lines glued to the SPDX comment are split off from it: they stay
    at the top of the file, and the regenerated header follows after a blank
    line.
    """
    original = cleandoc(
        """
        #!/usr/bin/env python3
        #!nix-shell -p python3
        # spdx-FileCopyrightText: Jane Doe

        pass
        """
    ).replace("spdx", "SPDX")
    wanted = cleandoc(
        """
        #!/usr/bin/env python3
        #!nix-shell -p python3

        # spdx-FileCopyrightText: Jane Doe
        #
        # spdx-License-Identifier: GPL-3.0-or-later

        pass
        """
    ).replace("spdx", "SPDX")
    spdx_info = SpdxInfo(set(["GPL-3.0-or-later"]), set())

    result = find_and_replace_header(original, spdx_info)

    assert result == wanted


def test_find_and_replace_only_shebang():
    """When the file has a shebang but no SPDX comment, the shebang stays at the
    top and the new header is inserted below it, above the pre-existing
    comment.
    """
    original = cleandoc(
        """
        #!/usr/bin/env python3

        # Hello, world!

        pass
        """
    )
    wanted = cleandoc(
        """
        #!/usr/bin/env python3

        # spdx-License-Identifier: GPL-3.0-or-later

        # Hello, world!

        pass
        """
    ).replace("spdx", "SPDX")
    spdx_info = SpdxInfo(set(["GPL-3.0-or-later"]), set())

    result = find_and_replace_header(original, spdx_info)

    assert result == wanted


def test_find_and_replace_keep_old_comment():
"""When encountering a comment that does not contain copyright and
licensing information, preserve it below the REUSE header.

Loading…
Cancel
Save