Skip to content

Commit

Permalink
insert-license: adding support for ISO-8859-1 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas-C committed Feb 23, 2022
1 parent b4465b8 commit 8e21851
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 5 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ repos:
- id: forbid-crlf
- id: remove-crlf
- id: forbid-tabs
exclude: tests/resources/main_with_license.cpp
exclude: tests/resources/main.*_with_license.cpp
- id: remove-tabs
exclude: tests/resources/main_with_license.cpp
exclude: tests/resources/main.*_with_license.cpp
- repo: git://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: trailing-whitespace
files: ''
exclude: tests/resources/main_with_license.cpp
exclude: tests/resources/main.*_with_license.cpp
- id: check-yaml
- repo: git://github.com/pre-commit/mirrors-pylint
rev: v3.0.0a4
Expand Down
15 changes: 13 additions & 2 deletions pre_commit_hooks/insert_license.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ def process_files(args, changed_files, todo_files, license_info):
:return: True if some files were changed or t.o.d.o is detected
"""
for src_filepath in args.filenames:
with open(src_filepath, encoding='utf8') as src_file:
src_file_content = src_file.readlines()
src_file_content = _read_file_content(src_filepath)
if skip_license_insert_found(
src_file_content=src_file_content,
skip_license_insertion_comment=args.skip_license_insertion_comment,
Expand Down Expand Up @@ -161,6 +160,18 @@ def process_files(args, changed_files, todo_files, license_info):
return changed_files or todo_files


def _read_file_content(src_filepath):
    """
    Read a source file and return its content as a list of lines.

    The file is decoded with each supported encoding in turn (UTF-8 first,
    then ISO-8859-1) and the lines from the first successful decoding are
    returned.

    :param src_filepath: path of the file to read
    :return: list of lines, as produced by ``readlines()``
    :raises UnicodeDecodeError: the error from the last attempted encoding,
        re-raised when none of the encodings could decode the file
    """
    # The chardet library could be used here to support more encodings.
    candidate_encodings = ('utf8', 'ISO-8859-1')
    decode_failure = None
    for candidate in candidate_encodings:
        try:
            with open(src_filepath, encoding=candidate) as handle:
                lines = handle.readlines()
        except UnicodeDecodeError as exc:
            # Keep the failure so it can be re-raised if every encoding fails.
            decode_failure = exc
        else:
            return lines
    print("Error while processing: {} - file encoding is probably not supported".format(src_filepath))
    raise decode_failure


def license_not_found(remove_header, license_info, src_file_content, src_filepath):
"""
Executed when license is not found. It either adds license if remove_header is False,
Expand Down
1 change: 1 addition & 0 deletions tests/insert_license_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
('module_with_license_todo.css', '/*| *| */', None, True),
('main_without_license.cpp', '/*|\t| */', 'main_with_license.cpp', True),
('main_iso8859_without_license.cpp', '/*|\t| */', 'main_iso8859_with_license.cpp', True),
),
)),
)
Expand Down
11 changes: 11 additions & 0 deletions tests/resources/main_iso8859_with_license.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/*
Copyright (C) 2017 Teela O'Malley
Licensed under the Apache License, Version 2.0 (the "License");
*/

// ISO-8859 character : æ
int main(int argc, char *argv[])
{
return 0;
}
5 changes: 5 additions & 0 deletions tests/resources/main_iso8859_without_license.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// ISO-8859 character : æ
int main(int argc, char *argv[])
{
return 0;
}

0 comments on commit 8e21851

Please sign in to comment.