From 9022a871b53be8494e4456e5eb6e6e8a5b27cb37 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Fri, 4 Sep 2020 10:27:10 -0700
Subject: [PATCH 01/15] empty list should be initialized *before* it might be
 populated

---
 link_checker/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 2e6b629..8b8001a 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -459,9 +459,9 @@ def main():
     run_full_inspection = (
         no_parser_args or all_parser_args_but_no_subparser_args
     )
+    exit_status_list = []
     if run_sub_command:
         exit_status_list = args.func(args)
-    exit_status_list = []
     if args.legalcode and not all_parser_args_but_no_subparser_args:
         exit_status_list = check_legalcode(args)
     if args.deeds and not all_parser_args_but_no_subparser_args:

From 259e9dc1f39261aea5146b832a917c91cf4d663b Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Fri, 4 Sep 2020 17:37:45 -0700
Subject: [PATCH 02/15] refactored argument parser to only use subcommands and
 added additional subcommands

---
 link_checker/__main__.py | 505 ++++++++++++++++++++++-----------------
 link_checker/utils.py    |   9 +-
 2 files changed, 298 insertions(+), 216 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 8b8001a..60e97e7 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -27,7 +27,6 @@
     INFO,
     DEBUG,
 )
-
 from link_checker.utils import (
     CheckerError,
     get_legalcode,
@@ -47,129 +46,165 @@
 )
 
 
-def parse_argument(arguments):
+def parse_arguments():
     """parse arguments from cli
 
     Args:
         args (list): list of arguments parsed from command line
     """
-    # Setup argument parser
+
+    # Primary argument parser and sub-parser (for subcommands)
     parser = argparse.ArgumentParser(
-        prog="link_checker.py", description=__doc__
-    )
-    parser.add_argument(
-        "--legalcode",
-        help="Runs link_checker for legalcode only. (Note: --licenses is"
-        " deprecated and will be dropped from a future release. Please use"
-        " --legalcode instead.)",
-        action="store_true",
-    )
-    parser.add_argument(
-        "--deeds",
-        help="Runs link_checker for deeds only (the legalcode files will still"
-        " be scraped, but not checked for broken links)",
-        action="store_true",
-    )
-    parser.add_argument(
-        "--rdf", help="Runs link_checker for rdf only", action="store_true"
+        prog="link_checker",
+        description=__doc__,
+        formatter_class=argparse.RawTextHelpFormatter,
+        epilog="Also see the help output each subcommand",
     )
-    parser.add_argument(
-        "--index",
-        help="Runs link_checker for index.rdf only",
-        action="store_true",
-    )
-    parser.add_argument(
-        "--local",
-        help="Scrapes legalcode files from local file system",
-        action="store_true",
-    )
-    parser.add_argument(
-        "--output-errors",
-        help="Outputs all link errors to file (default: errorlog.txt) and"
-        " creates junit-xml type summary(test-summary/junit-xml-report.xml)",
-        metavar="output_file",
-        const="errorlog.txt",
-        nargs="?",
-        type=argparse.FileType("w", encoding="utf-8"),
+    subparsers = parser.add_subparsers(
+        title="subcommands (a single subcomamnd is required)",
+        dest="subcommand",
+        required=True,
     )
-    parser.add_argument(
+
+    # Shared Parsers
+
+    # Shared parser (optional arguments used by all subcommands)
+    parser_shared = argparse.ArgumentParser()
+    parser_shared.add_argument(
         "-q",
         "--quiet",
         action="append_const",
         const=10,
+        help="decrease verbosity (can be specified multiple times)",
         dest="verbosity",
-        help="Decrease verbosity. Can be specified multiple times.",
-    )
-    parser.add_argument(
-        "--root-url", help=f"Set root URL (default: {DEFAULT_ROOT_URL})",
     )
-    parser.add_argument(
+    parser_shared.add_argument(
         "-v",
         "--verbose",
         action="append_const",
         const=-10,
+        help="increase verbosity (can be specified multiple times)",
         dest="verbosity",
-        help="Increase verbosity. Can be specified multiple times.",
     )
-    # Sub-Parser Section
-    subparsers = parser.add_subparsers(help="sub-command help")
-    # legalcode section: link_checker legalcode -h
-    parser_legalcode = subparsers.add_parser(
-        "legalcode", help="legalcode help"
+    parser_shared.add_argument(
+        "--root-url",
+        default=DEFAULT_ROOT_URL,
+        help=f"set root URL (default: '{DEFAULT_ROOT_URL}')",
     )
-    parser_legalcode.add_argument(
+
+    # Shared licenses parser (optional arguments used by all license
+    # subcommands)
+    parser_shared_licenses = argparse.ArgumentParser(add_help=False)
+    parser_shared_licenses.add_argument(
         "--local",
-        help=(
-            "Scrapes legalcode files from local file system.\n"
-            "Add 'LICENSE_LOCAL_PATH' to your environment,\n"
-            "otherwise this tool will search for legalcode files\n"
-            f"in '{LICENSES_DIR}'."
-        ),
         action="store_true",
+        help="process local filesystem legalcode files to determine valid"
+        " license paths (uses LICENSE_LOCAL_PATH environment variable and"
+        f" falls back to default: '{LICENSES_DIR}')",
     )
-    parser_legalcode.set_defaults(func=check_legalcode)
-    # deeds section: link_checker deeds -h
-    parser_deeds = subparsers.add_parser("deeds", help="deeds help")
-    parser_deeds.add_argument(
-        "--local",
-        help=(
-            "Scrapes deed files based on the legalcode files "
-            "found on the local file system.\n"
-            "Add 'LICENSE_LOCAL_PATH' to your environment,\n"
-            "otherwise this tool will search for legalcode files\n"
-            f"in '{LICENSES_DIR}'."
-        ),
+
+    # Shared reporting parser (optional arguments used by all reporting
+    # subcommands)
+    parser_shared_reporting = argparse.ArgumentParser(add_help=False)
+    parser_shared_reporting.add_argument(
+        "--output-errors",
+        nargs="?",
+        const="errorlog.txt",
+        type=argparse.FileType("w", encoding="utf-8"),
+        help="output all link errors to file (default: errorlog.txt) and"
+        " create junit-xml type summary (test-summary/junit-xml-report.xml)",
+        metavar="output_file",
+    )
+
+    # Shared RDF parser (optional arguments used by all RDF subcommands)
+    parser_shared_rdf = argparse.ArgumentParser(add_help=False)
+    parser_shared_rdf.add_argument(
+        "--local-index",
         action="store_true",
+        help="process local filesystem index.rdf (uses INDEX_RDF_LOCAL_PATH"
+        " environment variable and falls back to default: './index.rdf')",
+    )
+
+    # Subcommands
+
+    # Deeds subcommand: link_checker deeds -h
+    parser_deeds = subparsers.add_parser(
+        "deeds",
+        add_help=False,
+        help="check the links for each license's deed",
+        parents=[
+            parser_shared,
+            parser_shared_licenses,
+            parser_shared_reporting,
+        ],
     )
     parser_deeds.set_defaults(func=check_deeds)
-    # rdf section: link_checker rdf -h
-    parser_rdf = subparsers.add_parser("rdf", help="rdf help")
-    parser_rdf.add_argument(
-        "--local",
-        help=(
-            "Scrapes rdf files based on the legalcode files "
-            "found on the local file system.\n"
-            "Add 'LICENSE_LOCAL_PATH' to your environment,\n"
-            "otherwise this tool will search for legalcode files\n"
-            f"in '{LICENSES_DIR}'."
-        ),
-        action="store_true",
+
+    # Legalcode subcommand: link_checker legalcode -h
+    parser_legalcode = subparsers.add_parser(
+        "legalcode",
+        add_help=False,
+        help="check the links for each license's legalcode",
+        parents=[
+            parser_shared,
+            parser_shared_licenses,
+            parser_shared_reporting,
+        ],
     )
-    parser_rdf.add_argument(
-        "--index",
-        help=(
-            "Checks index.rdf file instead of checking rdf files.\n"
-            "If you want to check the index.rdf file locally add\n"
-            "'INDEX_RDF_LOCAL_PATH' to your environment; otherwise this\n"
-            "variable defaults to './index.rdf'."
-        ),
-        action="store_true",
+    parser_legalcode.set_defaults(func=check_legalcode)
+
+    # RDF subcommand: link_checker rdf -h
+    parser_rdf = subparsers.add_parser(
+        "rdf",
+        add_help=False,
+        help="check the links for each license's RDF",
+        parents=[
+            parser_shared,
+            parser_shared_licenses,
+            parser_shared_rdf,
+            parser_shared_reporting,
+        ],
     )
     parser_rdf.set_defaults(func=check_rdfs)
 
-    args = parser.parse_args(arguments)
-    if args.root_url is None:
-        args.root_url = DEFAULT_ROOT_URL
+    # index.rdf subcommand: link_checker index -h
+    parser_index = subparsers.add_parser(
+        "index",
+        add_help=False,
+        help="check the links within index.rdf",
+        parents=[parser_shared, parser_shared_rdf, parser_shared_reporting],
+    )
+    parser_index.set_defaults(func=check_index_rdf)
+
+    # complete subcommand: link_checker complete -h
+    parser_complete = subparsers.add_parser(
+        "complete",
+        add_help=False,
+        help="Complete check (deeds, legalcode, rdf, and index)",
+        parents=[
+            parser_shared,
+            parser_shared_licenses,
+            parser_shared_rdf,
+            parser_shared_reporting,
+        ],
+    )
+    parser_complete.set_defaults(func=check_complete)
+
+    # Canonical License URLs subcommand: link_checker canonical -h
+    parser_canonical = subparsers.add_parser(
+        "canonical",
+        add_help=False,
+        help="print canonical license URLs",
+        parents=[parser_shared, parser_shared_licenses],
+    )
+    parser_canonical.set_defaults(func=print_canonical)
+    parser_canonical.add_argument(
+        "--include-gnu",
+        action="store_true",
+        help="include GNU licenses in addition to Creative Commons licenses",
+    )
+
+    args = parser.parse_args()
     args.log_level = WARNING
     if args.verbosity:
         for v in args.verbosity:
@@ -178,91 +213,14 @@ def parse_argument(arguments):
             args.log_level = DEBUG
         elif args.log_level > CRITICAL:
             args.log_level = CRITICAL
-    if not args.output_errors:
+    del args.verbosity
+    if "output_errors" not in args or not args.output_errors:
         args.output_errors = None
-    return args
-
-
-def check_legalcode(args):
-    print("\n\nChecking LegalCode License...\n\n")
-    license_names = get_legalcode(args)
-    if args.log_level <= INFO:
-        print("Number of files to be checked:", len(license_names))
-    errors_total = 0
-    exit_status = 0
-    for license_name in license_names:
-        caught_errors = 0
-        context_printed = False
-        filename = license_name[: -len(".html")]
-        base_url = create_base_link(args, filename)
-        context = f"\n\nChecking: {license_name}\nURL: {base_url}"
-        if args.local:
-            source_html = request_local_text(LICENSE_LOCAL_PATH, license_name)
-        else:
-            page_url = "{}{}".format(LICENSE_GITHUB_BASE, license_name)
-            source_html = request_text(page_url)
-        license_soup = BeautifulSoup(source_html, "lxml")
-        links_found = license_soup.find_all("a")
-        link_count = len(links_found)
-        if args.log_level <= INFO:
-            print(f"{context}\nNumber of links found: {link_count}")
-            context_printed = True
-        valid_anchors, valid_links, context_printed = get_scrapable_links(
-            args, base_url, links_found, context, context_printed
-        )
-        if valid_links:
-            memoized_results = get_memoized_result(valid_links, valid_anchors)
-            stored_links = memoized_results[0]
-            stored_anchors = memoized_results[1]
-            stored_result = memoized_results[2]
-            check_links = memoized_results[3]
-            check_anchors = memoized_results[4]
-            if check_links:
-                rs = (
-                    # Since we're only checking for validity, we can retreive
-                    # only the headers/metadata
-                    grequests.head(link, timeout=REQUESTS_TIMEOUT)
-                    for link in check_links
-                )
-                responses = list()
-                # Explicitly close connections to free up file handles and
-                # avoid Connection Errors per:
-                # https://stackoverflow.com/a/22839550
-                for response in grequests.map(
-                    rs, exception_handler=exception_handler
-                ):
-                    try:
-                        responses.append(response.status_code)
-                        response.close()
-                    except AttributeError:
-                        responses.append(response)
-                memoize_result(check_links, responses)
-                stored_anchors += check_anchors
-                stored_result += responses
-            stored_links += check_links
-            caught_errors = write_response(
-                args,
-                stored_links,
-                stored_result,
-                base_url,
-                license_name,
-                stored_anchors,
-                context,
-                context_printed,
-            )
-
-        if caught_errors:
-            errors_total += caught_errors
-            exit_status = 1
-
-    print("\nCompleted in: {}".format(time.time() - START_TIME))
 
-    if args.output_errors:
-        output_summary(args, license_names, errors_total)
-        print("\nError file present at: ", args.output_errors.name)
-        output_test_summary(errors_total)
+    if args.log_level == DEBUG:
+        print(f"DEBUG: args: {args}")
 
-    return [exit_status, 0, 0]
+    return args
 
 
 def check_deeds(args):
@@ -354,8 +312,90 @@ def check_deeds(args):
     return [0, exit_status, 0]
 
 
-def check_rdfs(args):
-    if args.index:
+def check_legalcode(args):
+    print("\n\nChecking LegalCode License...\n\n")
+    license_names = get_legalcode(args)
+    if args.log_level <= INFO:
+        print("Number of files to be checked:", len(license_names))
+    errors_total = 0
+    exit_status = 0
+    for license_name in license_names:
+        caught_errors = 0
+        context_printed = False
+        filename = license_name[: -len(".html")]
+        base_url = create_base_link(args, filename)
+        context = f"\n\nChecking: {license_name}\nURL: {base_url}"
+        if args.local:
+            source_html = request_local_text(LICENSE_LOCAL_PATH, license_name)
+        else:
+            page_url = "{}{}".format(LICENSE_GITHUB_BASE, license_name)
+            source_html = request_text(page_url)
+        license_soup = BeautifulSoup(source_html, "lxml")
+        links_found = license_soup.find_all("a")
+        link_count = len(links_found)
+        if args.log_level <= INFO:
+            print(f"{context}\nNumber of links found: {link_count}")
+            context_printed = True
+        valid_anchors, valid_links, context_printed = get_scrapable_links(
+            args, base_url, links_found, context, context_printed
+        )
+        if valid_links:
+            memoized_results = get_memoized_result(valid_links, valid_anchors)
+            stored_links = memoized_results[0]
+            stored_anchors = memoized_results[1]
+            stored_result = memoized_results[2]
+            check_links = memoized_results[3]
+            check_anchors = memoized_results[4]
+            if check_links:
+                rs = (
+                    # Since we're only checking for validity, we can retreive
+                    # only the headers/metadata
+                    grequests.head(link, timeout=REQUESTS_TIMEOUT)
+                    for link in check_links
+                )
+                responses = list()
+                # Explicitly close connections to free up file handles and
+                # avoid Connection Errors per:
+                # https://stackoverflow.com/a/22839550
+                for response in grequests.map(
+                    rs, exception_handler=exception_handler
+                ):
+                    try:
+                        responses.append(response.status_code)
+                        response.close()
+                    except AttributeError:
+                        responses.append(response)
+                memoize_result(check_links, responses)
+                stored_anchors += check_anchors
+                stored_result += responses
+            stored_links += check_links
+            caught_errors = write_response(
+                args,
+                stored_links,
+                stored_result,
+                base_url,
+                license_name,
+                stored_anchors,
+                context,
+                context_printed,
+            )
+
+        if caught_errors:
+            errors_total += caught_errors
+            exit_status = 1
+
+    print("\nCompleted in: {}".format(time.time() - START_TIME))
+
+    if args.output_errors:
+        output_summary(args, license_names, errors_total)
+        print("\nError file present at: ", args.output_errors.name)
+        output_test_summary(errors_total)
+
+    return [exit_status, 0, 0]
+
+
+def check_rdfs(args, index=False):
+    if index:
         print("\n\nChecking index.rdf...\n\n")
         rdf_obj_list = get_index_rdf(args)
     else:
@@ -446,44 +486,81 @@ def check_rdfs(args):
     return [0, 0, exit_status]
 
 
-def main():
-    args = parse_argument(sys.argv[1:])
-    args_dict = vars(args)
-    run_sub_command = args_dict.get("func", False)
-    no_parser_args = not any(
-        [args.legalcode, args.deeds, args.rdf, run_sub_command]
-    )
-    all_parser_args_but_no_subparser_args = (
-        all([args.legalcode, args.deeds, args.rdf]) and not run_sub_command
-    )
-    run_full_inspection = (
-        no_parser_args or all_parser_args_but_no_subparser_args
+def check_index_rdf(args):
+    exit_status_list = check_rdfs(args, index=True)
+    return exit_status_list
+
+
+def check_complete(args):
+    print(
+        "Running Full Inspection:"
+        " Checking links for LegalCode, Deeds, RDF, and index.rdf"
     )
-    exit_status_list = []
-    if run_sub_command:
-        exit_status_list = args.func(args)
-    if args.legalcode and not all_parser_args_but_no_subparser_args:
-        exit_status_list = check_legalcode(args)
-    if args.deeds and not all_parser_args_but_no_subparser_args:
-        exit_status_list = check_deeds(args)
-    if args.rdf and not all_parser_args_but_no_subparser_args:
-        exit_status_list = check_rdfs(args)
-    if run_full_inspection:
-        print(
-            "\nRunning Full Inspection:"
-            " Checking Links for LegalCode, Deed, RDF, and index.rdf files"
-        )
-        exit_status_legalcode, y, z = check_legalcode(args)
-        x, exit_status_deeds, z = check_deeds(args)
-        x, y, exit_status_rdf = check_rdfs(args)
-        args.index = True
-        x, y, exit_status_index_rdf = check_rdfs(args)
-        exit_status_list = [
-            exit_status_legalcode,
-            exit_status_deeds,
-            exit_status_rdf,
-            exit_status_index_rdf,
-        ]
+    exit_status_legalcode, _, _ = check_legalcode(args)
+    _, exit_status_deeds, _ = check_deeds(args)
+    _, _, exit_status_rdf = check_rdfs(args)
+    _, _, exit_status_index_rdf = check_rdfs(args, index=True)
+    exit_status_list = [
+        exit_status_legalcode,
+        exit_status_deeds,
+        exit_status_rdf,
+        exit_status_index_rdf,
+    ]
+    return exit_status_list
+
+
+def print_canonical(args):
+    license_names = get_legalcode(args)
+    grouped = [
+        set(),  # 0: by* 4.0 licenses
+        set(),  # 1: by* 3.0 licenses
+        set(),  # 2: by* 2.5 licenses
+        set(),  # 3: by* 2.1 licenses
+        set(),  # 4: by* 2.0 licenses
+        set(),  # 5: by* 1.x licenses
+        set(),  # 6: miscellaneous licenses
+        set(),  # 7: zero 1.0 public domain
+        set(),  # 8: miscellaneous public domain
+    ]
+    for license_name in license_names:
+        if not args.include_gnu:
+            testname = license_name.lower()
+            if testname.startswith("gpl") or testname.startswith("lgpl"):
+                continue
+        filename = license_name[: -len(".html")]
+        url = create_base_link(args, filename, for_canonical=True)
+        parts = url.split("/")
+        bystar_starts = ("by", "nc", "nd", "sa")
+        if parts[3] == "licenses" and parts[4].startswith(bystar_starts):
+            if parts[5].startswith("4"):
+                grouped[0].add(url)
+            elif parts[5].startswith("3"):
+                grouped[1].add(url)
+            elif parts[5] == "2.5":
+                grouped[2].add(url)
+            elif parts[5] == "2.1":
+                grouped[3].add(url)
+            elif parts[5] == "2.0":
+                grouped[4].add(url)
+            elif parts[5].startswith("1"):
+                grouped[5].add(url)
+            else:
+                grouped[6].add(url)
+        elif parts[3] == "publicdomain" and parts[4] == "zero":
+            grouped[7].add(url)
+        else:
+            grouped[8].add(url)
+    for urls in grouped:
+        urls = list(urls)
+        urls.sort()
+        for url in urls:
+            print(url)
+    return [0, 0, 0]
+
+
+def main():
+    args = parse_arguments()
+    exit_status_list = args.func(args)
     if 1 in exit_status_list:
         return sys.exit(1)
     return sys.exit(0)
diff --git a/link_checker/utils.py b/link_checker/utils.py
index 36e8a81..64a6886 100644
--- a/link_checker/utils.py
+++ b/link_checker/utils.py
@@ -397,7 +397,9 @@ def get_scrapable_links(
     return (valid_anchors, valid_links, context_printed)
 
 
-def create_base_link(args, filename, for_deeds=False, for_rdfs=False):
+def create_base_link(
+    args, filename, for_deeds=False, for_rdfs=False, for_canonical=False
+):
     """Generates base URL on which the license file will be displayed
 
     Args:
@@ -437,7 +439,10 @@ def create_base_link(args, filename, for_deeds=False, for_rdfs=False):
     if jurisdiction:
         url = posixpath.join(url, jurisdiction)
 
-    url = posixpath.join(url, legalcode)
+    if for_canonical:
+        url = posixpath.join(url, "")
+    else:
+        url = posixpath.join(url, legalcode)
     if for_deeds:
         url = get_url_from_legalcode_url(url)
     if for_rdfs:

From 20772ebe037405111235cf4e985b71b1ac0ad230 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Fri, 4 Sep 2020 21:44:55 -0700
Subject: [PATCH 03/15] updated variables for refactor and improved comments

---
 link_checker/__main__.py | 10 ++++------
 link_checker/utils.py    | 35 ++++++++++++++++++-----------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 60e97e7..94745fd 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -325,7 +325,7 @@ def check_legalcode(args):
         filename = license_name[: -len(".html")]
         base_url = create_base_link(args, filename)
         context = f"\n\nChecking: {license_name}\nURL: {base_url}"
-        if args.local:
+        if args.local_index:
             source_html = request_local_text(LICENSE_LOCAL_PATH, license_name)
         else:
             page_url = "{}{}".format(LICENSE_GITHUB_BASE, license_name)
@@ -402,7 +402,7 @@ def check_rdfs(args, index=False):
         print("\n\nChecking RDFs...\n\n")
         rdf_obj_list = get_rdf(args)
     if args.log_level <= INFO:
-        if not args.index:
+        if not index:
             print("Number of rdf files to be checked:", len(rdf_obj_list))
         else:
             print(
@@ -415,12 +415,10 @@ def check_rdfs(args, index=False):
         caught_errors = 0
         context_printed = False
         rdf_url = (
-            rdf_obj["rdf:about"]
-            if args.index
-            else f'{rdf_obj["rdf:about"]}rdf'
+            rdf_obj["rdf:about"] if index else f"{rdf_obj['rdf:about']}rdf"
         )
         links_found = get_links_from_rdf(rdf_obj)
-        checking = "URL" if not args.index else "RDF_ABOUT"
+        checking = "URL" if not index else "RDF_ABOUT"
         context = f"\n\nChecking: \n{checking}: {rdf_url}"
         link_count = len(links_found)
         if args.log_level <= INFO:
diff --git a/link_checker/utils.py b/link_checker/utils.py
index 64a6886..671a4ec 100644
--- a/link_checker/utils.py
+++ b/link_checker/utils.py
@@ -162,7 +162,7 @@ def get_local_legalcode():
 
 
 def get_rdf(args):
-    """Helper function that determines rdf urls
+    """Helper function that determines RDF urls
     from license_names found locally or on github and
     returns a list of valid rdf objects.
 
@@ -188,13 +188,13 @@ def get_rdf(args):
 
 
 def get_index_rdf(args, local_path=""):
-    """Determine if local rdf files or remote rdf files
-    should be parsed and call the appropriate function.
+    """Determine if local index.rdf file or remote index.rdf file
+    should be parsed and then call the appropriate function.
 
     Returns:
-        rdf_obj_list: list of rdf objects found in index.rdf
+        rdf_obj_list: list of RDF objects found in index.rdf
     """
-    if args.local:
+    if args.local_index:
         rdf_obj_list = get_local_index_rdf(local_path)
     else:
         rdf_obj_list = get_remote_index_rdf()
@@ -202,7 +202,7 @@ def get_index_rdf(args, local_path=""):
 
 
 def get_remote_index_rdf():
-    """This function reads rdfs found at
+    """This function reads RDFs found at
     https://creativecommons.org/licenses/index.rdf
 
     Returns:
@@ -220,12 +220,12 @@ def get_local_index_rdf(local_path=""):
     """This function reads from index.rdf stored locally
 
     Parameters:
-        local_path: path to rdf file. If not supplied
-        the INDEX_RDF_LOCAL_PATH constant is used
-        (which uses your environment or defaults to
-        "./index.rdf"; see constants.py)
+        local_path: path to index.rdf file. If not supplied
+                    the INDEX_RDF_LOCAL_PATH constant is used
+                    (which uses your environment or defaults to
+                    "./index.rdf"; see constants.py)
     Returns:
-        rdf_obj_list: list of rdf objects found in index.rdf
+        rdf_obj_list: list of RDF objects found in index.rdf
     """
     try:
         local_path = local_path or INDEX_RDF_LOCAL_PATH
@@ -244,11 +244,11 @@ def get_local_index_rdf(local_path=""):
 
 
 def get_links_from_rdf(rdf_obj):
-    """This function parses an rdf and returns links found
+    """This function parses an RDF and returns links found
     Parameters:
         rdf_obj: soup object
     Returns:
-        links_found: list of link dictionaries found in rdf soup object
+        links_found: list of link dictionaries found in RDF soup object
     """
     tags = rdf_obj.findChildren()
     links_found = []
@@ -326,8 +326,9 @@ def get_scrapable_links(
         links_found (list): List of all the links found in file
 
     Returns:
-        set: valid_anchors - list of all scrapable anchor tags
-                valid_links - list of all absolute scrapable links
+        list: valid_anchors - list of all scrapable anchor tags
+        list: valid_links - list of all absolute scrapable links
+        bool: context_printed
     """
     valid_links = []
     valid_anchors = []
@@ -344,7 +345,7 @@ def get_scrapable_links(
                     #     "  {:<24}{}".format("Skipping internal link ", link)
                     # )
                     continue
-                if href.startswith("mailto:"):
+                elif href.startswith("mailto:"):
                     # mailto links are valid, but out of scope
                     # No need to report non-issue (not actionable)
                     # warnings.append
@@ -375,7 +376,7 @@ def get_scrapable_links(
                 #     "  {:<24}{}".format("Skipping internal link ", link)
                 # )
                 continue
-            if href.startswith("mailto:"):
+            elif href.startswith("mailto:"):
                 # mailto links are valid, but out of scope
                 # No need to report non-issue (not actionable)
                 # warnings.append

From 3349907167c43d374e77a1ad3a209c2e6d355a81 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Fri, 4 Sep 2020 21:57:23 -0700
Subject: [PATCH 04/15] replaced duplicate print statements with single INFO
 print statement

---
 link_checker/__main__.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 94745fd..ba48181 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -302,7 +302,6 @@ def check_deeds(args):
                 errors_total += caught_errors
                 exit_status = 1
 
-    print("\nCompleted in: {}".format(time.time() - START_TIME))
 
     if args.output_errors:
         output_summary(args, license_names, errors_total)
@@ -384,8 +383,6 @@ def check_legalcode(args):
             errors_total += caught_errors
             exit_status = 1
 
-    print("\nCompleted in: {}".format(time.time() - START_TIME))
-
     if args.output_errors:
         output_summary(args, license_names, errors_total)
         print("\nError file present at: ", args.output_errors.name)
@@ -474,8 +471,6 @@ def check_rdfs(args, index=False):
             errors_total += caught_errors
             exit_status = 1
 
-    print("\nCompleted in: {}".format(time.time() - START_TIME))
-
     if args.output_errors:
         output_summary(args, rdf_obj_list, errors_total)
         print("\nError file present at: ", args.output_errors.name)
@@ -559,6 +554,9 @@ def print_canonical(args):
 def main():
     args = parse_arguments()
     exit_status_list = args.func(args)
+    if args.log_level <= INFO:
+        print()
+        print(f"Completed in: {time.time() - START_TIME:.2f} seconds")
     if 1 in exit_status_list:
         return sys.exit(1)
     return sys.exit(0)

From fd3be17208dabafad49781effe1992c327a9ed2f Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 09:48:43 -0700
Subject: [PATCH 05/15] refactored output and exit status handling

- output summaries now handled in a single location
- exit status handling refactored and simplified to match subcommand
  organization
---
 link_checker/__main__.py | 76 ++++++++++++++++++++--------------------
 link_checker/utils.py    | 37 +++++++++++++------
 2 files changed, 64 insertions(+), 49 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index ba48181..79d3e55 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -41,8 +41,7 @@
     exception_handler,
     memoize_result,
     write_response,
-    output_summary,
-    output_test_summary,
+    output_summaries,
 )
 
 
@@ -176,11 +175,11 @@ def parse_arguments():
     )
     parser_index.set_defaults(func=check_index_rdf)
 
-    # complete subcommand: link_checker complete -h
-    parser_complete = subparsers.add_parser(
-        "complete",
+    # combined subcommand: link_checker combined -h
+    parser_combined = subparsers.add_parser(
+        "combined",
         add_help=False,
-        help="Complete check (deeds, legalcode, rdf, and index)",
+        help="Combined check (deeds, legalcode, rdf, and index)",
         parents=[
             parser_shared,
             parser_shared_licenses,
@@ -188,7 +187,7 @@ def parse_arguments():
             parser_shared_reporting,
         ],
     )
-    parser_complete.set_defaults(func=check_complete)
+    parser_combined.set_defaults(func=check_combined)
 
     # Canonical License URLs subcommand: link_checker canonical -h
     parser_canonical = subparsers.add_parser(
@@ -302,13 +301,7 @@ def check_deeds(args):
                 errors_total += caught_errors
                 exit_status = 1
 
-
-    if args.output_errors:
-        output_summary(args, license_names, errors_total)
-        print("\nError file present at: ", args.output_errors.name)
-        output_test_summary(errors_total)
-
-    return [0, exit_status, 0]
+    return license_names, errors_total, exit_status
 
 
 def check_legalcode(args):
@@ -324,7 +317,7 @@ def check_legalcode(args):
         filename = license_name[: -len(".html")]
         base_url = create_base_link(args, filename)
         context = f"\n\nChecking: {license_name}\nURL: {base_url}"
-        if args.local_index:
+        if args.local:
             source_html = request_local_text(LICENSE_LOCAL_PATH, license_name)
         else:
             page_url = "{}{}".format(LICENSE_GITHUB_BASE, license_name)
@@ -383,12 +376,7 @@ def check_legalcode(args):
             errors_total += caught_errors
             exit_status = 1
 
-    if args.output_errors:
-        output_summary(args, license_names, errors_total)
-        print("\nError file present at: ", args.output_errors.name)
-        output_test_summary(errors_total)
-
-    return [exit_status, 0, 0]
+    return license_names, errors_total, exit_status
 
 
 def check_rdfs(args, index=False):
@@ -471,17 +459,12 @@ def check_rdfs(args, index=False):
             errors_total += caught_errors
             exit_status = 1
 
-    if args.output_errors:
-        output_summary(args, rdf_obj_list, errors_total)
-        print("\nError file present at: ", args.output_errors.name)
-        output_test_summary(errors_total)
-
-    return [0, 0, exit_status]
+    return rdf_obj_list, errors_total, exit_status
 
 
 def check_index_rdf(args):
     exit_status_list = check_rdfs(args, index=True)
-    return exit_status_list
+    return license_names, errors_total, exit_status_list
 
 
 def check_complete(args):
@@ -489,17 +472,35 @@ def check_complete(args):
         "Running Full Inspection:"
         " Checking links for LegalCode, Deeds, RDF, and index.rdf"
     )
-    exit_status_legalcode, _, _ = check_legalcode(args)
-    _, exit_status_deeds, _ = check_deeds(args)
-    _, _, exit_status_rdf = check_rdfs(args)
-    _, _, exit_status_index_rdf = check_rdfs(args, index=True)
+    license_names = []
+    errors_total = 0
+    exit_status = 0
+
+    names, total, exit_status_legalcode = check_legalcode(args)
+    license_names += names
+    errors_total += total
+
+    names, total, exit_status_deeds = check_deeds(args)
+    license_names += names
+    errors_total += total
+
+    names, total, exit_status_rdf = check_rdfs(args)
+    license_names += names
+    errors_total += total
+
+    names, total, exit_status_index_rdf = check_rdfs(args, index=True)
+    license_names += names
+    errors_total += total
+
     exit_status_list = [
         exit_status_legalcode,
         exit_status_deeds,
         exit_status_rdf,
         exit_status_index_rdf,
     ]
-    return exit_status_list
+    if 1 in exit_status_list:
+        exit_status = 1
+    return license_names, errors_total, exit_status
 
 
 def print_canonical(args):
@@ -548,18 +549,17 @@ def print_canonical(args):
         urls.sort()
         for url in urls:
             print(url)
-    return [0, 0, 0]
+    return [], 0, 0
 
 
 def main():
     args = parse_arguments()
-    exit_status_list = args.func(args)
+    license_names, errors_total, exit_status = args.func(args)
+    output_summaries(args, license_names, errors_total)
     if args.log_level <= INFO:
         print()
         print(f"Completed in: {time.time() - START_TIME:.2f} seconds")
-    if 1 in exit_status_list:
-        return sys.exit(1)
-    return sys.exit(0)
+    return sys.exit(exit_status)
 
 
 if __name__ == "__main__":
diff --git a/link_checker/utils.py b/link_checker/utils.py
index 671a4ec..a3514d7 100644
--- a/link_checker/utils.py
+++ b/link_checker/utils.py
@@ -338,7 +338,7 @@ def get_scrapable_links(
             try:
                 href = link["href"]
             except KeyError:
-                if href[0] == "#":
+                if href.startswith("#"):
                     # anchor links are valid, but out of scope
                     # No need to report non-issue (not actionable)
                     # warnings.append(
@@ -362,14 +362,22 @@ def get_scrapable_links(
                     try:
                         assert link["name"]
                         warnings.append(
-                            "  {:<24}{}".format("Anchor uses name", link)
+                            "  {:<24}{}".format("Anchor uses name",
+                            str(link).replace("\n", ""))
                         )
                     except:
                         warnings.append(
-                            "  {:<24}{}".format("Anchor w/o href or id", link)
+                            "  {:<24}{}".format("Anchor w/o href or id",
+                            str(link).replace("\n", ""))
                         )
                 continue
-            if href != "" and href[0] == "#":
+            if href == "":
+                warnings.append(
+                    "  {:<24}{}".format("Empty href",
+                    str(link).replace("\n", ""))
+                )
+                continue
+            elif href.startswith("#"):
                 # anchor links are valid, but out of scope
                 # No need to report non-issue (not actionable)
                 # warnings.append(
@@ -586,12 +594,10 @@ def write_response(
                     if not context_printed:
                         print(context)
                     print("Errors:")
-                output_write(
-                    args, "\n{}\nURL: {}".format(license_name, base_url)
-                )
-            result = "  {:<24}{}\n{}{}".format(
-                str(status), all_links[idx], " " * 26, valid_anchors[idx]
-            )
+                output_write(args, f"\n{license_name}\nURL: {base_url}")
+            link = all_links[idx]
+            anchor = str(valid_anchors[idx]).replace("\n", "").strip()
+            result = f"  {str(status):<24}{link}\n{'':<26}{anchor}"
             if args.log_level <= ERROR:
                 print(result)
             output_write(args, result)
@@ -619,7 +625,7 @@ def output_write(args, *args_, **kwargs):
         print(*args_, **kwargs)
 
 
-def output_summary(args, license_names, num_errors):
+def output_issues_summary(args, license_names, num_errors):
     """Prints short summary of broken links in the output error file
 
     Args:
@@ -661,3 +667,12 @@ def output_test_summary(errors_total):
             )
         ts = TestSuite("cc-link-checker", [test_case])
         to_xml_report_file(test_summary, [ts])
+
+
+def output_summaries(args, license_names, errors_total):
+    if not args.output_errors:
+        return
+    output_issues_summary(args, license_names, errors_total)
+    if args.log_level <= INFO:
+        print("\nOutput to error file: ", args.output_errors.name)
+    output_test_summary(errors_total)

From eae0a3a16d52dcd19fae49a132cbe1c78f101b3d Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 11:12:02 -0700
Subject: [PATCH 06/15] added --limit option and improved output

- added --limit option to reduce time required for testing and iteration
- replaced str.format() calls with f-strings
- added additional DEBUG statements (seen with -vv)
---
 link_checker/__main__.py | 30 ++++++++++++++++++------------
 link_checker/utils.py    | 28 +++++++++++++++++++---------
 2 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 79d3e55..678f329 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -46,10 +46,10 @@
 
 
 def parse_arguments():
-    """parse arguments from cli
+    """parse arguments from CLI
 
     Args:
-        args (list): list of arguments parsed from command line
+        args (list): list of arguments parsed from command line interface
     """
 
     # Primary argument parser and sub-parser (for subcommands)
@@ -77,6 +77,17 @@ def parse_arguments():
         help="decrease verbosity (can be specified multiple times)",
         dest="verbosity",
     )
+    parser_shared.add_argument(
+        "--root-url",
+        default=DEFAULT_ROOT_URL,
+        help=f"set root URL (default: '{DEFAULT_ROOT_URL}')",
+    )
+    parser_shared.add_argument(
+        "--limit",
+        default=10,
+        type=int,
+        help=f"Limit check lists to specified integer (default: 10)",
+    )
     parser_shared.add_argument(
         "-v",
         "--verbose",
@@ -85,11 +96,6 @@ def parse_arguments():
         help="increase verbosity (can be specified multiple times)",
         dest="verbosity",
     )
-    parser_shared.add_argument(
-        "--root-url",
-        default=DEFAULT_ROOT_URL,
-        help=f"set root URL (default: '{DEFAULT_ROOT_URL}')",
-    )
 
     # Shared licenses parser (optional arguments used by all license
     # subcommands)
@@ -239,7 +245,7 @@ def check_deeds(args):
         # cc/engine/templates/legalcode/standard_deed.html
         # Scrapping the html found on the active site
         if deed_base_url:
-            context = f"\n\nChecking: \nURL: {deed_base_url}"
+            context = f"\n\nChecking: deed\nURL: {deed_base_url}"
             page_url = deed_base_url
             source_html = request_text(page_url)
             license_soup = BeautifulSoup(source_html, "lxml")
@@ -316,7 +322,7 @@ def check_legalcode(args):
         context_printed = False
         filename = license_name[: -len(".html")]
         base_url = create_base_link(args, filename)
-        context = f"\n\nChecking: {license_name}\nURL: {base_url}"
+        context = f"\n\nChecking: legalcode\nURL: {base_url}"
         if args.local:
             source_html = request_local_text(LICENSE_LOCAL_PATH, license_name)
         else:
@@ -388,10 +394,10 @@ def check_rdfs(args, index=False):
         rdf_obj_list = get_rdf(args)
     if args.log_level <= INFO:
         if not index:
-            print("Number of rdf files to be checked:", len(rdf_obj_list))
+            print("Number of RDF files to be checked:", len(rdf_obj_list))
         else:
             print(
-                "Number of rdf objects/sections to be checked in index.rdf:",
+                "Number of RDF objects/sections to be checked in index.rdf:",
                 len(rdf_obj_list),
             )
     errors_total = 0
@@ -467,7 +473,7 @@ def check_index_rdf(args):
     return license_names, errors_total, exit_status_list
 
 
-def check_complete(args):
+def check_combined(args):
     print(
         "Running Full Inspection:"
         " Checking links for LegalCode, Deeds, RDF, and index.rdf"
diff --git a/link_checker/utils.py b/link_checker/utils.py
index a3514d7..3756ae7 100644
--- a/link_checker/utils.py
+++ b/link_checker/utils.py
@@ -27,6 +27,8 @@
     TEST_ORDER,
     ERROR,
     WARNING,
+    INFO,
+    DEBUG,
 )
 
 
@@ -92,9 +94,15 @@ def get_legalcode(args):
         str[]: The list of license/deeds files found in the repository
     """
     if args.local:
+        if args.log_level == DEBUG:
+            print("DEBUG: processing local legacode files")
         license_names = get_local_legalcode()
     else:
+        if args.log_level == DEBUG:
+            print("DEBUG: processing GitHub legacode files")
         license_names = get_github_legalcode()
+    if args.limit and args.subcommand != "rdf":
+        license_names = license_names[0:args.limit]
     return license_names
 
 
@@ -175,8 +183,12 @@ def get_rdf(args):
     for license_name in license_names:
         filename = license_name[: -len(".html")]
         rdf_base_url = create_base_link(args, filename, for_rdfs=True)
+        if not rdf_base_url:
+            continue
         rdf_urls.append(rdf_base_url)
     unique_rdf_urls = list(set(rdf_urls))
+    if args.limit:
+        unique_rdf_urls = unique_rdf_urls[0:args.limit]
     for url in unique_rdf_urls:
         if url:
             page_text = request_text(url)
@@ -198,6 +210,8 @@ def get_index_rdf(args, local_path=""):
         rdf_obj_list = get_local_index_rdf(local_path)
     else:
         rdf_obj_list = get_remote_index_rdf()
+    if args.limit:
+        rdf_obj_list = rdf_obj_list[0:args.limit]
     return rdf_obj_list
 
 
@@ -353,6 +367,7 @@ def get_scrapable_links(
                     # )
                     continue
         else:
+            link_text = str(link).replace("\n", "")
             try:
                 href = link["href"]
             except KeyError:
@@ -362,20 +377,15 @@ def get_scrapable_links(
                     try:
                         assert link["name"]
                         warnings.append(
-                            "  {:<24}{}".format("Anchor uses name",
-                            str(link).replace("\n", ""))
+                            f"  {'Anchor uses name':<24}{link_text}"
                         )
                     except:
                         warnings.append(
-                            "  {:<24}{}".format("Anchor w/o href or id",
-                            str(link).replace("\n", ""))
+                            f"  {'Anchor w/o href or id':<24}{link_text}"
                         )
                 continue
             if href == "":
-                warnings.append(
-                    "  {:<24}{}".format("Empty href",
-                    str(link).replace("\n", ""))
-                )
+                warnings.append(f"  {'Empty href':<24}{link_text}")
                 continue
             elif href.startswith("#"):
                 # anchor links are valid, but out of scope
@@ -674,5 +684,5 @@ def output_summaries(args, license_names, errors_total):
         return
     output_issues_summary(args, license_names, errors_total)
     if args.log_level <= INFO:
-        print("\nOutput to error file: ", args.output_errors.name)
+        print("\nOutput to error file:", args.output_errors.name)
     output_test_summary(errors_total)

From 2ba2e755e133782381fe5c62fa9cadd7253233c6 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 11:17:32 -0700
Subject: [PATCH 07/15] black formatted and ignore flake8 E203 where it
 conflicts with black

---
 link_checker/utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/link_checker/utils.py b/link_checker/utils.py
index 3756ae7..934c371 100644
--- a/link_checker/utils.py
+++ b/link_checker/utils.py
@@ -102,7 +102,7 @@ def get_legalcode(args):
             print("DEBUG: processing GitHub legacode files")
         license_names = get_github_legalcode()
     if args.limit and args.subcommand != "rdf":
-        license_names = license_names[0:args.limit]
+        license_names = license_names[0 : args.limit]  # noqa: E203
     return license_names
 
 
@@ -188,7 +188,7 @@ def get_rdf(args):
         rdf_urls.append(rdf_base_url)
     unique_rdf_urls = list(set(rdf_urls))
     if args.limit:
-        unique_rdf_urls = unique_rdf_urls[0:args.limit]
+        unique_rdf_urls = unique_rdf_urls[0 : args.limit]  # noqa: E203
     for url in unique_rdf_urls:
         if url:
             page_text = request_text(url)
@@ -211,7 +211,7 @@ def get_index_rdf(args, local_path=""):
     else:
         rdf_obj_list = get_remote_index_rdf()
     if args.limit:
-        rdf_obj_list = rdf_obj_list[0:args.limit]
+        rdf_obj_list = rdf_obj_list[0 : args.limit]  # noqa: E203
     return rdf_obj_list
 
 

From a2851a86ade4ede3e30606f9382b4ad97ba5cfd2 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 11:20:03 -0700
Subject: [PATCH 08/15] fixed flake8 errors and fixed check_index_rdf function

---
 link_checker/__main__.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 678f329..cea7c08 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -86,7 +86,7 @@ def parse_arguments():
         "--limit",
         default=10,
         type=int,
-        help=f"Limit check lists to specified integer (default: 10)",
+        help="Limit check lists to specified integer (default: 10)",
     )
     parser_shared.add_argument(
         "-v",
@@ -469,8 +469,7 @@ def check_rdfs(args, index=False):
 
 
 def check_index_rdf(args):
-    exit_status_list = check_rdfs(args, index=True)
-    return license_names, errors_total, exit_status_list
+    return check_rdfs(args, index=True)
 
 
 def check_combined(args):

From 7664b65e0835a416af4723a07f473872fa0cc245 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 11:27:57 -0700
Subject: [PATCH 09/15] updated help text and TOC

---
 README.md | 202 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 147 insertions(+), 55 deletions(-)

diff --git a/README.md b/README.md
index e3591e5..563b5c4 100644
--- a/README.md
+++ b/README.md
@@ -13,12 +13,12 @@
     -   [User](#User)
     -   [Development](#Development)
 -   [Usage](#Usage)
-    -   [`-h` or `--help`](#-h-or---help)
-    -   [Default mode](#default-mode)
-    -   [`-q` or `--quiet`](#-q-or---quiet)
-    -   [`-v` or `--verbose`](#-v-or---verbose)
-    -   [`--output-error`](#--output-error)
-    -   [`--local`](#--local)
+    -   [deeds](#deeds)
+    -   [legalcode](#legalcode)
+    -   [rdf](#rdf)
+    -   [index](#index)
+    -   [combined](#combined)
+    -   [canonical](#canonical)
 -   [Integrating with CI](#Integrating-with-CI)
 -   [Unit Testing](#Unit-Testing)
 -   [Troubleshooting](#Troubleshooting)
@@ -84,92 +84,184 @@ environment and install dependencies
 ```shell
 pipenv run link_checker -h
 ```
-
 ```
-usage: link_checker.py [-h] [--legalcode] [--deeds] [--rdf] [--index] [--local]
-                       [--output-errors [output_file]] [-q] [--root-url ROOT_URL]
-                       [-v]
-                       {legalcode,deeds,rdf} ...
+usage: link_checker [-h] {deeds,legalcode,rdf,index,combined,canonical} ...
 
 Check for broken links in Creative Commons license deeds, legalcode, and rdf
 
-positional arguments:
-  {legalcode,deeds,rdf}
-                        sub-command help
-    legalcode           legalcode help
-    deeds               deeds help
-    rdf                 rdf help
+optional arguments:
+  -h, --help            show this help message and exit
+
+subcommands (a single subcomamnd is required):
+  {deeds,legalcode,rdf,index,combined,canonical}
+    deeds               check the links for each license's deed
+    legalcode           check the links for each license's legalcode
+    rdf                 check the links for each license's RDF
+    index               check the links within index.rdf
+    combined            Combined check (deeds, legalcode, rdf, and index)
+    canonical           print canonical license URLs
+
+Also see the help output each subcommand
+```
+
+
+### deeds
+
+```shell
+pipenv run link_checker deeds -h
+```
+```
+usage: link_checker deeds [-h] [-q] [--root-url ROOT_URL] [--limit LIMIT] [-v]
+                          [--local] [--output-errors [output_file]]
 
 optional arguments:
   -h, --help            show this help message and exit
-  --legalcode           Runs link_checker for legalcode only. (Note: --licenses is
-                        deprecated and will be dropped from a future release.
-                        Please use --legalcode instead.)
-  --deeds               Runs link_checker for deeds only (the legalcode files will
-                        still be scraped, but not checked for broken links)
-  --rdf                 Runs link_checker for rdf only
-  --index               Runs link_checker for index.rdf only
-  --local               Scrapes legalcode files from local file system
+  -q, --quiet           decrease verbosity (can be specified multiple times)
+  --root-url ROOT_URL   set root URL (default: 'https://creativecommons.org')
+  --limit LIMIT         Limit check lists to specified integer (default: 10)
+  -v, --verbose         increase verbosity (can be specified multiple times)
+  --local               process local filesystem legalcode files to determine
+                        valid license paths (uses LICENSE_LOCAL_PATH environment
+                        variable and falls back to default:
+                        '../creativecommons.org/docroot/legalcode')
   --output-errors [output_file]
-                        Outputs all link errors to file (default: errorlog.txt)
-                        and creates junit-xml type summary(test-summary/junit-xml-
+                        output all link errors to file (default: errorlog.txt) and
+                        create junit-xml type summary (test-summary/junit-xml-
                         report.xml)
-  -q, --quiet           Decrease verbosity. Can be specified multiple times.
-  --root-url ROOT_URL   Set root URL (default: https://creativecommons.org)
-  -v, --verbose         Increase verbosity. Can be specified multiple times.
 ```
 
+
 ### legalcode
 
 ```shell
 pipenv run link_checker legalcode -h
 ```
 ```
-usage: link_checker.py legalcode [-h] [--local]
+usage: link_checker legalcode [-h] [-q] [--root-url ROOT_URL] [--limit LIMIT] [-v]
+                              [--local] [--output-errors [output_file]]
 
 optional arguments:
-  -h, --help  show this help message and exit
-  --local     Scrapes legalcode files from local file system. Add
-              'LICENSE_LOCAL_PATH' to your environment, otherwise this tool will
-              search for legalcode files in
-              '../creativecommons.org/docroot/legalcode'.
+  -h, --help            show this help message and exit
+  -q, --quiet           decrease verbosity (can be specified multiple times)
+  --root-url ROOT_URL   set root URL (default: 'https://creativecommons.org')
+  --limit LIMIT         Limit check lists to specified integer (default: 10)
+  -v, --verbose         increase verbosity (can be specified multiple times)
+  --local               process local filesystem legalcode files to determine
+                        valid license paths (uses LICENSE_LOCAL_PATH environment
+                        variable and falls back to default:
+                        '../creativecommons.org/docroot/legalcode')
+  --output-errors [output_file]
+                        output all link errors to file (default: errorlog.txt) and
+                        create junit-xml type summary (test-summary/junit-xml-
+                        report.xml)
 ```
 
 
-### deeds
+### rdf
 
 ```shell
-pipenv run link_checker deeds -h
+pipenv run link_checker rdf -h
 ```
 ```
-usage: link_checker.py deeds [-h] [--local]
+usage: link_checker rdf [-h] [-q] [--root-url ROOT_URL] [--limit LIMIT] [-v]
+                        [--local] [--local-index] [--output-errors [output_file]]
 
 optional arguments:
-  -h, --help  show this help message and exit
-  --local     Scrapes deed files based on the legalcode files found on the local
-              file system. Add 'LICENSE_LOCAL_PATH' to your environment, otherwise
-              this tool will search for legalcode files in
-              '../creativecommons.org/docroot/legalcode'.
+  -h, --help            show this help message and exit
+  -q, --quiet           decrease verbosity (can be specified multiple times)
+  --root-url ROOT_URL   set root URL (default: 'https://creativecommons.org')
+  --limit LIMIT         Limit check lists to specified integer (default: 10)
+  -v, --verbose         increase verbosity (can be specified multiple times)
+  --local               process local filesystem legalcode files to determine
+                        valid license paths (uses LICENSE_LOCAL_PATH environment
+                        variable and falls back to default:
+                        '../creativecommons.org/docroot/legalcode')
+  --local-index         process local filesystem index.rdf (uses
+                        INDEX_RDF_LOCAL_PATH environment variable and falls back
+                        to default: './index.rdf')
+  --output-errors [output_file]
+                        output all link errors to file (default: errorlog.txt) and
+                        create junit-xml type summary (test-summary/junit-xml-
+                        report.xml)
 ```
 
 
-### rdf
+### index
 
 ```shell
-pipenv run link_checker rdf -h
+pipenv run link_checker index -h
+```
+```
+usage: link_checker index [-h] [-q] [--root-url ROOT_URL] [--limit LIMIT] [-v]
+                          [--local-index] [--output-errors [output_file]]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -q, --quiet           decrease verbosity (can be specified multiple times)
+  --root-url ROOT_URL   set root URL (default: 'https://creativecommons.org')
+  --limit LIMIT         Limit check lists to specified integer (default: 10)
+  -v, --verbose         increase verbosity (can be specified multiple times)
+  --local-index         process local filesystem index.rdf (uses
+                        INDEX_RDF_LOCAL_PATH environment variable and falls back
+                        to default: './index.rdf')
+  --output-errors [output_file]
+                        output all link errors to file (default: errorlog.txt) and
+                        create junit-xml type summary (test-summary/junit-xml-
+                        report.xml)
+```
+
+
+### combined
+
+```shell
+pipenv run link_checker combined -h
+```
+```
+usage: link_checker combined [-h] [-q] [--root-url ROOT_URL] [--limit LIMIT] [-v]
+                             [--local] [--local-index]
+                             [--output-errors [output_file]]
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -q, --quiet           decrease verbosity (can be specified multiple times)
+  --root-url ROOT_URL   set root URL (default: 'https://creativecommons.org')
+  --limit LIMIT         Limit check lists to specified integer (default: 10)
+  -v, --verbose         increase verbosity (can be specified multiple times)
+  --local               process local filesystem legalcode files to determine
+                        valid license paths (uses LICENSE_LOCAL_PATH environment
+                        variable and falls back to default:
+                        '../creativecommons.org/docroot/legalcode')
+  --local-index         process local filesystem index.rdf (uses
+                        INDEX_RDF_LOCAL_PATH environment variable and falls back
+                        to default: './index.rdf')
+  --output-errors [output_file]
+                        output all link errors to file (default: errorlog.txt) and
+                        create junit-xml type summary (test-summary/junit-xml-
+                        report.xml)
+```
+
+
+### canonical
+
+```shell
+pipenv run link_checker canonical -h
 ```
 ```
-usage: link_checker.py rdf [-h] [--local] [--index]
+usage: link_checker canonical [-h] [-q] [--root-url ROOT_URL] [--limit LIMIT] [-v]
+                              [--local] [--include-gnu]
 
 optional arguments:
-  -h, --help  show this help message and exit
-  --local     Scrapes rdf files based on the legalcode files found on the local
-              file system. Add 'LICENSE_LOCAL_PATH' to your environment, otherwise
-              this tool will search for legalcode files in
-              '../creativecommons.org/docroot/legalcode'.
-  --index     Checks index.rdf file instead of checking rdf files. If you want to
-              check the index.rdf file locally add 'INDEX_RDF_LOCAL_PATH' to your
-              environment; otherwise this variable defaults to './index.rdf'.
+  -h, --help           show this help message and exit
+  -q, --quiet          decrease verbosity (can be specified multiple times)
+  --root-url ROOT_URL  set root URL (default: 'https://creativecommons.org')
+  --limit LIMIT        Limit check lists to specified integer (default: 10)
+  -v, --verbose        increase verbosity (can be specified multiple times)
+  --local              process local filesystem legalcode files to determine valid
+                       license paths (uses LICENSE_LOCAL_PATH environment variable
+                       and falls back to default:
+                       '../creativecommons.org/docroot/legalcode')
+  --include-gnu        include GNU licenses in addition to Creative Commons
+                       licenses
 ```
 
 

From d7bd9eca5ec597c70b3be40b8b8d589d8cf31048 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 11:29:47 -0700
Subject: [PATCH 10/15] fixed --limit behavior (0 disables limit)

---
 link_checker/__main__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index cea7c08..9e40098 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -84,9 +84,9 @@ def parse_arguments():
     )
     parser_shared.add_argument(
         "--limit",
-        default=10,
+        default=0,
         type=int,
-        help="Limit check lists to specified integer (default: 10)",
+        help="Limit check lists to specified integer",
     )
     parser_shared.add_argument(
         "-v",

From 7cf54dc85f7478891174ad6e7eb8c7a3ecd9ef72 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 11:30:28 -0700
Subject: [PATCH 11/15] updated canonical help text

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 563b5c4..03e47d7 100644
--- a/README.md
+++ b/README.md
@@ -254,7 +254,7 @@ optional arguments:
   -h, --help           show this help message and exit
   -q, --quiet          decrease verbosity (can be specified multiple times)
   --root-url ROOT_URL  set root URL (default: 'https://creativecommons.org')
-  --limit LIMIT        Limit check lists to specified integer (default: 10)
+  --limit LIMIT        Limit check lists to specified integer
   -v, --verbose        increase verbosity (can be specified multiple times)
   --local              process local filesystem legalcode files to determine valid
                        license paths (uses LICENSE_LOCAL_PATH environment variable

From 0d52a57a2cd82892bfd766685168e3477a74ae84 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 12:15:54 -0700
Subject: [PATCH 12/15] refactored link_checker tests to use subcommands and
 parsers

---
 link_checker/__main__.py                |   6 +-
 link_checker/tests/test_link_checker.py | 172 ++++++++++++++----------
 2 files changed, 103 insertions(+), 75 deletions(-)

diff --git a/link_checker/__main__.py b/link_checker/__main__.py
index 9e40098..38d5d89 100755
--- a/link_checker/__main__.py
+++ b/link_checker/__main__.py
@@ -45,7 +45,7 @@
 )
 
 
-def parse_arguments():
+def parse_arguments(arguments):
     """parse arguments from CLI
 
     Args:
@@ -209,7 +209,7 @@ def parse_arguments():
         help="include GNU licenses in addition to Creative Commons licenses",
     )
 
-    args = parser.parse_args()
+    args = parser.parse_args(arguments)
     args.log_level = WARNING
     if args.verbosity:
         for v in args.verbosity:
@@ -558,7 +558,7 @@ def print_canonical(args):
 
 
 def main():
-    args = parse_arguments()
+    args = parse_arguments(sys.argv[1:])
     license_names, errors_total, exit_status = args.func(args)
     output_summaries(args, license_names, errors_total)
     if args.log_level <= INFO:
diff --git a/link_checker/tests/test_link_checker.py b/link_checker/tests/test_link_checker.py
index d133d30..5ba5221 100644
--- a/link_checker/tests/test_link_checker.py
+++ b/link_checker/tests/test_link_checker.py
@@ -2,75 +2,103 @@
 from link_checker import __main__ as link_checker
 
 
-def test_parse_argument(tmpdir):
-    # Test default options
-    args = link_checker.parse_argument([])
-    assert args.log_level == 30
-    assert bool(args.output_errors) is False
-    assert args.local is False
-    assert args.root_url == "https://creativecommons.org"
-    # Test --licenses
-    args = link_checker.parse_argument(["--legalcode"])
-    assert args.legalcode is True
-    args = link_checker.parse_argument(["legalcode"])
-    assert args.func.__name__ == "check_legalcode"
-    args = link_checker.parse_argument(["legalcode", "--local"])
-    assert args.local is True
-    # Test --deeds
-    args = link_checker.parse_argument(["--deeds"])
-    assert args.deeds is True
-    args = link_checker.parse_argument(["deeds"])
-    assert args.func.__name__ == "check_deeds"
-    args = link_checker.parse_argument(["deeds", "--local"])
-    assert args.local is True
-    # Test --rdf
-    args = link_checker.parse_argument(["--rdf"])
-    assert args.rdf is True
-    args = link_checker.parse_argument(["rdf"])
-    assert args.func.__name__ == "check_rdfs"
-    args = link_checker.parse_argument(["rdf", "--index"])
-    assert args.index is True
-    args = link_checker.parse_argument(["rdf", "--local"])
-    assert args.local is True
-    # Test --index
-    args = link_checker.parse_argument(["--index"])
-    assert args.index is True
-    # Test --local
-    args = link_checker.parse_argument(["--local"])
-    assert args.local is True
-    # Test Logging Levels -q/--quiet
-    args = link_checker.parse_argument(["-q"])
-    assert args.log_level == 40
-    args = link_checker.parse_argument(["-qq"])
-    assert args.log_level == 50
-    args = link_checker.parse_argument(["-qqq"])
-    assert args.log_level == 50
-    args = link_checker.parse_argument(["-q", "--quiet"])
-    assert args.log_level == 50
-    # Test Logging Levels -v/--verbose
-    args = link_checker.parse_argument(["-v"])
-    assert args.log_level == 20
-    args = link_checker.parse_argument(["-vv"])
-    assert args.log_level == 10
-    args = link_checker.parse_argument(["-vvv"])
-    assert args.log_level == 10
-    args = link_checker.parse_argument(["-v", "--verbose"])
-    assert args.log_level == 10
-    # Test Logging Levels with both -v and -q
-    args = link_checker.parse_argument(["-vq"])
-    assert args.log_level == 30
-    args = link_checker.parse_argument(["-vvq"])
-    assert args.log_level == 20
-    args = link_checker.parse_argument(["-vqq"])
-    assert args.log_level == 40
-    # Test default value of --output-errors
-    args = link_checker.parse_argument(["--output-errors"])
-    assert bool(args.output_errors) is True
-    assert args.output_errors.name == "errorlog.txt"
-    # Test custom value of --output-errors
-    output_file = tmpdir.join("errorlog.txt")
-    args = link_checker.parse_argument(
-        ["--output-errors", output_file.strpath]
-    )
-    assert bool(args.output_errors) is True
-    assert args.output_errors.name == output_file.strpath
+def test_parser_shared():
+    subcmds = ["deeds", "legalcode", "rdf", "index", "combined", "canonical"]
+
+    # Test defaults
+    for subcmd in subcmds:
+        args = link_checker.parse_arguments([subcmd])
+        assert args.limit == 0
+        assert args.log_level == 30
+        assert args.root_url == "https://creativecommons.org"
+
+    # Test arguments
+    for subcmd in subcmds:
+        # Test --limit
+        args = link_checker.parse_arguments([subcmd, "--limit", "10"])
+        assert args.limit == 10
+        args = link_checker.parse_arguments([subcmd, "--limit=100"])
+        assert args.limit == 100
+        # Test Logging Levels -q/--quiet
+        args = link_checker.parse_arguments([subcmd, "-q"])
+        assert args.log_level == 40
+        args = link_checker.parse_arguments([subcmd, "-qq"])
+        assert args.log_level == 50
+        args = link_checker.parse_arguments([subcmd, "-qqq"])
+        assert args.log_level == 50
+        args = link_checker.parse_arguments([subcmd, "-q", "--quiet"])
+        assert args.log_level == 50
+        # Test Logging Levels -v/--verbose
+        args = link_checker.parse_arguments([subcmd, "-v"])
+        assert args.log_level == 20
+        args = link_checker.parse_arguments([subcmd, "-vv"])
+        assert args.log_level == 10
+        args = link_checker.parse_arguments([subcmd, "-vvv"])
+        assert args.log_level == 10
+        args = link_checker.parse_arguments([subcmd, "-v", "--verbose"])
+        assert args.log_level == 10
+        # Test Logging Levels with both -v and -q
+        args = link_checker.parse_arguments([subcmd, "-vq"])
+        assert args.log_level == 30
+        args = link_checker.parse_arguments([subcmd, "-vvq"])
+        assert args.log_level == 20
+        args = link_checker.parse_arguments([subcmd, "-vqq"])
+        assert args.log_level == 40
+        # Test --root-url
+        args = link_checker.parse_arguments(
+            [subcmd, "--root-url", "https://pytest.creativecommons.org"]
+        )
+        assert args.root_url == "https://pytest.creativecommons.org"
+
+
+def test_parser_shared_licenses():
+    subcmds = ["deeds", "legalcode", "rdf", "combined", "canonical"]
+
+    # Test defaults
+    for subcmd in subcmds:
+        args = link_checker.parse_arguments([subcmd])
+        assert args.local is False
+
+    # Test arguments
+    for subcmd in subcmds:
+        # Test --local
+        args = link_checker.parse_arguments([subcmd, "--local"])
+        assert args.local is True
+
+
+def test_parser_shared_rdf():
+    subcmds = ["rdf", "index"]
+
+    # Test defaults
+    for subcmd in subcmds:
+        args = link_checker.parse_arguments([subcmd])
+        assert args.local_index is False
+
+    # Test arguments
+    for subcmd in subcmds:
+        # Test --local-index
+        args = link_checker.parse_arguments([subcmd, "--local-index"])
+        assert args.local_index is True
+
+
+def test_parser_shared_reporting(tmpdir):
+    subcmds = ["deeds", "legalcode", "rdf", "index", "combined"]
+
+    # Test defaults
+    for subcmd in subcmds:
+        args = link_checker.parse_arguments([subcmd])
+        assert bool(args.output_errors) is False
+
+    # Test argumetns
+    for subcmd in subcmds:
+        # Test --output-errors with default value
+        args = link_checker.parse_arguments([subcmd, "--output-errors"])
+        assert bool(args.output_errors) is True
+        assert args.output_errors.name == "errorlog.txt"
+        # Test --output-errors with custom value
+        output_file = tmpdir.join("errorlog.txt")
+        args = link_checker.parse_arguments(
+            [subcmd, "--output-errors", output_file.strpath]
+        )
+        assert bool(args.output_errors) is True
+        assert args.output_errors.name == output_file.strpath

From f1c69c9666ec192f59a9ba60b5c70816f7add303 Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Sat, 5 Sep 2020 13:00:47 -0700
Subject: [PATCH 13/15] updated utils tests to use subcommands

---
 link_checker/tests/test_utils.py | 125 +++++++++++++++++--------------
 1 file changed, 67 insertions(+), 58 deletions(-)

diff --git a/link_checker/tests/test_utils.py b/link_checker/tests/test_utils.py
index deb020a..5687d0a 100644
--- a/link_checker/tests/test_utils.py
+++ b/link_checker/tests/test_utils.py
@@ -24,7 +24,7 @@
     map_links_file,
     memoize_result,
     write_response,
-    output_summary,
+    output_issues_summary,
     output_write,
     output_test_summary,
 )
@@ -43,70 +43,75 @@ def test_get_github_legalcode():
     assert len(all_links) > 0
 
 
+def id_generator(data):
+    id_list = []
+    for license in data:
+        id_list.append(license[0])
+    return id_list
+
+
 license_url_data = [
     # 2 part URL
-    (
+    [
         "by-nc-nd_2.0",
-        "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
         "https://creativecommons.org/licenses/by-nc-nd/2.0/",
+        "https://creativecommons.org/licenses/by-nc-nd/2.0/legalcode",
         "https://creativecommons.org/licenses/by-nc-nd/2.0/rdf",
-    ),
+    ],
     # 3 part URL
-    (
+    [
         "by-nc-nd_4.0_cs",
-        "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode.cs",
         "https://creativecommons.org/licenses/by-nc-nd/4.0/deed.cs",
+        "https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode.cs",
         "https://creativecommons.org/licenses/by-nc-nd/4.0/rdf",
-    ),
+    ],
     # 4 part URL
-    (
+    [
         "by-nc-nd_3.0_rs_sr-Latn",
+        "https://creativecommons.org/licenses/by-nc-nd/3.0/rs/",
         "https://creativecommons.org/licenses/by-nc-nd/3.0/rs/"
         "legalcode.sr-Latn",
-        "https://creativecommons.org/licenses/by-nc-nd/3.0/rs/",
         "https://creativecommons.org/licenses/by-nc-nd/3.0/rs/rdf",
-    ),
+    ],
     # Special case - samplingplus
-    (
+    [
         "samplingplus_1.0",
-        "https://creativecommons.org/licenses/sampling+/1.0/legalcode",
         "https://creativecommons.org/licenses/sampling+/1.0/",
+        "https://creativecommons.org/licenses/sampling+/1.0/legalcode",
         "https://creativecommons.org/licenses/sampling+/1.0/rdf",
-    ),
-    (
+    ],
+    [
         "samplingplus_1.0_br",
-        "https://creativecommons.org/licenses/sampling+/1.0/br/legalcode",
         "https://creativecommons.org/licenses/sampling+/1.0/br/",
+        "https://creativecommons.org/licenses/sampling+/1.0/br/legalcode",
         "https://creativecommons.org/licenses/sampling+/1.0/br/rdf",
-    ),
+    ],
     # Special case - CC0
-    (
+    [
         "zero_1.0",
-        "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
         "https://creativecommons.org/publicdomain/zero/1.0/",
+        "https://creativecommons.org/publicdomain/zero/1.0/legalcode",
         "https://creativecommons.org/publicdomain/zero/1.0/rdf",
-    ),
+    ],
 ]
 
 
-def id_generator(data):
-    id_list = []
-    for license in data:
-        id_list.append(license[0])
-    return id_list
-
-
 @pytest.mark.parametrize(
-    "filename, result, deed_result, rdf_result",
+    "filename, deed_result, legalcode_result, rdf_result",
     license_url_data,
     ids=id_generator(license_url_data),
 )
-def test_create_base_link(filename, result, deed_result, rdf_result):
-    args = link_checker.parse_argument([])
-    baseURL = create_base_link(args, filename)
-    assert baseURL == result
+def test_create_base_link(filename, deed_result, legalcode_result, rdf_result):
+    # deeds
+    args = link_checker.parse_arguments(["deeds"])
     baseURL = create_base_link(args, filename, for_deeds=True)
     assert baseURL == deed_result
+    # legalcode
+    args = link_checker.parse_arguments(["legalcode"])
+    baseURL = create_base_link(args, filename)
+    assert baseURL == legalcode_result
+    # rdf
+    args = link_checker.parse_arguments(["rdf"])
     baseURL = create_base_link(args, filename, for_rdfs=True)
     assert baseURL == rdf_result
 
@@ -114,19 +119,19 @@ def test_create_base_link(filename, result, deed_result, rdf_result):
 def test_output_write(tmpdir):
     # output_errors is set and written to
     output_file = tmpdir.join("errorlog.txt")
-    args = link_checker.parse_argument(
-        ["--output-errors", output_file.strpath]
+    args = link_checker.parse_arguments(
+        ["deeds", "--output-errors", output_file.strpath]
     )
     output_write(args, "Output enabled")
     args.output_errors.flush()
     assert output_file.read() == "Output enabled\n"
 
 
-def test_output_summary(reset_global, tmpdir):
+def test_output_issues_summary(reset_global, tmpdir):
     # output_errors is set and written to
     output_file = tmpdir.join("errorlog.txt")
-    args = link_checker.parse_argument(
-        ["--output-errors", output_file.strpath]
+    args = link_checker.parse_arguments(
+        ["deeds", "--output-errors", output_file.strpath]
     )
     utils.MAP_BROKEN_LINKS = {
         "https://link1.demo": [
@@ -136,7 +141,7 @@ def test_output_summary(reset_global, tmpdir):
         "https://link2.demo": ["https://file4.url/here"],
     }
     all_links = ["some link"] * 5
-    output_summary(args, all_links, 3)
+    output_issues_summary(args, all_links, 3)
     args.output_errors.flush()
     lines = output_file.readlines()
     i = 0
@@ -198,7 +203,7 @@ def test_create_absolute_link(link, result):
 
 
 def test_get_scrapable_links():
-    args = link_checker.parse_argument([])
+    args = link_checker.parse_arguments(["deeds"])
     test_file = (
         "<a name='hello'>without href</a>,"
         " <a href='#hello'>internal link</a>,"
@@ -221,7 +226,7 @@ def test_get_scrapable_links():
         == "['https://creativecommons.ca', 'https://www.demourl.com/index']"
     )
     # Testing RDF
-    args = link_checker.parse_argument(["--local"])
+    args = link_checker.parse_arguments(["index", "--local-index"])
     rdf_obj_list = get_index_rdf(
         args, local_path=constants.TEST_RDF_LOCAL_PATH
     )
@@ -231,7 +236,7 @@ def test_get_scrapable_links():
     valid_anchors, valid_links, _ = get_scrapable_links(
         args, base_url, links_found, None, False, rdf=True,
     )
-    assert str(valid_anchors) == (
+    expected_anchors = (
         "[<cc:permits "
         'rdf:resource="http://creativecommons.org/ns#DerivativeWorks"/>, '
         "<cc:permits "
@@ -264,22 +269,26 @@ def test_get_scrapable_links():
         "<cc:requires "
         'rdf:resource="http://creativecommons.org/ns#Notice"/>]'
     )
-    assert str(valid_links) == (
-        "['http://creativecommons.org/ns#DerivativeWorks', "
-        "'http://creativecommons.org/ns#Reproduction', "
-        "'http://creativecommons.org/ns#Distribution', "
-        "'http://creativecommons.org/international/ch/', "
-        "'https://i.creativecommons.org/l/by-nc-sa/2.5/ch/88x31.png', "
-        "'https://i.creativecommons.org/l/by-nc-sa/2.5/ch/80x15.png', "
-        "'http://creativecommons.org/licenses/by-nc-sa/2.5/ch/legalcode.de', "
-        "'http://creativecommons.org/licenses/by-nc-sa/2.5/', "
-        "'http://creativecommons.org', "
-        "'http://creativecommons.org/ns#CommercialUse', "
-        "'http://creativecommons.org/license/', "
-        "'http://creativecommons.org/ns#ShareAlike', "
-        "'http://creativecommons.org/ns#Attribution', "
-        "'http://creativecommons.org/ns#Notice']"
-    )
+    assert str(valid_anchors) == expected_anchors
+    valid_links.sort()
+    expected_links = [
+        "http://creativecommons.org",
+        "http://creativecommons.org/international/ch/",
+        "http://creativecommons.org/license/",
+        "http://creativecommons.org/licenses/by-nc-sa/2.5/",
+        "http://creativecommons.org/licenses/by-nc-sa/2.5/ch/legalcode.de",
+        "http://creativecommons.org/ns#Attribution",
+        "http://creativecommons.org/ns#CommercialUse",
+        "http://creativecommons.org/ns#DerivativeWorks",
+        "http://creativecommons.org/ns#Distribution",
+        "http://creativecommons.org/ns#Notice",
+        "http://creativecommons.org/ns#Reproduction",
+        "http://creativecommons.org/ns#ShareAlike",
+        "https://i.creativecommons.org/l/by-nc-sa/2.5/ch/80x15.png",
+        "https://i.creativecommons.org/l/by-nc-sa/2.5/ch/88x31.png",
+    ]
+    expected_links.sort()
+    assert valid_links == expected_links
 
 
 def test_exception_handler():
@@ -307,8 +316,8 @@ def test_map_links_file(reset_global):
 def test_write_response(tmpdir):
     # Set config
     output_file = tmpdir.join("errorlog.txt")
-    args = link_checker.parse_argument(
-        ["--output-errors", output_file.strpath]
+    args = link_checker.parse_arguments(
+        ["deeds", "--output-errors", output_file.strpath]
     )
 
     # Text to extract valid_anchors

From 3bfca6a66bbc535d4ff9ec8f21c0575ea5460b0b Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Tue, 8 Sep 2020 07:44:03 -0700
Subject: [PATCH 14/15] fixed typo/spelling error

---
 link_checker/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/link_checker/utils.py b/link_checker/utils.py
index 934c371..63d598e 100644
--- a/link_checker/utils.py
+++ b/link_checker/utils.py
@@ -95,11 +95,11 @@ def get_legalcode(args):
     """
     if args.local:
         if args.log_level == DEBUG:
-            print("DEBUG: processing local legacode files")
+            print("DEBUG: processing local legalcode files")
         license_names = get_local_legalcode()
     else:
         if args.log_level == DEBUG:
-            print("DEBUG: processing GitHub legacode files")
+            print("DEBUG: processing GitHub legalcode files")
         license_names = get_github_legalcode()
     if args.limit and args.subcommand != "rdf":
         license_names = license_names[0 : args.limit]  # noqa: E203

From 24261736b24dd338f4b63625c663cd1da47b3e4c Mon Sep 17 00:00:00 2001
From: Timid Robot Zehta <timid@creativecommons.org>
Date: Wed, 9 Sep 2020 08:35:54 -0700
Subject: [PATCH 15/15] Update link_checker/tests/test_link_checker.py

spelling correction

Co-authored-by: Alden S Page <alden@creativecommons.org>
---
 link_checker/tests/test_link_checker.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/link_checker/tests/test_link_checker.py b/link_checker/tests/test_link_checker.py
index 5ba5221..810dadf 100644
--- a/link_checker/tests/test_link_checker.py
+++ b/link_checker/tests/test_link_checker.py
@@ -89,7 +89,7 @@ def test_parser_shared_reporting(tmpdir):
         args = link_checker.parse_arguments([subcmd])
         assert bool(args.output_errors) is False
 
-    # Test argumetns
+    # Test arguments
     for subcmd in subcmds:
         # Test --output-errors with default value
         args = link_checker.parse_arguments([subcmd, "--output-errors"])