#!/usr/bin/env python3

# Copyright (C) 2023-2025 Free Software Foundation, Inc.
#
# Script to regenerate attr-urls.def from generated HTML.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.

# Help text for the command-line interface; passed as the argparse
# description when the script is run directly.
DESCRIPTION = """
Parses the generated HTML (from "make html") to locate anchors
for attributes, and generates a gcc/attr-urls.def file in the source tree,
giving URLs for each attribute, where it can.

Usage (from build/gcc subdirectory):
  ../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src

To run unit tests:
  ../../src/gcc/regenerate-attr-urls.py HTML/gcc-15.0.0/ ../../src --unit-test
"""

import argparse
import json
import os
from pathlib import Path
from pprint import pprint
import sys
import re
import unittest

class Index:
    """Attribute index entries scraped from the generated HTML.

    Instance attributes:
      entries: list of (url_suffix, name, kind, extra_text) tuples, in the
        order in which they were added.
      entries_by_kind: dict mapping a kind (e.g. 'function', 'type') to a
        list of (name, url_suffix, extra_text) tuples, in insertion order.
    """

    # Pattern for a single index link, compiled once rather than per line.
    # e.g. <a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>
    # e.g. <a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code class="code">nocompression</code> function attribute, MIPS</a>
    # Groups: url suffix, attribute name, kind, trailing text.
    _ENTRY_RE = re.compile(
        r'<a href="([\S]+)"><code[^>]*>([\S]+)</code> (\S+) attribute([^<]*)</a>')

    def __init__(self):
        self.entries = []
        self.entries_by_kind = {}

    def add_entry(self, url_suffix, name, kind, extra_text, verbose=False):
        """Record one attribute, both in the flat list and grouped by kind.

        VERBOSE is accepted for backward compatibility; it is not used.
        """
        self.entries.append((url_suffix, name, kind, extra_text))
        self.entries_by_kind.setdefault(kind, []).append(
            (name, url_suffix, extra_text))

    def parse_attribute_index(self, input_filename, verbose=False):
        """Parse every line of the HTML file INPUT_FILENAME for entries."""
        # Decode explicitly instead of relying on the locale's default
        # encoding, so the result does not depend on the build environment.
        # NOTE(review): assumes the generated HTML is UTF-8 - confirm.
        with open(input_filename, encoding='utf-8') as f:
            for line in f:
                self.parse_html_line_attribute_index(line, verbose)

    def parse_html_line_attribute_index(self, line, verbose=False):
        """Add an entry for LINE if it contains an attribute index link.

        Lines without such a link, and links whose attribute name contains
        a paren, are silently ignored.  If VERBOSE, print the raw line and
        any matched groups.
        """
        if verbose:
            print(repr(line))

        # Apply the same substitutions that process_html_file in the GCC
        # website's bin/preprocess applies to the published pages:
        #   | sed -e 's/_002d/-/g' -e 's/_002a/*/g' \
        line = line.replace('_002d', '-')
        line = line.replace('_002a', '*')

        m = self._ENTRY_RE.search(line)
        if not m:
            return
        if verbose:
            print(m.groups())

        url_suffix, name, kind, extra_text = m.groups()

        # Trailing text such as ', MIPS' is stored without its separator.
        if extra_text.startswith(', '):
            extra_text = extra_text[2:]

        # Reject anchors where the name contains a paren
        # e.g. 'target(&quot;3dnowa&quot;)':
        if '(' in name:
            return

        self.add_entry(url_suffix, name, kind, extra_text)

    def generate_file(self, dstpath):
        """Write the generated tables to the file DSTPATH, replacing it."""
        # Explicit encoding keeps the output independent of the locale.
        with open(dstpath, 'w', encoding='utf-8') as outf:
            self.write_file(outf)

    def write_file(self, outf):
        """Write the generated tables to the text stream OUTF.

        Emits one array per kind (kinds and the entries within each kind
        both in sorted order), followed by a table listing those arrays.
        """
        outf.write("/* Autogenerated by regenerate-attr-urls.py.  */\n\n")

        kinds = sorted(self.entries_by_kind)

        for kind in kinds:
            outf.write("const attr_url_entry %s_attrs[] = {\n" % kind)
            for name, url_suffix, extra_text in sorted(self.entries_by_kind[kind]):
                # The final field is the length of the attribute name.
                outf.write(' { "%s", "gcc/%s", "%s", %i},\n'
                           % (name, url_suffix, extra_text, len(name)))
            outf.write("};\n\n")

        outf.write('static const struct attr_url_table {\n')
        outf.write('  const attr_url_entry *m_table;\n')
        outf.write('  const size_t m_table_sz;\n')
        outf.write('} attr_url_tables[] = {\n')
        for kind in kinds:
            outf.write("  { %s_attrs, ARRAY_SIZE (%s_attrs) },\n" % (kind, kind))
        outf.write("};\n")

# Path of the index page to parse, relative to the base HTML directory
# given on the command line.
INDEX_REL_PATH = 'gcc/Concept-and-Symbol-Index.html'

class TestParsingIndex(unittest.TestCase):
    """Unit tests for how Index parses attribute links from index HTML."""

    def _entries_from_line(self, html_line):
        """Parse HTML_LINE with a fresh Index and return its entries list."""
        scratch = Index()
        scratch.parse_html_line_attribute_index(html_line)
        return scratch.entries

    def test_function_attribute(self):
        parsed = self._entries_from_line('<a href="Common-Function-Attributes.html#index-access-function-attribute"><code>access</code> function attribute</a>')
        expected = [('Common-Function-Attributes.html#index-access-function-attribute',
                     'access',
                     'function',
                     '')]
        self.assertEqual(parsed, expected)

    def test_function_attribute_with_target(self):
        parsed = self._entries_from_line('<a href="MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS"><code class="code">nocompression</code> function attribute, MIPS</a>')
        expected = [('MIPS-Function-Attributes.html#index-nocompression-function-attribute_002c-MIPS',
                     'nocompression',
                     'function',
                     'MIPS')]
        self.assertEqual(parsed, expected)

    def test_reject_parens(self):
        # Names containing a paren must not produce an entry.
        parsed = self._entries_from_line('<a href="x86-Function-Attributes.html#index-target_0028_00223dnow_0022_0029-function-attribute_002c-x86"><code>target(&quot;3dnow&quot;)</code> function attribute, x86</a>')
        self.assertEqual(len(parsed), 0)

    def test_type_attribute(self):
        parsed = self._entries_from_line('<a href="Common-Type-Attributes.html#index-aligned-type-attribute"><code>aligned</code> type attribute</a>')
        expected = [('Common-Type-Attributes.html#index-aligned-type-attribute',
                     'aligned',
                     'type',
                     '')]
        self.assertEqual(parsed, expected)

    def test_enumerator_attribute(self):
        parsed = self._entries_from_line('<a href="Enumerator-Attributes.html#index-deprecated-enumerator-attribute"><code>deprecated</code> enumerator attribute</a>')
        expected = [('Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                     'deprecated',
                     'enumerator',
                     '')]
        self.assertEqual(parsed, expected)

    def test_label_attribute(self):
        parsed = self._entries_from_line('<a href="Label-Attributes.html#index-cold-label-attribute"><code>cold</code> label attribute</a>')
        expected = [('Label-Attributes.html#index-cold-label-attribute',
                     'cold',
                     'label',
                     '')]
        self.assertEqual(parsed, expected)

    def test_statement_attribute(self):
        parsed = self._entries_from_line('<a href="Statement-Attributes.html#index-assume-statement-attribute"><code>assume</code> statement attribute</a>')
        expected = [('Statement-Attributes.html#index-assume-statement-attribute',
                     'assume',
                     'statement',
                     '')]
        self.assertEqual(parsed, expected)

    def test_variable_attribute(self):
        parsed = self._entries_from_line('<a href="AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR"><code>absdata</code> variable attribute, AVR</a>')
        expected = [('AVR-Variable-Attributes.html#index-absdata-variable-attribute_002c-AVR',
                     'absdata',
                     'variable',
                     'AVR')]
        self.assertEqual(parsed, expected)

    def test_parse_attribute_index(self):
        # Parses the real generated index page; INPUT_HTML_PATH is set by
        # the script's entry point when --unit-test is given.
        whole_index = Index()
        whole_index.parse_attribute_index(INPUT_HTML_PATH / INDEX_REL_PATH)

        first_enumerator = whole_index.entries_by_kind['enumerator'][0]
        self.assertEqual(first_enumerator,
                         ('deprecated',
                          'Enumerator-Attributes.html#index-deprecated-enumerator-attribute',
                          ''))

        first_label = whole_index.entries_by_kind['label'][0]
        self.assertEqual(first_label,
                         ('cold', 'Label-Attributes.html#index-cold-label-attribute', ''))

def main(args):
    """Regenerate gcc/attr-urls.def from the generated HTML index.

    ARGS supplies base_html_dir (where the index page is looked up) and
    src_gcc_dir (the tree in which gcc/attr-urls.def is written).
    """
    attr_index = Index()
    attr_index.parse_attribute_index(args.base_html_dir / INDEX_REL_PATH)
    attr_index.generate_file(args.src_gcc_dir / 'gcc' / 'attr-urls.def')

if __name__ == '__main__':
    # Command line: two positional directories plus an optional switch
    # that runs the embedded unit tests instead of regenerating the file.
    arg_parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    arg_parser.add_argument('base_html_dir', type=Path)
    arg_parser.add_argument('src_gcc_dir', type=Path)
    arg_parser.add_argument('--unit-test', action='store_true')
    args = arg_parser.parse_args()

    if not args.unit_test:
        main(args)
    else:
        # The test suite reads the HTML location from this module-level name.
        INPUT_HTML_PATH = args.base_html_dir
        # Hand unittest a fixed argv so it does not see this script's own
        # arguments.
        unittest.main(argv=[sys.argv[0], '-v'])
