3 ### ===========================================================================
4 ### Licensed under the Apache License, Version 2.0 (the "License");
5 ### you may not use this file except in compliance with the License.
6 ### You may obtain a copy of the License at
8 ### http://www.apache.org/licenses/LICENSE-2.0
10 ### Unless required by applicable law or agreed to in writing, software
11 ### distributed under the License is distributed on an "AS IS" BASIS,
12 ### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ### See the License for the specific language governing permissions and
14 ### limitations under the License.
16 ### Copyright (C) 2021 Deutsche Telekom AG
17 ### ============LICENSE_END====================================================
22 # Thomas Kulik, Deutsche Telekom AG, 2021
24 # Processes a list of rst files and retrieves the first title for every single rst file.
25 # Copy program to {branch} directory of cloned ONAP documentation and run it.
27 # python3 getrsttitle.py filename
30 # https://regex101.com/r/YNYK2Q/1/
31 # https://stackoverflow.com/questions/20312443/how-to-find-title-a-la-restructuredtext
# Regular expressions that detect a reStructuredText title, i.e. a line of
# text followed by an underline made of one repeated character. They are tried
# in this order: '=', '-', '~', '#', '*'. Group 1 captures the title text,
# group 2 the underline.
#
# Raw strings produce exactly the same pattern text as the former
# double-backslash literals, so no escaping workaround is needed.
regex_list = [
    r"(?:^|\n)(?!\=)([^\n\r]+)\r?\n(\=+)(?:\r?\n| *$)",
    r"(?:^|\n)(?!\-)([^\n\r]+)\r?\n(\-+)(?:\r?\n| *$)",
    r"(?:^|\n)(?!\~)([^\n\r]+)\r?\n(\~+)(?:\r?\n| *$)",
    r"(?:^|\n)(?!\#)([^\n\r]+)\r?\n(\#+)(?:\r?\n| *$)",
    r"(?:^|\n)(?!\*)([^\n\r]+)\r?\n(\*+)(?:\r?\n| *$)",
]

# Placeholder written to the output when no title could be determined.
_NO_TITLE = "NO-TITLE-FOUND"


def _find_title(text):
    """Return the title text found in *text*, or "NO-TITLE-FOUND".

    The patterns in ``regex_list`` are tried one after another and the first
    pattern that matches anywhere in *text* wins. A title underlined with '='
    is therefore preferred over one underlined with '-', even if the latter
    appears earlier in the text.
    """
    for regex in regex_list:
        m = re.search(regex, text, re.MULTILINE)
        if m:
            # end regex loop if we have a title
            return m.group(1)
    return _NO_TITLE


def _split_entry(line):
    """Derive (rstfile, repository, project) from one line of the list file.

    * rstfile:    "./" followed by the line with all square brackets removed
    * repository: the text in front of the first "]" that is followed by more
                  characters, with "[" removed
    * project:    the same text, additionally cut at its first "/" that is
                  followed by more characters
    """
    rstfile = "./" + re.sub(r'\[|\]', '', line).strip()
    bracket_part = re.sub(r'\].+$', '', line).strip()
    repository = re.sub(r'\[', '', bracket_part).strip()
    # Cut at the slash *before* dropping "[" to keep the original evaluation order.
    project_part = re.sub(r'\/.+$', '', bracket_part).strip()
    project = re.sub(r'\[', '', project_part).strip()
    return rstfile, repository, project


def _process_list(list_path):
    """Print one "project,repository,line,title" row per entry in *list_path*.

    Blank lines are skipped. If the rst file of an entry does not exist, an
    "ERR:" line is printed and the row carries "NO-TITLE-FOUND" as title, so
    a title from a previous entry can never leak into the row.
    """
    with open(list_path, encoding="utf-8") as fn:
        for line in fn:
            if not line.strip():
                continue

            rstfile, repository, project = _split_entry(line)

            if os.path.isfile(rstfile):
                # errors="replace": a stray non-UTF-8 byte must not abort the whole run
                with open(rstfile, 'r', encoding="utf-8", errors="replace") as content:
                    match = _find_title(content.read())
            else:
                print ("ERR: File {} does not exist".format(rstfile))
                match = _NO_TITLE

            # clean up result and print; commas would break the comma-separated output
            match_final = match.replace(",", "").strip()
            print ("{},{},{},{}".format(project.strip(), repository.strip(), line.strip(), match_final.strip()))


def main(argv=None):
    """Parse the command line and process the given list file.

    argv: list of command-line arguments without the program name; when None,
          argparse falls back to the arguments of the running process.
    """
    parser = argparse.ArgumentParser(description='Processes a list of rst files and retrieves the first title for every single rst file.')
    parser.add_argument('filename')
    args = parser.parse_args(argv)

    if os.path.isfile(args.filename):
        _process_list(args.filename)
    else:
        print ("ERR: File {} does not exist".format(args.filename))


if __name__ == "__main__":
    main()
126 # example code to show detailed regex matches and group content
127 # to be used in a future version of this program
129 # matches = re.finditer(regex2, content, re.MULTILINE)
130 # for matchNum, match in enumerate(matches, start=1):
131 # print ("Match {matchNum} was found at {start}-{end}: {match}".format(matchNum = matchNum, start = match.start(), end = match.end(), match = match.group()))
132 # print ("{match}".format(match = match.group()))
133 # for groupNum in range(0, len(match.groups())):
134 # groupNum = groupNum + 1
135 # print ("Group {groupNum} found at {start}-{end}: {group}".format(groupNum = groupNum, start = match.start(groupNum), end = match.end(groupNum), group = match.group(groupNum)))
136 # print ("Test:" "{group}".format(group = match.group(1)))
140 # example code for pandas
141 # to be used in a future version of this program
143 # import pandas as pd
144 # pd.set_option('display.max_rows', 500)
145 # pd.set_option('display.max_columns', 500)
146 # pd.set_option('display.width', 1000)
148 # table = pd.read_csv("master_table.csv")