3 #set -x # uncomment for bash script debugging
5 # ============================================================================
6 # Licensed under the Apache License, Version 2.0 (the "License");
7 # you may not use this file except in compliance with the License.
8 # You may obtain a copy of the License at
10 # http://www.apache.org/licenses/LICENSE-2.0
12 # Unless required by applicable law or agreed to in writing, software
13 # distributed under the License is distributed on an "AS IS" BASIS,
14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 # See the License for the specific language governing permissions and
16 # limitations under the License.
17 # ============LICENSE_END=====================================================
23 ### Thomas Kulik, Deutsche Telekom AG, 2020
26 ### warnstat helps to find the onap modules (projects) and rst-files which are
27 ### responsible for the most warnings during the documentation build process.
28 ### it requires a tox build logfile, parses it line by line, prints out some
29 ### statistics and provides links to the local rst file, its html version, the
30 ### related link to readthedocs and as well the doc8 test result for the rst.
34 ### CHANGELOG (LATEST ON TOP)
36 ### 1.6.2 (2020-05-14) - fixed a major problem with rst files within one module
37 ### which have the same name (but reside in different
38 ### subdirectories). they were not shown in the result
39 ### list. introduced a crc-number for every file for proper
40 ### identification and to build the index.
41 ### - fixed a problem where the results are showing the link
42 ### to a wrong file due to a regex problem in the script.
43 ### 1.6.1 (2020-04-21) - fixed a problem with duplicates in rst filenames
44 ### 1.6.0 (2020-04-03) - extended detection of docs paths in case they are not
45 ### below the submodules directory
46 ### 1.5.0 (2020-03-23) - doc8 test now executed for every rst file. result is
47 ### provided in the output as "doc8_(nnnnn)" where nnnnn
48 ### is the total number of accumulated doc8 errors.
49 ### - improved readability of output
50 ### 1.4.0 (2020-03-18) - the link to the local html and rst file is provided in
51 ### the output. this may help to ease the debug process.
52 ### use mouse-over/context menu functionality of bash to
53 ### easily open files with your browser or rst editor.
54 ### - improved handling for module names (in case they are
55 ### no real onap projects/modules but directories which
56 ### contain additional documentation in rst format).
57 ### 1.3.1 (2020-03-10) - fixed minor typo in usage message
58 ### 1.3.0 (2020-03-09) - initially released to the community
### configuration and global state
61 script_version="1.6.2 (2020-05-14)"
# directory below the current working directory; the per-rst-file doc8 result files are written here
62 doc8_dir=$(pwd)/doc8_results
64 doc8_command="doc8 --verbose"; # doc8 invocation used for every rst file; add options here if required
# prefix for the "web" links shown in the result lists
65 web_base_url="https://docs.onap.org/en/latest";
68 echo " warnstats version ${script_version}";
# associative arrays (bash 4+):
#   module_array ............ warning count, keyed by module name
#   message_short_array ..... warning count, keyed by the shortened message
#   message_long_array ...... warning count, keyed by the full (sanitized) message
#   all remaining arrays .... keyed by "module | rstfile | crc"; they hold the warning
#                             count, the rst/html/web link and the doc8 result link
70 declare -A module_array
71 declare -A message_short_array
72 declare -A message_long_array
73 declare -A rstfile_array
74 declare -A rstfilepath_array
75 declare -A htmlfilepath_array
76 declare -A webpath_array
77 declare -A doc8_result_array
80 ### simple script argument handling
# NOTE(review): the assignment of $logfile is not visible in this part of the file - presumably it is the first script argument; confirm
83 # an empty $logfile means that no tox logfile was given: print the usage message
84 if [[ "$logfile" == "" ]] ; then
85 echo 'Usage: warnstats [tox-logfile]'
89 # the given logfile must exist and be a regular file
# NOTE(review): $logfile is unquoted in the test below, so a path containing blanks breaks the check
90 if [ ! -f $logfile ] ; then
91 echo "Error: can't find tox-logfile \"$logfile\""
95 # check for the doc8 results directory and remove *.txt result files of an earlier run (error output of rm is discarded)
96 if [ ! -d "$doc8_dir" ]; then
99 rm ${doc8_dir}/*.txt 2>/dev/null;
102 # get the local html build directory: take the "Generated docs available in" line of the logfile, keep only the part from " /" up to "/doc/docs/_build/html" at the end of the line and strip the leading blank
103 html_build_dir=$(grep "Generated docs available in" $logfile);
104 html_build_dir=$(echo "$html_build_dir" | grep -oP " /.*/doc/docs/_build/html$");
105 html_build_dir=$(echo "$html_build_dir" | sed -r 's:^ ::');
# print the settings used for this run
106 echo " html build directory ..... $html_build_dir"
107 echo " web base url ............. $web_base_url";
108 echo " doc8 command ............. $doc8_command";
109 echo " doc8 results directory ... $doc8_dir";
110 echo " tox logfile .............. $logfile";
112 # read the tox build logfile into an array (one element per line) - use only lines which contain ": WARNING:"
113 readarray -t logfile_array < <(grep ": WARNING:" $logfile);
115 # process the filtered logfile line by line
116 for line in "${logfile_array[@]}"
119 # count warning lines and show the progress; the trailing \r moves the cursor back to the line start so the next progress message overwrites this one
121 echo -n -e " lines processed .......... $counter (doc8 check may take a while ...)\r";
124 # extract path to local rst file
127 # remove the ", other instance in ..." suffix from the original line because it causes the regexes below to fail
128 line=$(echo "$line" | sed -r 's:, other instance in.*::');
131 path_rst_debug=$line;
132 #echo "DBUG line: $line"
133 # remove the same suffix from path_rst (NOTE(review): the initial assignment of path_rst is not visible here - presumably it is a copy of $line; confirm)
134 path_rst=$(echo "$path_rst" | sed -r 's:, other instance in.*::');
135 #echo "DBUG path_rst: $path_rst"
136 # grep the rst file path: it has to begin at the line start with "/" or "docs" and ends with the last ".rst" in the line
137 path_rst=$(echo "$path_rst" | grep -oP "^(/|docs).*\.rst");
138 #echo "DBUG path_rst: $path_rst"
139 # create a unique identifier for the rst file for the case that the rst file name is used multiple times (but in different subdirectories) within one module; crc32 is called with the rst path as argument and its error output is discarded
140 rst_crc=$(crc32 "$path_rst" 2>/dev/null);
141 #echo "DBUG rst_crc: $rst_crc"
# use fixed placeholder texts if no crc value or no rst path could be determined
142 if [[ "$rst_crc" == "" ]] ; then
143 rst_crc="rst_crc_missing"
146 if [[ "$path_rst" == "" ]] ; then
147 path_rst="path_to_rst_missing"
148 #echo "DBUG path_rst: $path_rst"
149 #echo "DBUG path_rst_debug: $path_rst_debug"
151 # finally embed the full rst path in a hyperlink escape sequence (\e]8;;<target>\a<text>\e]8;;\a) with the visible text "rst" so the file can be opened via mouse-over/context menu
152 path_rst_link='\e]8;;file:'$path_rst'\arst\e]8;;\a';
153 #echo -e "DBUG path_rst: "$path_rst;
156 # extract path to the html version of the local rst file
# results: path_html (path below the html build directory, with a leading slash),
#          path_web_link and path_html_link (hyperlink escape sequences with the visible texts "web" and "html")
160 #echo "DBUG line: $line"
161 # remove problematic text in line that causes regex to fail
162 path_html=$(echo "$path_html" | sed -r 's:, other instance in.*::');
163 #echo "DBUG path_html: $path_html"
164 # grep the rst file path (from the "docs" directory on) and modify it so we get the local html build path; grep a little bit more to be safe
165 path_html=$(echo "$path_html" | grep -oP "(^|/)docs(/.*|)/[\w -]*\.rst");
166 #echo "DBUG path_html: $path_html"
# strip the leading "docs" component; the slash is optional because the grep above also accepts a relative path that starts with "docs" - without this such a path would be glued to the base url/build directory unchanged
167 path_html=$(echo "$path_html" | sed -r 's:^/?docs::');
168 #echo "DBUG path_html: $path_html"
# replace the file extension; the dot is escaped and the match is anchored at the end, otherwise ".rst" matches any character followed by "rst" (e.g. "irst" in "first.rst")
169 path_html=$(echo "$path_html" | sed -r 's:\.rst$:.html:');
170 #echo "DBUG path_html: $path_html"
171 # create also the path to the web version
172 path_web_link='\e]8;;'${web_base_url}${path_html}'\aweb\e]8;;\a';
173 #echo "DBUG path_web_link: $path_web_link"
174 # finally embed the full html path in a hyperlink escape sequence to use mouse-over/context menu to open the file
175 path_html_link='\e]8;;file:'${html_build_dir}${path_html}'\ahtml\e]8;;\a';
176 #echo -e "DBUG path_html_link: "$path_html_link;
178 # extract module name from line: remove the text up to and including ".../doc/docs/submodules/" (or a leading "docs/submodules/", or "checking consistency... "), then cut out everything before the first "/"
179 module=$(echo "$line" | sed -r 's:(^.*/doc/docs/submodules/|^docs/submodules/|checking consistency... )::' | cut -f1 -d\/);
180 #echo "DBUG line: $line"
181 #echo "DBUG module: $module"
183 # in case the extraction has not led to a valid module name do some additional investigation:
# derive a pseudo module name from well-known documentation directories in the line;
# the specific doc/docs/guides/* patterns are tested before the generic doc/docs/guides pattern
184 if [[ "$module" == "" ]] ; then
186 if [[ $line =~ doc/docs/release ]] ; then
187 module="docs_release"
188 #echo "DBUG line: $line"
189 #echo "DBUG module: $module"
190 elif [[ $line =~ doc/docs/use-cases ]] ; then
191 module="docs_use-cases"
192 #echo "DBUG line: $line"
193 #echo "DBUG module: $module"
194 elif [[ $line =~ doc/docs/guides/onap-developer ]] ; then
195 module="docs_guides_onap-developer"
196 #echo "DBUG line: $line"
197 #echo "DBUG module: $module"
198 elif [[ $line =~ doc/docs/guides/onap-operator ]] ; then
199 module="docs_guides_onap-operator"
200 #echo "DBUG line: $line"
201 #echo "DBUG module: $module"
202 elif [[ $line =~ doc/docs/guides/onap-provider ]] ; then
203 module="docs_guides_onap-provider"
204 #echo "DBUG line: $line"
205 #echo "DBUG module: $module"
206 elif [[ $line =~ doc/docs/guides/onap-user ]] ; then
207 module="docs_guides_onap-user"
208 #echo "DBUG line: $line"
209 #echo "DBUG module: $module"
210 elif [[ $line =~ doc/docs/guides/overview ]] ; then
211 module="docs_guides_overview"
212 #echo "DBUG line: $line"
213 #echo "DBUG module: $module"
214 elif [[ $line =~ doc/docs/templates ]] ; then
215 module="docs_templates"
216 #echo "DBUG line: $line"
217 #echo "DBUG module: $module"
218 elif [[ $line =~ doc/docs/guides ]] ; then
220 #echo "DBUG line: $line"
221 #echo "DBUG module: $module"
224 #echo "DBUG line: $line"
225 #echo "DBUG module: $module"
229 #echo "DBUG line: $line";
230 #echo "DBUG module: $module";
232 # remember the length of the longest module name to adjust the table width later on
233 if [[ ${#module} -gt "$maxlength_module" ]]; then
234 maxlength_module=${#module};
236 #echo "DBUG maxlength_module=$maxlength_module";
238 # extract rst file name (without directories) from line and replace blanks by "_" to use it later as part of an array key
239 #echo "DBUG line: $line";
240 rstfile=$(echo "$line" | sed -r 's:, other instance in.*::');
241 rstfile=$(echo -e "${rstfile}" | grep -oP "[\w -]*\.rst");
242 rstfile=$(echo -e ${rstfile} | tr '[:blank:]' '_');
243 #echo "DBUG rstfile: '$rstfile'";
245 # remember the length of the longest rst file name to adjust the table width later on
246 if [[ ${#rstfile} -gt "$maxlength_rstfile" ]]; then
247 maxlength_rstfile=${#rstfile};
249 #echo "DBUG maxlength_rstfile=$maxlength_rstfile";
251 # count the number of warnings for the module/rstfile/crc combination
252 (( rstfile_array[$module | $rstfile | $rst_crc]++ ));
254 # count the number of warnings for the single module
255 #echo "DBUG $module | $rstfile | $message";
256 (( module_array[$module]++ ));
258 # now we have all the information to fill the html/rst/web (file) path arrays (same key as rstfile_array; an existing entry is overwritten)
259 htmlfilepath_array[$module | $rstfile | $rst_crc]=$path_html_link;
260 rstfilepath_array[$module | $rstfile | $rst_crc]=$path_rst_link;
261 webpath_array[$module | $rstfile | $rst_crc]=$path_web_link;
263 #echo "DBUG -------------------------------------------------------------------";
264 #echo "DBUG line: $line";
265 #echo "DBUG htmlfilepath_array: $module | $rstfile | $rst_crc = $path_html_link";
266 #echo "DBUG rstfilepath_array: $module | $rstfile | $rst_crc = $path_rst_link";
267 #echo "DBUG webpath_array: $module | $rstfile | $rst_crc = $path_web_link";
269 # extract the warning message (the text after the last "WARNING: " in the line) and replace blanks, "/" and "." by "_"
# NOTE(review): ${message} is unquoted in the echo calls below, so it is subject to word splitting (runs of blanks collapse to one) and to pathname expansion
270 #message=$(echo "$line" | sed -r 's:^/.+WARNING\:\ ::');
271 message=$(echo "$line" | sed -r 's:^.+WARNING\:\ ::');
272 message=$(echo -e ${message} | tr '[:blank:]' '_');
273 message=$(echo -e ${message} | tr '/' '_');
274 message=$(echo -e ${message} | tr '.' '_');
276 # remove all characters from message which may cause problems in the shell (only letters, digits, "_" and "-" are kept)
277 message="$(echo -e "${message}" | sed -e 's/[^A-Za-z0-9_-]//g')";
278 #echo "DBUG message=\"$message\""
280 # count the number of warnings for the single message (long version)
281 message_long="$(echo -e "${message}")";
282 (( message_long_array[$message_long]++ ))
284 # reduce length of message to its first 16 characters to group them more easily and then ...
285 # count the number of warnings for the single message (short version)
286 message_short="$(echo -e "${message}" | cut -c -16)";
287 (( message_short_array[$message_short]++ ))
289 # check rst files with doc8 and store results; the result file is named <module>-<rstfile>-<crc>.txt and resides in the doc8 results directory
290 doc8_result_path="${doc8_dir}/${module}-${rstfile}-${rst_crc}.txt";
291 #echo "DBUG ---------------------------------------------"
292 #echo "DBUG doc8_result_path=\"$doc8_result_path\""
293 # doc8 check only if result file does not exist yet AND if rst file is valid (exists)
294 if [[ ! -f "$doc8_result_path" && -f "$path_rst" ]] ; then
# the result file starts with a "FILE:<rst path>" line followed by the doc8 output
295 echo "FILE:$path_rst" >$doc8_result_path;
296 $doc8_command "$path_rst" >>$doc8_result_path;
# read the number behind "Total accumulated errors = " from the result file and zero-pad it to five digits
297 total_acc_err=$(grep "Total accumulated errors = " $doc8_result_path);
298 #echo "DBUG total_acc_err=$total_acc_err";
299 total_acc_err=$(echo $total_acc_err | sed 's:Total accumulated errors = ::');
300 #echo "DBUG total_acc_err=$total_acc_err";
301 total_acc_err=$(printf "%05d" $total_acc_err);
302 #echo "DBUG command:doc8 ${path_rst} >>${doc8_result_path}";
303 #echo "DBUG total_acc_err=$total_acc_err";
# hyperlink escape sequence to the doc8 result file with the visible text "doc8_(nnnnn)"
305 doc8_result='\e]8;;file:'${doc8_result_path}'\adoc8_('$total_acc_err')\e]8;;\a';
306 doc8_result_array[$module | $rstfile | $rst_crc]=$doc8_result;
310 #zero-pad the line counter to five digits
311 counter=$(printf "%05d" $counter);
313 echo " $counter LINES WITH WARNING IN FILE '$logfile'";
316 echo "################################################################################";
317 echo "~~~ MESSAGES LONG ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
318 echo "################################################################################";
321 #append one "count | message" line per unique long message to the temporary outfile "tempoutfile" (relative path)
# NOTE(review): the assignments of $m and $nc are not visible here - presumably $m is the array key and $nc the running total; confirm
322 for i in "${!message_long_array[@]}"
325 n=${message_long_array[$i]};
327 #zero-pad the counter to five digits
328 n=$(printf "%05d" $n);
329 echo " $n | $m" >>tempoutfile;
332 #zero-pad the total to five digits
333 nc=$(printf "%05d" $nc);
334 echo " $nc WARNINGS IN TOTAL WITH ${#message_long_array[@]} UNIQUE MESSAGES" >>tempoutfile;
336 #print a sorted version of the temporary outfile
344 echo "################################################################################";
345 echo "~~~ MESSAGES SHORTENED (FOR SIMPLE GROUPING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
346 echo "################################################################################";
349 #append one "count | message" line per unique shortened message to the temporary outfile "tempoutfile"
# NOTE(review): the assignments of $m and $nc are not visible here - presumably $m is the array key and $nc the running total; confirm
350 for i in "${!message_short_array[@]}"
353 n=${message_short_array[$i]};
355 #zero-pad the counter to five digits
356 n=$(printf "%05d" $n);
357 echo " $n | $m" >>tempoutfile;
360 #zero-pad the total to five digits
361 nc=$(printf "%05d" $nc);
362 echo " $nc WARNINGS IN TOTAL WITH ${#message_short_array[@]} UNIQUE MESSAGES" >>tempoutfile;
364 #print a sorted version of the temporary outfile
372 echo "################################################################################";
373 echo "~~~ MODULES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
374 echo "################################################################################";
377 #append one "count | module" line per module to the temporary outfile "tempoutfile"
# NOTE(review): the assignments of $m and $nc are not visible here - presumably $m is the array key and $nc the running total; confirm
378 for i in "${!module_array[@]}"
381 n=${module_array[$i]};
# zero-pad the counter to five digits
383 n=$(printf "%05d" $n);
384 echo " $n | $m" >>tempoutfile;
387 #zero-pad the total to five digits
388 nc=$(printf "%05d" $nc);
389 echo " $nc WARNINGS IN TOTAL IN ${#module_array[@]} MODULES" >>tempoutfile;
391 #print a sorted version of the temporary outfile
397 echo "################################################################################";
398 echo "~~~ MODULES WITH RSTFILES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
399 echo "################################################################################";
402 #append one line per "module | rstfile | crc" key to the temporary outfile "tempoutfile"; in this section the warning count is the last column
# NOTE(review): the assignments of $m and $nc and the decrements of $to_add are not visible here; confirm against the complete file
403 for i in "${!rstfile_array[@]}"
# n = warning count, p = html link, r = rst link, w = web link, d = doc8 result link
406 n=${rstfile_array[$i]};
407 p=${htmlfilepath_array[$i]}
408 r=${rstfilepath_array[$i]}
409 w=${webpath_array[$i]}
410 d=${doc8_result_array[$i]};
411 #echo "DBUG -------------------------------"
412 #echo "DBUG i = '$i'"
413 #echo "DBUG m = '$m'"
414 #echo "DBUG n = '$n'"
415 #echo "DBUG p = '$p'"
416 #echo -e "DBUG p = '$p'"
417 #echo "DBUG w = '$w'"
418 #echo "DBUG d = '$d'"
420 #zero-pad the counter to five digits
421 n=$(printf "%05d" $n);
423 # cut the module name out of $m (everything from the first " |" on is removed) and extend it with blanks to the max module name length for better readability
424 tmp_mod=$(echo "$m" | sed -r 's: \|.+$::');
425 #echo "DBUG tmp_mod=$tmp_mod"
426 len_tmp_mod=${#tmp_mod}
427 to_add="$(($maxlength_module-$len_tmp_mod))"
428 #echo "DBUG to_add=$to_add"
429 while [ $to_add -gt 0 ]; do
430 tmp_mod="${tmp_mod} ";
432 #echo "DBUG to_add=$to_add"
433 #echo "DBUG tmp_mod=\"$tmp_mod\""
436 # remove the trailing "| crc" part, one trailing whitespace character and the leading "module | " part, then extend the rst name with blanks to the max rst name length for better readability
437 #echo "DBUG ******************************************************"
438 #echo "DBUG m = '$m'"
439 tmp_rst=$(echo "$m" | sed -r 's:\| [[:alnum:]_]+$::');
440 #echo "DBUG tmp_rst = '$tmp_rst'"
441 tmp_rst=$(echo "$tmp_rst" | sed -r 's:[[:space:]]$::');
442 #echo "DBUG tmp_rst = '$tmp_rst'"
443 tmp_rst=$(echo "$tmp_rst" | sed -r 's:^.+ \| ::');
444 #echo "DBUG tmp_rst = '$tmp_rst'"
445 len_tmp_rst=${#tmp_rst}
446 #echo "DBUG len_tmp_rst = '$len_tmp_rst'"
447 to_add="$(($maxlength_rstfile-$len_tmp_rst))"
448 #echo "DBUG to_add = '$to_add'"
449 while [ $to_add -gt 0 ]; do
450 tmp_rst="${tmp_rst} ";
452 #echo "DBUG to_add = '$to_add'"
453 #echo "DBUG tmp_rst = '$tmp_rst'"
456 # recombine the padded module and rst names
457 m="${tmp_mod} | ${tmp_rst}";
458 #echo "DBUG m = '$m'"
460 # print out to temp file (echo -e so that the escape sequences of the links are interpreted)
461 echo -e " $m | $r $p $w $d | $n" >>tempoutfile;
464 #zero-pad the total to five digits
465 nc=$(printf "%05d" $nc);
466 #in case the name (e.g) index.rst is used multiple times in the same module warnings are combined
467 echo " $nc WARNINGS IN TOTAL IN APPROX. ${#rstfile_array[@]} RST FILES" >>tempoutfile;
469 #print a sorted version of the temporary outfile
477 echo "################################################################################";
478 echo "~~~ RSTFILES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
479 echo "################################################################################";
482 #append one line per "module | rstfile | crc" key to the temporary outfile "tempoutfile"; in this section the warning count is the first column
# NOTE(review): the assignments of $m and $nc and the decrements of $to_add are not visible here; confirm against the complete file
483 for i in "${!rstfile_array[@]}"
# n = warning count, p = html link, r = rst link, w = web link, d = doc8 result link
486 n=${rstfile_array[$i]};
487 p=${htmlfilepath_array[$i]}
488 r=${rstfilepath_array[$i]}
489 w=${webpath_array[$i]}
490 d=${doc8_result_array[$i]};
491 #echo "DBUG -------------------------------"
501 #zero-pad the counter to five digits
502 n=$(printf "%05d" $n);
504 # cut the module name out of $m (everything from the first " |" on is removed) and extend it with blanks to the max module name length for better readability
505 tmp_mod=$(echo "$m" | sed -r 's: \|.+$::');
506 #echo "DBUG tmp_mod=$tmp_mod"
507 len_tmp_mod=${#tmp_mod}
508 to_add="$(($maxlength_module-$len_tmp_mod))"
509 #echo "DBUG to_add=$to_add"
510 while [ $to_add -gt 0 ]; do
511 tmp_mod="${tmp_mod} ";
513 #echo "DBUG to_add=$to_add"
514 #echo "DBUG tmp_mod=\"$tmp_mod\""
517 # remove the trailing "| crc" part, one trailing whitespace character and the leading "module | " part, then extend the rst name with blanks to the max rst name length for better readability
518 #echo "DBUG ******************************************************"
519 #echo "DBUG m = '$m'"
520 tmp_rst=$(echo "$m" | sed -r 's:\| [[:alnum:]_]+$::');
521 #echo "DBUG tmp_rst = '$tmp_rst'"
522 tmp_rst=$(echo "$tmp_rst" | sed -r 's:[[:space:]]$::');
523 #echo "DBUG tmp_rst = '$tmp_rst'"
524 tmp_rst=$(echo "$tmp_rst" | sed -r 's:^.+ \| ::');
525 #echo "DBUG tmp_rst = '$tmp_rst'"
526 len_tmp_rst=${#tmp_rst}
527 #echo "DBUG len_tmp_rst = '$len_tmp_rst'"
528 to_add="$(($maxlength_rstfile-$len_tmp_rst))"
529 #echo "DBUG to_add = '$to_add'"
530 while [ $to_add -gt 0 ]; do
531 tmp_rst="${tmp_rst} ";
533 #echo "DBUG to_add = '$to_add'"
534 #echo "DBUG tmp_rst = '$tmp_rst'"
537 # recombine the padded module and rst names
538 m="${tmp_mod} | ${tmp_rst}";
540 # print out to temp file (echo -e so that the escape sequences of the links are interpreted)
541 echo -e " $n | $m | $r $p $w $d" >>tempoutfile;
544 #zero-pad the total to five digits
545 nc=$(printf "%05d" $nc);
546 #in case the name (e.g) index.rst is used multiple times in the same module warnings are combined
547 echo " $nc WARNINGS IN TOTAL IN APPROX. ${#rstfile_array[@]} RST FILES" >>tempoutfile;
549 #print a sorted version of the temporary outfile
560 ### backup code for future extensions
564 # Block_quote_ends_without_a_blank_line_unexpected_unindent
565 # Bullet_list_ends_without_a_blank_line_unexpected_unindent
566 # Citation_[\w-]_is_not_referenced
567 # Citation_unit_test_is_not_referenced
568 # Content_block_expected_for_the_code_directive_none_found
569 # Content_block_expected_for_the_container_directive_none_found
570 # Could_not_lex_literal_block_as_bash__Highlighting_skipped
571 # Could_not_lex_literal_block_as_console__Highlighting_skipped
572 # Could_not_lex_literal_block_as_guess__Highlighting_skipped
573 # Could_not_lex_literal_block_as_json__Highlighting_skipped
574 # Could_not_lex_literal_block_as_yaml__Highlighting_skipped
575 # Definition_list_ends_without_a_blank_line_unexpected_unindent
576 # document_isnt_included_in_any_toctree
577 # download_file_not_readable
578 # Duplicate_explicit_target_name
580 # Enumerated_list_ends_without_a_blank_line_unexpected_unindent
581 # Error_in_code_directive
582 # Error_in_code-block_directive
583 # Error_in_image_directive
584 # Explicit_markup_ends_without_a_blank_line_unexpected_unindent
585 # Field_list_ends_without_a_blank_line_unexpected_unindent
586 # Footnote_[0-9.*]_is_not_referenced
587 # image_file_not_readable
589 # Inconsistent_literal_block_quoting
590 # Inline_emphasis_start-string_without_end-string
591 # Inline_interpreted_text_or_phrase_reference_start-string_without_end-string
592 # Inline_strong_start-string_without_end-string
593 # Inline_substitution_reference_start-string_without_end-string
594 # Literal_block_ends_without_a_blank_line_unexpected_unindent
595 # Literal_block_expected_none_found
597 # Pygments_lexer_name_asn_is_not_known
598 # Title_level_inconsistent
599 # Title_overline__underline_mismatch
600 # Title_overline_too_short
601 # Title_underline_too_short
602 # toctree_contains_reference_to_nonexisting_document
603 # Too_many_autonumbered_footnote_references_only_0_corresponding_footnotes_available
604 # undecodable_source_characters_replacing_with
606 # Unexpected_indentation
607 # Unknown_directive_type_clode-block
609 # Unknown_target_name