tools/warnstats.sh

   1 #!/bin/bash
   2
   3 #set -x # uncomment for bash script debugging
   4
   5 # ============================================================================
   6 # Licensed under the Apache License, Version 2.0 (the "License");
   7 # you may not use this file except in compliance with the License.
   8 # You may obtain a copy of the License at
   9 #
  10 #       http://www.apache.org/licenses/LICENSE-2.0
  11 #
  12 # Unless required by applicable law or agreed to in writing, software
  13 # distributed under the License is distributed on an "AS IS" BASIS,
  14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15 # See the License for the specific language governing permissions and
  16 # limitations under the License.
  17 # ============LICENSE_END=====================================================
  18
  19 ###
  20 ### warnstats
  21 ###
  22 ### AUTHOR(S):
  23 ### Thomas Kulik, Deutsche Telekom AG, 2020
  24 ###
  25 ### DESCRIPTION:
### warnstats helps to find the ONAP modules (projects) and rst-files which are
### responsible for the most warnings during the documentation build process.
### It requires a tox build logfile, parses it line by line and prints out some
### statistics.
  30 ###
  31
  32 ###
  33 ### CHANGELOG (LATEST ON TOP)
  34 ###
  35 ### 1.3.1 (2020-03-10) fixed minor typo in usage message
  36 ### 1.3.0 (2020-03-09) initial release
  37 ###
  38
  39 script_version="1.3.1 (2020-03-10)"
  40
  41 echo " ";
  42 echo "warnstats - Version ${script_version}";
  43 echo " ";
  44
  45 declare -A module_array
  46 declare -A message_short_array
  47 declare -A message_long_array
  48 declare -A rstfile_array
  49
  50 ###
  51 ### simple script argument handling
  52 ###
  53
  54 logfile=$1;
  55
  56 # check if there is an argument at all
  57 if [[ "$logfile" == "" ]] ; then
  58     echo 'Usage: warnstats [tox-logfile]'
  59     exit 1
  60 fi
  61
  62 # check if argument is a file
  63 if [ ! -f $logfile ] ; then
  64     echo "Error: can't find tox-logfile \"$logfile\""
  65     exit 1
  66 fi
  67
  68 # read in the tox build logfile - use only lines which contain a warning
  69 readarray -t logfile_array < <(grep ": WARNING:" $logfile);
  70
  71 # process filtered logfile line by line
  72 for line in "${logfile_array[@]}"
  73 do
  74     # count warning lines
  75     (( counter++ ));
  76     echo -n -e "lines processed: $counter\r";
  77
  78     # extract module name from line
  79     module=$(echo "$line" | sed -r 's:^/.+/doc/docs/(submodules|guides)/::' | cut -f1 -d\/);
  80
  81     # in case the extraction has no valid name fill the missing field
  82     if [[ "$module" == "" ]] ; then
  83         module="<missing_module_name>";
  84     fi
  85
  86     # extract rst file name from line and do some formatting to use it later as an array name
  87     #echo "DBUG line: $line";
  88     rstfile=$(echo "$line" | grep -oP "[\w -]*\.rst");
  89     rstfile=$(echo -e ${rstfile} | tr '[:blank:]' '_');
  90     #echo "DBUG rst-file: $rstfile";
  91
  92     # count the number of warnings for the module/rstfile combination
  93     (( rstfile_array[$module | $rstfile]++ ));
  94
  95     # count the number of warnings for the single module
  96     #echo "DBUG $module | $rstfile | $message";
  97     (( module_array[$module]++ ));
  98
  99     # extract the warning message and do some formatting
 100     #message=$(echo "$line" | sed -r 's:^/.+WARNING\:\ ::');
 101     message=$(echo "$line" | sed -r 's:^.+WARNING\:\ ::');
 102     message=$(echo -e ${message} | tr '[:blank:]' '_');
 103     message=$(echo -e ${message} | tr '/' '_');
 104     message=$(echo -e ${message} | tr '.' '_');
 105
 106     # remove all characters from message which may cause problems in the shell
 107     message="$(echo -e "${message}" | sed -e 's/[^A-Za-z0-9_-]//g')";
 108     #echo "DBUG message=\"$message\""
 109
 110     # count the number of warnings for the single message (long version)
 111     message_long="$(echo -e "${message}")";
 112     (( message_long_array[$message_long]++ ))
 113
 114     # reduce length of message to group them more easily and then ...
 115     # count the number of warnings for the single message (short version)
 116     message_short="$(echo -e "${message}" | cut -c -20)";
 117     (( message_short_array[$message_short]++ ))
 118
 119 done
 120
 121 #format counter to have always x digits
 122 counter=$(printf "%05d" $counter);
 123 echo "                              ";
 124 echo " $counter LINES WITH WARNING IN FILE '$logfile'";
 125
 126 echo " ";
 127 echo "################################################################################";
 128 echo "~~~ MESSAGES LONG ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
 129 echo "################################################################################";
 130 echo " ";
 131
 132 #print array content and append to temporary outfile
 133 for i in "${!message_long_array[@]}"
 134 do
 135   m=$i;
 136   n=${message_long_array[$i]};
 137   ((nc += n))
 138   #format counter to have always x digits
 139   n=$(printf "%05d" $n);
 140   echo " $n | $m" >>tempoutfile;
 141 done
 142
 143 #format counter to have always x digits
 144 nc=$(printf "%05d" $nc);
 145 echo " $nc WARNINGS IN TOTAL WITH ${#message_long_array[@]} UNIQUE MESSAGES" >>tempoutfile;
 146
 147 #print a sorted version of the temporary outfile
 148 sort -br tempoutfile
 149
 150 # clean up
 151 rm tempoutfile
 152 nc=0
 153
 154 echo " ";
 155 echo "################################################################################";
 156 echo "~~~ MESSAGES SHORTENED (FOR BETTER GROUPING) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
 157 echo "################################################################################";
 158 echo " ";
 159
 160 #print array content and append to temporary outfile
 161 for i in "${!message_short_array[@]}"
 162 do
 163   m=$i;
 164   n=${message_short_array[$i]};
 165   ((nc += n))
 166   #format counter to have always x digits
 167   n=$(printf "%05d" $n);
 168   echo " $n | $m" >>tempoutfile;
 169 done
 170
 171 #format counter to have always x digits
 172 nc=$(printf "%05d" $nc);
 173 echo " $nc WARNINGS IN TOTAL WITH ${#message_short_array[@]} UNIQUE MESSAGES" >>tempoutfile;
 174
 175 #print a sorted version of the temporary outfile
 176 sort -br tempoutfile
 177
 178 # clean up
 179 rm tempoutfile
 180 nc=0
 181
 182 echo " ";
 183 echo "################################################################################";
 184 echo "~~~ MODULES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
 185 echo "################################################################################";
 186 echo " ";
 187
 188 #create temporary outfile
 189 for i in "${!module_array[@]}"
 190 do
 191   m=$i;
 192   n=${module_array[$i]};
 193   ((nc += n))
 194   n=$(printf "%05d" $n);
 195   echo " $n | $m" >>tempoutfile;
 196 done
 197
 198 #format counter to have always x digits
 199 nc=$(printf "%05d" $nc);
 200 echo " $nc WARNINGS IN TOTAL IN ${#module_array[@]} MODULES" >>tempoutfile;
 201
 202 #print a sorted version of the temporary outfile
 203 sort -br tempoutfile
 204 rm tempoutfile
 205 nc=0
 206
 207 echo " ";
 208 echo "################################################################################";
 209 echo "~~~ MODULES WITH RSTFILES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
 210 echo "################################################################################";
 211 echo " ";
 212
 213 #print array content and append to temporary outfile
 214 for i in "${!rstfile_array[@]}"
 215 do
 216   m=$i;
 217   n=${rstfile_array[$i]};
 218   ((nc += n))
 219   #format counter to have always x digits
 220   n=$(printf "%05d" $n);
 221   echo " $m | $n" >>tempoutfile;
 222 done
 223
 224 #format counter to have always x digits
 225 nc=$(printf "%05d" $nc);
 226 #in case the name (e.g) index.rst is used multiple times in the same module warnings are combined
 227 echo " $nc WARNINGS IN TOTAL IN APPROX. ${#rstfile_array[@]} RST FILES" >>tempoutfile;
 228
 229 #print a sorted version of the temporary outfile
 230 sort -b tempoutfile
 231
 232 # clean up
 233 rm tempoutfile
 234 nc=0
 235
 236 echo " ";
 237 echo "################################################################################";
 238 echo "~~~ RSTFILES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~";
 239 echo "################################################################################";
 240 echo " ";
 241
 242 #print array content and append to temporary outfile
 243 for i in "${!rstfile_array[@]}"
 244 do
 245   m=$i;
 246   n=${rstfile_array[$i]};
 247   ((nc += n))
 248   #format counter to have always x digits
 249   n=$(printf "%05d" $n);
 250   echo " $n | $m" >>tempoutfile;
 251 done
 252
 253 #format counter to have always x digits
 254 nc=$(printf "%05d" $nc);
 255 #in case the name (e.g) index.rst is used multiple times in the same module warnings are combined
 256 echo " $nc WARNINGS IN TOTAL IN APPROX. ${#rstfile_array[@]} RST FILES" >>tempoutfile;
 257
 258 #print a sorted version of the temporary outfile
 259 sort -br tempoutfile
 260
 261 # clean up
 262 rm tempoutfile
 263 nc=0
 264
 265 echo " ";
 266 exit
 267
 268 ###
 269 ### backup code for future extensions
 270 ###
 271
 272 #
 273 # Block_quote_ends_without_a_blank_line_unexpected_unindent
 274 # Bullet_list_ends_without_a_blank_line_unexpected_unindent
 275 # Citation_[\w-]_is_not_referenced
 276 # Citation_unit_test_is_not_referenced
 277 # Content_block_expected_for_the_code_directive_none_found
 278 # Content_block_expected_for_the_container_directive_none_found
 279 # Could_not_lex_literal_block_as_bash__Highlighting_skipped
 280 # Could_not_lex_literal_block_as_console__Highlighting_skipped
 281 # Could_not_lex_literal_block_as_guess__Highlighting_skipped
 282 # Could_not_lex_literal_block_as_json__Highlighting_skipped
 283 # Could_not_lex_literal_block_as_yaml__Highlighting_skipped
 284 # Definition_list_ends_without_a_blank_line_unexpected_unindent
 285 # document_isnt_included_in_any_toctree
 286 # download_file_not_readable
 287 # Duplicate_explicit_target_name
 288 # duplicate_label
 289 # Enumerated_list_ends_without_a_blank_line_unexpected_unindent
 290 # Error_in_code_directive
 291 # Error_in_code-block_directive
 292 # Error_in_image_directive
 293 # Explicit_markup_ends_without_a_blank_line_unexpected_unindent
 294 # Field_list_ends_without_a_blank_line_unexpected_unindent
 295 # Footnote_[0-9.*]_is_not_referenced
 296 # image_file_not_readable
 297 # Include_file
 298 # Inconsistent_literal_block_quoting
 299 # Inline_emphasis_start-string_without_end-string
 300 # Inline_interpreted_text_or_phrase_reference_start-string_without_end-string
 301 # Inline_strong_start-string_without_end-string
 302 # Inline_substitution_reference_start-string_without_end-string
 303 # Literal_block_ends_without_a_blank_line_unexpected_unindent
 304 # Literal_block_expected_none_found
 305 # Malformed_table
 306 # Pygments_lexer_name_asn_is_not_known
 307 # Title_level_inconsistent
 308 # Title_overline__underline_mismatch
 309 # Title_overline_too_short
 310 # Title_underline_too_short
 311 # toctree_contains_reference_to_nonexisting_document
 312 # Too_many_autonumbered_footnote_references_only_0_corresponding_footnotes_available
 313 # undecodable_source_characters_replacing_with
 314 # undefined_label
 315 # Unexpected_indentation
 316 # Unknown_directive_type_clode-block
 317 # unknown_document
 318 # Unknown_target_name