2 #set -x # uncomment for bash script debugging
4 ### ============================================================================
5 ### Licensed under the Apache License, Version 2.0 (the "License");
6 ### you may not use this file except in compliance with the License.
7 ### You may obtain a copy of the License at
9 ### http://www.apache.org/licenses/LICENSE-2.0
11 ### Unless required by applicable law or agreed to in writing, software
12 ### distributed under the License is distributed on an "AS IS" BASIS,
13 ### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ### See the License for the specific language governing permissions and
15 ### limitations under the License.
16 ### ============LICENSE_END=====================================================
22 ### Thomas Kulik, Deutsche Telekom AG, 2020 - 2021
25 ### Retrieves a full list of ONAP repos from gerrit including their state.
26 ### Clones all active repos of the ONAP master branch plus other requested ONAP
27 ### branches. Then the script does some docs related analyses depending on the
28 ### clone results. It creates logfiles containing filtered results. In addition
29 ### a table.csv is created which can be imported into a spreadsheet.
30 ### Also a zip-file is created which contains all the results.
33 ### - in the output, repo names are shown in square brackets for readability
34 ### e.g [aai/aai-common]/docs/release-notes.rst
35 ### - in the table.csv file you see data for the requested branch if available.
36 ### if not available, data is retrieved from the master branch. it will be
37 ### denoted in round brackets, e.g. (3) (tox.ini)
45 ### SOME HELPING COMMANDS TO PROCESS LOG FILES:
47 ### curl -s https://git.onap.org/ | grep "^<tr><td class='toplevel-repo'><a title='" | sed -r "s:^<tr><td class='toplevel-repo'><a title='::" | sed -r "s:'.*::"
49 ### remove branchname from the line:
50 ### cat frankfurt_repoclone.log | sed 's:frankfurt|::'
52 ### list only image names
53 ### cat master_dockerimagesfull.log | grep image | sed -r 's:image\:::' | sed -r 's:^ +::' | sed '/^[[:space:]]*$/d'
55 ### more interesting stuff ...
56 ### curl https://gerrit.onap.org/r/projects/?d
57 ### LONG: curl -s 'https://gerrit.onap.org/r/projects/?d' | awk '{if(NR>1)print}' | jq -c '.[] | {id, state}' | sed -r 's:%2F:/:g' | sed -r 's:["{}]::g' | sed -r 's:id\:::' | sed -r 's:,state\::|:' | sed '/All-Projects/d' | sed '/All-Users/d'
58 ### SHORT: curl -s 'https://gerrit.onap.org/r/projects/?d' | awk '{if(NR>1)print}' | jq -c '.[] | {id, state}' | sed -r 's:%2F:/:g; s:["{}]::g; s:id\:::; s:,state\::|:; /All-Projects/d; /All-Users/d'
61 script_version="1.6 (2021/03/30)"
63 # save command for the restart with logging enabled
66 fullcommand="${command} ${arguments}"
75 echo " checkdocs.sh Version ${script_version}"
78 echo " ./checkdocs.sh <arguments> "
81 echo " -u|--user username "
82 echo " linux foundation username used to clone ONAP repositories"
84 echo " -b|--branches branch1,branch2,branch3 "
85 echo " list of branches to be cloned. master is automatically "
86 echo " added to the list. do not add manually! "
89 echo " development-mode - limits number of repos to be cloned "
95 echo "*******************************************************************************"
98 # remove lockfile in case script is interrupted
99 trap InterruptedScript SIGINT SIGTERM SIGHUP # SIGKILL/SIGSTOP cannot be trapped, so they are not listed
100 function InterruptedScript {
102 echo "Script was interrupted."
103 if [ -f "$lockfile" ] ; then # quoted: an unset/empty lockfile must not collapse to the always-true one-argument test
110 ### arguments handling
122 if [ -n "$2" ] && [ "${2:0:1}" != "-" ]; then # quoted so a value starting with whitespace or a glob character cannot break the test
126 echo "Error: Argument for $1 is missing" >&2
132 if [ -n "$2" ] && [ "${2:0:1}" != "-" ]; then # quoted so a value starting with whitespace or a glob character cannot break the test
136 echo "Error: Argument for $1 is missing" >&2
141 -*|--*=) # unsupported flags
142 echo "Error: Unsupported argument $1" >&2
146 *) # preserve positional arguments
153 # set positional arguments in their proper place
154 eval set -- "$PARAMS"
156 # old: declare -a branches=("master" "frankfurt" "guilin")
157 if [[ $branches_csv == "" || $lfusername == "" ]]; then
162 # master branch is automatically added and must not be part of the user arguments
163 if [[ $branches_csv == *"master"* ]]; then
167 # clone master first, then the other branches
168 branches_csv="master,${branches_csv}"
170 # create the branches array by reading in the values from the variable
171 IFS=',' read -r -a branches <<< "${branches_csv}"
173 #echo "DBUG: devmode = \"${devmode}\""
174 #echo "DBUG: branches_csv = \"${branches_csv}\""
175 #echo "DBUG: lfusername = \"${lfusername}\""
176 #echo "DBUG: branches = \"${branches[@]}\""
178 # restart script with logging enabled
179 lockfile="checkdocs-runtime-lockfile"
180 if [ ! -f $lockfile ] ; then
182 echo "Restarting script with logging enabled."
183 ${fullcommand} 2>&1 | tee checkdocs.log
189 echo "checkdocs.sh Version ${script_version}"
192 # curl must be installed
193 if ! command -v curl &> /dev/null
195 echo "ERROR: curl command could not be found"
199 today=$(date '+%Y-%m-%d');
200 repolist="gerrit-repos-master-"$today".txt";
203 echo "Retrieving a full list of ONAP repositories (master) from gerrit.onap.org."
205 # retrieve the full repolist from gerrit
206 # workaround because of the (wrong?) response of gerrit.onap.org which makes jq command fail
207 # "| awk '{if(NR>1)print}'" filters the first line of the response so that jq will work again (thx marek)
208 curl -s 'https://gerrit.onap.org/r/projects/?d' | awk '{if(NR>1)print}' | jq -c '.[] | {id, state}' | sed -r 's:%2F:/:g; s:["{}]::g; s:id\:::; s:,state\::|:; /All-Projects/d; /All-Users/d' >./$repolist
210 # process the created repolist and try to clone the projects from the mirror
212 source="git://cloud.onap.org/mirror"
213 echo "Using \"${source}\" as the source and username \"${lfusername}\" for cloning the repositories."
214 echo "Start cloning of repositories ..."
216 for branch in "${branches[@]}"
225 branch_upper=$(echo "${branch}" | tr '[:lower:]' '[:upper:]')
237 if [[ $devmode == "TRUE" ]]; then
238 devcounter=$((devcounter+1))
241 if [[ $devcounter -lt "50" ]]; then
243 if [[ $devmode == "TRUE" ]]; then
244 echo "INFO: devmode! counter=${devcounter}"
248 reponame=$(echo $line | awk -F "|" '{print $1}');
249 repostate=$(echo $line | awk -F "|" '{print $2}');
253 if [[ $repostate == "ACTIVE" ]] || [[ $repostate == "READ_ONLY" ]]; then
254 echo "Cloning \"${branch}\" branch of \"${repostate}\" project ${reponame}..."
256 # previously used: git clone --branch ${branch} --recurse-submodules ssh://${lfusername}@gerrit.onap.org:29418/$reponame ./$reponame
257 # clone script Jess: git clone "git://cloud.onap.org/mirror/${i}" "${LOCALNAME}"
258 git clone --branch ${branch} --recurse-submodules ${source}/${reponame} ./${reponame}
261 if [[ ! ${gitexitcode} == "0" ]]; then
262 errormsg=$(tail -1 ../checkdocs.log)
267 # repoclone.log format: $1=gitexitcode|$2=reponame|$3=repostate|$4=errormsg
268 echo "${gitexitcode}|${reponame}|${repostate}|${errormsg}" | tee -a ${branch}_repoclone.log
270 #elif [[ $repostate == "READ_ONLY" ]]; then
271 #echo "-|${reponame}|${repostate}|ignored" | tee -a ${branch}_repoclone.log
273 echo "-|${reponame}|unknown repo state \"${repostate}\"|-" | tee -a ${branch}_repoclone.log
277 if [[ ${gitexitcode} == "0" ]]; then
279 printf "\ndocs directories:\n"
280 find ./$reponame -type d -name docs | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_docs.log
282 printf "\nrst files:\n"
283 find ./$reponame -type f -name '*.rst' | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_rstfiles.log # pattern quoted so the shell does not expand *.rst against the current directory
285 printf "\nrelease notes rst:\n"
286 find ./$reponame -type f | grep 'release.*note.*.rst' | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_releasenotes.log
288 printf "\ntox.ini files:\n"
289 find ./$reponame -type f -name tox.ini | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_toxini.log
291 printf "\nconf.py files:\n"
292 find ./$reponame -type f -name conf.py | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_confpy.log
294 printf "\nindex.rst files:\n"
295 find ./$reponame -type f -name index.rst | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_indexrst.log
297 printf "\nINFO.yaml files:\n"
298 find ./$reponame -type f -name INFO.yaml | sed -r 's:./::' | sed -r s:${reponame}:[${reponame}]: | tee -a ${branch}_infoyaml.log
302 # end devcounter loop
311 find . -type f -name values.yaml -print -exec grep "image:" {} \; | sed -r 's:^ +::' | tee ${branch}_dockerimagesfull.log
313 ls --format single-column -d */ | sed 's:/$::' | tee ${branch}_directories.log
315 cat ${branch}_dockerimagesfull.log | grep image | sed -r 's:image\:::' | sed -r 's:^ +::' | sed '/^[[:space:]]*$/d' >${branch}_dockerimages.log
317 ls --format single-column -d oom/kubernetes/*/ | tee ${branch}_oomkubernetes.log
321 readarray -t docs_array < ./${branch}_docs.log;
323 for line in "${docs_array[@]}"
326 echo $line | tee -a ${branch}_docsconfig.log
328 # remove [ and ] which distinguish the project name in the output
329 line=$(echo $line | sed -r 's:\[:: ; s:\]::')
331 if [ -f ./${line}/conf.py ] ; then
332 echo " conf.py ..... found" | tee -a ${branch}_docsconfig.log
334 echo " conf.py ..... NOT FOUND" | tee -a ${branch}_docsconfig.log
337 if [ -f ./${line}/index.rst ] ; then
338 echo " index.rst ... found" | tee -a ${branch}_docsconfig.log
340 echo " index.rst ... NOT FOUND" | tee -a ${branch}_docsconfig.log
343 if [ -f ./${line}/tox.ini ] ; then
344 echo " tox.ini ..... found" | tee -a ${branch}_docsconfig.log
346 echo " tox.ini ..... NOT FOUND" | tee -a ${branch}_docsconfig.log
349 echo " " | tee -a ${branch}_docsconfig.log
357 ### build a csv table that combines results
361 # csv column #1: project name
364 readarray -t array < ./${repolist};
368 for line in "${array[@]}"
370 reponame=$(echo $line | awk -F "|" '{print $1}');
371 project=$(echo $reponame | sed 's:/.*$::')
372 #echo "DBUG: reponame=${reponame}"
373 #echo "DBUG: project=${project}"
384 # csv column #2: repo name
387 readarray -t array < ./${repolist};
389 csv[i]="${csv[i]},MASTER repo name"
391 for line in "${array[@]}"
393 reponame=$(echo $line | awk -F "|" '{print $1}');
394 csv[i]="${csv[i]},${reponame}"
402 # csv column #3: repo state
405 readarray -t array < ./${repolist};
407 csv[i]="${csv[i]},MASTER repo state"
409 for line in "${array[@]}"
411 repostate=$(echo $line | awk -F "|" '{print $2}');
412 csv[i]="${csv[i]},${repostate}"
420 # csv column #4: clone message
423 readarray -t array < ./${branch}_repoclone.log;
425 csv[i]="${csv[i]},${branch_upper} clone message"
427 for line in "${array[@]}"
429 # repoclone.log format: $1=gitexitcode|$2=reponame|$3=repostate|$4=errormsg
430 errormsg=$(echo $line | awk -F "|" '{print $4}');
431 csv[i]="${csv[i]},${errormsg}"
439 # csv column #5: lifecycle state
440 # extracted from the INFO.yaml
443 readarray -t array < ./${repolist};
445 csv[i]="${csv[i]},project lifecycle state"
447 for line in "${array[@]}"
449 reponame=$(echo $line | awk -F "|" '{print $1}');
450 if [ -f ./${reponame}/INFO.yaml ] ; then
451 # check if repo/branch has a INFO.yaml
452 lifecycleproject=$(grep '^project: ' ./${reponame}/INFO.yaml | awk -F ":" '{print $2}' | sed 's:^ ::' | sed "s:'::g" | tr '[:upper:]' '[:lower:]' | sed 's/\r$//')
453 lifecyclestate=$(grep '^lifecycle_state: ' ./${reponame}/INFO.yaml | awk -F ":" '{print $2}' | sed 's:^ ::' | sed "s:'::g" | tr '[:upper:]' '[:lower:]' | sed 's/\r$//')
454 elif [ ${branch} != "master" ] && [ -f ../master/${reponame}/INFO.yaml ] ; then
455 # if current branch is not master AND if INFO.yaml not found in the current repo/branch THEN use INFO.yaml of repo/master if available
456 #echo "DBUG: branch=${branch} - checking master for INFO.yaml"
457 lifecycleproject=$(grep '^project: ' ../master/${reponame}/INFO.yaml | awk -F ":" '{print $2}' | sed 's:^ ::' | sed "s:'::g" | tr '[:upper:]' '[:lower:]' | sed 's/\r$//')
458 lifecyclestate=$(grep '^lifecycle_state: ' ../master/${reponame}/INFO.yaml | awk -F ":" '{print $2}' | sed 's:^ ::' | sed "s:'::g" | tr '[:upper:]' '[:lower:]' | sed 's/\r$//')
459 lifecyclestate="(${lifecyclestate})"
461 lifecyclestate="INFO.yaml not found"
463 #echo "DBUG: working dir is ...";pwd
464 #echo "DBUG: lifecycleproject=${lifecycleproject}"
465 #echo "DBUG: lifecyclestate=${lifecyclestate}"
466 csv[i]="${csv[i]},${lifecyclestate}"
471 unset lifecycleproject
475 # csv column #6: RELEASE component (yes|maybe|unknown)
476 # to be filled with values of the planned release config file maintained by
477 # the onap release manager
480 # repoclone.log format: $1=gitexitcode|$2=reponame|$3=repostate|$4=errormsg
481 readarray -t array < ./${branch}_repoclone.log;
483 csv[i]="${csv[i]},${branch_upper} component"
485 for line in "${array[@]}"
488 # repoclone.log format: $1=gitexitcode|$2=reponame|$3=repostate|$4=errormsg
489 gitexitcode=$(echo $line | awk -F "|" '{print $1}');
490 reponame=$(echo $line | awk -F "|" '{print $2}');
491 repostate=$(echo $line | awk -F "|" '{print $3}');
492 errormsg=$(echo $line | awk -F "|" '{print $4}');
494 if [[ ${repostate} == "ACTIVE" && ${gitexitcode} == "0" ]]; then
495 releasecomponent="yes"
496 elif [[ ${repostate} == "ACTIVE" ]]; then # [[ ]] avoids word splitting: repostate may be empty or contain spaces
497 #elif [[ ${repostate} == "ACTIVE" && ${gitexitcode} == "128" ]]; then
498 releasecomponent="maybe"
499 elif [[ ${repostate} == "READ_ONLY" && ${gitexitcode} == "0" ]]; then
500 releasecomponent="yes"
501 elif [[ ${repostate} == "READ_ONLY" ]]; then
502 releasecomponent="maybe"
504 releasecomponent="unknown"
507 csv[i]="${csv[i]},${releasecomponent}"
516 unset releasecomponent
519 # csv column #7: docs (at repo root directory only; no recursive search!)
520 # csv column #8: conf.py
521 # csv column #9: tox.ini
522 # csv column #10: index.rst
524 # columns are filled with values from requested branch.
525 # if data is not available values from master branch are used.
526 # to identify master branch values, data is put into round brackets "(...)"
529 readarray -t array < ./${repolist};
531 csv[$i]="${csv[i]},docs,conf.py,tox.ini,index.rst"
533 for line in "${array[@]}"
535 line=$(echo $line | sed 's:|.*$::')
536 #echo "DBUG: line=${line}"
540 if [ -d ./${line}/docs ] ; then
542 elif [ -d ../master/${line}/docs ] ; then
549 if [ -f ./${line}/docs/conf.py ] ; then
550 docs="${docs},conf.py"
551 elif [ -f ../master/${line}/docs/conf.py ] ; then
552 docs="${docs},(conf.py)"
557 # tox.ini (check docs dir and also check project root dir)
558 if [ -f ./${line}/docs/tox.ini ] || [ -f ./${line}/tox.ini ]; then
559 docs="${docs},tox.ini"
560 # tox.ini @ branch/docs dir
561 if [ -f ./${line}/docs/tox.ini ] ; then
564 # tox.ini @ branch/project root dir
565 if [ -f ./${line}/tox.ini ] ; then
568 elif [ -f ../master/${line}/docs/tox.ini ] || [ -f ../master/${line}/tox.ini ]; then
569 docs="${docs},(tox.ini"
570 # tox.ini @ master/docs dir
571 if [ -f ../master/${line}/docs/tox.ini ] ; then
574 # tox.ini @ master/project root dir
575 if [ -f ../master/${line}/tox.ini ] ; then
578 # just add a round bracket at the end of the value
581 # no tox.ini found in docs or root dir
586 if [ -f ./${line}/docs/index.rst ] ; then
587 docs="${docs},index.rst"
588 elif [ -f ../master/${line}/docs/index.rst ] ; then
589 docs="${docs},(index.rst)"
594 #echo "DBUG: docs=${docs}"
595 line="${csv[i]},${docs}"
604 # csv column #11: index.html@RTD accessibility check
605 # csv column #12: index.html url
608 readarray -t array < ./${branch}_repoclone.log;
610 csv[i]="${csv[i]},index.html@RTD,index.html url"
612 for line in "${array[@]}"
614 # repoclone.log format: $1=gitexitcode|$2=reponame|$3=repostate|$4=errormsg
615 gitexitcode=$(echo $line | awk -F "|" '{print $1}');
616 reponame=$(echo $line | awk -F "|" '{print $2}');
617 repostate=$(echo $line | awk -F "|" '{print $3}');
618 errormsg=$(echo $line | awk -F "|" '{print $4}');
623 # this script works only with release "frankfurt" and later because
624 # earlier releases are using submodule structure for documentation files
625 if echo "$branch" | grep -q '^[abcde]'; then
626 curl_result="unsupported release"
630 # we are working on "frankfurt" branch or later ...
631 if [[ ${repostate} == "ACTIVE" ]] || [[ ${repostate} == "READ_ONLY" ]]; then
633 # OPTIONAL: USE ALSO GITEXITCODE AS A FILTER CRITERIA ???
636 # important! only doc project needs a different url base
637 if [[ ${reponame} == "doc" ]]; then
638 url_start="https://docs.onap.org"
640 url_start="https://docs.onap.org/projects/onap"
645 # "master" branch documentation is available as "latest" in RTD
646 if [[ ${url_branch} == "master" ]]; then
650 # replace all / characters in repo name with - character
651 url_repo=$(echo ${reponame} | sed -r 's/\//-/g')
652 url_file="index.html"
655 if [[ ${reponame} == "doc" ]]; then
656 # build the full url for the doc project
657 url="${url_start}/${url_lang}/${url_branch}/${url_file}"
659 # build the full url for the other projects
660 url="${url_start}-${url_repo}/${url_lang}/${url_branch}/${url_file}"
662 #echo "DBUG: url=$url"
664 # test accessibility of url
665 curl --head --silent --fail "${url}?${unique}" >/dev/null
668 # convert numeric results to text
669 if [ "${curl_result}" = "0" ]; then
670 curl_result="accessible"
671 elif [ "${curl_result}" = "22" ]; then
672 curl_result="does not exist"
674 curl_result="ERROR:${curl_result}"
677 # url does not exist for this branch.
678 # in case the requested url is not already for "master" branch,
679 # we try to access the url of the master branch and denote the
680 # result by using round brackets (result)
681 if [[ ${curl_result} == "does not exist" && ! $branch == "master" ]]; then
683 # build the full (master/latest) url
684 url="${url_start}-${url_repo}/${url_lang}/latest/${url_file}"
685 #echo "DBUG: url=$url"
687 # test accessibility of url in "master branch" (latest)
688 curl --head --silent --fail "${url}?${unique}" >/dev/null
690 # denote result as a value from "master" branch (latest)
693 # convert numeric results to text
694 if [ "${curl_result}" = "0" ]; then
695 curl_result="(accessible)"
696 elif [ "${curl_result}" = "22" ]; then
697 curl_result="(does not exist)"
699 curl_result="(ERROR:${curl_result})"
704 # repostate IS NOT ACTIVE OR READ_ONLY - no curl test required
710 echo "$url ... $curl_result"
711 csv[i]="${csv[i]},${curl_result},${url}"
712 #echo "DBUG: csv line=${csv[i]}"
718 # csv column #13: release notes
721 readarray -t array < ../${repolist};
723 csv[i]="${csv[i]},release notes"
725 for line in "${array[@]}"
727 line=$(echo $line | sed 's:|.*$::')
728 #echo "DBUG: line=\"${line}\""
732 # put repo name in square brackets for increased grep hit rate
733 # escape minus and bracket characters to avoid problems with the grep command
734 #repo_grepable=$(echo ${line} | sed -r s:${line}:[${line}]: | sed -r 's/-/\\-/g' | sed -r 's/\[/\\[/g' | sed -r 's/\]/\\]/g')
735 #echo "DBUG: repo_grepable=\"${repo_grepable}\""
737 # check if repo dir exists in this branch
738 if [ -d ./${line} ] ; then
739 # if yes, count release notes files in this branch (in repo root and its subdirectories)
740 relnote=$(find "./${line}" -type f | grep 'release.*note.*.rst' | wc -l);
741 #echo "DBUG: relnote=${relnote}"
742 # repo dir DOES NOT exist in this branch - so check if repo dir exists in MASTER branch
743 elif [ -d ../master/${line} ] ; then
744 # if yes, check if repo name appears in the MASTER releasenotes.log
745 # count release notes files in MASTER branch (in repo root and its subdirectories)
746 relnote=$(find "../master/${line}" -type f | grep 'release.*note.*.rst' | wc -l);
747 #echo "DBUG: relnote=${relnote}"
748 # put results in round brackets to show that this is MASTER data
749 relnote=$(echo ${relnote} | sed -r s:${relnote}:\(${relnote}\):)
753 #echo "DBUG: relnote=${relnote}"
755 line="${csv[i]},${relnote}"
766 # build the table.csv file
771 echo "$i" | tee -a ./${branch}_table.csv
775 # create data package for this branch and zip it
778 datadir=${branch}_data
780 cp $repolist $datadir
781 cp ${branch}_table.csv $datadir
782 cp ${branch}_*.log $datadir
783 zip -r ${datadir}.zip $datadir
785 # return from the branch directory
788 # return and work on the next requested branch ... or exit