release-notes/FC-6/xmlbeats README, NONE, 1.1 beatconvert, NONE, 1.1 beatlist, NONE, 1.1 files-map.txt, NONE, 1.1 steps-to-convert-FC6.txt, NONE, 1.1 steps-to-convert-v-1.txt, NONE, 1.1 to-do-fc5-errata-notes.txt, NONE, 1.1 to-do-fc5-gold-notes.txt, NONE, 1.1 wikixml2fdpxml, NONE, 1.1 xmlbeats, NONE, 1.1 xmlfix, NONE, 1.1

Paul W. Frields (pfrields) fedora-docs-commits at redhat.com
Sat Oct 28 22:21:56 UTC 2006


Author: pfrields

Update of /cvs/docs/release-notes/FC-6/xmlbeats
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv6395/FC-6/xmlbeats

Added Files:
	README beatconvert beatlist files-map.txt 
	steps-to-convert-FC6.txt steps-to-convert-v-1.txt 
	to-do-fc5-errata-notes.txt to-do-fc5-gold-notes.txt 
	wikixml2fdpxml xmlbeats xmlfix 
Log Message:
Add FC-6 branch. This branch should be used for errata updates, not devel -- although syncing them is fine.


--- NEW FILE README ---
Edit 'beatlist' to specify which wiki file names to convert to XML, then run './xmlbeats'.


--- NEW FILE beatconvert ---
#!/usr/bin/python -tt
#
# (C) 2006 Paul W. Frields.
# This file is licensed under the GNU General Public License (GPL) v2.

import os
import sys
import urllib
import httplib
from getpass import getpass
from time import sleep

"""Take a list of Beats from the official Fedora Project wiki,
download them, and convert to DocBook using a properly outfitted
MoinMoin wiki."""

### Globals
# Script name shown in the usage message.
myName = 'beatconvert'
# Only referenced by a commented-out debug line in this script.
debugging = True

# Source wiki: the raw beat pages are downloaded from beatUrl + <beat>.
beatSite = 'http://fedoraproject.org/wiki/'
beatDir = 'Docs/Beats/'
beatUrl = ''.join((beatSite, beatDir))

# Conversion wiki: beats are posted to, and rendered as DocBook from,
# convUrl + <beat>.
convSite = 'http://fedora-test.fedoraproject.org/'
convDir = 'fedora-docs/Docs/Beats/'
convUrl = ''.join((convSite, convDir))

# Local output directory, and the pause (passed to sleep()) after each beat.
beatFolder = 'Beats'
waitTime = 15
######

def print_usage():
    """Write the two-line usage summary for this script to standard output."""
    usageLines = ("Usage: %s <infile>" % myName,
                  "\t<infile>: list of Beats, one per line")
    for usageLine in usageLines:
        sys.stdout.write(usageLine + "\n")


if len(sys.argv) < 2:
    print_usage()
    sys.exit(1)
    
print "Using", beatUrl, "->", convUrl

try:
    beatFile = open(sys.argv[1], "r")
except:
    print "Problem opening file", sys.argv[1]
    sys.exit(2)

beatList = [beat.rstrip('\n') for beat in beatFile.readlines()]
beatFile.close()

beatData = {}
for beat in beatList:
    inUrl = ''.join((beatUrl, beat))
    outUrl = ''.join((convUrl, beat))
    beatData[beat] = [inUrl, outUrl]

print "Loaded", len(beatList), "beat names"


loginName = raw_input('Enter your wiki login name on the DocBook Moin: ')
password = getpass('Enter your password on the DocBook Moin: ')

conn = httplib.HTTPConnection(convSite[7:][:-1]) # hack to remove http:// /

#    if debugging: conn.debuglevel = 1
print "Logging in to target wiki..."
conn.request('POST', '/fedora-docs',
             urllib.urlencode({'action': 'login',
                               'login': 'Login',
                               'name': loginName,
                               'password': password}))
print 'Cookie monster want cookie!'
resp = conn.getresponse()

try:
    cookie = resp.getheader('Set-Cookie')
    cookie = cookie[:cookie.find(';')]
except:
    inp = raw_input("Couldn't get a cookie.  You can proceed " +
                    "anonymously, but results may not be\n" +
                    "guaranteed.  Continue? [y/N]")
    if inp not in ('y', 'Y'):
        sys.exit(0)


# Make a folder to put the DocBook in.
try:
    os.mkdir(beatFolder)
except OSError:
    beatFolder = raw_input("Couldn't make a directory 'Beats' in your " +
                           "current location.\nSpecify a new one: ")
    if beatFolder is '':
        print "Quitting then."
        sys.exit(0)
    try:
        os.mkdir(beatFolder)
    except:
        print "Failed, bailing."
        sys.exit(5)


for beat, data in beatData.items():
    print "Working on beat", beat
    inUrl = data[0]
    outUrl = data[1]
    data.append(''.join(urllib.urlopen(inUrl+'?action=raw').readlines()))
    print "Retrieved data for", beat
    savetext = data[2]
    submitStr = urllib.urlencode({'action': 'edit',
                                  'editor': 'text',
                                  'button_save': 'Save Changes',
                                  'savetext': savetext})

    print "Submitting beat", beat, "..."
    conn.request('POST', ''.join(('/', convDir, beat)), submitStr,
                 {'Content-Type': 'application/x-www-form-urlencoded',
                  'User-Agent': 'beatconvert',
                  'Cookie': cookie})
    resp = conn.getresponse()

    # Make sure things worked OK so we don't lock ourselves out...
    if resp.status != 200:
        print "Submission failed.  If this were a better script,",
        print "there would be a fallback here."
        sys.exit(6)
    print "Submitted", beat

    # Get the goods!
    convStr = urllib.urlencode({'action': 'RenderAsDocbook'})
    convUrl = data[1]
    print "Retrieving XML for", beat, "at", convUrl
    try:
        resp = ''.join(urllib.urlopen(''.join((convUrl, '?action=RenderAsDocbook'))).readlines())
    except:
        print "Retrieval failed.  If this were a better script,",
        print "there would be a fallback here."
        sys.exit(6)

    print "Retrieved", beat
    outXml = open(os.path.join(beatFolder, beat.replace('/', '')), "w")
    outXml.write(resp)
    outXml.close()
    print "Wrote XML for", beat

    print "Waiting for", waitTime, "seconds..."
    sleep(waitTime)

conn.close()

print "*** Finished! ***"
print "Don't forget to rename and format the XML files with indenting."


--- NEW FILE beatlist ---
Welcome
OverView
Feedback
Installer
ArchSpecific
ArchSpecific/PPC
ArchSpecific/x86
ArchSpecific/x86_64
PackageNotes
Kernel
Desktop
FileSystems
WebServers
Devel
Devel/Runtime
Devel/Tools
Devel/Tools/GCC
Security
Security/SELinux
Java
Multimedia
Entertainment
Virtualization
Xorg
DatabaseServers
I18n
BackwardsCompatibility
PackageChanges
Extras
Legacy
ProjectOverview
Colophon


--- NEW FILE files-map.txt ---
# map of how XML files in the release-notes module interact

RELEASE-NOTES-*.xml
    fdp-info-*.xml
        ../../docs-common/common/legalnotice-relnotes-*.xml
    Welcome-*.xml
    OverView-*.xml
    ../../docs-common/common/legalnotice-*.xml
    Feedback-*.xml
    Introduction-*.xml
    Installer-*.xml
    ArchSpecific-*.xml
        ArchSpecificPPC-*.xml
        ArchSpecificx86-*.xml
        ArchSpecificx86_64-*.xml
    Networking-*.xml
    PackageNotes-*.xml
        ServerTools-*.xml
        PackageNotesJava-*.xml
        Kernel-*.xml
        Security-*.xml
            SecuritySELinux-*.xml
        DevelopmentTools-*.xml
            DevelopmentToolsJava-*.xml
            DevelopmentToolsGCC-*.xml
        I18n-*.xml
        Printing-*.xml
        DatabaseServers-*.xml
        Multimedia-*.xml
        WebServers-*.xml
        Samba-*.xml
        Xorg-*.xml
        Entertainment-*.xml
    Legacy-*.xml
    PackageChanges-*.xml
    ProjectOverview-*.xml
    Colophon-*.xml

# Unused, but maybe we should use:

BackwardsCompatibility-*.xml
Desktop-*.xml
FileSystems-*.xml
FileServers-*.xml
SystemDaemons-*.xml






--- NEW FILE steps-to-convert-FC6.txt ---

0. Remove any existing "Beats" or "Fixed" folder in the local
   directory, and make sure the "beatlist" file contains all the beats
   you need to pull.  Include the parents of any sub-beats!


1. Use beatconvert to pull the beats from the main wiki to the test
   wiki and convert to XML.  **This will take several minutes because
   the test wiki disallows access if you hit it too often within a
   certain (unknown) span of time.  Pay attention and Ctrl+\ to quit
   if needed!**

   ./beatconvert beatlist

   The results are in the "Beats" folder.


2. Run xmlfix to get the section ID's and titles fixed.

   ./xmlfix Beats/

   The results are in the "Fixed" folder.


3. Edit, copy to the relnotes devel/en_US folder, and commit.  Don't
   forget to "make pot" also!


PWF, 2006-09-24


--- NEW FILE steps-to-convert-v-1.txt ---

1. Use xmlbeats to get the content local

2. Convert filenames that need it to match the WikiName
   GCC.xml => DevelopmentToolsGCC.xml
   ...

3. Run xmlformat on all the Beats
   cd release-notes/xmlbeats/Beats
   for i in *.xml; 
     do 
     # Run xmlformat with the nifty config file
     xmlformat -f /path/to/xmlformat-fdp.conf $i > tmpfile; 
     mv tmpfile $i; done

4. Remove the <?xml>, <DOCTYPE>, <article />, and <articleinfo />
   contents from each file.

5. Run xmldiff to get a diff; do not use -p (default) as the colored
   output is icky when piped to a file.

   cd release-notes/xmlbeats/Beats
   mkdir ../diffs
   for i in *.xml; 
     do 
     # Get a mirror of the file name without extension
     echo $i | sed 's/\.xml//' > tmpname;
     # Format "oldfile newfile"
     #   oldfile == XML in CVS
     #   newfile == XML from Wiki
     xmldiff -u ../../`cat tmpname`-en.xml $i > ../diffs/`cat tmpname`.diff;
   done


--- NEW FILE to-do-fc5-errata-notes.txt ---

1. Update parent XML to call all beats in a flat namespace to match
   the wiki Docs/Beats page.  DONE

2. Add top-level sn-BeatName ID attributes for each file. 

3. Fix all admonition tables
   - fix table, or
   - make a proper admonition  

4. Fix missing version number:  

   http://fedoraproject.org/wiki/Docs/Beats?action=fullsearch&context=180&value=GetVal&fullsearch=Text#preview

   grep "Core  " *xml
   grep "Core ." *xml

5. Search all <screen> tags and fix the line breaks; may require
   injection of fresh content  
   - look for solo-list elements surrounding <screen>
     grep -B2 "<screen>" *.xml | grep listitem  

6. Look for unnecessary linebreaks around <code/>, it is being
  treated as a block.  Is this from xmlformat or the wiki output?

7. Watch for over sub-sectioning
   - have to build to notice?

8. When done, grep all XML files for:
   grep "code> ," *xml
   grep "code> ." *xml
   grep "Core  " *xml
   grep "Core ." *xml
   grep "Core ," *xml

## non-essential

9. Figure out how to have a @@RELNAME@@ variable.

10. Add in the release name?


?. Add call to every file to ../locale-entities.xml - scriptable NOT
   NEEDED 

X. Update .pot file? AUTOMATIC
 


## to-do -- Clean-up for the Wiki

1. Change all titles to not follow format of Docs/Beats/BeatName

2. Flatten the sub-sections a bit, where needed, avoiding orphaned
   sections



--- NEW FILE to-do-fc5-gold-notes.txt ---

1. Update parent XML to call all beats in a flat namespace to match
   the wiki Docs/Beats page.  DONE

2. Add top-level sn-BeatName ID attributes for each file. DONE

3. Fix all admonition tables
   - fix table, or
   - make a proper admonition  DONE

4. Fix missing version number:  DONE

   http://fedoraproject.org/wiki/Docs/Beats?action=fullsearch&context=180&value=GetVal&fullsearch=Text#preview

   grep "Core  " *xml
   grep "Core ." *xml

5. Search all <screen> tags and fix the line breaks; may require
   injection of fresh content  DONE
   - look for solo-list elements surrounding <screen>
     grep -B2 "<screen>" *.xml | grep listitem  

6. Watch for over sub-sectioning
   - have to build to notice?

7. Figure out how to have a @@RELNAME@@ variable.

8. Add in the release name.


?. Add call to every file to ../locale-entities.xml - scriptable NOT
   NEEDED 

X. Update .pot file? AUTOMATIC
 


Clean-up for the Wiki

1. Change all titles to not follow format of Docs/Beats/BeatName

2. Flatten the sub-sections a bit, where needed, avoiding orphaned
   sections



--- NEW FILE wikixml2fdpxml ---
#!/bin/bash
#
# This file can be completely replaced with a better tool written in 
# $LANGUAGE of someone's choice
#
# Original shell script - 29-Jan-2005
# kwade at redhat.com

# Manually rename some files to include their wiki namespace
#echo "Renaming Wiki files."
#mv Beats/PPC.xml Beats/ArchSpecificPPC.xml
#mv Beats/x86_64.xml Beats/ArchSpecificx86_64.xml
#mv Beats/x86.xml Beats/ArchSpecificx86.xml
#mv Beats/GCC.xml Beats/DevelToolsGCC.xml
#mv Beats/SELinux.xml Beats/SecuritySELinux.xml
#echo "Finished renaming files."

# Fix the DocType header from bad Wiki output
#ls Beats/ > xmlfiles
#for i in `cat xmlfiles`;
#do
#  sed s'/DocBook V4\.4/DocBook XML V4\.4/g' Beats/$i > tmpfile;
#  mv tmpfile Beats/$i;
#  echo "DOCTYPE header fixed for" $i
#done
#rm xmlfiles
#echo "Done"


# Add the base language extension to the files
#ls Beats/ > xmlfiles
#for i in `cat xmlfiles`;
#  do
#  echo $i | sed 's/.xml/-en.xml/g' > newfilename;
#  mv Beats/$i Beats/`cat newfilename`;
#done
#rm xmlfiles newfilename
#echo "done"

# Right here is where we want to call perl-fu or python-fu
# to follow this pseudo-code
# 
# for each(<section>); 
#  do
#    get(contents of <title></title>) == $title;
#    replace(" " with "-") == $idattrib;
#    insert($idattrib) -> <section id="$idattrib">;
#  done

# We need to convert the targets of XREFs somehow

# This script uses the FDP implementation of xmldiff
# found in cvs.fedora:/cvs/docs/docs-common/bin/
#
# This script expects to be run in-place in
# the release-notes/xmlbeats module, as the paths
# are relative to that point
#
# $Id:
#
# First version kwade at redhat.com 2006-01-04 -- 0.1

# Variables
#XMLDIFF="../../docs-common/bin/xmldiff"
#XMLDIFF_OPTIONS="-p" # colored unified diff
#BEATPATH="./Beats"
#DBPATH=".."
#FILEEXT="*xml"

# Actions
# Run xmldiff against the beat and canonical XML

#for i in $BEATPATH/$FILEEXT;
#  do $XMLDIFF $XMLDIFF_OPTIONS $i

# Move the XML to the build directory
# mv Beats/*.xml ../

# Fix section names for the top-level
#######################################
# Give the first bare <section> at or after the </articleinfo> line an id.
# sed works one line at a time, so a pattern spanning two lines can never
# match; awk tracks "have we passed </articleinfo>" instead.
# Arguments: $1 - XML file to rewrite in place
#            $2 - value for the id attribute
# Returns:   0 on success, non-zero if the file could not be rewritten
#######################################
add_section_id() {
  local file=$1
  local id=$2
  local tmp
  tmp=$(mktemp) || return 1
  if awk -v id="$id" '
      /<\/articleinfo>/ { seen = 1 }
      seen && !done {
        pos = index($0, "<section>")
        if (pos > 0) {
          # 9 is the length of "<section>".
          $0 = substr($0, 1, pos - 1) "<section id=\"" id "\">" substr($0, pos + 9)
          done = 1
        }
      }
      { print }
    ' "$file" > "$tmp"; then
    # Copy the contents back rather than mv, so the original file keeps its
    # permissions.
    cat "$tmp" > "$file" || { rm -f -- "$tmp"; return 1; }
    rm -f -- "$tmp"
  else
    rm -f -- "$tmp"
    return 1
  fi
}

# Every XML file in the current directory gets the id sn-<file name without
# .xml> on its top-level section.
for i in *.xml; do
  # With no matches the glob stays literal; skip it.
  [ -e "$i" ] || continue
  snID="sn-${i%.xml}"
  echo "Section name ${snID} for ${i}"
  if add_section_id "$i" "$snID"; then
    echo "${i} has a new section id"
  else
    echo "Could not update ${i}" >&2
  fi
done



--- NEW FILE xmlbeats ---
#!/bin/sh
#
# Download every beat named in ./beatlist from the wiki through an external
# Moin-to-DocBook converter, fix the DOCTYPE string, and reformat the result
# with xmlformat.  Output goes to Beats/<name>.xml, with any "/" removed from
# the beat name.  Run from the directory that contains "beatlist".

WIKIURL="http://fedoraproject.org/wiki/"
CONVERTERURL="http://www.n-man.com/moin2docbook.htm"
XMLFORMATCONF="../../docs-common/bin/xmlformat-fdp.conf"

# Check for the list before the old Beats folder is deleted.
if [ ! -r beatlist ]; then
	echo "Cannot read 'beatlist' in the current directory." >&2
	exit 1
fi
PAGES="$(cat beatlist)"

rm -rf Beats
mkdir -p Beats

# $PAGES is deliberately unquoted: one beat name per whitespace-separated word.
for PAGEBASE in $PAGES; do
	PAGENAME="Docs/Beats/${PAGEBASE}"
	# Percent-encode "/" and ":" so the page name can be embedded in the
	# converter's url= query parameter.
	PAGEENCODED="$(printf '%s\n' "$PAGENAME" | sed -e 's/\//%2F/g' -e 's/:/%3A/g')"
	PAGEOUT="Beats/$(printf '%s\n' "$PAGEBASE" | sed 's/\///g').xml"
	# printf instead of "echo -en", which /bin/sh is not required to support.
	printf '"%s" => "%s"...' "$PAGENAME" "$PAGEOUT"
	wget -q "${CONVERTERURL}?submit=submit&url=${WIKIURL}${PAGEENCODED}%3Faction=raw" -O "${PAGEOUT}"
	sed -i 's/DocBook V4\.4/DocBook XML V4\.4/g' "${PAGEOUT}"
	# Reformat the file that was just downloaded.  (This used to reference
	# an unset variable, so xmlformat and mv never saw the file.)
	if xmlformat -f "$XMLFORMATCONF" "${PAGEOUT}" > tmpfile; then
		mv tmpfile "${PAGEOUT}"
	else
		# Keep the unformatted download rather than overwrite it with
		# partial output.
		rm -f tmpfile
		printf ' (xmlformat failed, left unformatted)' >&2
	fi
	printf ' done.\n'
done



--- NEW FILE xmlfix ---
#!/bin/bash
#
# Take the output files from the "beatconvert" script and make them
# palatable for updating CVS.
#

# Print the command-line synopsis to standard output; the program name is
# taken from $0 and the documented default beat directory from $PWD.
print_usage() {
    printf '%s\n' \
        "Usage: $0 [<beatdir>] [<docs-common-dir>]" \
        "  <beatdir>: folder containing conversions of Beats (default $PWD)" \
        "  <docs-common-dir>: location of docs-common module"
}

case "$1" in
    -h|--help )
        print_usage
        exit 0
        ;;
    * )
        ;;
esac

# The usage text promises $PWD as the default; an empty BEATDIR would make
# the loop below glob "/*".
BEATDIR="${1:-$PWD}"
COMMON="${2:-../../../docs-common}"

if [ ! -d "$COMMON" ]; then
    echo "$COMMON is not the docs-common you're looking for."
    exit 1
fi

XMLFORMAT="${COMMON}/bin/xmlformat"
XMLFORMATCONF="${COMMON}/bin/xmlformat-fdp.conf"

# Brace groups, not ( ... ): an "exit" inside a subshell only leaves the
# subshell and the script would carry on regardless.
[ -f "$XMLFORMAT" ] || { echo "No xmlformat found"; exit 1; }
[ -f "$XMLFORMATCONF" ] || { echo "No xmlformat-fdp.conf found"; exit 1; }

mkdir -p Fixed/ || { echo "Couldn't make a directory here"; exit 1; }

for i in "$BEATDIR"/*; do
    # Skip sub-directories and the literal pattern left by an empty folder.
    [ -f "$i" ] || continue

    BEATNAME="$(basename "$i")"
    # Results belong in the Fixed/ folder created above.
    OUTXML="Fixed/${BEATNAME}.xml"

    "$XMLFORMAT" -f "$XMLFORMATCONF" "$i" > "$OUTXML"

    # Turn the converter's <article> wrapper into a <section>.
    sed -i 's@<\(/\)\?article>@<\1section>@' "$OUTXML"
    sed -i 's@<!DOCTYPE article@<!DOCTYPE section@' "$OUTXML"

    # Yes this is hacky. So sue me.
    # Every line starting with "<section" in column 0 gets id="sn-<beat>";
    # the id uses the beat's file name, not the path it was read from.
    sed -i "s@^<section@<section id=\"sn-${BEATNAME}\"@" "$OUTXML"

    # Take the title from the <h2> on the first "RAW HTML:" line.
    NEWTITLE=$(grep -m 1 'RAW HTML:' "$OUTXML" | sed 's@.\+<h2>\([^<]\+\).\+@\1@')
    if [ -n "$NEWTITLE" ]; then
        # Escape the characters that are special in the replacement text.
        SAFETITLE=$(printf '%s\n' "$NEWTITLE" | sed 's/[&@\\]/\\&/g')
        sed -i "s@^  <title>.\+@  <title>${SAFETITLE}</title>@" "$OUTXML"
    fi
done




More information about the Fedora-docs-commits mailing list