release-notes/devel/xmlbeats beatconvert,1.5,1.6

Tue Apr 3 03:17:27 UTC 2007

Author: pfrields

Update of /cvs/docs/release-notes/devel/xmlbeats
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv25890

Modified Files:
	beatconvert 
Log Message:
Fix beatconvert script to only pull DocBook from our wiki.  It's ugly but it's what we have for now.


Index: beatconvert
===================================================================
RCS file: /cvs/docs/release-notes/devel/xmlbeats/beatconvert,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6

--- beatconvert	15 Nov 2006 02:19:01 -0000	1.5
+++ beatconvert	3 Apr 2007 03:17:25 -0000	1.6
@@ -1,160 +1,52 @@
 #!/usr/bin/python -tt
 #
-# (C) 2006 Paul W. Frields.
+# (C) 2007 Paul W. Frields.
 # This file is licensed under the GNU General Public License (GPL) v2.
 
-import os
-import sys
+import os, sys
 import urllib
 import httplib
-from getpass import getpass
 from time import sleep
+from optparse import OptionParser
 
-"""Take a list of Beats from the official Fedora Project wiki,
-download them, and convert to DocBook using a properly outfitted
-MoinMoin wiki."""
-
-### Globals
-myName = "beatconvert"
-debugging = True
-
-beatSite = "http://fedoraproject.org/wiki/"
-beatDir  = "Docs/Beats/"
-beatUrl  = beatSite + beatDir
-
-convSite = "http://fedora-test.fedoraproject.org/"
-convDir  = "fedora-docs/Docs/Beats/"
-convUrl  = convSite + convDir
-
-beatFolder = 'Beats'
-waitTime = 15
-######
-
-def print_usage():
-    print "Usage:", myName, "<infile> [wait_time]"
-    print "\t<infile>:  list of Beats, one per line"
-    print "\twait_time: optional time to wait between beats (default: 15s)"
-
-
-if len(sys.argv) < 2:
-    print_usage()
-    sys.exit(1)
-    
-print "Using", beatUrl, "->", convUrl
+"""Take a list of pages from a Moin wiki and convert them to DocBook."""
 
-try:
-    beatFile = open(sys.argv[1], "r")
-except:
-    print "Problem opening file", sys.argv[1]
-    sys.exit(2)
+parser = OptionParser()
+parser.add_option("-d", "--output-directory", dest="outdir",
+                  default=os.getcwd(),                  
+		  help="Output files to directory DIR", metavar="DIR")
+parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
+                  help="Use verbose logging", default=False)
+parser.add_option("-i", "--input-file", dest="infname", default="",
+                  help="Take list of pages from file FILE", metavar="FILE")
+parser.add_option("-u", "--url", dest="urlbase",
+                  default="http://fedoraproject.org/wiki/Docs/Beats/",
+                  help="Use URL as base for pages to fetch", metavar="URL")
+parser.add_option("-p", "--pause", dest="pausetime", default=0.5,
+                  help="Wait SEC seconds between fetches", metavar="SEC")
+(opts, args) = parser.parse_args()
 
 try:
-    waitTime = sys.argv[2]
-    # Otherwise the default is fine, although it takes a while
+    infile = open(opts.infname, "r")
 except:
-    pass
-
-beatList = [beat.rstrip('\n') for beat in beatFile.readlines()]
-beatFile.close()
+    print "Can't open input file", opts.infname
+    sys.exit(-2)
 
-beatData = {}
-for beat in beatList:
-    inUrl = ''.join((beatUrl, beat))
-    outUrl = ''.join((convUrl, beat))
-    beatData[beat] = [inUrl, outUrl]
-
-print "Loaded", len(beatList), "beat names"
-
-
-loginName = raw_input('Enter your wiki login name on the DocBook Moin: ')
-password = getpass('Enter your password on the DocBook Moin: ')
-
-conn = httplib.HTTPConnection(convSite[7:][:-1]) # hack to remove http:// /
-
-#    if debugging: conn.debuglevel = 1
-print "Logging in to target wiki..."
-conn.request('POST', '/fedora-docs',
-             urllib.urlencode({'action': 'login',
-                               'login': 'Login',
-                               'name': loginName,
-                               'password': password}))
-print 'Cookie monster want cookie!'
-resp = conn.getresponse()
-
-try:
-    cookie = resp.getheader('Set-Cookie')
-    cookie = cookie[:cookie.find(';')]
-except:
-    inp = raw_input("Couldn't get a cookie.  You can proceed " +
-                    "anonymously, but results may not be\n" +
-                    "guaranteed.  Continue? [y/N]")
-    if inp not in ('y', 'Y'):
-        sys.exit(0)
-
-
-# Make a folder to put the DocBook in.
-try:
-    os.mkdir(beatFolder)
-except OSError:
-    beatFolder = raw_input("Couldn't make a directory 'Beats' in your " +
-                           "current location.\nSpecify a new one: ")
-    if beatFolder is '':
-        print "Quitting then."
-        sys.exit(0)
-    try:
-        os.mkdir(beatFolder)
-    except:
-        print "Failed, bailing."
-        sys.exit(5)
-
-
-for beat, data in beatData.items():
-    print "Working on beat", beat
-    inUrl = data[0]
-    outUrl = data[1]
-    data.append(''.join(urllib.urlopen(inUrl+'?action=raw').readlines()))
-    print "Retrieved data for", beat
-    savetext = data[2]
-    submitStr = urllib.urlencode({'action': 'edit',
-                                  'editor': 'text',
-                                  'button_save': 'Save Changes',
-                                  'savetext': savetext})
-
-    print "Submitting beat", beat, "..."
-    conn.request('POST', ''.join(('/', convDir, beat)), submitStr,
-                 {'Content-Type': 'application/x-www-form-urlencoded',
-                  'User-Agent': 'beatconvert',
-                  'Cookie': cookie})
-    resp = conn.getresponse()
-
-    # Make sure things worked OK so we don't lock ourselves out...
-    if resp.status != 200:
-        print "Submission failed.  If this were a better script,",
-        print "there would be a fallback here."
-        sys.exit(6)
-    print "Submitted", beat
-
-    # Get the goods!
-    convStr = urllib.urlencode({'action': 'RenderAsDocbook'})
-    convUrl = data[1]
-    print "Retrieving XML for", beat, "at", convUrl
-    try:
-        resp = ''.join(urllib.urlopen(''.join((convUrl, '?action=RenderAsDocbook'))).readlines())
-    except:
-        print "Retrieval failed.  If this were a better script,",
-        print "there would be a fallback here."
-        sys.exit(6)
-
-    print "Retrieved", beat
-    outXml = open(os.path.join(beatFolder, beat.replace('/', '')), "w")
-    outXml.write(resp)
+if not os.access(opts.outdir, os.W_OK):
+    print "Can't write to output directory", opts.outdir
+    sys.exit(-3)
+
+pageList = [page.rstrip('\n') for page in infile.readlines()]
+infile.close()
+if opts.verbose: print "Read", len(pageList), "pages"
+
+for page in pageList:
+    sleep(opts.pausetime)
+    if opts.verbose: print "Retrieving", page
+    data = ''.join(urllib.urlopen(opts.urlbase + page +
+                                  "?action=RenderAsDocbook").readlines())
+    outXml = open(os.path.join(opts.outdir, page.replace("/", "")), "w")
+    outXml.write(data)
     outXml.close()
-    print "Wrote XML for", beat
-
-    print "Waiting for", waitTime, "seconds..."
-    sleep(waitTime)
-
-conn.close()
 
-print "*** Finished! ***"
-print "Don't forget to rename and format the XML files with indenting."
+if opts.verbose: print "Finished"