release-notes/devel/xmlbeats beatconvert,1.5,1.6
Paul W. Frields (pfrields)
fedora-docs-commits at redhat.com
Tue Apr 3 03:17:27 UTC 2007
Author: pfrields
Update of /cvs/docs/release-notes/devel/xmlbeats
In directory cvs-int.fedora.redhat.com:/tmp/cvs-serv25890
Modified Files:
beatconvert
Log Message:
Fix beatconvert script to only pull DocBook from our wiki. It's ugly but it's what we have for now.
Index: beatconvert
===================================================================
RCS file: /cvs/docs/release-notes/devel/xmlbeats/beatconvert,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- beatconvert 15 Nov 2006 02:19:01 -0000 1.5
+++ beatconvert 3 Apr 2007 03:17:25 -0000 1.6
@@ -1,160 +1,52 @@
#!/usr/bin/python -tt
#
-# (C) 2006 Paul W. Frields.
+# (C) 2007 Paul W. Frields.
# This file is licensed under the GNU General Public License (GPL) v2.
-import os
-import sys
+import os, sys
import urllib
import httplib
-from getpass import getpass
from time import sleep
+from optparse import OptionParser
-"""Take a list of Beats from the official Fedora Project wiki,
-download them, and convert to DocBook using a properly outfitted
-MoinMoin wiki."""
-
-### Globals
-myName = "beatconvert"
-debugging = True
-
-beatSite = "http://fedoraproject.org/wiki/"
-beatDir = "Docs/Beats/"
-beatUrl = beatSite + beatDir
-
-convSite = "http://fedora-test.fedoraproject.org/"
-convDir = "fedora-docs/Docs/Beats/"
-convUrl = convSite + convDir
-
-beatFolder = 'Beats'
-waitTime = 15
-######
-
-def print_usage():
- print "Usage:", myName, "<infile> [wait_time]"
- print "\t<infile>: list of Beats, one per line"
- print "\twait_time: optional time to wait between beats (default: 15s)"
-
-
-if len(sys.argv) < 2:
- print_usage()
- sys.exit(1)
-
-print "Using", beatUrl, "->", convUrl
+"""Take a list of pages from a Moin wiki and convert them to DocBook."""
-try:
- beatFile = open(sys.argv[1], "r")
-except:
- print "Problem opening file", sys.argv[1]
- sys.exit(2)
+parser = OptionParser()
+parser.add_option("-d", "--output-directory", dest="outdir",
+ default=os.getcwd(),
+ help="Output files to directory DIR", metavar="DIR")
+parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
+ help="Use verbose logging", default=False)
+parser.add_option("-i", "--input-file", dest="infname", default="",
+ help="Take list of pages from file FILE", metavar="FILE")
+parser.add_option("-u", "--url", dest="urlbase",
+ default="http://fedoraproject.org/wiki/Docs/Beats/",
+ help="Use URL as base for pages to fetch", metavar="URL")
+parser.add_option("-p", "--pause", dest="pausetime", default=0.5,
+ help="Wait SEC seconds between fetches", metavar="SEC")
+(opts, args) = parser.parse_args()
try:
- waitTime = sys.argv[2]
- # Otherwise the default is fine, although it takes a while
+ infile = open(opts.infname, "r")
except:
- pass
-
-beatList = [beat.rstrip('\n') for beat in beatFile.readlines()]
-beatFile.close()
+ print "Can't open input file", opts.infname
+ sys.exit(-2)
-beatData = {}
-for beat in beatList:
- inUrl = ''.join((beatUrl, beat))
- outUrl = ''.join((convUrl, beat))
- beatData[beat] = [inUrl, outUrl]
-
-print "Loaded", len(beatList), "beat names"
-
-
-loginName = raw_input('Enter your wiki login name on the DocBook Moin: ')
-password = getpass('Enter your password on the DocBook Moin: ')
-
-conn = httplib.HTTPConnection(convSite[7:][:-1]) # hack to remove http:// /
-
-# if debugging: conn.debuglevel = 1
-print "Logging in to target wiki..."
-conn.request('POST', '/fedora-docs',
- urllib.urlencode({'action': 'login',
- 'login': 'Login',
- 'name': loginName,
- 'password': password}))
-print 'Cookie monster want cookie!'
-resp = conn.getresponse()
-
-try:
- cookie = resp.getheader('Set-Cookie')
- cookie = cookie[:cookie.find(';')]
-except:
- inp = raw_input("Couldn't get a cookie. You can proceed " +
- "anonymously, but results may not be\n" +
- "guaranteed. Continue? [y/N]")
- if inp not in ('y', 'Y'):
- sys.exit(0)
-
-
-# Make a folder to put the DocBook in.
-try:
- os.mkdir(beatFolder)
-except OSError:
- beatFolder = raw_input("Couldn't make a directory 'Beats' in your " +
- "current location.\nSpecify a new one: ")
- if beatFolder is '':
- print "Quitting then."
- sys.exit(0)
- try:
- os.mkdir(beatFolder)
- except:
- print "Failed, bailing."
- sys.exit(5)
-
-
-for beat, data in beatData.items():
- print "Working on beat", beat
- inUrl = data[0]
- outUrl = data[1]
- data.append(''.join(urllib.urlopen(inUrl+'?action=raw').readlines()))
- print "Retrieved data for", beat
- savetext = data[2]
- submitStr = urllib.urlencode({'action': 'edit',
- 'editor': 'text',
- 'button_save': 'Save Changes',
- 'savetext': savetext})
-
- print "Submitting beat", beat, "..."
- conn.request('POST', ''.join(('/', convDir, beat)), submitStr,
- {'Content-Type': 'application/x-www-form-urlencoded',
- 'User-Agent': 'beatconvert',
- 'Cookie': cookie})
- resp = conn.getresponse()
-
- # Make sure things worked OK so we don't lock ourselves out...
- if resp.status != 200:
- print "Submission failed. If this were a better script,",
- print "there would be a fallback here."
- sys.exit(6)
- print "Submitted", beat
-
- # Get the goods!
- convStr = urllib.urlencode({'action': 'RenderAsDocbook'})
- convUrl = data[1]
- print "Retrieving XML for", beat, "at", convUrl
- try:
- resp = ''.join(urllib.urlopen(''.join((convUrl, '?action=RenderAsDocbook'))).readlines())
- except:
- print "Retrieval failed. If this were a better script,",
- print "there would be a fallback here."
- sys.exit(6)
-
- print "Retrieved", beat
- outXml = open(os.path.join(beatFolder, beat.replace('/', '')), "w")
- outXml.write(resp)
+if not os.access(opts.outdir, os.W_OK):
+ print "Can't write to output directory", opts.outdir
+ sys.exit(-3)
+
+pageList = [page.rstrip('\n') for page in infile.readlines()]
+infile.close()
+if opts.verbose: print "Read", len(pageList), "pages"
+
+for page in pageList:
+ sleep(opts.pausetime)
+ if opts.verbose: print "Retrieving", page
+ data = ''.join(urllib.urlopen(opts.urlbase + page +
+ "?action=RenderAsDocbook").readlines())
+ outXml = open(os.path.join(opts.outdir, page.replace("/", "")), "w")
+ outXml.write(data)
outXml.close()
- print "Wrote XML for", beat
-
- print "Waiting for", waitTime, "seconds..."
- sleep(waitTime)
-
-conn.close()
-print "*** Finished! ***"
-print "Don't forget to rename and format the XML files with indenting."
+if opts.verbose: print "Finished"
More information about the Fedora-docs-commits mailing list