#! /usr/bin/python
# This program is released under the GPLv2, please see:
# http://www.gnu.org/copyleft/gpl.html
# The original author is David Timms ;)
# archive-teleconstructor.py
# version 2007-06-01 v0.04
"""Pre-fill an ISO image with files that are already available locally.

The script works from an ``iso-info -q -l -i`` listing of the wanted image.
For every regular file in that listing it looks for a file of the same name
and size in the given source directories and writes it into the destination
image at ``LSN * 2048`` bytes, so that a later rsync/bittorrent run only has
to fetch what is still missing.

Runs on Python 2.6+ and Python 3.
"""
from __future__ import division, print_function

import getopt
import os
import shutil
import signal
import string  # unused here; kept because it was imported originally
import subprocess
import sys

# Bytes per ISO-9660 logical sector; an LSN times this is a byte offset.
ISO_BLOCK_SIZE = 2048

# Run-time settings.  main() fills these in from the command line and the
# functions below read them as module globals.
fileIsoDest = ''
fileIsoSource = ''
dumpiso = False
createiso = False
verifyfiles = False
insertfiles = False
fileisoinfo = ''
dirSources = []
sha1sums = []
fileInfo = []      # (folder, filename, size, lsn) tuples, all strings
correlate = False
showStats = False  # set by the SIGALRM handler, cleared by correlate_isos()
debug = False


def signal_handler(signalRx, frame):
    """Handle SIGINT (leave the program) and SIGALRM (statistics tick).

    signalRx -- number of the signal that was delivered
    frame    -- current stack frame (unused)
    """
    global showStats
    if signalRx == signal.SIGINT:
        print(" user interrupt with ctrl-c. exiting...")
        sys.exit(0)
    elif signalRx == signal.SIGALRM:
        # Ask correlate_isos() for a progress line and re-arm the timer so
        # that this happens again in one second.
        showStats = True
        signal.alarm(1)


def help_message():
    """Print the usage text and exit with status 0."""
    print('''
  archive-teleconstructor.py Copyright (c) 2007 David Timms
    -h --help               display this help message
    -v --version            display the version information
  Parameters to be used on a machine with the original iso image:
       --iso-source=        iso image file to decompose
       --dump-iso-info      create an iso-info structure dump
  Parameters to be used to locally reconstruct the iso image:
    -d --file-iso-dest=     iso image file to construct
    -c --createiso          {future} build an empty iso image at file-iso-dest
       --file-iso-info=     structure dump of the iso-source using dump-iso-info
    -s --directory-source=  locations of files known to be on the iso image
                            can be specified multiple times
       --file-sha1sum=      {future} sha1sum results to check files against
       --verify-files       check for files in iso-info structure in the list
                            of directory-sources provided
       --insert-files       insert files from directory-sources into the iso
                            image file-iso-dest, as determined from dump-iso-info
  Parameters for testing purposes:
       --correlate          compare the iso-source with file-iso-dest, providing
                            stats during the process
       --debug              print progress messages to help with debugging
''')
    sys.exit(0)


def debug_message(message):
    """Print message, but only when --debug was given."""
    if debug:
        print(message)


def create_iso(fileBlocks=293000):
    """Create fileIsoDest as a file of fileBlocks zero-filled 2048-byte blocks.

    fileBlocks -- number of blocks to write.  The default is a placeholder;
                  later this should be inferred from a .torrent or an ftp
                  listing.  insert_file() extends the image when needed.

    Exits with status 1 when the destination already exists or cannot be
    written.
    """
    print('\n create_iso:')
    if os.path.isfile(fileIsoDest):
        print(' destfile [' + fileIsoDest + '] already exists. exiting...')
        sys.exit(1)
    print("creating iso...")

    # Write 1 MiB at a time instead of one 2 KiB block per call.
    blocksPerWrite = 512
    zeroChunk = b'\x00' * (ISO_BLOCK_SIZE * blocksPerWrite)
    fullWrites, leftoverBlocks = divmod(fileBlocks, blocksPerWrite)
    debug_message('\n Takes about 66second for 3GB, del in 8 secs.')
    try:
        fileDest = open(fileIsoDest, 'wb')
        try:
            for _ in range(fullWrites):
                fileDest.write(zeroChunk)
            fileDest.write(zeroChunk[:leftoverBlocks * ISO_BLOCK_SIZE])
            size = fileDest.tell()
        finally:
            fileDest.close()
    except IOError:
        print(' something went wrong[' + fileIsoDest + ']')
        sys.exit(1)
    debug_message(" ;) empty iso: size=" + str(size))


def dump_iso():
    """Write the iso-info listing of fileIsoSource to <fileIsoSource>.iso-info.txt.

    Exits with status 1 when no source image was given, when iso-info cannot
    be started, or when it reports a non-zero exit status.
    """
    print(' dump_iso:')
    if not fileIsoSource:
        print(' error:dump_iso: you must also specify '
              '--iso-source=`/path/to/your_iso.iso`')
        sys.exit(1)

    infoPath = fileIsoSource + ".iso-info.txt"
    # An argument list (no shell) keeps paths with spaces or '#' intact.
    isoInfoCommand = ["/usr/bin/iso-info", "-q", "-l", "-i", fileIsoSource]
    debug_message(" isoInfoCommand=" + " ".join(isoInfoCommand)
                  + ">" + infoPath)
    try:
        infoFile = open(infoPath, 'wb')
        try:
            process = subprocess.Popen(isoInfoCommand, stdout=infoFile,
                                       stderr=subprocess.PIPE,
                                       universal_newlines=True)
            infoErrors = process.communicate()[1]
        finally:
            infoFile.close()
    except (IOError, OSError) as error:
        print(' error:dump_iso: could not run iso-info: ' + str(error))
        sys.exit(1)
    debug_message(infoErrors)
    if process.returncode != 0:
        print(' error:dump_iso: iso-info exited with status '
              + str(process.returncode))
        sys.exit(1)
    print(" dump_iso: file generated at: " + infoPath)


def extract_file_info(path=None):
    """Read an iso-info listing and append its regular files to fileInfo.

    path -- listing to read; defaults to the --file-iso-info value.

    A line of the form "/some/folder/:" selects the current folder.  A line
    whose first field starts with '-' and that has at least 12 fields is a
    regular file: field 6 is the LSN followed by ']', field 7 the size and
    everything from field 12 onwards the file name.  Each such file is
    stored as (folder, filename, size, lsn), all strings.

    Exits with status 1 when the listing cannot be read.
    """
    print(' extract_file_info:')
    if path is None:
        path = fileisoinfo
    try:
        fileIsoInfoContent = open(path, 'r')
        try:
            folder = '/'  # used if a file line shows up before any header
            for line in fileIsoInfoContent:
                text = line.strip()
                if text.startswith('/') and text.endswith(':'):
                    folder = text[:-1]
                    print(folder)
                    continue
                # Limit the split so a file name containing spaces stays
                # in one piece.
                fields = text.split(None, 11)
                if len(fields) != 12 or not fields[0].startswith('-'):
                    continue  # directory entry, header or blank line
                lsn = fields[5].rstrip(']')
                size = fields[6]
                filename = fields[11]
                if not (lsn.isdigit() and size.isdigit()):
                    continue  # not a listing line after all
                debug_message(folder + ' * ' + filename + ' * ' + size
                              + ' * ' + lsn)
                # dont forget to shasum later!
                fileInfo.append((folder, filename, size, lsn))
        finally:
            fileIsoInfoContent.close()
            debug_message(" extract_file_info:finally")
    except IOError:
        print(' error: extract_file_info: trouble with file [' + path + ']')
        sys.exit(1)


def file_find_matching(item):
    """Return the path of a local file matching item, or '' if there is none.

    item -- (folder, filename, size, lsn) as stored by extract_file_info().

    Every directory in dirSources is tried twice: with the file directly
    inside it and with the file below the folder it has on the image.  A
    candidate matches when it exists and has exactly the listed size.
    """
    folder, filename, size = item[0], item[1], item[2]
    debug_message(folder + filename + ' ' + size)
    debug_message('++++++++++++++dirSources=')
    debug_message(dirSources)
    print(' searching for file [' + filename + ']')
    wantedSize = int(size)
    for location in dirSources:
        debug_message('---------location=')
        debug_message(location)
        # os.path.join copes with a directory given with or without a
        # trailing slash.
        testfilepaths = [os.path.join(location, filename)]
        nested = os.path.join(location, folder.lstrip('/'), filename)
        if nested not in testfilepaths:
            testfilepaths.append(nested)
        debug_message(testfilepaths)
        for testfilepath in testfilepaths:
            if not os.path.isfile(testfilepath):
                debug_message(' & filepath not found: ' + testfilepath)
                continue
            actualSize = os.path.getsize(testfilepath)
            if actualSize == wantedSize:
                return testfilepath
            print(' * file had different size: ' + testfilepath
                  + ' size=' + str(actualSize) + ' refsize=' + size)
    debug_message("------")
    return ''


def insert_file(entry, filepath):
    """Copy filepath into fileIsoDest at the position given by entry.

    entry    -- (folder, filename, size, lsn); only the LSN is used here
    filepath -- local file whose content is written into the image

    The data goes to byte offset LSN * 2048 without truncating the image.
    An I/O error is reported and the function returns so the remaining
    files can still be processed.  Exits with status 1 when the image does
    not exist.
    """
    print(' insert_file:')
    if not os.path.isfile(fileIsoDest):
        print(" err:insert_file: --file-iso-dest must already exist")
        sys.exit(1)
    offset = int(entry[3]) * ISO_BLOCK_SIZE
    debug_message(" insert_file: " + filepath + " -> " + fileIsoDest
                  + " at byte " + str(offset))
    try:
        source = open(filepath, 'rb')
        try:
            dest = open(fileIsoDest, 'r+b')  # update in place, no truncate
            try:
                dest.seek(offset)
                shutil.copyfileobj(source, dest)
            finally:
                dest.close()
        finally:
            source.close()
    except (IOError, OSError) as error:
        print(" err:insert_file: could not insert [" + filepath + "]: "
              + str(error))


def verify_files():
    """Look up every file of the iso-info listing in the source directories.

    Prints one line per file and a summary.  With --insert-files every file
    that was found is also written into the image.
    """
    print('')
    print(' verify_files:')
    debug_message(' file-iso-info=' + fileisoinfo)
    debug_message(' sha1sums=')
    debug_message(sha1sums)
    debug_message(' directory-sources=*******************')
    debug_message(dirSources)
    if fileisoinfo == '':
        print(" verify_files: you must specify the iso-info dump file "
              "with --file-iso-info=")
        return

    extract_file_info()
    debug_message(fileInfo)
    missingfiles = []
    foundfilescount = 0
    for entry in fileInfo:
        # do sha1sum later, but for now just check size ;-)
        debug_message(entry)
        filepath = file_find_matching(entry)
        if filepath != '':
            print(" found: " + filepath)
            foundfilescount += 1
            if insertfiles:
                insert_file(entry, filepath)
        else:
            print(" missing. ")
            missingfiles.append(entry)

    if missingfiles:
        print(" error: verify_files: could not find local files for: "
              + str(len(missingfiles)) + ' files')
        allstr = ""
    else:
        allstr = " all"
    print(" verify_files: found local files for" + allstr + ": "
          + str(foundfilescount) + ' files')


def _count_identical_bytes(first, second):
    """Return at how many positions first and second hold the same byte."""
    if first == second:
        return len(first)
    return sum(1 for a, b in zip(first, second) if a == b)


def correlate_isos():
    """Compare fileIsoSource with fileIsoDest byte by byte and print stats.

    Only the length of the shorter file is compared.  Roughly once a second
    (driven by SIGALRM) a progress line is printed, and a summary at the end.
    Exits with status 1 when either file is missing.
    """
    global showStats
    print(' correlate_isos... please wait')
    if not os.path.isfile(fileIsoSource):
        print(" error:correlate_isos: you must also specify --iso-source= "
              "for the comparison.")
        sys.exit(1)
    if not os.path.isfile(fileIsoDest):
        print(" error:correlate_isos: you must also specify "
              "--file-iso-dest= for the comparison.")
        sys.exit(1)

    blockSize = ISO_BLOCK_SIZE
    sourceSize = os.path.getsize(fileIsoSource)
    destSize = os.path.getsize(fileIsoDest)
    # Number of blocks in the shorter file, counting a partial last block.
    blocks = (min(sourceSize, destSize) + blockSize - 1) // blockSize
    debug_message(' blockSize: ' + str(blockSize))
    if sourceSize != destSize:
        debug_message(' sizes differ: ' + str(sourceSize) + ' vs '
                      + str(destSize) + '; comparing the common part only')

    sumByteIdentical = 0
    sumByteDifferent = 0
    fileSource = None
    fileDest = None
    try:
        try:
            fileSource = open(fileIsoSource, 'rb')
            fileDest = open(fileIsoDest, 'rb')
            showStats = True
            signal.alarm(1)  # seconds
            block = 0
            while True:
                sourceBlock = fileSource.read(blockSize)
                destBlock = fileDest.read(blockSize)
                smallerBlock = min(len(sourceBlock), len(destBlock))
                if smallerBlock == 0:
                    break  # the shorter file is exhausted
                block += 1
                byteIdentical = _count_identical_bytes(sourceBlock, destBlock)
                byteDifferent = smallerBlock - byteIdentical
                sumByteIdentical += byteIdentical
                sumByteDifferent += byteDifferent
                if showStats:
                    blockCorrelation = 100 * byteIdentical / smallerBlock
                    sumCorrelation = (100 * sumByteIdentical
                                      / (sumByteIdentical + sumByteDifferent))
                    print(' stats: block %7d of %7d: %6.3f%% {%6d:%6d} '
                          'running=%6.3f%% {%6d:%6d}'
                          % (block, blocks, blockCorrelation, byteIdentical,
                             byteDifferent, sumCorrelation, sumByteIdentical,
                             sumByteDifferent))
                    showStats = False
                    signal.alarm(1)  # next line one second from now
        except Exception:
            print(" error:correlate_isos: `an earth shattering kaboom` "
                  "- looney tunes")
            print("Unhandled error: how'd ya do dat ? "
                  + str(sys.exc_info()[0]))
            raise
    finally:
        signal.alarm(0)  # stop the statistics timer
        if fileSource is not None:
            fileSource.close()
        if fileDest is not None:
            fileDest.close()

    # Compute the final figure from the totals; the value used for the
    # progress lines is only refreshed once a second.
    compared = sumByteIdentical + sumByteDifferent
    if compared:
        sumCorrelation = 100 * sumByteIdentical / compared
    else:
        sumCorrelation = 0
    print(' overall statistics: identical:different %10d:%10d {%7.4f%%}'
          % (sumByteIdentical, sumByteDifferent, sumCorrelation))


def main(argv=None):
    """Parse the command line and run the one requested action.

    argv -- argument list without the program name; defaults to sys.argv[1:]

    When several actions are requested only the first of dump-iso-info,
    createiso, verify-files, insert-files, correlate is performed.
    """
    global fileIsoDest, fileIsoSource, fileisoinfo
    global dumpiso, createiso, verifyfiles, insertfiles, correlate, debug

    if argv is None:
        argv = sys.argv[1:]
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGALRM, signal_handler)

    try:
        # -d and -s take a value, hence the colons.
        options, _ = getopt.getopt(
            argv, 'hvd:cs:',
            ['help', 'version', 'iso-source=', 'dump-iso-info',
             'file-iso-dest=', 'createiso', 'file-iso-info=',
             'directory-source=', 'file-sha1sum=', 'verify-files',
             'insert-files', 'correlate', 'debug'])
    except getopt.error:
        print(" error: you provided an unknown option, or an argument's "
              "option\n was missing.\n"
              " Run: `archive-teleconstructor.py --help` for more "
              "information.")
        sys.exit(1)

    # Switch tracing on first so options given before --debug are traced too.
    debug = any(name == '--debug' for name, _value in options)

    for name, value in options:
        debug_message('[' + name + ']=[' + value + ']')
        if name in ('-h', '--help'):
            help_message()
        elif name in ('-v', '--version'):
            print(' version: 2007-06-01 v0.04')
            sys.exit(0)
        elif name == '--iso-source':
            if value == '':
                print(' error:--iso-source: you must specify a source '
                      'iso9660 image')
                sys.exit(1)
            fileIsoSource = value
            print(' --iso-source=[' + fileIsoSource + ']')
            if not os.path.isfile(fileIsoSource):
                print(' error:--iso-source: the specified file must '
                      'already exist')
                sys.exit(1)
        elif name == '--dump-iso-info':
            dumpiso = True
        elif name in ('-d', '--file-iso-dest'):
            if value == '':
                print(' error:--file-iso-dest file location must be '
                      'specified.')
                sys.exit(1)
            fileIsoDest = value
            print(' --file-iso-dest=' + fileIsoDest)
        elif name in ('-c', '--createiso'):
            createiso = True
        elif name == '--file-iso-info':
            fileisoinfo = value
            if not os.path.isfile(fileisoinfo):
                print(' error:--file-iso-info: the specified file must '
                      'already exist.')
                sys.exit(1)
            print(' --file-iso-info: [' + fileisoinfo + '] found.')
        elif name in ('-s', '--directory-source'):
            if not os.path.isdir(value):
                print(' error:--directory-source: the specified directory '
                      'must already exist and preferably contain files '
                      'that may be present on the iso.')
                sys.exit(1)
            dirSources.append(value)
            print(' --directory-source: added [' + value + ']')
            debug_message('=========dirSources=')
            debug_message(dirSources)
        elif name == '--file-sha1sum':
            if not os.path.isfile(value):
                print(' error:--file-sha1sum: the specified file must '
                      'already exist.')
                sys.exit(1)
            sha1sums.append(value)
            print(' --file-sha1sum: added [' + value + ']')
        elif name == '--verify-files':
            verifyfiles = True
        elif name == '--insert-files':
            insertfiles = True
        elif name == '--correlate':
            correlate = True
        elif name == '--debug':
            debug = True
        else:
            print(' [' + name + ']=[' + value + '] is not a legit option !')
            sys.exit(1)

    if dumpiso:
        dump_iso()
    elif createiso:
        create_iso()
    elif verifyfiles:
        verify_files()
    elif insertfiles:
        verify_files()  # inserts as it verifies because insertfiles is set
    elif correlate:
        correlate_isos()

    debug_message(' ===== it\'s the end. =====')


if __name__ == '__main__':
    main()

# the following is all comments, and some things to do.
# naming
# construct
# warp transporter teleport beem beam teleconstructor
# archive-teleconstructor.py
# with a iso library, you could read the iso toc stuff directly from a remote
# server over http or ftp ?
# this would be good cause then you can use this without getting the remote party
# hosting the iso to run iso-info and make it available
# fuse driver for ftp /http ?
# request byte range needed on the servers necessary
# tools: result
#$ iso-info -q -l -i /home/old/6/FC4-i386-DVD.iso
#iso-info version 0.77 i686-redhat-linux-gnu
#Copyright (c) 2003, 2004, 2005 R. Bernstein
#This is free software; see the source for copying conditions.
#There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
#PARTICULAR PURPOSE.
#__________________________________
#ISO-9660 Information
#/:
#  drwxr-xr-x 5 0 0 [LSN 29] 4096 Jun 07 2005 02:45:14 .
#  drwxr-xr-x 6 0 0 [LSN 29] 4096 Jun 07 2005 02:45:14 ..
#  -rwxr-xr-x 2 0 0 [LSN 277] 248 Jun 07 2005 02:25:47 autorun
#  -rw-r--r-- 10 0 0 [LSN 278] 5616 Jun 07 2005 02:25:47 eula.txt
#  drwxr-xr-x 4 0 0 [LSN 32] 2048 Jun 07 2005 02:25:23 Fedora
#  -rw-r--r-- 10 0 0 [LSN 281] 18385 Jun 07 2005 02:25:47 GPL
#  drwxr-xr-x 3 0 0 [LSN 174] 2048 Jun 07 2005 02:52:37 images
#  drwxr-xr-x 2 0 0 [LSN 31] 2048 Jun 07 2005 02:39:44 isolinux
#  -rw-r--r-- 10 0 0 [LSN 290] 5232 Jun 07 2005 02:25:47 README
#  -rw-r--r-- 2 0 0 [LSN 293] 12667 Jun 07 2005 02:25:47 README-Accessibility
#  -rw-r--r-- 2 0 0 [LSN 300] 51265 Jun 07 2005 02:25:47 RELEASE-NOTES
#  -rw-r--r-- 10 0 0 [LSN 326] 1232 Jun 07 2005 02:25:47 RPM-GPG-KEY-rawhide
#  -rw-r--r-- 10 0 0 [LSN 327] 1076 Jun 07 2005 02:25:47 RPM-GPG-KEY-fedora-test
#  -rw-r--r-- 10 0 0 [LSN 328] 1105 Jun 07 2005 02:25:47 RPM-GPG-KEY-fedora-rawhide
#  -rw-r--r-- 10 0 0 [LSN 329] 1519 Jun 07 2005 02:25:47 RPM-GPG-KEY-fedora
#  -rw-r--r-- 10 0 0 [LSN 330] 1706 Jun 07 2005 02:25:47 RPM-GPG-KEY-beta
#  -rw-r--r-- 10 0 0 [LSN 331] 1910 Jun 07 2005 02:25:47 RPM-GPG-KEY
#  -r--r--r-- 1 0 0 [LSN 332] 3591 Jun 07 2005 03:01:49 TRANS.TBL
#  -rw-r--r-- 1 0 0 [LSN 334] 86 Jun 07 2005 02:45:12 .discinfo
#
#/Fedora/:
#  drwxr-xr-x 4 0 0 [LSN 32] 2048 Jun 07 2005 02:25:23 .
#  drwxr-xr-x 5 0 0 [LSN 29] 4096 Jun 07 2005 02:45:14 ..
#  drwxr-xr-x 2 0 0 [LSN 173] 2048 Jun 07 2005 02:54:01 base
#  drwxr-xr-x 2 0 0 [LSN 33] 286720 Jun 07 2005 02:54:01 RPMS
#  -r--r--r-- 1 0 0 [LSN 3420] 432 Jun 07 2005 03:01:49 TRANS.TBL
#
#/Fedora/base/:
#  drwxr-xr-x 2 0 0 [LSN 173] 2048 Jun 07 2005 02:54:01 .
#  drwxr-xr-x 4 0 0 [LSN 32] 2048 Jun 07 2005 02:25:23 ..
#  -rw-r--r-- 4 0 0 [LSN 114659] 10481002 Jun 07 2005 02:54:01 comps.rpm
#  -rw-r--r-- 2 0 0 [LSN 1203484] 709886 Jun 07 2005 02:25:48 comps.xml
#  -rw-r--r-- 2 0 0 [LSN 1203831] 9840328 Jun 07 2005 02:52:31 hdlist
#  -rw-r--r-- 2 0 0 [LSN 1208636] 25029836 Jun 07 2005 02:52:31 hdlist2
#...
#  -r--r--r-- 1 0 0 [LSN 1342905] 1105 Jun 07 2005 03:01:49 TRANS.TBL
#
#/images/pxeboot/:
#  drwxr-xr-x 2 0 0 [LSN 175] 2048 Jun 07 2005 02:39:47 .
#  drwxr-xr-x 3 0 0 [LSN 174] 2048 Jun 07 2005 02:52:37 ..
#  -rw-r--r-- 4 0 0 [LSN 337] 4492627 Jun 07 2005 02:39:35 initrd.img
#  -rw-r--r-- 2 0 0 [LSN 1342906] 275 Jun 07 2005 02:39:47 README
#  -r--r--r-- 1 0 0 [LSN 1342907] 659 Jun 07 2005 03:01:49 TRANS.TBL
#  -rw-r--r-- 4 0 0 [LSN 2588] 1702587 Jun 07 2005 02:39:36 vmlinuz
#
#/isolinux/:
#  drwxr-xr-x 2 0 0 [LSN 31] 2048 Jun 07 2005 02:39:44 .
#  drwxr-xr-x 5 0 0 [LSN 29] 4096 Jun 07 2005 02:45:14 ..
#  -r--r--r-- 1 0 0 [LSN 270] 2048 Jun 07 2005 03:01:49 boot.cat
#  -rw-r--r-- 2 0 0 [LSN 335] 292 Jun 07 2005 02:39:44 boot.msg
#  -rw-r--r-- 2 0 0 [LSN 336] 1034 Jun 07 2005 02:39:44 general.msg
#  -rw-r--r-- 4 0 0 [LSN 337] 4492627 Jun 07 2005 02:39:35 initrd.img
#  -r--r--r-- 2 0 0 [LSN 271] 10424 Jun 07 2005 02:54:01 isolinux.bin
#  -r-xr-xr-x 2 0 0 [LSN 2531] 595 Jun 07 2005 02:39:44 isolinux.cfg
#  -r--r--r-- 2 0 0 [LSN 2532] 94600 Jun 07 2005 02:39:44 memtest
#  -rw-r--r-- 2 0 0 [LSN 2579] 788 Jun 07 2005 02:39:44 options.msg
#  -rw-r--r-- 2 0 0 [LSN 2580] 872 Jun 07 2005 02:39:44 param.msg
#  -rw-r--r-- 2 0 0 [LSN 2581] 490 Jun 07 2005 02:39:44 rescue.msg
#  -rw-r--r-- 2 0 0 [LSN 2582] 549 Jun 07 2005 02:39:44 snake.msg
#  -rw-r--r-- 2 0 0 [LSN 2583] 5692 Jun 07 2005 02:39:44 splash.lss
#  -r--r--r-- 1 0 0 [LSN 2586] 2880 Jun 07 2005 03:01:49 TRANS.TBL
#  -rw-r--r-- 4 0 0 [LSN 2588] 1702587 Jun 07 2005 02:39:36 vmlinuz
#
#dd if=FC4-i386-DVD.iso of=eula.txt.extracted bs=1 count=5616 skip=569344
# skip is 2048*LSN from the above listing (here 2048 * 278 = 569344)
# weird bug: in iso-info
# dump_iso:
#   isoInfoCommand=/usr/bin/iso-info -q -l -i /home/davidt/
#borland.delphi.7.enterprise.iso>/home/davidt/
#borland.delphi.7.enterprise.iso.iso-info.txt
#*** stack smashing detected ***: /usr/bin/iso-info terminated
#
#/bin/sh: line 1: 4419 Aborted /usr/bin/iso-info -q -l -i /
#home/davidt/borland.delphi.7.enterprise.iso >/home/davidt/
#borland.delphi.7.enterprise.iso.iso-info.txt
# note: use azureus to create the iso {disable internal code}
# operation:
# If you have the source iso that you want to distribute:
# 1. $ ./archive-teleconstructor.py --iso-source=my-source-iso.iso
#      --dump-iso-info
# 2. make the iso and the my-source-iso.iso.iso-info.txt file available.
# 3. make the above available as a torrent
# If you want to accelerate your iso download:
# 1. in your web browser, download the provider's torrent file.
# 2. open the .torrent in azureus
# 3. if necessary tell azureus not to download files you aren't interested in
# 4. check the disk to determine when azureus has finished building the
#    empty download iso file
# 5. in azureus stop the download.
# 6. quit azureus
# 7. $ ./archive-teleconstructor.py --file-iso-dest=my-source-iso.iso
#      --file-iso-info=my-source-iso.iso.iso-info.txt
#      --directory-source=/home/install/fedora/development/i386/os
#      --directory-source=/home/install/fedora/7t4/i386/disc
#      --verify-files
#    This will give you some spew regarding files found of correct name
#    and size, along with a summary.
# 8. repeat command 7. but append --insert-files
#    This takes around 10 minutes for F7t4 to F7 release DVD.
# 9. let the standard tools continue the download:
#    a. rsync is preferable if available
#       rsync --progress --stats --inplace -a
#       rsync.server.com::fedora/etc/my-source-iso.iso
#       /home/mystuff/
#       the progress gives a hint as to what is happening: it takes a while ;)
#       the status can be read at the end, provides the speedup efficiency
#    b. azureus: restart it
#       right-click on the stopped download, select force recheck !!
#       wait
#       once the recheck is finished notice how the complete % has jumped
#       from the value it was {hopefully}
#       restart the download: it's going to still take a while.
# bugs: / future
# - tidy up whether inputs and extracted paths include /
# - speed could be faster for correlation - experiment with block size|try diff -ub ?
# - need to use the SHA1SUM to confirm success.
# - the discrepancy in block size between an iso image {2048 B}, and bittorrent {eg 256kB},
#   means that eg azureus will throw away blocks that have lots of correct inserted data
#   because it only works on the whole 256k block.
#   Whenever a contained file is less than 2x 256k, there is a high chance that azureus
#   will re-fetch the whole block, wasting the work done by this program
#
#
# ./ the correlation function does not work correctly. the percent and numbers seem random.
# ./ compare if file sizes are different, don't read past the end of the array !
# ./ document the parameters
# ./ add debug function and parameter
# ./ change commented prints into debug_messages