#!/usr/bin/env python import os, re, sys, time, stat, string import os.path import shutil # Copyright 2003, Russell Nelson http://angry-economist.russnelson.com # Modified by Rich Magahiz (C) 2003 http://www.magahiz.com:8080/frabjous # Licensed under the Open Software License. # http://opensource.org/licenses/osl.html # Like the GPL, only it's a contract. # how to use publish: # create model.html. # create model.rss. # create content files. Everything that isn't model.html or summary*.html is content. # Put a line containing the text STATICCONTENT # into any .html files which should not be processed # run 'publish' in the directory holding model.html # it writes INDEX, ARCHIVE and RSSOUT # to the root directory specified as ROOTDIR # and links to the subdirectory specified as SUBDIR # assumptions about editing: # that the editor writes a foobar.html file # that this script gets run when the editor is satisfied # and wishes to publish that file. # that the editor edits the foobar.html file and runs # this script again to publish the change. # that all old content is preserved and linked-to. # that the permalink always points to the newest version. # assumptions about model.html: # that it has one INSERTCONTENT line, where the content should go. # that later on it has one line, where the date/time stamp # should go. # assumptions about model.rss: # that it takes the form of an RDF 1.0 file # that it has an rdf:li resource line which is to be filled in with # directory name and file name # that it has an item line which is to be filled in with directory # name, file name, title, directory name, file name, description # each of these substitution lines is a proper format line with %s for the # fields which are to be substituted # assumptions about all .html files: # files whose names begin in "summary-" are skipped. # that they contain META description lines describing the content # that they have a line with TITLESTART/TITLEEND comment tags # surrounding the title. # that the content begins after the next blank line following that line. 
# that their content ends in a line with . # that they have a line which begins with 'Last Modified:' and # that the rest of this line is a date/time stamp. # if anything isn't obvious about these assumptions, look at my files # on http://www.magahiz.com:8080/frabjous/blog # Version 1.0 - 20030330- first release. # Version 1.1 - 20030411- moved RSS out into its own file. # Version 1.2 - 20030501- Modified by Rich Magahiz # Version 1.3 - 20040615- rewritefile returns a value, also streamline the loop # Version 1.4 - 20041016- port to Windows # Version 1.5 - 20041025- Use shutil.copyfile instead of linking # Version 1.6 - 20041203- ALTINDEX is a second copy of index.html # Version 1.7 - 20041228- Fix the rss feed. # Version 1.8 - 20041227- Add del.icio.us links # Version 1.9 - 20050126- Add technorati tags on the index page # Version 2.0 - 20050216- Correct revision linking. # Version 2.1 - 20050326- Correct technorati tags (maybe). # Version 2.1 - 20050629- The technorati tag still don't work, change format. # Version 2.2 - 20051118 - Convert to 2006 version # Version 2.3 - 20060930 - Mark changed content on index file # Version 2.4 - 20061010 - Don't output extra RSS lines # Globals - customize here # Filenames SUMMARY = r'summary-' INDEX = "index.html" ARCHIVE = "archive" RSSOUT = "rss-internal.xml" ALTINDEX = "rm.htm" MODEL = "model.html" RSS = "model.rss" # Path where index and archive will be published to #ROOTDIR = "." 
#ROOTDIR = r"/cygdrive/c/Program Files/Apache Group/Apache2/htdocs/magahiz/frabjous" ROOTDIR = r"/home/rich/public_html/frabjous" ARCHDIR = r"/home/rich/public_html/frabjous/archive" # Path underneath root where the content lives SUBDIR = "b2006" #SUBDIR = None # Number of article lines to keep LINECOUNTGLOBAL = 475 # Number of rss items to publish (minimum) RSSCOUNTGLOBAL = 12 # Minimum number of rewrites it takes to generate the new index/archive MINREWRITES = 0 INITREWRITES = 0 REWFAIL = -1 # Return success REWSUCCESS = 0 # Cutoff to ignore all old timestamps CUTOFF = 1132369836 DELLINK0 = r'' DELLINK2 = "\n" TECHLINK0 = r'\n" FLARE0 = '
' + "\n" TAGROLL = '' IMGMOD = 'http://magahiz.com:8080/frabjous/new.gif' # Function to process new content files. # we have found content with no timestamp in its name - it must be new. # Insert a link to the content it updates (fnold). Preserve the # edit time of the file. def rewritefile(fn, fnold, mtime, atime): inf = open(fn) outf = open(fn+".new", "w") while 1: inl = inf.readline() if not inl: break # Check whether it is a static file, bail if so if re.search('STATICCONTENT',inl): print "Static content, skipping rewrite of",fn inf.close() outf.close() # inf.remove() return REWFAIL outf.write(inl) #if re.match("Last modified:", inl): #if re.match('',inl): if re.search('',inl): fnfull = ROOTDIR + os.sep + SUBDIR + os.sep + fnold outf.write('
Previously published: %s
\n' % (SUBDIR, fnold, time.ctime(os.stat(fnfull)[stat.ST_MTIME]))) inf.close() outf.close() os.rename(fn+".new", fn) print fn,"rewritten." os.utime(fn, (atime, mtime)) return REWSUCCESS # Function to write out the content, index, archive, and rss files. def writecontent(): currmonth = time.localtime()[1] rewritecount = INITREWRITES RSSCOUNT = RSSCOUNTGLOBAL LINECOUNT = LINECOUNTGLOBAL keydict = {} # Read the RSS file here and fill the lists rsstop, rssbot, rssend rssmodel = open(ROOTDIR + os.sep + SUBDIR + os.sep + RSS) rssline = None rssitem = None rssswitch = 0 rsstop = [] rssbot = [] rssend = [] for rssread in rssmodel.readlines(): if re.search(r' newest[fn]: newest[fn] = mtime else: # It's a new content file times = os.stat(ROOTDIR + os.sep + SUBDIR + os.sep + fn) mtime = times[stat.ST_MTIME] atime = times[stat.ST_ATIME] now[fn] = mtime # print "Updating now[",fn,"] to ",mtime # remember the oldest date of this file. if not oldest.has_key(fn) or mtime < oldest[fn]: oldest[fn] = mtime # Throw away the initial files list # we sort articles by their initial publication dates, not their current date files = oldest.items() files.sort(lambda a,b:cmp(b[1],a[1])) # we only keep enough articles to fill at least LINECOUNT lines. for fn,mtime in files: # print "File",fn,"clearing itemcontent" good2write = REWFAIL itemcontent = "" if newest.has_key(fn): pass # print fn,"mtime",mtime,"now",now[fn], # print "newest",newest[fn], # print else: print fn,"mtime",mtime,"now",now[fn], print rewritecount = rewritecount + 1 # is the newest earlier than cutoff? skip if so if newest.has_key(fn) and newest[fn] < CUTOFF: pass else: # is the newest timestamp not ours? 
if newest.has_key(fn) and newest[fn] != now[fn]: mtime = now[fn] # Rechristen it with a new timestamped name (fnver) fnver = "%s-%d.html" % (fn[:-5], newest[fn]) good2write = rewritefile(ROOTDIR + os.sep + SUBDIR + os.sep + fn, fnver, mtime, mtime) if(good2write == REWFAIL): print "Skipping",fn," good2write=",good2write continue print "rewriting",fn,"quoting",fnver, mtime rewritecount = rewritecount + 1 # else: # print "Newest timestamp for",fn,"is current" # make a link to our timestamped file. if not newest.has_key(fn) or newest[fn] != now[fn]: # This is broken on Windows but works on Unix/Cygwin # os.link(fullpath + fn, fullpath + "%s-%d.html" % (fn[:-5], now[fn])) shutil.copyfile(fullpath + fn, fullpath + "%s-%d.html" % (fn[:-5], now[fn])) # Now we get to open up the content file inf = open(fullpath + fn) # Process the META lines first description = "Frabjous Times" keyword = [] # get the description and keywords allkeys = [] title = None while 1: line = inf.readline() if not line: break descmatch = re.search(r'="[dD]escription" (content|CONTENT)="(.*)"', line) keymatch = re.search(r'="[Kk]ey[Ww]ords" (content|CONTENT)="(.*)"', line) titlematch = re.search(r'(.*)', line) if descmatch: description = descmatch.group(2) continue if keymatch: allkeys = string.split(string.strip(keymatch.group(2)),',') keyword.extend(allkeys) for eachkey in allkeys: keydict[eachkey] = '' # print "adding key ",eachkey continue # get the title out of the file. Title must be on one line inside TITLE{START|END} comment tags if titlematch: title = titlematch.group(1) # print "Title",title break # print good2write # Archive it if it was first created this month # currmonth = 1 # print time.localtime(oldest[fn])[1], good2write # if(good2write > REWFAIL) and (time.localtime(oldest[fn])[1] == currmonth): if(time.localtime(oldest[fn])[1] == currmonth): # print "Adding ",title,"to archive" archive.write('  
' + "\n") archive.write(description + "\n") archive.write("
\n") if (SUBDIR == '.'): archive.write('
  • %s - %s
  • \n' % (fn, title, time.ctime(mtime))) else: archive.write('
  • %s - %s

  • \n' % (SUBDIR, fn, title, time.ctime(mtime))) # Now we need to decide whether to write to the index file if (LINECOUNT < 0) and (good2write == REWFAIL): # Nah, it's too full skippedstories = skippedstories + 1 # print "Skipping",title,"LINECOUNT",LINECOUNT,"good2write",good2write,"REWFAIL",REWFAIL else: # print "Adding",title # print everything from the title through the TITLESTART line. indexf.write('') if(good2write == REWSUCCESS): indexf.write('Modified content' % IMGMOD) # Posting was modified; mark it # print "Writing modified file",title if (SUBDIR == '.'): indexf.write('>

    %s

    %s [permalink]\n' % (title, time.ctime(mtime), fn)) else: indexf.write('

    %s

    %s [permalink]\n' % (title, time.ctime(mtime), SUBDIR, fn)) # print "Writing",title,"decrementing LINECOUNT" while 1: LINECOUNT = LINECOUNT - 1 line = inf.readline() if not line: break # Avoid nesting the tables if re.search(r'', line): # print "Wrote content of ",title break indexf.write(line) itemcontent += line indexf.write('
    Keywords: ') for thekey in keyword: keywordwords = string.split(thekey) for single in keywordwords: indexf.write(DELLINK0 + single + DELLINK1 + single + DELLINK2) dtag[DELLINK0 + single + DELLINK1 + single + DELLINK2] = 1 indexf.write("|") indexf.write("
    \n") # Add the comment link line basefilename = string.split(fn,'.')[0] comm0 = r'
    ' + "\n" indexf.write(comm0 + "'" + basefilename + "'" + comm1 + "'" + basefilename + "'" + comm2 + "'" + basefilename + "'" + comm3 + "'" + basefilename + "'" + comm4) # Feedflare indexf.write(FLARE0 + os.getcwd().split('/')[-1] + "/" + basefilename + FLARE1) indexf.write("") inf.close() # Write the lists out to the rss file # Write up to RSSCOUNT items to the rss file # print "RSSCOUNT=",RSSCOUNT if (RSSCOUNT > 0) or (good2write>REWFAIL): # if (RSSCOUNT > 0): # if (good2write == REWFAIL): # continue #rss.write(rssline % (title, SUBDIR, fn)) # 0.9 format # print "rssline=",rssline # Don't output items which have empty content if (itemcontent != ""): # print "SUBDIR, fn, title, SUBDIR, fn, description, itemcontent",SUBDIR, fn, title, SUBDIR, fn, description, itemcontent rssbot.append(rssitem % (SUBDIR, fn, title, SUBDIR, fn, description, itemcontent)) # print "",rssitem,SUBDIR,fn rsstop.append(rssline % (SUBDIR, fn)) RSSCOUNT = RSSCOUNT - 1 indexf.write("") for topline in rsstop: rss.write(topline) for botline in rssbot: rss.write(botline) for endline in rssend: rss.write(endline) rss.close() return rewritecount # Main program # we rewrite the index file and the archive file modelled on what # we find in MODEL # If all the files live in one directory, uncomment the following indexf = open(ROOTDIR + os.sep + "index.new", "w") archive = open(ROOTDIR + os.sep + "archive.new", "w") rss = open(ROOTDIR + os.sep + "rss.new", "w") modtime = time.ctime(time.time()) dtag = {} model = open(ROOTDIR + os.sep + SUBDIR + os.sep + MODEL) while 1: inl = model.readline() if not inl: break # where we see the INSERTCONTENT line in MODEL, we insert our own content. #if re.match(r'\s*
    $', inl): if re.search(r'', inl): archive.write('
    ") indexf.write('

    %d more posts in the archives.

    ' + "\n") indexf.write('

    368 more posts in the 2005 archives.

    \n') indexf.write('

    585 more posts in the older archives.

    \n') # del.icio.us tags go here indexf.write(TAGROLL) # indexf.write('

    Tags: ') # for t1 in dtag.keys(): # indexf.write(t1) # itemcontent += t1 indexf.write("