head	1.3;
access;
symbols
	tcllib-1-13:1.3
	tcllib-1-12:1.3
	tklib-0-5:1.3
	tcllib-1-11-1:1.3
	tcllib-1-11:1.3
	tcllib-1-10:1.3
	tcllib-1-9:1.3
	tcllib-1-8:1.2
	tcllib-1-7:1.2
	tcllib-1-6-1:1.1.6.1
	tcllib-1-6-branch:1.1.0.6
	tcllib-1-6:1.1
	tcllib-1-4-0:1.1
	tcllib-1-3-0:1.1
	tcllib-1-2-0:1.1.0.4
	RELEASES:1.1.0.2
	tcllib-1-1-0:1.1
	tcllib-1-0-0:1.1;
locks; strict;
comment	@# @;


1.3
date	2006.08.18.00.46.55;	author hobbs;	state Exp;
branches;
next	1.2;

1.2
date	2004.03.09.08.20.18;	author andreas_kupries;	state Exp;
branches;
next	1.1;

1.1
date	2001.05.01.19.01.23;	author andreas_kupries;	state Exp;
branches
	1.1.6.1;
next	;

1.1.6.1
date	2004.05.24.02.58.09;	author andreas_kupries;	state Exp;
branches;
next	;


desc
@@


1.3
log
@obfuscate email addr
@
text
@#! /bin/sh
# -*- tcl -*- \
exec tclsh "$0" ${1+"$@@"}

# Extract and report oscon schedule

package require struct
package require csv
package require report
package require htmlparse
package require textutil
package require log

# Restrict logging to levels 'info' and higher.
log::lvSuppressLE debug

# 1. CSV structure filled by the parser = main data table
#    ----------------------------------------------------
#    Day Time/Start Time/End Track Tower Room Speaker Title
#
#    Matrices: "dmain" and "dmainr"
#
#    Difference: dmainr contains gratituous newlines in the
#    speaker column which make for a better TXT report (less
#    wide).
#
#    This is also report 'main'.
#
# 2. Schedule report to see conflicts, CSV structure
#    ----------------------------------------------
#    Day Time                Location-Columns, one per Room
#        (15min granularity) (Content: Speaker + Topic)
#
#    Matrices: "sched" and "schedr". Difference as for dmain(r)
#	and the location columns
#
#    This will be report 'sched'.

proc main {} {
    global pfx argv

    set pfx   [lindex $argv 0]
    set files [lrange $argv 1 end]

    if {($pfx == {}) || ([llength $files] == 0)} {
	usage
	exit -1
    }

    initialize
    foreach f $files {
	log::log info "Scanning \"$f\" ..."
	parse $f
    }
    gen_schedule
    dump_main
    dump_schedule
    postscript
    return
}

proc usage {} {
    global argv0
    puts "usage: $argv0 prefix file..."
}


proc initialize {} {
    global rooms tracks
    ::struct::matrix::matrix dmain  ; # data 1
    ::struct::matrix::matrix dmainr ; # data 1r
    ::struct::matrix::matrix sched  ; # data 2
    ::struct::matrix::matrix schedr ; # data 2r
    array set rooms  {}
    array set tracks {}
    dmain  add columns 8
    dmain  add row {Day Start End Track Tower Room Speaker Title}
    dmainr add columns 8
    dmainr add row {Day Start End Track Tower Room Speaker Title}
    return
}

proc parse {htmlfile} {
    global rooms tracks

    ::struct::tree::tree t

    log::log info "Reading \"$htmlfile\" ..."
    set html [read [set fh [open $htmlfile]]]
    close $fh

    log::log info "Parsing \"$htmlfile\" ..."
    htmlparse::2tree $html t
    htmlparse::removeVisualFluff t
    htmlparse::removeFormDefs t

    log::log info "Extracting information"

    #puts ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Navigate and extract the information
    #t walk root -command {print %t %n}
    #exit

    set base [walk {1 1 0 1 1 0 1 0 1 0}]
    set day  [walkf $base {0 0}]
    set day  [escape [t get $day -key data]]
    log::log debug "Day = $day"
    set day [string range $day 0 2]

    # Walk through the sessions of that day.

    set sess [t next $base]
    while {$sess != {}} {
	set start [cvtdate [escape [t get [walkf $sess {0 0}] -key data]]]
	set track [string trim [escape [t get [walkf $sess {1 0}] -key data]]]
	set loc   [escape [t get [walkf $sess {1 1 0}] -key data]]
	set loc   [string trimright $loc "\n\r\t:"]

	log::log debug "    $start - $track - $loc"

	# Separate Room/Tower information ...
	regexp {(.*) in the (.*) Tower} $loc -> room tower
	set room  [string trim $room]
	set tower [string trim $tower]
	set rooms($tower/$room) .
	set tracks($track) .

	set talk [walkf $sess {1 1 3}]
	while {$talk != {}} {
	    set time    [escape [t get $talk -key data]]
	    set talk    [t next $talk]
	    set title   [escape [t get [walkf $talk {0 0 0}] -key data]]
	    set speaker [escape [t get [walkf $talk {0 2}]   -key data]]

	    # Now we have everything to fill the main table ...
	    # (After a bit of munging of the strings we got)

	    foreach {start end} [split $time -] break
	    set start [cvtdate $start]
	    set end   [cvtdate $end]

	    regsub -all \r  $speaker \n speaker
	    regsub -all \n+ $speaker \n speaker
	    regsub -all " *\n *" $speaker "\n" speaker
	    set speakerc [split $speaker "\n"]
	    set speakerc [join $speakerc ", "]
	    log::log debug "        $start - $end - $speakerc - $title"

	    #puts >>$speakerc<<
	    #puts >>$speaker<<

	    #                Day Time/Start Time/End Tower Room Speaker Title
	    dmainr add row [list $day $start $end $track $tower $room $speaker  $title]
	    dmain  add row [list $day $start $end $track $tower $room $speakerc $title]

	    # Forward to next talk
	    catch {set talk [t next $talk]}
	    catch {set talk [t next $talk]}
	}

	set sess [t next $sess]
    }

    t destroy
    return
}

proc print {t n} {
    set  tp  [$t get $n -key type]
    set  d   [$t depth $n]
    set idx ""
    catch {set  idx [$t index $n]}
    incr d  $d
    incr d  $d

    switch -exact -- $tp {
        a {
            log::log debug "[textutil::strRepeat " " $d]$idx $tp ([$t get $n -key data]...)"
        }
        PCDATA {
            log::log debug "[textutil::strRepeat " " $d]$idx $tp ([string range [$t get $n -key data] 0 20]...)"
        }
        default {
            log::log debug "[textutil::strRepeat " " $d]$idx $tp"
        }
    }
}

proc walkf {n p} {
    #log::log info "$n + $p ="
    foreach idx $p {
        if {$n == ""} {break}
        set n [lindex [t children $n] $idx]
        #log::log info "$idx :- $n"
    }
    return $n
}

proc walk {p} {
    return [walkf root $p]
}

proc cvtdate {date} {
    clock format [clock scan $date] -format "%H:%M"
}

proc escape {text} {
    # Special escape for nbsp, convert into space and not the
    # character specified by the standard.

    regsub -all {&nbsp;} $text { } text
    htmlparse::mapEscapes $text
}


proc gen_schedule {} {
    global rooms tracks

    dmain  set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmain  get rect 0 1 end end]]]
    dmainr set rect 0 1 [lsort -decreasing -index 0 [lsort -index 1 [dmainr get rect 0 1 end end]]]

    sched  add columns 2
    schedr add columns 2
    #sched  add columns [array size rooms]
    #schedr add columns [array size rooms]
    sched  add columns [array size tracks]
    schedr add columns [array size tracks]

    #log::log info Tracks=[array size tracks]
    #log::log info Rooms.=[array size rooms]

    set res [list Day Time]
    set c 2
    foreach k [lsort [array names tracks]] {
	lappend res $k
	set tracks($k) $c
	incr c
    }

    sched  add row $res
    schedr add row $res

    # Data in dmain is already sorted by day. By starting time only
    # partially, there are back references.
    # Just move them to the correct rooms and rows!

    #-- Day Time Location-Columns, one per Room --

    set n [dmain rows]
    set p 0

    array set rmap {}

    for {set r 1} {$r < $n} {incr r} {
	foreach {day start end track tower room speaker title} [dmain get row $r] break
	#[list $day $start $end $tower $room $speakerc $title]

	set key $day,$start
	if {![info exists rmap($key)]} {
	    log::log info "Track schedule $day $start"
	    sched  add row
	    schedr add row
	    incr p

	    set rmap($key) $p
	    sched  set cell 0 $p $day
	    sched  set cell 1 $p $start
	    schedr set cell 0 $p $day
	    schedr set cell 1 $p $start
	}

	sched  set cell $tracks($track) $rmap($key) "$tower; $room; $speaker; $title"
	schedr set cell $tracks($track) $rmap($key) "$tower $room\n$speaker\n$title"
    }

    # Squeeze the columns 2+ in the report matrix

    set cols [schedr columns]
    for {set c 2} {$c < $cols} {incr c} {

	if {[schedr columnwidth $c] > 21} {
	    log::log debug "Squeezing $c"
	    set col [schedr get column $c]
	    set res [list]
	    foreach item $col {
		lappend res [wrap $item 21]
	    }
	    schedr set column $c $res
	}
    }

    # Now sort by day (primary key) and starting time (secondary key).
    # (Meaning we have to sort by time first, and then the day)

    # sched  setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [sched  getrect 0 0 end end]]]
    # schedr setrect 0 0 [lsort -decreasing -index 0 [lsort -index 1 [schedr getrect 0 0 end end]]]

    return
}

proc dump_main {} {
    global pfx
    log::log info "Writing talk information /CSV"

    set f [open ${pfx}.main.csv w]
    csv::writematrix dmain $f
    close $f

    log::log info "Writing talk information /TXT"

    # Compute width of report and squeeze the title column to fit
    # below 80 char/line

    # Day Time/Start Time/End Track Tower Room Speaker Title

    set total 0
    incr total [dmain columnwidth 0]
    incr total [dmain columnwidth 1]
    incr total [dmain columnwidth 2]
    incr total [dmain columnwidth 3]
    incr total [dmain columnwidth 4]
    incr total [dmain columnwidth 5]
    incr total [dmain columnwidth 6]

    #log::log info Total=$total

    if {$total < 80} {
	set total [expr {80 - $total}]
	set titles [dmain getcolumn 7]
	set res [list]
	foreach t $titles {
	    lappend res [textutil::adjust $t -length $total]
	}
	dmain setcolumn 7 $res
    }

    ::report::report r [dmainr columns] style captionedtable 1
    set f [open ${pfx}.main.txt w]
    r printmatrix2channel dmainr $f
    close $f
    r destroy

    # Now the HTML report, use 'dmain' as base, actually formatting
    # into lines is done by the browser.

    log::log info "Writing talk information /HTML"

    ::report::report r [dmain columns] style html

    set f [open ${pfx}.main.html w]
    puts $f "<html><head><title>Talk information and schedule</title></head><body>"
    puts $f "<h1>Talk information and schedule</h1>"
    puts $f "<p><table border=1>"
    r printmatrix2channel dmain $f
    puts $f "</table></p></body></html>"
    close $f
    r destroy
}

proc dump_schedule {} {
    global pfx
    log::log info "Writing track schedule /CSV"

    set f [open ${pfx}.sched.csv w]
    csv::writematrix sched $f
    close $f

    log::log info "Writing track schedule /TXT"

    ::report::report r [schedr columns] style captionedtable 1
    r datasep set [r top get]
    r datasep enable

    set f [open ${pfx}.sched.txt w]
    r printmatrix2channel schedr $f
    close $f
    r destroy

    # Now the HTML report, use 'sched' as base, actually formatting
    # into lines is done by the browser.

    log::log info "Writing track schedule /HTML"

    ::report::report r [sched columns] style html

    set f [open ${pfx}.sched.html w]
    puts $f "<html><head><title>Track schedules</title></head><body>"
    puts $f "<h1>Track schedules</h1>"
    puts $f "<p><table border=1>"
    r printmatrix2channel sched $f
    puts $f "</table></p></body></html>"
    close $f
    r destroy
}

proc postscript {} {
    global pfx
    # Transforms texts into printable postscript, using a2ps (if available)

    catch {exec a2ps -o ${pfx}.main.ps  -1 -B -r -f7 ${pfx}.main.txt}
    catch {exec a2ps -o ${pfx}.sched.ps -1 -B -r -f4 ${pfx}.sched.txt}
    return
}

proc wrap {text len} {
    # @@author Jeffrey Hobbs <jeff at hobbs org>
    #
    # @@c Wraps the given <a text> into multiple lines not
    # @@c exceeding <a len> characters each. Lines shorter
    # @@c than <a len> characters might get filled up.
    #
    # @@a text: The string to operate on.
    # @@a len: The maximum allowed length of a single line.
    #
    # @@r Basically <a text>, but with changed newlines to
    # @@r restrict the length of individual lines to at most
    # @@r <a len> characters.

    # @@n This procedure is not checked by the testsuite.

    # @@i wrap, word wrap

    # Convert all newlines into spaces and initialize the result
    # see ::pool::string::oneLine too.

    regsub -all "\n" $text { } text
    incr len -1

    set out {}

    # As long as the string is longer than the intended length of
    # lines in the result:

    while {[string len $text] > $len} {
	# - Find position of last space in the part of the text
	#   which could a line in the result.

	# - We jump out of the loop if there is none and the whole
	#   text does not contain spaces anymore. In the latter case
	#   the rest of the text is one word longer than an intended
	#   line, we cannot avoid the longer line.

	set i [string last { } [string range $text 0 $len]]

	if {$i == -1 && [set i [string first { } $text]] == -1} {
	    break
	}

	# Get the just fitting part of the text, remove any heading
	# and trailing spaces, then append it to the result string,
	# don't close it with a newline!

	append out [string trim [string range $text 0 [incr i -1]]]\n

	# Shorten the text by the length of the processed part and
	# the space used to split it, then iterate.

	set text [string range $text [incr i 2] end]
    }

    return $out$text
}

# -------------------------------------------
# Define the required reports styles

::report::defstyle simpletable {} {
    data   set [split "[string repeat "| "   [columns]]|"]
    top    set [split "[string repeat "+ - " [columns]]+"]
    bottom set [top get]
    top	   enable
    bottom enable
}
::report::defstyle captionedtable {{n 1}} {
    simpletable
    topdata   set [data get]
    topcapsep set [top  get]
    topcapsep enable
    tcaption $n
}
::report::defstyle html {} {
    set c  [columns]
    set cl $c ; incr cl -1
    data set "<tr> [split [string repeat " " $cl] ""] </tr>"
    for {set col 0} {$col < $c} {incr col} {
	pad $col left  "<td>"
	pad $col right "</td>"
    }
    return
}

# -------------------------------------------

main
exit
@


1.2
log
@Unified the startup header of all applications, using
suggestions made by Stuart Cassof <stwo@@telus.net>.
@
text
@d406 1
a406 1
    # @@author Jeffrey Hobbs <jeff.hobbs@@acm.org>
@


1.1
log
@2001-05-01  Andreas Kupries <andreas_kupries@@users.sourceforge.net>

	* Makefile.in (MODULES):  Added module 'report'.

	* all.tcl: Added code to propagate "::tcltest::testDirectory" into
	  the slave actually doing the tests. This tripped some of the
	  tests for the new CSV module as they use some external files and
	  were thus unable to find them correctly without this setting.

	* Makefile.in (MODULES): Added module 'csv'.

	* Added directory 'examples' for future sample applications of
	  tcllib and some example applications too.

	* Added "matrix" data structure to module "struct".
@
text
@d1 4
a4 3
#!/bin/sh
# use -*- tcl -*- \
exec tclsh "$0" "$@@"
@


1.1.6.1
log
@Downgraded to version 1.3.6, removed -decode extension from
this branch.

Import of ftpd bugfix by Gerald Lester.

Last commit was a bad update, caused duplicates of changes
to appear. Failed testsuite. Removed all the duplicates now.

Fixed SF Tcllib Bug 954328. Mime now adapts at runtime to
whatever version of md5 has been loaded.

Updated test for rewritten adjust which fixed the infinite
looping demonstrated by tests 2.6 and 2.7. Also fixed a var
usage typo which caused a copy of the input to appear in the
output, before the expected formatted result.

Fixed bug in the processing of multi-word section titles for
text based formats.

Fixed bug 951568, regarding the usage of Trf's generic
transform.

Fixed problems with jpeg recognition (was unable to detect a
jpeg file, if it contained exif data).

Changelog for last patch, and updates in related package.

Completed application of code for various fixes.

Rewritten text adjustment and hyphenation, fixing SF TCllib
Bug 882402.

Fixed SF Tcllib Bug 936064, and evals more robust.

Fixed SF Tcllib Bug 893516

Fixed SF Tcllib Patch 763712

Fixed SF Tcllib Patch 758742

Fixed SF Tcllib Bug 620852

Eval usage made more robust and similar.

Fixed SF Tcllib Bug 943146.

Fixed SF Tcllib Bug 940651

SF Tcllib Bug 784519 fixed.

Pat: sak.tcl update for better use of critcl.

Joe: Fix in doctools xml support.

Import bugfix by Pat Thoyts, Handling of data starting with
hyphen/dash

Import of uuencode changes by Jeff Hobbs.

Changed defaults for package 'log'. No output for the all
levels below 'error'.

Unified the startup header of all applications, using
suggestions made by Stuart Cassof <stwo@@telus.net>.

Added testcase for Tcllib SF Bug 860753. The bug itself was
already fixed for Tcllib 1.6.

Fix for bug 899204. Test data file is opened read-only, and
tests made independent of each other.

Bugfix 899152, 899209. Require Tcl 8.2 for installer, delete
file before writing over it.

Import of time fix by Pat Thoyts, patch #905132.

Cleanup fix: Snit depends on Tcl 8.4, this is documented,
however neither package index, nor testsuite enforced the
restriction, allowing for errors. This has been changed now.

Fixed typos
@
text
@d1 3
a3 4
#! /bin/sh
# -*- tcl -*- \
exec tclsh "$0" ${1+"$@@"}

@


