/* Origin: metanohi/site/projects/totxt/totxt.js (JavaScript, 726 lines, 23 KiB) */

/*
totxt: A program that transforms HTML elements to plain text
Copyright (C) 2009 Niels Serup
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
Version: 0.9
2011-08-12 19:20:40 +02:00
You can contact Niels at <ns@metanohi.name>.
2012-07-31 23:43:07 +02:00
Latest version is always available at <http://metanohi.name/projects/totxt/>,
along with documentation.
*/
/* To avoid overuse of global variables, all functions and actions have been
stuffed into the 'load_totxt_generator' function. */
function load_totxt_generator(action, var_a) {
/* Normally, this function will load a bunch of functions and then generate
PRE elements. This isn't the case if a special action is defined. */
function count(haystack, needle) {
    /* Returns the number of (possibly overlapping) occurrences of needle in
       haystack PLUS ONE. With the default needle, a newline, that is the
       number of lines in haystack, which is how the callers use the result.

       haystack: string to search
       needle:   string to look for; defaults to '\n'

       An empty needle can never be stepped past, so it used to loop forever.
       It is now treated as "no occurrences" and the result is 1. */
    if (needle == undefined)
        needle = '\n'
    if (needle === '')
        return 1
    var c = 0
    var offset = -1
    while (offset) {
        // indexOf gives -1 once nothing more is found, which turns offset
        // into 0 and ends the loop after the final increment
        offset = haystack.indexOf(needle, offset) + 1
        c++
    }
    return c
}
if (action == 'show_or_hide') {
/* Used by dynamic boxes that utilize sliding functionality. To open an
article box, it is sufficient to click on it, but to close one, one
must doubleclick on it. This is partly why there are some
differences in what type of state the box is in. */
var obj = document.totxt_preElements[var_a]
if (obj.state) {
// If open, COLLAPSE
obj.state = 0
if (obj.quickdyn) {
// Don't slide
obj.innerHTML = obj.header
obj.className = obj.className.replace(obj.dynclass + '1', obj.dynclass + '0')
obj.onclick = function(){load_totxt_generator('show_or_hide', this.number)}
obj.ondblclick = undefined
}
else {
// Do slide
obj.onclick = undefined
obj.ondblclick = undefined
obj.slide_expand = 0
obj.line = undefined
load_totxt_generator('slide', var_a)
}
}
else {
// If closed, EXPAND
obj.state = 1
obj.className = obj.className.replace(obj.dynclass + '0', obj.dynclass + '1')
if (obj.quickdyn) {
// Don't slide
obj.innerHTML = obj.fulltext
obj.onclick = undefined
obj.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)}
}
else {
// Do slide
obj.onclick = undefined
obj.ondblclick = undefined
obj.slide_expand = 1
obj.line = undefined
load_totxt_generator('slide', var_a)
}
}
return true
}
else if (action == 'slide') {
/* Using the setTimeout function, this action is used by 'show_or_hide'
actions to either expand or collapse a box slidingly. */
var obj, hh, th, obdl
obj = document.totxt_preElements[var_a]
hh = obj.headerheight
th = obj.fulltextheight
if (obj.line == undefined) {
if (obj.slide_expand)
obj.line = hh
else
obj.line = th
}
obj.innerHTML = obj.fulltext.split('\n', obj.line).join('\n')
// Add lines when expanding, remove lines when collapsing
if (obj.slide_expand) {
obdl = obj.line
if (obdl == th)
obdl++
else {
obdl = obj.line + obj.dynlines
if (obdl > th)
obdl = th
}
obj.line = obdl
}
else {
obdl = obj.line
if (obdl == hh)
obdl--
else {
obdl = obj.line - obj.dynlines
if (obdl < hh)
obdl = hh
}
obj.line = obdl
}
if (obj.line >= hh && obj.line <= th)
setTimeout('load_totxt_generator(\'slide\', ' + var_a + ')', obj.dynspeed)
else {
// End 'loop'
if (obj.slide_expand) {
obj.onclick = undefined
obj.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)}
}
else {
obj.onclick = function(){load_totxt_generator('show_or_hide', this.number)}
obj.ondblclick = undefined
obj.className = obj.className.replace(obj.dynclass + '1', obj.dynclass + '0')
}
// Not necessary. Has no noticable effect.
delete document.slide_object
delete obj.slide_expand
delete obj.line
}
return true
}
/*
No more special actions exist. If neither of the two actions above was
specified, code execution begins here.
*/
// Various functions
function isin(ref, str, end, defau) {
    /* Searches the array of strings ref for str.

       end falsy: returns true when some entry equals str.
       end truthy: str is a prefix; for the first entry that starts with it,
                   the rest of that entry is returned (this is the empty
                   string when the entry is exactly str).

       When nothing matches, defau is returned if it is truthy, otherwise
       false.

       The array is walked by index rather than with for...in, so enumerable
       properties that other scripts add to Array.prototype are not mistaken
       for entries. */
    var i, c
    for (i = 0; i < ref.length; i++) {
        c = ref[i]
        if (end) {
            if (c.indexOf(str) == 0)
                return c.substr(str.length)
        }
        else if (c == str)
            return true
    }
    if (defau)
        return defau
    return false
}
function ctimes(cha, times) {
    // Builds a string made of cha repeated times times: ('a', 3) gives 'aaa'.
    // A count of zero or less gives the empty string.
    var repeated = ''
    var done = 0
    while (done < times) {
        repeated += cha
        done++
    }
    return repeated
}
function transform_text(str, w, align, del_xws) {
    /* Re-flows the HTML fragment str into lines of exactly w visible
       characters so that it fits a monospace character box.

       str:     HTML text; may contain tags, entities and <br> line breaks
       w:       visible line width (the hyphenation step consumes w - 1
                characters per round, so w should be at least 2)
       align:   0 = left, 1 = center, 2 = right; anything else means left
       del_xws: when truthy, runs of whitespace are collapsed to one space

       Returns the lines joined by '\n'. Tags and the inside of entities do
       not count towards the width: they are taken out before wrapping and
       put back afterwards. */
    var tagent, tlen, intag, inentity, start, i, t, ln, c, l
    var sstr, ntxt, txt, cw, clen, filter, tminus, spaces

    // Prepare text: trim, normalise <br> variants, flatten source newlines
    str = str.replace(new RegExp('^\\s+'), '').replace(new RegExp('\\s+$'), '')
    str = str.replace(/<br \/>/gi, '<br>').replace(/<br>\n/gi, '<br>')
    str = str.replace(/\n/g, ' ')
    if (del_xws) {
        // Delete excessive whitespace
        str = str.replace(new RegExp('\\s+', 'g'), ' ')
    }
    str = str.replace(/<br>/gi, '\n')
    if (del_xws) {
        // Delete excessive whitespace caused by line break transformation
        str = str.replace(/\n /g, '\n')
    }

    // Store and remove HTML tags (they are re-added later). HTML entities
    // such as &lt; are reduced to their leading '&', which stands in for the
    // single character they render as.
    // Each record is [kind, position in str, original text].
    tagent = []
    tlen = str.length
    start = 0
    intag = false
    inentity = false
    // An '&' followed by one of these characters (or by nothing) is a plain
    // ampersand, not the start of an entity
    filter = ' <>!"\'\\/*-+,.:;'
    ntxt = ''
    for (i = 0; i < tlen; i++) {
        c = str.substr(i, 1)
        if (c == '<' && !intag) {
            intag = true
            start = i
        }
        else if (c == '>' && intag) {
            intag = false
            tagent[tagent.length] = ['tag', start, str.substr(start, i - start + 1)]
        }
        else if (!intag) {
            if (c == '&' && !inentity) {
                if (filter.indexOf(str.substr(i+1, 1)) == -1) {
                    inentity = true
                    start = i
                }
                ntxt += c
            }
            else if (c == ';' && inentity) {
                inentity = false
                tagent[tagent.length] = ['ent', start, str.substr(start, i - start + 1)]
            }
            else if (!inentity)
                ntxt += c
        }
    }
    str = ntxt

    // Change line width of text. When inserting a hyphen and a newline, check
    // if positions of HTML tags and HTML entities need to be changed.
    sstr = str.split('\n')
    ntxt = ''
    for (ln = 0; ln < sstr.length; ln++) {
        txt = sstr[ln]
        while (txt) {
            if (txt.length <= w) {
                ntxt += txt
                break
            }
            if (txt.substr(0, w).indexOf(' ') == -1) {
                // No space to break at: cut the word and hyphenate it
                ntxt += txt.substr(0, w - 1) + '-\n'
                txt = txt.substr(w - 1)
                clen = ntxt.length
                // tminus is the length of the markup before the current
                // record that is missing from the stripped text
                tminus = 0
                for (t = 0; t < tagent.length; t++) {
                    if (tagent[t][1] - tminus > clen)
                        tagent[t][1] += 2
                    l = tagent[t][2].length
                    if (tagent[t][0] == 'ent')
                        l-- // the '&' of an entity is still in the text
                    tminus += l
                }
                continue
            }
            // Break at the last space at or before column w
            cw = w
            while (txt.substr(cw, 1) != ' ')
                cw -= 1
            ntxt += txt.substr(0, cw) + '\n'
            txt = txt.substr(cw + 1)
            // No need to readjust positions of HTML stuff here, as the
            // new newline replaces a space character
        }
        ntxt += '\n'
    }
    while (ntxt.substr(ntxt.length - 1) == '\n')
        ntxt = ntxt.substr(0, ntxt.length - 1)
    str = ntxt

    // Readd removed HTML tags and HTML entities
    for (t = 0; t < tagent.length; t++) {
        c = tagent[t]
        if (c[0] == 'tag')
            str = str.substr(0, c[1]) + c[2] + str.substr(c[1])
        else if (c[0] == 'ent')
            str = str.substr(0, c[1]) + c[2] + str.substr(c[1]+1) // replaces the '&'
    }

    // Align (0 = left, 1 = center, 2 = right)
    if (align != 0 && align != 1 && align != 2)
        align = 0
    sstr = str.split('\n')
    ntxt = ''
    for (ln = 0; ln < sstr.length; ln++) {
        txt = sstr[ln]
        tlen = txt.length
        // clen: visible length of the line (tags skipped, entity counted once)
        clen = 0
        intag = false
        inentity = false
        for (i = 0; i < tlen; i++) {
            c = txt.substr(i, 1)
            if (c == '<' && !intag)
                intag = true
            else if (c == '>' && intag)
                intag = false
            else if (c == '&' && !inentity) {
                if (filter.indexOf(txt.substr(i+1, 1)) == -1)
                    inentity = true
                clen++
            }
            else if (c == ';' && inentity)
                inentity = false
            else if (!intag && !inentity)
                clen++
        }
        spaces = w - clen
        ntxt += '\n'
        if (align == 0) // Left
            ntxt += txt + ctimes(' ', spaces)
        else if (align == 2) // Right
            ntxt += ctimes(' ', spaces) + txt
        else if (align == 1) // Center
            ntxt += ctimes(' ', Math.floor(spaces / 2)) + txt + ctimes(' ', Math.ceil(spaces / 2))
    }
    ntxt = ntxt.substr(1)
    if (ntxt)
        str = ntxt
    return str
}
function add_borders(txt, border, padding) {
    /* Frames every line of txt as border + padding + line + padding + border.

       txt:     text whose lines are separated by '\n'
       border:  string placed at both ends of each line; defaults to '|'
       padding: string placed between border and text; defaults to ' '

       Returns the framed lines joined by '\n'. The lines are walked by index
       rather than with for...in, so enumerable properties that other scripts
       add to Array.prototype do not show up as extra lines. */
    var lines, framed, i
    if (border == undefined)
        border = '|'
    if (padding == undefined)
        padding = ' '
    lines = txt.split('\n')
    framed = []
    for (i = 0; i < lines.length; i++)
        framed[framed.length] = border + padding + lines[i] + padding + border
    return framed.join('\n')
}
function transform_text_in_list(arr, w, del_xws, borders, padding, align) {
    /* Runs transform_text on every string in arr and joins the results with
       '\n'.

       arr:     array of HTML strings
       w:       line width handed to transform_text
       del_xws: whitespace flag handed to transform_text
       borders: when truthy, each result is framed with
                add_borders(result, borders, padding)
       padding: padding handed to add_borders
       align:   alignment handed to transform_text; defaults to 0 (left)

       Returns '' for an empty array. The array is walked by index rather than
       with for...in, so enumerable properties that other scripts add to
       Array.prototype are not treated as texts. */
    var i, t, parts
    if (align == undefined)
        align = 0 // Left
    parts = []
    for (i = 0; i < arr.length; i++) {
        t = transform_text(arr[i], w, align, del_xws)
        if (borders)
            t = add_borders(t, borders, padding)
        parts[parts.length] = t
    }
    return parts.join('\n')
}
// No more functions
// Begin load + search + generate
// Local variables
var h1s, x, z, y, ok, cur, classes, par, chs, cur_found, ccur,
header, subheaders, texts, elems, subheader, content,
stdclass, dynclass, widclass, xwsclass, quiclass, speclass, linclass,
stdwidth, width, stddynamic, dynamic, isdynamic, delxws,
dynspeed, c_speed, dynlines, c_lines, quickdyn, c_quidyn, alwdelxws,
stdhalign, stdshalign, stdtalign, halclass, shalclass, talclass,
c_halign, c_shalign, c_talign
var h_elmnm, sh_elmnm, txt_elmnm
var tmps, text, preElements, elmminus, prelms, plen, celm
var /*poss_classes, n_classes,*/ pid, tmp
// Default classnames for recognition purposes.
// Can be changed using global variables.
stdclass = 'totxt'
try {
stdclass = totxt_CLASSNAME ? totxt_CLASSNAME : stdclass
}
catch(e) {}
dynclass = 'dynamic'
try {
dynclass = totxt_DYNAMIC_CLASSNAME ? totxt_DYNAMIC_CLASSNAME : dynclass
}
catch(e) {}
widclass = 'width'
try {
widclass = totxt_WIDTH_CLASSNAME ? totxt_WIDTH_CLASSNAME : widclass
}
catch(e) {}
xwsclass = 'del_excess_whitespace'
try {
xwsclass = totxt_DELXWS_CLASSNAME ? totxt_DELXWS_CLASSNAME : xwsclass
}
catch(e) {}
speclass = 'speed'
try {
speclass = totxt_DYNSPEED_CLASSNAME ? totxt_DYNSPEED_CLASSNAME : speclass
}
catch(e) {}
linclass = 'lines'
try {
linclass = totxt_DYNLINES_CLASSNAME ? totxt_DYNLINES_CLASSNAME : linclass
}
catch(e) {}
quiclass = 'quick_dynamic'
try {
quiclass = totxt_QUICK_CLASSNAME ? totxt_QUICK_CLASSNAME : quiclass
}
catch(e) {}
halclass = 'halign'
try {
halclass = totxt_HALIGN_CLASSNAME ? totxt_HALIGN_CLASSNAME : halclass
}
catch(e) {}
shalclass = 'shalign'
try {
shalclass = totxt_SHALIGN_CLASSNAME ? totxt_SHALIGN_CLASSNAME : shalclass
}
catch(e) {}
talclass = 'talign'
try {
talclass = totxt_TALIGN_CLASSNAME ? totxt_TALIGN_CLASSNAME : talclass
}
catch(e) {}
h_elmnm = 'H1'
try {
h_elmnm = totxt_HEADER_ELEMENT ? totxt_HEADER_ELEMENT.toUpperCase() : h_elmnm
}
catch(e) {}
sh_elmnm = 'H2'
try {
sh_elmnm = totxt_SUBHEADER_ELEMENT ? totxt_SUBHEADER_ELEMENT.toUpperCase() : sh_elmnm
}
catch(e) {}
txt_elmnm = 'P'
try {
txt_elmnm = totxt_TEXT_ELEMENT ? totxt_TEXT_ELEMENT.toUpperCase() : txt_elmnm
}
catch(e) {}
h1s = document.getElementsByTagName(h_elmnm)
if (!h1s)
return false
// Default values
// Line width
stdwidth = 80
try {
stdwidth = totxt_WIDTH * 1 ? totxt_WIDTH : stdwidth
}
catch(e) {}
if (stdwidth < 10)
stdwidth = 10
// Should articles be allowed to collapse and expand with a mouseclick?
stddynamic = -1
try {
stddynamic = totxt_DYNAMIC
}
catch(e) {}
// Sliding speed
dynspeed = 50
try {
dynspeed = totxt_DYNAMIC_SPEED * 1
}
catch(e) {}
// Line(s) to (dis)appear per function call
dynlines = 1
try {
dynlines = totxt_DYNAMIC_LINES * 1 ? totxt_DYNAMIC_LINES * 1 : dynlines
}
catch(e) {}
// Don't slide?
quickdyn = false
try {
quickdyn = totxt_QUICK_DYNAMIC
}
catch(e) {}
// Delete excessive whitespace?
alwdelxws = false
try {
alwdelxws = totxt_DEL_XWS
}
catch(e) {}
// Header alignment
stdhalign = 1 // Center
try {
stdhalign = totxt_HEADER_ALIGN * 1
}
catch(e) {}
// Subheader alignment
stdshalign = 0 // Left
try {
stdshalign = totxt_SUBHEADER_ALIGN * 1
}
catch(e) {}
// Text alignment
stdtalign = 0 // Left
try {
stdtalign = totxt_TEXT_ALIGN * 1
}
catch(e) {}
// Checking for impossible alignments
if (stdhalign > 2) stdhalign = 2
else if (stdhalign < 0) stdhalign = 0
if (stdshalign > 2) stdshalign = 2
else if (stdshalign < 0) stdshalign = 0
if (stdtalign > 2) stdtalign = 2
else if (stdtalign < 0) stdtalign = 0
elems = [] // Elements that contain info to be used in final PRE elements.
prelms = [] // PRE elements
/*
// Special classes
poss_classes = [stdclass, dynclass, widclass, xwsclass, speclass, linclass, quiclass, halclass, shalclass, talclass]
*/
// document.totxt_preElements is the variable in which to store the PRE elements.
try {
/* document.totxt_preElements likely doesn't exist, which could result in an
error if not called inside a try statement. */
preElements = document.totxt_preElements
elmminus = preElements.length
}
catch(e) {
document.totxt_preElements = []
preElements = []
elmminus = 0
}
// Look up header elements. getElementsByTagName returns a live collection,
// so it is copied first: the PRE elements inserted below can then never
// disturb the iteration.
var headers = []
for (x = 0; x < h1s.length; x++)
    headers[x] = h1s[x]
// One handler serves every dynamic box; it finds its box through this.number
var toggle = function () { load_totxt_generator('show_or_hide', this.number) }
for (x = 0; x < headers.length; x++) {
    cur = headers[x]
    if (!cur || !cur.className)
        continue
    // Only headers that carry the marker class are transformed
    classes = cur.className.split(' ')
    if (!isin(classes, stdclass))
        continue
    par = cur.parentNode
    if (!par)
        continue
    // Collect the subheader and text elements that directly follow the
    // header among its siblings; the first other element ends the run
    chs = par.childNodes
    cur_found = 0
    header = cur.innerHTML
    subheaders = []
    texts = []
    elems[elems.length] = cur
    for (z = 0; z < chs.length; z++) {
        ccur = chs[z]
        if (ccur == cur) {
            cur_found = 1
            continue
        }
        // Skip everything before the header and nodes without a tag name
        if (!cur_found || !ccur.tagName)
            continue
        if (ccur.tagName == sh_elmnm)
            subheaders[subheaders.length] = ccur.innerHTML
        else if (ccur.tagName == txt_elmnm)
            texts[texts.length] = ccur.innerHTML
        else
            break
        // Subheader or text element: remember it for removal
        elems[elems.length] = ccur
    }
    // Per-element settings come from classes such as width60 or halign2
    // and fall back to the defaults
    width = isin(classes, widclass, 1, stdwidth) * 1
    if (!(width >= 10)) // also catches NaN from a non-numeric class suffix
        width = stdwidth
    dynamic = isin(classes, dynclass, 1, stddynamic) * 1
    isdynamic = dynamic == 0 || dynamic == 1
    delxws = isin(classes, xwsclass, 0, alwdelxws)
    c_halign = isin(classes, halclass, 1, stdhalign) * 1
    c_shalign = isin(classes, shalclass, 1, stdshalign) * 1
    c_talign = isin(classes, talclass, 1, stdtalign) * 1
    // Horizontal rules: top/header rule, bottom rule, subheader separator
    tmps = [' ' + ctimes('-', width - 2),
            '|' + ctimes('_', width - 2) + '|',
            '| ' + ctimes('~', width - 4) + ' |\n']
    // Now generate the content (4 columns go to borders and padding)
    header = add_borders(transform_text(header, width - 4, c_halign, delxws), '|')
    subheader = transform_text_in_list(subheaders, width - 4, delxws, '|', ' ', c_shalign)
    content = transform_text_in_list(texts, width - 4, delxws, '|', ' ', c_talign)
    header = tmps[0] + '\n' + header + '\n' + tmps[0]
    text = header + '\n'
    if (subheader)
        text += subheader + '\n' + tmps[2]
    if (content)
        text += content + '\n'
    text += tmps[1]
    // Move an eventual id from the header to the PRE element, so that two
    // elements never share one id
    pid = cur.id
    cur.removeAttribute('id')
    plen = prelms.length
    prelms[plen] = document.createElement('pre')
    celm = prelms[plen]
    celm.number = plen + elmminus // preElements may not be empty
    celm.className = cur.className
    if (pid)
        celm.id = pid
    // Carry the header's inline styles over in one go
    if (cur.style && cur.style.cssText)
        celm.style.cssText = cur.style.cssText
    // Stay hidden until the source elements have been removed
    celm.style.display = 'none'
    celm.innerHTML = text
    if (isdynamic) {
        c_speed = isin(classes, speclass, 1, dynspeed) * 1
        c_lines = isin(classes, linclass, 1, dynlines) * 1
        if (!c_lines) c_lines = dynlines
        c_quidyn = isin(classes, quiclass, 0, quickdyn)
        // State read by the 'show_or_hide' and 'slide' actions
        celm.fulltext = text
        celm.header = header
        celm.headerheight = count(header)
        celm.fulltextheight = count(text)
        celm.state = dynamic
        if (!isin(classes, dynclass))
            celm.className += ' ' + dynclass
        if (!isin(classes, dynclass + dynamic))
            celm.className += ' ' + dynclass + dynamic
        celm.dynclass = dynclass
        celm.dynspeed = c_speed
        celm.dynlines = c_lines
        celm.quickdyn = c_quidyn
        if (dynamic == 0) {
            // Starts collapsed: show the header only, a click opens the box
            celm.onclick = toggle
            celm.innerHTML = header
        }
        else {
            // Starts open: a double click closes the box
            celm.ondblclick = toggle
        }
    }
    par.insertBefore(celm, cur)
}
// Remove used elements
for (x in elems) {
elems[x].parentNode.removeChild(elems[x])
}
// Show new elements
for (x in prelms) {
prelms[x].style.display = 'block'
}
for (x in prelms) {
preElements[preElements.length] = prelms[x]
}
document.totxt_preElements = preElements
return true
}