/* totxt: A program that transforms HTML elements to plain text Copyright (C) 2009 Niels Serup This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* Version: 0.9 You can contact Niels at . Latest version is always available at , along with documentation. */ /* To avoid overuse of global variables, all functions and actions have been stuffed into the 'load_totxt_generator' function. */ function load_totxt_generator(action, var_a) { /* Normally, this function will load a bunch of functions and then generate PRE elements. This isn't the case if a special action is defined. */ function count(haystack, needle) { // Counts needles in haystack if (needle == undefined) var needle = '\n' var c, offset offset = -1 c = 0 while (offset) { offset = haystack.indexOf(needle, offset) + 1 c++ } return c } if (action == 'show_or_hide') { /* Used by dynamic boxes that utilize sliding functionality. To open an article box, it is sufficient to click on it, but to close one, one must doubleclick on it. This is partly why there are some differences in what type of state the box is in. */ var obj = document.totxt_preElements[var_a] if (obj.state) { // If open, COLLAPSE obj.state = 0 if (obj.quickdyn) { // Don't slide obj.innerHTML = obj.header obj.className = obj.className.replace(obj.dynclass + '1', obj.dynclass + '0') obj.onclick = function(){load_totxt_generator('show_or_hide', this.number)} obj.ondblclick = undefined } else { // Do slide obj.onclick = undefined obj.ondblclick = undefined obj.slide_expand = 0 obj.line = undefined load_totxt_generator('slide', var_a) } } else { // If closed, EXPAND obj.state = 1 obj.className = obj.className.replace(obj.dynclass + '0', obj.dynclass + '1') if (obj.quickdyn) { // Don't slide obj.innerHTML = obj.fulltext obj.onclick = undefined obj.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)} } else { // Do slide obj.onclick = undefined obj.ondblclick = undefined obj.slide_expand = 1 obj.line = undefined load_totxt_generator('slide', var_a) } } return true } else if (action == 'slide') { /* Using the setTimeout function, this action is used by 'show_or_hide' actions to either expand or collapse a box slidingly. */ var obj, hh, th, obdl obj = document.totxt_preElements[var_a] hh = obj.headerheight th = obj.fulltextheight if (obj.line == undefined) { if (obj.slide_expand) obj.line = hh else obj.line = th } obj.innerHTML = obj.fulltext.split('\n', obj.line).join('\n') // Add lines when expanding, remove lines when collapsing if (obj.slide_expand) { obdl = obj.line if (obdl == th) obdl++ else { obdl = obj.line + obj.dynlines if (obdl > th) obdl = th } obj.line = obdl } else { obdl = obj.line if (obdl == hh) obdl-- else { obdl = obj.line - obj.dynlines if (obdl < hh) obdl = hh } obj.line = obdl } if (obj.line >= hh && obj.line <= th) setTimeout('load_totxt_generator(\'slide\', ' + var_a + ')', obj.dynspeed) else { // End 'loop' if (obj.slide_expand) { obj.onclick = undefined obj.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)} } else { obj.onclick = function(){load_totxt_generator('show_or_hide', this.number)} obj.ondblclick = undefined obj.className = obj.className.replace(obj.dynclass + '1', obj.dynclass + '0') } // Not necessary. Has no noticable effect. delete document.slide_object delete obj.slide_expand delete obj.line } return true } /* No more special actions exist. If none of the two above actions were specified, code execution will begin here. */ // Various functions function isin(ref, str, end, defau) { // Checks for string str in array ref. If end is true, look for match in // beginning of str and return the end of str var x, c for (x in ref) { c = ref[x] if (end) { if (c.indexOf(str) == 0) return c.substr(str.length) } else if (c == str) return true } if (defau) return defau else return false } function ctimes(cha, times) { // * times (a, 3 = aaa) var t = '' for (var i = 0; i < times; i++) { t += cha } return t } function transform_text(str, w, align, del_xws) { /* This function transforms text seen in ordinary HTML tags into text that fits simple monospace character boxes. */ // Prepare text str = str.replace(new RegExp('^\\s+'), '').replace(new RegExp('\\s+$'), '') str = str.replace(/
/gi, '
').replace(/
\n/gi, '
') //str = str.replace(/ /g, ' ') str = str.replace(/\n/g, ' ') if (del_xws) { // Delete excessive whitespace str = str.replace(new RegExp('\\s+', 'g'), ' ') } str = str.replace(/
/gi, '\n') if (del_xws) { // Delete excessive whitespace caused by line break transformation str = str.replace(/\n /g, '\n') } var tags, tlen, intag, start, i, c, x, sstr, ntxt, ctxt, txt, cw, clen var entities, inentity, filter, tminus, l, spaces // Store and remove HTML tags in text (will be readded in text later) // Also look for HTML entities, such as < (<) tagent = [] // To store HTML tags and HTML entities tlen = str.length start = 0 intag = false inentity = false filter = ' <>!"\'\\/*-+,.:;' ntxt = '' for (i = 0; i < tlen; i++) { c = str.substr(i, 1) if (c == '<' && !intag) { intag = true start = i } else if (c == '>' && intag) { intag = false tagent[tagent.length] = ['tag', start, str.substr(start, i - start + 1)] } else if (!intag) { if (c == '&' && !inentity) { if (filter.indexOf(str.substr(i+1, 1)) == -1) { inentity = true start = i } ntxt += c } else if (c == ';' && inentity) { inentity = false tagent[tagent.length] = ['ent', start, str.substr(start, i - start + 1)] } else if (!inentity) ntxt += c } } str = ntxt // Change line width of text. When inserting a hyphen and a newline, check // if positions of HTML tags and HTML entities need to be changed. sstr = str.split('\n') ntxt = '' for (ctxt in sstr) { txt = sstr[ctxt] while (txt) { if (txt.length <= w) { ntxt += txt break } if (txt.substr(0, w).indexOf(' ') == -1) { ntxt += txt.substr(0, w - 1) + '-\n' txt = txt.substr(w - 1) clen = ntxt.length tminus = 0 for (x in tagent) { if (tagent[x][1] - tminus > clen) tagent[x][1] += 2 l = tagent[x][2].length if (tagent[x][0] == 'ent') l-- tminus += l } continue } cw = w while (txt.substr(cw, 1) != ' ') cw -= 1 ntxt += txt.substr(0, cw) + '\n' txt = txt.substr(cw + 1) // No need to readjust positions of HTML stuff here, as the // new newline replaces a space character } ntxt += '\n' } while (ntxt.substr(ntxt.length - 1) == '\n') ntxt = ntxt.substr(0, ntxt.length - 1) str = ntxt // Readd removed HTML tags and HTML entities for (x in tagent) { c = tagent[x] if (c[0] == 'tag') str = str.substr(0, c[1]) + c[2] + str.substr(c[1]) else if (c[0] == 'ent') str = str.substr(0, c[1]) + c[2] + str.substr(c[1]+1) } // Align (0 = left, 1 = center, 2 = right) if (align != 0 && align != 1 && align != 2) align = 0 sstr = str.split('\n') ntxt = '' for (ctxt in sstr) { txt = sstr[ctxt] tlen = txt.length clen = 0 intag = false inentity = false for (i = 0; i < tlen; i++) { c = txt.substr(i, 1) if (c == '<' && !intag) intag = true else if (c == '>' && intag) intag = false else if (c == '&' && !inentity) { if (filter.indexOf(txt.substr(i+1, 1)) == -1) inentity = true clen++ } else if (c == ';' && inentity) inentity = false else if (!intag && !inentity) clen++ } spaces = w - clen ntxt += '\n' if (align == 0) // Left ntxt += txt + ctimes(' ', spaces) else if (align == 2) // Right ntxt += ctimes(' ', spaces) + txt else if (align == 1) // Center ntxt += ctimes(' ', Math.floor(spaces / 2)) + txt + ctimes(' ', Math.ceil(spaces / 2)) } ntxt = ntxt.substr(1) if (ntxt) str = ntxt return str } function add_borders(txt, border, padding) { // Adds borders to boxes if (border == undefined) border = '|' if (padding == undefined) padding = ' ' var s, x, c, n s = txt.split('\n') n = '' for (x in s) { c = s[x] n += '\n' + border + padding + c + padding + border } return n.substr(1) } function transform_text_in_list(arr, w, del_xws, borders, padding, align) { // Transform, then join var x, c, f, t f = '' for (x in arr) { c = arr[x] if (align == undefined) align = 0 // Left t = transform_text(c, w, align, del_xws) if (borders) t = add_borders(t, borders, padding) f += '\n' + t } return f.substr(1) } // No more functions // Begin load + search + generate // Local variables var h1s, x, z, y, ok, cur, classes, par, chs, cur_found, ccur, header, subheaders, texts, elems, subheader, content, stdclass, dynclass, widclass, xwsclass, quiclass, speclass, linclass, stdwidth, width, stddynamic, dynamic, isdynamic, delxws, dynspeed, c_speed, dynlines, c_lines, quickdyn, c_quidyn, alwdelxws, stdhalign, stdshalign, stdtalign, halclass, shalclass, talclass, c_halign, c_shalign, c_talign var h_elmnm, sh_elmnm, txt_elmnm var tmps, text, preElements, elmminus, prelms, plen, celm var /*poss_classes, n_classes,*/ pid, tmp // Default classnames for recognition purposes. // Can be changed using global variables. stdclass = 'totxt' try { stdclass = totxt_CLASSNAME ? totxt_CLASSNAME : stdclass } catch(e) {} dynclass = 'dynamic' try { dynclass = totxt_DYNAMIC_CLASSNAME ? totxt_DYNAMIC_CLASSNAME : dynclass } catch(e) {} widclass = 'width' try { widclass = totxt_WIDTH_CLASSNAME ? totxt_WIDTH_CLASSNAME : widclass } catch(e) {} xwsclass = 'del_excess_whitespace' try { xwsclass = totxt_DELXWS_CLASSNAME ? totxt_DELXWS_CLASSNAME : xwsclass } catch(e) {} speclass = 'speed' try { speclass = totxt_DYNSPEED_CLASSNAME ? totxt_DYNSPEED_CLASSNAME : speclass } catch(e) {} linclass = 'lines' try { linclass = totxt_DYNLINES_CLASSNAME ? totxt_DYNLINES_CLASSNAME : linclass } catch(e) {} quiclass = 'quick_dynamic' try { quiclass = totxt_QUICK_CLASSNAME ? totxt_QUICK_CLASSNAME : quiclass } catch(e) {} halclass = 'halign' try { halclass = totxt_HALIGN_CLASSNAME ? totxt_HALIGN_CLASSNAME : halclass } catch(e) {} shalclass = 'shalign' try { shalclass = totxt_SHALIGN_CLASSNAME ? totxt_SHALIGN_CLASSNAME : shalclass } catch(e) {} talclass = 'talign' try { talclass = totxt_TALIGN_CLASSNAME ? totxt_TALIGN_CLASSNAME : talclass } catch(e) {} h_elmnm = 'H1' try { h_elmnm = totxt_HEADER_ELEMENT ? totxt_HEADER_ELEMENT.toUpperCase() : h_elmnm } catch(e) {} sh_elmnm = 'H2' try { sh_elmnm = totxt_SUBHEADER_ELEMENT ? totxt_SUBHEADER_ELEMENT.toUpperCase() : sh_elmnm } catch(e) {} txt_elmnm = 'P' try { txt_elmnm = totxt_TEXT_ELEMENT ? totxt_TEXT_ELEMENT.toUpperCase() : txt_elmnm } catch(e) {} h1s = document.getElementsByTagName(h_elmnm) if (!h1s) return false // Default values // Line width stdwidth = 80 try { stdwidth = totxt_WIDTH * 1 ? totxt_WIDTH : stdwidth } catch(e) {} if (stdwidth < 10) stdwidth = 10 // Should articles be allowed to collapse and expand with a mouseclick? stddynamic = -1 try { stddynamic = totxt_DYNAMIC } catch(e) {} // Sliding speed dynspeed = 50 try { dynspeed = totxt_DYNAMIC_SPEED * 1 } catch(e) {} // Line(s) to (dis)appear per function call dynlines = 1 try { dynlines = totxt_DYNAMIC_LINES * 1 ? totxt_DYNAMIC_LINES * 1 : dynlines } catch(e) {} // Don't slide? quickdyn = false try { quickdyn = totxt_QUICK_DYNAMIC } catch(e) {} // Delete excessive whitespace? alwdelxws = false try { alwdelxws = totxt_DEL_XWS } catch(e) {} // Header alignment stdhalign = 1 // Center try { stdhalign = totxt_HEADER_ALIGN * 1 } catch(e) {} // Subheader alignment stdshalign = 0 // Left try { stdshalign = totxt_SUBHEADER_ALIGN * 1 } catch(e) {} // Text alignment stdtalign = 0 // Left try { stdtalign = totxt_TEXT_ALIGN * 1 } catch(e) {} // Checking for impossible alignments if (stdhalign > 2) stdhalign = 2 else if (stdhalign < 0) stdhalign = 0 if (stdshalign > 2) stdshalign = 2 else if (stdshalign < 0) stdshalign = 0 if (stdtalign > 2) stdtalign = 2 else if (stdtalign < 0) stdtalign = 0 elems = [] // Elements that contain info to be used in final PRE elements. prelms = [] // PRE elements /* // Special classes poss_classes = [stdclass, dynclass, widclass, xwsclass, speclass, linclass, quiclass, halclass, shalclass, talclass] */ // document.totxt_preElements is the variable in which to store the PRE elements. try { /* document.totxt_preElements likely doesn't exist, which could result in an error if not called inside a try statement. */ preElements = document.totxt_preElements elmminus = preElements.length } catch(e) { document.totxt_preElements = [] preElements = [] elmminus = 0 } // Look up H1 elements for (x in h1s) { cur = h1s[x] if (!cur || !cur.className) continue // Filters classes = cur.className.split(' ') if (!isin(classes, stdclass)) continue par = cur.parentNode if (!par) continue // Find H2 and P elements that are "below" the current H1 element chs = par.childNodes cur_found = 0 header = cur.innerHTML subheaders = [] texts = [] elems[elems.length] = cur for (z in chs) { ccur = chs[z] if (ccur == cur) { cur_found = 1 continue } if (!cur_found || !ccur.tagName) continue if (ccur.tagName == sh_elmnm) subheaders[subheaders.length] = ccur.innerHTML else if (ccur.tagName == txt_elmnm) texts[texts.length] = ccur.innerHTML else break // If either H2 or P element, add object to elems var elems[elems.length] = ccur } width = isin(classes, widclass, 1, stdwidth) * 1 if (width < 10) width = stdwidth dynamic = isin(classes, dynclass, 1, stddynamic) * 1 isdynamic = dynamic == 0 || dynamic == 1 delxws = isin(classes, xwsclass, 0, alwdelxws) c_halign = isin(classes, halclass, 1, stdhalign) * 1 c_shalign = isin(classes, shalclass, 1, stdshalign) * 1 c_talign = isin(classes, talclass, 1, stdtalign) * 1 tmps = [' ' + ctimes('-', width - 2), '|' + ctimes('_', width - 2) + '|', '| ' + ctimes('~', width - 4) + ' |\n'] // Now generate the content header = add_borders(transform_text(header, width - 4, c_halign, delxws), '|') subheader = subheaders ? transform_text_in_list(subheaders, width - 4, delxws, '|', ' ', c_shalign) : '' content = texts ? transform_text_in_list(texts, width - 4, delxws, '|', ' ', c_talign) : '' header = tmps[0] + '\n' + header + '\n' + tmps[0] text = header + '\n' if (subheader) text += subheader + '\n' + tmps[2] if (content) text += content + '\n' text += tmps[1] /* // Check for special classes that have nothing to do with settings n_classes = [] for (z in classes) { for (y in poss_classes) { ok = true if (!classes[z].indexOf(poss_classes[y])) { ok = false break } } if (ok) n_classes[n_classes.length] = classes[z] } n_classes = n_classes.join(' ') */ // Find eventual id pid = cur.id delete cur.id // To avoid two elements with alike ids plen = prelms.length prelms[plen] = document.createElement('pre') celm = prelms[plen] celm.number = plen + elmminus // preElements may not be empty celm.className = cur.className /* celm.className = stdclass if (n_classes) celm.className += ' ' + n_classes */ if (pid) celm.id = pid // Find and apply eventual styles for (z in cur.style) { c = cur.style[z] if (c) { try { if (c.indexOf(':') != -1) tmp = c.split(':') celm.style.setProperty(tmp[0], tmp[1].replace(';', ''), '') } catch(e){} } } celm.style.display = 'none' celm.innerHTML = text if (isdynamic) { c_speed = isin(classes, speclass, 1, dynspeed) * 1 c_lines = isin(classes, linclass, 1, dynlines) * 1 if (!c_lines) c_lines = dynlines c_quidyn = isin(classes, quiclass, 0, quickdyn) celm.fulltext = text celm.header = header celm.headerheight = count(header) celm.fulltextheight = count(text) celm.state = dynamic if (!isin(classes, dynclass)) celm.className += ' ' + dynclass if (!isin(classes, dynclass + dynamic)) celm.className += ' ' + dynclass + dynamic /* if (c_quidyn) celm.className += ' ' + quiclass */ celm.dynclass = dynclass celm.dynspeed = c_speed celm.dynlines = c_lines celm.quickdyn = c_quidyn if (dynamic == 0) celm.onclick = function(){load_totxt_generator('show_or_hide', this.number)} else celm.ondblclick = function(){load_totxt_generator('show_or_hide', this.number)} if (!dynamic) celm.innerHTML = header } par.insertBefore(prelms[plen], cur) } // Remove used elements for (x in elems) { elems[x].parentNode.removeChild(elems[x]) } // Show new elements for (x in prelms) { prelms[x].style.display = 'block' } for (x in prelms) { preElements[preElements.length] = prelms[x] } document.totxt_preElements = preElements return true }