diff options
| author | Simon Tatham <anakin@pobox.com> | 2001-09-23 16:37:36 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 2001-09-23 16:37:36 +0000 |
| commit | a8cf7617bb5180d5535021a31f68c558473443f0 (patch) | |
| tree | 25719bb625ee41ee273800e258e33ef47760acee /misc.c | |
| parent | 986da4bb991e4b4c45fcfd88113095ee21156ef1 (diff) | |
| download | halibut-a8cf7617bb5180d5535021a31f68c558473443f0.zip halibut-a8cf7617bb5180d5535021a31f68c558473443f0.tar.gz halibut-a8cf7617bb5180d5535021a31f68c558473443f0.tar.bz2 halibut-a8cf7617bb5180d5535021a31f68c558473443f0.tar.xz | |
Replace the naive greedy paragraph wrapping algorithm with the shiny
optimal dynamic one used by the likes of TeX. Woo.
[originally from svn r1288]
Diffstat (limited to 'misc.c')
| -rw-r--r-- | misc.c | 178 |
1 files changed, 133 insertions, 45 deletions
@@ -155,65 +155,153 @@ void mark_attr_ends(paragraph *sourceform) { wrappedline *wrap_para(word *text, int width, int subsequentwidth, int (*widthfn)(word *)) { wrappedline *head = NULL, **ptr = &head; - word *spc; - int nspc; - int thiswidth, lastgood; + int nwords, wordsize; + struct wrapword { + word *begin, *end; + int width; + int spacewidth; + int cost; + int nwords; + } *wrapwords; + int i, j, n; + /* + * Break the line up into wrappable components. + */ + nwords = wordsize = 0; + wrapwords = NULL; while (text) { - wrappedline *w = mknew(wrappedline); - *ptr = w; - ptr = &w->next; - w->next = NULL; - - w->begin = text; - spc = NULL; - nspc = 0; - thiswidth = lastgood = 0; + if (nwords >= wordsize) { + wordsize = nwords + 64; + wrapwords = srealloc(wrapwords, wordsize * sizeof(*wrapwords)); + } + wrapwords[nwords].width = 0; + wrapwords[nwords].begin = text; while (text) { - thiswidth += widthfn(text); + wrapwords[nwords].width += widthfn(text); + wrapwords[nwords].end = text->next; if (text->next && (text->next->type == word_WhiteSpace || text->next->type == word_EmphSpace || - text->breaks)) { - if (thiswidth > width) + text->breaks)) + break; + text = text->next; + } + if (text && text->next && (text->next->type == word_WhiteSpace || + text->next->type == word_EmphSpace)) { + wrapwords[nwords].spacewidth = widthfn(text->next); + text = text->next; + } else { + wrapwords[nwords].spacewidth = 0; + } + nwords++; + if (text) + text = text->next; + } + + /* + * Perform the dynamic wrapping algorithm: work backwards from + * nwords-1, determining the optimal wrapping for each terminal + * subsequence of the paragraph. + */ + for (i = nwords; i-- ;) { + int best = -1; + int bestcost = 0; + int cost; + int linelen = 0, spacewidth = 0; + int seenspace; + int thiswidth = (i == 0 ? width : subsequentwidth); + + j = 0; + seenspace = 0; + while (i+j < nwords) { + /* + * See what happens if we put j+1 words on this line. + */ + if (spacewidth) + seenspace = 1; + linelen += spacewidth + wrapwords[i+j].width; + spacewidth = wrapwords[i+j].spacewidth; + j++; + if (linelen > thiswidth) { + /* + * If we're over the width limit, abandon ship, + * _unless_ there is no best-effort yet (which will + * only happen if the first word is too long all by + * itself). + */ + if (best > 0) break; - spc = text->next; - lastgood = thiswidth; - nspc++; } - text = text->next; + if (i+j == nwords) { + /* + * Special case: if we're at the very end of the + * paragraph, we don't score penalty points for the + * white space left on the line, unless the line + * contains no spaces (widow penalty). The widow + * penalty is the square of 1/4 the line width. + */ + if (seenspace) + cost = 0; + else + cost = (thiswidth/4)*(thiswidth/4); + } else { + cost = (thiswidth-linelen) * (thiswidth-linelen); + cost += wrapwords[i+j].cost; + } + /* + * We compare bestcost >= cost, not bestcost > cost, + * because in cases where the costs are identical we + * want to try to look like the greedy algorithm, + * because readers are likely to have spent a lot of + * time looking at greedy-wrapped paragraphs and + * there's no point violating the Principle of Least + * Surprise if it doesn't actually gain anything. + */ + if (best < 0 || bestcost >= cost) { + bestcost = cost; + best = j; + } } /* - * We've exited this loop on one of three conditions. spc - * might be non-NULL and we've overflowed: we have broken - * the paragraph there. spc might be NULL and text might be - * NULL too: we've reached the end of the paragraph and - * should output the last line. Or text might be non-NULL - * and spc might be NULL: we've got an anomalously long - * line with no spaces we could have broken at. Output it - * anyway as the best we can do. + * Now we know the optimal answer for this terminal + * subsequence, so put it in wrapwords. */ - if (spc && thiswidth > width) { - w->end = text = spc; - w->nspaces = nspc-1; - w->shortfall = width - lastgood; - } else if (!text) { - w->end = NULL; /* no end marker needed */ - w->nspaces = nspc; - w->shortfall = width - thiswidth; - } else { - w->end = text; - w->nspaces = 0; - w->shortfall = width - thiswidth; - } + wrapwords[i].cost = bestcost; + wrapwords[i].nwords = best; + } + + /* + * We've wrapped the paragraph. Now build the output + * `wrappedline' list. + */ + i = 0; + while (i < nwords) { + wrappedline *w = mknew(wrappedline); + *ptr = w; + ptr = &w->next; + w->next = NULL; + + n = wrapwords[i].nwords; + w->begin = wrapwords[i].begin; + w->end = wrapwords[i+n-1].end; + /* - * Skip the space if we're on one. + * Count along the words to find nspaces and shortfall. */ - if (text && (text->type == word_WhiteSpace || - text->type == word_EmphSpace)) - text = text->next; - width = subsequentwidth; + w->nspaces = 0; + w->shortfall = width; + for (j = 0; j < n; j++) { + w->shortfall -= wrapwords[i+j].width; + if (j < n-1 && wrapwords[i+j].spacewidth) { + w->nspaces++; + w->shortfall -= wrapwords[i+j].spacewidth; + } + } + i += n; } + sfree(wrapwords); + return head; } |