From b280c3bf2ea4148d9f1eee8c4a00c0658964e5b3 Mon Sep 17 00:00:00 2001 From: Nikolay Sivov Date: Tue, 31 Jan 2017 14:43:11 +0300 Subject: [PATCH] dwrite: Improve line breaking logic. Signed-off-by: Nikolay Sivov Signed-off-by: Alexandre Julliard --- dlls/dwrite/analyzer.c | 103 +++++++++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 36 deletions(-) diff --git a/dlls/dwrite/analyzer.c b/dlls/dwrite/analyzer.c index 72ce8136da..14a93e89d8 100644 --- a/dlls/dwrite/analyzer.c +++ b/dlls/dwrite/analyzer.c @@ -363,20 +363,31 @@ enum linebreaking_classes { b_ZWJ, }; +static BOOL has_strong_condition(DWRITE_BREAK_CONDITION old_condition, DWRITE_BREAK_CONDITION new_condition) +{ + if (old_condition == DWRITE_BREAK_CONDITION_MAY_NOT_BREAK || old_condition == DWRITE_BREAK_CONDITION_MUST_BREAK) + return TRUE; + + if (old_condition == DWRITE_BREAK_CONDITION_CAN_BREAK && new_condition != DWRITE_BREAK_CONDITION_MUST_BREAK) + return TRUE; + + return FALSE; +} + /* "Can break" is a weak condition, stronger "may not break" and "must break" override it. Initially all conditions are set to "can break" and could only be changed once. */ static inline void set_break_condition(UINT32 pos, enum BreakConditionLocation location, DWRITE_BREAK_CONDITION condition, struct linebreaking_state *state) { if (location == BreakConditionBefore) { - if (state->breakpoints[pos].breakConditionBefore != DWRITE_BREAK_CONDITION_CAN_BREAK) + if (has_strong_condition(state->breakpoints[pos].breakConditionBefore, condition)) return; state->breakpoints[pos].breakConditionBefore = condition; if (pos > 0) state->breakpoints[pos-1].breakConditionAfter = condition; } else { - if (state->breakpoints[pos].breakConditionAfter != DWRITE_BREAK_CONDITION_CAN_BREAK) + if (has_strong_condition(state->breakpoints[pos].breakConditionAfter, condition)) return; state->breakpoints[pos].breakConditionAfter = condition; if (pos + 1 < state->count) @@ -403,14 +414,12 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B state.breakpoints = breakpoints; state.count = count; - /* LB31 - allow breaks everywhere. It will be overridden if needed as - other rules dictate. */ for (i = 0; i < count; i++) { break_class[i] = get_table_entry(wine_linebreak_table, text[i]); - breakpoints[i].breakConditionBefore = DWRITE_BREAK_CONDITION_CAN_BREAK; - breakpoints[i].breakConditionAfter = DWRITE_BREAK_CONDITION_CAN_BREAK; + breakpoints[i].breakConditionBefore = DWRITE_BREAK_CONDITION_NEUTRAL; + breakpoints[i].breakConditionAfter = DWRITE_BREAK_CONDITION_NEUTRAL; breakpoints[i].isWhitespace = !!isspaceW(text[i]); breakpoints[i].isSoftHyphen = text[i] == 0x00ad /* Unicode Soft Hyphen */; breakpoints[i].padding = 0; @@ -432,8 +441,10 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B /* LB2 - never break at the start */ set_break_condition(0, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); - /* LB3 - always break at the end. This one is ignored. */ + /* LB3 - always break at the end. */ + set_break_condition(count - 1, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); + /* LB4 - LB6 - mandatory breaks. */ for (i = 0; i < count; i++) { switch (break_class[i]) @@ -455,16 +466,27 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B /* LB6 - do not break before hard breaks */ set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; + } + } + + /* LB7 - LB8 - explicit breaks and non-breaks */ + for (i = 0; i < count; i++) + { + switch (break_class[i]) + { /* LB7 - do not break before spaces */ case b_SP: set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; case b_ZW: set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); - /* LB8 - break before character after zero-width space, skip spaces in-between */ - while (i < count-1 && break_class[i+1] == b_SP) - i++; - set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); + + /* LB8 - break before character after zero-width space, skip spaces in-between */ + j = i; + while (j < count-1 && break_class[j+1] == b_SP) + j++; + if (j < count-1 && break_class[j+1] != b_ZW) + set_break_condition(j, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); break; /* LB8a - do not break between ZWJ and an ideograph, emoji base or emoji modifier */ case b_ZWJ: @@ -474,7 +496,7 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B } } - /* LB9 - LB10 */ + /* LB9 - LB10 - combining marks */ for (i = 0; i < count; i++) { if (break_class[i] == b_CM || break_class[i] == b_ZWJ) @@ -492,7 +514,10 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B break_class[i] = b_AL; break; default: + { break_class[i] = break_class[i-1]; + set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); + } } } else break_class[i] = b_AL; @@ -526,22 +551,20 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B case b_SY: set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; - /* LB14 */ + /* LB14 - do not break after OP, even after spaces */ case b_OP: - set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); - while (i < count-1 && break_class[i+1] == b_SP) { - set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); - i++; - } - break; - /* LB15 */ - case b_QU: - j = i+1; - while (j < count-1 && break_class[j] == b_SP) + j = i; + while (j < count-1 && break_class[j+1] == b_SP) j++; - if (break_class[j] == b_OP) - for (; j > i; j--) - set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); + set_break_condition(j, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); + break; + /* LB15 - do not break within QU-OP, even with intervening spaces */ + case b_QU: + j = i; + while (j < count-1 && break_class[j+1] == b_SP) + j++; + if (j < count - 1 && break_class[j+1] == b_OP) + set_break_condition(j, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; /* LB16 */ case b_NS: @@ -552,14 +575,13 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B for (j++; j <= i; j++) set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; - /* LB17 */ + /* LB17 - do not break within B2, even with intervening spaces */ case b_B2: - j = i+1; - while (j < count && break_class[j] == b_SP) + j = i; + while (j < count && break_class[j+1] == b_SP) j++; - if (break_class[j] == b_B2) - for (; j > i; j--) - set_break_condition(j, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); + if (j < count - 1 && break_class[j+1] == b_B2) + set_break_condition(j, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; } } @@ -580,7 +602,8 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B /* LB20 */ case b_CB: set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); - set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); + if (i < count - 1 && break_class[i+1] != b_QU) + set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); break; /* LB21 */ case b_BA: @@ -589,7 +612,8 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; case b_BB: - set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); + if (i < count - 1 && break_class[i+1] != b_CB) + set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); break; /* LB21a, LB21b */ case b_HL: @@ -701,7 +725,7 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B if (break_class[i+1] == b_IN || break_class[i+1] == b_PO) set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); } - if (break_class[i] == b_PO) + if (break_class[i] == b_PR) { switch (break_class[i+1]) { @@ -731,7 +755,7 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B break_class[i+1] == b_OP) set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); if (break_class[i] == b_CP && - (break_class[i+1] == b_AL || break_class[i] == b_HL || break_class[i] == b_NU)) + (break_class[i+1] == b_AL || break_class[i+1] == b_HL || break_class[i+1] == b_NU)) set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_MAY_NOT_BREAK, &state); /* LB30a - break between two RIs if and only if there are an even number of RIs preceding position of the break */ @@ -752,6 +776,13 @@ static HRESULT analyze_linebreaks(const WCHAR *text, UINT32 count, DWRITE_LINE_B } } + /* LB31 - allow breaks everywhere else. */ + for (i = 0; i < count; i++) + { + set_break_condition(i, BreakConditionBefore, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); + set_break_condition(i, BreakConditionAfter, DWRITE_BREAK_CONDITION_CAN_BREAK, &state); + } + heap_free(break_class); return S_OK; }