wine/dlls/dwrite/analyzer.c
2012-11-05 17:44:17 +01:00

386 lines
14 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Text analyzer
*
* Copyright 2012 Nikolay Sivov for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define COBJMACROS
#include "dwrite.h"
#include "dwrite_private.h"
#include "wine/debug.h"
WINE_DEFAULT_DEBUG_CHANNEL(dwrite);
/* Script identifiers produced by the script analysis in this file.
 *
 * The numeric values are sparse and explicitly assigned; they end up in
 * DWRITE_SCRIPT_ANALYSIS.script when runs are reported to the sink.
 * NOTE(review): the values presumably mirror the script ids used by native
 * DirectWrite - confirm before renumbering or filling the gaps.
 *
 * Script_Symbol is treated as a neutral script by analyze_script(): it is
 * merged into whatever script precedes or follows it.
 * Script_Unknown ((UINT16)-1) doubles as the terminator of script_ranges[]
 * and as the result for characters no range covers.
 */
enum scriptcode {
Script_Arabic = 0,
Script_Armenian = 1,
Script_Balinese = 2,
Script_Bengali = 3,
Script_Buginese = 6,
Script_Canadian = 8,
Script_Cherokee = 11,
Script_Controls = 12,
Script_Coptic = 13,
Script_Cyrillic = 16,
Script_Devanagari = 18,
Script_Ethiopic = 19,
Script_Georgian = 20,
Script_Greek = 23,
Script_Gujarati = 24,
Script_Gurmukhi = 25,
Script_Hangul = 27,
Script_Hebrew = 29,
Script_Kannada = 32,
Script_Khmer = 36,
Script_Lao = 37,
Script_Latin = 38,
Script_Lepcha = 39,
Script_Limbu = 40,
Script_Malayalam = 44,
Script_Mongolian = 45,
Script_Myanmar = 46,
Script_New_TaiLue = 47,
Script_NKo = 48,
Script_Ogham = 49,
Script_OlChiki = 50,
Script_Oriya = 53,
Script_Runic = 58,
Script_Sinhala = 61,
Script_Sundanese = 62,
Script_Syriac = 64,
Script_TaiLe = 67,
Script_Tamil = 68,
Script_Telugu = 69,
Script_Thaana = 70,
Script_Thai = 71,
Script_Tibetan = 72,
Script_Symbol = 77,
/* sentinel / "no matching range" value, 0xffff when stored in a UINT16 */
Script_Unknown = (UINT16)-1
};
/* One entry of the character-to-script lookup table.
 * A character belongs to the entry when first <= ch <= last (both inclusive). */
struct script_range {
/* one of enum scriptcode, stored as UINT16 */
UINT16 script;
/* first code point of the range, inclusive */
DWORD first;
/* last code point of the range, inclusive */
DWORD last;
};
/* Character-to-script lookup table used by get_char_script().
 *
 * The table is scanned linearly from the top and the first matching entry
 * wins. The final { Script_Unknown } entry terminates the scan, so it must
 * stay last; characters that fall into gaps between the listed ranges reach
 * it and are reported as Script_Unknown.
 */
static const struct script_range script_ranges[] = {
/* C0 Controls: U+0000-U+001F */
/* ASCII punctuation and symbols: U+0020-U+002F */
/* ASCII digits: U+0030-U+0039 */
/* ASCII punctuation and symbols: U+003A-U+0040 */
{ Script_Symbol, 0x00, 0x040 },
/* Latin uppercase: U+0041-U+005A */
{ Script_Latin, 0x41, 0x5a },
/* ASCII punctuation and symbols: U+005B-U+0060 */
{ Script_Symbol, 0x5b, 0x060 },
/* Latin lowercase: U+0061-U+007A */
{ Script_Latin, 0x61, 0x7a },
/* ASCII punctuation and symbols, control char DEL: U+007B-U+007F */
{ Script_Symbol, 0x7b, 0x7f },
/* C1 Controls: U+0080-U+009F */
{ Script_Controls, 0x80, 0x9f },
/* Latin-1 Supplement: U+00A0-U+00FF */
/* Latin Extended-A: U+0100-U+017F */
/* Latin Extended-B: U+0180-U+024F */
/* IPA Extensions: U+0250-U+02AF */
/* Spacing Modifier Letters: U+02B0-U+02FF */
{ Script_Latin, 0xa0, 0x2ff },
/* Combining Diacritical Marks: U+0300-U+036F */
{ Script_Symbol, 0x300, 0x36f },
/* Greek: U+0370-U+03E1 */
{ Script_Greek, 0x370, 0x3e1 },
/* Coptic: U+03E2-U+03EF */
{ Script_Coptic, 0x3e2, 0x3ef },
/* Greek: U+03F0-U+03FF */
{ Script_Greek, 0x3f0, 0x3ff },
/* Cyrillic: U+0400-U+04FF */
/* Cyrillic Supplement: U+0500-U+052F */
/* The Cyrillic Supplement range in Unicode 5.2 is incomplete because it
does not define some Abkhaz and Azerbaijani letters; the Unicode 6.0 range is used here */
{ Script_Cyrillic, 0x400, 0x52f },
/* Armenian: U+0530-U+058F */
{ Script_Armenian, 0x530, 0x58f },
/* Hebrew: U+0590-U+05FF */
{ Script_Hebrew, 0x590, 0x5ff },
/* Arabic: U+0600-U+06FF */
{ Script_Arabic, 0x600, 0x6ff },
/* Syriac: U+0700-U+074F */
{ Script_Syriac, 0x700, 0x74f },
/* Arabic Supplement: U+0750-U+077F */
{ Script_Arabic, 0x750, 0x77f },
/* Thaana: U+0780-U+07BF */
{ Script_Thaana, 0x780, 0x7bf },
/* N'Ko: U+07C0-U+07FF */
{ Script_NKo, 0x7c0, 0x7ff },
/* Devanagari: U+0900-U+097F */
{ Script_Devanagari, 0x900, 0x97f },
/* Bengali: U+0980-U+09FF */
{ Script_Bengali, 0x980, 0x9ff },
/* Gurmukhi: U+0A00-U+0A7F */
{ Script_Gurmukhi, 0xa00, 0xa7f },
/* Gujarati: U+0A80-U+0AFF */
{ Script_Gujarati, 0xa80, 0xaff },
/* Oriya: U+0B00-U+0B7F */
{ Script_Oriya, 0xb00, 0xb7f },
/* Tamil: U+0B80-U+0BFF */
{ Script_Tamil, 0xb80, 0xbff },
/* Telugu: U+0C00-U+0C7F */
{ Script_Telugu, 0xc00, 0xc7f },
/* Kannada: U+0C80-U+0CFF */
{ Script_Kannada, 0xc80, 0xcff },
/* Malayalam: U+0D00-U+0D7F */
{ Script_Malayalam, 0xd00, 0xd7f },
/* Sinhala: U+0D80-U+0DFF */
{ Script_Sinhala, 0xd80, 0xdff },
/* Thai: U+0E00-U+0E7F */
{ Script_Thai, 0xe00, 0xe7f },
/* Lao: U+0E80-U+0EFF */
{ Script_Lao, 0xe80, 0xeff },
/* Tibetan: U+0F00-U+0FFF */
{ Script_Tibetan, 0xf00, 0xfff },
/* Myanmar: U+1000-U+109F */
{ Script_Myanmar, 0x1000, 0x109f },
/* Georgian: U+10A0-U+10FF */
{ Script_Georgian, 0x10a0, 0x10ff },
/* Hangul Jamo: U+1100-U+11FF */
{ Script_Hangul, 0x1100, 0x11ff },
/* Ethiopic: U+1200-U+137F */
/* Ethiopic Extensions: U+1380-U+139F */
{ Script_Ethiopic, 0x1200, 0x139f },
/* Cherokee: U+13A0-U+13FF */
{ Script_Cherokee, 0x13a0, 0x13ff },
/* Canadian Aboriginal Syllabics: U+1400-U+167F */
{ Script_Canadian, 0x1400, 0x167f },
/* Ogham: U+1680-U+169F */
{ Script_Ogham, 0x1680, 0x169f },
/* Runic: U+16A0-U+16F0 */
{ Script_Runic, 0x16a0, 0x16f0 },
/* Khmer: U+1780-U+17FF */
{ Script_Khmer, 0x1780, 0x17ff },
/* Mongolian: U+1800-U+18AF */
{ Script_Mongolian, 0x1800, 0x18af },
/* Limbu: U+1900-U+194F */
{ Script_Limbu, 0x1900, 0x194f },
/* Tai Le: U+1950-U+197F */
{ Script_TaiLe, 0x1950, 0x197f },
/* New Tai Lue: U+1980-U+19DF */
{ Script_New_TaiLue, 0x1980, 0x19df },
/* Khmer Symbols: U+19E0-U+19FF */
{ Script_Khmer, 0x19e0, 0x19ff },
/* Buginese: U+1A00-U+1A1F */
{ Script_Buginese, 0x1a00, 0x1a1f },
/* Tai Tham: U+1A20-U+1AAF (mapped to Script_Symbol) */
{ Script_Symbol, 0x1a20, 0x1aaf },
/* Balinese: U+1B00-U+1B7F */
{ Script_Balinese, 0x1b00, 0x1b7f },
/* Sundanese: U+1B80-U+1BBF */
{ Script_Sundanese, 0x1b80, 0x1bbf },
/* Batak: U+1BC0-U+1BFF (mapped to Script_Symbol) */
{ Script_Symbol, 0x1bc0, 0x1bff },
/* Lepcha: U+1C00-U+1C4F */
{ Script_Lepcha, 0x1c00, 0x1c4f },
/* Ol Chiki: U+1C50-U+1C7F */
{ Script_OlChiki, 0x1c50, 0x1c7f },
/* Sundanese Supplement: U+1CC0-U+1CCF (mapped to Script_Symbol) */
{ Script_Symbol, 0x1cc0, 0x1ccf },
/* Vedic Extensions: U+1CD0-U+1CFF */
{ Script_Devanagari, 0x1cd0, 0x1cff },
/* Phonetic Extensions: U+1D00-U+1DBF */
{ Script_Latin, 0x1d00, 0x1dbf },
/* unsupported range; terminator entry, first/last are zero-initialized and never compared */
{ Script_Unknown }
};
/* Look up the script id of a single UTF-16 code unit.
 *
 * Walks script_ranges[] from the top and returns the script of the first
 * entry whose inclusive [first, last] range contains the character. The
 * Script_Unknown terminator entry stops the walk, so characters not covered
 * by any range yield Script_Unknown.
 */
static UINT16 get_char_script( WCHAR c )
{
    const struct script_range *entry = script_ranges;
    const struct script_range *end = script_ranges + sizeof(script_ranges)/sizeof(script_ranges[0]);
    const DWORD codepoint = c;

    for (; entry != end; entry++)
    {
        /* terminator reached: nothing above matched */
        if (entry->script == Script_Unknown)
            return entry->script;

        if (codepoint >= entry->first && codepoint <= entry->last)
            return entry->script;
    }

    /* only reachable if the table ever loses its terminator entry */
    return Script_Unknown;
}
/* Split text into runs of identical script and report each run to the sink.
 *
 * text - characters to analyze; only read when len is non-zero
 * len  - number of characters in text
 * sink - receives one SetScriptAnalysis() call per run; run positions are
 *        relative to the start of text (the first run starts at 0)
 *
 * Returns S_OK for empty input, the first failure returned by the sink, or
 * otherwise the result of the final SetScriptAnalysis() call.
 */
static HRESULT analyze_script(const WCHAR *text, UINT32 len, IDWriteTextAnalysisSink *sink)
{
    DWRITE_SCRIPT_ANALYSIS run;
    UINT32 run_start = 0, run_len = 1, idx;
    HRESULT hr;

    if (len == 0)
        return S_OK;

    run.script = get_char_script(text[0]);
    run.shapes = DWRITE_SCRIPT_SHAPES_DEFAULT;

    for (idx = 1; idx < len; idx++)
    {
        UINT16 cur = get_char_script(text[idx]);

        /* Script_Symbol is neutral: a run that so far consists only of symbols
           adopts the script of the next character, and a symbol following
           another script is absorbed into the current run */
        if (run.script == Script_Symbol) run.script = cur;
        if (cur == Script_Symbol) cur = run.script;

        if (cur == run.script)
        {
            /* same script, the pending run just grows */
            run_len++;
            continue;
        }

        /* script changed: flush the pending run and start a new one here */
        hr = IDWriteTextAnalysisSink_SetScriptAnalysis(sink, run_start, run_len, &run);
        if (FAILED(hr)) return hr;

        run_start = idx;
        run_len = 1;
        run.script = cur;
    }

    /* report the last (or the only) run */
    return IDWriteTextAnalysisSink_SetScriptAnalysis(sink, run_start, run_len, &run);
}
/* IUnknown::QueryInterface for the text analyzer.
 *
 * Hands back iface itself for IID_IUnknown and IID_IDWriteTextAnalyzer;
 * any other IID stores NULL in *obj and returns E_NOINTERFACE.
 * No reference is added on success: AddRef/Release of this object only
 * return constants.
 */
static HRESULT WINAPI dwritetextanalyzer_QueryInterface(IDWriteTextAnalyzer *iface, REFIID riid, void **obj)
{
    TRACE("(%s %p)\n", debugstr_guid(riid), obj);

    if (!IsEqualIID(riid, &IID_IUnknown) && !IsEqualIID(riid, &IID_IDWriteTextAnalyzer))
    {
        *obj = NULL;
        return E_NOINTERFACE;
    }

    *obj = iface;
    return S_OK;
}
/* IUnknown::AddRef for the text analyzer.
 * No reference count is kept; the call always reports 2. The only instance
 * created in this file is the static textanalyzer object. */
static ULONG WINAPI dwritetextanalyzer_AddRef(IDWriteTextAnalyzer *iface)
{
return 2;
}
/* IUnknown::Release for the text analyzer.
 * No reference count is kept and nothing is freed; the call always reports 1. */
static ULONG WINAPI dwritetextanalyzer_Release(IDWriteTextAnalyzer *iface)
{
return 1;
}
/* IDWriteTextAnalyzer::AnalyzeScript
 *
 * Fetches the text at position from the analysis source and reports runs of
 * identical script to the sink.
 *
 * source   - text provider, queried with GetTextAtPosition()
 * position - index of the first character to analyze
 * length   - number of characters the caller wants analyzed
 * sink     - receives the SetScriptAnalysis() callbacks
 *
 * Returns S_OK when there is nothing to analyze, the failure code of
 * GetTextAtPosition(), or the result of analyze_script().
 */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeScript(IDWriteTextAnalyzer *iface,
    IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
    const WCHAR *text = NULL;
    UINT32 len = 0;
    HRESULT hr;

    TRACE("(%p %u %u %p)\n", source, position, length, sink);

    /* nothing requested, nothing to report */
    if (!length) return S_OK;

    hr = IDWriteTextAnalysisSource_GetTextAtPosition(source, position, &text, &len);
    if (FAILED(hr)) return hr;

    /* the source has no text at this position */
    if (!text) return S_OK;

    /* The source may hand back everything it has from position onwards;
       never analyze more than the caller asked for. */
    if (len > length) len = length;

    /* NOTE(review): if the source returns fewer than length characters only
       that first chunk is analyzed; fetching the remaining chunks is not
       implemented here.
       NOTE(review): analyze_script() reports run positions relative to the
       returned text, i.e. starting at 0 rather than at position - confirm
       against the SetScriptAnalysis() contract for non-zero positions. */
    return analyze_script(text, len, sink);
}
/* IDWriteTextAnalyzer::AnalyzeBidi
 * Not implemented: logs a FIXME with the arguments and returns E_NOTIMPL. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeBidi(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::AnalyzeNumberSubstitution
 * Not implemented: logs a FIXME with the arguments and returns E_NOTIMPL. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeNumberSubstitution(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::AnalyzeLineBreakpoints
 * Not implemented: logs a FIXME with the arguments and returns E_NOTIMPL. */
static HRESULT WINAPI dwritetextanalyzer_AnalyzeLineBreakpoints(IDWriteTextAnalyzer *iface,
IDWriteTextAnalysisSource* source, UINT32 position, UINT32 length, IDWriteTextAnalysisSink* sink)
{
FIXME("(%p %u %u %p): stub\n", source, position, length, sink);
return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::GetGlyphs
 * Not implemented: logs a FIXME with all arguments and returns E_NOTIMPL.
 * None of the output parameters are written. */
static HRESULT WINAPI dwritetextanalyzer_GetGlyphs(IDWriteTextAnalyzer *iface,
WCHAR const* text, UINT32 length, IDWriteFontFace* font_face, BOOL is_sideways,
BOOL is_rtl, DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale,
IDWriteNumberSubstitution* substitution, DWRITE_TYPOGRAPHIC_FEATURES const** features,
UINT32 const* feature_range_len, UINT32 feature_ranges, UINT32 max_glyph_count,
UINT16* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* text_props, UINT16* glyph_indices,
DWRITE_SHAPING_GLYPH_PROPERTIES* glyph_props, UINT32* actual_glyph_count)
{
/* text is traced with its explicit length rather than as a NUL-terminated string */
FIXME("(%s:%u %p %d %d %p %s %p %p %p %u %u %p %p %p %p %p): stub\n", debugstr_wn(text, length),
length, font_face, is_sideways, is_rtl, analysis, debugstr_w(locale), substitution, features, feature_range_len,
feature_ranges, max_glyph_count, clustermap, text_props, glyph_indices, glyph_props, actual_glyph_count);
return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::GetGlyphPlacements
 * Not implemented: logs a FIXME with all arguments and returns E_NOTIMPL.
 * None of the output parameters are written. */
static HRESULT WINAPI dwritetextanalyzer_GetGlyphPlacements(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
    UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
    UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, BOOL is_sideways, BOOL is_rtl,
    DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_len, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
    /* text comes with an explicit length, so trace it bounded by text_len
       instead of assuming a NUL terminator (same as GetGlyphs does) */
    FIXME("(%s %p %p %u %p %p %u %p %f %d %d %p %s %p %p %u %p %p): stub\n", debugstr_wn(text, text_len),
        clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, is_sideways,
        is_rtl, analysis, debugstr_w(locale), features, feature_range_len, feature_ranges, glyph_advances, glyph_offsets);
    return E_NOTIMPL;
}
/* IDWriteTextAnalyzer::GetGdiCompatibleGlyphPlacements
 * Not implemented: logs a FIXME with all arguments and returns E_NOTIMPL.
 * None of the output parameters are written. */
static HRESULT WINAPI dwritetextanalyzer_GetGdiCompatibleGlyphPlacements(IDWriteTextAnalyzer *iface,
    WCHAR const* text, UINT16 const* clustermap, DWRITE_SHAPING_TEXT_PROPERTIES* props,
    UINT32 text_len, UINT16 const* glyph_indices, DWRITE_SHAPING_GLYPH_PROPERTIES const* glyph_props,
    UINT32 glyph_count, IDWriteFontFace * font_face, FLOAT fontEmSize, FLOAT pixels_per_dip,
    DWRITE_MATRIX const* transform, BOOL use_gdi_natural, BOOL is_sideways, BOOL is_rtl,
    DWRITE_SCRIPT_ANALYSIS const* analysis, WCHAR const* locale, DWRITE_TYPOGRAPHIC_FEATURES const** features,
    UINT32 const* feature_range_lengths, UINT32 feature_ranges, FLOAT* glyph_advances, DWRITE_GLYPH_OFFSET* glyph_offsets)
{
    /* text comes with an explicit length, so trace it bounded by text_len
       instead of assuming a NUL terminator (same as GetGlyphs does) */
    FIXME("(%s %p %p %u %p %p %u %p %f %f %p %d %d %d %p %s %p %p %u %p %p): stub\n", debugstr_wn(text, text_len),
        clustermap, props, text_len, glyph_indices, glyph_props, glyph_count, font_face, fontEmSize, pixels_per_dip,
        transform, use_gdi_natural, is_sideways, is_rtl, analysis, debugstr_w(locale), features, feature_range_lengths,
        feature_ranges, glyph_advances, glyph_offsets);
    return E_NOTIMPL;
}
/* Method table of the text analyzer object.
 * The initializer is positional, so the entries have to follow the member
 * order of IDWriteTextAnalyzerVtbl (declared outside this file). */
static const struct IDWriteTextAnalyzerVtbl textanalyzervtbl = {
dwritetextanalyzer_QueryInterface,
dwritetextanalyzer_AddRef,
dwritetextanalyzer_Release,
dwritetextanalyzer_AnalyzeScript,
dwritetextanalyzer_AnalyzeBidi,
dwritetextanalyzer_AnalyzeNumberSubstitution,
dwritetextanalyzer_AnalyzeLineBreakpoints,
dwritetextanalyzer_GetGlyphs,
dwritetextanalyzer_GetGlyphPlacements,
dwritetextanalyzer_GetGdiCompatibleGlyphPlacements
};
/* The single, statically allocated analyzer instance handed out by get_textanalyzer() */
static IDWriteTextAnalyzer textanalyzer = { &textanalyzervtbl };
/* Return the text analyzer object.
 *
 * ret - receives a pointer to the static textanalyzer instance; every call
 *       yields the same pointer and nothing is allocated
 *
 * Always returns S_OK. ret is written unconditionally and is not checked for NULL.
 */
HRESULT get_textanalyzer(IDWriteTextAnalyzer **ret)
{
*ret = &textanalyzer;
return S_OK;
}