mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-08 20:47:44 +00:00
790694c449
--HG-- rename : intl/lwbrk/src/Makefile.in => intl/lwbrk/Makefile.in rename : intl/lwbrk/src/crashtests/416721.html => intl/lwbrk/crashtests/416721.html rename : intl/lwbrk/src/crashtests/crashtests.list => intl/lwbrk/crashtests/crashtests.list rename : intl/lwbrk/src/jisx4051class.h => intl/lwbrk/jisx4051class.h rename : intl/lwbrk/src/jisx4051pairtable.txt => intl/lwbrk/jisx4051pairtable.txt rename : intl/lwbrk/src/nsCarbonBreaker.cpp => intl/lwbrk/nsCarbonBreaker.cpp rename : intl/lwbrk/src/nsComplexBreaker.h => intl/lwbrk/nsComplexBreaker.h rename : intl/lwbrk/public/nsILineBreaker.h => intl/lwbrk/nsILineBreaker.h rename : intl/lwbrk/idl/nsISemanticUnitScanner.idl => intl/lwbrk/nsISemanticUnitScanner.idl rename : intl/lwbrk/public/nsIWordBreaker.h => intl/lwbrk/nsIWordBreaker.h rename : intl/lwbrk/src/nsJISx4051LineBreaker.cpp => intl/lwbrk/nsJISx4051LineBreaker.cpp rename : intl/lwbrk/src/nsJISx4051LineBreaker.h => intl/lwbrk/nsJISx4051LineBreaker.h rename : intl/lwbrk/public/nsLWBrkCIID.h => intl/lwbrk/nsLWBrkCIID.h rename : intl/lwbrk/src/nsPangoBreaker.cpp => intl/lwbrk/nsPangoBreaker.cpp rename : intl/lwbrk/src/nsRuleBreaker.cpp => intl/lwbrk/nsRuleBreaker.cpp rename : intl/lwbrk/src/nsSampleWordBreaker.cpp => intl/lwbrk/nsSampleWordBreaker.cpp rename : intl/lwbrk/src/nsSampleWordBreaker.h => intl/lwbrk/nsSampleWordBreaker.h rename : intl/lwbrk/src/nsSemanticUnitScanner.cpp => intl/lwbrk/nsSemanticUnitScanner.cpp rename : intl/lwbrk/src/nsSemanticUnitScanner.h => intl/lwbrk/nsSemanticUnitScanner.h rename : intl/lwbrk/src/nsUniscribeBreaker.cpp => intl/lwbrk/nsUniscribeBreaker.cpp rename : intl/lwbrk/src/rulebrk.c => intl/lwbrk/rulebrk.c rename : intl/lwbrk/src/rulebrk.h => intl/lwbrk/rulebrk.h rename : intl/lwbrk/src/th_char.h => intl/lwbrk/th_char.h
79 lines
2.4 KiB
C++
79 lines
2.4 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "nsSemanticUnitScanner.h"
|
|
|
|
// Interface-map boilerplate for nsSemanticUnitScanner: names the class, its
// base class nsSampleWordBreaker, and the additional interface
// nsISemanticUnitScanner. NOTE(review): presumably this expands to the
// AddRef/Release/QueryInterface definitions that forward to the base class --
// confirm against the macro definition.
NS_IMPL_ISUPPORTS_INHERITED(nsSemanticUnitScanner, nsSampleWordBreaker, nsISemanticUnitScanner)
|
|
|
|
// Default constructor: explicitly default-constructs the nsSampleWordBreaker
// base and performs no other work.
nsSemanticUnitScanner::nsSemanticUnitScanner()
  : nsSampleWordBreaker()
{
}
|
|
|
|
// Destructor: this class has nothing of its own to release.
nsSemanticUnitScanner::~nsSemanticUnitScanner()
{
}
|
|
|
|
|
|
/* void start (in string characterSet); */
|
|
/**
 * Begins a scan for the given character set.
 *
 * The current implementation is a placeholder: |characterSet| is ignored and
 * no state is set up.
 *
 * @param characterSet  name of the character set; currently unused.
 * @return NS_OK unconditionally.
 */
NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
{
  // Nothing to initialize yet.
  return NS_OK;
}
|
|
|
|
/* boolean next (in wstring text, in long length, in long pos, in boolean isLastBuffer, out long begin, out long end); */
|
|
/**
 * Locates the next semantic unit in |text|, starting the search at |pos|.
 *
 * Runs whose first character is classified as space or punctuation are
 * skipped; the scan then continues from the break that follows them. A
 * character classified as a Han letter is always reported as a unit of
 * exactly one code unit.
 *
 * @param text          buffer of UTF-16 code units to scan.
 * @param length        number of code units available in |text|.
 * @param pos           offset at which to start looking.
 * @param isLastBuffer  true if no further text follows this buffer. When the
 *                      word breaker cannot find a break before the end of the
 *                      buffer, the remainder [pos, length) is reported as a
 *                      unit only if this flag is set.
 * @param begin         [out] start offset of the unit.
 * @param end           [out] offset one past the last code unit of the unit.
 * @param _retval       [out] true if [*begin, *end) describes a unit, false
 *                      if the end of the buffer was reached or more text is
 *                      needed to decide.
 * @return NS_OK in every case.
 *
 * XXX The arguments are not validated: |text|, |begin|, |end| and |_retval|
 * must be non-null and |pos| must not be negative.
 */
NS_IMETHODIMP nsSemanticUnitScanner::Next(const char16_t *text, int32_t length, int32_t pos, bool isLastBuffer, int32_t *begin, int32_t *end, bool *_retval)
{
  // Iterate rather than recurse: each skipped space/punctuation run used to
  // cost one stack frame, so text alternating between the two classes could
  // nest as deeply as the buffer is long.
  for (;;) {
    // Nothing left to scan.
    if (pos >= length) {
      *begin = pos;
      *end = pos;
      *_retval = false;
      return NS_OK;
    }

    const uint8_t char_class = nsSampleWordBreaker::GetClass(text[pos]);

    // Chinese mode: hand back one Han letter at a time.
    // (This should not be done in Japanese or Korean mode.)
    if (kWbClassHanLetter == char_class) {
      *begin = pos;
      *end = pos + 1;
      *_retval = true;
      return NS_OK;
    }

    // Ask the word breaker where the run starting at pos ends.
    const int32_t next = NextWord(text, static_cast<uint32_t>(length),
                                  static_cast<uint32_t>(pos));

    // The breaker could not decide within this buffer. If this is the last
    // buffer the remainder is the unit; otherwise report "no unit yet".
    if (next == NS_WORDBREAKER_NEED_MORE_TEXT) {
      *begin = pos;
      *end = isLastBuffer ? length : pos;
      *_retval = isLastBuffer;
      return NS_OK;
    }

    // Anything other than space or punctuation is a unit in its own right.
    if ((char_class != kWbClassSpace) && (char_class != kWbClassPunct)) {
      *begin = pos;
      *end = next;
      *_retval = true;
      return NS_OK;
    }

    // Space or punctuation: skip the run and look again from the next break.
    pos = next;
  }
}
|
|
|