123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372 |
-
- ;(function(global, factory) {
- if (typeof module === "object" && typeof module.exports === "object") {
- module.exports = factory(global);
- } else {
- factory(global);
- }
- })(typeof window !== "undefined" ? window : this, function(window) {
/**
 * Maps every tone-marked pinyin letter to "<plain letter><tone number>".
 * "ü" itself is written as "v" with tone 0. removeTone() keeps only the
 * first character of each value; getTone() inverts the table.
 */
var toneMap =
{
    "ā": "a1",
    "á": "a2",
    "ǎ": "a3",
    "à": "a4",
    "ō": "o1",
    "ó": "o2",
    "ǒ": "o3",
    "ò": "o4",
    "ē": "e1",
    "é": "e2",
    "ě": "e3",
    "è": "e4",
    "ī": "i1",
    "í": "i2",
    "ǐ": "i3",
    "ì": "i4",
    "ū": "u1",
    "ú": "u2",
    "ǔ": "u3",
    "ù": "u4",
    "ü": "v0",
    "ǖ": "v1",
    "ǘ": "v2",
    "ǚ": "v3",
    "ǜ": "v4",
    "ń": "n2",
    "ň": "n3",
    // U+1E3F (m with acute). Written as an escape because the literal
    // character had been lost, leaving an empty-string key.
    "\u1e3f": "m2"
};

// Lookup tables filled in by pinyinUtil.parseDict().
var dict = {};
/**
 * Public pinyin API. All methods read the shared `dict` tables, which
 * parseDict() builds from the dictionary objects found on `window`.
 */
var pinyinUtil =
{
    /**
     * (Re)builds the lookup tables in `dict` from whichever dictionaries are
     * present on `window`:
     *   - pinyin_dict_firstletter -> dict.firstletter (stored as-is)
     *   - pinyin_dict_notone      -> dict.notone (hanzi -> toneless pinyin) and dict.py2hz
     *   - pinyin_dict_withtone    -> dict.withtone (hanzi -> readings); dict.py2hz is
     *                                derived from it when no toneless dictionary exists
     * Dictionaries are always read through `window` so that the check and the
     * read cannot disagree when `window` is not the real global object.
     */
    parseDict: function()
    {
        var firstLetterData = window.pinyin_dict_firstletter;
        var notoneData = window.pinyin_dict_notone;
        var withtoneData = window.pinyin_dict_withtone;
        var hasOwn = Object.prototype.hasOwnProperty;
        var i, j, len;

        if (firstLetterData)
        {
            dict.firstletter = firstLetterData;
        }

        if (notoneData)
        {
            dict.notone = {};
            dict.py2hz = notoneData;
            for (var syllable in notoneData)
            {
                if (!hasOwn.call(notoneData, syllable)) continue;
                var hanziList = notoneData[syllable];
                for (j = 0, len = hanziList.length; j < len; j++)
                {
                    // A character listed under several syllables keeps the first one seen.
                    if (!dict.notone[hanziList[j]]) dict.notone[hanziList[j]] = syllable;
                }
            }
        }

        if (withtoneData)
        {
            dict.withtone = {};
            var readings = withtoneData.split(',');
            for (i = 0, len = readings.length; i < len; i++)
            {
                // Entry i belongs to the character with code point 19968 + i (U+4E00 onwards).
                dict.withtone[String.fromCharCode(i + 19968)] = readings[i];
            }

            // With a toneless dictionary loaded, dict.py2hz was already set above.
            if (!notoneData)
            {
                var plain = pinyinUtil.removeTone(withtoneData).split(',');
                var py2hz = {};
                for (i = 0, len = plain.length; i < len; i++)
                {
                    var hz = String.fromCharCode(i + 19968);
                    var syllables = plain[i].split(' ');
                    for (j = 0; j < syllables.length; j++)
                    {
                        py2hz[syllables[j]] = (py2hz[syllables[j]] || '') + hz;
                    }
                }
                dict.py2hz = py2hz;
            }
        }
    },

    /**
     * Converts text to pinyin.
     * Characters without a dictionary entry are kept verbatim, grouped into
     * runs; spaces only act as run separators and are not emitted.
     *
     * @param {string} chinese text to convert
     * @param {string} [splitter=' '] separator placed between output items
     * @param {boolean} [withtone=true] keep tone marks (only the with-tone dictionary has them)
     * @param {boolean} [polyphone=false] keep alternative readings
     * @returns {string|string[]} a string, except in polyphone mode without
     *          window.pinyin_dict_polyphone, where every combination of
     *          readings is returned as an array
     * @throws {string} when neither dict.withtone nor dict.notone is loaded
     */
    getPinyin: function(chinese, splitter, withtone, polyphone)
    {
        if (!chinese || /^ +$/.test(chinese)) return '';
        splitter = splitter == null ? ' ' : splitter;
        withtone = withtone == null ? true : withtone;
        polyphone = polyphone == null ? false : polyphone;

        var tonal = !!dict.withtone;
        var table;
        if (tonal)
        {
            table = dict.withtone;
        }
        else if (dict.notone)
        {
            table = dict.notone;
            if (withtone) console.warn('pinyin_dict_notone 字典文件不支持声调!');
            if (polyphone) console.warn('pinyin_dict_notone 字典文件不支持多音字!');
        }
        else
        {
            throw '抱歉,未找到合适的拼音字典文件!';
        }

        // segments holds, in input order, either a string (a run of characters
        // with no dictionary entry) or {hanzi, pinyin[]} (a run of consecutive
        // characters that do have one).
        var segments = [];
        var pending = '';
        var run = null;
        var i, len;

        for (i = 0, len = chinese.length; i < len; i++)
        {
            var ch = chinese[i];
            var reading = table[ch];
            if (reading)
            {
                if (tonal)
                {
                    // Readings are space separated; the first one is the default.
                    if (!polyphone) reading = reading.replace(/ .*$/g, '');
                    if (!withtone) reading = this.removeTone(reading);
                }
                if (pending) { segments.push(pending); pending = ''; }
                if (!run)
                {
                    run = { hanzi: '', pinyin: [] };
                    segments.push(run);
                }
                run.hanzi += ch;
                run.pinyin.push(reading);
            }
            else
            {
                run = null;
                if (!ch || /^ +$/.test(ch))
                {
                    if (pending) { segments.push(pending); pending = ''; }
                }
                else
                {
                    pending += ch;
                }
            }
        }
        if (pending) segments.push(pending);

        if (polyphone && window.pinyin_dict_polyphone)
        {
            // parsePolyphone() indexes its result array by character position,
            // so it is fed one all-Chinese run at a time. Passing the merged
            // list for mixed text would shift every index after a non-Chinese run.
            var pieces = [];
            for (i = 0; i < segments.length; i++)
            {
                var seg = segments[i];
                pieces.push(typeof seg === 'string'
                    ? seg
                    : parsePolyphone(seg.hanzi, seg.pinyin, splitter, withtone));
            }
            return pieces.join(splitter);
        }

        var flat = [];
        for (i = 0; i < segments.length; i++)
        {
            if (typeof segments[i] === 'string') flat.push(segments[i]);
            else flat.push.apply(flat, segments[i].pinyin);
        }
        return polyphone ? handlePolyphone(flat, ' ', splitter) : flat.join(splitter);
    },

    /**
     * Returns the pinyin initials of a string.
     * Uses dict.firstletter when loaded (code points 19968..40869 are looked
     * up, everything else is copied through); otherwise derives the initials
     * from getPinyin() output and upper-cases them.
     *
     * @param {string} str text to convert
     * @param {boolean} [polyphone=false] return every combination of initials
     * @returns {string|string[]} a string, or an array of unique strings in polyphone mode
     */
    getFirstLetter: function(str, polyphone)
    {
        polyphone = polyphone == null ? false : polyphone;
        if (!str || /^ +$/.test(str)) return '';

        var letters = [];
        var i;

        if (dict.firstletter)
        {
            for (i = 0; i < str.length; i++)
            {
                var code = str.charCodeAt(i);
                var letter = str.charAt(i);
                if (code >= 19968 && code <= 40869)
                {
                    letter = dict.firstletter.all.charAt(code - 19968);
                    if (polyphone) letter = dict.firstletter.polyphone[code] || letter;
                }
                letters.push(letter);
            }
            return polyphone ? handlePolyphone(letters, '', '') : letters.join('');
        }

        var py = this.getPinyin(str, ' ', false, polyphone);
        var candidates = py instanceof Array ? py : [py];
        for (i = 0; i < candidates.length; i++)
        {
            // Keep only the first word character of each space separated syllable.
            letters.push(candidates[i].replace(/(^| )(\w)\w*/g, function(m, lead, first)
            {
                return first.toUpperCase();
            }));
        }
        return polyphone ? simpleUnique(letters) : letters[0];
    },

    /**
     * Returns the characters stored for a pinyin syllable (tone marks are ignored).
     *
     * @param {string} pinyin a single syllable, with or without tone marks
     * @returns {*} the dict.py2hz entry for the syllable, or '' when there is none
     * @throws {string} when dict.py2hz has not been built
     */
    getHanzi: function(pinyin)
    {
        if (!dict.py2hz)
        {
            throw '抱歉,未找到合适的拼音字典文件!';
        }
        var key = this.removeTone(pinyin);
        // Own-property check: keys such as "constructor" must not resolve to
        // members inherited from Object.prototype.
        if (!Object.prototype.hasOwnProperty.call(dict.py2hz, key)) return '';
        return dict.py2hz[key] || '';
    },

    /**
     * Returns the characters that share the toneless pinyin of `hz`.
     *
     * @param {string} hz source character
     * @param {boolean} [sameTone] accepted but currently ignored; the lookup is always toneless
     * @returns {*} result of getHanzi() for the toneless pinyin of `hz`
     */
    getSameVoiceWord: function(hz, sameTone)
    {
        return this.getHanzi(this.getPinyin(hz, ' ', false));
    },

    /**
     * Replaces tone-marked letters with their plain letter ("ü" variants become "v").
     *
     * @param {string} pinyin text with tone marks; null/undefined/'' yield ''
     * @returns {string} text without tone marks
     */
    removeTone: function(pinyin)
    {
        return (pinyin || '').replace(/[āáǎàōóǒòēéěèīíǐìūúǔùüǖǘǚǜńň]/g, function(m)
        {
            return toneMap[m].charAt(0);
        });
    },

    /**
     * Converts "letter + digit" pairs (e.g. "a1") into the tone-marked letter.
     * Pairs that are not in toneMap are left untouched.
     *
     * @param {string} pinyinWithoutTone text such as "zho1ng"
     * @returns {string} text with tone marks
     */
    getTone: function(pinyinWithoutTone)
    {
        var marked = {};
        for (var letter in toneMap) marked[toneMap[letter]] = letter;
        return (pinyinWithoutTone || '').replace(/[a-z]\d/g, function(m)
        {
            return marked[m] || m;
        });
    }
};
-
/**
 * Expands a list of per-item alternatives into every full combination.
 * Each entry of `array` is split on `splitter` into its alternatives; the
 * result contains one string per way of picking one alternative per entry.
 * The number of combinations is the product of the alternative counts.
 *
 * @param {string[]} array entries, each holding one or more alternatives
 * @param {string} [splitter=''] separator between alternatives inside an entry
 *        ('' splits an entry into single characters)
 * @param {string} [joinChar=''] separator inserted between picked alternatives
 * @returns {string[]} unique combinations, in generation order
 */
function handlePolyphone(array, splitter, joinChar)
{
    splitter = splitter || '';
    // Without this default an omitted joinChar would be concatenated as "undefined".
    joinChar = joinChar == null ? '' : joinChar;

    var combos = [''];
    for (var i = 0; i < array.length; i++)
    {
        var options = array[i].split(splitter);
        var next = [];
        for (var j = 0; j < options.length; j++)
        {
            for (var k = 0; k < combos.length; k++)
            {
                var prefix = combos[k];
                // No separator in front of the very first piece.
                next.push(prefix + (prefix ? joinChar : '') + options[j]);
            }
        }
        combos = next;
    }
    return simpleUnique(combos);
}
-
/**
 * Resolves polyphonic characters using the phrase dictionary
 * window.pinyin_dict_polyphone (phrase -> space separated readings).
 * `result` is modified in place and is indexed by character position, i.e.
 * result[i] must hold the readings of chinese[i].
 *
 * @param {string} chinese the source text
 * @param {string[]} result per-character readings (alternatives separated by spaces)
 * @param {string} splitter separator used to join the final readings
 * @param {boolean} withtone keep tone marks on readings taken from the phrase dictionary
 * @returns {string} the joined readings
 */
function parsePolyphone(chinese, result, splitter, withtone)
{
    var poly = window.pinyin_dict_polyphone;
    // Own-property checks keep inputs such as "constructor" from matching
    // members inherited from Object.prototype.
    var hasOwn = Object.prototype.hasOwnProperty;
    var maxPhraseLength = 7;
    var i, j, k, readings;

    // The whole input is a known phrase: take its readings directly.
    if (hasOwn.call(poly, chinese) && poly[chinese])
    {
        readings = poly[chinese].split(' ');
        for (i = 0; i < readings.length; i++)
        {
            result[i] = readings[i] || result[i];
            if (!withtone && result[i]) result[i] = pinyinUtil.removeTone(result[i]);
        }
        return result.join(splitter);
    }

    // Otherwise, from every position, grow a candidate phrase one character
    // at a time and apply the first (shortest) phrase found.
    for (i = 0; i < chinese.length; i++)
    {
        var phrase = '';
        for (j = 0; j < maxPhraseLength && (i + j) < chinese.length; j++)
        {
            if (!/^[\u2E80-\u9FFF]+$/.test(chinese[i + j])) break;
            phrase += chinese[i + j];
            if (!hasOwn.call(poly, phrase) || !poly[phrase]) continue;

            readings = poly[phrase].split(' ');
            for (k = 0; k <= j; k++)
            {
                if (readings[k]) result[i + k] = withtone ? readings[k] : pinyinUtil.removeTone(readings[k]);
            }
            break;
        }
    }

    // Characters not covered by any phrase fall back to their first reading.
    for (i = 0; i < result.length; i++)
    {
        result[i] = result[i].replace(/ .*$/g, '');
    }
    return result.join(splitter);
}
-
/**
 * Returns a new array with duplicates removed, keeping first occurrences in order.
 * Primitives are compared by type and string form (so 1 and "1" are distinct);
 * objects and functions are compared by identity, so two different objects
 * are never merged just because they stringify alike.
 *
 * @param {Array} array values to de-duplicate
 * @returns {Array} the unique values
 */
function simpleUnique(array)
{
    var unique = [];
    var seen = {};
    for (var i = 0; i < array.length; i++)
    {
        var item = array[i];
        var kind = typeof item;

        if (item !== null && (kind === 'object' || kind === 'function'))
        {
            if (unique.indexOf(item) === -1) unique.push(item);
            continue;
        }

        // The type prefix keeps 1 and "1" apart; String() also accepts symbols.
        var key = kind + String(item);
        if (!seen[key])
        {
            seen[key] = true;
            unique.push(item);
        }
    }
    return unique;
}
- pinyinUtil.parseDict();
- pinyinUtil.dict = dict;
- window.pinyinUtil = pinyinUtil;
- });
|