by Danushka Bollegala, Y. Matsuo, M. Ishizuka
Abstract:
Semantic similarity measures are important for numerous tasks in natural language processing such as word sense disambiguation, automatic synonym extraction, language modelling and document clustering. We propose a method to measure semantic similarity between two words using information available on the Web. We extract page counts and snippets for the AND query of the two words from a Web search engine. We define numerous similarity scores based on page counts and lexico-syntactic patterns. These similarity scores are integrated using support vector machines to form a robust semantic similarity measure. Proposed method outperforms all existing Web-based semantic similarity measures on Miller-Charles benchmark dataset achieving a high correlation coefficient of 0.834 with human ratings.
Reference:
WebSim: a web-based semantic similarity measure (Danushka Bollegala, Y. Matsuo, M. Ishizuka), In The 21st Annual Conference of the Japanese Society for Artificial Intelligence, 2007.
Bibtex Entry:
@inproceedings{Bollegala2007a,
  author        = {Bollegala, Danushka and Matsuo, Y. and Ishizuka, M.},
  title         = {{WebSim}: a web-based semantic similarity measure},
  booktitle     = {The 21st Annual Conference of the Japanese Society for Artificial Intelligence},
  year          = {2007},
  pages         = {757--766},
  url           = {http://www.ai-gakkai.or.jp/jsai/conf/2007/data/pdf/100136.pdf},
  abstract      = {Semantic similarity measures are important for numerous tasks in natural language processing such as word sense disambiguation, automatic synonym extraction, language modelling and document clustering. We propose a method to measure semantic similarity between two words using information available on the Web. We extract page counts and snippets for the AND query of the two words from a Web search engine. We define numerous similarity scores based on page counts and lexico-syntactic patterns. These similarity scores are integrated using support vector machines to form a robust semantic similarity measure. Proposed method outperforms all existing Web-based semantic similarity measures on Miller-Charles benchmark dataset achieving a high correlation coefficient of 0.834 with human ratings.},
  keywords      = {SML-LIB-BIBLIO,lang:ENG},
  mendeley-tags = {SML-LIB-BIBLIO,lang:ENG},
}