Collocation Dictionary Optimization Using WordNet and k-Nearest Neighbor Learning

return to the website
by Yuseop Kim, Byoung-tak Zhang, Yung Taek Kim
Abstract:
In machine translation, collocation dictionaries are important for selecting accurate target words. However, if the dictionary size is too large it can decrease the efficiency of translation. This paper presents a method to develop a compact collocation dictionary for transitive verb–object pairs in English–Korean machine translation without losing translation accuracy. We use WordNet to calculate the semantic distance between words, and k-nearest neighbor learning to select the translations. The entries in the dictionary are minimized to balance the trade-off between translation accuracy and time. We have performed several experiments on a selected set of verbs extracted from a raw corpus of over 3 million words. The results show that in real-time translation environments the size of a collocation dictionary can be reduced up to 40% of its original size without significant decrease in its accuracy.
Reference:
Collocation Dictionary Optimization Using WordNet and k-Nearest Neighbor Learning (Yuseop Kim, Byoung-tak Zhang, Yung Taek Kim), In Machine Translation, volume 16, 2002.
Bibtex Entry:
@comment{Kim2002: cleaned-up export of a reference-manager record that was
  merged from duplicates. The author list follows the three-author form
  recorded in the annote field. NOTE(review): journal was exported as
  "Wall Street Journal"; it has been set to "Machine Translation" based on
  the DOI and volume/issue/pages -- confirm against the DOI landing page,
  and confirm the publication year there as well.}
@article{Kim2002,
  author        = {Kim, Yuseop and Zhang, Byoung-tak and Kim, Yung Taek},
  title         = {Collocation Dictionary Optimization Using {WordNet} and k-Nearest Neighbor Learning},
  journal       = {Machine Translation},
  volume        = {16},
  number        = {2},
  pages         = {89--108},
  year          = {2002},
  doi           = {10.1023/A:1014540107013},
  url           = {http://dx.doi.org/10.1023/A:1014540107013},
  abstract      = {In machine translation, collocation dictionaries are important for selecting accurate target words. However, if the dictionary size is too large it can decrease the efficiency of translation. This paper presents a method to develop a compact collocation dictionary for transitive verb--object pairs in English--Korean machine translation without losing translation accuracy. We use WordNet to calculate the semantic distance between words, and k-nearest neighbor learning to select the translations. The entries in the dictionary are minimized to balance the trade-off between translation accuracy and time. We have performed several experiments on a selected set of verbs extracted from a raw corpus of over 3 million words. The results show that in real-time translation environments the size of a collocation dictionary can be reduced up to 40\% of its original size without significant decrease in its accuracy.},
  keywords      = {SML-LIB-BIBLIO,aprendizaje,collocation,colocaciones,diccionarios,dictionary optimization,k-nearest neighbor learning,lang:ENG,optimizaci{\'o}n,semantic distance,traducci{\'o}n autom{\'a}tica},
  mendeley-tags = {SML-LIB-BIBLIO,lang:ENG},
  annote        = {From Duplicate 1 (Collocation Dictionary Optimization Using WordNet and k-Nearest Neighbor Learning - Kim, Yung Taek Yuseop; Zhang, Byoung-tak). From Duplicate 1 (Collocation Dictionary Optimization Using WordNet and k-Nearest Neighbor Learning - Kim, Yuseop; Zhang, Byoung-tak; Kim, Yung Taek) and Duplicate 2 (Collocation Dictionary Optimization Using WordNet and k-Nearest Neighbor Learning - Kim, Yuseop; Zhang, Byoung-tak; Kim, Yung Taek)},
}
Powered by bibtexbrowser