Skip to content

Commit

Permalink
Merge pull request allan-simon#12 from Tatoeba/master
Browse files (browse the repository at this point in the history)
Stop converting punct when converting scripts
  • Loading branch information
allan-simon committed Nov 21, 2015
2 parents 49fa2ed + 0efd85f commit 7d45d33
Showing 1 changed file with 9 additions and 26 deletions.
35 changes: 9 additions & 26 deletions src/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,11 @@ class Parser{
std::vector<std::string> segments;
std::vector<T*> items;
std::map<std::string, std::string> convertChinese2Latin;
std::map<std::string, std::string> convertLatin2Chinese;

inline void init_maps();

std::string convert_trash_segment(std::string segment, bool toLatin);
std::string convert_trash_char(std::string trashChar, bool toLatin);
std::string convert_trash_segment(std::string segment);
std::string convert_trash_char(std::string trashChar);


inline std::string romanize_segment(int segmentNbr);
Expand Down Expand Up @@ -103,16 +102,6 @@ inline void Parser<T>::init_maps() {
convertChinese2Latin.insert(std::pair<std::string, std::string>("","'"));
convertChinese2Latin.insert(std::pair<std::string, std::string>("","\""));
convertChinese2Latin.insert(std::pair<std::string, std::string>("","\""));

convertLatin2Chinese.insert(std::pair<std::string, std::string>(".",""));
convertLatin2Chinese.insert(std::pair<std::string, std::string>("?",""));
convertLatin2Chinese.insert(std::pair<std::string, std::string>(",",""));
convertLatin2Chinese.insert(std::pair<std::string, std::string>("!",""));
convertLatin2Chinese.insert(std::pair<std::string, std::string>(";",""));
convertLatin2Chinese.insert(std::pair<std::string, std::string>(":",""));
convertLatin2Chinese.insert(std::pair<std::string, std::string>("\"",""));


}

/**
Expand Down Expand Up @@ -297,7 +286,7 @@ inline std::string Parser<T>::romanize_segment(int segmentNbr) {
if (tempItem != NULL) {
temp += tempItem->romanization;
} else {
temp += convert_trash_segment(segments[segmentNbr], true);
temp += convert_trash_segment(segments[segmentNbr]);
}
return temp;
}
Expand Down Expand Up @@ -337,7 +326,7 @@ inline std::string Parser<T>::trad_segment(int segmentNbr) {
temp += tempItem->str_other_script;
}
} else {
temp += convert_trash_segment(segments[segmentNbr], false);
temp += segments[segmentNbr];
}
return temp;
}
Expand Down Expand Up @@ -376,7 +365,7 @@ inline std::string Parser<T>::simp_segment(int segmentNbr) {
temp += tempItem->str_other_script;
}
} else {
temp+= convert_trash_segment(segments[segmentNbr], false);
temp += segments[segmentNbr];
}
return temp;
}
Expand Down Expand Up @@ -441,10 +430,10 @@ std::string Parser<T>::change_script() {
*/

template <typename T>
std::string Parser<T>::convert_trash_segment(std::string segment, bool toLatin) {
std::string Parser<T>::convert_trash_segment(std::string segment) {
std::string temp("");
for (int i = 0; i < segment.size() ; i++) {
temp += convert_trash_char(segment.substr(i,1), toLatin);
temp += convert_trash_char(segment.substr(i,1));
}
return temp;
}
Expand All @@ -455,16 +444,10 @@ std::string Parser<T>::convert_trash_segment(std::string segment, bool toLatin)
*/

template <typename T>
std::string Parser<T>::convert_trash_char(std::string trashChar, bool toLatin) {
std::string Parser<T>::convert_trash_char(std::string trashChar) {

std::map<std::string, std::string>::iterator iter;
std::map<std::string, std::string> convertMap ;

if (toLatin) {
convertMap = convertChinese2Latin;
} else {
convertMap = convertLatin2Chinese;
}
std::map<std::string, std::string> convertMap = convertChinese2Latin;

iter = convertMap.find(trashChar);

Expand Down

0 comments on commit 7d45d33

Please sign in to comment.