diff --git a/Changelog b/Changelog index a4ab2dd..de04f09 100644 --- a/Changelog +++ b/Changelog @@ -6,6 +6,10 @@ $Id$ 2016/05/05 KST - fixed #5 no detect control character on US-ASCII + - fixed #7 wrong detect Danish ISO-8859-15 + - fixed #8 fixed binary safe problems + . replace detect to detect_r + . replace detect_handledata to detect_handledata_r 2016/05/04 KST - fixed #1 separate model directory diff --git a/README.md b/README.md index 432ffb9..3a47b9b 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,14 @@ See also test directory of source code int main (void) { DetectObj *obj; + char * str = "안녕하세요"; if ( (obj = detect_obj_init ()) == NULL ) { fprintf (stderr, "Memory Allocation failed\n"); return CHARDET_MEM_ALLOCATED_FAIL; } - switch (detect ("안녕하세요", &obj)) { + switch (detect_r (str, strlen (str), &obj)) { case CHARDET_OUT_OF_MEMORY : fprintf (stderr, "On handle processing, occured out of memory\n"); detect_obj_free (&obj); @@ -62,6 +63,7 @@ or looping code int main (void) { Detect * d; DetectObj * obj; + char * str = "안녕하세요"; if ( (d = detect_init ()) == NULL ) { fprintf (stderr, "chardet handle initialize failed\n"); @@ -76,7 +78,7 @@ or looping code return CHARDET_MEM_ALLOCATED_FAIL; } - switch (detect_handledata (&d, "안녕하세요", &obj)) { + switch (detect_handledata_r (&d, str, strlen (str), &obj)) { case CHARDET_OUT_OF_MEMORY : fprintf (stderr, "On handle processing, occured out of memory\n"); detect_obj_free (&obj); diff --git a/man/en/detect.3 b/man/en/detect.3 index d15ea8d..55d2261 100644 --- a/man/en/detect.3 +++ b/man/en/detect.3 @@ -1,27 +1,41 @@ .TH detect 3 2015-12-11 "libchardet manuals" .\" Process with .\" nroff -man detect.3 -.\" 2015-12-11 JoungKyun Kim +.\" 2016-05-05 JoungKyun.Kim .\" $Id$ .SH NAME -detect \- Detecting character set and measuring accuracy of charset +detect, detect_r \- Detecting character set and measuring accuracy of charset .SH SYNOPSIS .B "#include " .sp -.BI "short chardet (char * inbuf, DetectObj ** outbuf);" 
+.BI "short detect (char * inbuf, DetectObj ** outbuf);" +.sp +.BI "short detect_r (char * inbuf, size_t inlen, DetectObj ** outbuf);" .SH DESCRIPTION Storing charset and accuracy of .B inbuf to .B outbuf +The +.BI detect +API is deprecated because it is not binary safe. Use the +.BI detect_r +API instead. + +.SS Arguments: .TP .B inbuf .br input string for detecting +.TP +.B inlen +.br +length of input string for detecting + .TP .B outbuf .br @@ -78,13 +92,16 @@ at internal API int main (void) { DetectObj *obj; + char * checkstr = "안녕하세요"; if ( (obj = detect_obj_init ()) == NULL ) { fprintf (stderr, "Memory Allocation failed\\n"); return CHARDET_MEM_ALLOCATED_FAIL; } - switch (detect ("안녕하세요", &obj)) { + //switch (detect (checkstr, &obj)) + switch (detect_r (checkstr, strlen (checkstr), &obj)) + { case CHARDET_OUT_OF_MEMORY : fprintf (stderr, "On handle processing, occured out of memory\\n"); detect_obj_free (&obj); diff --git a/man/en/detect_handledata.3 b/man/en/detect_handledata.3 index 5fcbe5b..2f3d3f4 100644 --- a/man/en/detect_handledata.3 +++ b/man/en/detect_handledata.3 @@ -1,22 +1,31 @@ .TH detect_handledata 3 2015-12-11 "libchardet manuals" .\" Process with .\" nroff -man detect_handledata.3 -.\" 2015-12-11 JoungKyun Kim +.\" 2016-05-05 JoungKyun.Kim .\" $Id$ .SH NAME -detect_handledata \- Detecting character set and measuring accuracy of charset +detect_handledata, detect_handledata_r \- Detecting character set and measuring accuracy of charset .SH SNOPSYS .B "#include " .sp .BI "short chardet_handledata (Detect ** handle, const char * inbuf, DetectObj ** outbuf);" +.sp +.BI "short detect_handledata_r (Detect ** handle, const char * inbuf, size_t inlen, DetectObj ** outbuf);" .SH DESCRIPTION Storing charset and accuracy of .B inbuf to .B outbuf +The +.BI detect_handledata +API is deprecated because it is not binary safe. Use the +.BI detect_handledata_r +API instead. + +.SS Arguments: .TP .B handle .br @@ -30,6 +39,11 @@ api. 
.br input string for detecting +.TP +.B inlen +.br +length of input string for detecting + .TP .B outbuf .br @@ -108,7 +122,9 @@ int main (void) { return CHARDET_MEM_ALLOCATED_FAIL; } - switch (detect_handledata (&d, "안녕하세요", &obj)) { + //switch (detect_handledata (&d, str[i], &obj)) + switch (detect_handledata_r (&d, str[i], strlen(str[i]), &obj)) + { case CHARDET_OUT_OF_MEMORY : fprintf (stderr, "On handle processing, occured out of memory\\n"); detect_obj_free (&obj); diff --git a/man/en/detect_handledata_r.3 b/man/en/detect_handledata_r.3 new file mode 100644 index 0000000..c00f500 --- /dev/null +++ b/man/en/detect_handledata_r.3 @@ -0,0 +1 @@ +.so man3/detect_handledata.3 diff --git a/man/en/detect_r.3 b/man/en/detect_r.3 new file mode 100644 index 0000000..a3f1248 --- /dev/null +++ b/man/en/detect_r.3 @@ -0,0 +1 @@ +.so man3/detect.3 diff --git a/man/ko/detect.3 b/man/ko/detect.3 index 87d0786..2834fcc 100644 --- a/man/ko/detect.3 +++ b/man/ko/detect.3 @@ -1,27 +1,39 @@ -.TH detect 3 2015-12-11 "libchardet manuals" +.TH detect 3 2016-05-05 "libchardet manuals" .\" Process with .\" nroff -man detect.3 -.\" 2015-12-11 JoungKyun Kim +.\" 2016-05-05 JoungKyun.Kim .\" $Id$ .SH 이름 -detect \- 문자열의 문자셋과 정확도를 측정 +detect, detect_r \- 문자열의 문자셋과 정확도를 측정 .SH 사용법 .B "#include " .sp -.BI "short chardet (char * inbuf, DetectObj ** outbuf);" +.BI "short detect (char * inbuf, DetectObj ** outbuf);" +.sp +.BI "short detect_r (char * inbuf, size_t inlen, DetectObj ** outbuf);" .SH 설명 .B inbuf 의 문자셋과 정확도를 .B outbuf에 저장한다. +.BI detect +API는 binary safe 문제로 더이상 사용을 권장하지 않습니다. +.BI detect_r +API를 이용하십시오. 
+ +.SS API 아규먼트 .TP .B inbuf .br 문자셋과 정확도를 측정할 입력 문자열 +.B inlen +.br +문자셋과 정확도를 측정할 입력 문자열의 길이 + .TP .B outbuf .br @@ -83,7 +95,9 @@ int main (void) { return CHARDET_MEM_ALLOCATED_FAIL; } - switch (detect ("안녕하세요", &obj)) { + //switch (detect ("안녕하세요", &obj)) + switch (detect_r ("안녕하세요", 10, &obj)) + { case CHARDET_OUT_OF_MEMORY : fprintf (stderr, "On handle processing, occured out of memory\\n"); detect_obj_free (&obj); diff --git a/man/ko/detect_handledata.3 b/man/ko/detect_handledata.3 index 319b317..2fa17ff 100644 --- a/man/ko/detect_handledata.3 +++ b/man/ko/detect_handledata.3 @@ -1,22 +1,30 @@ -.TH detect_handledata 3 2015-12-11 "libchardet manuals" +.TH detect_handledata 3 2016-05-05 "libchardet manuals" .\" Process with .\" nroff -man detect_handledata.3 -.\" 2015-12-11 JoungKyun Kim +.\" 2016-05-05 JoungKyun.Kim .\" $Id$ .SH 이름 -detect_handledata \- 문자셋과 정확도를 측정 +detect_handledata, detect_handledata_r \- 문자셋과 정확도를 측정 .SH 사용법 .B "#include " .sp -.BI "short chardet_handledata (Detect ** handle, const char * inbuf, DetectObj ** outbuf);" +.BI "short detect_handledata (Detect ** handle, const char * inbuf, DetectObj ** outbuf);" +.sp +.BI "short detect_handledata_r (Detect ** handle, const char * inbuf, size_t inlen, DetectObj ** outbuf);" .SH 설명 .B inbuf 의 문자셋과 정확도를 .B outbuf에 저장한다. +.BI detect_handledata +API는 binary safe 문제로 더이상 사용을 권장하지 않습니다. +.BI detect_handledata_r +API를 이용하십시오. + +.SS API 아규먼트 .TP .B handle .br @@ -28,6 +36,10 @@ api에 의하여 할당된 detect handle resource. 
.br 문자셋과 정확도를 측정할 입력 문자열 +.B inlen +.br +문자셋과 정확도를 측정할 입력 문자열의 길이 + .TP .B outbuf .br @@ -105,7 +117,9 @@ int main (void) { return CHARDET_MEM_ALLOCATED_FAIL; } - switch (detect_handledata (&d, "안녕하세요", &obj)) { + //switch (detect_handledata (&d, str[i], &obj)) + switch (detect_handledata_r (&d, str[i], strlen (str[i]), &obj)) + { case CHARDET_OUT_OF_MEMORY : fprintf (stderr, "On handle processing, occured out of memory\\n"); detect_obj_free (&obj); diff --git a/man/ko/detect_handledata_r.3 b/man/ko/detect_handledata_r.3 new file mode 100644 index 0000000..c00f500 --- /dev/null +++ b/man/ko/detect_handledata_r.3 @@ -0,0 +1 @@ +.so man3/detect_handledata.3 diff --git a/man/ko/detect_r.3 b/man/ko/detect_r.3 new file mode 100644 index 0000000..a3f1248 --- /dev/null +++ b/man/ko/detect_r.3 @@ -0,0 +1 @@ +.so man3/detect.3 diff --git a/src/chardet.cpp b/src/chardet.cpp index 84628f4..4643384 100644 --- a/src/chardet.cpp +++ b/src/chardet.cpp @@ -101,9 +101,13 @@ CHARDET_API void detect_dataend (Detect **det) { } CHARDET_API short detect_handledata (Detect ** det, const char * buf, DetectObj ** obj) { + return detect_handledata_r (det, buf, strlen (buf), obj); +} + +CHARDET_API short detect_handledata_r (Detect ** det, const char * buf, size_t buflen, DetectObj ** obj) { const char * ret; - if ( (*det)->detect->HandleData (buf, strlen (buf)) == NS_ERROR_OUT_OF_MEMORY ) + if ( (*det)->detect->HandleData (buf, buflen) == NS_ERROR_OUT_OF_MEMORY ) return CHARDET_OUT_OF_MEMORY; (*det)->detect->DataEnd (); @@ -126,12 +130,16 @@ CHARDET_API void detect_destroy (Detect **det) { } CHARDET_API short detect (const char *buf, DetectObj ** obj) { + return detect_r (buf, strlen (buf), obj); +} + +CHARDET_API short detect_r (const char *buf, size_t buflen, DetectObj ** obj) { Detector * det; const char * ret; det = new Detector; det->Reset (); - if ( det->HandleData (buf, strlen (buf)) == NS_ERROR_OUT_OF_MEMORY ) { + if ( det->HandleData (buf, buflen) == NS_ERROR_OUT_OF_MEMORY ) { 
delete det; return CHARDET_OUT_OF_MEMORY; } diff --git a/src/chardet.h b/src/chardet.h index 09ce4f0..1502601 100644 --- a/src/chardet.h +++ b/src/chardet.h @@ -101,8 +101,10 @@ extern "C" { CHARDET_API void detect_reset (Detect **); CHARDET_API void detect_dataend (Detect **); CHARDET_API short detect_handledata (Detect **, const char *, DetectObj **); + CHARDET_API short detect_handledata_r (Detect **, const char *, size_t, DetectObj **); CHARDET_API void detect_destroy (Detect **); CHARDET_API short detect (const char *, DetectObj **); + CHARDET_API short detect_r (const char *, size_t, DetectObj **); #ifdef __cplusplus }; #endif