Skip to content

Commit

Permalink
fixed #8 fixed binary safe problems
Browse files Browse the repository at this point in the history
  • Loading branch information
Joungkyun committed May 5, 2016
1 parent 0593e1c commit de32f82
Show file tree
Hide file tree
Showing 12 changed files with 102 additions and 21 deletions.
4 changes: 4 additions & 0 deletions Changelog
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ $Id$

2016/05/05 KST
- fixed #5 no detect control character on US-ASCII
- fixed #7 wrong detect Danish ISO-8859-15
- fixed #8 fixed binary safe problems
. replace detect to detect_r
. replace detect_handledata to detect_handledata_r

2016/05/04 KST
- fixed #1 separate model directory
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,14 @@ See also test directory of source code

int main (void) {
DetectObj *obj;
char * str = "안녕하세요";

if ( (obj = detect_obj_init ()) == NULL ) {
fprintf (stderr, "Memory Allocation failed\n");
return CHARDET_MEM_ALLOCATED_FAIL;
}

switch (detect ("안녕하세요", &obj)) {
switch (detect_r (str, strlen (str), &obj)) {
case CHARDET_OUT_OF_MEMORY :
fprintf (stderr, "On handle processing, occured out of memory\n");
detect_obj_free (&obj);
Expand All @@ -62,6 +63,7 @@ or looping code
int main (void) {
Detect * d;
DetectObj * obj;
char * str = "안녕하세요";
if ( (d = detect_init ()) == NULL ) {
fprintf (stderr, "chardet handle initialize failed\n");
Expand All @@ -76,7 +78,7 @@ or looping code
return CHARDET_MEM_ALLOCATED_FAIL;
}
switch (detect_handledata (&d, "안녕하세요", &obj)) {
switch (detect_handledata_r (&d, str, strlen (str), &obj)) {
case CHARDET_OUT_OF_MEMORY :
fprintf (stderr, "On handle processing, occured out of memory\n");
detect_obj_free (&obj);
Expand Down
25 changes: 21 additions & 4 deletions man/en/detect.3
Original file line number Diff line number Diff line change
@@ -1,27 +1,41 @@
.TH detect 3 2015-12-11 "libchardet manuals"
.\" Process with
.\" nroff -man detect.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" 2016-05-05 JoungKyun.Kim <http://oops.org>
.\" $Id$

.SH NAME
detect \- Detecting character set and measuring accuracy of charset
detect, detect_r \- Detecting character set and measuring accuracy of charset

.SH SYNOPSIS
.B "#include <chardet.h>"
.sp
.BI "short chardet (char * inbuf, DetectObj ** outbuf);"
.BI "short detect (char * inbuf, DetectObj ** outbuf);"
.sp
.BI "short detect_r (char * inbuf, size_t inlen, DetectObj ** outbuf);"

.SH DESCRIPTION
Storing charset and accuracy of
.B inbuf to
.B outbuf

The
.BI detect
API is deprecated because it is not binary safe. Use
.BI detect_r
instead.

.SS Arguments:
.TP
.B inbuf
.br
input string for detecting

.TP
.B inlen
.br
length of input string for detecting

.TP
.B outbuf
.br
Expand Down Expand Up @@ -78,13 +92,16 @@ at internal API

int main (void) {
DetectObj *obj;
char * checkstr = "안녕하세요";

if ( (obj = detect_obj_init ()) == NULL ) {
fprintf (stderr, "Memory Allocation failed\\n");
return CHARDET_MEM_ALLOCATED_FAIL;
}

switch (detect ("안녕하세요", &obj)) {
//switch (detect (checkstr, &obj))
switch (detect_r (checkstr, strlen (checkstr), &obj))
{
case CHARDET_OUT_OF_MEMORY :
fprintf (stderr, "On handle processing, occured out of memory\\n");
detect_obj_free (&obj);
Expand Down
22 changes: 19 additions & 3 deletions man/en/detect_handledata.3
Original file line number Diff line number Diff line change
@@ -1,22 +1,31 @@
.TH detect_handledata 3 2015-12-11 "libchardet manuals"
.\" Process with
.\" nroff -man detect_handledata.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" 2016-05-05 JoungKyun.Kim <http://oops.org>
.\" $Id$

.SH NAME
detect_handledata \- Detecting character set and measuring accuracy of charset
detect_handledata, detect_handledata_r \- Detecting character set and measuring accuracy of charset

.SH SYNOPSIS
.B "#include <chardet.h>"
.sp
.BI "short detect_handledata (Detect ** handle, const char * inbuf, DetectObj ** outbuf);"
.sp
.BI "short detect_handledata_r (Detect ** handle, const char * inbuf, size_t inlen, DetectObj ** outbuf);"

.SH DESCRIPTION
Storing charset and accuracy of
.B inbuf to
.B outbuf

The
.BI detect_handledata
API is deprecated because it is not binary safe. Use
.BI detect_handledata_r
instead.

.SS Arguments:
.TP
.B handle
.br
Expand All @@ -30,6 +39,11 @@ api.
.br
input string for detecting

.TP
.B inlen
.br
length of input string for detecting

.TP
.B outbuf
.br
Expand Down Expand Up @@ -108,7 +122,9 @@ int main (void) {
return CHARDET_MEM_ALLOCATED_FAIL;
}

switch (detect_handledata (&d, "안녕하세요", &obj)) {
//switch (detect_handledata (&d, str[i], &obj))
switch (detect_handledata_r (&d, str[i], strlen (str[i]), &obj))
{
case CHARDET_OUT_OF_MEMORY :
fprintf (stderr, "On handle processing, occured out of memory\\n");
detect_obj_free (&obj);
Expand Down
1 change: 1 addition & 0 deletions man/en/detect_handledata_r.3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.so man3/detect_handledata.3
1 change: 1 addition & 0 deletions man/en/detect_r.3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.so man3/detect.3
24 changes: 19 additions & 5 deletions man/ko/detect.3
Original file line number Diff line number Diff line change
@@ -1,27 +1,39 @@
.TH detect 3 2015-12-11 "libchardet manuals"
.TH detect 3 2016-05-05 "libchardet manuals"
.\" Process with
.\" nroff -man detect.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" 2016-05-05 JoungKyun.Kim <http://oops.org>
.\" $Id$

.SH 이름
detect \- 문자열의 문자셋과 정확도를 측정
detect, detect_r \- 문자열의 문자셋과 정확도를 측정

.SH 사용법
.B "#include <chardet.h>"
.sp
.BI "short chardet (char * inbuf, DetectObj ** outbuf);"
.BI "short detect (char * inbuf, DetectObj ** outbuf);"
.sp
.BI "short detect_r (char * inbuf, size_t inlen, DetectObj ** outbuf);"

.SH 설명
.B inbuf
의 문자셋과 정확도를
.B outbuf에 저장한다.

.BI detect
API는 binary safe 문제로 더이상 사용을 권장하지 않습니다.
.BI detect_r
API를 이용하십시오.

.SS API 아규먼트
.TP
.B inbuf
.br
문자셋과 정확도를 측정할 입력 문자열

.TP
.B inlen
.br
문자셋과 정확도를 측정할 입력 문자열의 길이

.TP
.B outbuf
.br
Expand Down Expand Up @@ -83,7 +95,9 @@ int main (void) {
return CHARDET_MEM_ALLOCATED_FAIL;
}

switch (detect ("안녕하세요", &obj)) {
//switch (detect ("안녕하세요", &obj))
switch (detect_r ("안녕하세요", 10, &obj))
{
case CHARDET_OUT_OF_MEMORY :
fprintf (stderr, "On handle processing, occured out of memory\\n");
detect_obj_free (&obj);
Expand Down
24 changes: 19 additions & 5 deletions man/ko/detect_handledata.3
Original file line number Diff line number Diff line change
@@ -1,22 +1,30 @@
.TH detect_handledata 3 2015-12-11 "libchardet manuals"
.TH detect_handledata 3 2016-05-05 "libchardet manuals"
.\" Process with
.\" nroff -man detect_handledata.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" 2016-05-05 JoungKyun.Kim <http://oops.org>
.\" $Id$

.SH 이름
detect_handledata \- 문자셋과 정확도를 측정
detect_handledata, detect_handledata_r \- 문자셋과 정확도를 측정

.SH 사용법
.B "#include <chardet.h>"
.sp
.BI "short chardet_handledata (Detect ** handle, const char * inbuf, DetectObj ** outbuf);"
.BI "short detect_handledata (Detect ** handle, const char * inbuf, DetectObj ** outbuf);"
.sp
.BI "short detect_handledata_r (Detect ** handle, const char * inbuf, size_t inlen, DetectObj ** outbuf);"

.SH 설명
.B inbuf
의 문자셋과 정확도를
.B outbuf에 저장한다.

.BI detect_handledata
API는 binary safe 문제로 더이상 사용을 권장하지 않습니다.
.BI detect_handledata_r
API를 이용하십시오.

.SS API 아규먼트
.TP
.B handle
.br
Expand All @@ -28,6 +36,10 @@ api에 의하여 할당된 detect handle resource.
.br
문자셋과 정확도를 측정할 입력 문자열

.TP
.B inlen
.br
문자셋과 정확도를 측정할 입력 문자열의 길이

.TP
.B outbuf
.br
Expand Down Expand Up @@ -105,7 +117,9 @@ int main (void) {
return CHARDET_MEM_ALLOCATED_FAIL;
}

switch (detect_handledata (&d, "안녕하세요", &obj)) {
//switch (detect_handledata (&d, str[i], &obj))
switch (detect_handledata_r (&d, str[i], strlen (str[i]), &obj))
{
case CHARDET_OUT_OF_MEMORY :
fprintf (stderr, "On handle processing, occured out of memory\\n");
detect_obj_free (&obj);
Expand Down
1 change: 1 addition & 0 deletions man/ko/detect_handledata_r.3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.so man3/detect_handledata.3
1 change: 1 addition & 0 deletions man/ko/detect_r.3
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.so man3/detect.3
12 changes: 10 additions & 2 deletions src/chardet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,13 @@ CHARDET_API void detect_dataend (Detect **det) {
}

/*
 * detect_handledata: legacy wrapper for NUL-terminated input.
 *
 * Measures buf with strlen() and forwards the handle, the buffer, that
 * length and the result object to detect_handledata_r(), returning its
 * status unchanged. Because the length comes from strlen(), input is
 * only considered up to the first NUL byte (not binary safe); callers
 * with an explicit length should call detect_handledata_r() directly.
 */
CHARDET_API short detect_handledata (Detect ** det, const char * buf, DetectObj ** obj) {
	size_t buflen = strlen (buf);

	return detect_handledata_r (det, buf, buflen, obj);
}

CHARDET_API short detect_handledata_r (Detect ** det, const char * buf, size_t buflen, DetectObj ** obj) {
const char * ret;

if ( (*det)->detect->HandleData (buf, strlen (buf)) == NS_ERROR_OUT_OF_MEMORY )
if ( (*det)->detect->HandleData (buf, buflen) == NS_ERROR_OUT_OF_MEMORY )
return CHARDET_OUT_OF_MEMORY;
(*det)->detect->DataEnd ();

Expand All @@ -126,12 +130,16 @@ CHARDET_API void detect_destroy (Detect **det) {
}

/*
 * detect: legacy one-shot wrapper for NUL-terminated input.
 *
 * Measures buf with strlen() and forwards the buffer, that length and
 * the result object to detect_r(), returning its status unchanged.
 * Because the length comes from strlen(), input is only considered up
 * to the first NUL byte (not binary safe); callers with an explicit
 * length should call detect_r() directly.
 */
CHARDET_API short detect (const char *buf, DetectObj ** obj) {
	size_t buflen = strlen (buf);

	return detect_r (buf, buflen, obj);
}

CHARDET_API short detect_r (const char *buf, size_t buflen, DetectObj ** obj) {
Detector * det;
const char * ret;

det = new Detector;
det->Reset ();
if ( det->HandleData (buf, strlen (buf)) == NS_ERROR_OUT_OF_MEMORY ) {
if ( det->HandleData (buf, buflen) == NS_ERROR_OUT_OF_MEMORY ) {
delete det;
return CHARDET_OUT_OF_MEMORY;
}
Expand Down
2 changes: 2 additions & 0 deletions src/chardet.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,10 @@ extern "C" {
CHARDET_API void detect_reset (Detect **);
CHARDET_API void detect_dataend (Detect **);
CHARDET_API short detect_handledata (Detect **, const char *, DetectObj **);
CHARDET_API short detect_handledata_r (Detect **, const char *, size_t, DetectObj **);
CHARDET_API void detect_destroy (Detect **);
CHARDET_API short detect (const char *, DetectObj **);
CHARDET_API short detect_r (const char *, size_t, DetectObj **);
#ifdef __cplusplus
};
#endif
Expand Down

0 comments on commit de32f82

Please sign in to comment.