Skip to content

Commit

Permalink
fixed #13 Report utf-8-sig
Browse files Browse the repository at this point in the history
  • Loading branch information
Joungkyun committed Jul 31, 2019
1 parent df513c4 commit da0a1a0
Show file tree
Hide file tree
Showing 25 changed files with 261 additions and 92 deletions.
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ From 1.0.5, libchardet was reflected single-byte charset detection confidence
algorithm of [uchardet](https://github.com/BYVoid/uchardet/) and new language models.
(Arabic, Danish, Esperanto, German, Spanish, Turkish, Vietnamese)

From 1.0.6, a bom member has been added to the DetectObj structure.
A bom value of 1 means that a BOM (byte order mark) was detected in the input.
Whether the bom member is supported can be determined by checking that the
CHARDET_BOM_CHECK constant is defined. See the example below.

## Installation

See also [INSTALL](INSTALL) document
Expand Down Expand Up @@ -63,10 +68,18 @@ See also test directory of source code
return CHARDET_NULL_OBJECT;
}

#ifndef CHARDET_BOM_CHECK
printf ("encoding: %s, confidence: %f\n", obj->encoding, obj->confidence);
#else
// since 1.0.6, the result also reports whether a BOM exists
printf (
"encoding: %s, confidence: %f, exist BOM: %d\n",
obj->encoding, obj->confidence, obj->bom
);
#endif
detect_obj_free (&obj);

return 0;
return 0;
}
```
Expand Down Expand Up @@ -112,7 +125,15 @@ or looping code
return CHARDET_NULL_OBJECT;
}
#ifndef CHARDET_BOM_CHECK
printf ("encoding: %s, confidence: %f\n", obj->encoding, obj->confidence);
#else
// since 1.0.6, the result also reports whether a BOM exists
printf (
"encoding: %s, confidence: %f, exist BOM: %d\n",
obj->encoding, obj->confidence, obj->bom
);
#endif
detect_obj_free (&obj);
if ( 1 )
Expand Down
10 changes: 5 additions & 5 deletions man/en/detect.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect 3 2015-12-11 "libchardet manuals"
.TH detect 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect.3
.\" 2016-05-05 JoungKyun.Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun.Kim <http://oops.org>

.SH NAME
detect, detect_r \- Detecting character set and measuring accuracy of charset
Expand Down Expand Up @@ -113,7 +112,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);

return 0;
Expand All @@ -124,7 +124,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_handledata(3), detect_obj_init(3), detect_obj_free(3)
10 changes: 5 additions & 5 deletions man/en/detect_destroy.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_destroy 3 2015-12-11 "libchardet manuals"
.TH detect_destroy 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_destroy.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH NAME
detect_destroy \- free Detector structure
Expand Down Expand Up @@ -52,7 +51,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
detect_destroy (&d);

Expand All @@ -64,7 +64,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_init(3), detect_reset(3)
10 changes: 5 additions & 5 deletions man/en/detect_handledata.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_handledata 3 2015-12-11 "libchardet manuals"
.TH detect_handledata 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_handledata.3
.\" 2016-05-05 JoungKyun.Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun.Kim <http://oops.org>

.SH NAME
detect_handledata, detect_handledata_r \- Detecting character set and measuring accuracy of charset
Expand Down Expand Up @@ -136,7 +135,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
}
detect_destroy (&d);
Expand All @@ -149,7 +149,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_obj_init(3), detect_obj_free(3), detect_init(3), detect_reset(3), detect_destroy(3)
10 changes: 5 additions & 5 deletions man/en/detect_init.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_init 3 2015-12-11 "libchardet manuals"
.TH detect_init 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_init.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH NAME
detect_init - initialize Detect structure
Expand Down Expand Up @@ -62,7 +61,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
detect_destroy (&d);

Expand All @@ -74,7 +74,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_obj_init(3), detect_obj_free(3), detect_reset(3), detect_handledata(3), detect_destroy(3)
Expand Down
10 changes: 5 additions & 5 deletions man/en/detect_obj_free.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_obj_free 3 2015-12-11 "libchardet manuals"
.TH detect_obj_free 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_obj_free.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH NAME
detect_obj_free - free DetectObject structure
Expand Down Expand Up @@ -45,7 +44,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);

return 0;
Expand All @@ -56,7 +56,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_obj_init(3)
Expand Down
11 changes: 6 additions & 5 deletions man/en/detect_obj_init.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_obj_init 3 2015-12-11 "libchardet manuals"
.TH detect_obj_init 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_obj_init.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH NAME
detect_obj_init - initialize DetectObject structure
Expand All @@ -28,6 +27,7 @@ api.
typedef struct DetectObject {
char * encoding;
float confidence;
short bom;
} DetectObj;
.PP

Expand Down Expand Up @@ -55,7 +55,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);

return 0;
Expand All @@ -66,7 +67,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_obj_free(3), detect(3), detect_handledata(3)
Expand Down
10 changes: 5 additions & 5 deletions man/en/detect_reset.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_reset 3 2015-12-11 "libchardet manuals"
.TH detect_reset 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_reset.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH NAME
detect_reset - reset Detect structure
Expand Down Expand Up @@ -59,7 +58,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom is available only when the CHARDET_BOM_CHECK constant is defined
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
}
detect_destroy (&d);
Expand All @@ -72,7 +72,7 @@ int main (void) {
JoungKyun.Kim <http://oops.org>

.SH "BUG REPORTS"
Use QnA board on http://oops.org
Use the issue tracker at https://github.com/Joungkyun/libchardet/issues

.SH "SEE ALSO"
detect_obj_init(3), detect_obj_free(3), detect_init(3), detect_handledata(3), detect_destroy(3)
Expand Down
11 changes: 6 additions & 5 deletions man/ko/detect.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect 3 2016-05-05 "libchardet manuals"
.TH detect 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect.3
.\" 2016-05-05 JoungKyun.Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun.Kim <http://oops.org>

.SH 이름
detect, detect_r \- 문자열의 문자셋과 정확도를 측정
Expand Down Expand Up @@ -46,6 +45,7 @@ API를 이용하십시오.
typedef struct DetectObject {
char * encoding;
float confidence;
short bom;
} DetectObj;
.fi

Expand Down Expand Up @@ -109,7 +109,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom 지원 여부는 CHARDET_BOM_CHECK 상수의 정의 여부로 판단할 수 있습니다.
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);

return 0;
Expand All @@ -120,7 +121,7 @@ int main (void) {
김정균

.SH 버그 리포트
<http://oops.org> 의 QnA 게시판을 이용한다.
https://github.com/Joungkyun/libchardet/issues

.SH "참고"
detect_handledata(3), detect_obj_init(3), detect_obj_free(3)
10 changes: 5 additions & 5 deletions man/ko/detect_destroy.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_destroy 3 2015-12-11 "libchardet manuals"
.TH detect_destroy 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_destroy.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH 이름
detect_destroy \- chardet resource를 해제한다.
Expand Down Expand Up @@ -51,7 +50,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom 지원 여부는 CHARDET_BOM_CHECK 상수의 정의 여부로 판단할 수 있습니다.
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
detect_destroy (&d);

Expand All @@ -63,7 +63,7 @@ int main (void) {
김정균

.SH 버그 리포트
<http://oops.org> 의 QnA 게시판을 이용한다.
https://github.com/Joungkyun/libchardet/issues

.SH "참고"
detect_init(3), detect_reset(3)
Expand Down
10 changes: 5 additions & 5 deletions man/ko/detect_handledata.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_handledata 3 2016-05-05 "libchardet manuals"
.TH detect_handledata 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_handledata.3
.\" 2016-05-05 JoungKyun.Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun.Kim <http://oops.org>

.SH 이름
detect_handledata, detect_handledata_r \- 문자셋과 정확도를 측정
Expand Down Expand Up @@ -131,7 +130,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom 지원 여부는 CHARDET_BOM_CHECK 상수의 정의 여부로 판단할 수 있습니다.
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
}
detect_destroy (&d);
Expand All @@ -144,7 +144,7 @@ int main (void) {
김정균

.SH 버그 리포트
<http://oops.org> 의 QnA 게시판을 이용한다.
https://github.com/Joungkyun/libchardet/issues

.SH "참고"
detect_obj_init(3), detect_obj_free(3), detect_init(3), detect_reset(3), detect_destroy(3)
12 changes: 6 additions & 6 deletions man/ko/detect_init.3
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
.TH detect_init 3 2015-12-11 "libchardet manuals"
.TH detect_init 3 2019-08-01 "libchardet manuals"
.\" Process with
.\" nroff -man detect_init.3
.\" 2015-12-11 JoungKyun Kim <htt://oops.org>
.\" $Id$
.\" 2019-08-01 JoungKyun Kim <http://oops.org>

.SH 이름
detect_init - chardet file handle 초기화
Expand Down Expand Up @@ -52,7 +51,8 @@ int main (void) {
return CHARDET_NULL_OBJECT;
}

printf ("encoding: %s, confidence: %f\\n", obj->encoding, obj->confidence);
// obj->bom 지원 여부는 CHARDET_BOM_CHECK 상수의 정의 여부로 판단할 수 있습니다.
printf ("encoding: %s, confidence: %f, exists bom: %d\\n", obj->encoding, obj->confidence, obj->bom);
detect_obj_free (&obj);
detect_destroy (&d);

Expand All @@ -62,9 +62,9 @@ int main (void) {
.SH 저자
김정균
.SH 버그 리포트
<http://oops.org> 의 QnA 게시판을 이용한다.
https://github.com/Joungkyun/libchardet/issues
.SH 저작권
Copyright (c) 2017 JoungKyun.Kim
Copyright (c) 2019 JoungKyun.Kim

이 프로그램은 MPL/GPL2/LGPL2.1 을 따르며, 사용시의 어떠한 문제에 대하여 보증하지 않는다.
.SH "참고"
Expand Down
Loading

0 comments on commit da0a1a0

Please sign in to comment.