Skip to content

Commit

Permalink
Add CSV byte ranges to machiaza API endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
keichan34 committed Oct 8, 2024
1 parent 3f84fd6 commit 590fc31
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 85 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ out
cache
*.0x
*.7z
explorer

8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Geolonia 住所データ v2
# Geolonia 住所データツール v2

全国の住所データを HTTP API として公開いたします
全国の住所データを HTTP API として公開するためのツールを公開いたします

本データは、デジタル庁が整備する「[アドレス・ベース・レジストリ](https://www.digital.go.jp/policies/base_registry_address)」を元に加工し、様々なアプリケーションから便利に使えるように整理したものとなります。

以前、「[Geolonia 住所データ](https://github.com/geolonia/japanese-addresses-v2)」を管理しましたが、v2は従来版と比べて下記の違いがあります。
以前、「[Geolonia 住所データ](https://github.com/geolonia/japanese-addresses)」を管理しましたが、v2は従来版と比べて下記の違いがあります。

* 住居表示住所データと対応(番地・号までのデータが含まれる)
* 地番住所のデータと対応(住居表示住所が導入されていない地域のデータが含まれる)
Expand All @@ -15,7 +15,7 @@

## API

このデータを使用した API をご提供しています。
このデータを使用した API をご提供しています。現在は制限なしで無料公開していますが、利用状況によっては公開の停止や仕様の変更を行うことがあります。商用で利用される場合は、ご自身でデータを作成し、ホスティングすることを強くおすすめします。Geolonia では有償で管理・ホスティングするサービスも提供していますので、ご利用を希望される方はお問い合わせください。

#### 都道府県エンドポイント

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"run:all": "npm run run:01_make_prefecture_city && npm run run:02_make_machi_aza && npm run run:03_make_rsdt && npm run run:04_make_chiban",
"run:01_make_prefecture_city": "tsx ./src/01_make_prefecture_city.ts",
"run:02_make_machi_aza": "tsx ./src/02_make_machi_aza.ts",
"run:03_make_rsdt": "tsx ./src/03_make_rsdt.ts",
"run:03_make_rsdt": "node --max-old-space-size=8192 --import tsx ./src/03_make_rsdt.ts",
"run:04_make_chiban": "node --max-old-space-size=8192 --import tsx ./src/04_make_chiban.ts",
"create:archive": "rm ./api.7z; 7zz a ./api.7z ./out/api",
"start": "http-server ./out",
Expand Down
178 changes: 107 additions & 71 deletions src/03_make_rsdt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import fs from 'node:fs';
import path from 'node:path';
import { getAndParseCSVDataForId, getAndStreamCSVDataForId } from './lib/ckan.js';
import { mergeRsdtdspRsdtData, RsdtdspRsdtData, RsdtdspRsdtDataWithPos, RsdtdspRsdtPosData } from './lib/ckan_data/rsdtdsp_rsdt.js';
import { machiAzaName, RsdtApi, SingleRsdt } from './data.js';
import { MachiAzaApi, machiAzaName, RsdtApi, SingleRsdt } from './data.js';
import { projectABRData } from './lib/proj.js';
import { MachiAzaData } from './lib/ckan_data/machi_aza.js';
import { rawToMachiAza } from './02_machi_aza.js';
Expand All @@ -16,11 +16,17 @@ const HEADER_PBF_CHUNK_SIZE = 8_192;
/**
 * Builds the output path for a machi-aza record, relative to the output root.
 *
 * The first path segment is the prefecture (`ma.pref`); the second is built
 * from the record's county, city and ward fields concatenated in that order.
 */
function getOutPath(ma: MachiAzaData) {
return path.join(
ma.pref,
`${ma.county}${ma.city}${ma.ward}-住居表示`,
`${ma.county}${ma.city}${ma.ward}`,
);
}

function serializeApiDataTxt(apiData: RsdtApi): Buffer {
/** One header entry describing where a single section sits inside the serialized TXT buffer. */
type HeaderRow = {
// Section name, as produced by machiAzaName() for the section's machi-aza.
name: string;
// Byte position of the section from the start of the output buffer (the padded header comes first).
offset: number;
// Size of the section in bytes (the section Buffer's length).
length: number;
}

function serializeApiDataTxt(apiData: RsdtApi): { headerIterations: number, headerData: HeaderRow[], data: Buffer } {
let outSections: Buffer[] = [];
for ( const { machiAza, rsdts } of apiData ) {
let outSection = `住居表示,${machiAzaName(machiAza)}\n` +
Expand All @@ -35,98 +41,128 @@ function serializeApiDataTxt(apiData: RsdtApi): Buffer {
let header = '';
const headerMaxSize = HEADER_CHUNK_SIZE * iterations;
let lastBytePos = headerMaxSize;
const headerData: HeaderRow[] = [];
for (const [index, section] of outSections.entries()) {
const ma = apiData[index].machiAza;

header += `${machiAzaName(ma)},${lastBytePos},${section.length}\n`;
lastBytePos += section.length;
}
const headerBuf = Buffer.from(header + '=END=\n', 'utf8');
if (headerBuf.length > headerMaxSize) {
return createHeader(iterations + 1);
} else {
const padding = Buffer.alloc(headerMaxSize - headerBuf.length);
padding.fill(0x20);
return Buffer.concat([headerBuf, padding]);
}
};

const header = createHeader();
return Buffer.concat([header, ...outSections]);
}

function _stringIfNotInteger(value: string | undefined) {
if (!value) { return undefined; }
return /^\d+$/.test(value) ? undefined : value;
}

function serializeApiDataPbf(apiData: RsdtApi): Buffer {
let outSections: Buffer[] = [];
for ( const { machiAza, rsdts } of apiData ) {
const section: AddrData.Section = {
kind: AddrData.Kind.RSDT,
name: machiAzaName(machiAza),
rsdtRows: [],
chibanRows: [],
}
for (const rsdt of rsdts) {
section.rsdtRows.push({
blkNum: rsdt.blk_num ? parseInt(rsdt.blk_num, 10) : undefined,
rsdtNum: parseInt(rsdt.rsdt_num, 10),
rsdtNum2: rsdt.rsdt_num2 ? parseInt(rsdt.rsdt_num2, 10) : undefined,
point: rsdt.point ? { lng: rsdt.point[0], lat: rsdt.point[1] } : undefined,
blkNumStr: _stringIfNotInteger(rsdt.blk_num),
rsdtNumStr: _stringIfNotInteger(rsdt.rsdt_num),
rsdtNum2Str: _stringIfNotInteger(rsdt.rsdt_num2),
});
}
const sectionBuf = Buffer.from(AddrData.Section.encode(section).finish());
outSections.push(sectionBuf);
}

const createHeader = (iterations = 1) => {
const header: AddrData.Header = {
kind: AddrData.Kind.RSDT,
rows: [],
};
const headerMaxSize = HEADER_PBF_CHUNK_SIZE * iterations;
let lastBytePos = headerMaxSize;
for (const [index, section] of outSections.entries()) {
const ma = apiData[index].machiAza;

header.rows.push({
headerData.push({
name: machiAzaName(ma),
offset: lastBytePos,
length: section.length,
});
lastBytePos += section.length;
}
const headerBuf = Buffer.from(AddrData.Header.encode(header).finish());
const headerBuf = Buffer.from(header + '=END=\n', 'utf8');
if (headerBuf.length > headerMaxSize) {
return createHeader(iterations + 1);
} else {
const padding = Buffer.alloc(headerMaxSize - headerBuf.length);
padding.fill(0x00);
return Buffer.concat([headerBuf, padding]);
padding.fill(0x20);
return {
iterations,
data: headerData,
buffer: Buffer.concat([headerBuf, padding])
};
}
};

const header = createHeader();
return Buffer.concat([header, ...outSections]);
return {
headerIterations: header.iterations,
headerData: header.data,
data: Buffer.concat([header.buffer, ...outSections]),
};
}

function outputRsdtData(outDir: string, outFilename: string, apiData: RsdtApi) {
const outFileJSON = path.join(outDir, 'ja', outFilename + '.json');
fs.mkdirSync(path.dirname(outFileJSON), { recursive: true });
// function _stringIfNotInteger(value: string | undefined) {
// if (!value) { return undefined; }
// return /^\d+$/.test(value) ? undefined : value;
// }

// function serializeApiDataPbf(apiData: RsdtApi): Buffer {
// let outSections: Buffer[] = [];
// for ( const { machiAza, rsdts } of apiData ) {
// const section: AddrData.Section = {
// kind: AddrData.Kind.RSDT,
// name: machiAzaName(machiAza),
// rsdtRows: [],
// chibanRows: [],
// }
// for (const rsdt of rsdts) {
// section.rsdtRows.push({
// blkNum: rsdt.blk_num ? parseInt(rsdt.blk_num, 10) : undefined,
// rsdtNum: parseInt(rsdt.rsdt_num, 10),
// rsdtNum2: rsdt.rsdt_num2 ? parseInt(rsdt.rsdt_num2, 10) : undefined,
// point: rsdt.point ? { lng: rsdt.point[0], lat: rsdt.point[1] } : undefined,
// blkNumStr: _stringIfNotInteger(rsdt.blk_num),
// rsdtNumStr: _stringIfNotInteger(rsdt.rsdt_num),
// rsdtNum2Str: _stringIfNotInteger(rsdt.rsdt_num2),
// });
// }
// const sectionBuf = Buffer.from(AddrData.Section.encode(section).finish());
// outSections.push(sectionBuf);
// }

// const createHeader = (iterations = 1) => {
// const header: AddrData.Header = {
// kind: AddrData.Kind.RSDT,
// rows: [],
// };
// const headerMaxSize = HEADER_PBF_CHUNK_SIZE * iterations;
// let lastBytePos = headerMaxSize;
// for (const [index, section] of outSections.entries()) {
// const ma = apiData[index].machiAza;

// header.rows.push({
// name: machiAzaName(ma),
// offset: lastBytePos,
// length: section.length,
// });
// lastBytePos += section.length;
// }
// const headerBuf = Buffer.from(AddrData.Header.encode(header).finish());
// if (headerBuf.length > headerMaxSize) {
// return createHeader(iterations + 1);
// } else {
// const padding = Buffer.alloc(headerMaxSize - headerBuf.length);
// padding.fill(0x00);
// return Buffer.concat([headerBuf, padding]);
// }
// };

// const header = createHeader();
// return Buffer.concat([header, ...outSections]);
// }

/**
 * Writes the serialized 住居表示 TXT file for one output path and records the
 * byte range of each section in the matching machi-aza JSON file.
 *
 * @param outDir root output directory; files are placed under its `ja` subdirectory
 * @param outFilename output path relative to `<outDir>/ja`, without extension
 * @param apiData data passed to serializeApiDataTxt() to produce the TXT buffer and header rows
 */
async function outputRsdtData(outDir: string, outFilename: string, apiData: RsdtApi) {
const machiAzaJSON = path.join(outDir, 'ja', outFilename + '.json');
// fs.mkdirSync(path.dirname(machiAzaJSON), { recursive: true });
// fs.writeFileSync(outFileJSON, JSON.stringify(apiData));

const outFileTXT = path.join(outDir, 'ja', outFilename + '.txt');
fs.writeFileSync(outFileTXT, serializeApiDataTxt(apiData));
const outFileTXT = path.join(outDir, 'ja', outFilename + '-住居表示.txt');
const txt = serializeApiDataTxt(apiData);
await fs.promises.writeFile(outFileTXT, txt.data);

// update machiAzaJSON
// 'r+' opens for reading and writing without creating the file, so the JSON
// must already exist (presumably written by the machi-aza step — confirm).
// NOTE(review): the handle is not closed if JSON.parse or a later call throws;
// consider wrapping the section below in try/finally.
const machiAzaF = await fs.promises.open(machiAzaJSON, 'r+');
const maData = JSON.parse(await machiAzaF.readFile('utf8')) as MachiAzaApi;
// Stamp the update time as whole seconds since the Unix epoch.
maData.meta.updated = Math.floor(Date.now() / 1000);
for (const headerRow of txt.headerData) {
// Match JSON entries to header rows by machi-aza name; rows without a match are skipped.
const ma = maData.data.find((ma) => machiAzaName(ma) === headerRow.name);
if (ma) {
// Keep any ranges already stored on the entry and set only the '住居表示' key.
ma.csv_ranges = ma.csv_ranges || {};
ma.csv_ranges['住居表示'] = { start: headerRow.offset, length: headerRow.length };
}
}
// Empty the file before rewriting from position 0 so nothing of the old content remains.
await machiAzaF.truncate(0);
await machiAzaF.write(JSON.stringify(maData), 0, 'utf8');
await machiAzaF.close();

// const outFilePbf = path.join(outDir, 'ja', outFilename + '.pbf');
// fs.writeFileSync(outFilePbf, serializeApiDataPbf(apiData));

console.log(`${outFilename}: ${apiData.length.toString(10).padEnd(4, ' ')} 件の町字を出力した`);
console.log(`${outFilename}-住居表示: ${apiData.length.toString(10).padEnd(4, ' ')} 件の町字を出力した`);
}

async function main(argv: string[]) {
Expand Down Expand Up @@ -168,7 +204,7 @@ async function main(argv: string[]) {
currentRsdtList = [];
}
if (lastOutPath !== thisOutPath && lastOutPath !== undefined) {
outputRsdtData(outDir, lastOutPath, apiData);
await outputRsdtData(outDir, lastOutPath, apiData);
apiData = [];
}
if (lastOutPath !== thisOutPath) {
Expand All @@ -192,7 +228,7 @@ async function main(argv: string[]) {
});
}
if (lastOutPath) {
outputRsdtData(outDir, lastOutPath, apiData);
await outputRsdtData(outDir, lastOutPath, apiData);
}
}

Expand Down
54 changes: 45 additions & 9 deletions src/04_make_chiban.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,21 @@ import path from 'node:path';
import cliProgress from 'cli-progress';

import { ckanPackageSearch, findResultByTypeAndArea, getAndParseCSVDataForId, getAndStreamCSVDataForId } from './lib/ckan.js';
import { ChibanApi, machiAzaName, SingleChiban } from './data.js';
import { ChibanApi, MachiAzaApi, machiAzaName, SingleChiban } from './data.js';
import { projectABRData } from './lib/proj.js';
import { MachiAzaData } from './lib/ckan_data/machi_aza.js';
import { ChibanData, ChibanPosData } from './lib/ckan_data/chiban.js';
import { mergeDataLeftJoin } from './lib/ckan_data/index.js';

const HEADER_CHUNK_SIZE = 50_000;

function serializeApiDataTxt(apiData: ChibanApi): Buffer {
/** One header entry describing where a single section sits inside the serialized TXT buffer. */
type HeaderRow = {
// Section name, as produced by machiAzaName() for the section's machi-aza.
name: string;
// Byte position of the section from the start of the output buffer (the padded header comes first).
offset: number;
// Size of the section in bytes (the section Buffer's length).
length: number;
}

function serializeApiDataTxt(apiData: ChibanApi): { headerIterations: number, headerData: HeaderRow[], data: Buffer } {
let outSections: Buffer[] = [];
for ( const { machiAza, chibans } of apiData ) {
let outSection = `地番,${machiAzaName(machiAza)}\n` +
Expand All @@ -29,10 +35,17 @@ function serializeApiDataTxt(apiData: ChibanApi): Buffer {
let header = '';
const headerMaxSize = HEADER_CHUNK_SIZE * iterations;
let lastBytePos = headerMaxSize;
const headerData: HeaderRow[] = [];
for (const [index, section] of outSections.entries()) {
const ma = apiData[index].machiAza;

header += `${machiAzaName(ma)},${lastBytePos},${section.length}\n`;
headerData.push({
name: machiAzaName(ma),
offset: lastBytePos,
length: section.length,
});

lastBytePos += section.length;
}
const headerBuf = Buffer.from(header + '=END=\n', 'utf8');
Expand All @@ -41,24 +54,47 @@ function serializeApiDataTxt(apiData: ChibanApi): Buffer {
} else {
const padding = Buffer.alloc(headerMaxSize - headerBuf.length);
padding.fill(0x20);
return Buffer.concat([headerBuf, padding]);
return {
iterations,
data: headerData,
buffer: Buffer.concat([headerBuf, padding])
};
}
};

const header = createHeader();
return Buffer.concat([header, ...outSections]);
return {
headerIterations: header.iterations,
headerData: header.data,
data: Buffer.concat([header.buffer, ...outSections]),
};
}

/**
 * Writes the serialized 地番 TXT file for one output path and records the byte
 * range of each section in the matching machi-aza JSON file.
 *
 * Does nothing when `apiData` is empty.
 *
 * @param outDir root output directory; files are placed under its `ja` subdirectory
 * @param outFilename output path relative to `<outDir>/ja`, without extension
 * @param apiData data passed to serializeApiDataTxt() to produce the TXT buffer and header rows
 */
async function outputChibanData(outDir: string, outFilename: string, apiData: ChibanApi) {
if (apiData.length === 0) {
return;
}
const outFile = path.join(outDir, 'ja', outFilename + '.json');
await fs.promises.mkdir(path.dirname(outFile), { recursive: true });
const machiAzaJSON = path.join(outDir, 'ja', outFilename + '.json');
// await fs.promises.writeFile(outFile, JSON.stringify(apiData, null, 2));

const outFileTXT = path.join(outDir, 'ja', outFilename + '.txt');
await fs.promises.writeFile(outFileTXT, serializeApiDataTxt(apiData));
const outFileTXT = path.join(outDir, 'ja', outFilename + '-地番.txt');
const txt = serializeApiDataTxt(apiData);
await fs.promises.writeFile(outFileTXT, txt.data);

// update machiAzaJSON
// 'r+' opens for reading and writing without creating the file, so the JSON
// must already exist (presumably written by the machi-aza step — confirm).
// NOTE(review): the handle is not closed if JSON.parse or a later call throws;
// consider wrapping the section below in try/finally.
const machiAzaF = await fs.promises.open(machiAzaJSON, 'r+');
const maData = JSON.parse(await machiAzaF.readFile('utf8')) as MachiAzaApi;
// Stamp the update time as whole seconds since the Unix epoch.
maData.meta.updated = Math.floor(Date.now() / 1000);
for (const headerRow of txt.headerData) {
// Match JSON entries to header rows by machi-aza name; rows without a match are skipped.
const ma = maData.data.find((ma) => machiAzaName(ma) === headerRow.name);
if (ma) {
// Keep any ranges already stored on the entry and set only the '地番' key.
ma.csv_ranges = ma.csv_ranges || {};
ma.csv_ranges['地番'] = { start: headerRow.offset, length: headerRow.length };
}
}
// Empty the file before rewriting from position 0 so nothing of the old content remains.
await machiAzaF.truncate(0);
await machiAzaF.write(JSON.stringify(maData), 0, 'utf8');
await machiAzaF.close();

console.log(`${outFilename}: ${apiData.length.toString(10).padEnd(4, ' ')} 件の町字の地番を出力した`);
}
Expand Down Expand Up @@ -159,7 +195,7 @@ async function main(argv: string[]) {
}
await outputChibanData(outDir, path.join(
ma.pref,
`${ma.county}${ma.city}${ma.ward}-地番`,
`${ma.county}${ma.city}${ma.ward}`,
), apiData);
progress.increment();
}
Expand Down
6 changes: 6 additions & 0 deletions src/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ export type SingleMachiAza = {

/// 代表点
point?: LngLat;

/// CSV APIに付加情報が存在する場合、この町字のバイト範囲を指定します。
csv_ranges?: {
["住居表示"]?: { start: number; length: number; };
["地番"]?: { start: number; length: number; };
}
};

export function machiAzaName(machiAza: SingleMachiAza): string {
Expand Down

0 comments on commit 590fc31

Please sign in to comment.