Skip to content

Commit

Permalink
Merge pull request #3 from ctot-nondef/1-make-xml2js-parsing-results-…
Browse files Browse the repository at this point in the history
…homogenuous

1 make xml2js parsing results homogeneous
  • Loading branch information
ctot-nondef committed Jan 9, 2024
2 parents 50be6ef + 33cc314 commit 2888c5c
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 50 deletions.
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ export class OaiPmh implements IOAIHarvesterInterface {
})

const obj = await parseOaiPmhXml(res.data)
return obj["OAI-PMH"].GetRecord.record;
return obj["OAI-PMH"].GetRecord[0].record;
}

/**
Expand All @@ -138,7 +138,7 @@ export class OaiPmh implements IOAIHarvesterInterface {
identifier: params.identifier
})
const obj = await parseOaiPmhXml(res.data)
return obj["OAI-PMH"].ListMetadataFormats.metadataFormat;
return obj["OAI-PMH"].ListMetadataFormats[0].metadataFormat;
}

/**
Expand Down
14 changes: 7 additions & 7 deletions src/oai-pmh-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@ import { parseOaiPmhXml } from './oai-pmh-xml'
import {TOAIListVerbs} from "./EOAIVerbs.enum";
import {TOAIResumptionToken} from "./oai-pmh-xml";
import {AxiosResponse} from "axios";
/**
 * Extracts the resumption token text needed to request the next page of a
 * list response.
 *
 * The parser is configured with `explicitArray: true`, so `resumptionToken`
 * and each of its fields arrive wrapped in arrays; only the first entry of
 * each array is consulted.
 *
 * NOTE(review): xml2js places XML attributes under `$` unless `mergeAttrs` is
 * enabled, so `cursor` / `completeListSize` may be absent at runtime — confirm
 * against the schema. When they are absent the token text alone decides.
 *
 * @param result   Parsed list-verb element that may carry a `resumptionToken`.
 * @param listSize Number of items contained in the page `result` came from.
 * @returns The token text, or `undefined` when there is no token or the
 *          cursor shows that the complete list has already been delivered.
 */
function getResumptionToken (result: { resumptionToken?: TOAIResumptionToken[] }, listSize: number): string | undefined {
  // A missing element and an empty array both mean "no further page".
  const token = result.resumptionToken?.[0]
  if (!token) return undefined

  // Unwrap the array-wrapped numbers before doing arithmetic on them.
  const cursor = token.cursor?.[0]
  const completeListSize = token.completeListSize?.[0]

  // Compare against undefined rather than truthiness: a cursor of 0 is valid.
  if (
    cursor !== undefined &&
    completeListSize !== undefined &&
    cursor + listSize >= completeListSize
  ) return undefined

  return token._
}

/**
Expand All @@ -32,15 +32,15 @@ export async function* getOaiListItems (oaiPmh: OaiPmh, verb: TOAIListVerbs, par
const initialResponse = await oaiPmh.request({}, verb, {...params});
const initialParsedResponse = await parseOaiPmhXml(initialResponse.data)
const initialResult = initialParsedResponse["OAI-PMH"][verb]
for (const item of initialResult[field]) {
for (const item of initialResult[0][field]) {
yield item
}
let result = initialResult
let result = initialResult[0]
let resumptionToken: string;
while ((resumptionToken = getResumptionToken(result, result[field].length))) {
const response: AxiosResponse = await oaiPmh.request({}, verb, { resumptionToken });
const parsedResponse = await parseOaiPmhXml(response.data)
result = parsedResponse["OAI-PMH"][verb]
result = parsedResponse["OAI-PMH"][verb][0]
for (const item of result[field]) {
yield item
}
Expand Down
79 changes: 41 additions & 38 deletions src/oai-pmh-xml.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,51 @@ const ZOAIError = z.object({

/**
 * Schema for a single parsed `<resumptionToken>` element.
 *
 * - `_` holds the token text; it is optional because the element may be empty.
 * - `$` holds the element's XML attributes. xml2js collects attributes under
 *   `$` unless `mergeAttrs` is enabled, and zod drops keys that a schema does
 *   not declare, so `$` has to be listed here for the attributes to survive
 *   parsing. Attribute values arrive as strings, hence the numeric coercion.
 * - The array-wrapped `expirationDate` / `completeListSize` / `cursor` fields
 *   are kept so existing consumers of the inferred type keep compiling; they
 *   are presumably only populated when attributes are merged into the element
 *   — TODO confirm.
 */
const ZOAIResumptionToken = z.object({
  _: z.string().optional(),
  $: z.object({
    expirationDate: z.string().optional(),
    completeListSize: z.coerce.number().nonnegative().optional(),
    cursor: z.coerce.number().nonnegative().optional()
  }).passthrough().optional(),
  expirationDate: z.array(z.string().datetime()).optional(),
  completeListSize: z.array(z.number().positive()).optional(),
  cursor: z.array(z.number().nonnegative()).optional()
})

export type TOAIResumptionToken = z.infer<typeof ZOAIResumptionToken>

/**
 * Schema for a complete parsed OAI-PMH response document.
 *
 * Every child element is modelled as an array of objects, and text content
 * sits under `_`. Each verb container is optional because a response carries
 * at most the container of the verb that was requested, or `error` entries
 * instead.
 */
const ZOAIResponse = z.object({
  "OAI-PMH": z.object({
    responseDate: z.array(z.object({
      _: z.string().datetime(),
    })),
    request: z.unknown(),
    error: z.array(ZOAIError).optional(),
    // passthrough: a bare z.object({}) would strip every key of the record,
    // leaving callers with empty objects.
    GetRecord: z.array(z.object({ record: z.array(z.object({}).passthrough()) })).optional(),
    ListMetadataFormats: z.array(z.object({
      metadataFormat: z.array(z.object({
        metadataPrefix: z.array(z.object({ _: z.string() })),
        schema: z.array(z.object({ _: z.string().url() })),
        metadataNamespace: z.array(z.object({ _: z.string().url() }))
      }))
    })).optional(),
    Identify: z.array(z.object({
      repositoryName: z.array(z.object({ _: z.string() })),
      baseURL: z.array(z.object({ _: z.string().url() })),
      protocolVersion: z.array(z.object({ _: z.string() })),
      adminEmail: z.array(z.object({ _: z.string().email() })),
      earliestDatestamp: z.array(z.object({ _: z.string() })),
      deletedRecord: z.array(z.object({ _: z.enum(["no", "persistent", "transient"]) })),
      granularity: z.array(z.object({ _: z.enum(["YYYY-MM-DD", "YYYY-MM-DDThh:mm:ssZ"]) })),
      compression: z.array(z.object({ _: z.string() })).optional(),
      // Optional: repositories may omit <description> entirely, and a required
      // array would reject their Identify response.
      description: z.array(z.any()).optional(),
    })).optional(),
    ListSets: z.array(z.object({
      set: z.array(z.unknown()),
      resumptionToken: z.array(ZOAIResumptionToken).optional(),
    })).optional(),
    ListIdentifiers: z.array(z.object({
      header: z.array(z.unknown()),
      resumptionToken: z.array(ZOAIResumptionToken).optional(),
    })).optional(),
    ListRecords: z.array(z.object({
      record: z.array(z.unknown()).optional(),
      resumptionToken: z.array(ZOAIResumptionToken).optional(),
    })).optional()
  }),
})

Expand All @@ -70,22 +70,25 @@ export type TOAIResponse = z.infer<typeof ZOAIResponse>
export async function parseOaiPmhXml (xml: string): Promise<TOAIResponse> {
const parser = new xml2js.Parser({
explicitCharkey: true,
explicitArray: false,
explicitArray: true,
trim: true,
normalize: true
});
const obj = await parser.parseStringPromise(xml)
console.log(JSON.stringify(obj,null, 2));
const oaiPmh = ZOAIResponse.passthrough().parse(obj);
if (!oaiPmh) {
throw new OaiPmhError('Returned data does not conform to OAI-PMH' , "none");
}

const error = oaiPmh["OAI-PMH"].error as z.infer<typeof ZOAIError>
const error = oaiPmh["OAI-PMH"].error as z.infer<typeof ZOAIError>[]
if (error) {
throw new OaiPmhError(
`OAI-PMH provider returned an error: ${error._}`,
error.$.code
)
error.forEach(e => {
throw new OaiPmhError(
`OAI-PMH provider returned an error: ${e._}`,
e.$.code
)
})
}

return oaiPmh
Expand Down
3 changes: 0 additions & 3 deletions test/oai-pmh.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,12 @@ describe('OaiPmh', () => {
})

describe('listMetadataFormats()', function () {

this.timeout(30000)

it('should list metadata formats for arxiv', async () => {
const oaiPmh = new OaiPmh(arxivBaseUrl as unknown as URL)
const res = await oaiPmh.listMetadataFormats({})
expect(res.length).to.equal(4);
})

it('should list metadata formats for arxiv id 1208.0264', async () => {
const oaiPmh = new OaiPmh(arxivBaseUrl as unknown as URL)
const res = await oaiPmh.listMetadataFormats({
Expand Down

0 comments on commit 2888c5c

Please sign in to comment.