You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1569 lines
38 KiB

8 years ago
  1. /*!
  2. Papa Parse
  3. v4.3.2
  4. https://github.com/mholt/PapaParse
  5. */
  6. (function(root, factory)
  7. {
  8. if (typeof define === 'function' && define.amd)
  9. {
  10. // AMD. Register as an anonymous module.
  11. define([], factory);
  12. }
  13. else if (typeof module === 'object' && module.exports)
  14. {
  15. // Node. Does not work with strict CommonJS, but
  16. // only CommonJS-like environments that support module.exports,
  17. // like Node.
  18. module.exports = factory();
  19. }
  20. else
  21. {
  22. // Browser globals (root is window)
  23. root.Papa = factory();
  24. }
  25. }(this, function()
  26. {
  27. 'use strict';
  /**
   * Reference to the host environment's global object (`window` in a page,
   * `self` in a worker, the global object in Node.js), or an empty object when
   * none can be located.
   *
   * The identifier `global` cannot be probed from inside this function: the
   * `var global` being initialised here is hoisted over the whole enclosing
   * function and shadows any outer `global`, so `typeof global` is always
   * 'undefined' at this point. `globalThis` is probed instead; the `typeof`
   * guard keeps the check safe on engines that do not define it.
   */
  var global = (function ()
  {
    // Alternative to `Function('return this')()` that does not rely on `eval`
    // (which is disabled when using Content Security Policy).
    if (typeof globalThis !== 'undefined') { return globalThis; }
    if (typeof self !== 'undefined') { return self; }
    if (typeof window !== 'undefined') { return window; }
    // When running tests none of the above may have been defined
    return {};
  })();
  // Environment flags.
  // IS_WORKER: the global has postMessage but no document.
  // IS_PAPA_WORKER: additionally, the script URL's query string carries a `papaworker` parameter.
  // LOADED_SYNC / AUTO_SCRIPT_PATH: filled in by the worker bootstrap further down.
  var IS_WORKER = !global.document && !!global.postMessage,
    IS_PAPA_WORKER = IS_WORKER && /(\?|&)papaworker(=|&|$)/.test(global.location.search),
    LOADED_SYNC = false, AUTO_SCRIPT_PATH;
  // Worker bookkeeping — presumably a registry keyed by worker id plus the id source; the consumer is outside this view.
  var workers = {}, workerIdCounter = 0;
  // Public namespace object; this is what the module factory hands back to the UMD wrapper.
  var Papa = {};
  Papa.parse = CsvToJson;
  Papa.unparse = JsonToCsv;
  // ASCII record separator (char code 30) and unit separator (char code 31);
  // both are among the candidates tried during delimiter auto-detection.
  Papa.RECORD_SEP = String.fromCharCode(30);
  Papa.UNIT_SEP = String.fromCharCode(31);
  Papa.BYTE_ORDER_MARK = '\ufeff';
  // Characters rejected as delimiter/comment characters; values containing them are quoted when unparsing.
  Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
  // Workers can only be spawned from a non-worker context that exposes a Worker constructor.
  Papa.WORKERS_SUPPORTED = !IS_WORKER && !!global.Worker;
  Papa.SCRIPT_PATH = null; // Must be set by your code if you use workers and this lib is loaded asynchronously
  // Configurable chunk sizes for local and remote files, respectively
  Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
  Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
  Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
  // Exposed for testing and development only
  Papa.Parser = Parser;
  Papa.ParserHandle = ParserHandle;
  Papa.NetworkStreamer = NetworkStreamer;
  Papa.FileStreamer = FileStreamer;
  Papa.StringStreamer = StringStreamer;
  Papa.ReadableStreamStreamer = ReadableStreamStreamer;
  // Register the jQuery plugin only when jQuery is present on the global object.
  if (global.jQuery)
  {
    var $ = global.jQuery;

    /**
     * jQuery plugin: parses every file selected in the matched
     * `<input type="file">` elements, one file at a time.
     *
     * @param {Object} [options]
     * @param {Object} [options.config]     Papa.parse configuration, copied for each file.
     * @param {Function} [options.before]   Called as before(file, inputElem) ahead of each file.
     *        May return 'skip', or an object with `action` ('abort' | 'skip'),
     *        `reason` and/or `config` (merged into that file's configuration).
     * @param {Function} [options.error]    Called as error({name}, file, inputElem, reason) on abort.
     * @param {Function} [options.complete] Called with no arguments once the queue is empty.
     * @returns {jQuery} the original selection, to maintain chainability.
     */
    $.fn.parse = function(options)
    {
      // Tolerate a call without any options instead of throwing on `options.config`
      options = options || {};
      var config = options.config || {};
      var queue = [];

      this.each(function()
      {
        // attr('type') is undefined when the attribute is absent; such an
        // element cannot be a file input, so treat it as unsupported.
        var typeAttr = $(this).attr('type');
        var supported = $(this).prop('tagName').toUpperCase() === 'INPUT'
          && typeof typeAttr === 'string'
          && typeAttr.toLowerCase() === 'file'
          && global.FileReader;

        if (!supported || !this.files || this.files.length === 0)
          return true; // continue to next input element

        for (var i = 0; i < this.files.length; i++)
        {
          queue.push({
            file: this.files[i],
            inputElem: this,
            instanceConfig: $.extend({}, config)
          });
        }
      });

      parseNextFile(); // begin parsing
      return this; // maintains chainability

      /** Parses the file at the head of the queue, or reports completion when the queue is empty. */
      function parseNextFile()
      {
        if (queue.length === 0)
        {
          if (isFunction(options.complete))
            options.complete();
          return;
        }

        var f = queue[0];

        if (isFunction(options.before))
        {
          var returned = options.before(f.file, f.inputElem);

          // `typeof null` is 'object' as well, so null must be excluded explicitly
          if (returned !== null && typeof returned === 'object')
          {
            if (returned.action === 'abort')
            {
              error('AbortError', f.file, f.inputElem, returned.reason);
              return; // Aborts all queued files immediately
            }
            else if (returned.action === 'skip')
            {
              fileComplete(); // parse the next file in the queue, if any
              return;
            }
            else if (typeof returned.config === 'object')
              f.instanceConfig = $.extend(f.instanceConfig, returned.config);
          }
          else if (returned === 'skip')
          {
            fileComplete(); // parse the next file in the queue, if any
            return;
          }
        }

        // Wrap up the user's complete callback, if any, so that ours also gets executed
        var userCompleteFunc = f.instanceConfig.complete;
        f.instanceConfig.complete = function(results)
        {
          if (isFunction(userCompleteFunc))
            userCompleteFunc(results, f.file, f.inputElem);
          fileComplete();
        };

        Papa.parse(f.file, f.instanceConfig);
      }

      /** Forwards an error to the user's error callback, if one was supplied. */
      function error(name, file, elem, reason)
      {
        if (isFunction(options.error))
          options.error({name: name}, file, elem, reason);
      }

      /** Drops the file at the head of the queue and moves on to the next one. */
      function fileComplete()
      {
        queue.splice(0, 1);
        parseNextFile();
      }
    };
  }
  // Worker bootstrap. Inside a Papa worker, install the message handler.
  // On a main thread that supports workers, remember the script path and
  // work out the LOADED_SYNC flag.
  if (!IS_PAPA_WORKER)
  {
    if (Papa.WORKERS_SUPPORTED)
    {
      AUTO_SCRIPT_PATH = getScriptPath();

      // Check if the script was loaded synchronously
      if (document.body)
      {
        // Body already exists: defer setting the flag until DOMContentLoaded fires
        document.addEventListener('DOMContentLoaded', function () {
          LOADED_SYNC = true;
        }, true);
      }
      else
      {
        // Body doesn't exist yet, must be synchronous
        LOADED_SYNC = true;
      }
    }
  }
  else
  {
    global.onmessage = workerThreadReceivedMessage;
  }
  /**
   * Entry point behind Papa.parse: picks the streamer that matches the input
   * type and starts parsing, or hands the job to a worker.
   *
   * @param {string|Object} _input  CSV text, a URL (with `_config.download`),
   *        a readable stream (readable === true with read/on methods), or a
   *        File / other object accepted by FileStreamer.
   * @param {Object} [_config]  Parse configuration. It is modified in place:
   *        `dynamicTyping` is normalised (a function is moved to
   *        `dynamicTypingFunction`), and in worker mode the callbacks are
   *        replaced by booleans and `worker` is deleted.
   * @returns {*} whatever the chosen streamer's `stream()` returns; undefined
   *        when the work is delegated to a worker.
   * @throws {TypeError} when `_input` is of a type no streamer can handle.
   */
  function CsvToJson(_input, _config)
  {
    _config = _config || {};

    var dynamicTyping = _config.dynamicTyping || false;
    if (isFunction(dynamicTyping)) {
      _config.dynamicTypingFunction = dynamicTyping;
      // Will be filled on first row call
      dynamicTyping = {};
    }
    _config.dynamicTyping = dynamicTyping;

    if (_config.worker && Papa.WORKERS_SUPPORTED)
    {
      var w = newWorker();

      // Callbacks stay on this side, attached to the worker object
      w.userStep = _config.step;
      w.userChunk = _config.chunk;
      w.userComplete = _config.complete;
      w.userError = _config.error;

      // The posted config only records whether each callback exists
      _config.step = isFunction(_config.step);
      _config.chunk = isFunction(_config.chunk);
      _config.complete = isFunction(_config.complete);
      _config.error = isFunction(_config.error);
      delete _config.worker; // prevent infinite loop

      w.postMessage({
        input: _input,
        config: _config,
        workerId: w.id
      });

      return;
    }

    var streamer = null;
    if (typeof _input === 'string')
    {
      if (_config.download)
        streamer = new NetworkStreamer(_config);
      else
        streamer = new StringStreamer(_config);
    }
    else if (_input && _input.readable === true && isFunction(_input.read) && isFunction(_input.on))
    {
      streamer = new ReadableStreamStreamer(_config);
    }
    else if ((global.File && _input instanceof File) || _input instanceof Object) // ...Safari. (see issue #106)
      streamer = new FileStreamer(_config);

    // Fail with a meaningful message instead of "cannot read property 'stream' of null"
    if (streamer === null)
      throw new TypeError('Unable to parse input: expected a string, a File, or a readable stream');

    return streamer.stream(_input);
  }
  /**
   * Entry point behind Papa.unparse: serializes data to a CSV string.
   *
   * @param {string|Array|Object} _input  One of:
   *        - an array of arrays (rows of values),
   *        - an array of objects (keys of the first object become the header),
   *        - an object with `data` and optionally `fields` or `meta.fields`,
   *        - a JSON string encoding any of the above.
   *        The input object is not modified.
   * @param {Object} [_config]
   * @param {boolean|Array} [_config.quotes]  Quote every value, or per-column flags.
   * @param {string} [_config.quoteChar]  Quote character (default '"').
   * @param {string} [_config.delimiter]  Single character not in Papa.BAD_DELIMITERS (default ',').
   * @param {string} [_config.newline]    Row separator (default '\r\n').
   * @param {boolean} [_config.header]    Whether to write the header row (default true).
   * @returns {string} the CSV text.
   * @throws {string} 'exception: Unable to serialize unrecognized input' for unsupported input.
   */
  function JsonToCsv(_input, _config)
  {
    // Default configuration

    /** whether to surround every datum with quotes */
    var _quotes = false;

    /** whether to write headers */
    var _writeHeader = true;

    /** delimiting character */
    var _delimiter = ',';

    /** newline character(s) */
    var _newline = '\r\n';

    /** quote character */
    var _quoteChar = '"';

    unpackConfig();

    // The quote character is matched literally: regex metacharacters such as
    // '|' or '.' must be escaped or the pattern would match the wrong text.
    var quoteCharRegex = new RegExp(_quoteChar.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g');

    if (typeof _input === 'string')
      _input = JSON.parse(_input);

    if (_input instanceof Array)
    {
      if (!_input.length || _input[0] instanceof Array)
        return serialize(null, _input);
      else if (typeof _input[0] === 'object')
        return serialize(objectKeys(_input[0]), _input);
    }
    else if (_input !== null && typeof _input === 'object')
    {
      // Work on local copies of the references so the caller's object is left untouched
      var inputData = _input.data;
      var inputFields = _input.fields;

      if (typeof inputData === 'string')
        inputData = JSON.parse(inputData);

      if (inputData instanceof Array)
      {
        if (!inputFields)
          inputFields = _input.meta && _input.meta.fields;

        if (!inputFields)
          inputFields = inputData[0] instanceof Array
            ? inputFields
            : objectKeys(inputData[0]);

        if (!(inputData[0] instanceof Array) && typeof inputData[0] !== 'object')
          inputData = [inputData]; // handles input like [1,2,3] or ['asdf']
      }

      return serialize(inputFields || [], inputData || []);
    }

    // Default (any valid paths should return before this)
    throw 'exception: Unable to serialize unrecognized input';


    /** Copies the recognised, well-typed options from _config over the defaults. */
    function unpackConfig()
    {
      // `typeof null` is 'object', so a null config needs its own guard
      if (!_config || typeof _config !== 'object')
        return;

      if (typeof _config.delimiter === 'string'
        && _config.delimiter.length === 1
        && Papa.BAD_DELIMITERS.indexOf(_config.delimiter) === -1)
      {
        _delimiter = _config.delimiter;
      }

      if (typeof _config.quotes === 'boolean'
        || _config.quotes instanceof Array)
        _quotes = _config.quotes;

      if (typeof _config.newline === 'string')
        _newline = _config.newline;

      if (typeof _config.quoteChar === 'string')
        _quoteChar = _config.quoteChar;

      if (typeof _config.header === 'boolean')
        _writeHeader = _config.header;
    }


    /** Turns an object's keys into an array */
    function objectKeys(obj)
    {
      if (typeof obj !== 'object')
        return [];
      var keys = [];
      for (var key in obj)
        keys.push(key);
      return keys;
    }

    /** The double for loop that iterates the data and writes out a CSV string including header row */
    function serialize(fields, data)
    {
      var csv = '';

      if (typeof fields === 'string')
        fields = JSON.parse(fields);
      if (typeof data === 'string')
        data = JSON.parse(data);

      var hasHeader = fields instanceof Array && fields.length > 0;
      var dataKeyedByField = !(data[0] instanceof Array);

      // If there is a header row, write it first
      if (hasHeader && _writeHeader)
      {
        for (var i = 0; i < fields.length; i++)
        {
          if (i > 0)
            csv += _delimiter;
          csv += safe(fields[i], i);
        }
        if (data.length > 0)
          csv += _newline;
      }

      // Then write out the data
      for (var row = 0; row < data.length; row++)
      {
        // With a header, every row is padded/truncated to the header's width
        var maxCol = hasHeader ? fields.length : data[row].length;

        for (var col = 0; col < maxCol; col++)
        {
          if (col > 0)
            csv += _delimiter;
          var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
          csv += safe(data[row][colIdx], col);
        }

        if (row < data.length - 1)
          csv += _newline;
      }

      return csv;
    }

    /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
    function safe(str, col)
    {
      if (typeof str === 'undefined' || str === null)
        return '';

      str = str.toString();

      // A value holding the quote character has that character doubled below,
      // which is only meaningful to a reader when the value itself is quoted.
      var containsQuoteChar = _quoteChar !== '' && str.indexOf(_quoteChar) > -1;

      str = str.replace(quoteCharRegex, _quoteChar + _quoteChar);

      var needsQuotes = (typeof _quotes === 'boolean' && _quotes)
        || (_quotes instanceof Array && _quotes[col])
        || containsQuoteChar
        || hasAny(str, Papa.BAD_DELIMITERS)
        || str.indexOf(_delimiter) > -1
        || str.charAt(0) === ' '
        || str.charAt(str.length - 1) === ' ';

      return needsQuotes ? _quoteChar + str + _quoteChar : str;
    }

    /** True when `str` contains at least one of `substrings`. */
    function hasAny(str, substrings)
    {
      for (var i = 0; i < substrings.length; i++)
        if (str.indexOf(substrings[i]) > -1)
          return true;
      return false;
    }
  }
  /**
   * ChunkStreamer is the base prototype for various streamer implementations.
   *
   * It owns the chunk-by-chunk bookkeeping (partial line carried between
   * chunks, running row count, accumulated results) and delegates the actual
   * parsing to a ParserHandle. Concrete streamers call this constructor and
   * supply `stream()` and `_nextChunk()`.
   *
   * @param {Object} config  Parse configuration; a copy is stored as `this._config`.
   */
  function ChunkStreamer(config)
  {
    this._handle = null;
    // NOTE(review): `_paused` is never set to true anywhere in the visible code — confirm it is still used
    this._paused = false;
    this._finished = false;
    this._input = null;
    this._baseIndex = 0;      // passed to the handle as the base index of each parse call
    this._partialLine = '';   // unparsed tail of the previous chunk
    this._rowCount = 0;
    this._start = 0;
    this._nextChunk = null;   // supplied by the concrete streamer
    this.isFirstChunk = true;
    this._completeResults = {
      data: [],
      errors: [],
      meta: {}
    };
    replaceConfig.call(this, config);

    /**
     * Parses one chunk of text, dispatches the results (worker message, user
     * `chunk` callback, or accumulation for `complete`), and requests the next
     * chunk unless parsing has finished, paused or aborted.
     *
     * @param {string} chunk  Next piece of input.
     * @returns {Object|undefined} the chunk's results; undefined when the
     *          handle is paused/aborted or after a user `chunk` callback ran.
     */
    this.parseChunk = function(chunk)
    {
      // First chunk pre-processing
      if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk))
      {
        var modifiedChunk = this._config.beforeFirstChunk(chunk);
        if (modifiedChunk !== undefined)
          chunk = modifiedChunk;
      }
      this.isFirstChunk = false;

      // Rejoin the line we likely just split in two by chunking the file
      var aggregate = this._partialLine + chunk;
      this._partialLine = '';

      // Third argument asks the parser to ignore the last row while more chunks are expected
      var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

      if (this._handle.paused() || this._handle.aborted())
        return;

      var lastIndex = results.meta.cursor;

      if (!this._finished)
      {
        // Keep whatever lies past the parser's cursor for the next chunk
        this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
        this._baseIndex = lastIndex;
      }

      if (results && results.data)
        this._rowCount += results.data.length;

      // Reaching the preview row limit counts as finished, too
      var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);

      if (IS_PAPA_WORKER)
      {
        global.postMessage({
          results: results,
          workerId: Papa.WORKER_ID,
          finished: finishedIncludingPreview
        });
      }
      else if (isFunction(this._config.chunk))
      {
        this._config.chunk(results, this._handle);
        if (this._paused)
          return;
        // Results were handed to the user; nothing is accumulated, so `complete` receives undefined
        results = undefined;
        this._completeResults = undefined;
      }

      // Accumulate only when neither step nor chunk is configured
      if (!this._config.step && !this._config.chunk) {
        this._completeResults.data = this._completeResults.data.concat(results.data);
        this._completeResults.errors = this._completeResults.errors.concat(results.errors);
        this._completeResults.meta = results.meta;
      }

      if (finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted))
        this._config.complete(this._completeResults, this._input);

      if (!finishedIncludingPreview && (!results || !results.meta.paused))
        this._nextChunk();

      return results;
    };

    /**
     * Reports an error through the user's `error` callback, or — inside a
     * Papa worker where `config.error` is only a truthy flag — by posting a
     * message to the main thread.
     */
    this._sendError = function(error)
    {
      if (isFunction(this._config.error))
        this._config.error(error);
      else if (IS_PAPA_WORKER && this._config.error)
      {
        global.postMessage({
          workerId: Papa.WORKER_ID,
          error: error,
          finished: false
        });
      }
    };

    /** Copies the config, normalises chunkSize, and creates the ParserHandle bound to this streamer. */
    function replaceConfig(config)
    {
      // Deep-copy the config so we can edit it
      var configCopy = copy(config);
      configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
      if (!config.step && !config.chunk)
        configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
      this._handle = new ParserHandle(configCopy);
      this._handle.streamer = this;
      this._config = configCopy; // persist the copy to the caller
    }
  }
  /**
   * Streamer that downloads its input with XMLHttpRequest, optionally in
   * byte ranges of `config.chunkSize` (default Papa.RemoteChunkSize).
   * Requests are asynchronous on the main thread and synchronous inside a
   * worker.
   *
   * @param {Object} [config]  Parse configuration. `chunkSize` is filled in
   *        on the passed object when missing. `withCredentials` and
   *        `downloadRequestHeaders` are applied to each request.
   */
  function NetworkStreamer(config)
  {
    config = config || {};
    if (!config.chunkSize)
      config.chunkSize = Papa.RemoteChunkSize;
    ChunkStreamer.call(this, config);

    var xhr;

    if (IS_WORKER)
    {
      // The request is synchronous here, so the response is available
      // as soon as _readChunk returns.
      this._nextChunk = function()
      {
        this._readChunk();
        this._chunkLoaded();
      };
    }
    else
    {
      // Asynchronous: _chunkLoaded is invoked by the xhr `onload` handler.
      this._nextChunk = function()
      {
        this._readChunk();
      };
    }

    /** Starts downloading and parsing `url`. */
    this.stream = function(url)
    {
      this._input = url;
      this._nextChunk(); // Starts streaming
    };

    /** Issues the request for the next byte range (or the whole resource when chunking is off). */
    this._readChunk = function()
    {
      if (this._finished)
      {
        this._chunkLoaded();
        return;
      }

      xhr = new XMLHttpRequest();

      if (this._config.withCredentials)
      {
        xhr.withCredentials = this._config.withCredentials;
      }

      if (!IS_WORKER)
      {
        xhr.onload = bindFunction(this._chunkLoaded, this);
        xhr.onerror = bindFunction(this._chunkError, this);
      }

      xhr.open('GET', this._input, !IS_WORKER);

      // Headers can only be set once the request state is OPENED
      if (this._config.downloadRequestHeaders)
      {
        var headers = this._config.downloadRequestHeaders;

        for (var headerName in headers)
        {
          xhr.setRequestHeader(headerName, headers[headerName]);
        }
      }

      if (this._config.chunkSize)
      {
        var end = this._start + this._config.chunkSize - 1; // minus one because byte range is inclusive
        xhr.setRequestHeader('Range', 'bytes='+this._start+'-'+end);
        xhr.setRequestHeader('If-None-Match', 'webkit-no-cache'); // https://bugs.webkit.org/show_bug.cgi?id=82672
      }

      try {
        xhr.send();
      }
      catch (err) {
        this._chunkError(err.message);
      }

      if (IS_WORKER && xhr.status === 0)
        this._chunkError();
      else
        this._start += this._config.chunkSize;
    };

    /** Handles a completed response: checks the status, updates the finished flag, and parses the text. */
    this._chunkLoaded = function()
    {
      if (xhr.readyState != 4)
        return;

      if (xhr.status < 200 || xhr.status >= 400)
      {
        this._chunkError();
        return;
      }

      // `_start` already points at the first byte of the NEXT range. Valid
      // offsets are 0..size-1, so the download is complete once `_start`
      // reaches `size`; using `>` here would issue one more, unsatisfiable
      // range request whenever the size is an exact multiple of the chunk size.
      this._finished = !this._config.chunkSize || this._start >= getFileSize(xhr);
      this.parseChunk(xhr.responseText);
    };

    /** Reports a failed request, preferring the HTTP status text over the supplied message. */
    this._chunkError = function(errorMessage)
    {
      var errorText = xhr.statusText || errorMessage;
      this._sendError(errorText);
    };

    /**
     * Total resource size taken from the part after '/' in the Content-Range
     * header, or -1 when the header is absent (which ends the stream).
     */
    function getFileSize(xhr)
    {
      var contentRange = xhr.getResponseHeader('Content-Range');
      if (contentRange === null) { // no content range, then finish!
        return -1;
      }
      return parseInt(contentRange.substr(contentRange.lastIndexOf('/') + 1), 10);
    }
  }
  NetworkStreamer.prototype = Object.create(ChunkStreamer.prototype);
  NetworkStreamer.prototype.constructor = NetworkStreamer;
  /**
   * Streamer for File/Blob-like inputs. Reads the input in slices of
   * `config.chunkSize` (default Papa.LocalChunkSize) with FileReader when it
   * exists, otherwise with FileReaderSync, and feeds each slice's text to
   * parseChunk.
   *
   * @param {Object} [config]  Parse configuration; `chunkSize` is filled in
   *        on the passed object when missing. `encoding` is forwarded to readAsText.
   */
  function FileStreamer(config)
  {
    config = config || {};
    if (!config.chunkSize)
      config.chunkSize = Papa.LocalChunkSize;
    ChunkStreamer.call(this, config);

    // FileReader is better than FileReaderSync (even in worker) - see http://stackoverflow.com/q/24708649/1048862
    // But Firefox is a pill, too - see issue #76: https://github.com/mholt/PapaParse/issues/76
    // `typeof` rather than isFunction: Safari doesn't consider it a function - see issue #105
    var hasAsyncReader = typeof FileReader !== 'undefined';
    var fileReader;
    var sliceFn;

    /** Reports the reader's error through the shared error channel. */
    this._chunkError = function()
    {
      this._sendError(fileReader.error);
    };

    /** Receives the text of one slice, advances the offset, and parses the text. */
    this._chunkLoaded = function(event)
    {
      // Very important to increment start each time before handling results
      this._start += this._config.chunkSize;
      var reachedEnd = this._start >= this._input.size;
      this._finished = !this._config.chunkSize || reachedEnd;
      this.parseChunk(event.target.result);
    };

    /** Reads the next slice (or the whole input when chunking is off). */
    this._readChunk = function()
    {
      var source = this._input;
      if (this._config.chunkSize)
      {
        var stop = Math.min(this._start + this._config.chunkSize, this._input.size);
        source = sliceFn.call(source, this._start, stop);
      }

      var text = fileReader.readAsText(source, this._config.encoding);
      if (hasAsyncReader)
        return; // the reader's onload handler delivers the result

      // Synchronous reader: mimic the async signature
      this._chunkLoaded({ target: { result: text } });
    };

    /** Reads another slice unless the input is exhausted or the preview limit is reached. */
    this._nextChunk = function()
    {
      if (this._finished)
        return;
      if (this._config.preview && !(this._rowCount < this._config.preview))
        return;
      this._readChunk();
    };

    /** Starts reading and parsing `file`. */
    this.stream = function(file)
    {
      this._input = file;
      sliceFn = file.slice || file.webkitSlice || file.mozSlice;

      if (!hasAsyncReader)
      {
        fileReader = new FileReaderSync(); // Hack for running in a web worker in Firefox
      }
      else
      {
        fileReader = new FileReader(); // Preferred method of reading files, even in workers
        fileReader.onload = bindFunction(this._chunkLoaded, this);
        fileReader.onerror = bindFunction(this._chunkError, this);
      }

      this._nextChunk(); // Starts streaming
    };
  }
  FileStreamer.prototype = Object.create(ChunkStreamer.prototype);
  FileStreamer.prototype.constructor = FileStreamer;
  /**
   * Streamer for in-memory strings. Feeds the string to parseChunk in pieces
   * of `config.chunkSize` characters, or all at once when no chunk size is set.
   *
   * @param {Object} [config]  Parse configuration.
   */
  function StringStreamer(config)
  {
    config = config || {};
    ChunkStreamer.call(this, config);

    // Portion of the input that has not been handed to the parser yet
    var remaining;

    /**
     * Starts parsing `s`.
     * @returns {*} whatever parseChunk returns for the first chunk.
     */
    this.stream = function(s)
    {
      remaining = s;
      return this._nextChunk();
    };

    /** Cuts the next chunk off the remaining text and parses it; does nothing once finished. */
    this._nextChunk = function()
    {
      if (this._finished) return;
      var size = this._config.chunkSize;
      var chunk = size ? remaining.substr(0, size) : remaining;
      remaining = size ? remaining.substr(size) : '';
      this._finished = !remaining;
      return this.parseChunk(chunk);
    };
  }
  // Inherit from ChunkStreamer like the other streamers do (the prototype must
  // not be derived from StringStreamer's own prototype).
  StringStreamer.prototype = Object.create(ChunkStreamer.prototype);
  StringStreamer.prototype.constructor = StringStreamer;
  /**
   * Streamer for readable streams exposing on()/removeListener(). Incoming
   * chunks are queued; one chunk is parsed at a time and the next is taken
   * from the queue whenever the parser asks for it via _nextChunk.
   *
   * @param {Object} [config]  Parse configuration; `encoding` is passed to
   *        toString() for chunks that are not strings.
   */
  function ReadableStreamStreamer(config)
  {
    config = config || {};
    ChunkStreamer.call(this, config);

    var pending = [];        // chunks received but not parsed yet
    var parseOnArrival = true; // true while the parser is waiting for data

    /** Detaches the three listeners installed by stream(). */
    this._streamCleanUp = bindFunction(function()
    {
      var source = this._input;
      source.removeListener('data', this._streamData);
      source.removeListener('end', this._streamEnd);
      source.removeListener('error', this._streamError);
    }, this);

    /** 'error' handler: detaches the listeners, then reports the error's message. */
    this._streamError = bindFunction(function(error)
    {
      this._streamCleanUp();
      this._sendError(error.message);
    }, this);

    /** 'data' handler: queues the chunk as text and parses it right away if the parser is idle. */
    this._streamData = bindFunction(function(chunk)
    {
      try
      {
        var text = typeof chunk === 'string' ? chunk : chunk.toString(this._config.encoding);
        pending.push(text);

        if (parseOnArrival)
        {
          parseOnArrival = false;
          this.parseChunk(pending.shift());
        }
      }
      catch (error)
      {
        this._streamError(error);
      }
    }, this);

    /** 'end' handler: detaches the listeners, marks the stream finished, and pushes a final empty chunk. */
    this._streamEnd = bindFunction(function()
    {
      this._streamCleanUp();
      this._finished = true;
      this._streamData('');
    }, this);

    /** Parses the next queued chunk, or flags that the next arriving chunk should be parsed immediately. */
    this._nextChunk = function()
    {
      if (!pending.length)
      {
        parseOnArrival = true;
        return;
      }
      this.parseChunk(pending.shift());
    };

    /** Starts consuming `stream` by attaching the data/end/error listeners. */
    this.stream = function(stream)
    {
      this._input = stream;

      this._input.on('data', this._streamData);
      this._input.on('end', this._streamEnd);
      this._input.on('error', this._streamError);
    };
  }
  ReadableStreamStreamer.prototype = Object.create(ChunkStreamer.prototype);
  ReadableStreamStreamer.prototype.constructor = ReadableStreamStreamer;
  /**
   * Wraps the core Parser with delimiter/newline detection, header handling,
   * dynamic typing, empty-line skipping, and pause/resume/abort control.
   * Use one ParserHandle per entire CSV file or string.
   *
   * @param {Object} _config  Parse configuration. It is modified in place: a
   *        user `step` callback is wrapped, and detected `newline` /
   *        `delimiter` values are written back to it.
   */
  function ParserHandle(_config)
  {
    // One goal is to minimize the use of regular expressions...
    // FLOAT matches an optionally signed decimal number with optional exponent, surrounded by optional whitespace
    var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;

    var self = this;
    var _stepCounter = 0; // Number of times step was called (number of rows parsed)
    var _input; // The input being parsed
    var _parser; // The core parser being used
    var _paused = false; // Whether we are paused or not
    var _aborted = false; // Whether the parser has aborted or not
    var _delimiterError; // Temporary state between delimiter detection and processing results
    var _fields = []; // Fields are from the header row of the input, if there is one
    var _results = { // The last results returned from the parser
      data: [],
      errors: [],
      meta: {}
    };

    // Wrap the user's step callback so results are post-processed (header,
    // typing) first and the preview limit is enforced per row.
    if (isFunction(_config.step))
    {
      var userStep = _config.step;
      _config.step = function(results)
      {
        _results = results;

        if (needsHeaderRow())
          processResults();
        else // only call user's step function after header row
        {
          processResults();

          // It's possible that this line was empty and there's no row here after all
          if (_results.data.length === 0)
            return;

          _stepCounter += results.data.length;
          if (_config.preview && _stepCounter > _config.preview)
            _parser.abort();
          else
            userStep(_results, self);
        }
      };
    }

    /**
     * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
     * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
     * when an input comes in multiple chunks, like from a file.
     *
     * @returns {Object} the processed results, or `{ meta: { paused: true } }`
     *          when the handle was paused during parsing.
     */
    this.parse = function(input, baseIndex, ignoreLastRow)
    {
      if (!_config.newline)
        _config.newline = guessLineEndings(input);

      _delimiterError = false;
      if (!_config.delimiter)
      {
        var delimGuess = guessDelimiter(input, _config.newline);
        if (delimGuess.successful)
          _config.delimiter = delimGuess.bestDelimiter;
        else
        {
          _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
          _config.delimiter = Papa.DefaultDelimiter;
        }
        _results.meta.delimiter = _config.delimiter;
      }
      else if(isFunction(_config.delimiter))
      {
        // A delimiter function is called with the input and replaced by its result
        _config.delimiter = _config.delimiter(input);
        _results.meta.delimiter = _config.delimiter;
      }

      var parserConfig = copy(_config);
      if (_config.preview && _config.header)
        parserConfig.preview++; // to compensate for header row

      _input = input;
      _parser = new Parser(parserConfig);
      _results = _parser.parse(_input, baseIndex, ignoreLastRow);
      processResults();
      return _paused ? { meta: { paused: true } } : (_results || { meta: { paused: false } });
    };

    /** @returns {boolean} whether the handle is currently paused. */
    this.paused = function()
    {
      return _paused;
    };

    /** Pauses parsing: aborts the current parser and keeps the input from the parser's character index onward. */
    this.pause = function()
    {
      _paused = true;
      _parser.abort();
      _input = _input.substr(_parser.getCharIndex());
    };

    /** Resumes after pause() by feeding the retained input back through the streamer. */
    this.resume = function()
    {
      _paused = false;
      self.streamer.parseChunk(_input);
    };

    /** @returns {boolean} whether abort() has been called. */
    this.aborted = function ()
    {
      return _aborted;
    };

    /** Aborts parsing, flags the results as aborted, and calls the `complete` callback with them. */
    this.abort = function()
    {
      _aborted = true;
      _parser.abort();
      _results.meta.aborted = true;
      if (isFunction(_config.complete))
        _config.complete(_results);
      _input = '';
    };

    /**
     * Post-processes `_results` in place: records a pending delimiter-detection
     * error, drops empty lines when configured, captures the header row, then
     * applies header mapping and dynamic typing.
     */
    function processResults()
    {
      if (_results && _delimiterError)
      {
        addError('Delimiter', 'UndetectableDelimiter', 'Unable to auto-detect delimiting character; defaulted to \''+Papa.DefaultDelimiter+'\'');
        _delimiterError = false;
      }

      if (_config.skipEmptyLines)
      {
        // A row consisting of a single empty string is treated as an empty line
        for (var i = 0; i < _results.data.length; i++)
          if (_results.data[i].length === 1 && _results.data[i][0] === '')
            _results.data.splice(i--, 1);
      }

      if (needsHeaderRow())
        fillHeaderFields();

      return applyHeaderAndDynamicTyping();
    }

    /** True when headers are enabled but no header fields have been captured yet. */
    function needsHeaderRow()
    {
      return _config.header && _fields.length === 0;
    }

    /** Copies the first row's values into `_fields` and removes that row from the data. */
    function fillHeaderFields()
    {
      if (!_results)
        return;
      // The loop condition re-checks needsHeaderRow(), so it stops once `_fields` is non-empty
      for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
        for (var j = 0; j < _results.data[i].length; j++)
          _fields.push(_results.data[i][j]);
      _results.data.splice(0, 1);
    }

    /** Decides whether dynamic typing applies to `field` (a header name or column index). */
    function shouldApplyDynamicTyping(field) {
      // Cache function values to avoid calling it for each row
      if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
        _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
      }
      // True when the per-field entry is true, or when dynamicTyping itself is the boolean true
      return (_config.dynamicTyping[field] || _config.dynamicTyping) === true
    }

    /** Converts 'true'/'TRUE'/'false'/'FALSE' to booleans and numeric strings to numbers when typing applies. */
    function parseDynamic(field, value)
    {
      if (shouldApplyDynamicTyping(field))
      {
        if (value === 'true' || value === 'TRUE')
          return true;
        else if (value === 'false' || value === 'FALSE')
          return false;
        else
          return tryParseFloat(value);
      }
      return value;
    }

    /**
     * Rewrites each row of `_results.data`: as an object keyed by header field
     * when headers are enabled (surplus values go into '__parsed_extra'),
     * otherwise as an array; values pass through parseDynamic. Records
     * FieldMismatch errors for rows whose length differs from the header.
     */
    function applyHeaderAndDynamicTyping()
    {
      if (!_results || (!_config.header && !_config.dynamicTyping))
        return _results;

      for (var i = 0; i < _results.data.length; i++)
      {
        var row = _config.header ? {} : [];

        for (var j = 0; j < _results.data[i].length; j++)
        {
          var field = j;
          var value = _results.data[i][j];

          if (_config.header)
            field = j >= _fields.length ? '__parsed_extra' : _fields[j];

          value = parseDynamic(field, value);

          if (field === '__parsed_extra')
          {
            row[field] = row[field] || [];
            row[field].push(value);
          }
          else
            row[field] = value;
        }

        _results.data[i] = row;

        if (_config.header)
        {
          // After the inner loop, j equals the number of values parsed in this row
          if (j > _fields.length)
            addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, i);
          else if (j < _fields.length)
            addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, i);
        }
      }

      if (_config.header && _results.meta)
        _results.meta.fields = _fields;
      return _results;
    }

    /**
     * Tries each candidate delimiter on a 10-row preview and picks the one
     * whose rows vary least in field count, provided the average field count
     * exceeds 1.99. Also writes the winner (possibly undefined) to `_config.delimiter`.
     *
     * @returns {{successful: boolean, bestDelimiter: (string|undefined)}}
     */
    function guessDelimiter(input, newline)
    {
      var delimChoices = [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
      var bestDelim, bestDelta, fieldCountPrevRow;

      for (var i = 0; i < delimChoices.length; i++)
      {
        var delim = delimChoices[i];
        var delta = 0, avgFieldCount = 0;
        fieldCountPrevRow = undefined;

        var preview = new Parser({
          delimiter: delim,
          newline: newline,
          preview: 10
        }).parse(input);

        for (var j = 0; j < preview.data.length; j++)
        {
          var fieldCount = preview.data[j].length;
          avgFieldCount += fieldCount;

          if (typeof fieldCountPrevRow === 'undefined')
          {
            fieldCountPrevRow = fieldCount;
            continue;
          }
          else if (fieldCount > 1)
          {
            // Accumulate how much the field count changes from row to row
            delta += Math.abs(fieldCount - fieldCountPrevRow);
            fieldCountPrevRow = fieldCount;
          }
        }

        if (preview.data.length > 0)
          avgFieldCount /= preview.data.length;

        if ((typeof bestDelta === 'undefined' || delta < bestDelta)
          && avgFieldCount > 1.99)
        {
          bestDelta = delta;
          bestDelim = delim;
        }
      }

      _config.delimiter = bestDelim;

      return {
        successful: !!bestDelim,
        bestDelimiter: bestDelim
      }
    }

    /**
     * Guesses the line ending from the first 1 MB of input: '\n' when there is
     * no '\r' or a '\n' occurs before the first '\r'; otherwise '\r\n' when at
     * least half of the '\r'-separated pieces start with '\n', else '\r'.
     */
    function guessLineEndings(input)
    {
      input = input.substr(0, 1024*1024); // max length 1 MB

      var r = input.split('\r');

      var n = input.split('\n');

      var nAppearsFirst = (n.length > 1 && n[0].length < r[0].length);

      if (r.length === 1 || nAppearsFirst)
        return '\n';

      var numWithN = 0;
      for (var i = 0; i < r.length; i++)
      {
        if (r[i][0] === '\n')
          numWithN++;
      }

      return numWithN >= r.length / 2 ? '\r\n' : '\r';
    }

    /** Returns the parsed number when `val` matches FLOAT, otherwise `val` unchanged. */
    function tryParseFloat(val)
    {
      var isNumber = FLOAT.test(val);
      return isNumber ? parseFloat(val) : val;
    }

    /** Appends an error record to the current results. */
    function addError(type, code, msg, row)
    {
      _results.errors.push({
        type: type,
        code: code,
        message: msg,
        row: row
      });
    }
  }
  /**
   * The core parser implements speedy and correct CSV parsing.
   *
   * Recognised config properties:
   *   delimiter - field separator; anything that is not a string, or that is
   *               listed in Papa.BAD_DELIMITERS, falls back to ','
   *   newline   - '\r', '\n' or '\r\n'; anything else falls back to '\n'
   *   comments  - true (meaning '#'), or a string prefix marking comment
   *               lines; invalid values disable comment handling
   *   step      - optional callback invoked with the results of each row
   *   preview   - maximum number of rows to parse (falsy means no limit)
   *   fastMode  - true forces the split-based fast path, false forbids it;
   *               otherwise it is used when the input has no quote character
   *   quoteChar - character used to quote fields (default '"')
   *
   * Throws the string 'Comment character same as delimiter' when comments
   * equals the (validated) delimiter.
   */
  function Parser(config)
  {
    // Unpack the config object
    config = config || {};
    var delim = config.delimiter;
    var newline = config.newline;
    var comments = config.comments;
    var step = config.step;
    var preview = config.preview;
    var fastMode = config.fastMode;
    var quoteChar = config.quoteChar || '"';

    // Delimiter must be valid
    if (typeof delim !== 'string'
      || Papa.BAD_DELIMITERS.indexOf(delim) > -1)
      delim = ',';

    // Comment character must be valid
    if (comments === delim)
      throw 'Comment character same as delimiter';
    else if (comments === true)
      comments = '#';
    else if (typeof comments !== 'string'
      || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
      comments = false;

    // Newline must be valid: \r, \n, or \r\n
    if (newline != '\n' && newline != '\r' && newline != '\r\n')
      newline = '\n';

    // Matches an escaped (doubled) quote character. The quote character is
    // escaped before being compiled so that regex metacharacters such as
    // '|', '.', '*' or '(' are treated literally.
    var quoteCharRegex = new RegExp(
      String(quoteChar + quoteChar).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'),
      'g'
    );

    // We're gonna need these at the Parser scope
    var cursor = 0;
    var aborted = false;

    /**
     * Parses a chunk of CSV text.
     *
     * @param {string} input Text to parse; a non-string throws the string
     *        'Input must be a string'.
     * @param {number} [baseIndex] Offset added to the reported meta.cursor.
     * @param {boolean} [ignoreLastRow] When truthy, the final (possibly
     *        incomplete) row is not emitted.
     * @returns {{data: Array, errors: Array, meta: Object}} Parsed rows,
     *          errors, and meta (delimiter, linebreak, aborted, truncated,
     *          cursor).
     */
    this.parse = function(input, baseIndex, ignoreLastRow)
    {
      // For some reason, in Chrome, this speeds things up (!?)
      if (typeof input !== 'string')
        throw 'Input must be a string';

      // We don't need to compute some of these every time parse() is called,
      // but having them in a more local scope seems to perform better
      var inputLen = input.length,
        delimLen = delim.length,
        newlineLen = newline.length,
        commentsLen = comments.length;
      var stepIsFunction = isFunction(step);

      // Establish starting state
      cursor = 0;
      var data = [], errors = [], row = [], lastCursor = 0;

      if (!input)
        return returnable();

      if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1))
      {
        // Fast path: no quoting to honour, so plain splits are enough
        var rows = input.split(newline);
        for (var i = 0; i < rows.length; i++)
        {
          var line = rows[i];
          cursor += line.length;
          if (i !== rows.length - 1)
            cursor += newline.length;
          else if (ignoreLastRow)
            return returnable();
          if (comments && line.substr(0, commentsLen) === comments)
            continue;
          if (stepIsFunction)
          {
            data = [];
            pushRow(line.split(delim));
            doStep();
            if (aborted)
              return returnable();
          }
          else
            pushRow(line.split(delim));
          if (preview && i >= preview)
          {
            data = data.slice(0, preview);
            return returnable(true);
          }
        }
        return returnable();
      }

      var nextDelim = input.indexOf(delim, cursor);
      var nextNewline = input.indexOf(newline, cursor);
      var quoteSearch;

      // Parser loop
      for (;;)
      {
        // Field has opening quote
        if (input[cursor] === quoteChar)
        {
          // Start our search for the closing quote where the cursor is
          quoteSearch = cursor;

          // Skip the opening quote
          cursor++;

          for (;;)
          {
            // Find closing quote
            quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);

            if (quoteSearch === -1)
            {
              if (!ignoreLastRow)
              {
                // No closing quote... what a pity
                errors.push({
                  type: 'Quotes',
                  code: 'MissingQuotes',
                  message: 'Quoted field unterminated',
                  row: data.length, // row has yet to be inserted
                  index: cursor
                });
              }
              return finish();
            }

            if (quoteSearch === inputLen - 1)
            {
              // Closing quote at EOF
              var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
              return finish(value);
            }

            // If this quote is escaped, it's part of the data; skip it
            if (input[quoteSearch + 1] === quoteChar)
            {
              quoteSearch++;
              continue;
            }

            // Compare delimLen characters (not just one) so that
            // multi-character delimiters are recognised after a quote.
            if (input.substr(quoteSearch + 1, delimLen) === delim)
            {
              // Closing quote followed by delimiter
              row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
              cursor = quoteSearch + 1 + delimLen;
              nextDelim = input.indexOf(delim, cursor);
              nextNewline = input.indexOf(newline, cursor);
              break;
            }

            if (input.substr(quoteSearch + 1, newlineLen) === newline)
            {
              // Closing quote followed by newline
              row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
              saveRow(quoteSearch + 1 + newlineLen);
              nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field

              if (stepIsFunction)
              {
                doStep();
                if (aborted)
                  return returnable();
              }

              if (preview && data.length >= preview)
                return returnable(true);

              break;
            }
          }

          continue;
        }

        // Comment found at start of new line
        if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments)
        {
          if (nextNewline === -1) // Comment ends at EOF
            return returnable();
          cursor = nextNewline + newlineLen;
          nextNewline = input.indexOf(newline, cursor);
          nextDelim = input.indexOf(delim, cursor);
          continue;
        }

        // Next delimiter comes before next newline, so we've reached end of field
        if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1))
        {
          row.push(input.substring(cursor, nextDelim));
          cursor = nextDelim + delimLen;
          nextDelim = input.indexOf(delim, cursor);
          continue;
        }

        // End of row
        if (nextNewline !== -1)
        {
          row.push(input.substring(cursor, nextNewline));
          saveRow(nextNewline + newlineLen);

          if (stepIsFunction)
          {
            doStep();
            if (aborted)
              return returnable();
          }

          if (preview && data.length >= preview)
            return returnable(true);

          continue;
        }

        break;
      }

      return finish();

      /** Stores a completed row and remembers where parsing got to. */
      function pushRow(row)
      {
        data.push(row);
        lastCursor = cursor;
      }

      /**
       * Appends the remaining input from cursor to the end into
       * row, saves the row, calls step, and returns the results.
       */
      function finish(value)
      {
        if (ignoreLastRow)
          return returnable();
        if (typeof value === 'undefined')
          value = input.substr(cursor);
        row.push(value);
        cursor = inputLen; // important in case parsing is paused
        pushRow(row);
        if (stepIsFunction)
          doStep();
        return returnable();
      }

      /**
       * Appends the current row to the results. It sets the cursor
       * to newCursor and finds the nextNewline. The caller should
       * take care to execute user's step function and check for
       * preview and end parsing if necessary.
       */
      function saveRow(newCursor)
      {
        cursor = newCursor;
        pushRow(row);
        row = [];
        nextNewline = input.indexOf(newline, cursor);
      }

      /** Returns an object with the results, errors, and meta. */
      function returnable(stopped)
      {
        return {
          data: data,
          errors: errors,
          meta: {
            delimiter: delim,
            linebreak: newline,
            aborted: aborted,
            truncated: !!stopped,
            cursor: lastCursor + (baseIndex || 0)
          }
        };
      }

      /** Executes the user's step function and resets data & errors. */
      function doStep()
      {
        step(returnable());
        data = [], errors = [];
      }
    };

    /** Sets the abort flag */
    this.abort = function()
    {
      aborted = true;
    };

    /** Gets the cursor position */
    this.getCharIndex = function()
    {
      return cursor;
    };
  }
  // If you need to load Papa Parse asynchronously and you also need worker threads, hard-code
  // the script path here. See: https://github.com/mholt/PapaParse/issues/87#issuecomment-57885358
  /** Returns the src of the last <script> element in the document, or '' when there is none. */
  function getScriptPath()
  {
    var scriptTags = document.getElementsByTagName('script');
    if (!scriptTags.length)
      return '';
    var lastTag = scriptTags[scriptTags.length - 1];
    return lastTag.src;
  }
  /**
   * Creates a worker running this script, registers it in the workers
   * table under a fresh id, and returns it. Returns false when
   * Papa.WORKERS_SUPPORTED is falsy.
   *
   * Throws an Error when the script was not loaded synchronously and
   * Papa.SCRIPT_PATH has not been set.
   */
  function newWorker()
  {
    if (!Papa.WORKERS_SUPPORTED)
      return false;

    var pathUnknown = !LOADED_SYNC && Papa.SCRIPT_PATH === null;
    if (pathUnknown)
    {
      throw new Error(
        'Script path cannot be determined automatically when Papa Parse is loaded asynchronously. ' +
        'You need to set Papa.SCRIPT_PATH manually.'
      );
    }

    var scriptUrl = Papa.SCRIPT_PATH || AUTO_SCRIPT_PATH;
    // Append 'papaworker' to the search string to tell papaparse that this is our worker.
    var separator = scriptUrl.indexOf('?') === -1 ? '?' : '&';
    scriptUrl += separator + 'papaworker';

    var worker = new global.Worker(scriptUrl);
    worker.onmessage = mainThreadReceivedMessage;
    worker.id = workerIdCounter++;
    workers[worker.id] = worker;
    return worker;
  }
  /**
   * Callback when main thread receives a message.
   *
   * Dispatches the message to the originating worker's userError,
   * userStep or userChunk callback, and completes the worker once the
   * message is flagged as finished (unless the user aborted).
   */
  function mainThreadReceivedMessage(e)
  {
    var message = e.data;
    var id = message.workerId;
    var worker = workers[id];
    var wasAborted = false;

    if (message.error)
    {
      worker.userError(message.error, message.file);
    }
    else if (message.results && message.results.data)
    {
      // Handle passed to user callbacks; only abort is supported here.
      var handle = {
        abort: function() {
          wasAborted = true;
          completeWorker(id, { data: [], errors: [], meta: { aborted: true } });
        },
        pause: notImplemented,
        resume: notImplemented
      };

      if (isFunction(worker.userStep))
      {
        // Feed the rows to the step callback one at a time.
        var rows = message.results.data;
        for (var idx = 0; idx < rows.length && !wasAborted; idx++)
        {
          worker.userStep({
            data: [rows[idx]],
            errors: message.results.errors,
            meta: message.results.meta
          }, handle);
        }
        delete message.results; // free memory ASAP
      }
      else if (isFunction(worker.userChunk))
      {
        worker.userChunk(message.results, handle, message.file);
        delete message.results;
      }
    }

    if (message.finished && !wasAborted)
      completeWorker(id, message.results);
  }
  /**
   * Invokes the worker's userComplete callback (when it is a function)
   * with results, then terminates the worker and removes it from the
   * workers table.
   */
  function completeWorker(workerId, results)
  {
    var finished = workers[workerId];
    var hasCallback = isFunction(finished.userComplete);
    if (hasCallback)
    {
      finished.userComplete(results);
    }
    finished.terminate();
    delete workers[workerId];
  }
  /** Placeholder for unsupported handle operations; always throws the string 'Not implemented.'. */
  function notImplemented()
  {
    var reason = 'Not implemented.';
    throw reason;
  }
  /**
   * Callback when worker thread receives a message.
   *
   * Records the worker id from the first message, parses msg.input with
   * Papa.parse using msg.config, and posts the results back to the main
   * thread flagged as finished. Messages with no payload are ignored.
   *
   * @param {Object} e Message event; e.data is expected to carry
   *        workerId, input and config.
   */
  function workerThreadReceivedMessage(e)
  {
    var msg = e.data;

    // An empty message carries nothing to parse; bail out instead of
    // dereferencing it below.
    if (!msg)
      return;

    if (typeof Papa.WORKER_ID === 'undefined')
      Papa.WORKER_ID = msg.workerId;

    if (typeof msg.input === 'string')
    {
      global.postMessage({
        workerId: Papa.WORKER_ID,
        results: Papa.parse(msg.input, msg.config),
        finished: true
      });
    }
    else if ((global.File && msg.input instanceof global.File) || msg.input instanceof Object) // thank you, Safari (see issue #106)
    {
      var results = Papa.parse(msg.input, msg.config);
      // Only post when the parse call actually handed back results.
      if (results)
      {
        global.postMessage({
          workerId: Papa.WORKER_ID,
          results: results,
          finished: true
        });
      }
    }
  }
  /**
   * Makes a deep copy of an array or object (mostly).
   *
   * Non-objects and null are returned as-is. Arrays are copied into
   * arrays, everything else into plain objects, recursing over every
   * enumerable key reported by for...in.
   *
   * @param {*} obj Value to copy.
   * @returns {*} The copy, or obj itself when it is not a non-null object.
   */
  function copy(obj)
  {
    // typeof null is 'object', so null needs its own check; otherwise it
    // would be turned into an empty object.
    if (typeof obj !== 'object' || obj === null)
      return obj;
    var cpy = Array.isArray(obj) ? [] : {};
    for (var key in obj)
      cpy[key] = copy(obj[key]);
    return cpy;
  }
  /**
   * Wraps f so that it is always invoked with self as its this value.
   * The wrapper forwards all arguments and does not pass f's return
   * value back to the caller.
   */
  function bindFunction(f, self)
  {
    var bound = function()
    {
      f.apply(self, arguments);
    };
    return bound;
  }
  /** Reports whether func is callable, i.e. typeof func is 'function'. */
  function isFunction(func)
  {
    var kind = typeof func;
    return kind === 'function';
  }
  1313. return Papa;
  1314. }));